[{"data":1,"prerenderedAt":1926},["ShallowReactive",2],{"doc-\u002Fexamples\u002Fpython-text-analysis-script-example":3},{"id":4,"title":5,"body":6,"description":1919,"extension":1920,"meta":1921,"navigation":86,"path":1922,"seo":1923,"stem":1924,"__hash__":1925},"content\u002Fexamples\u002Fpython-text-analysis-script-example.md","Python Text Analysis Script Example",{"type":7,"value":8,"toc":1891},"minimark",[9,13,17,20,36,39,44,47,328,332,335,338,352,360,364,367,393,400,404,407,556,561,593,604,608,644,647,651,654,822,826,831,846,856,860,891,901,905,924,927,931,944,947,951,1000,1003,1023,1026,1191,1195,1198,1201,1221,1224,1464,1467,1515,1522,1525,1528,1542,1545,1646,1650,1653,1656,1719,1722,1745,1748,1765,1772,1776,1779,1802,1805,1809,1813,1819,1827,1834,1838,1841,1848,1851,1855,1884,1887],[10,11,5],"h1",{"id":12},"python-text-analysis-script-example",[14,15,16],"p",{},"This beginner-friendly example shows how to analyze text in Python.",[14,18,19],{},"You will build a small script that:",[21,22,23,27,30,33],"ul",{},[24,25,26],"li",{},"Counts characters",[24,28,29],{},"Counts words",[24,31,32],{},"Counts lines",[24,34,35],{},"Counts how often each word appears",[14,37,38],{},"It is a good practice project because it uses basic Python tools in a real script: strings, loops, dictionaries, and printing results clearly.",[40,41,43],"h2",{"id":42},"quick-example","Quick example",[14,45,46],{},"Use this small script if you want a fast example of basic text analysis without reading from a file.",[48,49,54],"pre",{"className":50,"code":51,"language":52,"meta":53,"style":53},"language-python shiki shiki-themes material-theme-lighter github-light github-dark","text = \"Python is simple. 
Python is useful.\"\n\nwords = text.lower().replace(\".\", \"\").split()\nprint(\"Characters:\", len(text))\nprint(\"Words:\", len(words))\nprint(\"Lines:\", len(text.splitlines()))\n\ncounts = {}\nfor word in words:\n    counts[word] = counts.get(word, 0) + 1\n\nprint(\"Word counts:\", counts)\n","python","",[55,56,57,81,88,138,167,192,222,227,238,257,302,307],"code",{"__ignoreMap":53},[58,59,62,66,70,74,78],"span",{"class":60,"line":61},"line",1,[58,63,65],{"class":64},"su5hD","text ",[58,67,69],{"class":68},"smGrS","=",[58,71,73],{"class":72},"sjJ54"," \"",[58,75,77],{"class":76},"s_sjI","Python is simple. Python is useful.",[58,79,80],{"class":72},"\"\n",[58,82,84],{"class":60,"line":83},2,[58,85,87],{"emptyLinePlaceholder":86},true,"\n",[58,89,91,94,96,99,103,107,110,113,116,119,121,123,126,129,132,135],{"class":60,"line":90},3,[58,92,93],{"class":64},"words ",[58,95,69],{"class":68},[58,97,98],{"class":64}," text",[58,100,102],{"class":101},"sP7_E",".",[58,104,106],{"class":105},"slqww","lower",[58,108,109],{"class":101},"().",[58,111,112],{"class":105},"replace",[58,114,115],{"class":101},"(",[58,117,118],{"class":72},"\"",[58,120,102],{"class":76},[58,122,118],{"class":72},[58,124,125],{"class":101},",",[58,127,128],{"class":72}," \"\"",[58,130,131],{"class":101},").",[58,133,134],{"class":105},"split",[58,136,137],{"class":101},"()\n",[58,139,141,145,147,149,152,154,156,159,161,164],{"class":60,"line":140},4,[58,142,144],{"class":143},"sptTA","print",[58,146,115],{"class":101},[58,148,118],{"class":72},[58,150,151],{"class":76},"Characters:",[58,153,118],{"class":72},[58,155,125],{"class":101},[58,157,158],{"class":143}," 
len",[58,160,115],{"class":101},[58,162,163],{"class":105},"text",[58,165,166],{"class":101},"))\n",[58,168,170,172,174,176,179,181,183,185,187,190],{"class":60,"line":169},5,[58,171,144],{"class":143},[58,173,115],{"class":101},[58,175,118],{"class":72},[58,177,178],{"class":76},"Words:",[58,180,118],{"class":72},[58,182,125],{"class":101},[58,184,158],{"class":143},[58,186,115],{"class":101},[58,188,189],{"class":105},"words",[58,191,166],{"class":101},[58,193,195,197,199,201,204,206,208,210,212,214,216,219],{"class":60,"line":194},6,[58,196,144],{"class":143},[58,198,115],{"class":101},[58,200,118],{"class":72},[58,202,203],{"class":76},"Lines:",[58,205,118],{"class":72},[58,207,125],{"class":101},[58,209,158],{"class":143},[58,211,115],{"class":101},[58,213,163],{"class":105},[58,215,102],{"class":101},[58,217,218],{"class":105},"splitlines",[58,220,221],{"class":101},"()))\n",[58,223,225],{"class":60,"line":224},7,[58,226,87],{"emptyLinePlaceholder":86},[58,228,230,233,235],{"class":60,"line":229},8,[58,231,232],{"class":64},"counts ",[58,234,69],{"class":68},[58,236,237],{"class":101}," {}\n",[58,239,241,245,248,251,254],{"class":60,"line":240},9,[58,242,244],{"class":243},"sVHd0","for",[58,246,247],{"class":64}," word ",[58,249,250],{"class":243},"in",[58,252,253],{"class":64}," words",[58,255,256],{"class":101},":\n",[58,258,260,263,266,269,272,275,278,280,283,285,287,289,293,296,299],{"class":60,"line":259},10,[58,261,262],{"class":64},"    counts",[58,264,265],{"class":101},"[",[58,267,268],{"class":64},"word",[58,270,271],{"class":101},"]",[58,273,274],{"class":68}," =",[58,276,277],{"class":64}," counts",[58,279,102],{"class":101},[58,281,282],{"class":105},"get",[58,284,115],{"class":101},[58,286,268],{"class":105},[58,288,125],{"class":101},[58,290,292],{"class":291},"srdBf"," 0",[58,294,295],{"class":101},")",[58,297,298],{"class":68}," +",[58,300,301],{"class":291}," 
1\n",[58,303,305],{"class":60,"line":304},11,[58,306,87],{"emptyLinePlaceholder":86},[58,308,310,312,314,316,319,321,323,325],{"class":60,"line":309},12,[58,311,144],{"class":143},[58,313,115],{"class":101},[58,315,118],{"class":72},[58,317,318],{"class":76},"Word counts:",[58,320,118],{"class":72},[58,322,125],{"class":101},[58,324,277],{"class":105},[58,326,327],{"class":101},")\n",[40,329,331],{"id":330},"what-this-script-does","What this script does",[14,333,334],{},"This example is useful because it stays small, but still teaches real Python skills.",[14,336,337],{},"It shows how to:",[21,339,340,343,346,349],{},[24,341,342],{},"Use a string as input text",[24,344,345],{},"Analyze text by counting characters, words, and lines",[24,347,348],{},"Count repeated words with a dictionary",[24,350,351],{},"Build a simple script that you can improve later",[14,353,354,355,102],{},"If you are new to strings, see ",[356,357,359],"a",{"href":358},"\u002Flearn\u002Fpython-strings-explained-basics-and-examples\u002F","Python strings explained: basics and examples",[40,361,363],{"id":362},"skills-you-practice","Skills you practice",[14,365,366],{},"By working through this example, you practice:",[21,368,369,372,382,387,390],{},[24,370,371],{},"Working with strings",[24,373,374,375,381],{},"Using ",[356,376,378],{"href":377},"\u002Freference\u002Fpython-string-split-method\u002F",[55,379,380],{},"split()"," to break text into words",[24,383,384,385],{},"Looping through data with ",[55,386,244],{},[24,388,389],{},"Storing counts in a dictionary",[24,391,392],{},"Printing readable results",[14,394,395,396,102],{},"If dictionaries are new to you, read ",[356,397,399],{"href":398},"\u002Flearn\u002Fpython-dictionaries-explained\u002F","Python dictionaries explained",[40,401,403],{"id":402},"basic-version-count-lines-words-and-characters","Basic version: count lines, words, and characters",[14,405,406],{},"Start with a simple script that counts the text size in different 
ways.",[48,408,410],{"className":50,"code":409,"language":52,"meta":53,"style":53},"text = \"\"\"Python is simple.\nPython is useful.\nPython is fun to learn.\"\"\"\n\ncharacter_count = len(text)\nword_count = len(text.split())\nline_count = len(text.splitlines())\n\nprint(\"Characters:\", character_count)\nprint(\"Words:\", word_count)\nprint(\"Lines:\", line_count)\n",[55,411,412,424,429,437,441,456,476,495,499,518,537],{"__ignoreMap":53},[58,413,414,416,418,421],{"class":60,"line":61},[58,415,65],{"class":64},[58,417,69],{"class":68},[58,419,420],{"class":72}," \"\"\"",[58,422,423],{"class":76},"Python is simple.\n",[58,425,426],{"class":60,"line":83},[58,427,428],{"class":76},"Python is useful.\n",[58,430,431,434],{"class":60,"line":90},[58,432,433],{"class":76},"Python is fun to learn.",[58,435,436],{"class":72},"\"\"\"\n",[58,438,439],{"class":60,"line":140},[58,440,87],{"emptyLinePlaceholder":86},[58,442,443,446,448,450,452,454],{"class":60,"line":169},[58,444,445],{"class":64},"character_count ",[58,447,69],{"class":68},[58,449,158],{"class":143},[58,451,115],{"class":101},[58,453,163],{"class":105},[58,455,327],{"class":101},[58,457,458,461,463,465,467,469,471,473],{"class":60,"line":194},[58,459,460],{"class":64},"word_count ",[58,462,69],{"class":68},[58,464,158],{"class":143},[58,466,115],{"class":101},[58,468,163],{"class":105},[58,470,102],{"class":101},[58,472,134],{"class":105},[58,474,475],{"class":101},"())\n",[58,477,478,481,483,485,487,489,491,493],{"class":60,"line":224},[58,479,480],{"class":64},"line_count 
",[58,482,69],{"class":68},[58,484,158],{"class":143},[58,486,115],{"class":101},[58,488,163],{"class":105},[58,490,102],{"class":101},[58,492,218],{"class":105},[58,494,475],{"class":101},[58,496,497],{"class":60,"line":229},[58,498,87],{"emptyLinePlaceholder":86},[58,500,501,503,505,507,509,511,513,516],{"class":60,"line":240},[58,502,144],{"class":143},[58,504,115],{"class":101},[58,506,118],{"class":72},[58,508,151],{"class":76},[58,510,118],{"class":72},[58,512,125],{"class":101},[58,514,515],{"class":105}," character_count",[58,517,327],{"class":101},[58,519,520,522,524,526,528,530,532,535],{"class":60,"line":259},[58,521,144],{"class":143},[58,523,115],{"class":101},[58,525,118],{"class":72},[58,527,178],{"class":76},[58,529,118],{"class":72},[58,531,125],{"class":101},[58,533,534],{"class":105}," word_count",[58,536,327],{"class":101},[58,538,539,541,543,545,547,549,551,554],{"class":60,"line":304},[58,540,144],{"class":143},[58,542,115],{"class":101},[58,544,118],{"class":72},[58,546,203],{"class":76},[58,548,118],{"class":72},[58,550,125],{"class":101},[58,552,553],{"class":105}," line_count",[58,555,327],{"class":101},[557,558,560],"h3",{"id":559},"how-it-works","How it works",[21,562,563,569,575,581,587],{},[24,564,565,568],{},[55,566,567],{},"len(text)"," counts every character in the string",[24,570,571,574],{},[55,572,573],{},"text.split()"," breaks the text into words",[24,576,577,580],{},[55,578,579],{},"len(text.split())"," gives the number of words",[24,582,583,586],{},[55,584,585],{},"text.splitlines()"," breaks the text into lines",[24,588,589,592],{},[55,590,591],{},"len(text.splitlines())"," gives the number of lines",[14,594,595,596,599,600,102],{},"If you want a closer look at ",[55,597,598],{},"len()",", see ",[356,601,603],{"href":602},"\u002Freference\u002Fpython-len-function-explained\u002F","Python len() function explained",[557,605,607],{"id":606},"expected-output","Expected 
output",[48,609,611],{"className":50,"code":610,"language":52,"meta":53,"style":53},"Characters: 59\nWords: 11\nLines: 3\n",[55,612,613,624,634],{"__ignoreMap":53},[58,614,615,618,621],{"class":60,"line":61},[58,616,617],{"class":64},"Characters",[58,619,620],{"class":101},":",[58,622,623],{"class":291}," 59\n",[58,625,626,629,631],{"class":60,"line":83},[58,627,628],{"class":64},"Words",[58,630,620],{"class":101},[58,632,633],{"class":291}," 11\n",[58,635,636,639,641],{"class":60,"line":90},[58,637,638],{"class":64},"Lines",[58,640,620],{"class":101},[58,642,643],{"class":291}," 3\n",[14,645,646],{},"The exact character count depends on the text, including spaces, punctuation, and the newline characters between lines.",[40,648,650],{"id":649},"how-word-counting-works","How word counting works",[14,652,653],{},"Now let’s extend the script to count how many times each word appears.",[48,655,657],{"className":50,"code":656,"language":52,"meta":53,"style":53},"text = \"Python is simple. Python is useful. Python is fun.\"\n\ncleaned_text = text.lower().replace(\".\", \"\")\nwords = cleaned_text.split()\n\ncounts = {}\n\nfor word in words:\n    counts[word] = counts.get(word, 0) + 1\n\nprint(\"Words:\", words)\nprint(\"Word counts:\", counts)\n",[55,658,659,672,676,707,722,726,734,738,750,782,786,804],{"__ignoreMap":53},[58,660,661,663,665,667,670],{"class":60,"line":61},[58,662,65],{"class":64},[58,664,69],{"class":68},[58,666,73],{"class":72},[58,668,669],{"class":76},"Python is simple. Python is useful. 
Python is fun.",[58,671,80],{"class":72},[58,673,674],{"class":60,"line":83},[58,675,87],{"emptyLinePlaceholder":86},[58,677,678,681,683,685,687,689,691,693,695,697,699,701,703,705],{"class":60,"line":90},[58,679,680],{"class":64},"cleaned_text ",[58,682,69],{"class":68},[58,684,98],{"class":64},[58,686,102],{"class":101},[58,688,106],{"class":105},[58,690,109],{"class":101},[58,692,112],{"class":105},[58,694,115],{"class":101},[58,696,118],{"class":72},[58,698,102],{"class":76},[58,700,118],{"class":72},[58,702,125],{"class":101},[58,704,128],{"class":72},[58,706,327],{"class":101},[58,708,709,711,713,716,718,720],{"class":60,"line":140},[58,710,93],{"class":64},[58,712,69],{"class":68},[58,714,715],{"class":64}," cleaned_text",[58,717,102],{"class":101},[58,719,134],{"class":105},[58,721,137],{"class":101},[58,723,724],{"class":60,"line":169},[58,725,87],{"emptyLinePlaceholder":86},[58,727,728,730,732],{"class":60,"line":194},[58,729,232],{"class":64},[58,731,69],{"class":68},[58,733,237],{"class":101},[58,735,736],{"class":60,"line":224},[58,737,87],{"emptyLinePlaceholder":86},[58,739,740,742,744,746,748],{"class":60,"line":229},[58,741,244],{"class":243},[58,743,247],{"class":64},[58,745,250],{"class":243},[58,747,253],{"class":64},[58,749,256],{"class":101},[58,751,752,754,756,758,760,762,764,766,768,770,772,774,776,778,780],{"class":60,"line":240},[58,753,262],{"class":64},[58,755,265],{"class":101},[58,757,268],{"class":64},[58,759,271],{"class":101},[58,761,274],{"class":68},[58,763,277],{"class":64},[58,765,102],{"class":101},[58,767,282],{"class":105},[58,769,115],{"class":101},[58,771,268],{"class":105},[58,773,125],{"class":101},[58,775,292],{"class":291},[58,777,295],{"class":101},[58,779,298],{"class":68},[58,781,301],{"class":291},[58,783,784],{"class":60,"line":259},[58,785,87],{"emptyLinePlaceholder":86},[58,787,788,790,792,794,796,798,800,802],{"class":60,"line":304},[58,789,144],{"class":143},[58,791,115],{"class":101},[58,793,118],{"class":72},[5
8,795,178],{"class":76},[58,797,118],{"class":72},[58,799,125],{"class":101},[58,801,253],{"class":105},[58,803,327],{"class":101},[58,805,806,808,810,812,814,816,818,820],{"class":60,"line":309},[58,807,144],{"class":143},[58,809,115],{"class":101},[58,811,118],{"class":72},[58,813,318],{"class":76},[58,815,118],{"class":72},[58,817,125],{"class":101},[58,819,277],{"class":105},[58,821,327],{"class":101},[557,823,825],{"id":824},"step-by-step","Step by step",[827,828,830],"h4",{"id":829},"_1-convert-to-lowercase","1. Convert to lowercase",[48,832,834],{"className":50,"code":833,"language":52,"meta":53,"style":53},"text.lower()\n",[55,835,836],{"__ignoreMap":53},[58,837,838,840,842,844],{"class":60,"line":61},[58,839,163],{"class":64},[58,841,102],{"class":101},[58,843,106],{"class":105},[58,845,137],{"class":101},[14,847,848,849,852,853,855],{},"This makes ",[55,850,851],{},"Python"," and ",[55,854,52],{}," count as the same word.",[827,857,859],{"id":858},"_2-remove-simple-punctuation","2. Remove simple punctuation",[48,861,863],{"className":50,"code":862,"language":52,"meta":53,"style":53},"text.lower().replace(\".\", \"\")\n",[55,864,865],{"__ignoreMap":53},[58,866,867,869,871,873,875,877,879,881,883,885,887,889],{"class":60,"line":61},[58,868,163],{"class":64},[58,870,102],{"class":101},[58,872,106],{"class":105},[58,874,109],{"class":101},[58,876,112],{"class":105},[58,878,115],{"class":101},[58,880,118],{"class":72},[58,882,102],{"class":76},[58,884,118],{"class":72},[58,886,125],{"class":101},[58,888,128],{"class":72},[58,890,327],{"class":101},[14,892,893,894,897,898,102],{},"This removes periods so words like ",[55,895,896],{},"simple."," become ",[55,899,900],{},"simple",[827,902,904],{"id":903},"_3-split-into-words","3. 
Split into words",[48,906,908],{"className":50,"code":907,"language":52,"meta":53,"style":53},"words = cleaned_text.split()\n",[55,909,910],{"__ignoreMap":53},[58,911,912,914,916,918,920,922],{"class":60,"line":61},[58,913,93],{"class":64},[58,915,69],{"class":68},[58,917,715],{"class":64},[58,919,102],{"class":101},[58,921,134],{"class":105},[58,923,137],{"class":101},[14,925,926],{},"This creates a list of words.",[827,928,930],{"id":929},"_4-create-an-empty-dictionary","4. Create an empty dictionary",[48,932,934],{"className":50,"code":933,"language":52,"meta":53,"style":53},"counts = {}\n",[55,935,936],{"__ignoreMap":53},[58,937,938,940,942],{"class":60,"line":61},[58,939,232],{"class":64},[58,941,69],{"class":68},[58,943,237],{"class":101},[14,945,946],{},"This dictionary will store each word and its count.",[827,948,950],{"id":949},"_5-loop-through-the-words","5. Loop through the words",[48,952,954],{"className":50,"code":953,"language":52,"meta":53,"style":53},"for word in words:\n    counts[word] = counts.get(word, 0) + 1\n",[55,955,956,968],{"__ignoreMap":53},[58,957,958,960,962,964,966],{"class":60,"line":61},[58,959,244],{"class":243},[58,961,247],{"class":64},[58,963,250],{"class":243},[58,965,253],{"class":64},[58,967,256],{"class":101},[58,969,970,972,974,976,978,980,982,984,986,988,990,992,994,996,998],{"class":60,"line":83},[58,971,262],{"class":64},[58,973,265],{"class":101},[58,975,268],{"class":64},[58,977,271],{"class":101},[58,979,274],{"class":68},[58,981,277],{"class":64},[58,983,102],{"class":101},[58,985,282],{"class":105},[58,987,115],{"class":101},[58,989,268],{"class":105},[58,991,125],{"class":101},[58,993,292],{"class":291},[58,995,295],{"class":101},[58,997,298],{"class":68},[58,999,301],{"class":291},[14,1001,1002],{},"This does the counting:",[21,1004,1005,1011,1017],{},[24,1006,1007,1010],{},[55,1008,1009],{},"counts.get(word, 0)"," gets the current count",[24,1012,1013,1014],{},"If the word is not in the dictionary yet, it uses 
",[55,1015,1016],{},"0",[24,1018,1019,1020],{},"Then it adds ",[55,1021,1022],{},"1",[557,1024,607],{"id":1025},"expected-output-1",[48,1027,1029],{"className":50,"code":1028,"language":52,"meta":53,"style":53},"Words: ['python', 'is', 'simple', 'python', 'is', 'useful', 'python', 'is', 'fun']\nWord counts: {'python': 3, 'is': 3, 'simple': 1, 'useful': 1, 'fun': 1}\n",[55,1030,1031,1118],{"__ignoreMap":53},[58,1032,1033,1035,1037,1040,1043,1045,1047,1049,1052,1055,1057,1059,1061,1063,1065,1067,1069,1071,1073,1075,1077,1079,1081,1083,1085,1088,1090,1092,1094,1096,1098,1100,1102,1104,1106,1108,1110,1113,1115],{"class":60,"line":61},[58,1034,628],{"class":64},[58,1036,620],{"class":101},[58,1038,1039],{"class":101}," [",[58,1041,1042],{"class":72},"'",[58,1044,52],{"class":76},[58,1046,1042],{"class":72},[58,1048,125],{"class":101},[58,1050,1051],{"class":72}," '",[58,1053,1054],{"class":76},"is",[58,1056,1042],{"class":72},[58,1058,125],{"class":101},[58,1060,1051],{"class":72},[58,1062,900],{"class":76},[58,1064,1042],{"class":72},[58,1066,125],{"class":101},[58,1068,1051],{"class":72},[58,1070,52],{"class":76},[58,1072,1042],{"class":72},[58,1074,125],{"class":101},[58,1076,1051],{"class":72},[58,1078,1054],{"class":76},[58,1080,1042],{"class":72},[58,1082,125],{"class":101},[58,1084,1051],{"class":72},[58,1086,1087],{"class":76},"useful",[58,1089,1042],{"class":72},[58,1091,125],{"class":101},[58,1093,1051],{"class":72},[58,1095,52],{"class":76},[58,1097,1042],{"class":72},[58,1099,125],{"class":101},[58,1101,1051],{"class":72},[58,1103,1054],{"class":76},[58,1105,1042],{"class":72},[58,1107,125],{"class":101},[58,1109,1051],{"class":72},[58,1111,1112],{"class":76},"fun",[58,1114,1042],{"class":72},[58,1116,1117],{"class":101},"]\n",[58,1119,1120,1123,1125,1128,1130,1132,1134,1136,1139,1141,1143,1145,1147,1149,1151,1153,1155,1157,1159,1161,1164,1166,1168,1170,1172,1174,1176,1178,1180,1182,1184,1186,1188],{"class":60,"line":83},[58,1121,1122],{"class":64},"Word 
counts",[58,1124,620],{"class":101},[58,1126,1127],{"class":101}," {",[58,1129,1042],{"class":72},[58,1131,52],{"class":76},[58,1133,1042],{"class":72},[58,1135,620],{"class":101},[58,1137,1138],{"class":291}," 3",[58,1140,125],{"class":101},[58,1142,1051],{"class":72},[58,1144,1054],{"class":76},[58,1146,1042],{"class":72},[58,1148,620],{"class":101},[58,1150,1138],{"class":291},[58,1152,125],{"class":101},[58,1154,1051],{"class":72},[58,1156,900],{"class":76},[58,1158,1042],{"class":72},[58,1160,620],{"class":101},[58,1162,1163],{"class":291}," 1",[58,1165,125],{"class":101},[58,1167,1051],{"class":72},[58,1169,1087],{"class":76},[58,1171,1042],{"class":72},[58,1173,620],{"class":101},[58,1175,1163],{"class":291},[58,1177,125],{"class":101},[58,1179,1051],{"class":72},[58,1181,1112],{"class":76},[58,1183,1042],{"class":72},[58,1185,620],{"class":101},[58,1187,1163],{"class":291},[58,1189,1190],{"class":101},"}\n",[40,1192,1194],{"id":1193},"improving-the-script","Improving the script",[14,1196,1197],{},"Once the basic version works, you can make it more useful.",[14,1199,1200],{},"Common improvements:",[21,1202,1203,1206,1209,1212,1218],{},[24,1204,1205],{},"Sort words by frequency",[24,1207,1208],{},"Ignore very common words if needed",[24,1210,1211],{},"Read text from a file instead of using a hardcoded string",[24,1213,1214,1215],{},"Clean more punctuation with ",[55,1216,1217],{},"replace()",[24,1219,1220],{},"Show only the top 5 or top 10 words",[14,1222,1223],{},"Here is a simple version that sorts word counts from highest to lowest:",[48,1225,1227],{"className":50,"code":1226,"language":52,"meta":53,"style":53},"text = \"Python is simple. Python is useful. 
Python is fun.\"\n\ncleaned_text = text.lower().replace(\".\", \"\")\nwords = cleaned_text.split()\n\ncounts = {}\nfor word in words:\n    counts[word] = counts.get(word, 0) + 1\n\nsorted_counts = sorted(counts.items(), key=lambda item: item[1], reverse=True)\n\nprint(\"Most common words:\")\nfor word, count in sorted_counts:\n    print(word, count)\n",[55,1228,1229,1241,1245,1275,1289,1293,1301,1313,1345,1349,1408,1412,1427,1447],{"__ignoreMap":53},[58,1230,1231,1233,1235,1237,1239],{"class":60,"line":61},[58,1232,65],{"class":64},[58,1234,69],{"class":68},[58,1236,73],{"class":72},[58,1238,669],{"class":76},[58,1240,80],{"class":72},[58,1242,1243],{"class":60,"line":83},[58,1244,87],{"emptyLinePlaceholder":86},[58,1246,1247,1249,1251,1253,1255,1257,1259,1261,1263,1265,1267,1269,1271,1273],{"class":60,"line":90},[58,1248,680],{"class":64},[58,1250,69],{"class":68},[58,1252,98],{"class":64},[58,1254,102],{"class":101},[58,1256,106],{"class":105},[58,1258,109],{"class":101},[58,1260,112],{"class":105},[58,1262,115],{"class":101},[58,1264,118],{"class":72},[58,1266,102],{"class":76},[58,1268,118],{"class":72},[58,1270,125],{"class":101},[58,1272,128],{"class":72},[58,1274,327],{"class":101},[58,1276,1277,1279,1281,1283,1285,1287],{"class":60,"line":140},[58,1278,93],{"class":64},[58,1280,69],{"class":68},[58,1282,715],{"class":64},[58,1284,102],{"class":101},[58,1286,134],{"class":105},[58,1288,137],{"class":101},[58,1290,1291],{"class":60,"line":169},[58,1292,87],{"emptyLinePlaceholder":86},[58,1294,1295,1297,1299],{"class":60,"line":194},[58,1296,232],{"class":64},[58,1298,69],{"class":68},[58,1300,237],{"class":101},[58,1302,1303,1305,1307,1309,1311],{"class":60,"line":224},[58,1304,244],{"class":243},[58,1306,247],{"class":64},[58,1308,250],{"class":243},[58,1310,253],{"class":64},[58,1312,256],{"class":101},[58,1314,1315,1317,1319,1321,1323,1325,1327,1329,1331,1333,1335,1337,1339,1341,1343],{"class":60,"line":229},[58,1316,262],{"class":64},[58,1318,265],{"class"
:101},[58,1320,268],{"class":64},[58,1322,271],{"class":101},[58,1324,274],{"class":68},[58,1326,277],{"class":64},[58,1328,102],{"class":101},[58,1330,282],{"class":105},[58,1332,115],{"class":101},[58,1334,268],{"class":105},[58,1336,125],{"class":101},[58,1338,292],{"class":291},[58,1340,295],{"class":101},[58,1342,298],{"class":68},[58,1344,301],{"class":291},[58,1346,1347],{"class":60,"line":240},[58,1348,87],{"emptyLinePlaceholder":86},[58,1350,1351,1354,1356,1359,1361,1364,1366,1369,1372,1376,1378,1382,1386,1388,1390,1392,1394,1397,1400,1402,1406],{"class":60,"line":259},[58,1352,1353],{"class":64},"sorted_counts ",[58,1355,69],{"class":68},[58,1357,1358],{"class":143}," sorted",[58,1360,115],{"class":101},[58,1362,1363],{"class":105},"counts",[58,1365,102],{"class":101},[58,1367,1368],{"class":105},"items",[58,1370,1371],{"class":101},"(),",[58,1373,1375],{"class":1374},"s99_P"," key",[58,1377,69],{"class":68},[58,1379,1381],{"class":1380},"sbsja","lambda",[58,1383,1385],{"class":1384},"sFwrP"," item",[58,1387,620],{"class":101},[58,1389,1385],{"class":105},[58,1391,265],{"class":101},[58,1393,1022],{"class":291},[58,1395,1396],{"class":101},"],",[58,1398,1399],{"class":1374}," reverse",[58,1401,69],{"class":68},[58,1403,1405],{"class":1404},"s39Yj","True",[58,1407,327],{"class":101},[58,1409,1410],{"class":60,"line":304},[58,1411,87],{"emptyLinePlaceholder":86},[58,1413,1414,1416,1418,1420,1423,1425],{"class":60,"line":309},[58,1415,144],{"class":143},[58,1417,115],{"class":101},[58,1419,118],{"class":72},[58,1421,1422],{"class":76},"Most common words:",[58,1424,118],{"class":72},[58,1426,327],{"class":101},[58,1428,1430,1432,1435,1437,1440,1442,1445],{"class":60,"line":1429},13,[58,1431,244],{"class":243},[58,1433,1434],{"class":64}," word",[58,1436,125],{"class":101},[58,1438,1439],{"class":64}," count ",[58,1441,250],{"class":243},[58,1443,1444],{"class":64}," 
sorted_counts",[58,1446,256],{"class":101},[58,1448,1450,1453,1455,1457,1459,1462],{"class":60,"line":1449},14,[58,1451,1452],{"class":143},"    print",[58,1454,115],{"class":101},[58,1456,268],{"class":105},[58,1458,125],{"class":101},[58,1460,1461],{"class":105}," count",[58,1463,327],{"class":101},[557,1465,607],{"id":1466},"expected-output-2",[48,1468,1470],{"className":50,"code":1469,"language":52,"meta":53,"style":53},"Most common words:\npython 3\nis 3\nsimple 1\nuseful 1\nfun 1\n",[55,1471,1472,1479,1487,1493,1501,1508],{"__ignoreMap":53},[58,1473,1474,1477],{"class":60,"line":61},[58,1475,1476],{"class":64},"Most common words",[58,1478,256],{"class":101},[58,1480,1481,1484],{"class":60,"line":83},[58,1482,1483],{"class":64},"python ",[58,1485,1486],{"class":291},"3\n",[58,1488,1489,1491],{"class":60,"line":90},[58,1490,1054],{"class":68},[58,1492,643],{"class":291},[58,1494,1495,1498],{"class":60,"line":140},[58,1496,1497],{"class":64},"simple ",[58,1499,1500],{"class":291},"1\n",[58,1502,1503,1506],{"class":60,"line":169},[58,1504,1505],{"class":64},"useful ",[58,1507,1500],{"class":291},[58,1509,1510,1513],{"class":60,"line":194},[58,1511,1512],{"class":64},"fun ",[58,1514,1500],{"class":291},[14,1516,1517,1518,102],{},"You can also read text from a file and then use the same logic. 
For that, see ",[356,1519,1521],{"href":1520},"\u002Fhow-to\u002Fhow-to-read-a-file-in-python\u002F","How to read a file in Python",[40,1523,607],{"id":1524},"expected-output-3",[14,1526,1527],{},"Your script should show results like these:",[21,1529,1530,1533,1536,1539],{},[24,1531,1532],{},"Total characters",[24,1534,1535],{},"Total words",[24,1537,1538],{},"Total lines",[24,1540,1541],{},"A dictionary or sorted list of word counts",[14,1543,1544],{},"For example:",[48,1546,1548],{"className":50,"code":1547,"language":52,"meta":53,"style":53},"Characters: 42\nWords: 7\nLines: 1\nWord counts: {'python': 2, 'is': 2, 'simple': 1, 'and': 1, 'useful': 1}\n",[55,1549,1550,1559,1568,1576],{"__ignoreMap":53},[58,1551,1552,1554,1556],{"class":60,"line":61},[58,1553,617],{"class":64},[58,1555,620],{"class":101},[58,1557,1558],{"class":291}," 42\n",[58,1560,1561,1563,1565],{"class":60,"line":83},[58,1562,628],{"class":64},[58,1564,620],{"class":101},[58,1566,1567],{"class":291}," 7\n",[58,1569,1570,1572,1574],{"class":60,"line":90},[58,1571,638],{"class":64},[58,1573,620],{"class":101},[58,1575,301],{"class":291},[58,1577,1578,1580,1582,1584,1586,1588,1590,1592,1595,1597,1599,1601,1603,1605,1607,1609,1611,1613,1615,1617,1619,1621,1623,1626,1628,1630,1632,1634,1636,1638,1640,1642,1644],{"class":60,"line":140},[58,1579,1122],{"class":64},[58,1581,620],{"class":101},[58,1583,1127],{"class":101},[58,1585,1042],{"class":72},[58,1587,52],{"class":76},[58,1589,1042],{"class":72},[58,1591,620],{"class":101},[58,1593,1594],{"class":291}," 
2",[58,1596,125],{"class":101},[58,1598,1051],{"class":72},[58,1600,1054],{"class":76},[58,1602,1042],{"class":72},[58,1604,620],{"class":101},[58,1606,1594],{"class":291},[58,1608,125],{"class":101},[58,1610,1051],{"class":72},[58,1612,900],{"class":76},[58,1614,1042],{"class":72},[58,1616,620],{"class":101},[58,1618,1163],{"class":291},[58,1620,125],{"class":101},[58,1622,1051],{"class":72},[58,1624,1625],{"class":76},"and",[58,1627,1042],{"class":72},[58,1629,620],{"class":101},[58,1631,1163],{"class":291},[58,1633,125],{"class":101},[58,1635,1051],{"class":72},[58,1637,1087],{"class":76},[58,1639,1042],{"class":72},[58,1641,620],{"class":101},[58,1643,1163],{"class":291},[58,1645,1190],{"class":101},[40,1647,1649],{"id":1648},"beginner-debugging-tips","Beginner debugging tips",[14,1651,1652],{},"If your result looks wrong, check the data at each step.",[14,1654,1655],{},"Useful debug prints:",[48,1657,1659],{"className":50,"code":1658,"language":52,"meta":53,"style":53},"print(text)\nprint(text.split())\nprint(text.lower())\nprint(words)\nprint(counts)\n",[55,1660,1661,1671,1685,1699,1709],{"__ignoreMap":53},[58,1662,1663,1665,1667,1669],{"class":60,"line":61},[58,1664,144],{"class":143},[58,1666,115],{"class":101},[58,1668,163],{"class":105},[58,1670,327],{"class":101},[58,1672,1673,1675,1677,1679,1681,1683],{"class":60,"line":83},[58,1674,144],{"class":143},[58,1676,115],{"class":101},[58,1678,163],{"class":105},[58,1680,102],{"class":101},[58,1682,134],{"class":105},[58,1684,475],{"class":101},[58,1686,1687,1689,1691,1693,1695,1697],{"class":60,"line":90},[58,1688,144],{"class":143},[58,1690,115],{"class":101},[58,1692,163],{"class":105},[58,1694,102],{"class":101},[58,1696,106],{"class":105},[58,1698,475],{"class":101},[58,1700,1701,1703,1705,1707],{"class":60,"line":140},[58,1702,144],{"class":143},[58,1704,115],{"class":101},[58,1706,189],{"class":105},[58,1708,327],{"class":101},[58,1710,1711,1713,1715,1717],{"class":60,"line":169},[58,1712,144],{"class":
143},[58,1714,115],{"class":101},[58,1716,1363],{"class":105},[58,1718,327],{"class":101},[14,1720,1721],{},"These help you see:",[21,1723,1724,1727,1733,1736,1742],{},[24,1725,1726],{},"The original text",[24,1728,1729,1730,1732],{},"How ",[55,1731,380],{}," is breaking the text",[24,1734,1735],{},"Whether lowercase conversion worked",[24,1737,1738,1739,1741],{},"What is inside the ",[55,1740,189],{}," list",[24,1743,1744],{},"Whether the dictionary is counting correctly",[14,1746,1747],{},"Good things to check:",[21,1749,1750,1753,1756,1759,1762],{},[24,1751,1752],{},"Print the cleaned text before counting",[24,1754,1755],{},"Print the words list to confirm the split result",[24,1757,1758],{},"Print the dictionary after the loop",[24,1760,1761],{},"Check punctuation if the counts look strange",[24,1763,1764],{},"Check uppercase and lowercase words if duplicates appear",[14,1766,1767,1768,102],{},"If you want more practice with dictionaries, see ",[356,1769,1771],{"href":1770},"\u002Fhow-to\u002Fhow-to-loop-through-a-dictionary-in-python\u002F","how to loop through a dictionary in Python",[40,1773,1775],{"id":1774},"common-mistakes","Common mistakes",[14,1777,1778],{},"These are some common problems beginners run into:",[21,1780,1781,1784,1787,1796,1799],{},[24,1782,1783],{},"Forgetting to lowercase text before counting words",[24,1785,1786],{},"Not removing punctuation, which creates different versions of the same word",[24,1788,374,1789,1792,1793,1795],{},[55,1790,1791],{},"split(' ')"," instead of ",[55,1794,380],{},", which can behave badly with extra spaces",[24,1797,1798],{},"Trying to count words before converting non-string data to text",[24,1800,1801],{},"Expecting perfect natural language analysis from a simple script",[14,1803,1804],{},"A beginner script like this is great for learning, but it is still simple. 
Real text analysis usually needs better punctuation handling and more advanced cleaning.",[40,1806,1808],{"id":1807},"faq","FAQ",[557,1810,1812],{"id":1811},"does-this-script-count-punctuation-as-characters","Does this script count punctuation as characters?",[14,1814,1815,1816,1818],{},"Yes. ",[55,1817,567],{}," counts all characters in the string, including spaces and punctuation.",[557,1820,1822,1823,1826],{"id":1821},"why-use-lower-before-counting-words","Why use ",[55,1824,1825],{},"lower()"," before counting words?",[14,1828,1829,1830,852,1832,855],{},"It makes words like ",[55,1831,851],{},[55,1833,52],{},[557,1835,1837],{"id":1836},"can-i-analyze-a-text-file-instead-of-a-string","Can I analyze a text file instead of a string?",[14,1839,1840],{},"Yes. Read the file into a string first, then use the same counting steps.",[557,1842,1844,1845,1847],{"id":1843},"is-split-enough-for-real-text-analysis","Is ",[55,1846,380],{}," enough for real text analysis?",[14,1849,1850],{},"It is enough for a beginner example, but more advanced text analysis needs better text cleaning.",[40,1852,1854],{"id":1853},"see-also","See also",[21,1856,1857,1861,1865,1870,1874,1878],{},[24,1858,1859],{},[356,1860,359],{"href":358},[24,1862,1863],{},[356,1864,399],{"href":398},[24,1866,1867],{},[356,1868,1869],{"href":377},"Python string split() method",[24,1871,1872],{},[356,1873,603],{"href":602},[24,1875,1876],{},[356,1877,1521],{"href":1520},[24,1879,1880],{},[356,1881,1883],{"href":1882},"\u002Fexamples\u002Fpython-word-count-script-example\u002F","Python word count script example",[14,1885,1886],{},"Try the same script with file input next, then extend it so it shows the most common words first.",[1888,1889,1890],"style",{},"html pre.shiki code .su5hD, html code.shiki .su5hD{--shiki-light:#90A4AE;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .smGrS, html code.shiki .smGrS{--shiki-light:#39ADB5;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code 
.sjJ54, html code.shiki .sjJ54{--shiki-light:#39ADB5;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .s_sjI, html code.shiki .s_sjI{--shiki-light:#91B859;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sP7_E, html code.shiki .sP7_E{--shiki-light:#39ADB5;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .slqww, html code.shiki .slqww{--shiki-light:#6182B8;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sptTA, html code.shiki .sptTA{--shiki-light:#6182B8;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sVHd0, html code.shiki .sVHd0{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#D73A49;--shiki-default-font-style:inherit;--shiki-dark:#F97583;--shiki-dark-font-style:inherit}html pre.shiki code .srdBf, html code.shiki .srdBf{--shiki-light:#F76D47;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: 
var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .s99_P, html code.shiki .s99_P{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#E36209;--shiki-default-font-style:inherit;--shiki-dark:#FFAB70;--shiki-dark-font-style:inherit}html pre.shiki code .sbsja, html code.shiki .sbsja{--shiki-light:#9C3EDA;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sFwrP, html code.shiki .sFwrP{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#24292E;--shiki-default-font-style:inherit;--shiki-dark:#E1E4E8;--shiki-dark-font-style:inherit}html pre.shiki code .s39Yj, html code.shiki .s39Yj{--shiki-light:#39ADB5;--shiki-default:#005CC5;--shiki-dark:#79B8FF}",{"title":53,"searchDepth":83,"depth":83,"links":1892},[1893,1894,1895,1896,1900,1904,1907,1908,1909,1910,1918],{"id":42,"depth":83,"text":43},{"id":330,"depth":83,"text":331},{"id":362,"depth":83,"text":363},{"id":402,"depth":83,"text":403,"children":1897},[1898,1899],{"id":559,"depth":90,"text":560},{"id":606,"depth":90,"text":607},{"id":649,"depth":83,"text":650,"children":1901},[1902,1903],{"id":824,"depth":90,"text":825},{"id":1025,"depth":90,"text":607},{"id":1193,"depth":83,"text":1194,"children":1905},[1906],{"id":1466,"depth":90,"text":607},{"id":1524,"depth":83,"text":607},{"id":1648,"depth":83,"text":1649},{"id":1774,"depth":83,"text":1775},{"id":1807,"depth":83,"text":1808,"children":1911},[1912,1913,1915,1916],{"id":1811,"depth":90,"text":1812},{"id":1821,"depth":90,"text":1914},"Why use lower() before counting words?",{"id":1836,"depth":90,"text":1837},{"id":1843,"depth":90,"text":1917},"Is split() enough for real text analysis?",{"id":1853,"depth":83,"text":1854},"Master python text analysis script example in our comprehensive Python beginner 
guide.","md",{},"\u002Fexamples\u002Fpython-text-analysis-script-example",{"title":5,"description":1919},"examples\u002Fpython-text-analysis-script-example","ogQWoTUqNOu7OCRLullf4PuvgMEY5MxnwjQTfsO74T4",1777585478051]