[{"data":1,"prerenderedAt":2622},["ShallowReactive",2],{"doc-\u002Fexamples\u002Fpython-data-cleaning-script-example":3},{"id":4,"title":5,"body":6,"description":2615,"extension":2616,"meta":2617,"navigation":264,"path":2618,"seo":2619,"stem":2620,"__hash__":2621},"content\u002Fexamples\u002Fpython-data-cleaning-script-example.md","Python Data Cleaning Script Example",{"type":7,"value":8,"toc":2595},"minimark",[9,13,17,20,39,42,47,50,542,546,549,586,590,593,777,780,794,797,801,807,978,981,1002,1013,1020,1024,1030,1068,1071,1080,1083,1086,1103,1106,1113,1119,1123,1126,1177,1179,1188,1194,1197,1208,1212,1218,1252,1254,1280,1289,1332,1334,1357,1360,1371,1389,1393,1396,1778,1781,1803,1806,1810,1813,1969,1972,1988,1992,1995,2015,2018,2363,2375,2379,2382,2398,2401,2483,2486,2521,2525,2530,2533,2537,2542,2546,2552,2556,2559,2563,2591],[10,11,5],"h1",{"id":12},"python-data-cleaning-script-example",[14,15,16],"p",{},"This beginner-friendly example shows one practical Python data cleaning script.",[14,18,19],{},"You will start with messy data stored as a list of dictionaries, then clean it by:",[21,22,23,27,30,33,36],"ul",{},[24,25,26],"li",{},"removing extra spaces",[24,28,29],{},"standardizing text",[24,31,32],{},"handling missing values",[24,34,35],{},"converting strings to numbers",[24,37,38],{},"building a new clean result",[14,40,41],{},"This is a useful pattern when you want to clean data before saving it, analyzing it, or using it in another part of your program.",[43,44,46],"h2",{"id":45},"quick-example","Quick example",[14,48,49],{},"Use this when you want a simple example of cleaning list-of-dictionary data before saving or using it.",[51,52,57],"pre",{"className":53,"code":54,"language":55,"meta":56,"style":56},"language-python shiki shiki-themes material-theme-lighter github-light github-dark","raw_data = [\n    {\"name\": \" Alice \", \"age\": \"25\", \"city\": \"new york\"},\n    {\"name\": \"Bob\", \"age\": \"\", \"city\": \" london \"},\n    {\"name\": \"  Cara\", \"age\": \"31\", \"city\": \"PARIS\"}\n]\n\ncleaned_data = []\n\nfor row in raw_data:\n    name = row[\"name\"].strip()\n    age_text = row[\"age\"].strip()\n    city = row[\"city\"].strip().title()\n\n    if not name:\n        continue\n\n    age = int(age_text) if age_text else None\n\n    cleaned_data.append({\n        \"name\": name,\n        \"age\": age,\n        \"city\": city\n    })\n\nprint(cleaned_data)\n","python","",[58,59,60,77,144,196,253,259,266,277,282,301,331,355,385,390,404,410,415,449,454,469,486,502,516,522,527],"code",{"__ignoreMap":56},[61,62,65,69,73],"span",{"class":63,"line":64},"line",1,[61,66,68],{"class":67},"su5hD","raw_data ",[61,70,72],{"class":71},"smGrS","=",[61,74,76],{"class":75},"sP7_E"," [\n",[61,78,80,83,87,91,93,96,99,102,104,107,109,112,114,116,118,121,123,125,127,130,132,134,136,139,141],{"class":63,"line":79},2,[61,81,82],{"class":75},"    {",[61,84,86],{"class":85},"sjJ54","\"",[61,88,90],{"class":89},"s_sjI","name",[61,92,86],{"class":85},[61,94,95],{"class":75},":",[61,97,98],{"class":85}," \"",[61,100,101],{"class":89}," Alice ",[61,103,86],{"class":85},[61,105,106],{"class":75},",",[61,108,98],{"class":85},[61,110,111],{"class":89},"age",[61,113,86],{"class":85},[61,115,95],{"class":75},[61,117,98],{"class":85},[61,119,120],{"class":89},"25",[61,122,86],{"class":85},[61,124,106],{"class":75},[61,126,98],{"class":85},[61,128,129],{"class":89},"city",[61,131,86],{"class":85},[61,133,95],{"class":75},[61,135,98],{"class":85},[61,137,138],{"class":89},"new york",[61,140,86],{"class":85},[61,142,143],{"class":75},"},\n",[61,145,147,149,151,153,155,157,159,162,164,166,168,170,172,174,177,179,181,183,185,187,189,192,194],{"class":63,"line":146},3,[61,148,82],{"class":75},[61,150,86],{"class":85},[61,152,90],{"class":89},[61,154,86],{"class":85},[61,156,95],{"class":75},[61,158,98],{"class":85},[61,160,161],{"class":89},"Bob",[61,163,86],{"class":85},[61,165,106],{"class":75},[61,167,98],{"class":85},[61,169,111],{"class":89},[61,171,86],{"class":85},[61,173,95],{"class":75},[61,175,176],{"class":85}," \"\"",[61,178,106],{"class":75},[61,180,98],{"class":85},[61,182,129],{"class":89},[61,184,86],{"class":85},[61,186,95],{"class":75},[61,188,98],{"class":85},[61,190,191],{"class":89}," london ",[61,193,86],{"class":85},[61,195,143],{"class":75},[61,197,199,201,203,205,207,209,211,214,216,218,220,222,224,226,228,231,233,235,237,239,241,243,245,248,250],{"class":63,"line":198},4,[61,200,82],{"class":75},[61,202,86],{"class":85},[61,204,90],{"class":89},[61,206,86],{"class":85},[61,208,95],{"class":75},[61,210,98],{"class":85},[61,212,213],{"class":89},"  Cara",[61,215,86],{"class":85},[61,217,106],{"class":75},[61,219,98],{"class":85},[61,221,111],{"class":89},[61,223,86],{"class":85},[61,225,95],{"class":75},[61,227,98],{"class":85},[61,229,230],{"class":89},"31",[61,232,86],{"class":85},[61,234,106],{"class":75},[61,236,98],{"class":85},[61,238,129],{"class":89},[61,240,86],{"class":85},[61,242,95],{"class":75},[61,244,98],{"class":85},[61,246,247],{"class":89},"PARIS",[61,249,86],{"class":85},[61,251,252],{"class":75},"}\n",[61,254,256],{"class":63,"line":255},5,[61,257,258],{"class":75},"]\n",[61,260,262],{"class":63,"line":261},6,[61,263,265],{"emptyLinePlaceholder":264},true,"\n",[61,267,269,272,274],{"class":63,"line":268},7,[61,270,271],{"class":67},"cleaned_data ",[61,273,72],{"class":71},[61,275,276],{"class":75}," []\n",[61,278,280],{"class":63,"line":279},8,[61,281,265],{"emptyLinePlaceholder":264},[61,283,285,289,292,295,298],{"class":63,"line":284},9,[61,286,288],{"class":287},"sVHd0","for",[61,290,291],{"class":67}," row ",[61,293,294],{"class":287},"in",[61,296,297],{"class":67}," raw_data",[61,299,300],{"class":75},":\n",[61,302,304,307,309,312,315,317,319,321,324,328],{"class":63,"line":303},10,[61,305,306],{"class":67},"    name ",[61,308,72],{"class":71},[61,310,311],{"class":67}," row",[61,313,314],{"class":75},"[",[61,316,86],{"class":85},[61,318,90],{"class":89},[61,320,86],{"class":85},[61,322,323],{"class":75},"].",[61,325,327],{"class":326},"slqww","strip",[61,329,330],{"class":75},"()\n",[61,332,334,337,339,341,343,345,347,349,351,353],{"class":63,"line":333},11,[61,335,336],{"class":67},"    age_text ",[61,338,72],{"class":71},[61,340,311],{"class":67},[61,342,314],{"class":75},[61,344,86],{"class":85},[61,346,111],{"class":89},[61,348,86],{"class":85},[61,350,323],{"class":75},[61,352,327],{"class":326},[61,354,330],{"class":75},[61,356,358,361,363,365,367,369,371,373,375,377,380,383],{"class":63,"line":357},12,[61,359,360],{"class":67},"    city ",[61,362,72],{"class":71},[61,364,311],{"class":67},[61,366,314],{"class":75},[61,368,86],{"class":85},[61,370,129],{"class":89},[61,372,86],{"class":85},[61,374,323],{"class":75},[61,376,327],{"class":326},[61,378,379],{"class":75},"().",[61,381,382],{"class":326},"title",[61,384,330],{"class":75},[61,386,388],{"class":63,"line":387},13,[61,389,265],{"emptyLinePlaceholder":264},[61,391,393,396,399,402],{"class":63,"line":392},14,[61,394,395],{"class":287},"    if",[61,397,398],{"class":71}," not",[61,400,401],{"class":67}," name",[61,403,300],{"class":75},[61,405,407],{"class":63,"line":406},15,[61,408,409],{"class":287},"        continue\n",[61,411,413],{"class":63,"line":412},16,[61,414,265],{"emptyLinePlaceholder":264},[61,416,418,421,423,427,430,433,436,439,442,445],{"class":63,"line":417},17,[61,419,420],{"class":67},"    age ",[61,422,72],{"class":71},[61,424,426],{"class":425},"sZMiF"," int",[61,428,429],{"class":75},"(",[61,431,432],{"class":326},"age_text",[61,434,435],{"class":75},")",[61,437,438],{"class":287}," if",[61,440,441],{"class":67}," age_text ",[61,443,444],{"class":287},"else",[61,446,448],{"class":447},"s39Yj"," None\n",[61,450,452],{"class":63,"line":451},18,[61,453,265],{"emptyLinePlaceholder":264},[61,455,457,460,463,466],{"class":63,"line":456},19,[61,458,459],{"class":67},"    cleaned_data",[61,461,462],{"class":75},".",[61,464,465],{"class":326},"append",[61,467,468],{"class":75},"({\n",[61,470,472,475,477,479,481,483],{"class":63,"line":471},20,[61,473,474],{"class":85},"        \"",[61,476,90],{"class":89},[61,478,86],{"class":85},[61,480,95],{"class":75},[61,482,401],{"class":326},[61,484,485],{"class":75},",\n",[61,487,489,491,493,495,497,500],{"class":63,"line":488},21,[61,490,474],{"class":85},[61,492,111],{"class":89},[61,494,86],{"class":85},[61,496,95],{"class":75},[61,498,499],{"class":326}," age",[61,501,485],{"class":75},[61,503,505,507,509,511,513],{"class":63,"line":504},22,[61,506,474],{"class":85},[61,508,129],{"class":89},[61,510,86],{"class":85},[61,512,95],{"class":75},[61,514,515],{"class":326}," city\n",[61,517,519],{"class":63,"line":518},23,[61,520,521],{"class":75},"    })\n",[61,523,525],{"class":63,"line":524},24,[61,526,265],{"emptyLinePlaceholder":264},[61,528,530,534,536,539],{"class":63,"line":529},25,[61,531,533],{"class":532},"sptTA","print",[61,535,429],{"class":75},[61,537,538],{"class":326},"cleaned_data",[61,540,541],{"class":75},")\n",[43,543,545],{"id":544},"what-this-example-does","What this example does",[14,547,548],{},"This script shows a simple data cleaning workflow:",[21,550,551,554,564,570,577,583],{},[24,552,553],{},"Uses a small list of dictionaries as sample messy data",[24,555,556,557],{},"Cleans text values with ",[558,559,561],"a",{"href":560},"\u002Freference\u002Fpython-string-strip-method",[58,562,563],{},"strip()",[24,565,566,567],{},"Standardizes city names with ",[58,568,569],{},"title()",[24,571,572,573,576],{},"Converts age from string to ",[58,574,575],{},"int"," when possible",[24,578,579,580],{},"Keeps missing age as ",[58,581,582],{},"None",[24,584,585],{},"Builds a new cleaned list instead of changing the original",[43,587,589],{"id":588},"the-messy-data-we-start-with","The messy data we start with",[14,591,592],{},"Here is the raw data:",[51,594,596],{"className":53,"code":595,"language":55,"meta":56,"style":56},"raw_data = [\n    {\"name\": \" Alice \", \"age\": \"25\", \"city\": \"new york\"},\n    {\"name\": \"Bob\", \"age\": \"\", \"city\": \" london \"},\n    {\"name\": \"  Cara\", \"age\": \"31\", \"city\": \"PARIS\"}\n]\n\nprint(raw_data)\n",[58,597,598,606,658,706,758,762,766],{"__ignoreMap":56},[61,599,600,602,604],{"class":63,"line":64},[61,601,68],{"class":67},[61,603,72],{"class":71},[61,605,76],{"class":75},[61,607,608,610,612,614,616,618,620,622,624,626,628,630,632,634,636,638,640,642,644,646,648,650,652,654,656],{"class":63,"line":79},[61,609,82],{"class":75},[61,611,86],{"class":85},[61,613,90],{"class":89},[61,615,86],{"class":85},[61,617,95],{"class":75},[61,619,98],{"class":85},[61,621,101],{"class":89},[61,623,86],{"class":85},[61,625,106],{"class":75},[61,627,98],{"class":85},[61,629,111],{"class":89},[61,631,86],{"class":85},[61,633,95],{"class":75},[61,635,98],{"class":85},[61,637,120],{"class":89},[61,639,86],{"class":85},[61,641,106],{"class":75},[61,643,98],{"class":85},[61,645,129],{"class":89},[61,647,86],{"class":85},[61,649,95],{"class":75},[61,651,98],{"class":85},[61,653,138],{"class":89},[61,655,86],{"class":85},[61,657,143],{"class":75},[61,659,660,662,664,666,668,670,672,674,676,678,680,682,684,686,688,690,692,694,696,698,700,702,704],{"class":63,"line":146},[61,661,82],{"class":75},[61,663,86],{"class":85},[61,665,90],{"class":89},[61,667,86],{"class":85},[61,669,95],{"class":75},[61,671,98],{"class":85},[61,673,161],{"class":89},[61,675,86],{"class":85},[61,677,106],{"class":75},[61,679,98],{"class":85},[61,681,111],{"class":89},[61,683,86],{"class":85},[61,685,95],{"class":75},[61,687,176],{"class":85},[61,689,106],{"class":75},[61,691,98],{"class":85},[61,693,129],{"class":89},[61,695,86],{"class":85},[61,697,95],{"class":75},[61,699,98],{"class":85},[61,701,191],{"class":89},[61,703,86],{"class":85},[61,705,143],{"class":75},[61,707,708,710,712,714,716,718,720,722,724,726,728,730,732,734,736,738,740,742,744,746,748,750,752,754,756],{"class":63,"line":198},[61,709,82],{"class":75},[61,711,86],{"class":85},[61,713,90],{"class":89},[61,715,86],{"class":85},[61,717,95],{"class":75},[61,719,98],{"class":85},[61,721,213],{"class":89},[61,723,86],{"class":85},[61,725,106],{"class":75},[61,727,98],{"class":85},[61,729,111],{"class":89},[61,731,86],{"class":85},[61,733,95],{"class":75},[61,735,98],{"class":85},[61,737,230],{"class":89},[61,739,86],{"class":85},[61,741,106],{"class":75},[61,743,98],{"class":85},[61,745,129],{"class":89},[61,747,86],{"class":85},[61,749,95],{"class":75},[61,751,98],{"class":85},[61,753,247],{"class":89},[61,755,86],{"class":85},[61,757,252],{"class":75},[61,759,760],{"class":63,"line":255},[61,761,258],{"class":75},[61,763,764],{"class":63,"line":261},[61,765,265],{"emptyLinePlaceholder":264},[61,767,768,770,772,775],{"class":63,"line":268},[61,769,533],{"class":532},[61,771,429],{"class":75},[61,773,774],{"class":326},"raw_data",[61,776,541],{"class":75},[14,778,779],{},"This data has several common problems:",[21,781,782,785,788,791],{},[24,783,784],{},"Extra spaces around values",[24,786,787],{},"Mixed uppercase and lowercase text",[24,789,790],{},"Missing values stored as empty strings",[24,792,793],{},"Numbers stored as text instead of real numbers",[14,795,796],{},"Real files often contain this kind of messy data, especially when the data comes from user input or CSV files.",[43,798,800],{"id":799},"step-1-remove-extra-whitespace","Step 1: Remove extra whitespace",[14,802,803,804,806],{},"Use ",[58,805,563],{}," on string values to remove spaces at the beginning and end.",[51,808,810],{"className":53,"code":809,"language":55,"meta":56,"style":56},"row = {\"name\": \" Alice \", \"age\": \"25\", \"city\": \" london \"}\n\nname = row[\"name\"].strip()\nage_text = row[\"age\"].strip()\ncity_text = row[\"city\"].strip()\n\nprint(name)\nprint(age_text)\nprint(city_text)\n",[58,811,812,870,874,897,920,943,947,957,967],{"__ignoreMap":56},[61,813,814,817,819,822,824,826,828,830,832,834,836,838,840,842,844,846,848,850,852,854,856,858,860,862,864,866,868],{"class":63,"line":64},[61,815,816],{"class":67},"row ",[61,818,72],{"class":71},[61,820,821],{"class":75}," {",[61,823,86],{"class":85},[61,825,90],{"class":89},[61,827,86],{"class":85},[61,829,95],{"class":75},[61,831,98],{"class":85},[61,833,101],{"class":89},[61,835,86],{"class":85},[61,837,106],{"class":75},[61,839,98],{"class":85},[61,841,111],{"class":89},[61,843,86],{"class":85},[61,845,95],{"class":75},[61,847,98],{"class":85},[61,849,120],{"class":89},[61,851,86],{"class":85},[61,853,106],{"class":75},[61,855,98],{"class":85},[61,857,129],{"class":89},[61,859,86],{"class":85},[61,861,95],{"class":75},[61,863,98],{"class":85},[61,865,191],{"class":89},[61,867,86],{"class":85},[61,869,252],{"class":75},[61,871,872],{"class":63,"line":79},[61,873,265],{"emptyLinePlaceholder":264},[61,875,876,879,881,883,885,887,889,891,893,895],{"class":63,"line":146},[61,877,878],{"class":67},"name ",[61,880,72],{"class":71},[61,882,311],{"class":67},[61,884,314],{"class":75},[61,886,86],{"class":85},[61,888,90],{"class":89},[61,890,86],{"class":85},[61,892,323],{"class":75},[61,894,327],{"class":326},[61,896,330],{"class":75},[61,898,899,902,904,906,908,910,912,914,916,918],{"class":63,"line":198},[61,900,901],{"class":67},"age_text ",[61,903,72],{"class":71},[61,905,311],{"class":67},[61,907,314],{"class":75},[61,909,86],{"class":85},[61,911,111],{"class":89},[61,913,86],{"class":85},[61,915,323],{"class":75},[61,917,327],{"class":326},[61,919,330],{"class":75},[61,921,922,925,927,929,931,933,935,937,939,941],{"class":63,"line":255},[61,923,924],{"class":67},"city_text ",[61,926,72],{"class":71},[61,928,311],{"class":67},[61,930,314],{"class":75},[61,932,86],{"class":85},[61,934,129],{"class":89},[61,936,86],{"class":85},[61,938,323],{"class":75},[61,940,327],{"class":326},[61,942,330],{"class":75},[61,944,945],{"class":63,"line":261},[61,946,265],{"emptyLinePlaceholder":264},[61,948,949,951,953,955],{"class":63,"line":268},[61,950,533],{"class":532},[61,952,429],{"class":75},[61,954,90],{"class":326},[61,956,541],{"class":75},[61,958,959,961,963,965],{"class":63,"line":279},[61,960,533],{"class":532},[61,962,429],{"class":75},[61,964,432],{"class":326},[61,966,541],{"class":75},[61,968,969,971,973,976],{"class":63,"line":284},[61,970,533],{"class":532},[61,972,429],{"class":75},[61,974,975],{"class":326},"city_text",[61,977,541],{"class":75},[14,979,980],{},"Expected output:",[51,982,984],{"className":53,"code":983,"language":55,"meta":56,"style":56},"Alice\n25\nlondon\n",[58,985,986,991,997],{"__ignoreMap":56},[61,987,988],{"class":63,"line":64},[61,989,990],{"class":67},"Alice\n",[61,992,993],{"class":63,"line":79},[61,994,996],{"class":995},"srdBf","25\n",[61,998,999],{"class":63,"line":146},[61,1000,1001],{"class":67},"london\n",[14,1003,1004,1005,1008,1009,1012],{},"Leading and trailing spaces can cause matching problems. For example, ",[58,1006,1007],{},"\"Alice\""," and ",[58,1010,1011],{},"\" Alice \""," look similar, but Python treats them as different strings.",[14,1014,1015,1016,462],{},"If you want a focused guide, see ",[558,1017,1019],{"href":1018},"\u002Fhow-to\u002Fhow-to-remove-whitespace-from-a-string-in-python","how to remove whitespace from a string in Python",[43,1021,1023],{"id":1022},"step-2-standardize-text-values","Step 2: Standardize text values",[14,1025,1026,1027,1029],{},"In this example, we use ",[58,1028,569],{}," for city names.",[51,1031,1033],{"className":53,"code":1032,"language":55,"meta":56,"style":56},"city = \"PARIS\".strip().title()\nprint(city)\n",[58,1034,1035,1058],{"__ignoreMap":56},[61,1036,1037,1040,1042,1044,1046,1048,1050,1052,1054,1056],{"class":63,"line":64},[61,1038,1039],{"class":67},"city ",[61,1041,72],{"class":71},[61,1043,98],{"class":85},[61,1045,247],{"class":89},[61,1047,86],{"class":85},[61,1049,462],{"class":75},[61,1051,327],{"class":326},[61,1053,379],{"class":75},[61,1055,382],{"class":326},[61,1057,330],{"class":75},[61,1059,1060,1062,1064,1066],{"class":63,"line":79},[61,1061,533],{"class":532},[61,1063,429],{"class":75},[61,1065,129],{"class":326},[61,1067,541],{"class":75},[14,1069,1070],{},"Output:",[51,1072,1074],{"className":53,"code":1073,"language":55,"meta":56,"style":56},"Paris\n",[58,1075,1076],{"__ignoreMap":56},[61,1077,1078],{"class":63,"line":64},[61,1079,1073],{"class":67},[14,1081,1082],{},"Standard formatting makes data easier to compare.",[14,1084,1085],{},"For example:",[21,1087,1088,1093,1098],{},[24,1089,1090],{},[58,1091,1092],{},"\"paris\"",[24,1094,1095],{},[58,1096,1097],{},"\"PARIS\"",[24,1099,1100],{},[58,1101,1102],{},"\" Paris \"",[14,1104,1105],{},"can all become:",[21,1107,1108],{},[24,1109,1110],{},[58,1111,1112],{},"\"Paris\"",[14,1114,1115,1116,1118],{},"This simple script uses ",[58,1117,569],{}," because it is easy to understand. In real projects, the best format depends on the data and the rules you want to enforce.",[43,1120,1122],{"id":1121},"step-3-handle-missing-values","Step 3: Handle missing values",[14,1124,1125],{},"Before converting the age, check whether the string is empty.",[51,1127,1129],{"className":53,"code":1128,"language":55,"meta":56,"style":56},"age_text = \"\"\n\nage = int(age_text) if age_text else None\nprint(age)\n",[58,1130,1131,1140,1144,1167],{"__ignoreMap":56},[61,1132,1133,1135,1137],{"class":63,"line":64},[61,1134,901],{"class":67},[61,1136,72],{"class":71},[61,1138,1139],{"class":85}," \"\"\n",[61,1141,1142],{"class":63,"line":79},[61,1143,265],{"emptyLinePlaceholder":264},[61,1145,1146,1149,1151,1153,1155,1157,1159,1161,1163,1165],{"class":63,"line":146},[61,1147,1148],{"class":67},"age ",[61,1150,72],{"class":71},[61,1152,426],{"class":425},[61,1154,429],{"class":75},[61,1156,432],{"class":326},[61,1158,435],{"class":75},[61,1160,438],{"class":287},[61,1162,441],{"class":67},[61,1164,444],{"class":287},[61,1166,448],{"class":447},[61,1168,1169,1171,1173,1175],{"class":63,"line":198},[61,1170,533],{"class":532},[61,1172,429],{"class":75},[61,1174,111],{"class":326},[61,1176,541],{"class":75},[14,1178,1070],{},[51,1180,1182],{"className":53,"code":1181,"language":55,"meta":56,"style":56},"None\n",[58,1183,1184],{"__ignoreMap":56},[61,1185,1186],{"class":63,"line":64},[61,1187,1181],{"class":447},[14,1189,1190,1191,1193],{},"Using ",[58,1192,582],{}," makes it clear that the value is missing.",[14,1195,1196],{},"That is usually better than keeping an empty string for numeric data, because:",[21,1198,1199,1202,1205],{},[24,1200,1201],{},"empty strings are still strings",[24,1203,1204],{},"numbers should be stored as numbers",[24,1206,1207],{},"missing values should be handled intentionally",[43,1209,1211],{"id":1210},"step-4-convert-data-types","Step 4: Convert data types",[14,1213,1214,1215,1217],{},"The ",[58,1216,111],{}," value starts as text:",[51,1219,1221],{"className":53,"code":1220,"language":55,"meta":56,"style":56},"age_text = \"25\"\nprint(type(age_text))\n",[58,1222,1223,1236],{"__ignoreMap":56},[61,1224,1225,1227,1229,1231,1233],{"class":63,"line":64},[61,1226,901],{"class":67},[61,1228,72],{"class":71},[61,1230,98],{"class":85},[61,1232,120],{"class":89},[61,1234,1235],{"class":85},"\"\n",[61,1237,1238,1240,1242,1245,1247,1249],{"class":63,"line":79},[61,1239,533],{"class":532},[61,1241,429],{"class":75},[61,1243,1244],{"class":425},"type",[61,1246,429],{"class":75},[61,1248,432],{"class":326},[61,1250,1251],{"class":75},"))\n",[14,1253,1070],{},[51,1255,1257],{"className":53,"code":1256,"language":55,"meta":56,"style":56},"\u003Cclass 'str'>\n",[58,1258,1259],{"__ignoreMap":56},[61,1260,1261,1264,1268,1271,1274,1277],{"class":63,"line":64},[61,1262,1263],{"class":71},"\u003C",[61,1265,1267],{"class":1266},"sbsja","class",[61,1269,1270],{"class":85}," '",[61,1272,1273],{"class":89},"str",[61,1275,1276],{"class":85},"'",[61,1278,1279],{"class":71},">\n",[14,1281,1282,1283,95],{},"You can convert it to an integer with ",[558,1284,1286],{"href":1285},"\u002Freference\u002Fpython-int-function-explained",[58,1287,1288],{},"int()",[51,1290,1292],{"className":53,"code":1291,"language":55,"meta":56,"style":56},"age = int(age_text)\nprint(age)\nprint(type(age))\n",[58,1293,1294,1308,1318],{"__ignoreMap":56},[61,1295,1296,1298,1300,1302,1304,1306],{"class":63,"line":64},[61,1297,1148],{"class":67},[61,1299,72],{"class":71},[61,1301,426],{"class":425},[61,1303,429],{"class":75},[61,1305,432],{"class":326},[61,1307,541],{"class":75},[61,1309,1310,1312,1314,1316],{"class":63,"line":79},[61,1311,533],{"class":532},[61,1313,429],{"class":75},[61,1315,111],{"class":326},[61,1317,541],{"class":75},[61,1319,1320,1322,1324,1326,1328,1330],{"class":63,"line":146},[61,1321,533],{"class":532},[61,1323,429],{"class":75},[61,1325,1244],{"class":425},[61,1327,429],{"class":75},[61,1329,111],{"class":326},[61,1331,1251],{"class":75},[14,1333,1070],{},[51,1335,1337],{"className":53,"code":1336,"language":55,"meta":56,"style":56},"25\n\u003Cclass 'int'>\n",[58,1338,1339,1343],{"__ignoreMap":56},[61,1340,1341],{"class":63,"line":64},[61,1342,996],{"class":995},[61,1344,1345,1347,1349,1351,1353,1355],{"class":63,"line":79},[61,1346,1263],{"class":71},[61,1348,1267],{"class":1266},[61,1350,1270],{"class":85},[61,1352,575],{"class":89},[61,1354,1276],{"class":85},[61,1356,1279],{"class":71},[14,1358,1359],{},"This matters because numbers should be stored as numbers when you want to:",[21,1361,1362,1365,1368],{},[24,1363,1364],{},"compare values",[24,1366,1367],{},"sort correctly",[24,1369,1370],{},"do math",[14,1372,1373,1374,1376,1377,1380,1381,1008,1385,462],{},"If the text is not numeric, ",[58,1375,1288],{}," will fail with a ",[58,1378,1379],{},"ValueError",". If you need help with that, see ",[558,1382,1384],{"href":1383},"\u002Ferrors\u002Fvalueerror-invalid-literal-for-int-with-base-10-fix","ValueError: invalid literal for int() with base 10",[558,1386,1388],{"href":1387},"\u002Fhow-to\u002Fhow-to-convert-string-to-int-in-python","how to convert string to int in Python",[43,1390,1392],{"id":1391},"step-5-create-the-cleaned-result","Step 5: Create the cleaned result",[14,1394,1395],{},"Now put the steps together into one script.",[51,1397,1398],{"className":53,"code":54,"language":55,"meta":56,"style":56},[58,1399,1400,1408,1460,1508,1560,1564,1568,1576,1580,1592,1614,1636,1662,1666,1676,1680,1684,1706,1710,1720,1734,1748,1760,1764,1768],{"__ignoreMap":56},[61,1401,1402,1404,1406],{"class":63,"line":64},[61,1403,68],{"class":67},[61,1405,72],{"class":71},[61,1407,76],{"class":75},[61,1409,1410,1412,1414,1416,1418,1420,1422,1424,1426,1428,1430,1432,1434,1436,1438,1440,1442,1444,1446,1448,1450,1452,1454,1456,1458],{"class":63,"line":79},[61,1411,82],{"class":75},[61,1413,86],{"class":85},[61,1415,90],{"class":89},[61,1417,86],{"class":85},[61,1419,95],{"class":75},[61,1421,98],{"class":85},[61,1423,101],{"class":89},[61,1425,86],{"class":85},[61,1427,106],{"class":75},[61,1429,98],{"class":85},[61,1431,111],{"class":89},[61,1433,86],{"class":85},[61,1435,95],{"class":75},[61,1437,98],{"class":85},[61,1439,120],{"class":89},[61,1441,86],{"class":85},[61,1443,106],{"class":75},[61,1445,98],{"class":85},[61,1447,129],{"class":89},[61,1449,86],{"class":85},[61,1451,95],{"class":75},[61,1453,98],{"class":85},[61,1455,138],{"class":89},[61,1457,86],{"class":85},[61,1459,143],{"class":75},[61,1461,1462,1464,1466,1468,1470,1472,1474,1476,1478,1480,1482,1484,1486,1488,1490,1492,1494,1496,1498,1500,1502,1504,1506],{"class":63,"line":146},[61,1463,82],{"class":75},[61,1465,86],{"class":85},[61,1467,90],{"class":89},[61,1469,86],{"class":85},[61,1471,95],{"class":75},[61,1473,98],{"class":85},[61,1475,161],{"class":89},[61,1477,86],{"class":85},[61,1479,106],{"class":75},[61,1481,98],{"class":85},[61,1483,111],{"class":89},[61,1485,86],{"class":85},[61,1487,95],{"class":75},[61,1489,176],{"class":85},[61,1491,106],{"class":75},[61,1493,98],{"class":85},[61,1495,129],{"class":89},[61,1497,86],{"class":85},[61,1499,95],{"class":75},[61,1501,98],{"class":85},[61,1503,191],{"class":89},[61,1505,86],{"class":85},[61,1507,143],{"class":75},[61,1509,1510,1512,1514,1516,1518,1520,1522,1524,1526,1528,1530,1532,1534,1536,1538,1540,1542,1544,1546,1548,1550,1552,1554,1556,1558],{"class":63,"line":198},[61,1511,82],{"class":75},[61,1513,86],{"class":85},[61,1515,90],{"class":89},[61,1517,86],{"class":85},[61,1519,95],{"class":75},[61,1521,98],{"class":85},[61,1523,213],{"class":89},[61,1525,86],{"class":85},[61,1527,106],{"class":75},[61,1529,98],{"class":85},[61,1531,111],{"class":89},[61,1533,86],{"class":85},[61,1535,95],{"class":75},[61,1537,98],{"class":85},[61,1539,230],{"class":89},[61,1541,86],{"class":85},[61,1543,106],{"class":75},[61,1545,98],{"class":85},[61,1547,129],{"class":89},[61,1549,86],{"class":85},[61,1551,95],{"class":75},[61,1553,98],{"class":85},[61,1555,247],{"class":89},[61,1557,86],{"class":85},[61,1559,252],{"class":75},[61,1561,1562],{"class":63,"line":255},[61,1563,258],{"class":75},[61,1565,1566],{"class":63,"line":261},[61,1567,265],{"emptyLinePlaceholder":264},[61,1569,1570,1572,1574],{"class":63,"line":268},[61,1571,271],{"class":67},[61,1573,72],{"class":71},[61,1575,276],{"class":75},[61,1577,1578],{"class":63,"line":279},[61,1579,265],{"emptyLinePlaceholder":264},[61,1581,1582,1584,1586,1588,1590],{"class":63,"line":284},[61,1583,288],{"class":287},[61,1585,291],{"class":67},[61,1587,294],{"class":287},[61,1589,297],{"class":67},[61,1591,300],{"class":75},[61,1593,1594,1596,1598,1600,1602,1604,1606,1608,1610,1612],{"class":63,"line":303},[61,1595,306],{"class":67},[61,1597,72],{"class":71},[61,1599,311],{"class":67},[61,1601,314],{"class":75},[61,1603,86],{"class":85},[61,1605,90],{"class":89},[61,1607,86],{"class":85},[61,1609,323],{"class":75},[61,1611,327],{"class":326},[61,1613,330],{"class":75},[61,1615,1616,1618,1620,1622,1624,1626,1628,1630,1632,1634],{"class":63,"line":333},[61,1617,336],{"class":67},[61,1619,72],{"class":71},[61,1621,311],{"class":67},[61,1623,314],{"class":75},[61,1625,86],{"class":85},[61,1627,111],{"class":89},[61,1629,86],{"class":85},[61,1631,323],{"class":75},[61,1633,327],{"class":326},[61,1635,330],{"class":75},[61,1637,1638,1640,1642,1644,1646,1648,1650,1652,1654,1656,1658,1660],{"class":63,"line":357},[61,1639,360],{"class":67},[61,1641,72],{"class":71},[61,1643,311],{"class":67},[61,1645,314],{"class":75},[61,1647,86],{"class":85},[61,1649,129],{"class":89},[61,1651,86],{"class":85},[61,1653,323],{"class":75},[61,1655,327],{"class":326},[61,1657,379],{"class":75},[61,1659,382],{"class":326},[61,1661,330],{"class":75},[61,1663,1664],{"class":63,"line":387},[61,1665,265],{"emptyLinePlaceholder":264},[61,1667,1668,1670,1672,1674],{"class":63,"line":392},[61,1669,395],{"class":287},[61,1671,398],{"class":71},[61,1673,401],{"class":67},[61,1675,300],{"class":75},[61,1677,1678],{"class":63,"line":406},[61,1679,409],{"class":287},[61,1681,1682],{"class":63,"line":412},[61,1683,265],{"emptyLinePlaceholder":264},[61,1685,1686,1688,1690,1692,1694,1696,1698,1700,1702,1704],{"class":63,"line":417},[61,1687,420],{"class":67},[61,1689,72],{"class":71},[61,1691,426],{"class":425},[61,1693,429],{"class":75},[61,1695,432],{"class":326},[61,1697,435],{"class":75},[61,1699,438],{"class":287},[61,1701,441],{"class":67},[61,1703,444],{"class":287},[61,1705,448],{"class":447},[61,1707,1708],{"class":63,"line":451},[61,1709,265],{"emptyLinePlaceholder":264},[61,1711,1712,1714,1716,1718],{"class":63,"line":456},[61,1713,459],{"class":67},[61,1715,462],{"class":75},[61,1717,465],{"class":326},[61,1719,468],{"class":75},[61,1721,1722,1724,1726,1728,1730,1732],{"class":63,"line":471},[61,1723,474],{"class":85},[61,1725,90],{"class":89},[61,1727,86],{"class":85},[61,1729,95],{"class":75},[61,1731,401],{"class":326},[61,1733,485],{"class":75},[61,1735,1736,1738,1740,1742,1744,1746],{"class":63,"line":488},[61,1737,474],{"class":85},[61,1739,111],{"class":89},[61,1741,86],{"class":85},[61,1743,95],{"class":75},[61,1745,499],{"class":326},[61,1747,485],{"class":75},[61,1749,1750,1752,1754,1756,1758],{"class":63,"line":504},[61,1751,474],{"class":85},[61,1753,129],{"class":89},[61,1755,86],{"class":85},[61,1757,95],{"class":75},[61,1759,515],{"class":326},[61,1761,1762],{"class":63,"line":518},[61,1763,521],{"class":75},[61,1765,1766],{"class":63,"line":524},[61,1767,265],{"emptyLinePlaceholder":264},[61,1769,1770,1772,1774,1776],{"class":63,"line":529},[61,1771,533],{"class":532},[61,1773,429],{"class":75},[61,1775,538],{"class":326},[61,1777,541],{"class":75},[14,1779,1780],{},"What this script does:",[21,1782,1783,1786,1789,1795,1798],{},[24,1784,1785],{},"Loops through each row",[24,1787,1788],{},"Cleans each field",[24,1790,1791,1792,1794],{},"Skips rows where ",[58,1793,90],{}," is empty",[24,1796,1797],{},"Converts age when possible",[24,1799,1800,1801],{},"Appends a new cleaned dictionary to ",[58,1802,538],{},[14,1804,1805],{},"Using a new list is safer for beginners because you keep the original data unchanged.",[43,1807,1809],{"id":1808},"expected-output","Expected output",[14,1811,1812],{},"Running the full script prints:",[51,1814,1816],{"className":53,"code":1815,"language":55,"meta":56,"style":56},"[{'name': 'Alice', 'age': 25, 'city': 'New York'}, {'name': 'Bob', 'age': None, 'city': 'London'}, {'name': 'Cara', 'age': 31, 'city': 'Paris'}]\n",[58,1817,1818],{"__ignoreMap":56},[61,1819,1820,1823,1825,1827,1829,1831,1833,1836,1838,1840,1842,1844,1846,1848,1851,1853,1855,1857,1859,1861,1863,1866,1868,1871,1873,1875,1877,1879,1881,1883,1885,1887,1889,1891,1893,1895,1897,1900,1902,1904,1906,1908,1910,1912,1915,1917,1919,1921,1923,1925,1927,1929,1931,1934,1936,1938,1940,1942,1944,1946,1949,1951,1953,1955,1957,1959,1961,1964,1966],{"class":63,"line":64},[61,1821,1822],{"class":75},"[{",[61,1824,1276],{"class":85},[61,1826,90],{"class":89},[61,1828,1276],{"class":85},[61,1830,95],{"class":75},[61,1832,1270],{"class":85},[61,1834,1835],{"class":89},"Alice",[61,1837,1276],{"class":85},[61,1839,106],{"class":75},[61,1841,1270],{"class":85},[61,1843,111],{"class":89},[61,1845,1276],{"class":85},[61,1847,95],{"class":75},[61,1849,1850],{"class":995}," 25",[61,1852,106],{"class":75},[61,1854,1270],{"class":85},[61,1856,129],{"class":89},[61,1858,1276],{"class":85},[61,1860,95],{"class":75},[61,1862,1270],{"class":85},[61,1864,1865],{"class":89},"New York",[61,1867,1276],{"class":85},[61,1869,1870],{"class":75},"},",[61,1872,821],{"class":75},[61,1874,1276],{"class":85},[61,1876,90],{"class":89},[61,1878,1276],{"class":85},[61,1880,95],{"class":75},[61,1882,1270],{"class":85},[61,1884,161],{"class":89},[61,1886,1276],{"class":85},[61,1888,106],{"class":75},[61,1890,1270],{"class":85},[61,1892,111],{"class":89},[61,1894,1276],{"class":85},[61,1896,95],{"class":75},[61,1898,1899],{"class":447}," None",[61,1901,106],{"class":75},[61,1903,1270],{"class":85},[61,1905,129],{"class":89},[61,1907,1276],{"class":85},[61,1909,95],{"class":75},[61,1911,1270],{"class":85},[61,1913,1914],{"class":89},"London",[61,1916,1276],{"class":85},[61,1918,1870],{"class":75},[61,1920,821],{"class":75},[61,1922,1276],{"class":85},[61,1924,90],{"class":89},[61,1926,1276],{"class":85},[61,1928,95],{"class":75},[61,1930,1270],{"class":85},[61,1932,1933],{"class":89},"Cara",[61,1935,1276],{"class":85},[61,1937,106],{"class":75},[61,1939,1270],{"class":85},[61,1941,111],{"class":89},[61,1943,1276],{"class":85},[61,1945,95],{"class":75},[61,1947,1948],{"class":995}," 31",[61,1950,106],{"class":75},[61,1952,1270],{"class":85},[61,1954,129],{"class":89},[61,1956,1276],{"class":85},[61,1958,95],{"class":75},[61,1960,1270],{"class":85},[61,1962,1963],{"class":89},"Paris",[61,1965,1276],{"class":85},[61,1967,1968],{"class":75},"}]\n",[14,1970,1971],{},"Notice what changed:",[21,1973,1974,1977,1980,1983],{},[24,1975,1976],{},"Extra spaces were removed",[24,1978,1979],{},"City names were standardized",[24,1981,1982],{},"Age values were converted from strings to integers",[24,1984,1985,1986],{},"Missing age became ",[58,1987,582],{},[43,1989,1991],{"id":1990},"useful-improvements-for-real-projects","Useful improvements for real projects",[14,1993,1994],{},"This example is intentionally simple. In real projects, you may also want to:",[21,1996,1997,2000,2006,2009,2012],{},[24,1998,1999],{},"Skip rows with missing required fields",[24,2001,803,2002,2005],{},[58,2003,2004],{},"try-except"," when converting numbers",[24,2007,2008],{},"Read messy data from CSV files",[24,2010,2011],{},"Write cleaned data back to a file",[24,2013,2014],{},"Move cleaning steps into a function",[14,2016,2017],{},"For example, if your age values might contain invalid text, you can make the conversion safer:",[51,2019,2021],{"className":53,"code":2020,"language":55,"meta":56,"style":56},"raw_data = [\n    {\"name\": \"Alice\", \"age\": \"25\", \"city\": \"new york\"},\n    {\"name\": \"Bob\", \"age\": \"unknown\", \"city\": \"london\"}\n]\n\ncleaned_data = []\n\nfor row in raw_data:\n    name = row[\"name\"].strip()\n    age_text = row[\"age\"].strip()\n    city = row[\"city\"].strip().title()\n\n    try:\n        age = int(age_text) if age_text else None\n    except ValueError:\n        age = None\n\n    cleaned_data.append({\n        \"name\": name,\n        \"age\": age,\n        \"city\": city\n    })\n\nprint(cleaned_data)\n",[58,2022,2023,2031,2083,2137,2141,2145,2153,2157,2169,2191,2213,2239,2243,2250,2273,2283,2291,2295,2305,2319,2333,2345,2349,2353],{"__ignoreMap":56},[61,2024,2025,2027,2029],{"class":63,"line":64},[61,2026,68],{"class":67},[61,2028,72],{"class":71},[61,2030,76],{"class":75},[61,2032,2033,2035,2037,2039,2041,2043,2045,2047,2049,2051,2053,2055,2057,2059,2061,2063,2065,2067,2069,2071,2073,2075,2077,2079,2081],{"class":63,"line":79},[61,2034,82],{"class":75},[61,2036,86],{"class":85},[61,2038,90],{"class":89},[61,2040,86],{"class":85},[61,2042,95],{"class":75},[61,2044,98],{"class":85},[61,2046,1835],{"class":89},[61,2048,86],{"class":85},[61,2050,106],{"class":75},[61,2052,98],{"class":85},[61,2054,111],{"class":89},[61,2056,86],{"class":85},[61,2058,95],{"class":75},[61,2060,98],{"class":85},[61,2062,120],{"class":89},[61,2064,86],{"class":85},[61,2066,106],{"class":75},[61,2068,98],{"class":85},[61,2070,129],{"class":89},[61,2072,86],{"class":85},[61,2074,95],{"class":75},[61,2076,98],{"class":85},[61,2078,138],{"class":89},[61,2080,86],{"class":85},[61,2082,143],{"class":75},[61,2084,2085,2087,2089,2091,2093,2095,2097,2099,2101,2103,2105,2107,2109,2111,2113,2116,2118,2120,2122,2124,2126,2128,2130,2133,2135],{"class":63,"line":146},[61,2086,82],{"class":75},[61,2088,86],{"class":85},[61,2090,90],{"class":89},[61,2092,86],{"class":85},[61,2094,95],{"class":75},[61,2096,98],{"class":85},[61,2098,161],{"class":89},[61,2100,86],{"class":85},[61,2102,106],{"class":75},[61,2104,98],{"class":85},[61,2106,111],{"class":89},[61,2108,86],{"class":85},[61,2110,95],{"class":75},[61,2112,98],{"class":85},[61,2114,2115],{"class":89},"unknown",[61,2117,86],{"class":85},[61,2119,106],{"class":75},[61,2121,98],{"class":85},[61,2123,129],{"class":89},[61,2125,86],{"class":85},[61,2127,95],{"class":75},[61,2129,98],{"class":85},[61,2131,2132],{"class":89},"london",[61,2134,86],{"class":85},[61,2136,252],{"class":75},[61,2138,2139],{"class":63,"line":198},[61,2140,258],{"class":75},[61,2142,2143],{"class":63,"line":255},[61,2144,265],{"emptyLinePlaceholder":264},[61,2146,2147,2149,2151],{"class":63,"line":261},[61,2148,271],{"class":67},[61,2150,72],{"class":71},[61,2152,276],{"class":75},[61,2154,2155],{"class":63,"line":268},[61,2156,265],{"emptyLinePlaceholder":264},[61,2158,2159,2161,2163,2165,2167],{"class":63,"line":279},[61,2160,288],{"class":287},[61,2162,291],{"class":67},[61,2164,294],{"class":287},[61,2166,297],{"class":67},[61,2168,300],{"class":75},[61,2170,2171,2173,2175,2177,2179,2181,2183,2185,2187,2189],{"class":63,"line":284},[61,2172,306],{"class":67},[61,2174,72],{"class":71},[61,2176,311],{"class":67},[61,2178,314],{"class":75},[61,2180,86],{"class":85},[61,2182,90],{"class":89},[61,2184,86],{"class":85},[61,2186,323],{"class":75},[61,2188,327],{"class":326},[61,2190,330],{"class":75},[61,2192,2193,2195,2197,2199,2201,2203,2205,2207,2209,2211],{"class":63,"line":303},[61,2194,336],{"class":67},[61,2196,72],{"class":71},[61,2198,311],{"class":67},[61,2200,314],{"class":75},[61,2202,86],{"class":85},[61,2204,111],{"class":89},[61,2206,86],{"class":85},[61,2208,323],{"class":75},[61,2210,327],{"class":326},[61,2212,330],{"class":75},[61,2214,2215,2217,2219,2221,2223,2225,2227,2229,2231,2233,2235,2237],{"class":63,"line":333},[61,2216,360],{"class":67},[61,2218,72],{"class":71},[61,2220,311],{"class":67},[61,2222,314],{"class":75},[61,2224,86],{"class":85},[61,2226,129],{"class":89},[61,2228,86],{"class":85},[61,2230,323],{"class":75},[61,2232,327],{"class":326},[61,2234,379],{"class":75},[61,2236,382],{"class":326},[61,2238,330],{"class":75},[61,2240,2241],{"class":63,"line":357},[61,2242,265],{"emptyLinePlaceholder":264},[61,2244,2245,2248],{"class":63,"line":387},[61,2246,2247],{"class":287},"    try",[61,2249,300],{"class":75},[61,2251,2252,2255,2257,2259,2261,2263,2265,2267,2269,2271],{"class":63,"line":392},[61,2253,2254],{"class":67},"        age ",[61,2256,72],{"class":71},[61,2258,426],{"class":425},[61,2260,429],{"class":75},[61,2262,432],{"class":326},[61,2264,435],{"class":75},[61,2266,438],{"class":287},[61,2268,441],{"class":67},[61,2270,444],{"class":287},[61,2272,448],{"class":447},[61,2274,2275,2278,2281],{"class":63,"line":406},[61,2276,2277],{"class":287},"    except",[61,2279,2280],{"class":425}," ValueError",[61,2282,300],{"class":75},[61,2284,2285,2287,2289],{"class":63,"line":412},[61,2286,2254],{"class":67},[61,2288,72],{"class":71},[61,2290,448],{"class":447},[61,2292,2293],{"class":63,"line":417},[61,2294,265],{"emptyLinePlaceholder":264},[61,2296,2297,2299,2301,2303],{"class":63,"line":451},[61,2298,459],{"class":67},[61,2300,462],{"class":75},[61,2302,465],{"class":326},[61,2304,468],{"class":75},[61,2306,2307,2309,2311,2313,2315,2317],{"class":63,"line":456},[61,2308,474],{"class":85},[61,2310,90],{"class":89},[61,2312,86],{"class":85},[61,2314,95],{"class":75},[61,2316,401],{"class":326},[61,2318,485],{"class":75},[61,2320,2321,2323,2325,2327,2329,2331],{"class":63,"line":471},[61,2322,474],{"class":85},[61,2324,111],{"class":89},[61,2326,86],{"class":85},[61,2328,95],{"class":75},[61,2330,499],{"class":326},[61,2332,485],{"class":75},[61,2334,2335,2337,2339,2341,2343],{"class":63,"line":488},[61,2336,474],{"class":85},[61,2338,129],{"class":89},[61,2340,86],{"class":85},[61,2342,95],{"class":75},[61,2344,515],{"class":326},[61,2346,2347],{"class":63,"line":504},[61,2348,521],{"class":75},[61,2350,2351],{"class":63,"line":518},[61,2352,265],{"emptyLinePlaceholder":264},[61,2354,2355,2357,2359,2361],{"class":63,"line":524},[61,2356,533],{"class":532},[61,2358,429],{"class":75},[61,2360,538],{"class":326},[61,2362,541],{"class":75},[14,2364,2365,2366,2370,2371,462],{},"If your data comes from a file, the next practical step is to ",[558,2367,2369],{"href":2368},"\u002Fhow-to\u002Fhow-to-read-a-csv-file-in-python","read a CSV file in Python",", clean each row, and then ",[558,2372,2374],{"href":2373},"\u002Fhow-to\u002Fhow-to-write-a-csv-file-in-python","write a CSV file in Python",[43,2376,2378],{"id":2377},"common-mistakes","Common mistakes",[14,2380,2381],{},"These are common causes of messy data problems:",[21,2383,2384,2387,2390,2393,2395],{},[24,2385,2386],{},"Extra spaces in user input or file data",[24,2388,2389],{},"Numbers stored as strings",[24,2391,2392],{},"Empty strings used instead of missing values",[24,2394,787],{},[24,2396,2397],{},"Assuming every row has valid data",[14,2399,2400],{},"If your script is not working, these quick debug prints can help:",[51,2402,2404],{"className":53,"code":2403,"language":55,"meta":56,"style":56},"print(raw_data)\nprint(row)\nprint(type(row['age']))\nprint(repr(row['name']))\nprint(cleaned_data)\n",[58,2405,2406,2416,2427,2450,2473],{"__ignoreMap":56},[61,2407,2408,2410,2412,2414],{"class":63,"line":64},[61,2409,533],{"class":532},[61,2411,429],{"class":75},[61,2413,774],{"class":326},[61,2415,541],{"class":75},[61,2417,2418,2420,2422,2425],{"class":63,"line":79},[61,2419,533],{"class":532},[61,2421,429],{"class":75},[61,2423,2424],{"class":326},"row",[61,2426,541],{"class":75},[61,2428,2429,2431,2433,2435,2437,2439,2441,2443,2445,2447],{"class":63,"line":146},[61,2430,533],{"class":532},[61,2432,429],{"class":75},[61,2434,1244],{"class":425},[61,2436,429],{"class":75},[61,2438,2424],{"class":326},[61,2440,314],{"class":75},[61,2442,1276],{"class":85},[61,2444,111],{"class":89},[61,2446,1276],{"class":85},[61,2448,2449],{"class":75},"]))\n",[61,2451,2452,2454,2456,2459,2461,2463,2465,2467,2469,2471],{"class":63,"line":198},[61,2453,533],{"class":532},[61,2455,429],{"class":75},[61,2457,2458],{"class":532},"repr",[61,2460,429],{"class":75},[61,2462,2424],{"class":326},[61,2464,314],{"class":75},[61,2466,1276],{"class":85},[61,2468,90],{"class":89},[61,2470,1276],{"class":85},[61,2472,2449],{"class":75},[61,2474,2475,2477,2479,2481],{"class":63,"line":255},[61,2476,533],{"class":532},[61,2478,429],{"class":75},[61,2480,538],{"class":326},[61,2482,541],{"class":75},[14,2484,2485],{},"Why these help:",[21,2487,2488,2494,2500,2509,2515],{},[24,2489,2490,2493],{},[58,2491,2492],{},"print(raw_data)"," shows the full original data",[24,2495,2496,2499],{},[58,2497,2498],{},"print(row)"," shows the current row being processed",[24,2501,2502,2505,2506,2508],{},[58,2503,2504],{},"print(type(row['age']))"," confirms whether ",[58,2507,111],{}," is a string or number",[24,2510,2511,2514],{},[58,2512,2513],{},"print(repr(row['name']))"," makes hidden spaces visible",[24,2516,2517,2520],{},[58,2518,2519],{},"print(cleaned_data)"," shows the result after cleaning",[43,2522,2524],{"id":2523},"faq","FAQ",[2526,2527,2529],"h3",{"id":2528},"why-use-a-new-cleaned-list-instead-of-changing-the-old-one","Why use a new cleaned list instead of changing the old one?",[14,2531,2532],{},"It is safer for beginners. You keep the original data and can compare before and after.",[2526,2534,2536],{"id":2535},"why-is-none-used-for-missing-values","Why is None used for missing values?",[14,2538,2539,2541],{},[58,2540,582],{}," clearly means no value. It is better than keeping an empty string for numeric data.",[2526,2543,2545],{"id":2544},"what-if-int-fails-during-cleaning","What if int() fails during cleaning?",[14,2547,2548,2549,2551],{},"Use a check first or wrap the conversion in ",[58,2550,2004],{}," if the input may contain invalid numbers.",[2526,2553,2555],{"id":2554},"can-i-use-this-script-with-csv-data","Can I use this script with CSV data?",[14,2557,2558],{},"Yes. The same cleaning steps work after reading rows from a CSV file into dictionaries.",[43,2560,2562],{"id":2561},"see-also","See also",[21,2564,2565,2570,2575,2580,2585],{},[24,2566,2567],{},[558,2568,2569],{"href":2368},"How to read a CSV file in Python",[24,2571,2572],{},[558,2573,2574],{"href":2373},"How to write a CSV file in Python",[24,2576,2577],{},[558,2578,2579],{"href":560},"Python string strip() method",[24,2581,2582],{},[558,2583,2584],{"href":1285},"Python int() function explained",[24,2586,2587],{},[558,2588,2590],{"href":2589},"\u002Fexamples\u002Fpython-remove-duplicates-from-data-example","Python remove duplicates from data example",[2592,2593,2594],"style",{},"html pre.shiki code .su5hD, html code.shiki .su5hD{--shiki-light:#90A4AE;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .smGrS, html code.shiki .smGrS{--shiki-light:#39ADB5;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sP7_E, html code.shiki .sP7_E{--shiki-light:#39ADB5;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sjJ54, html code.shiki .sjJ54{--shiki-light:#39ADB5;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .s_sjI, html code.shiki .s_sjI{--shiki-light:#91B859;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sVHd0, html code.shiki .sVHd0{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#D73A49;--shiki-default-font-style:inherit;--shiki-dark:#F97583;--shiki-dark-font-style:inherit}html pre.shiki code .slqww, html code.shiki .slqww{--shiki-light:#6182B8;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sZMiF, html code.shiki .sZMiF{--shiki-light:#E2931D;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .s39Yj, html code.shiki .s39Yj{--shiki-light:#39ADB5;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sptTA, html code.shiki .sptTA{--shiki-light:#6182B8;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .srdBf, html code.shiki .srdBf{--shiki-light:#F76D47;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sbsja, html code.shiki .sbsja{--shiki-light:#9C3EDA;--shiki-default:#D73A49;--shiki-dark:#F97583}",{"title":56,"searchDepth":79,"depth":79,"links":2596},[2597,2598,2599,2600,2601,2602,2603,2604,2605,2606,2607,2608,2614],{"id":45,"depth":79,"text":46},{"id":544,"depth":79,"text":545},{"id":588,"depth":79,"text":589},{"id":799,"depth":79,"text":800},{"id":1022,"depth":79,"text":1023},{"id":1121,"depth":79,"text":1122},{"id":1210,"depth":79,"text":1211},{"id":1391,"depth":79,"text":1392},{"id":1808,"depth":79,"text":1809},{"id":1990,"depth":79,"text":1991},{"id":2377,"depth":79,"text":2378},{"id":2523,"depth":79,"text":2524,"children":2609},[2610,2611,2612,2613],{"id":2528,"depth":146,"text":2529},{"id":2535,"depth":146,"text":2536},{"id":2544,"depth":146,"text":2545},{"id":2554,"depth":146,"text":2555},{"id":2561,"depth":79,"text":2562},"Master python data cleaning script example in our comprehensive Python beginner guide.","md",{},"\u002Fexamples\u002Fpython-data-cleaning-script-example",{"title":5,"description":2615},"examples\u002Fpython-data-cleaning-script-example","y-fF0jR133sNWpKVejFUZUHNR_Q1li2ozmul6FHMG04",1777585474998]