[{"data":1,"prerenderedAt":1735},["ShallowReactive",2],{"doc-\u002Fexamples\u002Fpython-simple-web-scraper-for-titles-example":3},{"id":4,"title":5,"body":6,"description":1728,"extension":1729,"meta":1730,"navigation":84,"path":1731,"seo":1732,"stem":1733,"__hash__":1734},"content\u002Fexamples\u002Fpython-simple-web-scraper-for-titles-example.md","Python Simple Web Scraper for Titles Example",{"type":7,"value":8,"toc":1701},"minimark",[9,13,17,26,31,43,204,207,229,232,241,245,248,262,265,269,272,293,296,312,316,323,340,346,362,367,373,384,388,391,530,535,569,575,583,587,590,604,607,844,848,868,879,883,886,1206,1209,1293,1296,1300,1303,1316,1323,1351,1354,1357,1371,1378,1382,1385,1410,1413,1445,1448,1482,1486,1491,1506,1512,1515,1538,1545,1548,1566,1569,1586,1590,1593,1609,1615,1619,1629,1632,1636,1642,1646,1649,1653,1660,1664,1667,1671,1697],[10,11,5],"h1",{"id":12},"python-simple-web-scraper-for-titles-example",[14,15,16],"p",{},"Build a small Python script that downloads a web page and extracts page titles. This example is meant to be practical, short, and easy to run step by step.",[14,18,19,20,25],{},"If you are new to packages, see ",[21,22,24],"a",{"href":23},"\u002Fhow-to\u002Fhow-to-install-a-python-package-with-pip\u002F","how to install a Python package with pip",".",[27,28,30],"h2",{"id":29},"quick-example","Quick example",[14,32,33,34,38,39,42],{},"This is the fastest working example. 
It assumes ",[35,36,37],"code",{},"requests"," and ",[35,40,41],{},"beautifulsoup4"," are installed.",[44,45,50],"pre",{"className":46,"code":47,"language":48,"meta":49,"style":49},"language-python shiki shiki-themes material-theme-lighter github-light github-dark","import requests\nfrom bs4 import BeautifulSoup\n\nurl = \"https:\u002F\u002Fexample.com\"\nresponse = requests.get(url)\nsoup = BeautifulSoup(response.text, \"html.parser\")\n\nprint(soup.title.string.strip())\n","python","",[35,51,52,65,79,86,107,134,169,174],{"__ignoreMap":49},[53,54,57,61],"span",{"class":55,"line":56},"line",1,[53,58,60],{"class":59},"sVHd0","import",[53,62,64],{"class":63},"su5hD"," requests\n",[53,66,68,71,74,76],{"class":55,"line":67},2,[53,69,70],{"class":59},"from",[53,72,73],{"class":63}," bs4 ",[53,75,60],{"class":59},[53,77,78],{"class":63}," BeautifulSoup\n",[53,80,82],{"class":55,"line":81},3,[53,83,85],{"emptyLinePlaceholder":84},true,"\n",[53,87,89,92,96,100,104],{"class":55,"line":88},4,[53,90,91],{"class":63},"url ",[53,93,95],{"class":94},"smGrS","=",[53,97,99],{"class":98},"sjJ54"," \"",[53,101,103],{"class":102},"s_sjI","https:\u002F\u002Fexample.com",[53,105,106],{"class":98},"\"\n",[53,108,110,113,115,118,121,125,128,131],{"class":55,"line":109},5,[53,111,112],{"class":63},"response ",[53,114,95],{"class":94},[53,116,117],{"class":63}," requests",[53,119,25],{"class":120},"sP7_E",[53,122,124],{"class":123},"slqww","get",[53,126,127],{"class":120},"(",[53,129,130],{"class":123},"url",[53,132,133],{"class":120},")\n",[53,135,137,140,142,145,147,150,152,156,159,161,164,167],{"class":55,"line":136},6,[53,138,139],{"class":63},"soup ",[53,141,95],{"class":94},[53,143,144],{"class":123}," 
BeautifulSoup",[53,146,127],{"class":120},[53,148,149],{"class":123},"response",[53,151,25],{"class":120},[53,153,155],{"class":154},"skxfh","text",[53,157,158],{"class":120},",",[53,160,99],{"class":98},[53,162,163],{"class":102},"html.parser",[53,165,166],{"class":98},"\"",[53,168,133],{"class":120},[53,170,172],{"class":55,"line":171},7,[53,173,85],{"emptyLinePlaceholder":84},[53,175,177,181,183,186,188,191,193,196,198,201],{"class":55,"line":176},8,[53,178,180],{"class":179},"sptTA","print",[53,182,127],{"class":120},[53,184,185],{"class":123},"soup",[53,187,25],{"class":120},[53,189,190],{"class":154},"title",[53,192,25],{"class":120},[53,194,195],{"class":154},"string",[53,197,25],{"class":120},[53,199,200],{"class":123},"strip",[53,202,203],{"class":120},"())\n",[14,205,206],{},"What it does:",[208,209,210,216,219,226],"ul",{},[211,212,213,214],"li",{},"Downloads the page at ",[35,215,103],{},[211,217,218],{},"Parses the HTML",[211,220,221,222,225],{},"Finds the ",[35,223,224],{},"\u003Ctitle>"," tag",[211,227,228],{},"Prints the title text",[14,230,231],{},"Expected output:",[44,233,235],{"className":46,"code":234,"language":48,"meta":49,"style":49},"Example Domain\n",[35,236,237],{"__ignoreMap":49},[53,238,239],{"class":55,"line":56},[53,240,234],{"class":63},[27,242,244],{"id":243},"what-this-example-does","What this example does",[14,246,247],{},"This script does four simple things:",[208,249,250,253,256,260],{},[211,251,252],{},"Downloads HTML from a web page",[211,254,255],{},"Parses the HTML with BeautifulSoup",[211,257,221,258,225],{},[35,259,224],{},[211,261,228],{},[14,263,264],{},"This is a good beginner scraping example because it focuses on one small task.",[27,266,268],{"id":267},"what-you-need-before-running-it","What you need before running it",[14,270,271],{},"Before you run the script, make sure you have:",[208,273,274,277,280,286,290],{},[211,275,276],{},"Python installed",[211,278,279],{},"Basic understanding of running a Python 
script",[211,281,282,283,285],{},"The ",[35,284,37],{}," package installed",[211,287,282,288,285],{},[35,289,41],{},[211,291,292],{},"An internet connection for live pages",[14,294,295],{},"You can check your Python version with:",[44,297,301],{"className":298,"code":299,"language":300,"meta":49,"style":49},"language-bash shiki shiki-themes material-theme-lighter github-light github-dark","python --version\n","bash",[35,302,303],{"__ignoreMap":49},[53,304,305,308],{"class":55,"line":56},[53,306,48],{"class":307},"sbgvK",[53,309,311],{"class":310},"stzsN"," --version\n",[27,313,315],{"id":314},"install-the-required-packages","Install the required packages",[14,317,318,319,322],{},"Install the required packages with ",[35,320,321],{},"pip",":",[44,324,326],{"className":298,"code":325,"language":300,"meta":49,"style":49},"pip install requests beautifulsoup4\n",[35,327,328],{"__ignoreMap":49},[53,329,330,332,335,337],{"class":55,"line":56},[53,331,321],{"class":307},[53,333,334],{"class":102}," install",[53,336,117],{"class":102},[53,338,339],{"class":102}," beautifulsoup4\n",[14,341,342,343,345],{},"If ",[35,344,321],{}," gives you errors, the problem is usually:",[208,347,348,353,356],{},[211,349,350,352],{},[35,351,321],{}," is not installed",[211,354,355],{},"You are using the wrong Python environment",[211,357,358,359,361],{},"Python and ",[35,360,321],{}," point to different installations",[14,363,364,365,25],{},"If needed, read ",[21,366,24],{"href":23},[14,368,369,370,372],{},"You can also check ",[35,371,321],{}," with:",[44,374,376],{"className":298,"code":375,"language":300,"meta":49,"style":49},"pip --version\n",[35,377,378],{"__ignoreMap":49},[53,379,380,382],{"class":55,"line":56},[53,381,321],{"class":307},[53,383,311],{"class":310},[27,385,387],{"id":386},"minimal-example-get-one-page-title","Minimal example: get one page title",[14,389,390],{},"Here is a simple version with the main steps shown 
clearly.",[44,392,394],{"className":46,"code":393,"language":48,"meta":49,"style":49},"import requests\nfrom bs4 import BeautifulSoup\n\nurl = \"https:\u002F\u002Fexample.com\"\n\nresponse = requests.get(url)\nhtml = response.text\n\nsoup = BeautifulSoup(html, \"html.parser\")\n\ntitle_text = soup.title.string\nprint(title_text)\n",[35,395,396,402,412,416,428,432,450,465,469,493,498,518],{"__ignoreMap":49},[53,397,398,400],{"class":55,"line":56},[53,399,60],{"class":59},[53,401,64],{"class":63},[53,403,404,406,408,410],{"class":55,"line":67},[53,405,70],{"class":59},[53,407,73],{"class":63},[53,409,60],{"class":59},[53,411,78],{"class":63},[53,413,414],{"class":55,"line":81},[53,415,85],{"emptyLinePlaceholder":84},[53,417,418,420,422,424,426],{"class":55,"line":88},[53,419,91],{"class":63},[53,421,95],{"class":94},[53,423,99],{"class":98},[53,425,103],{"class":102},[53,427,106],{"class":98},[53,429,430],{"class":55,"line":109},[53,431,85],{"emptyLinePlaceholder":84},[53,433,434,436,438,440,442,444,446,448],{"class":55,"line":136},[53,435,112],{"class":63},[53,437,95],{"class":94},[53,439,117],{"class":63},[53,441,25],{"class":120},[53,443,124],{"class":123},[53,445,127],{"class":120},[53,447,130],{"class":123},[53,449,133],{"class":120},[53,451,452,455,457,460,462],{"class":55,"line":171},[53,453,454],{"class":63},"html ",[53,456,95],{"class":94},[53,458,459],{"class":63}," 
response",[53,461,25],{"class":120},[53,463,464],{"class":154},"text\n",[53,466,467],{"class":55,"line":176},[53,468,85],{"emptyLinePlaceholder":84},[53,470,472,474,476,478,480,483,485,487,489,491],{"class":55,"line":471},9,[53,473,139],{"class":63},[53,475,95],{"class":94},[53,477,144],{"class":123},[53,479,127],{"class":120},[53,481,482],{"class":123},"html",[53,484,158],{"class":120},[53,486,99],{"class":98},[53,488,163],{"class":102},[53,490,166],{"class":98},[53,492,133],{"class":120},[53,494,496],{"class":55,"line":495},10,[53,497,85],{"emptyLinePlaceholder":84},[53,499,501,504,506,509,511,513,515],{"class":55,"line":500},11,[53,502,503],{"class":63},"title_text ",[53,505,95],{"class":94},[53,507,508],{"class":63}," soup",[53,510,25],{"class":120},[53,512,190],{"class":154},[53,514,25],{"class":120},[53,516,517],{"class":154},"string\n",[53,519,521,523,525,528],{"class":55,"line":520},12,[53,522,180],{"class":179},[53,524,127],{"class":120},[53,526,527],{"class":123},"title_text",[53,529,133],{"class":120},[531,532,534],"h3",{"id":533},"how-this-code-works","How this code works",[208,536,537,543,549,555,563],{},[211,538,539,542],{},[35,540,541],{},"requests.get(url)"," fetches the page",[211,544,545,548],{},[35,546,547],{},"response.text"," gives you the HTML as a string",[211,550,551,554],{},[35,552,553],{},"BeautifulSoup(html, \"html.parser\")"," parses the HTML",[211,556,557,560,561,225],{},[35,558,559],{},"soup.title"," finds the ",[35,562,224],{},[211,564,565,568],{},[35,566,567],{},"soup.title.string"," gets the text inside the tag",[14,570,571,572,574],{},"For ",[35,573,103],{},", the output is:",[44,576,577],{"className":46,"code":234,"language":48,"meta":49,"style":49},[35,578,579],{"__ignoreMap":49},[53,580,581],{"class":55,"line":56},[53,582,234],{"class":63},[27,584,586],{"id":585},"safer-version-with-basic-error-handling","Safer version with basic error handling",[14,588,589],{},"The first example is useful, but it can fail 
if:",[208,591,592,595,598,601],{},[211,593,594],{},"The page cannot be downloaded",[211,596,597],{},"The website returns an error",[211,599,600],{},"The page has no title tag",[211,602,603],{},"The title exists but has no text",[14,605,606],{},"This version is safer for beginners:",[44,608,610],{"className":46,"code":609,"language":48,"meta":49,"style":49},"import requests\nfrom bs4 import BeautifulSoup\n\nurl = \"https:\u002F\u002Fexample.com\"\n\ntry:\n    response = requests.get(url, timeout=10)\n    response.raise_for_status()\nexcept requests.RequestException:\n    print(\"Could not fetch the page.\")\nelse:\n    soup = BeautifulSoup(response.text, \"html.parser\")\n\n    if soup.title and soup.title.string:\n        print(soup.title.string.strip())\n    else:\n        print(\"No title tag was found.\")\n",[35,611,612,618,628,632,644,648,656,687,700,714,730,737,764,769,796,820,828],{"__ignoreMap":49},[53,613,614,616],{"class":55,"line":56},[53,615,60],{"class":59},[53,617,64],{"class":63},[53,619,620,622,624,626],{"class":55,"line":67},[53,621,70],{"class":59},[53,623,73],{"class":63},[53,625,60],{"class":59},[53,627,78],{"class":63},[53,629,630],{"class":55,"line":81},[53,631,85],{"emptyLinePlaceholder":84},[53,633,634,636,638,640,642],{"class":55,"line":88},[53,635,91],{"class":63},[53,637,95],{"class":94},[53,639,99],{"class":98},[53,641,103],{"class":102},[53,643,106],{"class":98},[53,645,646],{"class":55,"line":109},[53,647,85],{"emptyLinePlaceholder":84},[53,649,650,653],{"class":55,"line":136},[53,651,652],{"class":59},"try",[53,654,655],{"class":120},":\n",[53,657,658,661,663,665,667,669,671,673,675,679,681,685],{"class":55,"line":171},[53,659,660],{"class":63},"    response ",[53,662,95],{"class":94},[53,664,117],{"class":63},[53,666,25],{"class":120},[53,668,124],{"class":123},[53,670,127],{"class":120},[53,672,130],{"class":123},[53,674,158],{"class":120},[53,676,678],{"class":677},"s99_P"," 
timeout",[53,680,95],{"class":94},[53,682,684],{"class":683},"srdBf","10",[53,686,133],{"class":120},[53,688,689,692,694,697],{"class":55,"line":176},[53,690,691],{"class":63},"    response",[53,693,25],{"class":120},[53,695,696],{"class":123},"raise_for_status",[53,698,699],{"class":120},"()\n",[53,701,702,705,707,709,712],{"class":55,"line":471},[53,703,704],{"class":59},"except",[53,706,117],{"class":63},[53,708,25],{"class":120},[53,710,711],{"class":154},"RequestException",[53,713,655],{"class":120},[53,715,716,719,721,723,726,728],{"class":55,"line":495},[53,717,718],{"class":179},"    print",[53,720,127],{"class":120},[53,722,166],{"class":98},[53,724,725],{"class":102},"Could not fetch the page.",[53,727,166],{"class":98},[53,729,133],{"class":120},[53,731,732,735],{"class":55,"line":500},[53,733,734],{"class":59},"else",[53,736,655],{"class":120},[53,738,739,742,744,746,748,750,752,754,756,758,760,762],{"class":55,"line":520},[53,740,741],{"class":63},"    soup ",[53,743,95],{"class":94},[53,745,144],{"class":123},[53,747,127],{"class":120},[53,749,149],{"class":123},[53,751,25],{"class":120},[53,753,155],{"class":154},[53,755,158],{"class":120},[53,757,99],{"class":98},[53,759,163],{"class":102},[53,761,166],{"class":98},[53,763,133],{"class":120},[53,765,767],{"class":55,"line":766},13,[53,768,85],{"emptyLinePlaceholder":84},[53,770,772,775,777,779,781,784,786,788,790,792,794],{"class":55,"line":771},14,[53,773,774],{"class":59},"    if",[53,776,508],{"class":63},[53,778,25],{"class":120},[53,780,190],{"class":154},[53,782,783],{"class":94}," and",[53,785,508],{"class":63},[53,787,25],{"class":120},[53,789,190],{"class":154},[53,791,25],{"class":120},[53,793,195],{"class":154},[53,795,655],{"class":120},[53,797,799,802,804,806,808,810,812,814,816,818],{"class":55,"line":798},15,[53,800,801],{"class":179},"        
print",[53,803,127],{"class":120},[53,805,185],{"class":123},[53,807,25],{"class":120},[53,809,190],{"class":154},[53,811,25],{"class":120},[53,813,195],{"class":154},[53,815,25],{"class":120},[53,817,200],{"class":123},[53,819,203],{"class":120},[53,821,823,826],{"class":55,"line":822},16,[53,824,825],{"class":59},"    else",[53,827,655],{"class":120},[53,829,831,833,835,837,840,842],{"class":55,"line":830},17,[53,832,801],{"class":179},[53,834,127],{"class":120},[53,836,166],{"class":98},[53,838,839],{"class":102},"No title tag was found.",[53,841,166],{"class":98},[53,843,133],{"class":120},[531,845,847],{"id":846},"why-this-version-is-better","Why this version is better",[208,849,850,856,862],{},[211,851,852,855],{},[35,853,854],{},"timeout=10"," prevents the request from hanging too long",[211,857,858,861],{},[35,859,860],{},"response.raise_for_status()"," catches HTTP errors like 404 and 500",[211,863,864,867],{},[35,865,866],{},"if soup.title and soup.title.string"," avoids crashes when the title is missing",[14,869,870,871,875,876,878],{},"If you are not familiar with HTTP requests yet, see ",[21,872,874],{"href":873},"\u002Fhow-to\u002Fhow-to-make-an-api-request-in-python\u002F","how to make an API request in Python",". 
The same ",[35,877,37],{}," library is used here.",[27,880,882],{"id":881},"example-scrape-titles-from-multiple-pages","Example: scrape titles from multiple pages",[14,884,885],{},"You can also put several URLs in a list and loop through them.",[44,887,889],{"className":46,"code":888,"language":48,"meta":49,"style":49},"import requests\nfrom bs4 import BeautifulSoup\n\nurls = [\n    \"https:\u002F\u002Fexample.com\",\n    \"https:\u002F\u002Fwww.python.org\",\n    \"https:\u002F\u002Fwww.wikipedia.org\",\n]\n\nfor url in urls:\n    try:\n        response = requests.get(url, timeout=10)\n        response.raise_for_status()\n        soup = BeautifulSoup(response.text, \"html.parser\")\n\n        if soup.title and soup.title.string:\n            title = soup.title.string.strip()\n        else:\n            title = \"No title found\"\n\n        print(f\"{url} -> {title}\")\n\n    except requests.RequestException:\n        print(f\"{url} -> Could not fetch page\")\n",[35,890,891,897,907,911,921,933,944,955,960,964,980,987,1014,1025,1052,1056,1081,1104,1112,1126,1131,1165,1170,1184],{"__ignoreMap":49},[53,892,893,895],{"class":55,"line":56},[53,894,60],{"class":59},[53,896,64],{"class":63},[53,898,899,901,903,905],{"class":55,"line":67},[53,900,70],{"class":59},[53,902,73],{"class":63},[53,904,60],{"class":59},[53,906,78],{"class":63},[53,908,909],{"class":55,"line":81},[53,910,85],{"emptyLinePlaceholder":84},[53,912,913,916,918],{"class":55,"line":88},[53,914,915],{"class":63},"urls ",[53,917,95],{"class":94},[53,919,920],{"class":120}," [\n",[53,922,923,926,928,930],{"class":55,"line":109},[53,924,925],{"class":98},"    
\"",[53,927,103],{"class":102},[53,929,166],{"class":98},[53,931,932],{"class":120},",\n",[53,934,935,937,940,942],{"class":55,"line":136},[53,936,925],{"class":98},[53,938,939],{"class":102},"https:\u002F\u002Fwww.python.org",[53,941,166],{"class":98},[53,943,932],{"class":120},[53,945,946,948,951,953],{"class":55,"line":171},[53,947,925],{"class":98},[53,949,950],{"class":102},"https:\u002F\u002Fwww.wikipedia.org",[53,952,166],{"class":98},[53,954,932],{"class":120},[53,956,957],{"class":55,"line":176},[53,958,959],{"class":120},"]\n",[53,961,962],{"class":55,"line":471},[53,963,85],{"emptyLinePlaceholder":84},[53,965,966,969,972,975,978],{"class":55,"line":495},[53,967,968],{"class":59},"for",[53,970,971],{"class":63}," url ",[53,973,974],{"class":59},"in",[53,976,977],{"class":63}," urls",[53,979,655],{"class":120},[53,981,982,985],{"class":55,"line":500},[53,983,984],{"class":59},"    try",[53,986,655],{"class":120},[53,988,989,992,994,996,998,1000,1002,1004,1006,1008,1010,1012],{"class":55,"line":520},[53,990,991],{"class":63},"        response ",[53,993,95],{"class":94},[53,995,117],{"class":63},[53,997,25],{"class":120},[53,999,124],{"class":123},[53,1001,127],{"class":120},[53,1003,130],{"class":123},[53,1005,158],{"class":120},[53,1007,678],{"class":677},[53,1009,95],{"class":94},[53,1011,684],{"class":683},[53,1013,133],{"class":120},[53,1015,1016,1019,1021,1023],{"class":55,"line":766},[53,1017,1018],{"class":63},"        response",[53,1020,25],{"class":120},[53,1022,696],{"class":123},[53,1024,699],{"class":120},[53,1026,1027,1030,1032,1034,1036,1038,1040,1042,1044,1046,1048,1050],{"class":55,"line":771},[53,1028,1029],{"class":63},"        soup 
",[53,1031,95],{"class":94},[53,1033,144],{"class":123},[53,1035,127],{"class":120},[53,1037,149],{"class":123},[53,1039,25],{"class":120},[53,1041,155],{"class":154},[53,1043,158],{"class":120},[53,1045,99],{"class":98},[53,1047,163],{"class":102},[53,1049,166],{"class":98},[53,1051,133],{"class":120},[53,1053,1054],{"class":55,"line":798},[53,1055,85],{"emptyLinePlaceholder":84},[53,1057,1058,1061,1063,1065,1067,1069,1071,1073,1075,1077,1079],{"class":55,"line":822},[53,1059,1060],{"class":59},"        if",[53,1062,508],{"class":63},[53,1064,25],{"class":120},[53,1066,190],{"class":154},[53,1068,783],{"class":94},[53,1070,508],{"class":63},[53,1072,25],{"class":120},[53,1074,190],{"class":154},[53,1076,25],{"class":120},[53,1078,195],{"class":154},[53,1080,655],{"class":120},[53,1082,1083,1086,1088,1090,1092,1094,1096,1098,1100,1102],{"class":55,"line":830},[53,1084,1085],{"class":63},"            title ",[53,1087,95],{"class":94},[53,1089,508],{"class":63},[53,1091,25],{"class":120},[53,1093,190],{"class":154},[53,1095,25],{"class":120},[53,1097,195],{"class":154},[53,1099,25],{"class":120},[53,1101,200],{"class":123},[53,1103,699],{"class":120},[53,1105,1107,1110],{"class":55,"line":1106},18,[53,1108,1109],{"class":59},"        else",[53,1111,655],{"class":120},[53,1113,1115,1117,1119,1121,1124],{"class":55,"line":1114},19,[53,1116,1085],{"class":63},[53,1118,95],{"class":94},[53,1120,99],{"class":98},[53,1122,1123],{"class":102},"No title found",[53,1125,106],{"class":98},[53,1127,1129],{"class":55,"line":1128},20,[53,1130,85],{"emptyLinePlaceholder":84},[53,1132,1134,1136,1138,1142,1144,1147,1149,1152,1155,1157,1159,1161,1163],{"class":55,"line":1133},21,[53,1135,801],{"class":179},[53,1137,127],{"class":120},[53,1139,1141],{"class":1140},"sbsja","f",[53,1143,166],{"class":102},[53,1145,1146],{"class":683},"{",[53,1148,130],{"class":123},[53,1150,1151],{"class":683},"}",[53,1153,1154],{"class":102}," -> 
",[53,1156,1146],{"class":683},[53,1158,190],{"class":123},[53,1160,1151],{"class":683},[53,1162,166],{"class":102},[53,1164,133],{"class":120},[53,1166,1168],{"class":55,"line":1167},22,[53,1169,85],{"emptyLinePlaceholder":84},[53,1171,1173,1176,1178,1180,1182],{"class":55,"line":1172},23,[53,1174,1175],{"class":59},"    except",[53,1177,117],{"class":63},[53,1179,25],{"class":120},[53,1181,711],{"class":154},[53,1183,655],{"class":120},[53,1185,1187,1189,1191,1193,1195,1197,1199,1201,1204],{"class":55,"line":1186},24,[53,1188,801],{"class":179},[53,1190,127],{"class":120},[53,1192,1141],{"class":1140},[53,1194,166],{"class":102},[53,1196,1146],{"class":683},[53,1198,130],{"class":123},[53,1200,1151],{"class":683},[53,1202,1203],{"class":102}," -> Could not fetch page\"",[53,1205,133],{"class":120},[14,1207,1208],{},"Example output might look like this:",[44,1210,1212],{"className":46,"code":1211,"language":48,"meta":49,"style":49},"https:\u002F\u002Fexample.com -> Example Domain\nhttps:\u002F\u002Fwww.python.org -> Welcome to Python.org\nhttps:\u002F\u002Fwww.wikipedia.org -> Wikipedia\n",[35,1213,1214,1239,1269],{"__ignoreMap":49},[53,1215,1216,1219,1221,1224,1227,1229,1232,1236],{"class":55,"line":56},[53,1217,1218],{"class":63},"https",[53,1220,322],{"class":120},[53,1222,1223],{"class":94},"\u002F\u002F",[53,1225,1226],{"class":63},"example",[53,1228,25],{"class":120},[53,1230,1231],{"class":154},"com",[53,1233,1235],{"class":1234},"srjyR"," ->",[53,1237,1238],{"class":63}," Example Domain\n",[53,1240,1241,1243,1245,1247,1250,1252,1254,1256,1259,1261,1264,1266],{"class":55,"line":67},[53,1242,1218],{"class":63},[53,1244,322],{"class":120},[53,1246,1223],{"class":94},[53,1248,1249],{"class":63},"www",[53,1251,25],{"class":120},[53,1253,48],{"class":154},[53,1255,25],{"class":120},[53,1257,1258],{"class":154},"org",[53,1260,1235],{"class":1234},[53,1262,1263],{"class":63}," Welcome to 
Python",[53,1265,25],{"class":120},[53,1267,1268],{"class":154},"org\n",[53,1270,1271,1273,1275,1277,1279,1281,1284,1286,1288,1290],{"class":55,"line":81},[53,1272,1218],{"class":63},[53,1274,322],{"class":120},[53,1276,1223],{"class":94},[53,1278,1249],{"class":63},[53,1280,25],{"class":120},[53,1282,1283],{"class":154},"wikipedia",[53,1285,25],{"class":120},[53,1287,1258],{"class":154},[53,1289,1235],{"class":1234},[53,1291,1292],{"class":63}," Wikipedia\n",[14,1294,1295],{},"This example keeps the loop small and readable. That is a good way to start.",[27,1297,1299],{"id":1298},"how-this-example-works","How this example works",[14,1301,1302],{},"There are two main tools in this script:",[208,1304,1305,1310],{},[211,1306,1307,1309],{},[35,1308,37],{}," handles the HTTP request",[211,1311,1312,1315],{},[35,1313,1314],{},"BeautifulSoup"," reads and searches the HTML",[14,1317,1318,1319,1322],{},"A page title is usually inside the ",[35,1320,1321],{},"\u003Chead>"," section, like this:",[44,1324,1327],{"className":1325,"code":1326,"language":482,"meta":49,"style":49},"language-html shiki shiki-themes material-theme-lighter github-light github-dark","\u003Ctitle>Example Domain\u003C\u002Ftitle>\n",[35,1328,1329],{"__ignoreMap":49},[53,1330,1331,1334,1337,1340,1343,1346,1348],{"class":55,"line":56},[53,1332,1333],{"class":120},"\u003C",[53,1335,190],{"class":1336},"sQzsp",[53,1338,1339],{"class":120},">",[53,1341,1342],{"class":63},"Example Domain",[53,1344,1345],{"class":120},"\u003C\u002F",[53,1347,190],{"class":1336},[53,1349,1350],{"class":120},">\n",[14,1352,1353],{},"BeautifulSoup makes it easy to find that tag.",[14,1355,1356],{},"Keep in mind:",[208,1358,1359,1362,1365,1368],{},[211,1360,1361],{},"Not every page has a title",[211,1363,1364],{},"Some pages have messy HTML",[211,1366,1367],{},"Some pages return different content than you expect",[211,1369,1370],{},"Some sites load content later with JavaScript",[14,1372,1373,1374,25],{},"If you want a broader 
BeautifulSoup example after this one, see ",[21,1375,1377],{"href":1376},"\u002Fexamples\u002Fpython-web-scraping-example-beautifulsoup\u002F","Python web scraping example with BeautifulSoup",[27,1379,1381],{"id":1380},"common-problems-beginners-hit","Common problems beginners hit",[14,1383,1384],{},"Here are the most common causes when this example does not work:",[208,1386,1387,1391,1395,1401,1404,1407],{},[211,1388,1389,352],{},[35,1390,37],{},[211,1392,1393,352],{},[35,1394,41],{},[211,1396,1397,1398],{},"The URL is wrong or missing ",[35,1399,1400],{},"https:\u002F\u002F",[211,1402,1403],{},"The page request failed",[211,1405,1406],{},"The HTML has no title tag",[211,1408,1409],{},"The site uses JavaScript for content you expected to scrape",[14,1411,1412],{},"You may also run into these specific errors:",[208,1414,1415,1431],{},[211,1416,1417,1423,1424,1426,1427,1430],{},[21,1418,1420],{"href":1419},"\u002Ferrors\u002Fmodulenotfounderror-no-module-named-x-fix\u002F",[35,1421,1422],{},"ModuleNotFoundError: No module named ..."," if ",[35,1425,37],{}," or ",[35,1428,1429],{},"bs4"," is missing",[211,1432,1433,1423,1439,1441,1442],{},[21,1434,1436],{"href":1435},"\u002Ferrors\u002Fattributeerror-nonetype-object-has-no-attribute-fix\u002F",[35,1437,1438],{},"AttributeError: 'NoneType' object has no attribute ...",[35,1440,559],{}," is ",[35,1443,1444],{},"None",[14,1446,1447],{},"Useful commands for debugging:",[44,1449,1451],{"className":298,"code":1450,"language":300,"meta":49,"style":49},"pip install requests beautifulsoup4\npython --version\npip --version\npython 
script.py\n",[35,1452,1453,1463,1469,1475],{"__ignoreMap":49},[53,1454,1455,1457,1459,1461],{"class":55,"line":56},[53,1456,321],{"class":307},[53,1458,334],{"class":102},[53,1460,117],{"class":102},[53,1462,339],{"class":102},[53,1464,1465,1467],{"class":55,"line":67},[53,1466,48],{"class":307},[53,1468,311],{"class":310},[53,1470,1471,1473],{"class":55,"line":81},[53,1472,321],{"class":307},[53,1474,311],{"class":310},[53,1476,1477,1479],{"class":55,"line":88},[53,1478,48],{"class":307},[53,1480,1481],{"class":102}," script.py\n",[531,1483,1485],{"id":1484},"a-few-examples-of-common-mistakes","A few examples of common mistakes",[14,1487,342,1488,1490],{},[35,1489,1429],{}," is not installed:",[44,1492,1494],{"className":46,"code":1493,"language":48,"meta":49,"style":49},"from bs4 import BeautifulSoup\n",[35,1495,1496],{"__ignoreMap":49},[53,1497,1498,1500,1502,1504],{"class":55,"line":56},[53,1499,70],{"class":59},[53,1501,73],{"class":63},[53,1503,60],{"class":59},[53,1505,78],{"class":63},[14,1507,1508,1509,25],{},"You may get a ",[35,1510,1511],{},"ModuleNotFoundError",[14,1513,1514],{},"If the page has no title and you do this:",[44,1516,1518],{"className":46,"code":1517,"language":48,"meta":49,"style":49},"print(soup.title.string)\n",[35,1519,1520],{"__ignoreMap":49},[53,1521,1522,1524,1526,1528,1530,1532,1534,1536],{"class":55,"line":56},[53,1523,180],{"class":179},[53,1525,127],{"class":120},[53,1527,185],{"class":123},[53,1529,25],{"class":120},[53,1531,190],{"class":154},[53,1533,25],{"class":120},[53,1535,195],{"class":154},[53,1537,133],{"class":120},[14,1539,1540,1541,1441,1543,25],{},"You may get an error because ",[35,1542,559],{},[35,1544,1444],{},[14,1546,1547],{},"If the URL is incomplete:",[44,1549,1551],{"className":46,"code":1550,"language":48,"meta":49,"style":49},"url = 
\"example.com\"\n",[35,1552,1553],{"__ignoreMap":49},[53,1554,1555,1557,1559,1561,1564],{"class":55,"line":56},[53,1556,91],{"class":63},[53,1558,95],{"class":94},[53,1560,99],{"class":98},[53,1562,1563],{"class":102},"example.com",[53,1565,106],{"class":98},[14,1567,1568],{},"The request may fail. Use the full URL instead:",[44,1570,1572],{"className":46,"code":1571,"language":48,"meta":49,"style":49},"url = \"https:\u002F\u002Fexample.com\"\n",[35,1573,1574],{"__ignoreMap":49},[53,1575,1576,1578,1580,1582,1584],{"class":55,"line":56},[53,1577,91],{"class":63},[53,1579,95],{"class":94},[53,1581,99],{"class":98},[53,1583,103],{"class":102},[53,1585,106],{"class":98},[27,1587,1589],{"id":1588},"important-beginner-notes","Important beginner notes",[14,1591,1592],{},"Keep these points in mind:",[208,1594,1595,1598,1601,1604],{},[211,1596,1597],{},"This example only reads page HTML and extracts the title",[211,1599,1600],{},"It is not a full web scraping guide",[211,1602,1603],{},"Some sites do not allow scraping",[211,1605,1606,1607],{},"Start with simple public pages like ",[35,1608,1563],{},[14,1610,1611,1612,1614],{},"Also remember that ",[35,1613,37],{}," only downloads the initial HTML. If a site loads text later with JavaScript, your script may not see that content.",[27,1616,1618],{"id":1617},"faq","FAQ",[531,1620,1622,1623,1625,1626,1628],{"id":1621},"why-does-souptitle-return-none","Why does ",[35,1624,559],{}," return ",[35,1627,1444],{},"?",[14,1630,1631],{},"The page may not have a title tag, or the HTML was not fetched correctly.",[531,1633,1635],{"id":1634},"why-is-my-scraper-not-finding-text-i-can-see-in-the-browser","Why is my scraper not finding text I can see in the browser?",[14,1637,1638,1639,1641],{},"Some websites load content with JavaScript. 
",[35,1640,37],{}," only gets the initial HTML.",[531,1643,1645],{"id":1644},"do-i-need-beautifulsoup-just-to-get-the-title","Do I need BeautifulSoup just to get the title?",[14,1647,1648],{},"Not always, but it makes HTML parsing much easier and clearer for beginners.",[531,1650,1652],{"id":1651},"what-packages-do-i-need-for-this-example","What packages do I need for this example?",[14,1654,1655,1656,38,1658,25],{},"You usually need ",[35,1657,37],{},[35,1659,41],{},[531,1661,1663],{"id":1662},"can-i-scrape-many-pages-with-a-loop","Can I scrape many pages with a loop?",[14,1665,1666],{},"Yes. Start with a short list of URLs and print each page title.",[27,1668,1670],{"id":1669},"see-also","See also",[208,1672,1673,1678,1683,1688,1693],{},[211,1674,1675],{},[21,1676,1677],{"href":23},"How to install a Python package with pip",[211,1679,1680],{},[21,1681,1682],{"href":873},"How to make an API request in Python",[211,1684,1685],{},[21,1686,1687],{"href":1419},"ModuleNotFoundError: No module named ... fix",[211,1689,1690],{},[21,1691,1692],{"href":1435},"AttributeError: 'NoneType' object has no attribute ... 
fix",[211,1694,1695],{},[21,1696,1377],{"href":1376},[1698,1699,1700],"style",{},"html pre.shiki code .sVHd0, html code.shiki .sVHd0{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#D73A49;--shiki-default-font-style:inherit;--shiki-dark:#F97583;--shiki-dark-font-style:inherit}html pre.shiki code .su5hD, html code.shiki .su5hD{--shiki-light:#90A4AE;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .smGrS, html code.shiki .smGrS{--shiki-light:#39ADB5;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sjJ54, html code.shiki .sjJ54{--shiki-light:#39ADB5;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .s_sjI, html code.shiki .s_sjI{--shiki-light:#91B859;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sP7_E, html code.shiki .sP7_E{--shiki-light:#39ADB5;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .slqww, html code.shiki .slqww{--shiki-light:#6182B8;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .skxfh, html code.shiki .skxfh{--shiki-light:#E53935;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sptTA, html code.shiki .sptTA{--shiki-light:#6182B8;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: 
var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sbgvK, html code.shiki .sbgvK{--shiki-light:#E2931D;--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .stzsN, html code.shiki .stzsN{--shiki-light:#91B859;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .s99_P, html code.shiki .s99_P{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#E36209;--shiki-default-font-style:inherit;--shiki-dark:#FFAB70;--shiki-dark-font-style:inherit}html pre.shiki code .srdBf, html code.shiki .srdBf{--shiki-light:#F76D47;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sbsja, html code.shiki .sbsja{--shiki-light:#9C3EDA;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .srjyR, html code.shiki .srjyR{--shiki-light:#90A4AE;--shiki-light-font-style:inherit;--shiki-default:#B31D28;--shiki-default-font-style:italic;--shiki-dark:#FDAEB7;--shiki-dark-font-style:italic}html pre.shiki code .sQzsp, html code.shiki 
.sQzsp{--shiki-light:#E53935;--shiki-default:#22863A;--shiki-dark:#85E89D}",{"title":49,"searchDepth":67,"depth":67,"links":1702},[1703,1704,1705,1706,1707,1710,1713,1714,1715,1718,1719,1727],{"id":29,"depth":67,"text":30},{"id":243,"depth":67,"text":244},{"id":267,"depth":67,"text":268},{"id":314,"depth":67,"text":315},{"id":386,"depth":67,"text":387,"children":1708},[1709],{"id":533,"depth":81,"text":534},{"id":585,"depth":67,"text":586,"children":1711},[1712],{"id":846,"depth":81,"text":847},{"id":881,"depth":67,"text":882},{"id":1298,"depth":67,"text":1299},{"id":1380,"depth":67,"text":1381,"children":1716},[1717],{"id":1484,"depth":81,"text":1485},{"id":1588,"depth":67,"text":1589},{"id":1617,"depth":67,"text":1618,"children":1720},[1721,1723,1724,1725,1726],{"id":1621,"depth":81,"text":1722},"Why does soup.title return None?",{"id":1634,"depth":81,"text":1635},{"id":1644,"depth":81,"text":1645},{"id":1651,"depth":81,"text":1652},{"id":1662,"depth":81,"text":1663},{"id":1669,"depth":67,"text":1670},"Learn how to build a simple Python web scraper that downloads a page and extracts its title, in this step-by-step beginner example.","md",{},"\u002Fexamples\u002Fpython-simple-web-scraper-for-titles-example",{"title":5,"description":1728},"examples\u002Fpython-simple-web-scraper-for-titles-example","g05xlCKpRH0PAY2eIVG3ALxUlzEj8hRfXCi_xgBUQrQ",1777585478001]