[{"data":1,"prerenderedAt":2071},["ShallowReactive",2],{"doc-\u002Fexamples\u002Fpython-web-scraping-example-beautifulsoup":3},{"id":4,"title":5,"body":6,"description":2064,"extension":2065,"meta":2066,"navigation":67,"path":2067,"seo":2068,"stem":2069,"__hash__":2070},"content\u002Fexamples\u002Fpython-web-scraping-example-beautifulsoup.md","Python Web Scraping Example (BeautifulSoup)",{"type":7,"value":8,"toc":2019},"minimark",[9,13,17,20,25,284,299,303,306,325,329,332,355,362,366,369,390,393,415,418,431,434,463,467,470,652,657,674,677,684,715,718,731,739,743,758,761,764,781,784,788,819,833,840,844,875,882,885,914,917,924,928,979,986,992,999,1020,1023,1045,1049,1052,1058,1061,1083,1087,1090,1555,1558,1586,1590,1593,1597,1602,1606,1609,1612,1616,1619,1626,1630,1633,1637,1640,1644,1647,1664,1667,1671,1674,1685,1688,1702,1706,1709,1715,1725,1728,1742,1745,1763,1770,1777,1780,1803,1809,1812,1859,1869,1876,1884,1894,1897,1908,1914,1918,1921,1924,1928,1932,1935,1939,1945,1949,1952,1959,1969,1973,1976,1980,2008,2015],[10,11,5],"h1",{"id":12},"python-web-scraping-example-beautifulsoup",[14,15,16],"p",{},"This beginner-friendly example shows how to download a web page, parse its HTML with BeautifulSoup, and extract simple data like the page title and links.",[14,18,19],{},"The goal is to help you build a small working scraper. 
This page focuses on a practical example, not every detail of web scraping.",[21,22,24],"h2",{"id":23},"quick-example","Quick example",[26,27,32],"pre",{"className":28,"code":29,"language":30,"meta":31,"style":31},"language-python shiki shiki-themes material-theme-lighter github-light github-dark","import requests\nfrom bs4 import BeautifulSoup\n\nurl = \"https:\u002F\u002Fexample.com\"\nresponse = requests.get(url, timeout=10)\nresponse.raise_for_status()\n\nsoup = BeautifulSoup(response.text, \"html.parser\")\n\nprint(\"Page title:\", soup.title.string)\n\nfor link in soup.find_all(\"a\"):\n    print(link.get(\"href\"))\n","python","",[33,34,35,48,62,69,90,131,145,150,183,188,221,226,257],"code",{"__ignoreMap":31},[36,37,40,44],"span",{"class":38,"line":39},"line",1,[36,41,43],{"class":42},"sVHd0","import",[36,45,47],{"class":46},"su5hD"," requests\n",[36,49,51,54,57,59],{"class":38,"line":50},2,[36,52,53],{"class":42},"from",[36,55,56],{"class":46}," bs4 ",[36,58,43],{"class":42},[36,60,61],{"class":46}," BeautifulSoup\n",[36,63,65],{"class":38,"line":64},3,[36,66,68],{"emptyLinePlaceholder":67},true,"\n",[36,70,72,75,79,83,87],{"class":38,"line":71},4,[36,73,74],{"class":46},"url ",[36,76,78],{"class":77},"smGrS","=",[36,80,82],{"class":81},"sjJ54"," \"",[36,84,86],{"class":85},"s_sjI","https:\u002F\u002Fexample.com",[36,88,89],{"class":81},"\"\n",[36,91,93,96,98,101,105,109,112,115,118,122,124,128],{"class":38,"line":92},5,[36,94,95],{"class":46},"response ",[36,97,78],{"class":77},[36,99,100],{"class":46}," requests",[36,102,104],{"class":103},"sP7_E",".",[36,106,108],{"class":107},"slqww","get",[36,110,111],{"class":103},"(",[36,113,114],{"class":107},"url",[36,116,117],{"class":103},",",[36,119,121],{"class":120},"s99_P"," 
timeout",[36,123,78],{"class":77},[36,125,127],{"class":126},"srdBf","10",[36,129,130],{"class":103},")\n",[36,132,134,137,139,142],{"class":38,"line":133},6,[36,135,136],{"class":46},"response",[36,138,104],{"class":103},[36,140,141],{"class":107},"raise_for_status",[36,143,144],{"class":103},"()\n",[36,146,148],{"class":38,"line":147},7,[36,149,68],{"emptyLinePlaceholder":67},[36,151,153,156,158,161,163,165,167,171,173,175,178,181],{"class":38,"line":152},8,[36,154,155],{"class":46},"soup ",[36,157,78],{"class":77},[36,159,160],{"class":107}," BeautifulSoup",[36,162,111],{"class":103},[36,164,136],{"class":107},[36,166,104],{"class":103},[36,168,170],{"class":169},"skxfh","text",[36,172,117],{"class":103},[36,174,82],{"class":81},[36,176,177],{"class":85},"html.parser",[36,179,180],{"class":81},"\"",[36,182,130],{"class":103},[36,184,186],{"class":38,"line":185},9,[36,187,68],{"emptyLinePlaceholder":67},[36,189,191,195,197,199,202,204,206,209,211,214,216,219],{"class":38,"line":190},10,[36,192,194],{"class":193},"sptTA","print",[36,196,111],{"class":103},[36,198,180],{"class":81},[36,200,201],{"class":85},"Page title:",[36,203,180],{"class":81},[36,205,117],{"class":103},[36,207,208],{"class":107}," soup",[36,210,104],{"class":103},[36,212,213],{"class":169},"title",[36,215,104],{"class":103},[36,217,218],{"class":169},"string",[36,220,130],{"class":103},[36,222,224],{"class":38,"line":223},11,[36,225,68],{"emptyLinePlaceholder":67},[36,227,229,232,235,238,240,242,245,247,249,252,254],{"class":38,"line":228},12,[36,230,231],{"class":42},"for",[36,233,234],{"class":46}," link ",[36,236,237],{"class":42},"in",[36,239,208],{"class":46},[36,241,104],{"class":103},[36,243,244],{"class":107},"find_all",[36,246,111],{"class":103},[36,248,180],{"class":81},[36,250,251],{"class":85},"a",[36,253,180],{"class":81},[36,255,256],{"class":103},"):\n",[36,258,260,263,265,268,270,272,274,276,279,281],{"class":38,"line":259},13,[36,261,262],{"class":193},"    
print",[36,264,111],{"class":103},[36,266,267],{"class":107},"link",[36,269,104],{"class":103},[36,271,108],{"class":107},[36,273,111],{"class":103},[36,275,180],{"class":81},[36,277,278],{"class":85},"href",[36,280,180],{"class":81},[36,282,283],{"class":103},"))\n",[14,285,286,290,291,294,295,298],{},[287,288,289],"strong",{},"Note:"," This is a minimal working example. You need to install ",[33,292,293],{},"requests"," and ",[33,296,297],{},"beautifulsoup4"," first.",[21,300,302],{"id":301},"what-this-example-does","What this example does",[14,304,305],{},"This script:",[307,308,309,313,316,319,322],"ul",{},[310,311,312],"li",{},"Downloads HTML from a web page",[310,314,315],{},"Parses the HTML with BeautifulSoup",[310,317,318],{},"Gets the page title",[310,320,321],{},"Finds all link tags",[310,323,324],{},"Prints each link URL",[21,326,328],{"id":327},"what-you-need-before-running-it","What you need before running it",[14,330,331],{},"Before you run the example, make sure you have:",[307,333,334,337,342,348,352],{},[310,335,336],{},"Python installed",[310,338,339,340],{},"A basic understanding of ",[33,341,43],{},[310,343,344,345,347],{},"The ",[33,346,293],{}," package installed",[310,349,344,350,347],{},[33,351,297],{},[310,353,354],{},"Internet access for the example URL",[14,356,357,358,104],{},"If you need help installing packages, see ",[251,359,361],{"href":360},"\u002Fhow-to\u002Fhow-to-install-a-python-package-with-pip\u002F","how to install a Python package with pip",[21,363,365],{"id":364},"install-the-required-packages","Install the required packages",[14,367,368],{},"You can install both packages with pip:",[26,370,374],{"className":371,"code":372,"language":373,"meta":31,"style":31},"language-bash shiki shiki-themes material-theme-lighter github-light github-dark","pip install requests 
beautifulsoup4\n","bash",[33,375,376],{"__ignoreMap":31},[36,377,378,382,385,387],{"class":38,"line":39},[36,379,381],{"class":380},"sbgvK","pip",[36,383,384],{"class":85}," install",[36,386,100],{"class":85},[36,388,389],{"class":85}," beautifulsoup4\n",[14,391,392],{},"If that does not work, try:",[26,394,396],{"className":371,"code":395,"language":373,"meta":31,"style":31},"python -m pip install requests beautifulsoup4\n",[33,397,398],{"__ignoreMap":31},[36,399,400,402,406,409,411,413],{"class":38,"line":39},[36,401,30],{"class":380},[36,403,405],{"class":404},"stzsN"," -m",[36,407,408],{"class":85}," pip",[36,410,384],{"class":85},[36,412,100],{"class":85},[36,414,389],{"class":85},[14,416,417],{},"What each package does:",[307,419,420,425],{},[310,421,422,424],{},[33,423,293],{}," downloads the web page",[310,426,427,430],{},[33,428,429],{},"BeautifulSoup"," reads the HTML structure so you can search through tags and attributes",[14,432,433],{},"Useful commands for checking your setup:",[26,435,437],{"className":371,"code":436,"language":373,"meta":31,"style":31},"python --version\npip show requests\npip show beautifulsoup4\n",[33,438,439,446,455],{"__ignoreMap":31},[36,440,441,443],{"class":38,"line":39},[36,442,30],{"class":380},[36,444,445],{"class":404}," --version\n",[36,447,448,450,453],{"class":38,"line":50},[36,449,381],{"class":380},[36,451,452],{"class":85}," show",[36,454,47],{"class":85},[36,456,457,459,461],{"class":38,"line":64},[36,458,381],{"class":380},[36,460,452],{"class":85},[36,462,389],{"class":85},[21,464,466],{"id":465},"step-by-step-code-breakdown","Step-by-step code breakdown",[14,468,469],{},"Here is the same example 
again:",[26,471,472],{"className":28,"code":29,"language":30,"meta":31,"style":31},[33,473,474,480,490,494,506,532,542,546,572,576,602,606,630],{"__ignoreMap":31},[36,475,476,478],{"class":38,"line":39},[36,477,43],{"class":42},[36,479,47],{"class":46},[36,481,482,484,486,488],{"class":38,"line":50},[36,483,53],{"class":42},[36,485,56],{"class":46},[36,487,43],{"class":42},[36,489,61],{"class":46},[36,491,492],{"class":38,"line":64},[36,493,68],{"emptyLinePlaceholder":67},[36,495,496,498,500,502,504],{"class":38,"line":71},[36,497,74],{"class":46},[36,499,78],{"class":77},[36,501,82],{"class":81},[36,503,86],{"class":85},[36,505,89],{"class":81},[36,507,508,510,512,514,516,518,520,522,524,526,528,530],{"class":38,"line":92},[36,509,95],{"class":46},[36,511,78],{"class":77},[36,513,100],{"class":46},[36,515,104],{"class":103},[36,517,108],{"class":107},[36,519,111],{"class":103},[36,521,114],{"class":107},[36,523,117],{"class":103},[36,525,121],{"class":120},[36,527,78],{"class":77},[36,529,127],{"class":126},[36,531,130],{"class":103},[36,533,534,536,538,540],{"class":38,"line":133},[36,535,136],{"class":46},[36,537,104],{"class":103},[36,539,141],{"class":107},[36,541,144],{"class":103},[36,543,544],{"class":38,"line":147},[36,545,68],{"emptyLinePlaceholder":67},[36,547,548,550,552,554,556,558,560,562,564,566,568,570],{"class":38,"line":152},[36,549,155],{"class":46},[36,551,78],{"class":77},[36,553,160],{"class":107},[36,555,111],{"class":103},[36,557,136],{"class":107},[36,559,104],{"class":103},[36,561,170],{"class":169},[36,563,117],{"class":103},[36,565,82],{"class":81},[36,567,177],{"class":85},[36,569,180],{"class":81},[36,571,130],{"class":103},[36,573,574],{"class":38,"line":185},[36,575,68],{"emptyLinePlaceholder":67},[36,577,578,580,582,584,586,588,590,592,594,596,598,600],{"class":38,"line":190},[36,579,194],{"class":193},[36,581,111],{"class":103},[36,583,180],{"class":81},[36,585,201],{"class":85},[36,587,180],{"class":81},[36,589,117],{"class":103},[
36,591,208],{"class":107},[36,593,104],{"class":103},[36,595,213],{"class":169},[36,597,104],{"class":103},[36,599,218],{"class":169},[36,601,130],{"class":103},[36,603,604],{"class":38,"line":223},[36,605,68],{"emptyLinePlaceholder":67},[36,607,608,610,612,614,616,618,620,622,624,626,628],{"class":38,"line":228},[36,609,231],{"class":42},[36,611,234],{"class":46},[36,613,237],{"class":42},[36,615,208],{"class":46},[36,617,104],{"class":103},[36,619,244],{"class":107},[36,621,111],{"class":103},[36,623,180],{"class":81},[36,625,251],{"class":85},[36,627,180],{"class":81},[36,629,256],{"class":103},[36,631,632,634,636,638,640,642,644,646,648,650],{"class":38,"line":259},[36,633,262],{"class":193},[36,635,111],{"class":103},[36,637,267],{"class":107},[36,639,104],{"class":103},[36,641,108],{"class":107},[36,643,111],{"class":103},[36,645,180],{"class":81},[36,647,278],{"class":85},[36,649,180],{"class":81},[36,651,283],{"class":103},[653,654,656],"h3",{"id":655},"_1-set-the-target-url","1. Set the target URL",[26,658,660],{"className":28,"code":659,"language":30,"meta":31,"style":31},"url = \"https:\u002F\u002Fexample.com\"\n",[33,661,662],{"__ignoreMap":31},[36,663,664,666,668,670,672],{"class":38,"line":39},[36,665,74],{"class":46},[36,667,78],{"class":77},[36,669,82],{"class":81},[36,671,86],{"class":85},[36,673,89],{"class":81},[14,675,676],{},"This is the page you want to scrape.",[653,678,680,681],{"id":679},"_2-fetch-the-page-with-requestsget","2. 
Fetch the page with ",[33,682,683],{},"requests.get()",[26,685,687],{"className":28,"code":686,"language":30,"meta":31,"style":31},"response = requests.get(url, timeout=10)\n",[33,688,689],{"__ignoreMap":31},[36,690,691,693,695,697,699,701,703,705,707,709,711,713],{"class":38,"line":39},[36,692,95],{"class":46},[36,694,78],{"class":77},[36,696,100],{"class":46},[36,698,104],{"class":103},[36,700,108],{"class":107},[36,702,111],{"class":103},[36,704,114],{"class":107},[36,706,117],{"class":103},[36,708,121],{"class":120},[36,710,78],{"class":77},[36,712,127],{"class":126},[36,714,130],{"class":103},[14,716,717],{},"This sends an HTTP request and downloads the page.",[307,719,720,725],{},[310,721,722,724],{},[33,723,114],{}," is the page address",[310,726,727,730],{},[33,728,729],{},"timeout=10"," means Python will stop waiting after 10 seconds",[14,732,733,734,738],{},"If you are new to HTTP requests, ",[251,735,737],{"href":736},"\u002Fhow-to\u002Fhow-to-make-an-api-request-in-python\u002F","how to make an API request in Python"," explains the basic idea.",[653,740,742],{"id":741},"_3-stop-on-http-errors","3. Stop on HTTP errors",[26,744,746],{"className":28,"code":745,"language":30,"meta":31,"style":31},"response.raise_for_status()\n",[33,747,748],{"__ignoreMap":31},[36,749,750,752,754,756],{"class":38,"line":39},[36,751,136],{"class":46},[36,753,104],{"class":103},[36,755,141],{"class":107},[36,757,144],{"class":103},[14,759,760],{},"This is important.",[14,762,763],{},"It raises an error if the page could not be fetched correctly, such as:",[307,765,766,771,776],{},[310,767,768],{},[33,769,770],{},"404 Not Found",[310,772,773],{},[33,774,775],{},"403 Forbidden",[310,777,778],{},[33,779,780],{},"500 Internal Server Error",[14,782,783],{},"Without this line, your code may continue with a bad response and give confusing results later.",[653,785,787],{"id":786},"_4-parse-the-html","4. 
Parse the HTML",[26,789,791],{"className":28,"code":790,"language":30,"meta":31,"style":31},"soup = BeautifulSoup(response.text, \"html.parser\")\n",[33,792,793],{"__ignoreMap":31},[36,794,795,797,799,801,803,805,807,809,811,813,815,817],{"class":38,"line":39},[36,796,155],{"class":46},[36,798,78],{"class":77},[36,800,160],{"class":107},[36,802,111],{"class":103},[36,804,136],{"class":107},[36,806,104],{"class":103},[36,808,170],{"class":169},[36,810,117],{"class":103},[36,812,82],{"class":81},[36,814,177],{"class":85},[36,816,180],{"class":81},[36,818,130],{"class":103},[307,820,821,827],{},[310,822,823,826],{},[33,824,825],{},"response.text"," is the HTML content as a string",[310,828,829,832],{},[33,830,831],{},"\"html.parser\""," tells BeautifulSoup which parser to use",[14,834,835,836,839],{},"After this, ",[33,837,838],{},"soup"," becomes an object you can search.",[653,841,843],{"id":842},"_5-get-the-page-title","5. Get the page title",[26,845,847],{"className":28,"code":846,"language":30,"meta":31,"style":31},"print(\"Page title:\", soup.title.string)\n",[33,848,849],{"__ignoreMap":31},[36,850,851,853,855,857,859,861,863,865,867,869,871,873],{"class":38,"line":39},[36,852,194],{"class":193},[36,854,111],{"class":103},[36,856,180],{"class":81},[36,858,201],{"class":85},[36,860,180],{"class":81},[36,862,117],{"class":103},[36,864,208],{"class":107},[36,866,104],{"class":103},[36,868,213],{"class":169},[36,870,104],{"class":103},[36,872,218],{"class":169},[36,874,130],{"class":103},[14,876,877,878,881],{},"This tries to find the ",[33,879,880],{},"\u003Ctitle>"," tag and print its text.",[14,883,884],{},"For example, if the HTML contains:",[26,886,890],{"className":887,"code":888,"language":889,"meta":31,"style":31},"language-html shiki shiki-themes material-theme-lighter github-light github-dark","\u003Ctitle>Example 
Domain\u003C\u002Ftitle>\n","html",[33,891,892],{"__ignoreMap":31},[36,893,894,897,900,903,906,909,911],{"class":38,"line":39},[36,895,896],{"class":103},"\u003C",[36,898,213],{"class":899},"sQzsp",[36,901,902],{"class":103},">",[36,904,905],{"class":46},"Example Domain",[36,907,908],{"class":103},"\u003C\u002F",[36,910,213],{"class":899},[36,912,913],{"class":103},">\n",[14,915,916],{},"The output will be:",[26,918,922],{"className":919,"code":921,"language":170,"meta":31},[920],"language-text","Page title: Example Domain\n",[33,923,921],{"__ignoreMap":31},[653,925,927],{"id":926},"_6-find-all-links","6. Find all links",[26,929,931],{"className":28,"code":930,"language":30,"meta":31,"style":31},"for link in soup.find_all(\"a\"):\n    print(link.get(\"href\"))\n",[33,932,933,957],{"__ignoreMap":31},[36,934,935,937,939,941,943,945,947,949,951,953,955],{"class":38,"line":39},[36,936,231],{"class":42},[36,938,234],{"class":46},[36,940,237],{"class":42},[36,942,208],{"class":46},[36,944,104],{"class":103},[36,946,244],{"class":107},[36,948,111],{"class":103},[36,950,180],{"class":81},[36,952,251],{"class":85},[36,954,180],{"class":81},[36,956,256],{"class":103},[36,958,959,961,963,965,967,969,971,973,975,977],{"class":38,"line":50},[36,960,262],{"class":193},[36,962,111],{"class":103},[36,964,267],{"class":107},[36,966,104],{"class":103},[36,968,108],{"class":107},[36,970,111],{"class":103},[36,972,180],{"class":81},[36,974,278],{"class":85},[36,976,180],{"class":81},[36,978,283],{"class":103},[14,980,981,982,985],{},"This finds every ",[33,983,984],{},"\u003Ca>"," tag on the page.",[14,987,988,989,991],{},"Then it prints the value of the ",[33,990,278],{}," attribute for each one.",[14,993,994,995,998],{},"Using ",[33,996,997],{},"link.get(\"href\")"," is safer 
than:",[26,1000,1002],{"className":28,"code":1001,"language":30,"meta":31,"style":31},"link[\"href\"]\n",[33,1003,1004],{"__ignoreMap":31},[36,1005,1006,1008,1011,1013,1015,1017],{"class":38,"line":39},[36,1007,267],{"class":46},[36,1009,1010],{"class":103},"[",[36,1012,180],{"class":81},[36,1014,278],{"class":85},[36,1016,180],{"class":81},[36,1018,1019],{"class":103},"]\n",[14,1021,1022],{},"Why?",[307,1024,1025,1037],{},[310,1026,1027,1029,1030,1033,1034,1036],{},[33,1028,997],{}," returns ",[33,1031,1032],{},"None"," if ",[33,1035,278],{}," is missing",[310,1038,1039,1042,1043,1036],{},[33,1040,1041],{},"link[\"href\"]"," raises an error if ",[33,1044,278],{},[21,1046,1048],{"id":1047},"expected-output","Expected output",[14,1050,1051],{},"The exact output depends on the page, but it will usually look something like this:",[26,1053,1056],{"className":1054,"code":1055,"language":170,"meta":31},[920],"Page title: Example Domain\nhttps:\u002F\u002Fwww.iana.org\u002Fdomains\u002Fexample\n",[33,1057,1055],{"__ignoreMap":31},[14,1059,1060],{},"Keep in mind:",[307,1062,1063,1066,1069,1075],{},[310,1064,1065],{},"The page title is printed first",[310,1067,1068],{},"Each link URL is printed on a new line",[310,1070,1071,1072],{},"Some links may be relative paths like ",[33,1073,1074],{},"\u002Fabout",[310,1076,1077,1078,1080,1081],{},"Some ",[33,1079,278],{}," values may be ",[33,1082,1032],{},[21,1084,1086],{"id":1085},"beginner-friendly-improvements","Beginner-friendly improvements",[14,1088,1089],{},"The first example is intentionally small. 
Here is a slightly better version that skips missing links and converts relative URLs into full URLs.",[26,1091,1093],{"className":28,"code":1092,"language":30,"meta":31,"style":31},"import requests\nfrom bs4 import BeautifulSoup\nfrom urllib.parse import urljoin\n\nurl = \"https:\u002F\u002Fexample.com\"\nresponse = requests.get(url, timeout=10)\nresponse.raise_for_status()\n\nsoup = BeautifulSoup(response.text, \"html.parser\")\n\ntitle = soup.title.string if soup.title else \"No title found\"\nprint(\"Page title:\", title)\n\nlinks = []\n\nfor link in soup.find_all(\"a\"):\n    href = link.get(\"href\")\n    text = link.get_text(strip=True)\n\n    if href is None:\n        continue\n\n    full_url = urljoin(url, href)\n    links.append((text, full_url))\n\nfor text, full_url in links:\n    print(f\"Link text: {text or '[no text]'}\")\n    print(f\"URL: {full_url}\")\n    print(\"-\" * 20)\n",[33,1094,1095,1101,1111,1128,1132,1144,1170,1180,1184,1210,1214,1250,1269,1273,1284,1289,1314,1339,1367,1372,1390,1396,1401,1423,1446,1451,1471,1510,1533],{"__ignoreMap":31},[36,1096,1097,1099],{"class":38,"line":39},[36,1098,43],{"class":42},[36,1100,47],{"class":46},[36,1102,1103,1105,1107,1109],{"class":38,"line":50},[36,1104,53],{"class":42},[36,1106,56],{"class":46},[36,1108,43],{"class":42},[36,1110,61],{"class":46},[36,1112,1113,1115,1118,1120,1123,1125],{"class":38,"line":64},[36,1114,53],{"class":42},[36,1116,1117],{"class":46}," urllib",[36,1119,104],{"class":103},[36,1121,1122],{"class":46},"parse ",[36,1124,43],{"class":42},[36,1126,1127],{"class":46}," 
urljoin\n",[36,1129,1130],{"class":38,"line":71},[36,1131,68],{"emptyLinePlaceholder":67},[36,1133,1134,1136,1138,1140,1142],{"class":38,"line":92},[36,1135,74],{"class":46},[36,1137,78],{"class":77},[36,1139,82],{"class":81},[36,1141,86],{"class":85},[36,1143,89],{"class":81},[36,1145,1146,1148,1150,1152,1154,1156,1158,1160,1162,1164,1166,1168],{"class":38,"line":133},[36,1147,95],{"class":46},[36,1149,78],{"class":77},[36,1151,100],{"class":46},[36,1153,104],{"class":103},[36,1155,108],{"class":107},[36,1157,111],{"class":103},[36,1159,114],{"class":107},[36,1161,117],{"class":103},[36,1163,121],{"class":120},[36,1165,78],{"class":77},[36,1167,127],{"class":126},[36,1169,130],{"class":103},[36,1171,1172,1174,1176,1178],{"class":38,"line":147},[36,1173,136],{"class":46},[36,1175,104],{"class":103},[36,1177,141],{"class":107},[36,1179,144],{"class":103},[36,1181,1182],{"class":38,"line":152},[36,1183,68],{"emptyLinePlaceholder":67},[36,1185,1186,1188,1190,1192,1194,1196,1198,1200,1202,1204,1206,1208],{"class":38,"line":185},[36,1187,155],{"class":46},[36,1189,78],{"class":77},[36,1191,160],{"class":107},[36,1193,111],{"class":103},[36,1195,136],{"class":107},[36,1197,104],{"class":103},[36,1199,170],{"class":169},[36,1201,117],{"class":103},[36,1203,82],{"class":81},[36,1205,177],{"class":85},[36,1207,180],{"class":81},[36,1209,130],{"class":103},[36,1211,1212],{"class":38,"line":190},[36,1213,68],{"emptyLinePlaceholder":67},[36,1215,1216,1219,1221,1223,1225,1227,1229,1231,1234,1236,1238,1240,1243,1245,1248],{"class":38,"line":223},[36,1217,1218],{"class":46},"title ",[36,1220,78],{"class":77},[36,1222,208],{"class":46},[36,1224,104],{"class":103},[36,1226,213],{"class":169},[36,1228,104],{"class":103},[36,1230,218],{"class":169},[36,1232,1233],{"class":42}," if",[36,1235,208],{"class":46},[36,1237,104],{"class":103},[36,1239,213],{"class":169},[36,1241,1242],{"class":42}," else",[36,1244,82],{"class":81},[36,1246,1247],{"class":85},"No title 
found",[36,1249,89],{"class":81},[36,1251,1252,1254,1256,1258,1260,1262,1264,1267],{"class":38,"line":228},[36,1253,194],{"class":193},[36,1255,111],{"class":103},[36,1257,180],{"class":81},[36,1259,201],{"class":85},[36,1261,180],{"class":81},[36,1263,117],{"class":103},[36,1265,1266],{"class":107}," title",[36,1268,130],{"class":103},[36,1270,1271],{"class":38,"line":259},[36,1272,68],{"emptyLinePlaceholder":67},[36,1274,1276,1279,1281],{"class":38,"line":1275},14,[36,1277,1278],{"class":46},"links ",[36,1280,78],{"class":77},[36,1282,1283],{"class":103}," []\n",[36,1285,1287],{"class":38,"line":1286},15,[36,1288,68],{"emptyLinePlaceholder":67},[36,1290,1292,1294,1296,1298,1300,1302,1304,1306,1308,1310,1312],{"class":38,"line":1291},16,[36,1293,231],{"class":42},[36,1295,234],{"class":46},[36,1297,237],{"class":42},[36,1299,208],{"class":46},[36,1301,104],{"class":103},[36,1303,244],{"class":107},[36,1305,111],{"class":103},[36,1307,180],{"class":81},[36,1309,251],{"class":85},[36,1311,180],{"class":81},[36,1313,256],{"class":103},[36,1315,1317,1320,1322,1325,1327,1329,1331,1333,1335,1337],{"class":38,"line":1316},17,[36,1318,1319],{"class":46},"    href ",[36,1321,78],{"class":77},[36,1323,1324],{"class":46}," link",[36,1326,104],{"class":103},[36,1328,108],{"class":107},[36,1330,111],{"class":103},[36,1332,180],{"class":81},[36,1334,278],{"class":85},[36,1336,180],{"class":81},[36,1338,130],{"class":103},[36,1340,1342,1345,1347,1349,1351,1354,1356,1359,1361,1365],{"class":38,"line":1341},18,[36,1343,1344],{"class":46},"    text 
",[36,1346,78],{"class":77},[36,1348,1324],{"class":46},[36,1350,104],{"class":103},[36,1352,1353],{"class":107},"get_text",[36,1355,111],{"class":103},[36,1357,1358],{"class":120},"strip",[36,1360,78],{"class":77},[36,1362,1364],{"class":1363},"s39Yj","True",[36,1366,130],{"class":103},[36,1368,1370],{"class":38,"line":1369},19,[36,1371,68],{"emptyLinePlaceholder":67},[36,1373,1375,1378,1381,1384,1387],{"class":38,"line":1374},20,[36,1376,1377],{"class":42},"    if",[36,1379,1380],{"class":46}," href ",[36,1382,1383],{"class":77},"is",[36,1385,1386],{"class":1363}," None",[36,1388,1389],{"class":103},":\n",[36,1391,1393],{"class":38,"line":1392},21,[36,1394,1395],{"class":42},"        continue\n",[36,1397,1399],{"class":38,"line":1398},22,[36,1400,68],{"emptyLinePlaceholder":67},[36,1402,1404,1407,1409,1412,1414,1416,1418,1421],{"class":38,"line":1403},23,[36,1405,1406],{"class":46},"    full_url ",[36,1408,78],{"class":77},[36,1410,1411],{"class":107}," urljoin",[36,1413,111],{"class":103},[36,1415,114],{"class":107},[36,1417,117],{"class":103},[36,1419,1420],{"class":107}," href",[36,1422,130],{"class":103},[36,1424,1426,1429,1431,1434,1437,1439,1441,1444],{"class":38,"line":1425},24,[36,1427,1428],{"class":46},"    links",[36,1430,104],{"class":103},[36,1432,1433],{"class":107},"append",[36,1435,1436],{"class":103},"((",[36,1438,170],{"class":107},[36,1440,117],{"class":103},[36,1442,1443],{"class":107}," full_url",[36,1445,283],{"class":103},[36,1447,1449],{"class":38,"line":1448},25,[36,1450,68],{"emptyLinePlaceholder":67},[36,1452,1454,1456,1459,1461,1464,1466,1469],{"class":38,"line":1453},26,[36,1455,231],{"class":42},[36,1457,1458],{"class":46}," text",[36,1460,117],{"class":103},[36,1462,1463],{"class":46}," full_url ",[36,1465,237],{"class":42},[36,1467,1468],{"class":46}," 
links",[36,1470,1389],{"class":103},[36,1472,1474,1476,1478,1482,1485,1488,1491,1494,1497,1500,1503,1506,1508],{"class":38,"line":1473},27,[36,1475,262],{"class":193},[36,1477,111],{"class":103},[36,1479,1481],{"class":1480},"sbsja","f",[36,1483,1484],{"class":85},"\"Link text: ",[36,1486,1487],{"class":126},"{",[36,1489,1490],{"class":107},"text ",[36,1492,1493],{"class":77},"or",[36,1495,1496],{"class":81}," '",[36,1498,1499],{"class":85},"[no text]",[36,1501,1502],{"class":81},"'",[36,1504,1505],{"class":126},"}",[36,1507,180],{"class":85},[36,1509,130],{"class":103},[36,1511,1513,1515,1517,1519,1522,1524,1527,1529,1531],{"class":38,"line":1512},28,[36,1514,262],{"class":193},[36,1516,111],{"class":103},[36,1518,1481],{"class":1480},[36,1520,1521],{"class":85},"\"URL: ",[36,1523,1487],{"class":126},[36,1525,1526],{"class":107},"full_url",[36,1528,1505],{"class":126},[36,1530,180],{"class":85},[36,1532,130],{"class":103},[36,1534,1536,1538,1540,1542,1545,1547,1550,1553],{"class":38,"line":1535},29,[36,1537,262],{"class":193},[36,1539,111],{"class":103},[36,1541,180],{"class":81},[36,1543,1544],{"class":85},"-",[36,1546,180],{"class":81},[36,1548,1549],{"class":77}," *",[36,1551,1552],{"class":126}," 20",[36,1554,130],{"class":103},[14,1556,1557],{},"This improved version:",[307,1559,1560,1568,1574,1580,1583],{},[310,1561,1562,1563,1565,1566],{},"Skips links where ",[33,1564,278],{}," is ",[33,1567,1032],{},[310,1569,1570,1571],{},"Converts relative links into full URLs with ",[33,1572,1573],{},"urljoin",[310,1575,1576,1577],{},"Extracts link text with ",[33,1578,1579],{},"get_text()",[310,1581,1582],{},"Stores results in a list",[310,1584,1585],{},"Prints cleaner output",[21,1587,1589],{"id":1588},"common-problems-when-scraping","Common problems when scraping",[14,1591,1592],{},"Web scraping often works well on simple pages, but there are common problems.",[653,1594,1596],{"id":1595},"the-site-blocks-requests-from-scripts","The site blocks requests from 
scripts",[14,1598,1599,1600,104],{},"Some websites do not allow automated scraping. They may return errors like ",[33,1601,775],{},[653,1603,1605],{"id":1604},"the-page-needs-javascript-before-content-appears","The page needs JavaScript before content appears",[14,1607,1608],{},"BeautifulSoup only parses the HTML you give it. It does not run JavaScript.",[14,1610,1611],{},"If the site loads data after the page opens in the browser, your scraper may see very little content.",[653,1613,1615],{"id":1614},"the-html-structure-is-different-than-expected","The HTML structure is different than expected",[14,1617,1618],{},"You may expect a title, a link, or a certain tag, but the page may not contain it.",[14,1620,1621,1622,104],{},"This can lead to errors such as ",[251,1623,1625],{"href":1624},"\u002Ferrors\u002Fattributeerror-object-has-no-attribute-fix\u002F","AttributeError: object has no attribute",[653,1627,1629],{"id":1628},"the-request-fails-because-of-a-bad-url-or-timeout","The request fails because of a bad URL or timeout",[14,1631,1632],{},"A typo in the URL or a slow website can cause the request to fail.",[653,1634,1636],{"id":1635},"the-scraper-breaks-when-the-site-layout-changes","The scraper breaks when the site layout changes",[14,1638,1639],{},"If the website changes its HTML, your scraper may stop finding the tags you need.",[21,1641,1643],{"id":1642},"important-beginner-note-about-legality-and-ethics","Important beginner note about legality and ethics",[14,1645,1646],{},"Before scraping any website, be careful.",[307,1648,1649,1652,1658,1661],{},[310,1650,1651],{},"Do not scrape sites that forbid it",[310,1653,1654,1655],{},"Check the site's terms and ",[33,1656,1657],{},"robots.txt",[310,1659,1660],{},"Do not send too many requests too quickly",[310,1662,1663],{},"Only scrape data you are allowed to access",[14,1665,1666],{},"A small practice scraper is fine for learning, but real websites may have rules you need to 
follow.",[21,1668,1670],{"id":1669},"when-this-example-is-enough-and-when-it-is-not","When this example is enough and when it is not",[14,1672,1673],{},"This example is a good starting point when:",[307,1675,1676,1679,1682],{},[310,1677,1678],{},"You are scraping a simple static HTML page",[310,1680,1681],{},"You want to learn how tags, attributes, and parsing work",[310,1683,1684],{},"You want a small script that gets titles and links",[14,1686,1687],{},"This example is not enough when:",[307,1689,1690,1693,1696,1699],{},[310,1691,1692],{},"The site requires login",[310,1694,1695],{},"The site depends heavily on JavaScript",[310,1697,1698],{},"You need a large production scraper",[310,1700,1701],{},"You need advanced retry logic, rate limiting, or data storage",[21,1703,1705],{"id":1704},"common-mistakes","Common mistakes",[14,1707,1708],{},"Here are some problems beginners often hit with this kind of script.",[653,1710,1712],{"id":1711},"modulenotfounderror",[33,1713,1714],{},"ModuleNotFoundError",[14,1716,1717,1718,1720,1721,1724],{},"You may see an error because ",[33,1719,293],{}," or ",[33,1722,1723],{},"bs4"," is not installed.",[14,1726,1727],{},"Try:",[26,1729,1730],{"className":371,"code":372,"language":373,"meta":31,"style":31},[33,1731,1732],{"__ignoreMap":31},[36,1733,1734,1736,1738,1740],{"class":38,"line":39},[36,1735,381],{"class":380},[36,1737,384],{"class":85},[36,1739,100],{"class":85},[36,1741,389],{"class":85},[14,1743,1744],{},"Or:",[26,1746,1747],{"className":371,"code":395,"language":373,"meta":31,"style":31},[33,1748,1749],{"__ignoreMap":31},[36,1750,1751,1753,1755,1757,1759,1761],{"class":38,"line":39},[36,1752,30],{"class":380},[36,1754,405],{"class":404},[36,1756,408],{"class":85},[36,1758,384],{"class":85},[36,1760,100],{"class":85},[36,1762,389],{"class":85},[14,1764,1765,1766,104],{},"If needed, see ",[251,1767,1769],{"href":1768},"\u002Ferrors\u002Fmodulenotfounderror-no-module-named-x-fix\u002F","how to fix ModuleNotFoundError: No 
module named X",[653,1771,1773,1776],{"id":1772},"attributeerror-when-a-tag-does-not-exist",[33,1774,1775],{},"AttributeError"," when a tag does not exist",[14,1778,1779],{},"This can happen if you write code like:",[26,1781,1783],{"className":28,"code":1782,"language":30,"meta":31,"style":31},"print(soup.title.string)\n",[33,1784,1785],{"__ignoreMap":31},[36,1786,1787,1789,1791,1793,1795,1797,1799,1801],{"class":38,"line":39},[36,1788,194],{"class":193},[36,1790,111],{"class":103},[36,1792,838],{"class":107},[36,1794,104],{"class":103},[36,1796,213],{"class":169},[36,1798,104],{"class":103},[36,1800,218],{"class":169},[36,1802,130],{"class":103},[14,1804,1805,1806,1808],{},"but the page has no ",[33,1807,880],{}," tag.",[14,1810,1811],{},"A safer version is:",[26,1813,1815],{"className":28,"code":1814,"language":30,"meta":31,"style":31},"title = soup.title.string if soup.title else \"No title found\"\nprint(title)\n",[33,1816,1817,1849],{"__ignoreMap":31},[36,1818,1819,1821,1823,1825,1827,1829,1831,1833,1835,1837,1839,1841,1843,1845,1847],{"class":38,"line":39},[36,1820,1218],{"class":46},[36,1822,78],{"class":77},[36,1824,208],{"class":46},[36,1826,104],{"class":103},[36,1828,213],{"class":169},[36,1830,104],{"class":103},[36,1832,218],{"class":169},[36,1834,1233],{"class":42},[36,1836,208],{"class":46},[36,1838,104],{"class":103},[36,1840,213],{"class":169},[36,1842,1242],{"class":42},[36,1844,82],{"class":81},[36,1846,1247],{"class":85},[36,1848,89],{"class":81},[36,1850,1851,1853,1855,1857],{"class":38,"line":50},[36,1852,194],{"class":193},[36,1854,111],{"class":103},[36,1856,213],{"class":107},[36,1858,130],{"class":103},[653,1860,1862,1865,1866,1868],{"id":1861},"typeerror-or-bad-output-from-missing-href-values",[33,1863,1864],{},"TypeError"," or bad output from missing ",[33,1867,278],{}," values",[14,1870,1077,1871,1873,1874,104],{},[33,1872,984],{}," tags do not have ",[33,1875,278],{},[14,1877,1878,1879,1881,1882,104],{},"This is why 
",[33,1880,997],{}," is safer than ",[33,1883,1041],{},[653,1885,1887,1888,1720,1891],{"id":1886},"http-errors-like-403-or-404","HTTP errors like ",[33,1889,1890],{},"403",[33,1892,1893],{},"404",[14,1895,1896],{},"These happen when:",[307,1898,1899,1902,1905],{},[310,1900,1901],{},"The page does not exist",[310,1903,1904],{},"The site blocks the request",[310,1906,1907],{},"The URL is wrong",[14,1909,994,1910,1913],{},[33,1911,1912],{},"response.raise_for_status()"," helps catch this early.",[653,1915,1917],{"id":1916},"empty-results-because-content-is-loaded-with-javascript","Empty results because content is loaded with JavaScript",[14,1919,1920],{},"If your browser shows content but your script does not, the page may be using JavaScript to load data after the initial HTML response.",[14,1922,1923],{},"In that case, BeautifulSoup alone may not be enough.",[21,1925,1927],{"id":1926},"faq","FAQ",[653,1929,1931],{"id":1930},"what-is-beautifulsoup-used-for","What is BeautifulSoup used for?",[14,1933,1934],{},"It parses HTML or XML so you can find tags, attributes, and text more easily in Python.",[653,1936,1938],{"id":1937},"do-i-need-requests-to-use-beautifulsoup","Do I need requests to use BeautifulSoup?",[14,1940,1941,1942,1944],{},"Not always, but beginners often use ",[33,1943,293],{}," to download the page and BeautifulSoup to parse it.",[653,1946,1948],{"id":1947},"why-does-my-scraper-return-nothing","Why does my scraper return nothing?",[14,1950,1951],{},"The page may use JavaScript, the selectors may be wrong, or the request may have failed.",[653,1953,1955,1956,1958],{"id":1954},"why-do-some-links-print-none","Why do some links print ",[33,1957,1032],{},"?",[14,1960,1961,1962,1964,1965,1029,1967,104],{},"Some anchor tags do not have an ",[33,1963,278],{}," attribute, so ",[33,1966,997],{},[33,1968,1032],{},[653,1970,1972],{"id":1971},"can-beautifulsoup-scrape-javascript-rendered-websites","Can BeautifulSoup scrape JavaScript-rendered 
websites?",[14,1974,1975],{},"It can parse the HTML you give it, but it does not run JavaScript by itself.",[21,1977,1979],{"id":1978},"see-also","See also",[307,1981,1982,1987,1992,1997,2002],{},[310,1983,1984],{},[251,1985,1986],{"href":360},"How to install a Python package with pip",[310,1988,1989],{},[251,1990,1991],{"href":736},"How to make an API request in Python",[310,1993,1994],{},[251,1995,1996],{"href":1768},"ModuleNotFoundError: No module named X fix",[310,1998,1999],{},[251,2000,2001],{"href":1624},"AttributeError: object has no attribute fix",[310,2003,2004],{},[251,2005,2007],{"href":2006},"\u002Fexamples\u002Fpython-simple-web-scraper-for-titles-example\u002F","Python simple web scraper for titles example",[14,2009,2010,2011,104],{},"Try this scraper on a simple practice page first. After that, move to a smaller focused example, such as scraping only page titles or saving the results to a file with the ",[251,2012,2014],{"href":2013},"\u002Freference\u002Fpython-open-function-explained\u002F","Python open() function",[2016,2017,2018],"style",{},"html pre.shiki code .sVHd0, html code.shiki .sVHd0{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#D73A49;--shiki-default-font-style:inherit;--shiki-dark:#F97583;--shiki-dark-font-style:inherit}html pre.shiki code .su5hD, html code.shiki .su5hD{--shiki-light:#90A4AE;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .smGrS, html code.shiki .smGrS{--shiki-light:#39ADB5;--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sjJ54, html code.shiki .sjJ54{--shiki-light:#39ADB5;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .s_sjI, html code.shiki .s_sjI{--shiki-light:#91B859;--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sP7_E, html code.shiki .sP7_E{--shiki-light:#39ADB5;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .slqww, html code.shiki 
.slqww{--shiki-light:#6182B8;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .s99_P, html code.shiki .s99_P{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#E36209;--shiki-default-font-style:inherit;--shiki-dark:#FFAB70;--shiki-dark-font-style:inherit}html pre.shiki code .srdBf, html code.shiki .srdBf{--shiki-light:#F76D47;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .skxfh, html code.shiki .skxfh{--shiki-light:#E53935;--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sptTA, html code.shiki .sptTA{--shiki-light:#6182B8;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html 
pre.shiki code .sbgvK, html code.shiki .sbgvK{--shiki-light:#E2931D;--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .stzsN, html code.shiki .stzsN{--shiki-light:#91B859;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sQzsp, html code.shiki .sQzsp{--shiki-light:#E53935;--shiki-default:#22863A;--shiki-dark:#85E89D}html pre.shiki code .s39Yj, html code.shiki .s39Yj{--shiki-light:#39ADB5;--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sbsja, html code.shiki .sbsja{--shiki-light:#9C3EDA;--shiki-default:#D73A49;--shiki-dark:#F97583}",{"title":31,"searchDepth":50,"depth":50,"links":2020},[2021,2022,2023,2024,2025,2034,2035,2036,2043,2044,2045,2055,2063],{"id":23,"depth":50,"text":24},{"id":301,"depth":50,"text":302},{"id":327,"depth":50,"text":328},{"id":364,"depth":50,"text":365},{"id":465,"depth":50,"text":466,"children":2026},[2027,2028,2030,2031,2032,2033],{"id":655,"depth":64,"text":656},{"id":679,"depth":64,"text":2029},"2. Fetch the page with requests.get()",{"id":741,"depth":64,"text":742},{"id":786,"depth":64,"text":787},{"id":842,"depth":64,"text":843},{"id":926,"depth":64,"text":927},{"id":1047,"depth":50,"text":1048},{"id":1085,"depth":50,"text":1086},{"id":1588,"depth":50,"text":1589,"children":2037},[2038,2039,2040,2041,2042],{"id":1595,"depth":64,"text":1596},{"id":1604,"depth":64,"text":1605},{"id":1614,"depth":64,"text":1615},{"id":1628,"depth":64,"text":1629},{"id":1635,"depth":64,"text":1636},{"id":1642,"depth":50,"text":1643},{"id":1669,"depth":50,"text":1670},{"id":1704,"depth":50,"text":1705,"children":2046},[2047,2048,2050,2052,2054],{"id":1711,"depth":64,"text":1714},{"id":1772,"depth":64,"text":2049},"AttributeError when a tag does not exist",{"id":1861,"depth":64,"text":2051},"TypeError or bad output from missing href values",{"id":1886,"depth":64,"text":2053},"HTTP errors like 403 or 
404",{"id":1916,"depth":64,"text":1917},{"id":1926,"depth":50,"text":1927,"children":2056},[2057,2058,2059,2060,2062],{"id":1930,"depth":64,"text":1931},{"id":1937,"depth":64,"text":1938},{"id":1947,"depth":64,"text":1948},{"id":1954,"depth":64,"text":2061},"Why do some links print None?",{"id":1971,"depth":64,"text":1972},{"id":1978,"depth":50,"text":1979},"Learn how to download a web page with requests, parse its HTML with BeautifulSoup, and extract the page title and links in this beginner-friendly Python example.","md",{},"\u002Fexamples\u002Fpython-web-scraping-example-beautifulsoup",{"title":5,"description":2064},"examples\u002Fpython-web-scraping-example-beautifulsoup","ITX90HC-qBkMeHM5Ohdc0nms2H2nKV0kGANK6yzRlbA",1777585509714]