[{"data":1,"prerenderedAt":1292},["ShallowReactive",2],{"post-2026-02-20-beautifulsoup-guide":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"tags":11,"body":15,"_type":1286,"_id":1287,"_source":1288,"_file":1289,"_stem":1290,"_extension":1291},"\u002Fblog\u002F2026-02-20-beautifulsoup-guide","blog",false,"","BeautifulSoup 使用说明","Python BeautifulSoup 库使用教程，HTML\u002FXML解析、节点导航、搜索与CSS选择器实战。","2026-02-20",[12,13,14],"Python","爬虫","HTML",{"type":16,"children":17,"toc":1280},"root",[18,27,33,53,73,78,83,117,164,169,174,214,220,226,242,248,271,290,321,331,336,346,369,374,424,448,453,465,488,492,538,543,556,570,574,604,609,622,666,691,696,701,709,730,760,768,781,811,819,824,840,870,901,909,946,984,989,1002,1007,1021,1030,1035,1049,1071,1111,1120,1125,1148,1157,1162,1185,1191,1203,1216,1263,1274],{"type":19,"tag":20,"props":21,"children":23},"element","h3",{"id":22},"beautifulsoup介绍",[24],{"type":25,"value":26},"text","BeautifulSoup介绍",{"type":19,"tag":28,"props":29,"children":30},"p",{},[31],{"type":25,"value":32},"BeautifulSoup是一个用于从HTML和XML文件中提取数据的Python库。它将自动将输入的文档转换为Unicode编码，输出文档转换为UTF-8编码。其主要功能有：",{"type":19,"tag":34,"props":35,"children":36},"ul",{},[37,43,48],{"type":19,"tag":38,"props":39,"children":40},"li",{},[41],{"type":25,"value":42},"导航",{"type":19,"tag":38,"props":44,"children":45},{},[46],{"type":25,"value":47},"搜索",{"type":19,"tag":38,"props":49,"children":50},{},[51],{"type":25,"value":52},"修改分析树",{"type":19,"tag":54,"props":55,"children":56},"blockquote",{},[57,68],{"type":19,"tag":28,"props":58,"children":59},{},[60,66],{"type":19,"tag":61,"props":62,"children":63},"strong",{},[64],{"type":25,"value":65},"Warning:",{"type":25,"value":67}," 
注意",{"type":19,"tag":28,"props":69,"children":70},{},[71],{"type":25,"value":72},"一般情况下不需要考虑编码方式，除非文档没有指定一个编码方式，因此遇到BeautifulSoup不能自动识别编码方式的情况，只需要说明一下原始编码方式即可。",{"type":19,"tag":20,"props":74,"children":76},{"id":75},"环境准备",[77],{"type":25,"value":75},{"type":19,"tag":28,"props":79,"children":80},{},[81],{"type":25,"value":82},"Windows使用前需要安装库，终端指令如下：",{"type":19,"tag":84,"props":85,"children":89},"pre",{"className":86,"code":87,"language":88,"meta":7,"style":7},"language-shell shiki shiki-themes github-dark","pip install beautifulsoup4\n","shell",[90],{"type":19,"tag":91,"props":92,"children":93},"code",{"__ignoreMap":7},[94],{"type":19,"tag":95,"props":96,"children":99},"span",{"class":97,"line":98},"line",1,[100,106,112],{"type":19,"tag":95,"props":101,"children":103},{"style":102},"--shiki-default:#B392F0",[104],{"type":25,"value":105},"pip",{"type":19,"tag":95,"props":107,"children":109},{"style":108},"--shiki-default:#9ECBFF",[110],{"type":25,"value":111}," install",{"type":19,"tag":95,"props":113,"children":114},{"style":108},[115],{"type":25,"value":116}," beautifulsoup4\n",{"type":19,"tag":54,"props":118,"children":119},{},[120,130],{"type":19,"tag":28,"props":121,"children":122},{},[123,128],{"type":19,"tag":61,"props":124,"children":125},{},[126],{"type":25,"value":127},"Info:",{"type":25,"value":129}," 
源码方式安装",{"type":19,"tag":131,"props":132,"children":133},"ol",{},[134,148,153],{"type":19,"tag":38,"props":135,"children":136},{},[137,139],{"type":25,"value":138},"打开源码地址进行下载：",{"type":19,"tag":140,"props":141,"children":145},"a",{"href":142,"rel":143},"https:\u002F\u002Fwww.crummy.com\u002Fsoftware\u002FBeautifulSoup\u002Fbs4\u002Fdownload\u002F",[144],"nofollow",[146],{"type":25,"value":147},"点击访问源码地址",{"type":19,"tag":38,"props":149,"children":150},{},[151],{"type":25,"value":152},"使用控制台打开进入源码所在路径",{"type":19,"tag":38,"props":154,"children":155},{},[156,158],{"type":25,"value":157},"执行命令：",{"type":19,"tag":91,"props":159,"children":161},{"className":160},[],[162],{"type":25,"value":163},"python setup.py install",{"type":19,"tag":20,"props":165,"children":167},{"id":166},"适配解释器",[168],{"type":25,"value":166},{"type":19,"tag":28,"props":170,"children":171},{},[172],{"type":25,"value":173},"BeautifulSoup支持python标准库中包含的HTML解析器，但常用的有lxml解析器和html5lib解析器，可以通过下列命令进行安装解析器(安装其一即可）：",{"type":19,"tag":84,"props":175,"children":177},{"className":86,"code":176,"language":88,"meta":7,"style":7},"pip install lxml\npip install html5lib\n",[178],{"type":19,"tag":91,"props":179,"children":180},{"__ignoreMap":7},[181,197],{"type":19,"tag":95,"props":182,"children":183},{"class":97,"line":98},[184,188,192],{"type":19,"tag":95,"props":185,"children":186},{"style":102},[187],{"type":25,"value":105},{"type":19,"tag":95,"props":189,"children":190},{"style":108},[191],{"type":25,"value":111},{"type":19,"tag":95,"props":193,"children":194},{"style":108},[195],{"type":25,"value":196}," lxml\n",{"type":19,"tag":95,"props":198,"children":200},{"class":97,"line":199},2,[201,205,209],{"type":19,"tag":95,"props":202,"children":203},{"style":102},[204],{"type":25,"value":105},{"type":19,"tag":95,"props":206,"children":207},{"style":108},[208],{"type":25,"value":111},{"type":19,"tag":95,"props":210,"children":211},{"style":108},[212],{"type":25,"value":213}," 
html5lib\n",{"type":19,"tag":20,"props":215,"children":217},{"id":216},"beautifulsoup的使用",[218],{"type":25,"value":219},"BeautifulSoup的使用",{"type":19,"tag":221,"props":222,"children":224},"h4",{"id":223},"导包",[225],{"type":25,"value":223},{"type":19,"tag":84,"props":227,"children":231},{"className":228,"code":229,"language":230,"meta":7,"style":7},"language-python shiki shiki-themes github-dark","from bs4 import BeautifulSoup\n","python",[232],{"type":19,"tag":91,"props":233,"children":234},{"__ignoreMap":7},[235],{"type":19,"tag":95,"props":236,"children":237},{"class":97,"line":98},[238],{"type":19,"tag":95,"props":239,"children":240},{},[241],{"type":25,"value":229},{"type":19,"tag":221,"props":243,"children":245},{"id":244},"创建beautifulsoup对象",[246],{"type":25,"value":247},"创建BeautifulSoup对象",{"type":19,"tag":84,"props":249,"children":251},{"className":228,"code":250,"language":230,"meta":7,"style":7},"# 创建BeautifulSoup对象并指定解析器为lxml\nsoup = BeautifulSoup(html_doc,features='lxml')\n",[252],{"type":19,"tag":91,"props":253,"children":254},{"__ignoreMap":7},[255,263],{"type":19,"tag":95,"props":256,"children":257},{"class":97,"line":98},[258],{"type":19,"tag":95,"props":259,"children":260},{},[261],{"type":25,"value":262},"# 创建BeautifulSoup对象并指定解析器为lxml\n",{"type":19,"tag":95,"props":264,"children":265},{"class":97,"line":199},[266],{"type":19,"tag":95,"props":267,"children":268},{},[269],{"type":25,"value":270},"soup = 
BeautifulSoup(html_doc,features='lxml')\n",{"type":19,"tag":28,"props":272,"children":273},{},[274,280,282,288],{"type":19,"tag":91,"props":275,"children":277},{"className":276},[],[278],{"type":25,"value":279},"html_doc",{"type":25,"value":281},"：为HTML的字符串\n",{"type":19,"tag":91,"props":283,"children":285},{"className":284},[],[286],{"type":25,"value":287},"features",{"type":25,"value":289},"：用于指定解析器",{"type":19,"tag":54,"props":291,"children":292},{},[293,301],{"type":19,"tag":28,"props":294,"children":295},{},[296],{"type":19,"tag":61,"props":297,"children":298},{},[299],{"type":25,"value":300},"Tips:",{"type":19,"tag":28,"props":302,"children":303},{},[304,306,311,313,319],{"type":25,"value":305},"若HTML为'index.html'类型的文件，则可以将原来的",{"type":19,"tag":91,"props":307,"children":309},{"className":308},[],[310],{"type":25,"value":279},{"type":25,"value":312},"替换为",{"type":19,"tag":91,"props":314,"children":316},{"className":315},[],[317],{"type":25,"value":318},"open('index.html')",{"type":25,"value":320},"即可。",{"type":19,"tag":28,"props":322,"children":323},{},[324,329],{"type":19,"tag":61,"props":325,"children":326},{},[327],{"type":25,"value":328},"BeautifulSoup对象的prettify()方法",{"type":25,"value":330},"可以对输出的内容自动进行格式化排版，按HTML层级添加缩进和换行，结构清晰。",{"type":19,"tag":221,"props":332,"children":334},{"id":333},"获取节点内容",[335],{"type":25,"value":333},{"type":19,"tag":28,"props":337,"children":338},{},[339,344],{"type":19,"tag":61,"props":340,"children":341},{},[342],{"type":25,"value":343},"获取节点对应的代码",{"type":25,"value":345},"语法格式如下：",{"type":19,"tag":84,"props":347,"children":349},{"className":228,"code":348,"language":230,"meta":7,"style":7},"soup.标签         # 获取节点对应的代码\nsoup.标签.name    # 获取节点对应的名称\n",[350],{"type":19,"tag":91,"props":351,"children":352},{"__ignoreMap":7},[353,361],{"type":19,"tag":95,"props":354,"children":355},{"class":97,"line":98},[356],{"type":19,"tag":95,"props":357,"children":358},{},[359],{"type":25,"value":360},"soup.标签         # 
获取节点对应的代码\n",{"type":19,"tag":95,"props":362,"children":363},{"class":97,"line":199},[364],{"type":19,"tag":95,"props":365,"children":366},{},[367],{"type":25,"value":368},"soup.标签.name    # 获取节点对应的名称\n",{"type":19,"tag":28,"props":370,"children":371},{},[372],{"type":25,"value":373},"示例：",{"type":19,"tag":84,"props":375,"children":377},{"className":228,"code":376,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\nprint(soup.head)      # 打印head节点\nprint(soup.body)      # 打印body节点\nprint(soup.title)     # 打印title节点\nprint(soup.p.name)    # 打印p节点的名称\n",[378],{"type":19,"tag":91,"props":379,"children":380},{"__ignoreMap":7},[381,389,397,406,415],{"type":19,"tag":95,"props":382,"children":383},{"class":97,"line":98},[384],{"type":19,"tag":95,"props":385,"children":386},{},[387],{"type":25,"value":388},"soup = BeautifulSoup(html_doc,features=\"lxml\")\n",{"type":19,"tag":95,"props":390,"children":391},{"class":97,"line":199},[392],{"type":19,"tag":95,"props":393,"children":394},{},[395],{"type":25,"value":396},"print(soup.head)      # 打印head节点\n",{"type":19,"tag":95,"props":398,"children":400},{"class":97,"line":399},3,[401],{"type":19,"tag":95,"props":402,"children":403},{},[404],{"type":25,"value":405},"print(soup.body)      # 打印body节点\n",{"type":19,"tag":95,"props":407,"children":409},{"class":97,"line":408},4,[410],{"type":19,"tag":95,"props":411,"children":412},{},[413],{"type":25,"value":414},"print(soup.title)     # 打印title节点\n",{"type":19,"tag":95,"props":416,"children":418},{"class":97,"line":417},5,[419],{"type":19,"tag":95,"props":420,"children":421},{},[422],{"type":25,"value":423},"print(soup.p.name)    # 
打印p节点的名称\n",{"type":19,"tag":54,"props":425,"children":426},{},[427,435],{"type":19,"tag":28,"props":428,"children":429},{},[430,434],{"type":19,"tag":61,"props":431,"children":432},{},[433],{"type":25,"value":65},{"type":25,"value":67},{"type":19,"tag":28,"props":436,"children":437},{},[438,440,446],{"type":25,"value":439},"该获取方法仅打印第一个检测到的",{"type":19,"tag":91,"props":441,"children":443},{"className":442},[],[444],{"type":25,"value":445},"\u003Cp>",{"type":25,"value":447},"标签而忽略其他。",{"type":19,"tag":221,"props":449,"children":451},{"id":450},"获取节点属性",[452],{"type":25,"value":450},{"type":19,"tag":28,"props":454,"children":455},{},[456,458,463],{"type":25,"value":457},"若已选择一个指定的节点名称，那么只需调用attrs即可获取这个节点下的所有属性，返回值为",{"type":19,"tag":61,"props":459,"children":460},{},[461],{"type":25,"value":462},"字典类型",{"type":25,"value":464},"。语法格式如下：",{"type":19,"tag":84,"props":466,"children":468},{"className":228,"code":467,"language":230,"meta":7,"style":7},"soup.标签.attrs           # 获取指定节点的属性\nsoup.标签.attrs[属性名]    # 获取指定节点的指定属性名的值(attrs可省略)\n",[469],{"type":19,"tag":91,"props":470,"children":471},{"__ignoreMap":7},[472,480],{"type":19,"tag":95,"props":473,"children":474},{"class":97,"line":98},[475],{"type":19,"tag":95,"props":476,"children":477},{},[478],{"type":25,"value":479},"soup.标签.attrs           # 获取指定节点的属性\n",{"type":19,"tag":95,"props":481,"children":482},{"class":97,"line":199},[483],{"type":19,"tag":95,"props":484,"children":485},{},[486],{"type":25,"value":487},"soup.标签.attrs[属性名]    # 获取指定节点的指定属性名的值(attrs可省略)\n",{"type":19,"tag":28,"props":489,"children":490},{},[491],{"type":25,"value":373},{"type":19,"tag":84,"props":493,"children":495},{"className":228,"code":494,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\nprint(soup.meta.attrs)         # 打印meta的属性\nprint(soup.link.attrs)         # 打印link的属性\nprint(soup.div.attrs['id'])    # 打印div的id属性值\nprint(soup.div['id'])          # 
打印div的id属性值\n",[496],{"type":19,"tag":91,"props":497,"children":498},{"__ignoreMap":7},[499,506,514,522,530],{"type":19,"tag":95,"props":500,"children":501},{"class":97,"line":98},[502],{"type":19,"tag":95,"props":503,"children":504},{},[505],{"type":25,"value":388},{"type":19,"tag":95,"props":507,"children":508},{"class":97,"line":199},[509],{"type":19,"tag":95,"props":510,"children":511},{},[512],{"type":25,"value":513},"print(soup.meta.attrs)         # 打印meta的属性\n",{"type":19,"tag":95,"props":515,"children":516},{"class":97,"line":399},[517],{"type":19,"tag":95,"props":518,"children":519},{},[520],{"type":25,"value":521},"print(soup.link.attrs)         # 打印link的属性\n",{"type":19,"tag":95,"props":523,"children":524},{"class":97,"line":408},[525],{"type":19,"tag":95,"props":526,"children":527},{},[528],{"type":25,"value":529},"print(soup.div.attrs['id'])    # 打印div的id属性值\n",{"type":19,"tag":95,"props":531,"children":532},{"class":97,"line":417},[533],{"type":19,"tag":95,"props":534,"children":535},{},[536],{"type":25,"value":537},"print(soup.div['id'])          # 打印div的id属性值\n",{"type":19,"tag":221,"props":539,"children":541},{"id":540},"获取节点包含的文本内容",[542],{"type":25,"value":540},{"type":19,"tag":28,"props":544,"children":545},{},[546,548,554],{"type":25,"value":547},"若要获取节点包含的文本内容，只需在节点名称后面添加",{"type":19,"tag":91,"props":549,"children":551},{"className":550},[],[552],{"type":25,"value":553},"string",{"type":25,"value":555},"属性即可。语法如下：",{"type":19,"tag":84,"props":557,"children":559},{"className":228,"code":558,"language":230,"meta":7,"style":7},"soup.标签.string     # 
获取指定节点的文本内容\n",[560],{"type":19,"tag":91,"props":561,"children":562},{"__ignoreMap":7},[563],{"type":19,"tag":95,"props":564,"children":565},{"class":97,"line":98},[566],{"type":19,"tag":95,"props":567,"children":568},{},[569],{"type":25,"value":558},{"type":19,"tag":28,"props":571,"children":572},{},[573],{"type":25,"value":373},{"type":19,"tag":84,"props":575,"children":577},{"className":228,"code":576,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\nprint(soup.title.string)        # 打印title节点包含的文本内容\nprint(soup.link.string)         # 打印link节点包含的文本内容\n",[578],{"type":19,"tag":91,"props":579,"children":580},{"__ignoreMap":7},[581,588,596],{"type":19,"tag":95,"props":582,"children":583},{"class":97,"line":98},[584],{"type":19,"tag":95,"props":585,"children":586},{},[587],{"type":25,"value":388},{"type":19,"tag":95,"props":589,"children":590},{"class":97,"line":199},[591],{"type":19,"tag":95,"props":592,"children":593},{},[594],{"type":25,"value":595},"print(soup.title.string)        # 打印title节点包含的文本内容\n",{"type":19,"tag":95,"props":597,"children":598},{"class":97,"line":399},[599],{"type":19,"tag":95,"props":600,"children":601},{},[602],{"type":25,"value":603},"print(soup.link.string)         # 打印link节点包含的文本内容\n",{"type":19,"tag":221,"props":605,"children":607},{"id":606},"嵌套获取节点内容",[608],{"type":25,"value":606},{"type":19,"tag":28,"props":610,"children":611},{},[612,614,620],{"type":25,"value":613},"使用beautifulsoup获取每个节点的内容时，可以通过 ",{"type":19,"tag":91,"props":615,"children":617},{"className":616},[],[618],{"type":25,"value":619},".",{"type":25,"value":621}," 直接获取下一个节点的内容，代码如下：",{"type":19,"tag":84,"props":623,"children":625},{"className":228,"code":624,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\nprint(soup.head.title)         # 打印head节点中title节点内容\nprint(soup.head.title.string)  # 打印head节点中title节点的文本内容\nprint(soup.div.attrs['id'])    # 打印div的id属性值\nprint(soup.div['id'])          # 
打印div的id属性值\n",[626],{"type":19,"tag":91,"props":627,"children":628},{"__ignoreMap":7},[629,636,644,652,659],{"type":19,"tag":95,"props":630,"children":631},{"class":97,"line":98},[632],{"type":19,"tag":95,"props":633,"children":634},{},[635],{"type":25,"value":388},{"type":19,"tag":95,"props":637,"children":638},{"class":97,"line":199},[639],{"type":19,"tag":95,"props":640,"children":641},{},[642],{"type":25,"value":643},"print(soup.head.title)         # 打印head节点中title节点内容\n",{"type":19,"tag":95,"props":645,"children":646},{"class":97,"line":399},[647],{"type":19,"tag":95,"props":648,"children":649},{},[650],{"type":25,"value":651},"print(soup.head.title.string)  # 打印head节点中title节点的文本内容\n",{"type":19,"tag":95,"props":653,"children":654},{"class":97,"line":408},[655],{"type":19,"tag":95,"props":656,"children":657},{},[658],{"type":25,"value":529},{"type":19,"tag":95,"props":660,"children":661},{"class":97,"line":417},[662],{"type":19,"tag":95,"props":663,"children":664},{},[665],{"type":25,"value":537},{"type":19,"tag":54,"props":667,"children":668},{},[669,678],{"type":19,"tag":28,"props":670,"children":671},{},[672,676],{"type":19,"tag":61,"props":673,"children":674},{},[675],{"type":25,"value":127},{"type":25,"value":677}," 说明",{"type":19,"tag":28,"props":679,"children":680},{},[681,683,689],{"type":25,"value":682},"获取head与其内部的title节点内容时数据类型均为 ",{"type":19,"tag":91,"props":684,"children":686},{"className":685},[],[687],{"type":25,"value":688},"\u003Cclass 
'bs4.element.Tag'>",{"type":25,"value":690},"，说明在Tag类的基础上可以获取当前节点的子节点内容",{"type":19,"tag":221,"props":692,"children":694},{"id":693},"关联获取",[695],{"type":25,"value":693},{"type":19,"tag":28,"props":697,"children":698},{},[699],{"type":25,"value":700},"先确认某一节点，然后以该节点为中心获取对应的子节点、孙节点、父节点及兄弟节点。",{"type":19,"tag":28,"props":702,"children":703},{},[704],{"type":19,"tag":61,"props":705,"children":706},{},[707],{"type":25,"value":708},"获取子节点",{"type":19,"tag":28,"props":710,"children":711},{},[712,714,720,722,728],{"type":25,"value":713},"获取某节点下面的所有的子节点时，可以使用 ",{"type":19,"tag":91,"props":715,"children":717},{"className":716},[],[718],{"type":25,"value":719},"contents",{"type":25,"value":721}," 或 ",{"type":19,"tag":91,"props":723,"children":725},{"className":724},[],[726],{"type":25,"value":727},"children",{"type":25,"value":729}," 属性来实现，其中contents返回一个列表，该列表中每个元素都是一个子节点内容，而children所返回的则是一个list_iterator类型的可迭代对象，需要转换成list类型或遍历进行获取。语法如下：",{"type":19,"tag":84,"props":731,"children":733},{"className":228,"code":732,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\nprint(soup.head.contents)         # 列表形式打印head节点下所有子节点\nprint(soup.head.children)         # 可迭代对象形式打印head节点下所有子节点\n",[734],{"type":19,"tag":91,"props":735,"children":736},{"__ignoreMap":7},[737,744,752],{"type":19,"tag":95,"props":738,"children":739},{"class":97,"line":98},[740],{"type":19,"tag":95,"props":741,"children":742},{},[743],{"type":25,"value":388},{"type":19,"tag":95,"props":745,"children":746},{"class":97,"line":199},[747],{"type":19,"tag":95,"props":748,"children":749},{},[750],{"type":25,"value":751},"print(soup.head.contents)         # 列表形式打印head节点下所有子节点\n",{"type":19,"tag":95,"props":753,"children":754},{"class":97,"line":399},[755],{"type":19,"tag":95,"props":756,"children":757},{},[758],{"type":25,"value":759},"print(soup.head.children)         # 
可迭代对象形式打印head节点下所有子节点\n",{"type":19,"tag":28,"props":761,"children":762},{},[763],{"type":19,"tag":61,"props":764,"children":765},{},[766],{"type":25,"value":767},"获取子孙节点",{"type":19,"tag":28,"props":769,"children":770},{},[771,773,779],{"type":25,"value":772},"在获取某节点下所有的子孙节点时，可以使用 ",{"type":19,"tag":91,"props":774,"children":776},{"className":775},[],[777],{"type":25,"value":778},"descendants",{"type":25,"value":780}," 属性来实现，该属性会返回一个generator对象，其内容需要转换成list类型或遍历进行获取。语法如下：",{"type":19,"tag":84,"props":782,"children":784},{"className":228,"code":783,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features=\"lxml\")\n# 打印body节点下所有子孙节点内容的generator对象\nprint(soup.body.descendants)\n",[785],{"type":19,"tag":91,"props":786,"children":787},{"__ignoreMap":7},[788,795,803],{"type":19,"tag":95,"props":789,"children":790},{"class":97,"line":98},[791],{"type":19,"tag":95,"props":792,"children":793},{},[794],{"type":25,"value":388},{"type":19,"tag":95,"props":796,"children":797},{"class":97,"line":199},[798],{"type":19,"tag":95,"props":799,"children":800},{},[801],{"type":25,"value":802},"# 打印body节点下所有子孙节点内容的generator对象\n",{"type":19,"tag":95,"props":804,"children":805},{"class":97,"line":399},[806],{"type":19,"tag":95,"props":807,"children":808},{},[809],{"type":25,"value":810},"print(soup.body.descendants)\n",{"type":19,"tag":28,"props":812,"children":813},{},[814],{"type":19,"tag":61,"props":815,"children":816},{},[817],{"type":25,"value":818},"获取父节点",{"type":19,"tag":28,"props":820,"children":821},{},[822],{"type":25,"value":823},"获取父节点存在两种方式：",{"type":19,"tag":131,"props":825,"children":826},{},[827],{"type":19,"tag":38,"props":828,"children":829},{},[830,832,838],{"type":25,"value":831},"通过 ",{"type":19,"tag":91,"props":833,"children":835},{"className":834},[],[836],{"type":25,"value":837},"parents",{"type":25,"value":839}," 
属性直接获取指定节点的父节点内容，还可以返回父节点及以上节点（祖先节点）内容，其内容需要转换成list类型或遍历进行获取。语法如下：",{"type":19,"tag":84,"props":841,"children":843},{"className":228,"code":842,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features='lxml')\nprint(soup.title.parent)  # 打印title节点的父节点内容\nprint(soup.title.parents) # 打印title节点的父节点及以上内容的generator对象\n",[844],{"type":19,"tag":91,"props":845,"children":846},{"__ignoreMap":7},[847,854,862],{"type":19,"tag":95,"props":848,"children":849},{"class":97,"line":98},[850],{"type":19,"tag":95,"props":851,"children":852},{},[853],{"type":25,"value":270},{"type":19,"tag":95,"props":855,"children":856},{"class":97,"line":199},[857],{"type":19,"tag":95,"props":858,"children":859},{},[860],{"type":25,"value":861},"print(soup.title.parent)  # 打印title节点的父节点内容\n",{"type":19,"tag":95,"props":863,"children":864},{"class":97,"line":399},[865],{"type":19,"tag":95,"props":866,"children":867},{},[868],{"type":25,"value":869},"print(soup.title.parents) # 打印title节点的父节点及以上内容的generator对象\n",{"type":19,"tag":54,"props":871,"children":872},{},[873,881],{"type":19,"tag":28,"props":874,"children":875},{},[876,880],{"type":19,"tag":61,"props":877,"children":878},{},[879],{"type":25,"value":127},{"type":25,"value":677},{"type":19,"tag":28,"props":882,"children":883},{},[884,886,891,893,899],{"type":25,"value":885},"parents属性所获取父节点的顺序为head、html、",{"type":19,"tag":95,"props":887,"children":888},{},[889],{"type":25,"value":890},"document",{"type":25,"value":892},"，此处的 ",{"type":19,"tag":91,"props":894,"children":896},{"className":895},[],[897],{"type":25,"value":898},"[document]",{"type":25,"value":900}," 表示文档对象，是整个HTML文档，也是BeautifulSoup对象。",{"type":19,"tag":28,"props":902,"children":903},{},[904],{"type":19,"tag":61,"props":905,"children":906},{},[907],{"type":25,"value":908},"获取兄弟节点",{"type":19,"tag":28,"props":910,"children":911},{},[912,914,920,922,928,930,936,938,944],{"type":25,"value":913},"假若在一段HTML中获取第一个p节点的下一个div兄弟节点时可以使用 
",{"type":19,"tag":91,"props":915,"children":917},{"className":916},[],[918],{"type":25,"value":919},"next_sibling",{"type":25,"value":921}," 属性，若要获取当前div节点的上一个兄弟节点p时，则可以使用 ",{"type":19,"tag":91,"props":923,"children":925},{"className":924},[],[926],{"type":25,"value":927},"previous_sibling",{"type":25,"value":929}," 属性。想获取当前节点后面的所有兄弟节点，则可以使用 ",{"type":19,"tag":91,"props":931,"children":933},{"className":932},[],[934],{"type":25,"value":935},"next_siblings",{"type":25,"value":937}," 属性，若要获取前面的，则使用 ",{"type":19,"tag":91,"props":939,"children":941},{"className":940},[],[942],{"type":25,"value":943},"previous_siblings",{"type":25,"value":945}," 属性。这两个属性都将以generator对象的形式返回，语法格式如下：",{"type":19,"tag":84,"props":947,"children":949},{"className":228,"code":948,"language":230,"meta":7,"style":7},"soup = BeautifulSoup(html_doc,features='lxml')\nprint(soup.p.next_sibling)  # 打印第一个p节点的下一个兄弟节点\n# 打印p节点前面的所有兄弟节点的generator对象\nprint(soup.p.previous_siblings)\n",[950],{"type":19,"tag":91,"props":951,"children":952},{"__ignoreMap":7},[953,960,968,976],{"type":19,"tag":95,"props":954,"children":955},{"class":97,"line":98},[956],{"type":19,"tag":95,"props":957,"children":958},{},[959],{"type":25,"value":270},{"type":19,"tag":95,"props":961,"children":962},{"class":97,"line":199},[963],{"type":19,"tag":95,"props":964,"children":965},{},[966],{"type":25,"value":967},"print(soup.p.next_sibling)  # 打印第一个p节点的下一个兄弟节点\n",{"type":19,"tag":95,"props":969,"children":970},{"class":97,"line":399},[971],{"type":19,"tag":95,"props":972,"children":973},{},[974],{"type":25,"value":975},"# 
打印p节点前面的所有兄弟节点的generator对象\n",{"type":19,"tag":95,"props":977,"children":978},{"class":97,"line":408},[979],{"type":19,"tag":95,"props":980,"children":981},{},[982],{"type":25,"value":983},"print(soup.p.previous_siblings)\n",{"type":19,"tag":221,"props":985,"children":987},{"id":986},"方法获取内容",[988],{"type":25,"value":986},{"type":19,"tag":34,"props":990,"children":991},{},[992,997],{"type":19,"tag":38,"props":993,"children":994},{},[995],{"type":25,"value":996},"find_all()",{"type":19,"tag":38,"props":998,"children":999},{},[1000],{"type":25,"value":1001},"find()",{"type":19,"tag":28,"props":1003,"children":1004},{},[1005],{"type":25,"value":1006},"find_all()获取所有符合条件的内容，find()获取第一个匹配的节点内容，接下来以find_all()为例进行整理：",{"type":19,"tag":84,"props":1008,"children":1010},{"className":228,"code":1009,"language":230,"meta":7,"style":7},"find_all(name=None,attrs={},recursive=True,text=None,limit=None,**kwargs)\n",[1011],{"type":19,"tag":91,"props":1012,"children":1013},{"__ignoreMap":7},[1014],{"type":19,"tag":95,"props":1015,"children":1016},{"class":97,"line":98},[1017],{"type":19,"tag":95,"props":1018,"children":1019},{},[1020],{"type":25,"value":1009},{"type":19,"tag":28,"props":1022,"children":1023},{},[1024],{"type":19,"tag":91,"props":1025,"children":1027},{"className":1026},[],[1028],{"type":25,"value":1029},"name参数",{"type":19,"tag":28,"props":1031,"children":1032},{},[1033],{"type":25,"value":1034},"用来指定节点名称，指定该参数以后将返回一个可迭代对象，所有符合条件的均为对象的一个元素。代码如下：",{"type":19,"tag":84,"props":1036,"children":1038},{"className":228,"code":1037,"language":230,"meta":7,"style":7},"print(soup.find_all(name='p')) # 
打印所有名称为p的节点内容\n",[1039],{"type":19,"tag":91,"props":1040,"children":1041},{"__ignoreMap":7},[1042],{"type":19,"tag":95,"props":1043,"children":1044},{"class":97,"line":98},[1045],{"type":19,"tag":95,"props":1046,"children":1047},{},[1048],{"type":25,"value":1037},{"type":19,"tag":54,"props":1050,"children":1051},{},[1052,1060],{"type":19,"tag":28,"props":1053,"children":1054},{},[1055,1059],{"type":19,"tag":61,"props":1056,"children":1057},{},[1058],{"type":25,"value":127},{"type":25,"value":677},{"type":19,"tag":28,"props":1061,"children":1062},{},[1063,1065],{"type":25,"value":1064},"bs4.element.ResultSet类型的数据与python的列表类型类似，可以使用索引或切片的方式进行数据获取，如：",{"type":19,"tag":91,"props":1066,"children":1068},{"className":1067},[],[1069],{"type":25,"value":1070},"print(soup.find_all(name='p')[0])",{"type":19,"tag":54,"props":1072,"children":1073},{},[1074,1083,1088],{"type":19,"tag":28,"props":1075,"children":1076},{},[1077,1081],{"type":19,"tag":61,"props":1078,"children":1079},{},[1080],{"type":25,"value":65},{"type":25,"value":1082}," 
嵌套获取",{"type":19,"tag":28,"props":1084,"children":1085},{},[1086],{"type":25,"value":1087},"因为bs4.element.ResultSet数据中的每一个元素都是bs4.element.Tag类型，所以可以直接对某一元素进行嵌套获取，代码如下：",{"type":19,"tag":84,"props":1089,"children":1091},{"className":228,"code":1090,"language":230,"meta":7,"style":7},"print(soup.find_all(name='p')[0])\nprint(soup.find_all(name='p')[0].find_all(name='a'))\n",[1092],{"type":19,"tag":91,"props":1093,"children":1094},{"__ignoreMap":7},[1095,1103],{"type":19,"tag":95,"props":1096,"children":1097},{"class":97,"line":98},[1098],{"type":19,"tag":95,"props":1099,"children":1100},{},[1101],{"type":25,"value":1102},"print(soup.find_all(name='p')[0])\n",{"type":19,"tag":95,"props":1104,"children":1105},{"class":97,"line":199},[1106],{"type":19,"tag":95,"props":1107,"children":1108},{},[1109],{"type":25,"value":1110},"print(soup.find_all(name='p')[0].find_all(name='a'))\n",{"type":19,"tag":28,"props":1112,"children":1113},{},[1114],{"type":19,"tag":91,"props":1115,"children":1117},{"className":1116},[],[1118],{"type":25,"value":1119},"attrs参数",{"type":19,"tag":28,"props":1121,"children":1122},{},[1123],{"type":25,"value":1124},"在填写attrs参数时，默认情况下需要填写字典类型的参数值，不过也可以通过关键字参数赋值的方式填写参数。代码如下：",{"type":19,"tag":84,"props":1126,"children":1128},{"className":228,"code":1127,"language":230,"meta":7,"style":7},"print(soup.find_all(attrs={'value':'1'}))\nprint(soup.find_all(value='1')) # 打印value值为1的所有内容\n",[1129],{"type":19,"tag":91,"props":1130,"children":1131},{"__ignoreMap":7},[1132,1140],{"type":19,"tag":95,"props":1133,"children":1134},{"class":97,"line":98},[1135],{"type":19,"tag":95,"props":1136,"children":1137},{},[1138],{"type":25,"value":1139},"print(soup.find_all(attrs={'value':'1'}))\n",{"type":19,"tag":95,"props":1141,"children":1142},{"class":97,"line":199},[1143],{"type":19,"tag":95,"props":1144,"children":1145},{},[1146],{"type":25,"value":1147},"print(soup.find_all(value='1')) # 
打印value值为1的所有内容\n",{"type":19,"tag":28,"props":1149,"children":1150},{},[1151],{"type":19,"tag":91,"props":1152,"children":1154},{"className":1153},[],[1155],{"type":25,"value":1156},"text参数",{"type":19,"tag":28,"props":1158,"children":1159},{},[1160],{"type":25,"value":1161},"指定text参数可以获取节点中的文本，该参数可以指定字符串或者正则表达式对象，代码如下：",{"type":19,"tag":84,"props":1163,"children":1165},{"className":228,"code":1164,"language":230,"meta":7,"style":7},"print(soup.find_all(text=\"Python\"))\nprint(soup.find_all(text=re.compile('Python')))\n",[1166],{"type":19,"tag":91,"props":1167,"children":1168},{"__ignoreMap":7},[1169,1177],{"type":19,"tag":95,"props":1170,"children":1171},{"class":97,"line":98},[1172],{"type":19,"tag":95,"props":1173,"children":1174},{},[1175],{"type":25,"value":1176},"print(soup.find_all(text=\"Python\"))\n",{"type":19,"tag":95,"props":1178,"children":1179},{"class":97,"line":199},[1180],{"type":19,"tag":95,"props":1181,"children":1182},{},[1183],{"type":25,"value":1184},"print(soup.find_all(text=re.compile('Python')))\n",{"type":19,"tag":221,"props":1186,"children":1188},{"id":1187},"css选择器",[1189],{"type":25,"value":1190},"CSS选择器",{"type":19,"tag":28,"props":1192,"children":1193},{},[1194,1196],{"type":25,"value":1195},"参考文档：",{"type":19,"tag":140,"props":1197,"children":1200},{"href":1198,"rel":1199},"https:\u002F\u002Fwww.w3school.com.cn\u002Fcssref\u002Fcss_selectors.ASP",[144],[1201],{"type":25,"value":1202},"点击进行访问",{"type":19,"tag":28,"props":1204,"children":1205},{},[1206,1208,1214],{"type":25,"value":1207},"若是Tag或BeautifulSoup对象都可以直接调用 ",{"type":19,"tag":91,"props":1209,"children":1211},{"className":1210},[],[1212],{"type":25,"value":1213},"select()",{"type":25,"value":1215}," 方法，然后填写指定参数即可通过CSS选择器获取节点中的内容。",{"type":19,"tag":84,"props":1217,"children":1219},{"className":228,"code":1218,"language":230,"meta":7,"style":7},"print(soup.select('p'))       # 打印所有p节点内容\nprint(soup.select('p')[0])    # 打印所有p节点中的第一个节点\nprint(soup.select('html head title')) # 
打印逐层获取的title节点\nprint(soup.select('.test_2'))   # 打印类名为test_2所对应的节点\nprint(soup.select('#class_1'))  # 打印id值为class_1所对应的节点\n",[1220],{"type":19,"tag":91,"props":1221,"children":1222},{"__ignoreMap":7},[1223,1231,1239,1247,1255],{"type":19,"tag":95,"props":1224,"children":1225},{"class":97,"line":98},[1226],{"type":19,"tag":95,"props":1227,"children":1228},{},[1229],{"type":25,"value":1230},"print(soup.select('p'))       # 打印所有p节点内容\n",{"type":19,"tag":95,"props":1232,"children":1233},{"class":97,"line":199},[1234],{"type":19,"tag":95,"props":1235,"children":1236},{},[1237],{"type":25,"value":1238},"print(soup.select('p')[0])    # 打印所有p节点中的第一个节点\n",{"type":19,"tag":95,"props":1240,"children":1241},{"class":97,"line":399},[1242],{"type":19,"tag":95,"props":1243,"children":1244},{},[1245],{"type":25,"value":1246},"print(soup.select('html head title')) # 打印逐层获取的title节点\n",{"type":19,"tag":95,"props":1248,"children":1249},{"class":97,"line":408},[1250],{"type":19,"tag":95,"props":1251,"children":1252},{},[1253],{"type":25,"value":1254},"print(soup.select('.test_2'))   # 打印类名为test_2所对应的节点\n",{"type":19,"tag":95,"props":1256,"children":1257},{"class":97,"line":417},[1258],{"type":19,"tag":95,"props":1259,"children":1260},{},[1261],{"type":25,"value":1262},"print(soup.select('#class_1'))  # 打印id值为class_1所对应的节点\n",{"type":19,"tag":28,"props":1264,"children":1265},{},[1266,1272],{"type":19,"tag":91,"props":1267,"children":1269},{"className":1268},[],[1270],{"type":25,"value":1271},"select_one()",{"type":25,"value":1273},"方法：用于获取所有符合条件节点中的第一个节点。",{"type":19,"tag":1275,"props":1276,"children":1277},"style",{},[1278],{"type":25,"value":1279},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: 
var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":199,"depth":199,"links":1281},[1282,1283,1284,1285],{"id":22,"depth":399,"text":26},{"id":75,"depth":399,"text":75},{"id":166,"depth":399,"text":166},{"id":216,"depth":399,"text":219},"markdown","content:blog:2026-02-20-beautifulsoup-guide.md","content","blog\u002F2026-02-20-beautifulsoup-guide.md","blog\u002F2026-02-20-beautifulsoup-guide","md",1780801017865]