如何使用 Beautiful Soup 的 CSS 選擇器獲取節點信息
from bs4 import BeautifulSoup
# Sample HTML document that the CSS-selector examples in this file parse.
# The markup is kept well-formed: attributes are separated by whitespace only
# (no stray comma), and <body> and <html> are explicitly closed, so the parse
# result does not rely on a particular parser's error recovery.
html = '''
<html>
<head>
<meta charset="UTF-8">
<title>Beautiful Soup</title>
</head>
<body>
<div>
<ul>
<li class="item" value1="1234" value2="hello world"><a href="https://www.xxx.com">ruochen</a></li>
<li class="item2"><a href="https://www.xxx.com">若塵</a></li>
</ul>
<button id="button1">確定</button>
<ul>
<li class="item"><a href="https://www.taobao.com">淘寶</a></li>
<li id="myli" class="item4"><a href="https://www.microsoft">微軟</a></li>
<li class="item5"><a href="https://www.google.com">谷歌</a></li>
</ul>
</div>
</body>
</html>
'''
# Parse the sample document using the lxml parser backend.
soup = BeautifulSoup(html, 'lxml')

# '.item' is a CSS class selector: it matches elements whose class is "item".
tags = soup.select('.item')
print(tags)
print(type(tags))

# Print each matched element on its own line.
for tag in tags:
    print(tag)
[<li class="item" value1="1234" value2="hello world"><a href="https://www.xxx.com">ruochen</a></li>, <li class="item"><a href="https://www.taobao.com">淘寶</a></li>]
<class 'list'>
<li class="item" value1="1234" value2="hello world"><a href="https://www.xxx.com">ruochen</a></li>
<li class="item"><a href="https://www.taobao.com">淘寶</a></li>
# '#myli' is a CSS id selector: it matches the element whose id is "myli".
tags = soup.select('#myli')
print(tags)
[<li class="item4" id="myli"><a href="https://www.microsoft">微軟</a></li>]
# 'a' is a CSS type selector: it matches every <a> element in the document.
tags = soup.select('a')
for tag in tags:
    # Show the whole element, then the value of its href attribute.
    print(tag)
    print(tag['href'])
<a href="https://www.xxx.com">ruochen</a>
https://www.xxx.com
<a href="https://www.xxx.com">若塵</a>
https://www.xxx.com
<a href="https://www.taobao.com">淘寶</a>
https://www.taobao.com
<a href="https://www.microsoft">微軟</a>
https://www.microsoft
<a href="https://www.google.com">谷歌</a>
https://www.google.com
持續更新中……