import re
import requests
from fake_useragent import UserAgent
# Demo: print the leading run of word characters of a URL.
url = 'https://www.baidu.com'
# match() anchors at the start of the string; \w+ stops at the first
# non-word character, which here is the ':' after the scheme.
m = re.compile(r'\w+').match(url)
print(m.group(0))
# Fetch a single page and print the text that follows its content <div>.
url2 = 'http://www.97xs.org/11/11389/3303898.html'
headers = {
    # Random User-Agent string supplied by fake_useragent.
    'User-Agent': UserAgent().random,
}
# Without a timeout, requests can wait indefinitely on an unresponsive server.
req = requests.get(url2, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of regex-searching an error page
# and silently printing nothing.
req.raise_for_status()
# Force GBK decoding for req.text (presumably the page is GBK-encoded;
# verify against the site's actual charset).
req.encoding = 'gbk'
# \s* skips any whitespace after the opening tag; (.+) then captures the
# remainder of that line, since '.' does not match newlines by default.
contents = re.findall(
    r'<div id="htmlContent" class="contentbox">\s*(.+)',
    req.text,
)
for info in contents:
    print(info)