# Python提取sitemap.xml文章鏈接並推送百度
# (Extract article links from sitemap.xml with Python and push them to Baidu)
import urllib
import urllib.request
import re
import requests
# Headers sent with each POST to data.zz.baidu.com.
# Content-Length is deliberately not listed: every request body is a
# different URL, so no single fixed value can be right, and requests derives
# the header from the payload it actually sends.
header_baidu = {
    'User-Agent': 'curl/7.12.1',
    'Host': 'data.zz.baidu.com',
    'Content-Type': 'text/plain',
}
# Download the sitemap and collect the site links found in it.
url = 'https://hubaoquan.cn/sitemap.xml'
# The context manager closes the HTTP response once the body has been read;
# the timeout keeps an unresponsive server from blocking the script forever.
with urllib.request.urlopen(url, timeout=30) as sitemap_response:
    html = sitemap_response.read().decode('utf-8')
# Dots are escaped so they only match a literal '.' in the domain.
# The non-greedy \S*? makes every match end at the first '/' that follows the
# 'https://hubaoquan.cn/' prefix.
# NOTE(review): this captures only the first path segment of deeper URLs —
# confirm that matches the site's permalink structure.
r = re.compile(r'https://hubaoquan\.cn/\S*?/')
big = r.findall(html)
# Submission endpoint; the site and the token travel in the query string.
# NOTE(review): the token is hard-coded in the source — consider loading it
# from the environment or a config file kept out of version control.
urlPost = 'http://data.zz.baidu.com/urls?site=https://hubaoquan.cn&token=nXV1hubaoquanPiFZ'
# Open the log once and let the context manager close it, instead of opening
# a fresh, never-closed handle for every URL.
with open('xml.txt', 'a') as op_xml_txt:
    for i in big:
        print(i)
        # One POST per URL, with the URL itself as the plain-text body. The
        # timeout keeps a stalled connection from hanging the whole run.
        responsePost = requests.post(
            urlPost, data=i, headers=header_baidu, timeout=30
        )
        print(responsePost.text)
        # Record every URL that was submitted, one per line.
        op_xml_txt.write('%s\n' % i)