import requests
from lxml import etree
from urllib import parse
import re
# Define the scraper function
# Request headers sent with every page fetch.
_YGDY_HEADERS = {
    'Cookie': 'cscpvcouplet4298_fidx=1; cscpvrich5041_fidx=1',
    'Referer': 'http://dytt8.net/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}
# Encoding forced onto every response before it is decoded.
_YGDY_ENCODING = 'gb2312'
# Seconds to wait on a request before giving up instead of hanging forever.
_YGDY_TIMEOUT = 10
# File that collected records are appended to.
_YGDY_OUTPUT = '陽光電影.txt'
# Captures the href of an anchor whose content starts with "<stro".
# "[^\"]*" keeps the capture inside a single attribute value; a greedy ".*"
# could run from the first anchor on a line to the last one.
_YGDY_LINK_RE = re.compile(r'<a href="([^"]*)"><stro')


def _ygdy_fetch(url):
    """Download *url* and return its body decoded as ``_YGDY_ENCODING``."""
    response = requests.get(url, headers=_YGDY_HEADERS, timeout=_YGDY_TIMEOUT)
    response.encoding = _YGDY_ENCODING
    return response.text


def _ygdy_detail_links(info_url):
    """Return ``(regex_link, xpath_link)`` extracted from a detail page.

    Raises:
        LookupError: if either link cannot be found in the page.
    """
    info_text = _ygdy_fetch(info_url)

    found = _YGDY_LINK_RE.search(info_text)
    if found is None:
        raise LookupError('magnet link not found')

    info_tree = etree.HTML(info_text)
    # Guard in case the parser produced no tree for this body.
    hrefs = [] if info_tree is None else info_tree.xpath(
        '//td[@style="WORD-WRAP: break-word"]/a/@href')
    if not hrefs:
        raise LookupError('download link not found')

    return found.group(1), hrefs[0]


def _ygdy_save(title, magnet, other_link):
    """Append one UTF-8 record, terminated by CRLF, to the output file."""
    record = ''.join(
        (title, '磁力鏈接:', magnet, '另一個鏈接:', other_link, '\r\n'))
    # newline='' stops text mode from translating the explicit "\r\n".
    with open(_YGDY_OUTPUT, 'a', encoding='utf-8', newline='') as f:
        f.write(record)


def ygdy(baseurl):
    """Scrape one listing page and append every entry's links to a file.

    For each table under ``div.co_content8`` on the listing page, the entry's
    title and detail-page href are read, the detail page is fetched, and two
    links are extracted from it: one via a regular expression and one via
    XPath. The title and both links are printed and appended as one line to
    ``陽光電影.txt``.

    A failure on a single entry is reported on stdout and the loop moves on
    to the next entry.

    Args:
        baseurl: URL of the listing page; relative detail hrefs are resolved
            against it.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched (per-entry request errors are reported, not raised).
    """
    listing = etree.HTML(_ygdy_fetch(baseurl))
    if listing is None:
        print('no parsable content at ' + baseurl)
        return

    for table in listing.xpath('//div[@class="co_content8"]/ul/td/table'):
        title = None
        try:
            # Detail-page route and the entry's display name.
            hrefs = table.xpath('./tr[2]/td[2]/b/a/@href')
            anchors = table.xpath('./tr[2]/td[2]/b/a')
            if not hrefs or not anchors:
                raise LookupError('detail link not found in listing row')
            title = anchors[0].text
            if title is None:
                raise LookupError('entry title is empty')
            print(title)

            magnet, other_link = _ygdy_detail_links(
                parse.urljoin(baseurl, hrefs[0]))
            print(magnet)
            print(other_link)

            _ygdy_save(title, magnet, other_link)
        except (LookupError, requests.RequestException, etree.LxmlError,
                OSError, ValueError) as exc:
            # Best effort: report the entry that failed and keep crawling.
            print('{} no! ({})'.format(title or 'unknown entry', exc))
# Try the function out
if __name__ == '__main__':
    # Walk listing pages 1 through 177 in order, scraping each one.
    page_template = 'http://www.ygdy8.net/html/gndy/dyzz/list_23_%s.html'
    for page_no in range(1, 178):
        ygdy(page_template % page_no)
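# A simple scraper that collects magnet links from a movie site.
# Post a comment
# All comments
# No one has commented yet. Want to be the first? Type in the comment box above and click Publish.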