一個簡單的爬蟲:爬取某電影網站的磁力鏈接

import requests
from lxml import etree
from urllib import parse
import re


# Scrape one listing page and save the download links of every movie on it.
def ygdy(baseurl, timeout=10):
    """Scrape one movie listing page and append each movie's links to a file.

    For every movie table found on the listing page at ``baseurl`` the
    movie's detail page is fetched and two links are pulled out of it: one
    with a regular expression and one with XPath.  The title and both links
    are printed, and one UTF-8 line per movie is appended to ``陽光電影.txt``.
    A movie whose detail page cannot be fetched or parsed is reported on
    stdout and skipped; the remaining movies are still processed.

    Args:
        baseurl: URL of the listing page.  Relative detail-page links are
            resolved against it.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the crawl.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.  Failures on individual detail pages are not raised.
    """
    headers = {
        'Cookie': 'cscpvcouplet4298_fidx=1; cscpvrich5041_fidx=1',
        'Referer': 'http://dytt8.net/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    response = requests.get(baseurl, headers=headers, timeout=timeout)
    # Force the decoding used for this site instead of requests' guess.
    response.encoding = 'gb2312'
    dy_ele = etree.HTML(response.text)
    if dy_ele is None:
        # etree.HTML can yield None when the body has nothing parseable.
        print(baseurl + 'no!')
        return

    # One <table> per movie on the listing page.
    dy_ele_table = dy_ele.xpath('//div[@class="co_content8"]/ul/td/table')

    # [^"]* keeps the capture inside a single href attribute; a greedy .*
    # would run on to the last '"><stro' found on the same line.
    link_pattern = re.compile(r'<a href="([^"]*)"><stro')

    for table in dy_ele_table:
        dy_a_filename = None
        try:
            # Detail-page route and movie title.
            dy_a_href = table.xpath('./tr[2]/td[2]/b/a/@href')[0]
            dy_a_filename = table.xpath('./tr[2]/td[2]/b/a')[0].text
            print(dy_a_filename)

            # Resolve the (possibly relative) route against the listing URL.
            info_url = parse.urljoin(baseurl, dy_a_href)
            info_response = requests.get(
                info_url, headers=headers, timeout=timeout)
            info_response.encoding = 'gb2312'
            info_text = info_response.text

            # First link: href of the anchor that directly wraps a <stro...
            # tag.  search() returns None when absent, which surfaces as the
            # AttributeError handled below.
            res_cl = link_pattern.search(info_text)
            print(res_cl.group(1))

            # Second link: anchor inside the word-wrapping table cell.
            info_dy = etree.HTML(info_text)
            info_lj = info_dy.xpath(
                '//td[@style="WORD-WRAP: break-word"]/a/@href')[0]
            print(info_lj)

            # join() raises TypeError if the title is None, so a movie
            # without a usable title is skipped rather than half-written.
            line = ''.join([
                dy_a_filename,
                '磁力鏈接:',
                res_cl.group(1),
                '另一個鏈接:',
                info_lj,
                '\r\n',
            ])
            # newline='' stops text mode from translating the explicit \r\n.
            with open('陽光電影.txt', 'a', encoding='utf-8', newline='') as f:
                f.write(line)
        except (requests.RequestException, etree.LxmlError,
                IndexError, AttributeError, TypeError) as err:
            # Only the expected fetch/parse failures are skipped, so that
            # KeyboardInterrupt and genuine bugs still propagate.
            print('%sno! (%r)' % (dy_a_filename, err))
# Script entry point: crawl listing pages 1 through 177, one after another.
if __name__ == '__main__':
    page_url_template = 'http://www.ygdy8.net/html/gndy/dyzz/list_23_%s.html'
    page_number = 1
    while page_number < 178:
        # Each listing page is handled completely before the next one starts.
        ygdy(page_url_template % page_number)
        page_number += 1


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章