爬蟲--人人網-簡易爬取3

from urllib import  request,parse
from urllib.error import URLError,HTTPError

#導入包  ,保存cookie
from http import cookiejar


class Session(object):
    """Minimal cookie-aware HTTP session built on ``urllib``.

    Every request made through :meth:`get` / :meth:`post` goes through an
    opener that has an ``HTTPCookieProcessor`` attached, so cookies set by
    one response are sent back on later requests.
    """

    def __init__(self):
        # The jar collects cookies; keep a reference so callers can inspect it.
        self.cookies = cookiejar.CookieJar()
        # The processor stores response cookies in the jar and adds them to
        # outgoing requests.
        handler = request.HTTPCookieProcessor(self.cookies)
        self.opener = request.build_opener(handler)

    def get(self, url, headers=None, opener=None):
        """Send a GET request.

        ``opener`` overrides the session's own opener when given; otherwise
        the cookie-aware ``self.opener`` is used.
        """
        if opener is None:
            opener = self.opener
        return get(url, headers=headers, opener=opener)

    def post(self, url, form, headers=None, opener=None):
        """Send a POST request with ``form`` as the form data.

        ``opener`` overrides the session's own opener when given; otherwise
        the cookie-aware ``self.opener`` is used.
        """
        if opener is None:
            opener = self.opener
        return post(url, form, headers=headers, opener=opener)


def get(url, headers=None, opener=None):
    """Issue a GET request (no form data) and return what ``urlrequsets`` returns."""
    result = urlrequsets(url, form=None, headers=headers, opener=opener)
    return result

def post(url, form, headers=None, opener=None):
    """Issue a request carrying ``form`` and return what ``urlrequsets`` returns."""
    result = urlrequsets(url, form=form, headers=headers, opener=opener)
    return result
import json
# 1.傳入url
# 2.user_agent
# 3.headers
# 4.定義Request
# 5.urlopen
# 6.返回byte數組
def urlrequsets(url, form=None, headers=None, opener=None):
    """Fetch ``url`` and return the raw response body as bytes.

    Args:
        url: The URL to request.
        form: Optional form fields accepted by ``urllib.parse.urlencode``.
            When not ``None`` (even if empty) it is URL-encoded, encoded as
            UTF-8 and sent as the request body, which makes the request a
            POST. When ``None`` a plain GET is sent.
        headers: Optional request headers. When ``None`` a default header
            set containing only a desktop Chrome ``User-Agent`` is used.
        opener: Optional object with an ``open(request)`` method (for
            example the result of ``urllib.request.build_opener``). When
            ``None`` the request is sent with ``urllib.request.urlopen``.

    Returns:
        The response body as ``bytes``. If an ``HTTPError`` or ``URLError``
        occurs, the error is printed and ``b''`` is returned.
    """
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
        }

    if form is not None:
        # POST: dict -> "k=v&k2=v2" string -> bytes.
        # The form is deliberately not printed; it may contain credentials.
        form_bytes = parse.urlencode(form).encode('utf-8')
        req = request.Request(url, data=form_bytes, headers=headers)
    else:
        # GET
        req = request.Request(url, headers=headers)

    open_request = opener.open if opener is not None else request.urlopen

    try:
        # The context manager closes the underlying connection after reading.
        with open_request(req) as response:
            return response.read()
    except HTTPError as h:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print(h)
    except URLError as u:
        print(u)

    return b''
if __name__ == '__main__':
    # Smoke test: fetch the Baidu home page with a plain GET and show the
    # raw bytes that come back.
    #
    # A POST can be tried the same way, e.g.
    #     post('http://fanyi.baidu.com/sug', {'kw': '哈哈'})
    demo_url = 'http://www.baidu.com'
    html_byte = get(demo_url)
    print(html_byte)

調用上面的內容:

from tuozhan_all import Session,post,get

import json




# 1. Login endpoint.
url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2018722359875'
# 2. Login form fields.
# NOTE(review): account identifier and password value are hard-coded here;
# consider loading them from the environment or a config file instead.
# NOTE(review): 'f' is already percent-encoded, and urlencode() will encode
# the '%' characters again — confirm this is what the server expects.
form = {
    'email': '17600015762',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '000e2c3c99f8a73a61287bcaaa16d53f11dea4f308438ff687eb64c557a14962',
    'rkey': 'ccfaa6b14a7da2899fccd0a15cbd7b13',
    'f': 'http%3A%2F%2Fwww.renren.com%2F966927992',
}


s = Session()
# Pass the session's cookie-aware opener explicitly so the cookies set by the
# login response are stored and sent again on the next request.
html_byte = s.post(url, form, opener=s.opener)
print(html_byte)

# An empty body cannot be parsed as JSON; stop with a clear message instead.
if not html_byte:
    raise SystemExit('login request failed: empty response')

# Parse the JSON response into a dict.
res_dict = json.loads(html_byte.decode('utf-8'))

home_url = res_dict.get('homeUrl')
if not home_url:
    raise SystemExit('login failed: no homeUrl in response: %r' % (res_dict,))

# Visit the home page through the same session so the login cookies are sent.
html_byte = s.get(home_url, opener=s.opener)
print(html_byte)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章