文章目錄

添加並查看用戶代理


import urllib.request

def load_baidu(url="https://www.baidu.com", output_path="02header.html"):
    """Fetch *url* with a browser-like User-Agent and save the page to disk.

    A ``urllib.request.Request`` is created for *url*, the User-Agent header
    is attached with ``add_header``, the response object and the full request
    URL are printed, and the UTF-8 decoded body is written to *output_path*.

    Args:
        url: Address to request. Defaults to the Baidu home page.
        output_path: File that receives the decoded page.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Browser identification string sent instead of urllib's default.
    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Safari/537.36"
    )

    # Headers belong to the Request object; urlopen() itself has no header
    # parameter. The one-step alternative is
    # urllib.request.Request(url, headers={"User-Agent": user_agent}).
    request = urllib.request.Request(url)
    request.add_header("User-Agent", user_agent)

    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(request) as response:
        print(response)
        data = response.read().decode("utf-8")

    # Full URL the request was built for.
    print(request.get_full_url())

    # To inspect what was sent: request.headers holds every request header,
    # and request.get_header("User-agent") returns a single one. Note the
    # spelling: add_header() stores names with only the first letter
    # capitalised, and get_header() looks the name up verbatim.

    # Write with an explicit encoding so the result does not depend on the
    # platform's default locale encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(data)



# Run the demo only when this snippet is executed as a script, so that
# importing it does not trigger a network request.
if __name__ == "__main__":
    load_baidu()

隨機用不同用戶訪問瀏覽器

import urllib.request
import random

def load_baidu(url="http://www.baidu.com"):
    """Request *url* with a randomly chosen User-Agent and print that header.

    One of four browser identification strings is picked with
    ``random.choice`` on every call, attached to the request, and printed
    after the request has completed.

    Args:
        url: Address to request. Defaults to the Baidu home page.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    ]
    # A different browser identity may be presented on each call.
    random_user_agent = random.choice(user_agent_list)

    request = urllib.request.Request(url)
    request.add_header("User-Agent", random_user_agent)

    # The body is not needed here; the context manager only makes sure the
    # connection is closed instead of being left to the garbage collector.
    with urllib.request.urlopen(request):
        pass

    # add_header() stores the name as "User-agent", so look it up that way.
    print(request.get_header("User-agent"))

# Run the demo only when this snippet is executed as a script, so that
# importing it does not trigger a network request.
if __name__ == "__main__":
    load_baidu()

爲urlopen添加代理的功能

import urllib.request

def handler_openner(
    url="https://blog.csdn.net/weixin_43362002/article/details/104658199",
    output_path="05header.html",
):
    """Download *url* with a hand-built opener and save it to *output_path*.

    ``urllib.request.urlopen`` offers no way to plug in a proxy directly, so
    the underlying mechanism is shown instead: handlers are created, an
    opener is built from them, and the opener's ``open`` method performs the
    request. HTTP conventionally uses port 80 and HTTPS port 443.

    Args:
        url: Address to request.
        output_path: File that receives the UTF-8 decoded page.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Create handlers for both schemes so the custom handler is really the
    # one in use regardless of whether the URL is http or https.
    handlers = [urllib.request.HTTPHandler(), urllib.request.HTTPSHandler()]

    # Build our own opener from those handlers.
    opener = urllib.request.build_opener(*handlers)

    # Request the data through the opener; the context manager closes the
    # connection once the body has been read.
    with opener.open(url) as response:
        data = response.read().decode("utf-8")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(data)

# Run the demo only when this snippet is executed as a script, so that
# importing it does not trigger a network request.
if __name__ == "__main__":
    handler_openner()

添加一個ip地址

import urllib.request
def create_proxy_handler(
    url="https://blog.csdn.net/weixin_43362002/article/details/104658199",
    proxy=None,
    output_path="03header.html",
):
    """Download *url* through a proxy and save the page to *output_path*.

    Args:
        url: Address to request.
        proxy: Mapping of URL scheme to proxy address, in the form accepted
            by ``urllib.request.ProxyHandler``. When ``None``, a sample free
            proxy is used for both ``http`` and ``https`` requests.
        output_path: File that receives the UTF-8 decoded page.

    Raises:
        urllib.error.URLError: If the proxy or the target cannot be reached.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    if proxy is None:
        # Free proxy; the bare form "120.77.249.46:8080" is accepted as well.
        # A paid proxy embeds credentials:
        #   "http://username:password@host:port"
        proxy_address = "http://120.77.249.46:8080"
        # ProxyHandler only proxies the schemes present in the mapping. The
        # default URL is https, so an "http"-only mapping would be bypassed
        # without any warning; register the proxy for both schemes.
        proxy = {"http": proxy_address, "https": proxy_address}

    # Handler that routes matching requests through the proxy.
    proxy_handler = urllib.request.ProxyHandler(proxy)

    # Opener that uses the proxy handler.
    opener = urllib.request.build_opener(proxy_handler)

    # Send the request via the proxy; the context manager closes the
    # connection once the body has been read.
    with opener.open(url) as response:
        data = response.read().decode("utf-8")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(data)

# Run the demo only when this snippet is executed as a script, so that
# importing it does not trigger a network request.
if __name__ == "__main__":
    create_proxy_handler()

添加多個ip地址

import urllib.request

def proxy_user(url="http://www.baidu.com", timeout=1):
    """Try a list of proxies in turn and print what each one returns.

    For every proxy address the proxy mapping is printed, an opener using
    that proxy is built, and *url* is requested. The raw response bytes are
    printed on success; on failure the exception is printed and the loop
    moves on to the next proxy.

    Args:
        url: Address to request through each proxy.
        timeout: Seconds to wait for each request before giving up.
    """
    proxy_addresses = [
        "106.75.226.36:808",
        "120.77.249.46:8080",
        "61.135.217.7:80",
        "125.70.13.77:8080",
        "118.190.95.35:9001",
    ]
    for address in proxy_addresses:
        # ProxyHandler only applies a proxy to the schemes named in the
        # mapping. Keying it by "https" alone while requesting an http://
        # URL means the proxy is never used, so register both schemes.
        proxy = {"http": address, "https": address}
        print(proxy)

        # Build a handler and an opener for the current proxy.
        proxy_handler = urllib.request.ProxyHandler(proxy)
        opener = urllib.request.build_opener(proxy_handler)

        try:
            # The context manager closes the connection after reading.
            with opener.open(url, timeout=timeout) as response:
                print(response.read())
        except Exception as e:
            # Best-effort probe: free proxies fail in many different ways
            # (timeouts, refused connections, malformed replies), so report
            # the problem and continue with the next proxy.
            print(e)

# Run the demo only when this snippet is executed as a script, so that
# importing it does not trigger network requests.
if __name__ == "__main__":
    proxy_user()

知識小結

1.創建request

request = urllib.request.Request(url)

2.動態添加用戶代理

request.add_header("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")

3.獲取請求頭的信息(所有的頭的信息)

request_headers = request.headers
print(request_headers)

4.第二種方式打印headers的信息

#注意點:首字母需要大寫,其他字母都小寫
request_headers = request.get_header("User-agent")
print(request_headers)

5.每次請求的瀏覽器都是不一樣的

user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50"

    ]
#每次請求的瀏覽器都是不一樣的
random_user_agent = random.choice(user_agent_list)

6.爲urlopen添加代理的功能
系統的urlopen並沒有提供添加代理的功能，所以需要我們自定義這個功能
安全套接層（SSL）依賴第三方CA頒發的數字證書
http–>80端口和https–>443端口
urlopen之所以可以請求數據，是因爲它內部使用了handler處理器
因此我們可以用自己創建的opener來請求數據
7.添加代理
免費代理的寫法：
“http”:“http://ip地址:端口號” 例如

“http”:“http://120.77.249.46:8080”

寫法二：

“http”:“120.77.249.46:8080”

付費代理的寫法：
“http”:“http://賬號:密碼@ip地址:端口號” 例如

“http”:“http://xiaoming:密碼@ip地址:端口號”

8.代理IP分類
透明：對方知道我們真實的ip
匿名：對方不知道我們真實的ip，但知道我們使用了代理
高匿：對方不知道我們真實的ip，也不知道我們使用了代理

廖雪峯爬蟲第二節

文章目錄

添加並查看用戶代理

隨機用不同用戶訪問瀏覽器

爲urlopen添加代理的功能

添加一個ip地址

添加多個ip地址

知識小結

基於 Nginx Ingress + 雲效 AppStack 實現灰度發佈

12款高效開源Wiki系統推薦，打造團隊知識管理利器

C語言--右移左移

一個開源且全面的C#算法實戰教程

dotnet 基於 DirectML 控制檯運行 Phi-3 模型

自定義MyBatis插件

一款.NET開源、功能強大、跨平臺的繪圖庫 - OxyPlot

常用的 Git 指令

鼠標控制軟件有可能和虛擬機軟件產生衝突

sm4加密工具類

Leetcode44：通配符匹配（附視頻解析）

VMware15安裝Centos7圖解教程

全國大學生大數據技能競賽

阿爾法python練習(4-7答案)

大數據技術之HDFS

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結