from urllib import parse as p
import urllib.request
def use_urlparse(url):
    """Demonstrate urlparse() / urlunparse().

    urlparse(url=, scheme=, allow_fragments=) splits a URL into six
    components (scheme, netloc, path, params, query, fragment); the
    scheme= argument only takes effect when the URL itself has no scheme.
    urlunparse() recomposes those six components back into a URL string.

    Prints each component, then prints and returns the recomposed URL
    (the return value is new and backward compatible — the original
    returned None, which no caller used).
    """
    result = p.urlparse(url=url)
    print("协议:" + result[0])
    print("域名:" + result[1])
    print("访问路径:" + result[2])
    print("参数:" + result[3])
    print("查询条件:" + result[4])
    # The original demo omitted the sixth component; show it for completeness.
    print("锚点:" + result[5])
    # ParseResult is already a 6-tuple, so urlunparse() accepts it directly —
    # no need to copy it element-by-element into a list first.
    recomposed = p.urlunparse(result)
    print(recomposed)
    return recomposed
def use_urlsplit(url):
    """Demonstrate urlsplit() / urlunsplit().

    urlsplit() is like urlparse() except it does not separate the
    ``params`` component — it is merged into ``path``, leaving five
    fields instead of six. urlunsplit() is the matching recomposer.
    """
    parts = p.urlsplit(url=url)
    # Show the SplitResult named tuple as-is.
    print(parts)
    # Recompose the five components back into a single URL string.
    print(p.urlunsplit(parts))
# 5. urljoin(base, url): resolve a link against a base — the FIRST argument
#    is the base link, the SECOND is the new (possibly relative) link.
#    (The original comment mistakenly called both arguments the base link.)
def use_urljoin():
    """Demonstrate urljoin(): analyse the base link and fill in whatever
    parts the new link is missing.

    Fix: the original literals contained stray trailing spaces
    ('FAQ.html ' and 'http://www.baidu.com ') which leaked into the
    demonstrated output on older Pythons; they were clearly accidental
    and have been removed.
    """
    # A relative path is resolved against the base's scheme and netloc.
    print(p.urljoin('http://www.baidu.com', 'FAQ.html'))
    # An absolute second URL completely overrides the base.
    print(p.urljoin('http://www.baidu.com', 'https://cuiqingcai.com/FAQ.html'))
    print(p.urljoin('http://www.baidu.com/about.html', 'https://www.cuiqingcai.com/FAQ.html'))
    print(p.urljoin('http://www.baidu.com/about.html', 'https://cuiqingcai.com/FAQ.html?question=2'))
# 6. urlencode: turn a dict of request parameters into a query string.
def use_urlopen_data():
    """Demonstrate urlencode() together with urlopen()'s data= parameter.

    Encodes a parameter dict, POSTs it to baidu, and prints the raw
    response body. NOTE: performs live network I/O.
    """
    target = "http://www.baidu.com"
    params = {
        'wd': 'csdn',
    }
    # urlencode() turns the dict into 'wd=csdn'; .encode() converts it to
    # bytes, which urlopen() requires for data= (and which makes it a POST).
    payload = p.urlencode(params).encode()
    response = urllib.request.urlopen(url=target, data=payload)
    print(response.read())
# 7. Turn a query string back into a dict or a list of tuples.
def use_parse_qs_qsl(query):
    """Demonstrate parse_qs() and parse_qsl() on a raw query string."""
    # parse_qs() -> dict mapping each key to a list of values.
    as_dict = p.parse_qs(query)
    print(as_dict)
    # parse_qsl() -> list of (key, value) tuples, preserving order.
    as_pairs = p.parse_qsl(query)
    print(as_pairs)
# 8. quote: percent-encode / decode non-ASCII (e.g. Chinese) URL parameters.
def use_quote(word):
    """Demonstrate quote() / unquote() on a possibly non-ASCII word."""
    # Percent-encode the word so it is safe inside a URL query.
    encoded = p.quote(word)
    url = 'https://www.baidu.com/s?wd=' + encoded
    print(url)
    # unquote() reverses the percent-encoding.
    print(p.unquote(url))
if __name__ == '__main__':
    # Sample search-result URL and its bare query string, reused by the demos.
    sample_url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=%E6%83%85%E6%AD%8C&oq=yue&rsv_pq=dacbcb5f00043218&rsv_t=fae8tkAseCzI6Y%2FJ3EvIm%2BG4Zy1%2BnWO3oZ%2BaCJXmJwazl7yBr48QuZswiF4&rqlang=cn&rsv_enter=1&rsv_dl=tb&inputT=1744&rsv_sug3=11&rsv_sug1=6&rsv_sug7=100&rsv_sug2=0&rsv_sug4=1744'
    sample_query = 'ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=%E6%83%85%E6%AD%8C&oq=yue&rsv_pq=dacbcb5f00043218&rsv_t=fae8tkAseCzI6Y%2FJ3EvIm%2BG4Zy1%2BnWO3oZ%2BaCJXmJwazl7yBr48QuZswiF4&rqlang=cn&rsv_enter=1&rsv_dl=tb&inputT=1744&rsv_sug3=11&rsv_sug1=6&rsv_sug7=100&rsv_sug2=0&rsv_sug4=1744'
    # Run each demo in turn (use_urlopen_data performs live network I/O).
    use_urlparse(sample_url)
    use_urlsplit(sample_url)
    use_urljoin()
    use_urlopen_data()
    use_parse_qs_qsl(sample_query)
    use_quote('魏振东')
# NOTE(review): the lines below this script were scraped blog-page residue
# (the article title "How well do you know the urllib.parse module for
# crawler requests" plus "post a comment" boilerplate). They were not valid
# Python and made the file unrunnable, so they are preserved here only as
# this comment.