爬取CSDN百度搜索詞製作詞雲圖

通過Python爬取CSDN百度關鍵詞頁的搜索詞和入口頁次數,再通過Word Art製作詞雲圖。

效果:


login.py(登錄CSDN)

# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

# Placeholder CSDN credentials that run() types into the login form.
# Replace both values with a real account name and password before use.
username = "賬號"
password = "密碼"


def run():
    """Log in to CSDN with headless Chrome and save the session cookies.

    Opens https://passport.csdn.net/login, fills the form with the
    module-level ``username`` and ``password``, and writes the browser's
    cookies to ``cookie.json`` in the current working directory. The file
    holds the ``str()`` of a ``{name: value}`` dict, i.e. a Python literal
    rather than strict JSON.

    The browser is always shut down, even when a page element cannot be
    located or the cookie file cannot be written.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    browser = webdriver.Chrome(chrome_options=chrome_options)
    try:
        browser.get(url="https://passport.csdn.net/login")  # load the login page
        # NOTE(review): presumably this link switches the page to the
        # account/password form -- confirm against the live page layout.
        browser.find_element_by_xpath('//*[@id="app"]/div/div/div/div[2]/div[2]/ul/li[1]/a').click()
        browser.find_element_by_xpath('//*[@id="all"]').send_keys(username)
        browser.find_element_by_xpath('//*[@id="password-number"]').send_keys(password)
        # NOTE(review): presumably the form's submit button.
        browser.find_element_by_xpath('//*[@id="app"]/div/div/div/div[2]/div[2]/form/div/div[6]/div/button').click()
        # Fixed pause before reading cookies; presumably lets the login
        # request finish so the session cookies are set.
        time.sleep(1)
        # Flatten Selenium's list of cookie records into a name -> value map.
        cookies = {co["name"]: co["value"] for co in browser.get_cookies()}
        with open('cookie.json', 'w') as f:
            f.write(str(cookies))
        # Printed unconditionally: nothing here checks that the login
        # was actually accepted by the site.
        print("登陸成功!")
    finally:
        # Release the Chrome process even if any step above raised.
        browser.quit()

main.py(爬取搜索詞和入口頁次數)

# -*- coding: utf-8 -*-
import requests
from pyquery import PyQuery as pq
import login


def _load_cookies(path="cookie.json"):
    """Return the cookie dict that ``login.run()`` saved to *path*."""
    import ast

    with open(path, "r") as file:
        # literal_eval accepts only Python literals, so a tampered cookie
        # file cannot execute code the way eval() would.
        return ast.literal_eval(file.read())


def pa():
    """Log in, fetch the Baidu keyword page, and print keyword/count lines.

    Calls ``login.run()`` to refresh ``cookie.json``, checks the cookies
    against the user-info API (logging in once more if the response code is
    not 200), then downloads https://mp.csdn.net/data/baidukeyword and prints
    one ``"<keyword> <number>"`` line per table row.
    """
    login.run()  # log in and write cookie.json
    cookie = _load_cookies()

    # Validate the cookies; retry the login once if the API rejects them.
    h1 = requests.get("https://me.csdn.net/api/user/show", cookies=cookie)
    if h1.json()["code"] != 200:
        login.run()  # log in again
        cookie = _load_cookies()

    response = requests.get("https://mp.csdn.net/data/baidukeyword", cookies=cookie)
    rows = pq(response.text)("tbody")("tr").items()
    lines = []
    for row in rows:
        # Skip cells carrying a rowspan attribute so the remaining cells
        # keep the same indices in every row.
        cells = row("td:not([rowspan])")
        keyword = cells.eq(0).text()
        number = cells.eq(2).text()
        lines.append(keyword + " " + str(number) + "\n")
    print("".join(lines))


# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    pa()

使用Word Art製作詞雲圖【Word Art】

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章