問財網 (iwencai.com) Stock Information Crawler Example

A stock-screener crawler example: load the dynamically rendered results page in headless Chrome via Selenium, then parse the tables with lxml.

from selenium import webdriver
from my_fake_useragent import UserAgent
from lxml import etree
import json

"""
序號,股票代碼,股票簡稱,現價(元),漲跌幅(%)
"""


def selenium_test():
    # Configure the Chrome browser
    chrome_options = webdriver.ChromeOptions()  # create an options object
    ua = UserAgent().random()  # pick a random UA (in my_fake_useragent, random() is a method, not a property)
    print('user-agent: ', ua)
    chrome_options.add_argument('user-agent=' + str(ua))
    # hide the "Chrome is being controlled by automated test software" automation flag
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_argument('blink-settings=imagesEnabled=false')  # skip image loading to speed things up
    chrome_options.add_argument('--headless')  # headless mode
    chrome_options.add_argument('--disable-gpu')  # disable GPU acceleration

    # Send the request; the w= parameter is the URL-encoded query "換手率>5%" (turnover rate > 5%)
    url = 'http://www.iwencai.com/stockpick/search?typed=1&preParams=&ts=1&f=1&qs=result_rewrite&selfsectsn=&querytype=stock&searchfilter=&tid=stockpick&w=%E6%8D%A2%E6%89%8B%E7%8E%87%3E5%25&queryarea='
    driver = webdriver.Chrome(options=chrome_options)
    driver.get(url)

    # Grab the page source
    html = driver.page_source
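    # NOTE: page_source returns whatever the browser has rendered so far; if
    # the JavaScript-built table has not finished loading, the XPath queries
    # below come back empty (see the explicit-wait sketch after this script).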

    # Save the page source to demo.html (uncomment to inspect it offline)
    # with open('demo.html', 'w', encoding='utf8') as fp:
    #     fp.write(html)

    # Parse the data. The result table is rendered in two pieces: a fixed
    # left table (index / code / name) and a horizontally scrollable right
    # table (price / change), so their rows are collected separately.
    html = etree.HTML(html)
    tr_list_1 = html.xpath("//div[@class='static_con']//table//tbody/tr")
    tr_list_2 = html.xpath("//div[@class='scroll_tbody_con']//tbody/tr")

    info_all_1 = list()
    info_all_2 = list()

    # Fixed table: index, stock code, stock name
    for tr in tr_list_1:
        info_line = list()
        xuhao = tr.xpath("./td[1]/div/text()")[0]    # index (序號)
        bianhao = tr.xpath("./td[3]/div/text()")[0]  # stock code (股票代碼)
        name = tr.xpath("./td[4]/div/a/text()")[0]   # stock name (股票簡稱)
        info_line.append(xuhao)
        info_line.append(bianhao)
        info_line.append(name)
        info_all_1.append(info_line)
    # print(info_all_1)

    # Scrollable table: current price, change (%)
    for tr in tr_list_2:
        info_line = list()
        xianjia = tr.xpath("./td[1]/div/text()")[0]     # current price (現價)
        zhangdiefu = tr.xpath("./td[2]/div/text()")[0]  # change (漲跌幅)
        info_line.append(xianjia)
        info_line.append(zhangdiefu)
        info_all_2.append(info_line)
    # print(info_all_2)

    info_all = list()
    for sub_list_1, sub_list_2 in zip(info_all_1, info_all_2):  # walk both lists in lockstep, merging each pair of rows into one record
        list_temp = sub_list_1 + sub_list_2
        info_all.append(list_temp)

    print(info_all)  # final result

    # Save the results as JSON (uncomment to enable)
    # with open('data.txt', 'w', encoding='utf-8') as fp:
    #     json.dump(info_all, fp)
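
    # A minimal sketch for reading the saved results back (assumes data.txt
    # was written by the json.dump call above):
    # with open('data.txt', 'r', encoding='utf-8') as fp:
    #     info_all = json.load(fp)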

    # time.sleep(3)   # wait a moment (needs "import time" if enabled)
    driver.quit()   # quit the browser


if __name__ == '__main__':
    selenium_test()
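
Because the screener table is built by JavaScript, on a slow connection page_source can be captured before the rows exist. Below is a minimal sketch of an explicit wait using Selenium's WebDriverWait API; the 10-second timeout and the insertion point (right after driver.get(url)) are assumptions to tune as needed:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Block for up to 10 seconds until at least one row of the fixed-column
# table is present in the DOM; after that, page_source is safe to read.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(
        (By.XPATH, "//div[@class='static_con']//tbody/tr")
    )
)
html = driver.page_source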
