问财网股票信息爬虫案例

原創

2020-06-16 03:31

股票爬虫案例

from selenium import webdriver
from my_fake_useragent import UserAgent
from lxml import etree
import json

"""
序号，股票代码，股票简称，现价（元），涨跌幅（%）
"""


def _first_text(node, path):
    """Return the first result of evaluating *path* on *node*, or ''.

    ``node.xpath`` returns a list; when it is empty (the cell is missing or
    has a different structure) an empty string is returned instead of
    raising ``IndexError``, so the row keeps its column count.
    """
    matches = node.xpath(path)
    return matches[0] if matches else ''


def selenium_test():
    """Scrape the iwencai stock-pick result page and print the rows.

    Loads the search URL in headless Chrome with a random user agent, parses
    the rendered page source with lxml, and prints a list of rows. Each row
    is ``[index, stock code, stock name, current price, change percent]``
    (column meaning taken from the original variable names; the values are
    the raw text nodes, not converted to numbers).

    The browser is always shut down, even if loading the page fails.
    """
    # Configure Chrome.
    chrome_options = webdriver.ChromeOptions()
    ua = UserAgent().random  # pick a random user-agent string
    print('user-agent: ', ua)
    chrome_options.add_argument('user-agent=' + str(ua))
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_argument('blink-settings=imagesEnabled=false')
    chrome_options.add_argument('--headless')  # run without a visible window
    chrome_options.add_argument('--disable-gpu')  # disable GPU acceleration

    url = 'http://www.iwencai.com/stockpick/search?typed=1&preParams=&ts=1&f=1&qs=result_rewrite&selfsectsn=&querytype=stock&searchfilter=&tid=stockpick&w=%E6%8D%A2%E6%89%8B%E7%8E%87%3E5%25&queryarea='

    # Fetch the page. The finally clause guarantees the Chrome process is
    # released even when the request raises.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        # NOTE(review): page_source is read immediately after get(); if the
        # tables are filled in asynchronously an explicit wait may be needed.
        page_source = driver.page_source
    finally:
        driver.quit()

    # To keep a copy of the page for debugging:
    # with open('demo.html', 'w', encoding='utf8') as fp:
    #     fp.write(page_source)

    # Parse the data. The result is split across two tables: a fixed one
    # (index / code / name) and a scrolling one (price / change percent).
    tree = etree.HTML(page_source)
    static_rows = tree.xpath("//div[@class='static_con']//table//tbody/tr")
    scroll_rows = tree.xpath("//div[@class='scroll_tbody_con']//tbody/tr")

    # Walk both tables in lockstep and merge each pair of rows into one list.
    info_all = [
        [
            _first_text(static_tr, "./td[1]/div/text()"),
            _first_text(static_tr, "./td[3]/div/text()"),
            _first_text(static_tr, "./td[4]/div/a/text()"),
            _first_text(scroll_tr, "./td[1]/div/text()"),
            _first_text(scroll_tr, "./td[2]/div/text()"),
        ]
        for static_tr, scroll_tr in zip(static_rows, scroll_rows)
    ]

    print(info_all)  # final result

    # To save the result:
    # with open('data.txt', 'w', encoding='utf-8') as fp:
    #     json.dump(info_all, fp)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    selenium_test()

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。

问财网股票信息爬虫案例

股票爬虫案例

诈骗（杀猪盘）网站进行渗透测试

Python 潮流周刊#50：我最喜欢的 Python 3.13 新特性！

【Python】保存gym截图

【译】使用 GitHub Copilot 作为你的编码 GPS

Linux 服务器配置-安装portainer-ce社区版

外行也能读懂的网络硬件设备功能原理速成

安装Auto-GPT

selenium 入門編程

獲取網站的ssl證書信息

selenium 效率優化

Python爬蟲, 抓取百度圖片

selenium 元素定位

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

问财网 股票信息爬虫案例

股票爬虫案例

问财网股票信息爬虫案例