# Stock crawler example (股票爬蟲案例)
from selenium import webdriver
from my_fake_useragent import UserAgent
from lxml import etree
import json
"""
序號,股票代碼,股票簡稱,現價(元),漲跌幅(%)
"""
def selenium_test():
    """Scrape the iwencai stock-pick search page and print the result rows.

    Launches a headless Chrome with a randomized User-Agent, loads the
    search URL (query: turnover rate > 5%), then reads two side-by-side
    tables from the rendered page:

    * static table  -> sequence number, stock code, stock short name
    * scroll table  -> current price, change percentage

    Rows from the two tables are zipped positionally into
    ``[seq, code, name, price, change_pct]`` lists and printed.

    Returns:
        None. Output goes to stdout only.
    """
    chrome_options = webdriver.ChromeOptions()
    ua = UserAgent().random
    print('user-agent: ', ua)
    chrome_options.add_argument('user-agent=' + str(ua))
    # Hide the automation banner / switch so the site is less likely to
    # detect that the browser is driven by selenium.
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Skip image loading for speed; headless + no GPU for server use.
    chrome_options.add_argument('blink-settings=imagesEnabled=false')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    url = 'http://www.iwencai.com/stockpick/search?typed=1&preParams=&ts=1&f=1&qs=result_rewrite&selfsectsn=&querytype=stock&searchfilter=&tid=stockpick&w=%E6%8D%A2%E6%89%8B%E7%8E%87%3E5%25&queryarea='
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        html = etree.HTML(driver.page_source)
        # The page splits each logical row across two tables that scroll
        # together; both are read and re-joined by position below.
        tr_list_1 = html.xpath("//div[@class='static_con']//table//tbody/tr")
        tr_list_2 = html.xpath("//div[@class='scroll_tbody_con']//tbody/tr")
        # Static table: sequence number, stock code, stock short name.
        info_all_1 = [
            [
                tr.xpath("./td[1]/div/text()")[0],
                tr.xpath("./td[3]/div/text()")[0],
                tr.xpath("./td[4]/div/a/text()")[0],
            ]
            for tr in tr_list_1
        ]
        # Scroll table: current price, change percentage.
        info_all_2 = [
            [
                tr.xpath("./td[1]/div/text()")[0],
                tr.xpath("./td[2]/div/text()")[0],
            ]
            for tr in tr_list_2
        ]
        # Positional join: row i of both tables describes the same stock.
        info_all = [left + right for left, right in zip(info_all_1, info_all_2)]
        print(info_all)
    finally:
        # Always release the browser process, even if scraping raised.
        driver.quit()
if __name__ == '__main__':
    # Run the scraper only when executed as a script, not on import.
    selenium_test()