Python3 selenium爬取蘇寧商家聯繫電話

此處使用了 selenium 插件,瀏覽器使用的是火狐(Firefox),爬取到的信息會存儲到 csv 表格裏面。
環境安裝的細節這裏不多講,如果條件不滿足,請自行百度安裝。

# -*- coding: utf-8 -*-
"""
Created on Wed Dec 11 20:21:04 2019

@author: Administrator
"""
import codecs
import csv
import random
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Number of search-result pages to crawl (defaults to 50).
yema = 50
# Start URL: the result page shown after searching Suning for a specific
# product keyword.
# NOTE: searching for an industry/category term produces a differently
# structured result page, so the selectors used further down may not match.
wangzhi = "https://search.suning.com/%E4%BC%91%E9%97%B2%E9%A3%9F%E5%93%81/"
# Output CSV, opened in append mode as UTF-8 so Chinese text is written
# intact. The built-in open() supersedes codecs.open(), and newline='' is
# what the csv module asks for so that it alone controls row terminators.
f = open('suning.csv', 'a', encoding='utf-8', newline='')
csv_writer = csv.writer(f)
# Chrome alternative:
# browser1 = webdriver.Chrome(executable_path="chromedriver")
# Firefox: expects a `geckodriver` executable resolvable by that bare name.
browser1 = webdriver.Firefox(executable_path="geckodriver")

def browser_1(url,browser=browser1):
    """Navigate ``browser`` to ``url`` and hand the same driver back.

    Args:
        url: Address to load.
        browser: WebDriver instance to drive. Defaults to the module-level
            ``browser1`` (bound once, when this function is defined), so
            every call without an explicit driver reuses one browser window.

    Returns:
        The ``browser`` object that was passed in, now showing ``url``.
    """
    browser.get(url)
    return browser

# Load the search-result page in the shared browser window.
browser = browser_1(wangzhi)
# If an overlay blocks the page it can be dismissed first, e.g.:
#   browser.find_element_by_class_name("close-btn").click()
# (the "close-btn" class comes from the original author's notes -- verify
# against the live page before relying on it).

# Links harvested from every crawled result page.
urls = []
# Running 1-based counter of harvested links; used only in progress output.
nub = 1
# Links containing this path fragment are skipped.
# NOTE(review): presumably the id of Suning's own store -- TODO confirm.
vip = "/0000000000/"
# Visit exactly `yema` pages: every page is scraped, and the pager is only
# clicked when another page is still wanted. (The previous `range(yema-1)`
# loop loaded the last page but never read it.)
for i in range(yema):
    print(i)
    # Jump to the bottom of the page, then wait a randomized interval.
    # NOTE(review): presumably this lets lazily loaded items render -- confirm.
    js="var q=document.documentElement.scrollTop=100000"
    browser.execute_script(js)
    time.sleep(random.randint(5,10))
    shops = browser.find_elements_by_class_name("sellPoint")
    for shop in shops:
        url = shop.get_attribute('href')
        # get_attribute() returns None when the element has no href; testing
        # `vip not in None` would raise TypeError, so skip such elements.
        if not url or vip in url:
            continue
        urls.append(url)
        print(i,"--",nub,"--",url)
        nub += 1
    print(i,"頁")
    if i == yema - 1:
        # Last requested page has been scraped; no need to page forward.
        break
    # Scroll back near the top before looking for the pager.
    # NOTE(review): presumably so the "next" control is clickable -- confirm.
    js="var q=document.documentElement.scrollTop=500"
    browser.execute_script(js)
    time.sleep(random.randint(3,5))
    try:
        next_page = browser.find_element_by_class_name("next")
    except NoSuchElementException:
        # Fewer result pages than requested: keep what was collected
        # instead of crashing and losing every harvested link.
        print(i,"-- no next page, stopping early")
        break
    time.sleep(random.randint(3,5))
    next_page.click()
    time.sleep(random.randint(5,8))
print("---"*10)
# Visit every harvested link and record company name, phone and address.
# The try/finally guarantees the CSV file is closed and the browser (with
# its driver process) is shut down even if a page load fails midway.
try:
    for ul in urls:
        browser_shop = browser_1(ul)
        try:
            # Company name
            chead_companyName = browser_shop.find_element_by_id("chead_companyName")
            # Phone number
            chead_telPhone = browser_shop.find_element_by_id("chead_telPhone")
            # Address
            chead_companyAddress = browser_shop.find_element_by_id("chead_companyAddress")
            # Click the store-name element before reading the fields.
            # NOTE(review): presumably this reveals the seller-info panel so
            # the fields have visible text -- TODO confirm on the live page.
            browser_shop.find_element_by_class_name("storname").click()
        except NoSuchElementException:
            # A page without the expected layout should not abort the run.
            print(ul,"-- skipped: shop info elements not found")
            continue
        # Empty fields are stored as the literal string "null".
        companyName = chead_companyName.text or "null"
        telPhone = chead_telPhone.text or "null"
        companyAddress = chead_companyAddress.text or "null"
        print(companyName,"==",telPhone,"==",companyAddress)
        csv_writer.writerow([companyName,telPhone,companyAddress])
finally:
    f.close()
    # Every page was loaded in the one shared browser; quit it once at the
    # very end so no browser/driver process is left running.
    browser.quit()
print("結束")
發佈了13 篇原創文章 · 獲贊 14 · 訪問量 4萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章