美團酒店信息抓取，部分代碼

原創

2019-05-07 08:11

from selenium import webdriver
import time

from selenium.webdriver.chrome.options import Options

def chrome_driver(driver, url, wait_seconds=5):
    """Open a hotel listing page and collect the detail-page links on it.

    Args:
        driver: Selenium WebDriver (or compatible object) providing ``get``,
            ``execute_script`` and ``find_elements``.
        url: Address of the listing page to load.
        wait_seconds: Seconds to pause after scrolling and before the links
            are read. Defaults to 5, the delay that used to be hard-coded.

    Returns:
        list: The ``href`` of every matched anchor, in the order the driver
        returned them. Anchors without an ``href`` are skipped so callers
        never receive ``None`` as a URL.

    The driver is left open; closing it is the caller's responsibility.
    """
    driver.get(url)
    # Scroll the document down to 1000px via JavaScript, then wait.
    # NOTE(review): presumably this gives lazily rendered entries time to
    # appear -- confirm against the live page.
    driver.execute_script("var q=document.documentElement.scrollTop=1000")
    time.sleep(wait_seconds)

    # The generic find_elements(by, value) form is used because the
    # find_elements_by_* helpers were removed in Selenium 4.3; the literal
    # "xpath" is the value of By.XPATH.
    anchors = driver.find_elements(
        "xpath", '//*[@id="list-view"]/div/article/div[2]/h3/a')
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    return [href for href in hrefs if href]


def driver_paser(driver, a, wait_seconds=5):
    """Open one hotel detail page and extract its header text and deal items.

    Args:
        driver: Selenium WebDriver (or compatible object) providing ``get``,
            ``execute_script``, ``find_elements`` and ``find_element``.
        a: URL of the detail page to load.
        wait_seconds: Seconds to pause after scrolling and before the page
            is read. Defaults to 5, the delay that used to be hard-coded.

    Returns:
        list: A two-element list ``[header, items]`` where ``header`` is the
        text of the ``#poiDetail`` header span followed by a full-width
        colon, and ``items`` holds one list of text lines per matched
        ``<li>`` in the ``#deal`` section.

    Any exception raised by ``driver.find_element`` when the header span is
    missing is propagated unchanged.
    """
    driver.get(a)
    # Scroll the document down to 1000px via JavaScript, then wait.
    # NOTE(review): presumably this gives lazily rendered content time to
    # appear -- confirm against the live page.
    driver.execute_script("var q=document.documentElement.scrollTop=1000")
    time.sleep(wait_seconds)

    # The generic find_element(s)(by, value) form is used because the
    # find_element(s)_by_* helpers were removed in Selenium 4.3; the literal
    # "css selector" is the value of By.CSS_SELECTOR.
    deal_items = driver.find_elements(
        "css selector",
        "#deal > div > div:nth-child(2) > div.deal-section > ul > span > li")
    header_text = driver.find_element(
        "css selector",
        "#poiDetail > div > div > div.base-info > div > div.relative > div.relative.clear > span").text

    # Each item's text is split into its individual lines.
    item_lines = [item.text.split("\n") for item in deal_items]
    return [header_text + "：", item_lines]

def main():
    """Scrape the Beijing hotel listing and print the collected details.

    Starts a headless Chrome session, gathers the detail-page URLs from the
    listing page with ``chrome_driver``, parses each one with
    ``driver_paser`` and prints the combined list. The browser is always
    shut down afterwards, even if scraping raises.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Location of the chromedriver binary on the author's machine.
    path = r'E:\LiuLanQi\chromedriver.exe'
    # NOTE(review): the executable_path/chrome_options keywords are kept as
    # written; check them against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
    try:
        url = "https://hotel.meituan.com/beijing/"
        a_url = chrome_driver(driver, url)
        text_list = [driver_paser(driver, a) for a in a_url]
    finally:
        # Without quit() the headless Chrome and chromedriver processes
        # stay alive after the script ends.
        driver.quit()
    print(text_list)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

發表評論

所有評論

還沒有人評論，想成為第一個評論的人嗎？請在上方評論欄輸入並點擊發布。

美團酒店信息抓取，部分代碼

使用neovim打造go ide(支持代碼跳轉, 代碼補全, 實時語法檢查)

挑戰程序設計競賽 2.3章習題 poj 3046 Ant Counting

Shell/Python中的用戶名獲取

selenium自動化模擬

飛言情讀物抓取---bs4使用

flask-增刪改查

urllib使用及代理池

scrapy工作原理圖

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結