美團酒店信息抓取,部分代碼

from selenium import webdriver
import time

from selenium.webdriver.chrome.options import Options

def chrome_driver(driver, url, wait_seconds=5):
    """Open a listing page and collect the links found in its list view.

    Args:
        driver: Selenium WebDriver (or any object exposing ``get``,
            ``execute_script`` and ``find_elements``) used to load the page.
        url: Address of the listing page to open.
        wait_seconds: Seconds to pause after scrolling before the page is
            queried. Defaults to 5, the delay that used to be hard-coded.

    Returns:
        A list with the ``href`` of every matching anchor, in the order the
        driver reports them. Anchors without an ``href`` are skipped so the
        caller never receives ``None`` as a URL.
    """
    driver.get(url)
    # Move the scroll position 1000px down via JavaScript
    # (presumably to trigger lazily loaded entries -- TODO confirm).
    driver.execute_script("var q=document.documentElement.scrollTop=1000")
    time.sleep(wait_seconds)

    # Use the generic find_elements(by, value) call rather than the
    # find_elements_by_xpath shortcut, which was removed in Selenium 4.
    # "xpath" is the string value of selenium's By.XPATH constant.
    anchors = driver.find_elements(
        "xpath", '//*[@id="list-view"]/div/article/div[2]/h3/a')
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    return [href for href in hrefs if href]


def driver_paser(driver, a, wait_seconds=5):
    """Open one detail page and extract its header text and deal entries.

    Args:
        driver: Selenium WebDriver (or any object exposing ``get``,
            ``execute_script``, ``find_elements`` and ``find_element``).
        a: URL of the detail page to open.
        wait_seconds: Seconds to pause after scrolling before the page is
            queried. Defaults to 5, the delay that used to be hard-coded.

    Returns:
        A two-item list ``[header + ":", rows]`` where ``header`` is the text
        of the span under ``#poiDetail`` (presumably the hotel name -- TODO
        confirm) and ``rows`` holds, for each ``li`` under ``#deal``, that
        element's text split into lines.
    """
    driver.get(a)
    # Move the scroll position 1000px down via JavaScript
    # (presumably to trigger lazily loaded entries -- TODO confirm).
    driver.execute_script("var q=document.documentElement.scrollTop=1000")
    time.sleep(wait_seconds)

    # Use the generic find_element(s)(by, value) calls rather than the
    # *_by_css_selector shortcuts, which were removed in Selenium 4.
    # "css selector" is the string value of selenium's By.CSS_SELECTOR.
    deal_items = driver.find_elements(
        "css selector",
        "#deal > div > div:nth-child(2) > div.deal-section > ul > span > li")
    header_text = driver.find_element(
        "css selector",
        "#poiDetail > div > div > div.base-info > div > div.relative > div.relative.clear > span").text

    rows = [item.text.split("\n") for item in deal_items]
    return [header_text + ":", rows]

def main():
    """Scrape the Beijing hotel listing page and print what was collected.

    Starts a headless Chrome session, gathers the detail-page links from the
    listing via ``chrome_driver``, runs ``driver_paser`` on each link and
    prints the list of results. The browser is always shut down, even when a
    page fails to load or parse.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Location of the chromedriver binary on the author's machine.
    path = r'E:\LiuLanQi\chromedriver.exe'
    # NOTE(review): executable_path / chrome_options are the Selenium 3
    # spellings of these arguments -- confirm the installed version accepts them.
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
    url = "https://hotel.meituan.com/beijing/"
    try:
        a_url = chrome_driver(driver, url)
        text_list = [driver_paser(driver, a) for a in a_url]
    finally:
        # Without quit() the headless Chrome and chromedriver processes are
        # left running after the script ends or crashes.
        driver.quit()
    print(text_list)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章