"""Scrape hotel deal information from the Meituan Beijing hotel listing.

The script drives a headless Chrome instance through Selenium: it collects
the detail-page links from the listing page, visits each of them, and prints
the hotel header text together with the deal items found on every page.
"""
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# JavaScript executed after every navigation: moves the vertical scroll
# position of the document to 1000px (presumably so that lazily rendered
# content appears -- verify against the live page).
SCROLL_JS = "var q=document.documentElement.scrollTop=1000"

# Fixed pause, in seconds, after scrolling and before querying the DOM.
PAGE_SETTLE_SECONDS = 5

# Locators used on the listing page and on each detail page.
LISTING_LINK_XPATH = '//*[@id="list-view"]/div/article/div[2]/h3/a'
DEAL_ITEM_CSS = (
    "#deal > div > div:nth-child(2) > div.deal-section > ul > span > li"
)
HOTEL_INFO_CSS = (
    "#poiDetail > div > div > div.base-info > div > div.relative"
    " > div.relative.clear > span"
)


def _open_and_scroll(driver, url):
    """Navigate *driver* to *url*, scroll down and wait for the page to settle."""
    driver.get(url)
    driver.execute_script(SCROLL_JS)
    time.sleep(PAGE_SETTLE_SECONDS)


def chrome_driver(driver, url):
    """Return the detail-page URLs linked from the listing page at *url*.

    Args:
        driver: A Selenium WebDriver instance used to load the page.
        url: Address of the listing page.

    Returns:
        A list with the ``href`` of every anchor matched by
        ``LISTING_LINK_XPATH``; empty when nothing matches. Anchors without
        an ``href`` are skipped because they cannot be navigated to later.
    """
    _open_and_scroll(driver, url)
    anchors = driver.find_elements(By.XPATH, LISTING_LINK_XPATH)
    hrefs = (anchor.get_attribute("href") for anchor in anchors)
    return [href for href in hrefs if href]


def driver_paser(driver, a):
    """Open the detail page *a* and extract its header text and deal items.

    Args:
        driver: A Selenium WebDriver instance used to load the page.
        a: URL of the detail page.

    Returns:
        A two-element list ``[header, items]`` where ``header`` is the text
        of the element matched by ``HOTEL_INFO_CSS`` followed by ``":"`` and
        ``items`` holds, for every element matched by ``DEAL_ITEM_CSS``, its
        text split into lines.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the header
            element is not present on the page.
    """
    _open_and_scroll(driver, a)
    item_info = driver.find_elements(By.CSS_SELECTOR, DEAL_ITEM_CSS)
    hotel_info = driver.find_element(By.CSS_SELECTOR, HOTEL_INFO_CSS).text
    text_list = [info.text.split("\n") for info in item_info]
    return [hotel_info + ":", text_list]


def main():
    """Scrape every hotel linked from the Beijing listing and print the result."""
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    # Create the browser object.
    # NOTE(review): `executable_path` and `chrome_options` are deprecated
    # keyword arguments in newer Selenium releases -- confirm against the
    # installed version before upgrading.
    path = r'E:\LiuLanQi\chromedriver.exe'
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)

    url = "https://hotel.meituan.com/beijing/"
    try:
        a_url = chrome_driver(driver, url)
        text_list = [driver_paser(driver, a) for a in a_url]
    finally:
        # Always shut the browser down, even when scraping fails, so that no
        # Chrome / chromedriver process is left running.
        driver.quit()
    print(text_list)


if __name__ == '__main__':
    main()
美團酒店信息抓取,部分代碼
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.