Python 利用 selenium 庫爬取 python123 官網的練習題及答案

先下載並安裝 selenium 庫,再安裝與瀏覽器對應的 webdriver(本文代碼使用 Chrome)

具體安裝步驟請移步百度官網

最近在做 python123 官網的一些 Python 練習題,做完後想整理一下,但題目太多,就想省點事,寫了一個很簡陋的爬蟲。廢話不多說,看代碼吧:它可以直接獲取你做過的所有題目,包括答案以及代碼。

# _*_ coding:utf-8  _*_
# @Time   : 2020-5-9 15:42
# @Author  : BGLB
# @Software : PyCharm
from time import sleep

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By

# Module-level Chrome driver shared by the scraping functions below.
# It is created as a side effect of running (or importing) this script.
web = Chrome()
# Maximize the browser window before any page is loaded.
web.maximize_window()


def get_element():
    """Log in to python123.io and export ten entries of course 1521.

    The flow is: open the login page, type the account name and password,
    submit the form, open the course intro page, click the ``<span>`` whose
    text is ``10``, and then pass each of the first ten entries of the
    listing to :func:`tumu_chuli`.

    The first nine entries are processed with ``flag=True`` and the tenth
    with ``flag=False``; see :func:`tumu_chuli` for what the flag selects.

    The browser session is always shut down when the function finishes,
    including when a lookup or click raises.

    :return: None
    :raises IndexError: if the listing holds fewer than ten entries.
    """
    try:
        web.get("https://www.python123.io/index/login")
        username_input = web.find_element(
            By.XPATH,
            '//*[@id="links"]/div[1]/div[2]/div/div[1]/div[1]/div/div[1]/div/div[1]/form/div[1]/div/input')
        password_input = web.find_element(
            By.XPATH,
            '//*[@id="links"]/div[1]/div[2]/div/div[1]/div[1]/div/div[1]/div/div[1]/form/div[2]/div/input')

        # Replace both values with the credentials of your own account.
        username_input.send_keys('[email protected]')  # python123 account name
        password_input.send_keys('abc123abcd1234')  # python123 account password

        web.find_element(
            By.XPATH,
            '//*[@id="links"]/div[1]/div[2]/div/div[1]/div[1]/div/div[1]/div/div[1]/div[3]/div[1]/button').click()
        sleep(1)  # fixed pause for the login request to complete
        web.get('https://www.python123.io/student/courses/1521/intro')
        sleep(2)
        # NOTE(review): presumably a pager / page-size control labelled "10";
        # confirm against the live page.
        web.find_element(By.XPATH, '//span[text()=10]').click()

        for i in range(10):
            # Look the listing up again on every pass: the elements found
            # earlier go stale once the browser has navigated away and back.
            tumu_list = web.find_elements(
                By.XPATH,
                '//*[@id="app"]/div/div[1]/div[1]/div[2]/section/div/div/div[2]/div/section/div/div/div[2]/div[2]/div/div[2]/div[2]/div[2]/div')
            # Only the tenth entry (index 9) is handled with flag=False.
            tumu_chuli(tumu_list[i], i != 9)
            # Step back through the browser history three times; presumably
            # this returns to the listing page - verify if the site changes.
            web.back()
            web.back()
            web.back()
            sleep(2)
    finally:
        # quit() ends the whole driver session (browser and driver process),
        # whereas close() would only close the current window.
        web.quit()


def save(failname, content):
    """Append text to a UTF-8 encoded file.

    :param failname: path of the file to append to; it is created when it
        does not exist yet
    :param content: text to append; written as given, no newline is added
    :return: None
    """
    # The ``with`` block closes the file on exit, so no explicit close().
    with open(failname, 'a', encoding="utf-8") as f:
        f.write(content)


def tumu_chuli(list_timu, flag):
    """Export one entry of the course listing to its own text file.

    The output file name is the first three characters of the entry's
    ``<b>`` text plus ``.txt``. The entry is then clicked to open it.

    * ``flag`` truthy: open the first card, save its ``<b>`` heading and the
      text of every ``.card`` element inside it, go back, open the second
      card and export two problems with :func:`bs_chuli`.
      (NOTE(review): the first card presumably holds the multiple-choice
      questions and the second the programming problems - confirm.)
    * ``flag`` falsy: open the only card and export four problems with
      :func:`bs_chuli`.

    :param list_timu: WebElement of the listing entry to process
    :param flag: selects which of the two page layouts above is expected
    :return: None
    """
    # Read the title before clicking: the click navigates away from the
    # listing, after which this element can no longer be queried.
    failname = list_timu.find_element(By.TAG_NAME, 'b').text[0:3] + ".txt"
    list_timu.click()
    sleep(2)

    if flag:
        web.find_element(
            By.XPATH,
            '//*[@id="group-wrapper"]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div[2]/div/div[1]/div/div/div[1]').click()

        xuanze = web.find_element(
            By.XPATH,
            '//*[@id="group-wrapper"]/div/div[2]/div/div/div/div[2]/div/div[1]/div/div/div[1]')
        heading = xuanze.find_element(By.TAG_NAME, 'b').text
        cards = xuanze.find_elements(By.CLASS_NAME, 'card')
        save(failname, heading)
        for card in cards:
            save(failname, "\n" + card.text + "\n")
        web.back()
        sleep(2)

        web.find_element(
            By.XPATH,
            '//*[@id="group-wrapper"]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div[2]/div/div[2]/div/div/div/div[1]').click()
        problem_count = 2
    else:
        web.find_element(
            By.XPATH,
            '//*[@id="group-wrapper"]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div[2]/div/div/div/div/div/div[1]').click()
        problem_count = 4

    # bs_chuli() advances the page itself, so calling it repeatedly walks
    # through consecutive problems.
    for _ in range(problem_count):
        bs_chuli(failname)
    print(failname+"寫入完成!")


# Handling of a single programming problem
def bs_chuli(failname):
    """Append the programming problem currently on screen to ``failname``.

    Reads the problem statement and the submitted answer, clicks the third
    tab of the answer panel, reads the text shown there, clicks the second
    ``<button>`` on the page, and finally appends everything to the file
    through :func:`save`.

    NOTE(review): the third tab presumably shows the reference answer and
    the button presumably moves on to the next problem - confirm against
    the live page.

    :param failname: path of the text file to append to
    :return: None
    """
    # The site's code display contains special marker characters; turn the
    # middle dot into a plain space and drop the other three in one pass.
    marker_table = str.maketrans('·', ' ', '¤¬¶')

    sleep(1)
    bs_timu = web.find_element(By.CLASS_NAME, 'mce-content-body').text
    bs_daan_my = web.find_element(
        By.XPATH,
        '/html/body/div/div/div[1]/div[1]/div[2]/section/div/div/div[2]/div/section/div/div/div[2]/div/div/div/div[2]/div/div/div/div[1]/div/div[3]/div/div/div/div[3]/div/div/div/div[2]/div/div[3]'
    ).text.translate(marker_table)
    web.find_element(
        By.XPATH,
        '/html/body/div/div/div[1]/div[1]/div[2]/section/div/div/div[2]/div/section/div/div/div[2]/div/div/div/div[2]/div/div/div/div[1]/div/div[3]/div/div/div/div[2]/ul/li[3]').click()
    sleep(1)  # fixed pause for the tab content to render
    bs_daan_cankao = web.find_element(
        By.XPATH,
        '/html/body/div/div/div[1]/div[1]/div[2]/section/div/div/div[2]/div/section/div/div/div[2]/div/div/div/div[2]/div/div/div/div[1]/div/div[3]/div/div/div/div[5]/div/div/div/div').text
    web.find_element(By.XPATH, '//button[2]').click()
    save(failname, "\n"+bs_timu+"\n【我的答案】\n"+bs_daan_my+"\n"+bs_daan_cankao)
    sleep(1)


# Script entry point: run the whole login-and-export flow immediately.
get_element()

代碼的變量命名有點 low,大概寫了一個下午,還是功力不夠哇。要是能直接保存爲好看的 md 格式就完美了,分享的時候會更加方便。慢慢加油吧!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章