import requests
from bs4 import BeautifulSoup
def getHTML(url):
    """Download *url* and return the response body as text.

    The request uses a 30-second timeout. The body is decoded with the
    encoding that ``requests`` detects from the content
    (``apparent_encoding``) rather than the one announced by the server.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into exceptions so they take the failure path.
        r.raise_for_status()
    except requests.RequestException:
        # Deliberately best-effort: a failed download yields an empty page.
        # Only request-related errors are swallowed, so KeyboardInterrupt,
        # SystemExit and genuine programming errors still propagate.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
def getLocation(url):
    """Return the elements matching ``.topbar h2`` on the page at *url*.

    The page is downloaded with ``getHTML`` and parsed with ``html.parser``;
    the result of the CSS selection is returned unchanged. Going by the
    function name these elements presumably hold the location heading --
    confirm against the page markup.
    """
    document = BeautifulSoup(getHTML(url), 'html.parser')
    return document.select('.topbar h2')
def _fetch_soup(url):
    """Download *url* with ``getHTML`` and parse it using ``html.parser``."""
    return BeautifulSoup(getHTML(url), 'html.parser')


def getWeather(url):
    """Return the elements matching the ``.txt`` selector on the page at *url*.

    Presumably the weather description, going by the function name -- confirm
    against the page markup.
    """
    return _fetch_soup(url).select('.txt')


def getWet(url):
    """Return the elements matching the ``.b2`` selector on the page at *url*.

    Presumably the humidity readout, going by the function name -- confirm
    against the page markup.
    """
    return _fetch_soup(url).select('.b2')


def getWind(url):
    """Return the elements matching the ``.b3`` selector on the page at *url*.

    Presumably the wind readout, going by the function name -- confirm
    against the page markup.
    """
    return _fetch_soup(url).select('.b3')


def saveFile(text, path='C:\\Users\\Administrator\\Desktop\\爬蟲\\weather.txt'):
    """Append the text of every non-empty element in *text* to a file.

    Each element contributes one line: the result of its ``get_text()``
    followed by a newline. Elements whose ``len()`` is 0 are skipped.

    Args:
        text: Iterable of parsed elements; each must support ``len()`` and
            ``get_text()``.
        path: File to append to. Defaults to the location this script has
            always written to.
    """
    # Explicit UTF-8 keeps the write from failing on characters that the
    # platform's default code page cannot represent. The ``with`` block
    # closes the file even when ``get_text()`` or the write raises.
    # NOTE(review): a file previously written with another encoding will end
    # up with mixed encodings -- start a fresh file if that matters.
    with open(path, 'a', encoding='utf-8') as f:
        for t in text:
            # Kept as an explicit length check: the truthiness of a parsed
            # element is not guaranteed to agree with its length.
            if len(t) > 0:
                f.write(t.get_text() + "\n")


def getAll(url):
    """Append the location, weather, humidity and wind sections of *url*.

    The page is downloaded and parsed once, then each selector is applied to
    that single document, so the four sections come from the same snapshot
    and only one HTTP request is made. Sections are written in the same order
    as before: ``.topbar h2``, ``.txt``, ``.b2``, ``.b3``.
    """
    soup = _fetch_soup(url)
    for selector in ('.topbar h2', '.txt', '.b2', '.b3'):
        saveFile(soup.select(selector))
def main():
    """Scrape each configured city page and append its data to the output file.

    The pages are processed in a fixed order: Beijing first, then Changsha.
    """
    city_pages = (
        'https://m.tianqi.com/beijing/',
        'https://m.tianqi.com/changsha/',
    )
    for page in city_pages:
        getAll(page)


# Runs the scrape whenever this file is executed or imported.
main()