Coursera: Using Python to Access Web Data

XML and JSON:

 

1. 略

 

2.

 

import re
A = open('regex_sum_17593.txt')
sum = 0
B = []
for line in A:
    B = re.findall('[0-9]+',line)
    if len(B) > 0:
        for i in range(len(B)):
            sum = sum + int(B[i])
print(sum)

 

 

 

 

 

 

3. 

 

import socket

mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(('data.pr4e.org', 80))
cmd = 'GET http://data.pr4e.org/intro-short.txt HTTP/1.0\r\n\r\n'.encode()
mysock.send(cmd)

while True:
    data = mysock.recv(512)
    if (len(data) < 1):
        break
    print(data.decode())
mysock.close()

 


 
4.1

 
from urllib.request import urlopen from bs4 import BeautifulSoup import ssl # Ignore SSL certificate errors ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE url = input('Enter - ') html = urlopen(url, context=ctx).read() # html.parser is the HTML parser included in the standard Python 3 library. # information on other HTML parsers is here: # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser soup = BeautifulSoup(html, "html.parser") # Retrieve all of the anchor tags tags = soup('span') Sum = 0 count = 0 for tag in tags: Sum = Sum + int(tag.contents[0]) count = count + 1 print('Count', count) print('Sum', Sum)

 

 

4.2


 
import urllib.request, urllib.parse, urllib.error from bs4 import BeautifulSoup import ssl # Ignore SSL certificate errors ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE url = input('Enter - ') times = int(input('Enter count: ')) pos = int(input('Enter position: ')) html = urllib.request.urlopen(url, context=ctx).read() soup = BeautifulSoup(html, 'html.parser') print(url) # Retrieve all of the anchor tags tags = soup('a') for i in range(times): url = tags[pos-1].get('href', None) print(url) html = urllib.request.urlopen(url, context=ctx).read() soup = BeautifulSoup(html, 'html.parser') tags = soup('a') 

 

 

5.

 

 


 
import urllib.request, urllib.parse, urllib.error import xml.etree.ElementTree as ET num,count = 0,0 url = input('Enter location: ') print('Retrieving', url) uh = urllib.request.urlopen(url) data = uh.read().decode() print('Retrieved', len(data), 'characters') tree = ET.fromstring(data) results = tree.findall('comments/comment') for item in results: aa = item.find('count').text num = num + int(aa) count = count + 1 print('Count:',count) print('Sum:',num)

 
6.1

 
import urllib.request, urllib.parse, urllib.error import xml.etree.ElementTree as ET num,count = 0,0 url = input('Enter location: ') print('Retrieving', url) uh = urllib.request.urlopen(url) data = uh.read().decode() print('Retrieved', len(data), 'characters') tree = ET.fromstring(data) results = tree.findall('comments/comment') for item in results: aa = item.find('count').text num = num + int(aa) count = count + 1 print('Count:',count) print('Sum:',num)

 

6.2


 
import urllib.request, urllib.parse, urllib.error import json # Note that Google is increasingly requiring keys # for this API serviceurl = 'http://py4e-data.dr-chuck.net/geojson?' while True: address = input('Enter location: ') if len(address) < 1: break url = serviceurl + urllib.parse.urlencode( {'address': address}) print('Retrieving', url) uh = urllib.request.urlopen(url) data = uh.read().decode() print('Retrieved', len(data), 'characters') try: js = json.loads(data) except: js = None if not js or 'status' not in js or js['status'] != 'OK': print('==== Failure To Retrieve ====') print(data) continue aa = json.dumps(js, indent=4) #由一行調整縮進爲多行易讀模式 lat = js["results"][0]["place_id"] print('Place id', lat)

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章