前幾天碰到一個需求:要從訪問日誌裏提取來自普陀區的訪問記錄。經過一系列的查詢和測試,終於完成了一個腳本,基本能滿足需求,可以從訪問日誌裏提取出普陀區的IP地址。代碼如下:
# -*- coding: UTF-8 -*-
from pymongo import MongoClient
from bs4 import BeautifulSoup
import json
from urllib.request import urlopen # Python3
from urllib.error import HTTPError
import socket
from flask import Flask,request
from flask_restful import Api,Resource,reqparse,abort
from click.utils import echo
from pymongo import MongoClient
import datetime
from time import sleep, ctime
import json
from _csv import Error
from mmap import PAGESIZE
from bson.json_util import default
from flask import jsonify
# Base URL of the IP lookup page; the address to query is appended to it.
ipCheckBaseUrl = "http://m.ip138.com/ip.asp?ip="
# Addresses that doCheckPutuoIp accepts immediately, without querying the
# lookup page.
passIpPool = ['116.247.111.94','180.169.33.42']
class PutuoIPCheck(Resource):
    """REST resource that reports whether an IP address resolves to Putuo.

    The decision is made by fetching ``ipCheckBaseUrl + ip`` and checking
    whether the text of the page's ``<p class="result">`` elements contains
    "普陀". Addresses listed in ``passIpPool`` are accepted without a lookup.
    """

    def get(self):
        """Handle ``GET ?ip=<address>``.

        Returns:
            A JSON response with ``code`` 0 when the address is considered a
            Putuo address, and ``code`` 1 when it is not, or when the ``ip``
            query parameter is missing or is not a valid IP address.
        """
        # Function-scope import keeps this block self-contained.
        import ipaddress

        ip = request.args.get('ip')
        if ip is None:
            return jsonify({'code':1,'msg':'參數錯誤'})

        ip = ip.strip()
        try:
            # The value comes straight from the query string and is appended
            # to an outgoing URL, so reject anything that is not an IP literal.
            ipaddress.ip_address(ip)
        except ValueError:
            return jsonify({'code':1,'msg':'參數錯誤'})

        if self.doCheckPutuoIp(ip):
            return jsonify({'code':0,'msg':'是普陀區域IP'})
        return jsonify({'code':1,'msg':'非普陀區域IP'})

    def doCheckPutuoIp(self, ip):
        """Return True when *ip* is whitelisted or its lookup mentions Putuo.

        Args:
            ip: Address to check, as a string.

        Returns:
            True if *ip* is in ``passIpPool`` or the lookup page's result
            paragraphs contain "普陀"; False otherwise.

        Raises:
            OSError: Network failures from ``urlopen`` (including HTTP errors
                and timeouts) propagate to the caller.
        """
        if ip in passIpPool:
            return True

        checkipurl = ipCheckBaseUrl + ip
        print(checkipurl)
        # The context manager closes the HTTP response; the timeout keeps a
        # stalled lookup from blocking the request forever.
        with urlopen(checkipurl, timeout=10) as html:
            htmlContent = html.read()

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        bsObj = BeautifulSoup(htmlContent, features="html.parser")
        ipResp = "".join(
            p.text for p in bsObj.find_all('p', attrs={'class':'result'})
        )
        return "普陀" in ipResp
#coding=utf-8
from PutuoIPCheck import *
from pymongo import MongoClient
import datetime
import time
from PIL import Image, ImageDraw, ImageFont
import traceback
import po
import articleinfo
import tools
import imagetools
from bs4 import BeautifulSoup
import requests
import urllib3
import json
from urllib.request import urlopen # Python3
from urllib.error import HTTPError
# Path of a single access log. The functions visible here do not read this
# global: fileRead takes a parameter of the same name, which shadows it.
filePath = "/home/jy/IP/www.xxx.tv.access.log-20190624"
# Base URL of the IP lookup page; addIPAddress appends the address to it.
baseUrl = "http://m.ip138.com/ip.asp?ip="
# Output file that receives one "ip | lookup text" line per looked-up address.
ipaddfile = "/home/jy/IP/ipfile.txt"
# Output file that receives only the addresses whose lookup text contains "普陀".
putuoipfile = "/home/jy/IP/putuoipfile.txt"
# Distinct client addresses collected by processStr across all log files.
ipset = set([])
def main(firstDay=datetime.date(2019, 5, 26),
         lastDay=datetime.date(2019, 6, 23),
         logPrefix="/home/jy/IP/www.xxx.tv.access.log-"):
    """Process one access log per calendar day, oldest first.

    The log for each day is expected at ``logPrefix + YYYYMMDD``. The
    defaults reproduce the previously hard-coded list of files (2019-05-26
    through 2019-06-23), without its duplicated entries and in a
    deterministic order.

    Args:
        firstDay: First day (inclusive) whose log is processed.
        lastDay: Last day (inclusive) whose log is processed. Nothing is
            processed when it is earlier than *firstDay*.
        logPrefix: Path prefix to which the ``YYYYMMDD`` date suffix is
            appended.
    """
    dayCount = (lastDay - firstDay).days + 1
    for offset in range(dayCount):
        day = firstDay + datetime.timedelta(days=offset)
        fileRead(logPrefix + day.strftime("%Y%m%d"))
def fileRead(filePath):
    """Collect client addresses from one access log and look up the new ones.

    Every line is passed to ``processStr``, which adds addresses to the
    module-level ``ipset``. Only the addresses that this file added to
    ``ipset`` are then passed to ``addIPAddress``, so an address seen in an
    earlier file is not queried and recorded again.

    Args:
        filePath: Path of the access log to read.

    Raises:
        OSError: If the log file cannot be opened or read.
    """
    # Snapshot taken before reading, so the lookups below can be limited to
    # the addresses first seen in this file.
    knownBefore = set(ipset)

    # errors="replace" keeps a stray non-UTF-8 byte elsewhere in a line from
    # aborting the whole run.
    with open(filePath, "r", encoding="utf-8", errors="replace") as f:
        # Iterate lazily instead of loading the whole log into memory.
        for line in f:
            processStr(line)

    # Sorted for a reproducible lookup and output order.
    for ip in sorted(ipset - knownBefore):
        addIPAddress(ip)
def processStr(str):
    """Add the client address of one access-log line to ``ipset``.

    The address is the text before the first ``' - - '`` separator. Lines
    that lack the separator (blank or malformed lines) are skipped, so they
    are not recorded whole as if they were an address.

    Args:
        str: One raw line from the access log. The name shadows the builtin
            ``str``; it is kept so keyword callers continue to work.
    """
    head, separator, _ = str.partition(' - - ')
    address = head.strip()
    if separator and address:
        ipset.add(address)
def _appendRecord(path, record):
    """Append *record* to the UTF-8 text file at *path*.

    Failures are printed rather than raised, so one unwritable file does not
    stop the rest of the batch.
    """
    try:
        # Explicit encoding: the record contains Chinese text, which must not
        # depend on the locale's default encoding.
        with open(path, 'a', encoding="utf-8") as out:
            out.write(record)
    except (OSError, UnicodeError) as e:
        print(e)


def addIPAddress(ip):
    """Look up *ip* on the lookup page and record the result.

    Fetches ``baseUrl + ip``, joins the text of the page's
    ``<p class="result">`` elements, and appends an ``"ip | text"`` line to
    ``ipaddfile``. When the text contains "普陀" the same line is also
    appended to ``putuoipfile``.

    A failed lookup (HTTP error, connection error, timeout) is reported on
    stdout and the address is skipped, so the remaining addresses are still
    processed.

    Args:
        ip: Address to look up, as a string.
    """
    checkipurl = baseUrl + ip
    print("start checkipurl =", checkipurl)
    try:
        # The context manager closes the response; the timeout keeps one
        # stalled lookup from hanging the whole batch.
        with urlopen(checkipurl, timeout=10) as html:
            htmlContent = html.read()
    except OSError as e:
        # HTTPError, URLError and socket timeouts are all OSError subclasses.
        print("lookup failed for", ip, ":", e)
        return

    bsObj = BeautifulSoup(htmlContent, features="lxml")
    ipResp = "".join(
        p.text for p in bsObj.find_all('p', attrs={'class':'result'})
    )

    record = ip + " | " + ipResp + "\n"
    if "普陀" in ipResp:
        _appendRecord(putuoipfile, record)
    _appendRecord(ipaddfile, record)
# Run the batch only when this file is executed as a script, so importing the
# module does not start reading logs and issuing lookups.
if __name__ == "__main__":
    main()