Python 應用案例

1、統計目錄下多個txt文件,找出出現頻率最多的單詞

#coding=utf-8

##目錄下多個txt文件,找出出現頻率最多的單詞
import os,re
from collections import Counter

# Directory whose .txt files are analysed.
FILESOURECE = 'D://pytest'

# Stop words: very common English words left out of the frequency ranking.
stop_word = [
	'the', 'in', 'of', 'and', 'to', 'has', 'that',
	's', 'is', 'are', 'a', 'with', 'as', 'an',
]

def getCounter(articlefileresource):
	"""Count how often each token occurs in one text file.

	A token is a run of ASCII letters, or a number with an optional leading
	``$`` and/or trailing ``%``. The text is folded to lower case before
	matching so that "The" and "the" are counted as the same word (and so
	that a lower-case stop-word list can match them).

	Args:
		articlefileresource: path of the text file to read.

	Returns:
		collections.Counter mapping each token to its number of occurrences.
	"""
	# The number alternative must not end in a ``$`` anchor: with the anchor
	# a number could only match at the very end of the whole file.
	pattern = r'''[A-Za-z]+|\$?\d+%?'''
	with open(articlefileresource) as f:
		text = f.read()
	return Counter(re.findall(pattern, text.lower()))



def getRun(FILE_PATH):
	"""Print the most frequent non-stop word found in a directory's .txt files.

	Changes the working directory to FILE_PATH, prints the directory
	listing, adds up the token counts of every ``.txt`` file (via
	getCounter), removes the entries listed in ``stop_word`` and prints the
	most common remaining token. When no token is left, no result line is
	printed.

	Args:
		FILE_PATH: directory to scan; sub-directories are not visited.
	"""
	os.chdir(FILE_PATH)
	names = os.listdir(os.getcwd())
	print(names)

	total_counter = Counter()
	for name in names:
		if os.path.splitext(name)[1] == '.txt':
			total_counter += getCounter(name)

	# Delete the stop words instead of setting them to 0: a zeroed entry
	# stays in the counter and could be reported as the "top" word when
	# nothing else was counted. Counter ignores deletion of missing keys.
	for word in stop_word:
		del total_counter[word]

	top = total_counter.most_common(1)
	if top:
		print(top[0][0])


# getRun() prints its result itself and returns nothing, so it is called
# directly; wrapping it in a print would only output an extra "None".
if __name__ == '__main__':
	getRun(FILESOURECE)

2、批量修改某個文件夾下圖片的分辨率

#coding=utf-8

#批量修改某個文件夾下圖片分辨率
import os
from PIL import Image

# Folder that holds the source images (passed to processimage as the input directory).
mypath = 'D://pytest/'

# Folder that receives the processed copies.
outpath = "D://pycopy/"

def processimage(filesoure, destsource, name, imgtype, max_width=1180, max_height=1774):
	"""Save a copy of an image, shrunk so that it fits inside a bounding box.

	Args:
		filesoure: directory containing the source image.
		destsource: directory the result is written to.
		name: file name of the image, used for both input and output.
		imgtype: file extension; '.jpg' is saved as JPEG, anything else as PNG.
		max_width: largest allowed width in pixels.
		max_height: largest allowed height in pixels.
	"""
	imgtype = 'jpeg' if imgtype == '.jpg' else 'png'

	source_path = os.path.join(filesoure, name)
	print(source_path)
	# Open the image.
	im = Image.open(source_path)

	print('----------')
	# thumbnail() expects ONE (width, height) tuple; a second positional
	# argument would be taken as the resampling filter. It keeps the aspect
	# ratio and never enlarges, so images already inside the box are saved
	# unchanged.
	im.thumbnail((max_width, max_height))

	im.save(os.path.join(destsource, name), imgtype)


def getRun():
	"""Hand every .jpg / .png entry of ``mypath`` to processimage()."""
	os.chdir(mypath)
	for filename in os.listdir(os.getcwd()):
		# Only the file extension decides whether an entry is processed.
		extension = os.path.splitext(filename)[1]
		if extension in ('.jpg', '.png'):
			processimage(mypath, outpath, filename, extension)



# Top-level call: the batch run starts when this script is executed.
getRun()

3、統計一個目錄下所有的py文件代碼行數

#coding=utf-8

#統計一個目錄下所有py文件的代碼行數

import re,os

def anynaise_code(codefilesource):
	"""Count the total, comment and blank lines of one Python source file.

	A line is blank when it contains nothing but whitespace, and a comment
	line when its first non-whitespace character is ``#`` (so indented
	comments are included). The three counts are printed and returned.

	Args:
		codefilesource: path of the file to analyse.

	Returns:
		list ``[total_lines, coments_lines, blank_lines]``.
	"""
	total_lines = 0
	coments_lines = 0
	blank_lines = 0
	with open(codefilesource) as f:
		for line in f:
			total_lines += 1
			# Strip first so that indentation and stray whitespace do not
			# hide comment lines or blank lines.
			stripped = line.strip()
			if not stripped:
				blank_lines += 1
			elif stripped.startswith('#'):
				coments_lines += 1

	# %-formatting with a single argument prints the same text under both
	# the Python 2 print statement and the Python 3 print function.
	print("在%s中:" % codefilesource)
	print("代碼行數: %s" % total_lines)
	print("註釋行數: %s" % coments_lines)
	print("空格行數: %s" % blank_lines)
	return [total_lines, coments_lines, blank_lines]


def getRun(filepath):
	"""Print line statistics for every .py file directly inside a directory.

	Changes the working directory to ``filepath``, runs anynaise_code() on
	each ``.py`` entry and prints the summed total, comment and blank line
	counts.

	Args:
		filepath: directory to scan; sub-directories are not visited.
	"""
	os.chdir(filepath)
	total_commentlines = 0
	total_blanklines = 0
	total_lines = 0
	for name in os.listdir(os.getcwd()):
		if os.path.splitext(name)[1] == '.py':
			# anynaise_code returns [total, comments, blanks].
			lines, comments, blanks = anynaise_code(name)
			total_lines += lines
			total_commentlines += comments
			total_blanklines += blanks

	# Single-argument %-formatting keeps the output identical under the
	# Python 2 print statement and the Python 3 print function.
	print("總的代碼行數: %s" % total_lines)
	print("總的註釋行數: %s" % total_commentlines)
	print("總的空格行數: %s" % total_blanklines)

# Top-level call: count the lines of the .py files in this directory when the script is executed.
getRun("D://pythonworkspace//")

4、使用goose進行抓取一個網頁的正文內容:

這裏首先需要安裝goose庫,使用pip install goose-extractor進行安裝

#coding=utf-8

from goose import Goose
from goose.text import StopWordsChinese

def gooseExample():
    """Extract a fixed article URL with a default Goose instance and print the result.

    Prints the article title followed by the first 150 characters of the
    cleaned body text.
    """
    page_url = "http://www.chinadaily.com.cn/a/201712/22/WS5a3c7473a31008cf16da2d9e.html"
    extracted = Goose().extract(url=page_url)
    print(extracted.title)
    # Only a short preview of the cleaned text is shown.
    preview = extracted.cleaned_text[:150]
    print(preview)
    
def gooseChineseExample():
    """Extract a fixed article URL with Goose configured for Chinese stop words.

    Prints the article title, its meta description and the first 150
    characters of the cleaned body text.
    """
    # StopWordsChinese replaces the default stop-word class.
    config = {'stopwords_class': StopWordsChinese}
    page_url = "https://item.btime.com/36a0f17i0489keqltn35q96p4lr?from=haozcxw"
    extracted = Goose(config).extract(url=page_url)
    print(extracted.title)
    print(extracted.meta_description)
    preview = extracted.cleaned_text[:150]
    print(preview)

if __name__ == "__main__":
    # Run both extraction demos in order when executed as a script.
    gooseExample()
    gooseChineseExample()

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章