# [Effective Python] 第2章函數

# -*- coding: utf-8 -*-

import os

# 第二章 函數

#--------------------------------------
# 第14條：儘量用異常來表示特殊情況，而不要返回None

def divide(a, b):
	"""Return ``a / b``, or ``None`` when ``b`` is zero.

	This is the discouraged pattern the item warns about: ``None`` doubles
	as an error marker, so a legitimate falsy quotient such as ``0`` is easy
	to mistake for a failure at the call site.
	"""
	try:
		quotient = a / b
	except ZeroDivisionError:
		quotient = None
	return quotient

# When None has a special meaning as a return value, callers can easily make
# the mistake shown here: the dividend is 0, so a valid quotient is falsy and
# `not result` treats it exactly like the None error marker.
x, y = 0, 5
result = divide(x, y)
if not result:
	print "Invalid inputs"	# This is wrong!

# 比較好的方式是不返回None，將異常拋給上一級，使得調用者必須應對它

def divide_2(a, b):
	"""Return ``a / b``; report a zero divisor as ``ValueError``.

	The caught ``ZeroDivisionError`` instance is handed to the
	``ValueError`` constructor, so it is available as ``args[0]`` of the
	raised exception.
	"""
	try:
		quotient = a / b
	except ZeroDivisionError as zero_error:
		raise ValueError(zero_error)
	else:
		return quotient

# The caller has to handle the exception explicitly; the else branch runs only
# when divide_2 returned normally.
x, y = 5, 3
try:
	# x * 1.0 turns the dividend into a float before dividing.
	result = divide_2(x * 1.0, y)
except ValueError:
	print "Invalid inputs"
else:
	# "{:.2}" formats the float with a precision of 2 and no type code.
	print "Result is {:.2}".format(result)

#--------------------------------------
# 第15條：瞭解如何在閉包裏使用外圍作用域中的變量

# 例子：有一份列表，其中的元素都是數字，現在要對其排序，但排序時，要把出現在某個羣組內的數字，放在羣組外數字之前

def sort_priority(values, group):
	"""Sort ``values`` in place, placing members of ``group`` first.

	Every element is keyed by a ``(rank, value)`` tuple: rank 0 when the
	element is in ``group``, rank 1 otherwise. Tuples compare by rank first
	and by value second. Returns ``None``.
	"""
	def helper(item):
		# The closure reads ``group`` from the enclosing function's scope.
		rank = 0 if item in group else 1
		return (rank, item)
	values.sort(key=helper)

# Sort the list in place so that the numbers in `group` come first.
numbers = [8,3,1,2,5,4,7,6]
group = {2,3,5,7}
sort_priority(numbers, group)
print "---sort_priority---"
print numbers

# 上述函數之所以能正常運作，是因爲：
# 1. Python支持閉包(closure)：閉包是一種定義在某個作用域中的函數，這種函數引用了那個作用域裏的變量
# 2. Python函數是一級對象(first-class object)：我們可以直接引用函數、把函數賦給變量、把函數當成參數傳給其他函數、並通過表達式及if對其進行比較和判斷
# 3. Python使用特殊的規則來比較兩個元組

# 如果這個sort_priority函數返回一個值，表示用戶界面裏是否出現了優先級較高的元件就更好了

# 試試如下寫法：

def sort_priority2(values, group):
	"""Sort like ``sort_priority`` and try, unsuccessfully, to report a hit.

	This is the intentionally flawed variant: the assignment inside
	``helper`` defines a new local named ``found`` instead of rebinding the
	outer one, so this function always returns ``False``.
	"""
	found = False	# Scope: "sort_priority2"
	def helper(item):
		if item not in group:
			return (1, item)
		found = True	# Scope: "helper" -- the outer name is untouched
		return (0, item)
	values.sort(key=helper)
	return found

# The list is sorted correctly, but the returned flag stays False even though
# several numbers are in the group.
numbers = [8,3,1,2,5,4,7,6]
group = {2,3,5,7}
found = sort_priority2(numbers, group)
print "---sort_priority2---"
print numbers
print found	# False

# 注1：表達式中引用變量時，Python解釋器按如下順序遍歷各個作用域，以解析該引用：
# 1. 當前函數的作用域
# 2. 任何外圍作用域（如，包含當前函數的其他函數）
# 3. 包含當前代碼的那個模塊的作用域（全局作用域）
# 4. 內置作用域（也就是包含len及str等函數的那個作用域）
# 如果上面這些地方都沒有定義過名稱相符的變量，就拋出NameError異常

# 注2：給變量賦值時，規則有所不同
# 如果當前作用域已經定義了這個變量，那麼該變量就會具備新值；
# 如果當前作用域沒有這個變量，Python會把這次賦值視爲對該變量的定義，而新定義的這個變量，其作用域就是包含賦值操作的這個函數

# 注3：Python是故意這麼設計，可以防止函數中的局部變量污染函數外的那個模塊

# 獲取閉包數據

# Python3中可使用nonlocal語句

# 也可將相關狀態封裝爲輔助類(helper class)

class Sorter(object):
	"""Callable sort key that remembers whether a group member was seen.

	``group`` is the container tested for membership. ``found`` starts as
	``False`` and becomes ``True`` once any keyed element is in ``group``.
	"""

	def __init__(self, group):
		self.found = False
		self.group = group

	def __call__(self, x):
		"""Return ``(0, x)`` for group members and ``(1, x)`` otherwise."""
		in_group = x in self.group
		if in_group:
			self.found = True
		return (0 if in_group else 1, x)

# Use a Sorter instance as the sort key; its `found` attribute records
# whether any number was in the group.
numbers = [8,3,1,2,5,4,7,6]
group = {2,3,5,7}
sorter = Sorter(group)	# helper object
numbers.sort(key=sorter)
print "---Sorter---"
print numbers
print sorter.found

# Python2中的值
# Python2中不支持nonlocal關鍵字
# 可以利用Python的作用域規則來解決，雖然不優雅，但已經成爲一種Python編程習慣

def sort_priority3(values, group):
	"""Sort ``values`` in place with ``group`` members first and report hits.

	Python 2 has no ``nonlocal`` statement, so the flag is kept in a
	one-element list: the closure mutates the list instead of rebinding a
	name, which sidesteps the scoping problem.

	Args:
		values: List to sort in place.
		group: Container of the elements that must come first.

	Returns:
		``True`` if at least one element of ``values`` is in ``group``,
		otherwise ``False``.
	"""
	found = [False]		# mutable cell shared with the closure
	def helper(x):
		if x in group:
			found[0] = True
			return (0, x)
		return (1, x)
	values.sort(key=helper)
	# Unwrap the cell: the list itself is non-empty and therefore always truthy.
	return found[0]

# Sort with the list-cell variant and print what it reports.
numbers = [8,3,1,2,5,4,7,6]
group = {2,3,5,7}
found = sort_priority3(numbers, group)
print "---sort_priority3---"
print numbers
print found

#--------------------------------------
# 第16條：考慮用生成器來改寫直接返回列表的函數

# 例子：查出字符串中每個詞的首字母在整個字符串中的位置

def index_words(text):
	"""Return the list of offsets at which words start in ``text``.

	Offset 0 is included for any non-empty text, and every space contributes
	the offset of the character immediately after it.
	"""
	starts = [0] if text else []
	starts.extend(pos + 1 for pos, char in enumerate(text) if char == ' ')
	return starts

# Print the start offsets of the first three words.
address = "Four score and seven years ago..."
result = index_words(address)
print result[:3]

# 用生成器來改寫

def index_words_iter(text):
	"""Yield, one at a time, the offsets at which words start in ``text``.

	Produces the same offsets as the list-building version: 0 for any
	non-empty text, then the position right after each space.
	"""
	if text:
		yield 0
	# Counting from 1 gives the offset of the character after each space.
	for next_offset, char in enumerate(text, 1):
		if char == " ":
			yield next_offset

# Materialise the generator with list() to get the same result as before.
result = list(index_words_iter(address))
print result[:3]

# 下面這個生成器，從文件裏面依次讀入各行內容，然後逐個處理每行中的單詞，併產生相應的結果
# 該函數執行所消耗的內存，由單行輸入值的最大字符數來界定

def index_file(handle):
	"""Yield word-start offsets for the text produced by iterating ``handle``.

	``handle`` is iterated item by item (lines, for a file object). A running
	character count is kept across items: the count is yielded at the start
	of every non-empty item and again right after every space.
	"""
	consumed = 0
	for line in handle:
		if line:
			yield consumed
		# Continue the running count from where the previous item ended.
		for consumed, char in enumerate(line, consumed + 1):
			if char == " ":
				yield consumed

# Read address.txt lazily and print only the first two offsets.
with open("address.txt", "r") as f:
	it = index_file(f)
	print next(it)
	print next(it)

#--------------------------------------
# 第17條：在參數上面迭代時，要多加小心

# 例子：求出每個城市的遊客數量佔總遊客數的百分比

def normalize(numbers):
	"""Return each value of ``numbers`` as a percentage of their sum.

	``numbers`` is traversed twice, once by ``sum`` and once to build the
	result, so a one-shot iterator produces an empty list.
	"""
	total = sum(numbers)
	return [100.0 * value / total for value in numbers]

# Works as expected when the argument is a list.
visits = [15, 35, 80]
percentages = normalize(visits)
print percentages

# 擴大函數應用範圍，把Texas每個城市的遊客數放在文件中，定義生成器函數來讀取每行數據

def read_visits(data_path):
	"""Yield every line of the file at ``data_path`` converted with ``int``.

	The file is opened when iteration starts and is read line by line.
	"""
	with open(data_path) as source:
		for raw_line in source:
			yield int(raw_line)

# Passing the generator straight to normalize gives an empty result.
it = read_visits("my_numbers.txt")
percentages = normalize(it)
print percentages	# [] -- sum() already exhausted the iterator

# 注：出現這種情況的原因在於，迭代器只能產生一輪結果；在拋出過StopIteration異常的迭代器或生成器上面繼續迭代第二輪，是不會有結果的

# An iterator produces its results only once: the second list() gets nothing.
it = read_visits("my_numbers.txt")
print list(it)	# [15, 35, 80]
print list(it)	# []

# 爲了解決這個問題，可以用該迭代器製作一份列表，然後操作該列表


def normalize_copy(numbers):
	"""Return each value of ``numbers`` as a percentage of their sum.

	The input is first copied into a list, so a one-shot iterator is
	accepted; the whole input is held in memory at once.
	"""
	snapshot = list(numbers)	# Copy the iterator
	total = sum(snapshot)
	return [100.0 * value / total for value in snapshot]

percentages = normalize(visits)
print percentages

# 注，上面這種寫法的問題在於，待複製的那個迭代器可能包含大量數據，導致內存崩潰
# 一種解決辦法是，通過參數來接受另外一個函數，那個函數每次調用後，都能返回新的迭代器

def normalize_func(get_iter):
	"""Return percentages of the total for the values ``get_iter`` provides.

	``get_iter`` is called twice with no arguments and must return a fresh
	iterable each time: the first is summed, the second is used to build
	the result.
	"""
	total = sum(get_iter())
	return [100.0 * value / total for value in get_iter()]

# The lambda creates a new generator every time normalize_func calls it.
percentages = normalize_func(lambda : read_visits("my_numbers.txt"))
print percentages

# 還有更好的方法，達到同樣的效果，即，編寫一種實現迭代器協議的容器類（iterator protocol）
# Python在for循環遍歷容器時，就是依靠這個迭代器協議：for x in foo 實際上會調用iter(foo)，而iter(foo)又會調用foo.__iter__，
# 此方法必須返回迭代器對象，而那個迭代器本身，則實現了名爲__next__的特殊方法（Python 2中名爲next）

# 簡單來說，就是要令自己的類把__iter__方法實現爲生成器
# 下面，定義一個可迭代的容器類

class ReadVisits(object):
	"""Re-iterable container over the integers stored in a file.

	Only the path is stored; every call to ``__iter__`` returns a new
	generator that opens the file again.
	"""

	def __init__(self, data_path):
		self.data_path = data_path

	def __iter__(self):
		"""Yield every line of the file converted with ``int``."""
		with open(self.data_path) as source:
			for raw_line in source:
				yield int(raw_line)

# The container can be passed to the original normalize function unchanged:
# sum() calls ReadVisits.__iter__ to get a new iterator, and the for loop calls
# __iter__ again to get another one, so the two passes do not affect each other.
visits = ReadVisits("my_numbers.txt")
percentages = normalize(visits)
print percentages

# 修改normalize函數，以確保調用者傳進來的參數不是迭代器對象本身
# 利用iter函數的行爲進行判斷：若傳入迭代器對象，則返回該迭代器；若傳入的是容器類型對象，則返回新的迭代器對象

def normalize_defensive(numbers):
	"""Return percentages of the total, rejecting one-shot iterators.

	Calling ``iter`` on an iterator returns the iterator itself, whereas a
	container returns a new iterator each time; two calls that return the
	same object therefore mean ``numbers`` cannot be traversed twice.

	Raises:
		TypeError: if ``numbers`` is an iterator rather than a container.
	"""
	first_pass, second_pass = iter(numbers), iter(numbers)
	if first_pass is second_pass:	# An iterator -- bad!
		raise TypeError("Must supply a container")
	total = sum(numbers)
	return [100.0 * value / total for value in numbers]

# Containers (a list, a ReadVisits instance) are accepted.
visits = [15, 35, 80]
normalize_defensive(visits)				# No error
visits = ReadVisits("my_numbers.txt")
normalize_defensive(visits)				# No error

# An iterator obtained from the container is rejected.
it = iter(visits)
try:
	normalize_defensive(it)
except TypeError as e:
	print e
# Must supply a container

#--------------------------------------
# 第18條：用數量可變的位置參數減少視覺雜訊（visual noise）

def log(message, values):
	"""Print ``message``, followed by ``values`` when there are any.

	When ``values`` is truthy, its items are converted with ``str`` and
	printed after ``message`` as a comma-separated list; otherwise only
	``message`` is printed.
	"""
	if values:
		joined = ", ".join(map(str, values))
		print("{}: {}".format(message, joined))
	else:
		print(message)

# Even with nothing to log, an empty list has to be passed explicitly.
log("My numbers are", [1, 2])
log("Hi there", [])	# ugly

def log2(message, *values):
	"""Print ``message``, followed by any extra positional arguments.

	The type of ``values`` is printed first to show that star-args arrive
	as a tuple. Extra arguments are converted with ``str`` and printed
	after ``message`` as a comma-separated list.
	"""
	print(type(values))	# <type 'tuple'>
	if values:
		joined = ", ".join(map(str, values))
		print("{}: {}".format(message, joined))
	else:
		print(message)

# The values can now simply be omitted.
log2("My numbers are", 1, 2)
log2("Hi there")

favorites = [7, 33, 99]
log2("Favorite colors", *favorites)	# with *, the list items are passed as positional arguments

# 接受數量可變的位置參數，會帶來兩個問題：
# 1. 變長參數傳給函數時，總是要先轉化爲元組，如果傳入的是帶有*操作符的生成器，
# 則Python就必須把該生成器完整迭代一輪，放入元組中，可能消耗大量內存，導致崩潰

def my_generator():
	"""Yield the integers 0 through 9, one at a time."""
	for number in xrange(10):
		yield number

def my_func(*args):
	"""Print the tuple of positional arguments that was received."""
	print(args)

# Unpacking a generator with * consumes it completely to build the args tuple.
it = my_generator()
my_func(*it)
# (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

# 注：只有當我們能夠確定輸入的參數個數比較少時，才應該令函數接受*args式的變長參數

# 2. *args參數的第二個問題是，如果以後給函數添加新的位置參數，就必須修改調用該函數的那些舊代碼，否則會產生難以調試的錯誤

def log3(sequence, message, *values):
	"""Print ``sequence`` and ``message``, followed by any extra arguments.

	New version of ``log2`` with a leading ``sequence`` parameter. Extra
	positional arguments are converted with ``str`` and printed as a
	comma-separated list.
	"""
	if values:
		joined = ", ".join(map(str, values))
		print("{}: {}: {}".format(sequence, message, joined))
	else:
		print("{}: {}".format(sequence, message))

log3(1, "Favorites", 7, 33)		# New usage is OK
log3("Favorites", 7, 33)		# Old usage breaks
# Prints "Favorites: 7: 33": not the intended result, yet no error is raised.

# 注：爲了避免此種情況，我們應該使用只能以關鍵字形式指定的參數來擴展這種接受*args的函數，見第21條

#--------------------------------------
# 第19條：用關鍵字參數來表達可選的行爲
# 注：位置參數必須出現在關鍵字參數前面

def remainder(number, divisor):
	"""Return ``number % divisor``."""
	leftover = number % divisor
	return leftover

# Sanity check of the positional call form.
assert remainder(20, 7) == 6

# 好處1，代碼易讀
# 好處2，可在函數定義中提供默認值

def flow_rate(weight_diff, time_diff, period=1):
	"""Return ``weight_diff / time_diff`` scaled by ``period``.

	``period`` defaults to 1, which leaves the plain quotient unchanged.
	"""
	rate = weight_diff / time_diff
	return rate * period

# 好處3，提供了一種擴充函數參數的有效方式，擴充後的函數依然能與原有的那些調用代碼兼容

def flow_rate_2(weight_diff, time_diff, period=1, units_per_kg=1):
	"""Return the flow rate scaled by ``period`` and converted by ``units_per_kg``.

	Both optional parameters default to 1, so calls that pass only the two
	required arguments compute ``weight_diff / time_diff``.
	"""
	converted = weight_diff * units_per_kg
	return (converted / time_diff) * period

# 注：以位置參數的形式來指定可選參數，是容易令人困惑的，建議，一直以關鍵字形式來指定這些參數
# pounds_per_hour = flow_rate_2(weight_diff, time_diff, 3600, 2.2)							# Bad
# pounds_per_hour = flow_rate_2(weight_diff, time_diff, period=3600, units_per_kg=2.2)		# Good

#--------------------------------------
# 第20條：用None和文檔字符串來描述具有動態默認值的參數

# 需求：有時我們想採用一種非靜態的類型，來作爲參數的默認值

from time import asctime, sleep
def logg(message, when=asctime()):
	"""Print ``message`` prefixed by ``when``.

	Intentionally flawed demo: the default for ``when`` is the result of one
	``asctime()`` call made when this ``def`` statement runs, so every call
	that omits ``when`` prints that same timestamp.
	"""
	line = "{}: {}".format(when, message)
	print(line)

# Two calls one second apart still print the same timestamp.
logg("Hi, there!")
sleep(1)
logg("Hi, again!")
# Fri Jan 10 17:46:48 2020: Hi, there!
# Fri Jan 10 17:46:48 2020: Hi, again!

# 這裏發現，兩個時間戳一樣，因爲asctime()在函數定義時執行了一次，之後就固定不變了
# 原因：參數的默認值，只會在程序加載模塊並讀到本函數的定義時評估一次，對於{}或[]等動態的值，可能會導致奇怪的行爲
# 在Python中若想正確實現動態默認值，習慣上是把默認值設爲None，並註釋描述

def loggg(message, when=None):
	"""Print ``message`` prefixed by a timestamp.

	Args:
		message: Text to print.
		when: Timestamp to print before the message. Defaults to the
			value of ``asctime()`` at the time of the call.
	"""
	if when is None:
		when = asctime()
	print("{}: {}".format(when, message))

# With the None default, each call computes its own timestamp.
loggg("Hi, there!")
sleep(1)
loggg("Hi, again!")

# Fri Jan 10 17:55:03 2020: Hi, there!
# Fri Jan 10 17:55:04 2020: Hi, again!

# 如果參數的實際默認值是可變類型（mutable），那麼就一定要記得用None作爲形式上的默認值
# 例子：從編碼爲JSON格式的數據中載入某個值，若失敗，則默認返回空的字典

import json

def decode(data, default={}):
	"""Parse ``data`` as JSON, returning ``default`` if parsing fails.

	Intentionally flawed demo: the ``{}`` default is a single dictionary
	created when this ``def`` statement runs, so every failing call that
	omits ``default`` returns that same shared object.
	"""
	try:
		parsed = json.loads(data)
	except ValueError:
		return default
	return parsed

# Both failing calls return the dictionary stored as the default argument.
foo = decode("bad data")
foo["stuff"] = 5
bar = decode("also bad")
bar["meep"] = 1
print "foo = {}".format(foo)
print "bar = {}".format(bar)
# Output below: foo and bar are both the one dictionary written as the default.
# foo = {'stuff': 5, 'meep': 1}
# bar = {'stuff': 5, 'meep': 1}

# 解決辦法如下

def decode2(data, default=None):
	"""Load JSON data from a string.

	Args:
		data: JSON data to decode.
		default: Value to return if decoding fails.
			Defaults to a new empty dictionary on each call.

	Returns:
		The decoded value, or ``default`` when ``data`` is not valid JSON.
	"""
	try:
		return json.loads(data)
	except ValueError:
		return {} if default is None else default

# Each failing call now gets its own dictionary.
foo = decode2("bad data")
foo["stuff"] = 5
bar = decode2("also bad")
bar["meep"] = 1
print "foo = {}".format(foo)
print "bar = {}".format(bar)

# foo = {'stuff': 5}
# bar = {'meep': 1}

#--------------------------------------
# 第21條：用只能以關鍵字形式指定的參數來確保代碼明晰

# 這裏展示Python2的實現方法，Python3有更簡單的方式

def print_args(*args, **kwargs):
	"""Print the positional arguments and then the keyword arguments.

	``args`` collects any number of positional arguments as a tuple;
	``kwargs`` collects any number of keyword arguments as a dict.
	"""
	# Two spaces after the colon: the label's own trailing space plus the
	# separator the two-item print statement used to insert.
	print("Positional:  {}".format(args))
	print("Keyword:  {}".format(kwargs))

# Two positional arguments go to args, the two keyword arguments to kwargs.
print_args(1, 2, foo="bar", stuff="meep")

def safe_division_d(number, divisor, **kwargs):
	"""Divide ``number`` by ``divisor`` with keyword-only error switches.

	Python 2 has no syntax for keyword-only parameters, so the options are
	taken out of ``**kwargs`` by hand and anything left over is rejected.

	Args:
		number: Dividend.
		divisor: Divisor.
		ignore_overflow: Keyword only. When true, return 0 if the division
			raises ``OverflowError``. Defaults to False.
		ignore_zero_division: Keyword only. When true, return
			``float("inf")`` if the division raises ``ZeroDivisionError``.
			Defaults to False.

	Returns:
		``number / divisor``, or the substitute value for an ignored error.

	Raises:
		TypeError: if any other keyword argument is passed.
		OverflowError: re-raised unless ``ignore_overflow`` is true.
		ZeroDivisionError: re-raised unless ``ignore_zero_division`` is true.
	"""
	# pop() removes each known option and falls back to False when absent.
	ignore_overflow = kwargs.pop("ignore_overflow", False)
	ignore_zero_div = kwargs.pop("ignore_zero_division", False)
	if kwargs:
		# Whatever is still in kwargs is an option this function does not know.
		raise TypeError("Unexpected **kwargs: %r" % kwargs)
	try:
		return number / divisor
	except OverflowError:
		if ignore_overflow:
			return 0
		raise
	except ZeroDivisionError:
		if ignore_zero_div:
			return float("inf")
		raise

# The options can only be given by keyword.
safe_division_d(1.0, 10**500, ignore_overflow=True)
safe_division_d(1, 0, ignore_zero_division=True)
safe_division_d(1, 10)
# safe_division_d(1, 0, False, True)		# error: the options must be passed as keywords
# safe_division_d(0, 0, unexpected=True)	# error: an unknown keyword argument was passed
[Effective Python] 第2章函數

前端使用 Konva 實現可視化設計器（13）- 折線 - 最優路徑應用【思路篇】

從零開始的 JSON 庫教程筆記

[Effective Python] 第2章函數

用C++實現簡單的文件I/O操作(轉載自http://developer.51cto.com/art/201107/277311.htm)

[Effective Python] 第1章用Pythonic方式來思考

C程序設計語言,第一章

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

[Effective Python] 第2章 函數

[Effective Python] 第2章函數