Cython 用於加速 Python,可以簡單理解爲帶有 C 數據類型的 Python。
1. hello world
創建 helloworld.pyx 文件,在其中添加測試代碼
# Runs when the module is imported, so `import helloworld` prints the greeting.
print("Hello World")
創建 setup.py 文件,在其中添加轉換編譯代碼
from distutils.core import setup
from Cython.Build import cythonize
# Translate helloworld.pyx into an extension module and register it for the
# build_ext step.
# NOTE(review): distutils was removed from the standard library in Python 3.12;
# consider importing setup from setuptools instead.
hello_extensions = cythonize("helloworld.pyx")
setup(ext_modules=hello_extensions)
如果需要加入自定義選項
from distutils.core import setup
from distutils.extension import Extension
import numpy as np
from Cython.Distutils import build_ext
# Locate the NumPy C headers so the compiler can find them.
try:
    numpy_include = np.get_include()
except AttributeError:
    # Fall back to the alternative accessor when get_include() is missing.
    numpy_include = np.get_numpy_include()

# One Extension entry per compiled module; custom compiler flags and include
# directories are supplied here.
ext_modules = [
    Extension(
        "helloworld",
        ["helloworld.pyx"],
        # Suppress two classes of compiler warnings for the generated C code.
        extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
        include_dirs=[numpy_include],
    ),
]

setup(
    ext_modules=ext_modules,
    # Use the build_ext command imported from Cython.Distutils.
    cmdclass={'build_ext': build_ext},
)
運行命令
python setup.py build_ext --inplace
在測試文件中引入 helloworld module
import helloworld
執行後打印出 hello world 字符。
2. pyximport:Cython Compilation for Developers
如果編寫的 Cython 模塊不需要額外的 C 庫或特殊的構建配置,就可以使用 pyximport 模塊,通過 import 直接加載 .pyx 文件,而不需要運行 setup.py,用法如下:
>>> import pyximport; pyximport.install()
>>> import helloworld
Hello World
注意:不推薦在最終用戶的機器上使用 pyximport 構建代碼(構建結果依賴於使用者的系統環境),推薦以 wheel packaging format 發佈預編譯的 binary packages。
3. Fibonacci fun
創建fib.pyx文件,在其中定義方法
from __future__ import print_function
def fib(n):
    """Print the Fibonacci series up to n.

    Writes every Fibonacci number strictly below ``n`` to standard output on
    a single line, each followed by one space, then ends the line.

    Args:
        n: Exclusive upper bound for the printed values.

    Returns:
        None.
    """
    a, b = 0, 1
    while b < n:
        print(b, end=' ')
        a, b = b, a + b
    # Terminate the line, even when no number was printed.
    print()
同樣編寫構建代碼setup.py
from distutils.core import setup
from Cython.Build import cythonize
# Translate fib.pyx into an extension module and register it for the
# build_ext step.
# NOTE(review): distutils was removed from the standard library in Python 3.12;
# consider importing setup from setuptools instead.
fib_extensions = cythonize("fib.pyx")
setup(ext_modules=fib_extensions)
生成c庫
python setup.py build_ext --inplace
調用查看結果
>>> import fib
>>> fib.fib(2000)
1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597
4. Primes 求素數
創建primes.pyx文件,定義求素數方法
def primes(int nb_primes):
    """Return the first ``nb_primes`` prime numbers as a Python list.

    Each candidate is tested by trial division against the primes found so
    far, which are kept in a fixed-size C array of 1000 ints. Requests for
    more than 1000 primes are therefore capped at 1000.

    Args:
        nb_primes: How many primes to compute (values above 1000 are clamped).

    Returns:
        A list of the primes found, in increasing order.
    """
    cdef int n, i, len_p
    cdef int p[1000]

    # Never write past the end of the fixed-size C array.
    if nb_primes > 1000:
        nb_primes = 1000

    len_p = 0  # The current number of elements in p.
    n = 2
    while len_p < nb_primes:
        # Is n prime?
        for i in p[:len_p]:
            if n % i == 0:
                break
        # If no break occurred in the loop, we have a prime.
        else:
            p[len_p] = n
            len_p += 1
        n += 1

    # Let's return the result in a python list:
    result_as_list = [prime for prime in p[:len_p]]
    return result_as_list
其中
cdef int n, i, len_p
cdef int p[1000]
這兩行使用 cdef 定義c的局部變量,運行時結果會存儲在c數組 p 中,並且通過倒數第二行將結果複製到python list (result_as_list)中
執行結果
>>> import primes
>>> primes.primes(10)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
5. 使用cython直接轉換 .py 文件
創建文件primes_py.py文件,定義python方法
def primes_python(nb_primes):
    """Return the first ``nb_primes`` prime numbers as a list.

    Each candidate is tested by trial division against the primes found so
    far.

    Args:
        nb_primes: How many primes to compute.

    Returns:
        A list of the primes found, in increasing order.
    """
    p = []
    n = 2
    while len(p) < nb_primes:
        # Is n prime?
        for i in p:
            if n % i == 0:
                break
        # If no break occurred in the loop, n has no smaller prime divisor.
        else:
            p.append(n)
        n += 1
    return p
使用Cython直接轉換python代碼
from distutils.core import setup
from Cython.Build import cythonize
# Build both variants in one go; annotate=True also generates the HTML
# annotation file.
prime_sources = [
    'primes.pyx',    # Cython source file with the primes() function
    'primes_py.py',  # plain Python source file with the primes_python() function
]
setup(ext_modules=cythonize(prime_sources, annotate=True))
對比cython代碼和直接轉換python代碼的結果是否一致
>>> primes_python(1000) == primes(1000)
True
>>> primes_python_compiled(1000) == primes(1000)
True
對比三種方式運行的效率
python -m timeit -s 'from primes_py import primes_python' 'primes_python(1000)'
10 loops, best of 3: 23 msec per loop
python -m timeit -s 'from primes_py import primes_python_compiled' 'primes_python_compiled(1000)'
100 loops, best of 3: 11.9 msec per loop
python -m timeit -s 'from primes import primes' 'primes(1000)'
1000 loops, best of 3: 1.65 msec per loop
直接使用 Cython 編譯 Python 代碼可以達到純 Python 約 2 倍的速度(23 / 11.9 ≈ 1.9),而使用 Cython 語法(帶 C 類型聲明)編寫的代碼能達到純 Python 約 14 倍的速度(23 / 1.65 ≈ 13.9)。
6. Memory Allocation
對於大對象和複雜對象,需要手動控制其內存的申請和釋放。C 提供的函數 malloc()、realloc()、free() 可以通過 `from libc.stdlib cimport ...` 導入到 Cython 中使用:
void* malloc(size_t size)
void* realloc(void* ptr, size_t size)
void free(void* ptr)
使用例子
import random
from libc.stdlib cimport malloc, free
def random_noise(int number=1):
    """Return ``number`` samples produced by ``random.normalvariate(0, 1)``.

    The samples are first written into a malloc'ed C array of doubles and
    then copied into a Python list. The C buffer is released before the
    function returns, whether it succeeds or raises.

    Args:
        number: How many samples to generate (defaults to 1).

    Returns:
        A list of ``number`` Python floats.

    Raises:
        MemoryError: If the C buffer cannot be allocated.
    """
    cdef int i
    cdef double *my_array

    # malloc(0) is allowed to return NULL, which must not be reported as an
    # out-of-memory condition.
    if number == 0:
        return []

    # allocate number * sizeof(double) bytes of memory
    my_array = <double *> malloc(number * sizeof(double))
    if not my_array:
        raise MemoryError()

    try:
        ran = random.normalvariate
        for i in range(number):
            my_array[i] = ran(0, 1)

        # ... let's just assume we do some more heavy C calculations here to
        # make up for the work that it takes to pack the C double values into
        # Python float objects below, right after throwing away the existing
        # objects above.

        return [x for x in my_array[:number]]
    finally:
        # return the previously allocated memory to the system
        free(my_array)
也可以使用 CPython C-API 提供的 PyMem_Malloc、PyMem_Realloc、PyMem_Free 實現同樣的功能(可從 cpython.mem 中 cimport):
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
對於大塊且生命週期較長的內存,可以像上例一樣使用 try..finally 塊來控制。另一種更好的方式是藉助 Python 對象的生命週期來管理內存:如下面的簡單示例,在對象創建時申請內存,在對象被回收時釋放內存。
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
cdef class SomeMemory:
    """Owner of a PyMem-allocated buffer of C doubles.

    The buffer is allocated when the object is created, can be resized with
    resize(), and is released when the object is deallocated.
    """

    # Pointer to the owned buffer.
    cdef double* data

    def __cinit__(self, size_t number):
        """Allocate room for ``number`` doubles.

        Raises:
            MemoryError: If the allocation fails.
        """
        # allocate some memory (uninitialised, may contain arbitrary data)
        self.data = <double*> PyMem_Malloc(number * sizeof(double))
        if not self.data:
            raise MemoryError()

    def resize(self, size_t new_number):
        """Resize the buffer to hold ``new_number`` doubles.

        Raises:
            MemoryError: If the reallocation fails; the existing buffer is
                left untouched in that case.
        """
        cdef double* mem
        # Allocates new_number * sizeof(double) bytes,
        # preserving the current content and making a best-effort to
        # re-use the original data location.
        mem = <double*> PyMem_Realloc(self.data, new_number * sizeof(double))
        if not mem:
            raise MemoryError()
        # Only overwrite the pointer if the memory was really reallocated.
        # On error (mem is NULL), the original memory has not been freed.
        self.data = mem

    def __dealloc__(self):
        PyMem_Free(self.data)  # no-op if self.data is NULL
7. 使用動態數組
一個計算編輯距離的例子
from libc.stdlib cimport malloc, free
def calculate_edit_distance(word1, word2):
    """Return the edit (Levenshtein) distance between two sequences.

    Fills a (len(word1) + 1) x (len(word2) + 1) table of C ints in which
    dp[i][j] is the distance between the first i items of word1 and the
    first j items of word2. Insertions, deletions and substitutions each
    cost 1.

    Args:
        word1: First sequence; must support len() and indexing.
        word2: Second sequence; must support len() and indexing.

    Returns:
        The minimum number of single-item edits that turn word1 into word2.

    Raises:
        MemoryError: If the table cannot be allocated.
    """
    cdef Py_ssize_t len1 = len(word1)
    cdef Py_ssize_t len2 = len(word2)
    cdef Py_ssize_t i, j
    cdef int delta
    cdef int** dp = <int**> malloc((len1 + 1) * sizeof(int*))
    if dp == NULL:
        raise MemoryError()

    # Null out the row pointers first so the cleanup below is safe even when
    # only some of the rows could be allocated.
    for i in range(len1 + 1):
        dp[i] = NULL

    try:
        for i in range(len1 + 1):
            dp[i] = <int*> malloc((len2 + 1) * sizeof(int))
            if dp[i] == NULL:
                raise MemoryError()

        # The distance to or from an empty prefix is the other prefix's length.
        for i in range(len1 + 1):
            dp[i][0] = <int> i
        for j in range(len2 + 1):
            dp[0][j] = <int> j

        for i in range(1, len1 + 1):
            for j in range(1, len2 + 1):
                delta = 0 if word1[i - 1] == word2[j - 1] else 1
                dp[i][j] = min(
                    dp[i - 1][j - 1] + delta,  # match or substitute
                    dp[i - 1][j] + 1,          # delete from word1
                    dp[i][j - 1] + 1,          # insert into word1
                )

        return dp[len1][len2]
    finally:
        # Runs on success and on any exception (e.g. raised while indexing or
        # comparing the inputs), so the table is never leaked.
        for i in range(len1 + 1):
            free(dp[i])  # free(NULL) is a no-op
        free(dp)