---
jupyter:
  jupytext:
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.2'
    jupytext_version: 1.4.1
  kernelspec:
    display_name: Python 3
    language: python
    name: python3
---
一,分析代码运行时间
1,测算代码单次运行时间
# Plain approach: bracket the statement with two time.time() calls
import time
tic = time.time()
much_job = [x**2 for x in range(1,1000000,3)]
toc = time.time()
print('used {:.5}s'.format(toc-tic))
# Quick approach (Jupyter): the %%time cell magic reports wall/CPU time
%%time
much_job = [x**2 for x in range(1,1000000,3)]
2,测算代码重复执行多次平均用时
# 平凡方法
from timeit import timeit
g = lambda x:x**2+1
def main():
return(g(2)**120)
#timeit('main()',setup = 'from __main__ import main',number = 10)
timeit('main()',globals = {'main':main},number = 10)
# 快捷方法(jupyter)
%%timeit -n 10
g = lambda x:x**2+1
def main():
return(g(2)**120)
main()
3,按调用函数分析代码运行时间
# 平凡方法
def relu(x):
return(x if x>0 else 0)
def main():
result = [relu(x) for x in range(-100000,100000,1)]
return result
import profile
profile.run('main()')
# 快捷方法(jupyter)
%prun main()
4,按行分析代码运行时间
# 平凡方法
!pip install line_profiler
%load_ext line_profiler
def relu(x):
return(x if x>0 else 0)
def main():
result = [relu(x) for x in range(-100000,100000)]
return result
from line_profiler import LineProfiler
lprofile = LineProfiler(main,relu)
lprofile.run('main()')
lprofile.print_stats()
#快捷方法(jupyter)
%lprun -f main -f relu main()
二,加速你的查找
5,用set而非list进行in查找
#低速方法
data = (i**2 + 1 for i in range(1000000))
list_data = list(data)
set_data = set(data)
%%time
1098987 in list_data
# 高速方法
%%time
1098987 in set_data
6,用dict而非两个list进行匹配查找
# Slow approach: list.index is an O(n) linear scan
list_a = [2*i-1 for i in range(1000000)]
list_b = [i**2 for i in list_a ]
dict_ab = dict(zip(list_a,list_b))
%%time
print(list_b[list_a.index(876567)])
# Fast approach: dict.get is an O(1) average hash lookup
%%time
print(dict_ab.get(876567,None))
三,加速你的循环
7,优先使用for循环而不是while循环
#低速方法
%%time
s,i = 0,0
while i<10000:
i = i + 1
s = s + i
print(s)
#高速方法
%%time
s = 0
for i in range(1,10001):
s = s + i
print(s)
8,循环体中避免重复运算
# Slow approach: sum(a) is recomputed for every element of the comprehension
a = [i**2+1 for i in range(2000)]
%%time
b = [i/sum(a) for i in a]
# Fast approach: hoist the loop-invariant sum out of the comprehension
%%time
sum_a = sum(a)
b = [i/sum_a for i in a]
四,加速你的函数
9,用缓存机制加速递归函数
# 低速方法
%%time
def fib(n):
return(1 if n in (1,2) else fib(n-1)+fib(n-2))
print(fib(30))
#高速方法
%%time
from functools import lru_cache
@lru_cache(100)
def fib(n):
return(1 if n in (1,2) else fib(n-1)+fib(n-2))
print(fib(30))
fib.cache_info()
10,用循环取代递归函数
# 低速方法
%%time
def fib(n):
return(1 if n in (1,2) else fib(n-1)+fib(n-2))
print(fib(30))
# 高速方法
%%time
def fib(n):
if n in (1,2):
return(1)
a,b = 1,1
for i in range(2,n):
a,b = b,a+b
return(b)
print(fib(30))
11, 使用Numba加速Python函数
# 低速方法
%%time
def my_power(x):
return(x**2)
def my_power_sum(n):
s = 0
for i in range(1,n+1):
s = s + my_power(i)
return(s)
print(my_power_sum(1000000))
# 高速方法
%%time
from numba import jit
@jit
def my_power(x):
return(x**2)
@jit
def my_power_sum(n):
s = 0
for i in range(1,n+1):
s = s + my_power(i)
return(s)
print(my_power_sum(1000000))
五,使用标准库函数进行加速
12,使用collections.Counter类加速计数
# 低速方法
data = [x**2%1989 for x in range(2000000)]
%%time
values_count = {}
for i in data:
i_cnt = values_count.get(i,0)
values_count[i] = i_cnt + 1
print(values_count.get(4,0))
# 高速方法
%%time
from collections import Counter
values_count = Counter(data)
print(values_count.get(4,0))
13, 使用collections.ChainMap加速字典合并
# Slow approach: copy one dict and update() with the rest — O(total size) copying
dic_a = {i:i+1 for i in range(1,1000000,2)}
dic_b = {i:2*i+1 for i in range(1,1000000,3)}
dic_c = {i:3*i+1 for i in range(1,1000000,5)}
dic_d = {i:4*i+1 for i in range(1,1000000,7)}
%%time
result = dic_a.copy()
result.update(dic_b)
result.update(dic_c)
result.update(dic_d)
print(result.get(9999,0))
# Fast approach: ChainMap builds a layered view without copying anything
# NOTE(review): lookup precedence differs — ChainMap returns the FIRST map's
# value (dic_a wins), while chained update() lets the LAST dict win (dic_d).
# For keys present in several dicts the two results can differ.
%%time
from collections import ChainMap
chain = ChainMap(dic_a,dic_b,dic_c,dic_d)
print(chain.get(9999,0))
六,使用numpy向量化进行加速
14,使用np.array代替list
# Slow approach: element-wise arithmetic in a Python-level loop
%%time
a = range(1,1000000,3)
b = range(1000000,1,-3)
c = [3*a[i]-2*b[i] for i in range(0,len(a))]
# Fast approach: numpy evaluates the whole expression vectorized in C
%%time
import numpy as np
array_a = np.arange(1,1000000,3)
array_b = np.arange(1000000,1,-3)
array_c = 3*array_a - 2*array_b
15,使用np.ufunc代替math.func
# Slow approach: call math.log once per element in a Python loop
%%time
import math
a = range(1,1000000,3)
b = [math.log(x) for x in a]
# Fast approach: np.log is a ufunc applied to the whole array at once
%%time
import numpy as np
array_a = np.arange(1,1000000,3)
array_b = np.log(array_a)
16,使用np.where代替if
# Slow approach
import numpy as np
array_a = np.arange(-100000,1000000)
%%time
# np.vectorize turns an ordinary function into one that accepts arrays,
# but it is only a convenience Python loop, not true vectorization
relu = np.vectorize(lambda x: x if x>0 else 0)
array_b = relu(array_a)
# Fast approach: np.where evaluates the branch element-wise in C
%%time
relu = lambda x:np.where(x>0,x,0)
array_b = relu(array_a)
七,加速你的Pandas
17,优先直接使用np.ufunc函数
# Slow approach: applymap calls the Python lambda once per cell
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 (use DataFrame.map)
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randint(-10,11,size = (100000,26)),
columns = list('abcdefghijklmnopqrstuvwxyz'))
%time dfresult = df.applymap(lambda x:np.sin(x)+np.cos(x))
# Fast approach: apply the ufuncs to the whole DataFrame at once
%%time
dfresult = np.sin(df) + np.cos(df)
18,避免动态改变DataFrame的行数
# 低速方法
%%time
import pandas as pd
import numpy as np
df = pd.DataFrame(columns = list('abcdefghijklmnopqrstuvwxyz') )
for i in range(10000):
df.loc[i,:] = range(i,i+26)
# 高速方法
%%time
import pandas as pd
import numpy as np
df = pd.DataFrame(np.zeros((10000,26)),
columns = list('abcdefghijklmnopqrstuvwxyz'))
for i in range(10000):
df.loc[i,:] = range(i,i+26)
19,使用csv文件读写代替xlsx文件读写
# Build sample data
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randint(-10,11,size=(10000,5)),
columns = list('abced'))
# Slow approach: xlsx goes through a heavyweight spreadsheet writer
%%time
df.to_excel('data.xlsx')
# Fast approach: csv is plain text and far cheaper to serialize
%%time
df.to_csv('data.csv')
20,使用pandas多进程工具pandarallel
# Slow approach: single-process row-wise apply
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(-10,11,size=(10000,26)),
columns = list('abcdefghijklmnopqrstuvwxyz'))
%%time
result = df.apply(np.sum,axis = 1)
# Fast approach: pandarallel splits the frame across 4 worker processes
!pip install pandarallel
%%time
from pandarallel import pandarallel
pandarallel.initialize(nb_workers=4)
result = df.parallel_apply(np.sum,axis = 1)
八,使用Dask进行加速
21,使用dask加速dataframe
# Slow approach: single-threaded pandas groupby
import numpy as np
import pandas as pd
# NOTE(review): 100,000,000 x 5 ints needs several GB of RAM — shrink to experiment
df = pd.DataFrame(np.random.randint(0,6,size=(100000000,5)),
columns = list('abcde'))
%time df.groupby('a').mean()
# Fast approach: dask partitions the frame and aggregates partitions in parallel
!pip install dask
import dask.dataframe as dd
df_dask = dd.from_pandas(df,npartitions=40)
%time df_dask.groupby('a').mean().compute()
22,使用dask.delayed应用多进程加速
# 低速方法
import time
def muchjob(x):
time.sleep(5)
return(x**2)
%%time
result = [muchjob(i) for i in range(5)]
result
# 高速方法
%%time
from dask import delayed,compute
from dask import threaded,multiprocessing
values = [delayed(muchjob)(i) for i in range(5)]
result = compute(*values,scheduler='multiprocessing')
九,应用多线程多进程加速
23,使用多线程提升IO密集任务效率
# 低速方法
%rm -rf *.txt
%%time
def writefile(i):
with open(str(i)+'.txt','w') as f:
s = ('hello %d'%i)*10000000
f.write(s)
# 串行任务
for i in range(30):
writefile(i)
# 高速方法
%%time
import threading
def writefile(i):
with open(str(i)+'.txt','w') as f:
s = ('hello %d'%i)*10000000
f.write(s)
# 多线程任务
thread_list = []
for i in range(30):
t =threading.Thread(target=writefile,args=(i,))
t.setDaemon(True) #设置为守护线程
thread_list.append(t)
for t in thread_list:
t.start() #启动线程
for t in thread_list:
t.join() #等待子线程结束
24,使用多进程提升CPU密集任务效率
# 低速方法
%%time
import time
def muchjob(x):
time.sleep(5)
return(x**2)
#串行任务
ans = [muchjob(i) for i in range(8)]
print(ans)
# 高速方法
%%time
import time
import multiprocessing
def muchjob(x):
time.sleep(5)
return(x**2)
#多进程任务
pool = multiprocessing.Pool(processes=4)
result = []
for i in range(8):
result.append(pool.apply_async(muchjob, (i,)))
pool.close()
pool.join()
ans = [res.get() for res in result]
print(ans)
转自:算法美食屋经典原创文章源码库(https://github.com/lyhue1991/PythonAiRoad)
标签:python,代码,list,%%,range,time,np,import,加速 From: https://www.cnblogs.com/aleiyoy/p/16904066.html