Python 中进行 lowess 平滑

模拟得到的数据,有时会存在较大的局部波动,导致画出来的曲线不够平滑,此时可以通过 lowess 平滑进行处理。

lowess(locally weighted scatterplot smoothing,局部加权回归散点平滑法)的主要思想是提取一定比例的局部数据,在这部分局部数据中拟合多项式回归曲线,从而使曲线更加平滑。下面记录lowess平滑的不同实现方法:

Python

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np 
import pandas as pd
from pandas import Series, DataFrame

x=np.linspace(0,3.14*20,500)
y=np.sin(x) + np.random.normal(loc=0.0,scale=0.1,size=len(x))


# statsmodels.api
import statsmodels.api as sm
lowess=sm.nonparametric.lowess
z=lowess(y,x,frac=0.1)
plt.scatter(x,y)
plt.plot(z[:,0],z[:,1],color='k',lw=1)


# Python seaborn.lmplot()
import seaborn as sns
d=np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
df=DataFrame(d,columns=['xdata','ydata'])
sns.lmplot(x='xdata', y='ydata', data=df,lowess=True)


# Savitzky-Golay filter 平滑
from scipy.signal import savgol_filter
zs=savgol_filter(y, 21, 3) # window size 51, polynomial order 3
plt.plot(x,zs,color='g',lw=1)


# 自定义函数 np.convolve()
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth

z3=smooth(y,3)
z4=smooth(y,19)
plt.plot(x,z3,color='g',lw=1)
plt.plot(x,z4,color='g',lw=1)


# KernelReg
from statsmodels.nonparametric.kernel_regression import KernelReg

xnew=x
# The third parameter specifies the type of the variable x;
# 'c' stands for continuous;
# 'u' stands for discrete(unordered)
kr = KernelReg(y,x,'c')
zk= kr.fit(xnew)
plt.plot(x,zk,color='g',lw=1)


# 周期性信号 傅里叶变换
import scipy.fftpack

N=len(x)
w = scipy.fftpack.rfft(y)
f = scipy.fftpack.rfftfreq(N, x[1]-x[0])
spectrum = w**2

cutoff_idx = spectrum < (spectrum.max()/5)
w2 = w.copy()
w2[cutoff_idx] = 0
zf = scipy.fftpack.irfft(w2)
plt.plot(x,zf,color='g',lw=1)


# 样条插值
import numpy as np
from scipy.interpolate import splev, splrep
xnew=x
spl=splrep(x,y,k=3) # 3次
z1=splev(xnew,spl)
plt.plot(xnew,z1,color='g',lw=1)


# Rbf 插值
from scipy.interpolate import Rbf
xnew=x
rbf=Rbf(x,y)
z2=rbf(xnew)
plt.plot(xnew,z2,color='g',lw=1)

R 中的实现

1
lowess(x,y)

Test

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
%pylab 
#%matplotlib inline

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

from scipy.signal import savgol_filter

import statsmodels.api as sm
lowess=sm.nonparametric.lowess

from scipy.interpolate import splev, splrep
from scipy.interpolate import Rbf

import seaborn as sns

import scipy.fftpack
from statsmodels.nonparametric.kernel_regression import KernelReg

####################
x=np.linspace(0,3.14*20,500)
y=np.sin(x) + np.random.normal(loc=0.0,scale=0.1,size=len(x))


# lowess
z=lowess(y,x,frac=0.1)

# bspline
spl=splrep(x,y,k=3) # 3次
z1=splev(x,spl)

# Rbf
rbf=Rbf(x,y)
z2=rbf(x)

# savgol_filter
zs=savgol_filter(y, 21, 3) # window size 51, polynomial order 3

# 自定义函数
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth

z3=smooth(y,3)
z4=smooth(y,19)

# fft
N=len(x)
w = scipy.fftpack.rfft(y)
f = scipy.fftpack.rfftfreq(N, x[1]-x[0])
spectrum = w**2
cutoff_idx = spectrum < (spectrum.max()/5)
w2 = w.copy()
w2[cutoff_idx] = 0
zf = scipy.fftpack.irfft(w2)

# KernelReg
xnew=x
# The third parameter specifies the type of the variable x;
# 'c' stands for continuous;
# 'u' stands for discrete(unordered)
#kr = KernelReg(y,x,'c')
#zk= kr.fit(xnew)[0]

# plot
fig,ax=plt.subplots(2,4,figsize=(14,6))
ax=ax.flatten()
for a in ax:
a.scatter(x,y,s=1)
a.set_ylim(0,0.0006)

ax[0].plot(x,z[:,1],color='g',lw=1) # lowess
ax[1].plot(x,z1,color='y',lw=1) # bspline
ax[2].plot(x,z2,color='r',lw=1) # Rbf
ax[3].plot(x,zs,color='g',lw=1) # savgol_filter

ax[4].plot(x,z3,color='b',lw=1) # 自定义函数
ax[5].plot(x,z4,color='cyan',lw=1) # 自定义函数
ax[6].plot(x,zf,color='bex',lw=1) # fft
#ax[7].plot(x,zk,color='g',lw=1) # KernelReg