python - 高斯适合置信区间的python

我想对一些粗略的高斯拟合数据进行高斯拟合。我想要数据峰值（A），中心位置（mu）和标准偏差（sigma）以及这些值的95％置信区间的信息。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import norm

# gaussian function
def gaussian_func(x, A, mu, sigma):
    return A * np.exp( - (x - mu)**2 / (2 * sigma**2))

# generate toy data
x = np.arange(50)
y = [ 97.04421053,  96.53052632,  96.85684211,  96.33894737,  96.85052632,
  96.30526316,  96.87789474,  96.75157895,  97.05052632,  96.73473684,
  96.46736842,  96.23368421,  96.22526316,  96.11789474,  96.41263158,
  96.32631579,  96.33684211,  96.44421053,  96.48421053,  96.49894737,
  97.30105263,  98.58315789, 100.07368421, 101.43578947, 101.92210526,
 102.26736842, 101.80421053, 101.91157895, 102.07368421, 102.02105263,
 101.35578947,  99.83578947,  98.28,        96.98315789,  96.61473684,
  96.82947368,  97.09263158,  96.82105263,  96.24210526,  95.95578947,
  95.84210526,  95.67157895,  95.83157895,  95.37894737,  95.25473684,
  95.32842105,  95.45684211,  95.31578947,  95.42526316,  95.30526316]
plt.scatter(x,y)

# initial_guess_of_parameters
# この値はソルバーとかで求めましょう．
parameter_initial = np.array([652, 2.9, 1.3])

# estimate optimal parameter & parameter covariance
popt, pcov = curve_fit(gaussian_func, x, y, p0=parameter_initial)

# plot result
xd = np.arange(x.min(), x.max(), 0.01)
estimated_curve = gaussian_func(xd, popt[0], popt[1], popt[2])
plt.plot(xd, estimated_curve, label="Estimated curve", color="r")
plt.legend()
plt.savefig("gaussian_fitting.png")
plt.show()

# estimate standard Error
StdE = np.sqrt(np.diag(pcov))

# estimate 95% confidence interval
alpha=0.025
lwCI = popt + norm.ppf(q=alpha)*StdE
upCI = popt + norm.ppf(q=1-alpha)*StdE

# print result
mat = np.vstack((popt,StdE, lwCI, upCI)).T
df=pd.DataFrame(mat,index=("A", "mu", "sigma"),
columns=("Estimate", "Std. Error", "lwCI", "upCI"))
print(df)

Data Plot with Fitted Curve

数据峰值和中心位置似乎正确，但是标准偏差关闭。任何输入，不胜感激。

最佳答案

您的散点的确看起来类似于高斯分布，但它不在零附近。鉴于高斯函数的细节，将很难以您给我们的方式将高斯分布与数据很好地拟合。因此，我建议从定义x系列开始：

x = np.arange(0, 50) - 24.5

接下来，我将向您的高斯函数添加一个附加参数，即偏移量。由于常规高斯函数的尾部总是接近零，因此无法很好地拟合散点图：

def gaussian_function(x, A, mu, sigma, offset):
    return A * np.exp(-np.power((x - mu)/sigma, 2.)/2.) + offset

接下来，您应该定义一个error_loss_function以使其最小化：

def error_loss_function(params):
    gaussian = gaussian_function(x, params[0], params[1], params[2], params[3])
    errors = gaussian - y
    return sum(np.power(errors, 2))  # You can also pick a different error loss function!

现在剩下的一切都符合我们的曲线：

fit = scipy.optimize.minimize(fun=error_loss_function, x0=[2, 0, 0.2, 97])
params = fit.x  # A: 6.57592661,  mu: 1.95248855,  sigma: 3.93230503, offset: 96.12570778

xd = np.arange(x.min(), x.max(), 0.01)
estimated_curve = gaussian_function(xd, params[0], params[1], params[2], params[3])
plt.plot(xd, estimated_curve, label="Estimated curve", color="b")
plt.legend()
plt.show(block=False)

希望这会有所帮助。看起来像一个有趣的项目，如果我的答案不清楚，请告诉我。

关于python - 高斯适合置信区间的python，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/58186439/