使用simpson's rule
库中的scipy.integrate
时出现问题。即使所有数字均为正且x轴上的值从左向右递增,有时计算得出的面积也为负。例如:
from scipy.integrate import simps
x = [0.0, 99.0, 100.0, 299.0, 400.0, 600.0, 1700.0, 3299.0, 3300.0, 3399.0, 3400.0, 3599.0, 3699.0, 3900.0,
4000.0, 4300.0, 4400.0, 4900.0, 5000.0, 5100.0, 5300.0, 5500.0, 5700.0, 5900.0, 6100.0, 6300.0, 6600.0,
6900.0, 7200.0, 7600.0, 7799.0, 8000.0, 8400.0, 8900.0, 9400.0, 10000.0, 10600.0, 11300.0, 11699.0,
11700.0, 11799.0]
y = [3399.68, 3399.68, 3309.76, 3309.76, 3274.95, 3234.34, 3203.88, 3203.88, 3843.5,
3843.5, 4893.57, 4893.57, 4893.57, 4847.16, 4764.49, 4867.46, 4921.13, 4886.32,
4761.59, 4731.13, 4689.07, 4649.91, 4610.75, 4578.84, 4545.48, 4515.02, 4475.86,
4438.15, 4403.34, 4364.18, 4364.18, 4327.92, 4291.66, 4258.31, 4226.4, 4188.69,
4152.43, 4120.52, 4120.52, 3747.77, 3747.77]
area = simps(y,x)
simps(y,x)
返回的结果是-226271544.06562585
。为什么是负面的?仅在某些情况下会发生这种情况,而在其他情况下会很好。例如:x = [0.0, 100.0, 101.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 1300.0, 3300.0, 3400.0, 3600.0, 3700.0,
5100.0, 5200.0, 5400.0, 5600.0, 5800.0, 6000.0, 6200.0, 6400.0, 6600.0, 6900.0, 7200.0, 7500.0, 7900.0,
8299.0, 8400.0, 8900.0, 9400.0, 10000.0, 10600.0, 11200.0, 11900.0, 12600.0, 13500.0, 14300.0, 15300.0,
16400.0, 16499.0, 17500.0, 18900.0, 20100.0, 20999.0, 21000.0, 21099.0]
y = [2813.73, 2813.73, 3200.98, 3309.76, 3356.17, 3296.71, 3243.04, 3243.04, 3198.08, 3161.82, 3488.16,
4929.83, 4897.92, 4897.92, 4763.04, 4726.78, 4680.37, 4638.31, 4597.69, 4561.44, 4525.18, 4494.72,
4464.26, 4426.55, 4388.84, 4354.03, 4316.32, 4316.32, 4275.71, 4239.45, 4203.19, 4171.28, 4136.47,
4104.57, 4074.11, 4042.2, 4011.74, 3979.83, 3949.38, 3918.92, 3918.92, 3887.01, 3855.1, 3824.64,
3824.64,3605.64, 3605.64]
area = simps(y,x)
在这种情况下,该区域为正
83849670.99112588
。这是什么原因呢?
最佳答案
问题是辛普森是如何工作的,它估计了最好的二次函数,并且使用了像您这样的数据,在其中几乎有一个垂直区域,因此操作是错误的。
import numpy as np
from scipy.integrate import simps, trapz
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
return a + b * x + c * x ** 2
x = np.array([0.0, 99.0, 100.0, 299.0, 400.0, 600.0, 1700.0, 3299.0, 3300.0, 3399.0, 3400.0, 3599.0, 3699.0, 3900.0,
4000.0, 4300.0, 4400.0, 4900.0, 5000.0, 5100.0, 5300.0, 5500.0, 5700.0, 5900.0, 6100.0, 6300.0, 6600.0,
6900.0, 7200.0, 7600.0, 7799.0, 8000.0, 8400.0, 8900.0, 9400.0, 10000.0, 10600.0, 11300.0, 11699.0,
11700.0, 11799.0])
y = np.array([3399.68, 3399.68, 3309.76, 3309.76, 3274.95, 3234.34, 3203.88, 3203.88, 3843.5,
3843.5, 4893.57, 4893.57, 4893.57, 4847.16, 4764.49, 4867.46, 4921.13, 4886.32,
4761.59, 4731.13, 4689.07, 4649.91, 4610.75, 4578.84, 4545.48, 4515.02, 4475.86,
4438.15, 4403.34, 4364.18, 4364.18, 4327.92, 4291.66, 4258.31, 4226.4, 4188.69,
4152.43, 4120.52, 4120.52, 3747.77, 3747.77])
for i in range(3,len(x)):
popt, _ = curve_fit(func, x[i-3:i], y[i-3:i])
xnew = np.linspace(x[i-3], x[i-1], 100)
plt.plot(xnew, func(xnew, *popt), 'k-')
plt.plot(x, y)
plt.show()