但这里只是为了好玩,线性最小二乘通过正交分解 使用 家庭反射.(警告将在任何大量数据上缓慢运行.)-- 创建一个类型来表示一个二维矩阵CREATE TYPE dbo.Matrix AS TABLE (i int, j int, Aij float, PRIMARY KEY (i, j))去-- 执行 QR 分解的函数,即 A ->二维码创建函数 dbo.QRDecomposition (@matrix dbo.Matrix 只读)返回 @result 表(矩阵 char(1), i int, j int, Aij float)作为开始声明@m int、@n int、@i int、@j int、@a float选择@m = MAX(i), @n = MAX(j)来自@matrix设置@i = 1设置@j = 1声明@R dbo.Matrix声明@Qj dbo.Matrix声明@Q dbo.Matrix-- 通过@m 单位矩阵生成一个@m 以转换为Q,为m > 添加更多数字1000;与 e1(n) AS(SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALLSELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALLSELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b),e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2),numbers(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY n) FROM e3)插入@Q (i, j, Aij)SELECT i.n, j.n, CASE 当 i.n = j.n THEN 1 ELSE 0 END从数字我交叉连接数 j其中 i.n = @iSELECT @a = -SIGN(Aij) * @a发件人@R其中 j = @j AND i = @j + (@j - 1);随着你 (i, j, Aij) AS (选择 i, 1, u.ui从 (SELECT i, CASE WHEN i = j THEN Aij + @a ELSE Aij END AS ui发件人@R哪里 j = @jAND i >= @i) 你)插入@Qj (i, j, Aij)SELECT i, j, CASE WHEN i = j THEN 1 - 2 * Aij ELSE - 2 * Aij END as Aij从 (SELECT u.i, ut.i AS j, u.Aij * ut.Aij/(SELECT SUM(Aij * Aij) FROM u) AS Aij从你你交叉加入你) vvt-- 将逆 Householder 反射应用于 Q更新 QjSET Aij = [Qj+1].Aij来自@Q Qj内部联接 (SELECT Q.i, QjT.j, SUM(QjT.Aij * Q.Aij) AS Aij来自@Q Q内部联接 (SELECT i AS j, j AS i, Aij来自@Qj) QjT ON QjT.i = Q.jGROUP BY Q.i, QjT.j) [Qj+1] ON [Qj+1].i = Qj.i AND [Qj+1].j = Qj.j-- 将家庭反射应用于 R更新 RjSET Aij = [Rj+1].Aij来自@R Rj内部联接 (SELECT Qj.i, R.j, SUM(Qj.Aij * R.Aij) AS Aij来自@Qj Qj内连接 @R R ON R.i = Qj.jGROUP BY Qj.i, R.j) [Rj+1] ON [Rj+1].i = Rj.i AND [Rj+1].j = Rj.j-- 为下一次家庭反思准备 Qj更新@QjSET Aij = CASE WHEN i = j THEN 1 ELSE 0 END其中 i 这是一个测试脚本/使用示例:DECLARE @TestData TABLE (i int IDENTITY(1, 1), X1 float, X2 float, X3 float, X4 float, y float)声明 @c 浮动声明 @b1 浮动声明 @b2 浮动声明 @b3 浮动声明 @b4 浮动-- bs 是目标系数SET @c = 兰德()SET @b1 = 2 * 兰德()SET @b2 = 3 * RAND()SET @b3 = 4 * RAND()SET @b4 = 5 * 兰德()-- 生成一些测试数据,从 c + Xb 加上一些噪声计算 y: y = c + Xb + e-- 注意:对 e 使用 RAND() 不是线性回归假设的正常分布噪声,这会干扰 c 的估计声明@k int = 1当@k I have developed Simple Linear regression function in SQL Server from here (https://ask.sqlservercentral.com/questions/96778/can-this-linear-regression-algorithm-for-sql-serve.html) to calculate Alpha,Beta and some extra values like Upper 95% and Lower 95%.The Simple Linear regression takes the argument as X and y.Now I am in need of perform Multiple Linear regression SQL Server, which takes arguments y and X1,X2,X3,.....XnHence the Output will be as follows: Coefficients Standard Error t Stat P-value Lower 95% Upper 95%+-------------------------------------------------------------------------------------------+ Intercept -23.94650812 19.85250194 -1.20622117 0.351059563 -109.3649298 X Variable 1 0.201064291 0.119759437 1.678901439 0.235179 -0.314218977 X Variable 2 -0.014046021 0.037366638 -0.375897368 0.743119791 -0.174821687 X Variable 3 0.502074905 0.295848189 1.697069389 0.231776287 -0.770857111 X Variable 4 0.068238344 0.219256527 0.311226057 0.785072958 -0.875146351Anyone can please suggest me a good way to achieve this. 解决方案 I would look at using CLR integration to take advantage of an existing .NET library supporting Linear Regression, for example Math.NET Numerics. Using a CLR stored procedure you would be able to read the data out of a table, transform it to the .NET libraries matrix type, run the regression, then either write the results back to a table or return a row set directly.But just for fun here is Linear Least Squares solved via Orthogonal Decomposition using Householder reflections in SQL. (Warning will run slowly on any significant amount of data.)-- Create a type to repsent a 2D MatrixCREATE TYPE dbo.Matrix AS TABLE (i int, j int, Aij float, PRIMARY KEY (i, j))GO-- Function to perform QR factorisation ie A -> QRCREATE FUNCTION dbo.QRDecomposition ( @matrix dbo.Matrix READONLY)RETURNS @result TABLE (matrix char(1), i int, j int, Aij float)ASBEGIN DECLARE @m int, @n int, @i int, @j int, @a float SELECT @m = MAX(i), @n = MAX(j) FROM @matrix SET @i = 1 SET @j = 1 DECLARE @R dbo.Matrix DECLARE @Qj dbo.Matrix DECLARE @Q dbo.Matrix -- Generate a @m by @m Identity Matrix to transform to Q, add more numbers for m > 1000 ;WITH e1(n) AS ( SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ), e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b), e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2), numbers(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY n) FROM e3) INSERT INTO @Q (i, j, Aij) SELECT i.n, j.n, CASE WHEN i.n = j.n THEN 1 ELSE 0 END FROM numbers i CROSS JOIN numbers j WHERE i.n <= @m AND j.n <= @m -- Copy input matrix to be transformed to R INSERT @R (i, j, Aij) SELECT i, j, Aij FROM @matrix -- Loop performing Householder reflections WHILE @j < @n OR (@j = @n AND @m > @n) BEGIN SELECT @a = SQRT(SUM(Aij * Aij)) FROM @R WHERE j = @j AND i >= @i SELECT @a = -SIGN(Aij) * @a FROM @R WHERE j = @j AND i = @j + (@j - 1) ;WITH u (i, j, Aij) AS ( SELECT i, 1, u.ui FROM ( SELECT i, CASE WHEN i = j THEN Aij + @a ELSE Aij END AS ui FROM @R WHERE j = @j AND i >= @i ) u ) INSERT @Qj (i, j, Aij) SELECT i, j, CASE WHEN i = j THEN 1 - 2 * Aij ELSE - 2 * Aij END as Aij FROM ( SELECT u.i, ut.i AS j, u.Aij * ut.Aij / (SELECT SUM(Aij * Aij) FROM u) AS Aij FROM u u CROSS JOIN u ut ) vvt -- Apply inverse Householder reflection to Q UPDATE Qj SET Aij = [Qj+1].Aij FROM @Q Qj INNER JOIN ( SELECT Q.i, QjT.j, SUM(QjT.Aij * Q.Aij) AS Aij FROM @Q Q INNER JOIN ( SELECT i AS j, j AS i, Aij FROM @Qj ) QjT ON QjT.i = Q.j GROUP BY Q.i, QjT.j ) [Qj+1] ON [Qj+1].i = Qj.i AND [Qj+1].j = Qj.j -- Apply Householder reflections to R UPDATE Rj SET Aij = [Rj+1].Aij FROM @R Rj INNER JOIN ( SELECT Qj.i, R.j, SUM(Qj.Aij * R.Aij) AS Aij FROM @Qj Qj INNER JOIN @R R ON R.i = Qj.j GROUP BY Qj.i, R.j ) [Rj+1] ON [Rj+1].i = Rj.i AND [Rj+1].j = Rj.j -- Prepare Qj for next Householder reflection UPDATE @Qj SET Aij = CASE WHEN i = j THEN 1 ELSE 0 END WHERE i <= @j OR j <= @j DELETE FROM @Qj WHERE i > @j AND j > @j SET @j = @j + 1 SET @i = @i + 1 END -- Output Q INSERT @result (matrix, i, j, Aij) SELECT 'Q', i, j, Aij FROM @Q -- Output R INSERT @result (matrix, i, j, Aij) SELECT 'R', i, j, Aij FROM @R RETURNENDGO-- Function to perform linear regressionCREATE FUNCTION dbo.MatrixLeastSquareRegression ( @X dbo.Matrix READONLY , @y dbo.Matrix READONLY)RETURNS @b TABLE (i int, j int, Aij float)ASBEGIN DECLARE @QR TABLE (matrix char(1), i int, j int, Aij float) INSERT @QR(matrix, i, j, Aij) SELECT matrix, i, j, Aij FROM dbo.QRDecomposition(@X) DECLARE @Qty dbo.Matrix -- @Qty = Q'y INSERT INTO @Qty(i, j, Aij) SELECT a.j, b.j, SUM(a.Aij * b.Aij) FROM @QR a INNER JOIN @y b ON b.i = a.i WHERE a.matrix = 'Q' GROUP BY a.j, b.j DECLARE @m int, @n int, @i int, @j int, @a float SELECT @m = MAX(j) FROM @QR R WHERE R.matrix = 'R' SET @i = @m -- Solve Rb = Q'y via back substitution WHILE @i > 0 BEGIN INSERT @b (i, j, Aij) SELECT R.i, 1, ( y.Aij - ISNULL(sumKnown.Aij, 0) ) / R.Aij FROM @QR R INNER JOIN @Qty y ON y.i = R.i LEFT JOIN ( SELECT SUM(R.Aij * ISNULL(b.Aij, 0)) AS Aij FROM @QR R INNER JOIN @b b ON b.i = R.j WHERE R.matrix = 'R' AND R.i = @i ) sumKnown ON 1 = 1 WHERE R.matrix = 'R' AND R.i = @i AND R.j = @i SET @i = @i - 1 END RETURNENDGOHere is a test script/example of usage:DECLARE @TestData TABLE (i int IDENTITY(1, 1), X1 float, X2 float, X3 float, X4 float, y float)DECLARE @c floatDECLARE @b1 floatDECLARE @b2 floatDECLARE @b3 floatDECLARE @b4 float-- bs are the target coefficiantsSET @c = RAND()SET @b1 = 2 * RAND()SET @b2 = 3 * RAND()SET @b3 = 4 * RAND()SET @b4 = 5 * RAND()-- Generate some test data, calcualte y from c + Xb plus some noise: y = c + Xb + e-- Note: Using RAND() for e is not nomrally ditributed noise as linear regression assumes, this will mess with the estimate of cDECLARE @k int = 1WHILE @k < 50 BEGIN INSERT @TestData(X1, X2, X3, X4, y) SELECT x1, x2, x3, x4, @c + x1 * @b1 + x2 * @b2 + x3 * @b3 + x4 * @b4 + 0.2 * RAND() FROM ( SELECT RAND() AS x1, RAND() AS x2, RAND() AS x3, RAND() AS x4 ) X SET @k = @k + 1END-- Put our data into dbo.Matrix typesDECLARE @X dbo.MatrixINSERT @X (i, j, Aij)-- Extra column for constantSELECT i, 1, 1FROM @TestDataUNIONSELECT i, 2, X1FROM @TestDataUNIONSELECT i, 3, X2FROM @TestDataUNIONSELECT i, 4, X3FROM @TestDataUNIONSELECT i, 5, X4FROM @TestDataDECLARE @y dbo.MatrixINSERT @y (i, j, Aij)SELECT i, 1, yFROM @TestData-- Estimates for coefficient valuesDECLARE @bhat dbo.MatrixINSERT @bhat (i, j, Aij)SELECT i, j, AijFROM dbo.MatrixLeastSquareRegression(@X, @y)SELECT CASE i WHEN 1 THEN @c WHEN 2 THEN @b1 WHEN 3 THEN @b2 WHEN 4 THEN @b3 WHEN 5 THEN @b4 END AS b , Aij AS bestFROM @bhatSELECT y.Aij AS y, Xb.Aij AS yestFROM ( SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij FROM @X x INNER JOIN @bhat bh ON bh.i = x.j GROUP BY x.i) XbINNER JOIN @y y ON y.i = Xb.iSELECT SUM(SQUARE(y.Aij - Xb.Aij)) / COUNT(*) AS [Variance]FROM ( SELECT x.i, SUM(x.Aij * bh.Aij) AS Aij FROM @X x INNER JOIN @bhat bh ON bh.i = x.j GROUP BY x.i) XbINNER JOIN @y y ON y.i = Xb.i 这篇关于SQL Server 中的多元线性回归函数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持! 上岸,阿里云!
06-16 16:30