Tuesday, May 12, 2009

SQL 2008 Exponential Regression

As described in the SQL 2008 Excel-like LINEST Function post, the exponential regression function is now a simple matter of transforming the dependent variable (y) with the built-in T-SQL LOG function and the resulting slope, intercept and r-Squared calculations. Now, armed with both linear and exponential regression functions, we can regress large data sets quickly and accurately so that we can perform higher level analytics with the results.

CREATE FUNCTION [dbo].[ExponentialRegression]
(
@RegressionInput RegressionInput READONLY
)
RETURNS @RegressionOutput TABLE
(
Slope DECIMAL(18, 6),
Intercept DECIMAL(18, 6),
RSquare DECIMAL(18, 6)
)
AS
BEGIN
DECLARE @Xaverage AS DECIMAL(18, 6)
DECLARE @Yaverage AS DECIMAL(18, 6)
DECLARE @slope AS DECIMAL(18, 6)
DECLARE @intercept AS DECIMAL(18, 6)
DECLARE @rSquare AS DECIMAL(18, 6)

SELECT
@Xaverage = AVG(X),
@Yaverage = AVG(LOG(Y))
FROM
@RegressionInput

SELECT
@slope =
CASE
WHEN SUM(POWER(X - @Xaverage, 2)) <> 0 THEN
SUM((X - @Xaverage) * (LOG(Y) - @Yaverage))/SUM(POWER(X - @Xaverage, 2))
ELSE
0
END
FROM
@RegressionInput

SELECT
@intercept = EXP(@Yaverage - (@slope * @Xaverage))

SELECT
@rSquare =
CASE
WHEN (SUM(POWER(LOG(Y) - LOG((@intercept * POWER(EXP(1), @slope * X))), 2)) + SUM(POWER(LOG((@intercept * POWER(EXP(1), @slope * X))) - @Yaverage, 2))) <> 0 THEN
1 - (SUM(POWER(LOG(Y) - LOG((@intercept * POWER(EXP(1), @slope * X))), 2))/(SUM(POWER(LOG(Y) - LOG((@intercept * POWER(EXP(1), @slope * X))), 2)) + SUM(POWER(LOG((@intercept * POWER(EXP(1), @slope * X))) - @Yaverage, 2))))
ELSE
0
END
FROM
@RegressionInput

INSERT INTO
@RegressionOutput
SELECT
@slope,
@intercept,
@rSquare

RETURN

END

GO