/*
 *		Unofficial release 1.3.0
 *	        	B I N G
 *
 */

/* $Id: lin_reg.c,v 1.4 1999/10/16 07:07:58 fgouget Exp $ */

#include <math.h>
#include <float.h>

#include "lin_reg.h"

/*
 * The linear regression stuff.
 * The basic formulas used to do the linear regression are given below;
 * for more details see a maths book.
 *
 * We have a set of n samples (xi,yi). Then if we write
 *
 *    y = a * x + b
 *
 * the a and b that best approximate the samples are given by:
 *
 *    a = Covariance ( xi , yi ) / Variance ( xi )
 *
 *    b = Mean ( yi ) - Covariance ( xi , yi ) / Variance ( xi ) * Mean ( xi )
 *
 * Note that in bing we only care about a.
 *
 * To compute a we maintain the variables below:
 *
 *    sum_x =  Sum ( xi )
 *    sum_x2 = Sum ( xi * xi )
 *    sum_y =  Sum ( yi )
 *    sum_y2 = Sum ( yi * yi )   (only needed for the correlation)
 *    sum_xy = Sum ( xi * yi )
 *
 * And then the covariance and variance can be computed as:
 *
 *    Covariance ( xi , yi ) = ( sum_xy - sum_x * sum_y / n ) / ( n - 1 )
 *
 *    Variance ( xi ) = ( sum_x2 - sum_x * sum_x / n ) / ( n - 1 )
 *
 * Thus:
 *
 *      n * sum_xy - sum_x * sum_y
 * a = ----------------------------
 *      n * sum_x2 - sum_x * sum_x
 *
 *      sum_x2 * sum_y - sum_x * sum_xy
 * b = ---------------------------------
 *         n * sum_x2 - sum_x * sum_x
 *
 *
 * We also compute the correlation value so that we know whether the stats 
 * indeed look like a line or not. The correlation is computed from the 
 * following formula:
 *
 * Corr (xi , yi ) = Covariance ( xi , yi ) / Sqrt ( Variance ( xi ) * Variance ( yi ) )
 *
 * i.e.
 *
 *                                          n * sum_xy - sum_x * sum_y
 * Corr (xi , yi ) = -------------------------------------------------------------------------
 *                    Sqrt( ( n * sum_x2 - sum_x * sum_x ) * ( n * sum_y2 - sum_y * sum_y ) )
 *
 */

/*
 * Recompute the regression results (a, b and corr) from the running sums
 * stored in linreg.
 *
 * The results are set to 0 when they cannot be computed:
 *  - fewer than two samples have been accumulated;
 *  - all the x values are identical (zero variance), in which case the
 *    slope and intercept are undefined;
 *  - the x or y values have zero variance, in which case the correlation
 *    is undefined.
 * Without these guards the divisions below would produce NaN or infinite
 * values.
 */
void linreg_update(linreg_t* linreg)
{
    if (linreg->nb_samples>1) {
        const double n=linreg->nb_samples;
        /*
         * (n - 1) times the covariance and the variances. The common
         * (n - 1) factor cancels out in the ratios computed below.
         */
        const double sxy=linreg->sum_xy-linreg->sum_x*linreg->sum_y/n;
        const double sxx=linreg->sum_x2-linreg->sum_x*linreg->sum_x/n;
        const double syy=linreg->sum_y2-linreg->sum_y*linreg->sum_y/n;
        /* Denominator of the intercept formula */
        const double b_den=n*linreg->sum_x2-linreg->sum_x*linreg->sum_x;
        /* Square of the correlation denominator */
        const double corr_den2=sxx*syy;

        if (sxx!=0.0 && b_den!=0.0) {
            linreg->a=sxy/sxx;
            linreg->b=(linreg->sum_x2*linreg->sum_y-linreg->sum_x*linreg->sum_xy)/
                      b_den;
        } else {
            /* All the x values are the same: no line can be fitted */
            linreg->a=0;
            linreg->b=0;
        }

        /*
         * Rounding errors may make the product very slightly negative,
         * so test for a strictly positive value before taking the root.
         */
        if (corr_den2>0.0)
            linreg->corr=sxy/sqrt(corr_den2);
        else
            linreg->corr=0;
    } else {
        linreg->a=0;
        linreg->b=0;
        linreg->corr=0;
    }
}

/*
 * Reset a regression structure to its empty state: no samples, all the
 * running sums at zero and all the results (a, b, corr) at zero.
 *
 * Always returns 0.
 */
int linreg_init(linreg_t *linreg)
{
    /* No sample accumulated yet */
    linreg->nb_samples=0;

    /* Running sums */
    linreg->sum_x=linreg->sum_y=0.0;
    linreg->sum_x2=linreg->sum_y2=0.0;
    linreg->sum_xy=0.0;

    /* Regression results */
    linreg->a=linreg->b=0.0;
    linreg->corr=0.0;

    return 0;
}

/*
 * Add a batch of nb_samples samples (x[i],y[i]) to the running sums of
 * linreg. The sums are not reset first: the samples are added to whatever
 * has already been accumulated.
 *
 * Samples whose y value is equal to DBL_MAX are skipped and are not
 * counted (presumably DBL_MAX marks a missing measurement; check the
 * callers).
 *
 * If update is non-zero the regression results are recomputed once all
 * the samples have been added.
 *
 * Always returns 0.
 */
int linreg_add_all(linreg_t *linreg,int  nb_samples, int* x, double* y, int update)
{
    int i;

    /* Accumulate the valid samples */
    for (i=0;i<nb_samples;i++) {
        double xi;

        if (y[i]==DBL_MAX)
            continue;
        /*
         * Do the arithmetic in double: x[i]*x[i] computed as an int
         * overflows as soon as |x[i]| exceeds 46340 with 32 bit ints.
         */
        xi=x[i];
        linreg->sum_x+=xi;
        linreg->sum_y+=y[i];
        linreg->sum_x2+=xi*xi;
        linreg->sum_y2+=y[i]*y[i];
        linreg->sum_xy+=xi*y[i];
        linreg->nb_samples++;
    }

    /* Update the regression results */
    if (update!=0)
        linreg_update(linreg);

    return 0;
}


/*
 * Remove the sample (x,y) from the running sums of linreg and decrement
 * the sample count.
 *
 * No check is made that this sample was actually added before, nor that
 * the sample count stays positive: this is the caller's responsibility.
 *
 * If update is non-zero the regression results are recomputed.
 *
 * Always returns 0.
 */
int linreg_del_sample(linreg_t *linreg, int x, double y, int update)
{
    /*
     * Do the arithmetic in double: x*x computed as an int overflows as
     * soon as |x| exceeds 46340 with 32 bit ints.
     */
    const double dx=x;

    linreg->nb_samples--;
    linreg->sum_x-=dx;
    linreg->sum_y-=y;
    linreg->sum_x2-=dx*dx;
    linreg->sum_y2-=y*y;
    linreg->sum_xy-=dx*y;

    /* Update the regression results */
    if (update!=0)
        linreg_update(linreg);

    return 0;
}

/*
 * Add the sample (x,y) to the running sums of linreg and increment the
 * sample count.
 *
 * If update is non-zero the regression results are recomputed.
 *
 * Always returns 0.
 */
int linreg_add_sample(linreg_t *linreg, int x, double y, int update)
{
    /*
     * Do the arithmetic in double: x*x computed as an int overflows as
     * soon as |x| exceeds 46340 with 32 bit ints.
     */
    const double dx=x;

    linreg->nb_samples++;
    linreg->sum_x+=dx;
    linreg->sum_y+=y;
    linreg->sum_x2+=dx*dx;
    linreg->sum_y2+=y*y;
    linreg->sum_xy+=dx*y;

    /* Update the regression results */
    if (update!=0)
        linreg_update(linreg);

    return 0;
}

