/* ******************************************************************* Purpose: Estimate conditional variance using the local linear fit with bandwidth selected by the pre-asymptotic subsitution method of Fan and Gijbels (1995). Usage: To compile, type "cc autovar.c -lm". To run, type "a.out" and then follow the instructions. Input: n = sample size ngrid = number of grid point, xgridmin = lower endpoint of the interval where function to be est xgridmax = upper endpoint of the interval where function to be est hmgiven = given bandwidth; if hmgiven < 0, use data-driven bandwidth for the mean. hvgiven = given bandwidth; if hvgiven < 0, use data-driven bandwidth for the variance. Output: selected bandwidths for estimating mean and conditional variance, grid Points est-regression its SE est variance Its SE After these five columns, three columns x y residuals are appended. Source: This program was written to compute simulations and real data examples in the paper by Fan and Yao (1998) Reference: Fan, J. and Gijbels, I. (1995a). Data-driven bandwidth selection in local polynomial fitting: variable bandwidth and spatial adaptation. {\em J. Royal Statist. Soc. B}, {\bf 57}, 371--394. Fan, J. and Yao, Q. (1998). Efficient Estimation of Conditional Variance Functions in Stochastic Regression. {\em Biometrika}, {\bf 85}, 645-660. ********************************************************************* */ /* ***************** SIMULATED EXAMPLES ********************************* EXAMPLE 1--4 : The model considered here is: Y = a *(X + 2exp(-16X^2)) + sigma(X) varepsilon sigma(X) = 0.4*exp(-2*X^2)+0.2 with X Uniform(-2,2) and varepsilon N(0,1) with a=0.5, 1, 2, 4 respectively EXAMPLE 5--6: The model is Y(t) = 0.245Y(t-1)*{16-Y(t-1)} + epsilon epsilon distributed N(0,0.3^2) with lags 2 and 3 respectively for Examples 5 and 6 OUTPUT: mhat contains estimated regression curve at grid points vhat contains estimated conditional variance at grid points ************************************************************************ This program is a modification of the previous program "constband.c". The task now is to estimating the conditional variance using a constant bandwidth. *********************************************************************** */ #include #include #define MAX(i,j) ( (i) > (j) ? (i):(j) ) #define TWOSTAGE 1 /* 1 means yes, otherwise no */ #define AORDER 2 #define NU 0 #define ADJUST (NU == 0? (TWOSTAGE == 1 ? .7776:.8941):(TWOSTAGE == 0 ? .7639:.7643) ) main() { double *x, *y,*resid, *xgrid, *mhat, *vhat, *MISE, *MISEideal; double *MISEswap, *vtrue, **Tr, *mse, *vse; double hmgiven,hvgiven,hmopt, hvopt, xgridmin,xgridmax, tmp, tmp1, a, frac; int n, ngrid, nsim, Nsim, i, j, istep, order, idum, Idum[3]; int iexample, aorder, ind; FILE *reg, *in; double *vector(), **matrix(),ran1(), gasdev(), interp(); void sort(), autovar(), constband(), free_matrix(), free_vector(), exit(); char filename[15], filename0[15]; /* INITIALIZATION */ if(NU == 0) printf("Curve Estimation ADJUST = %10.6f\n", ADJUST); else printf("Derivative Estimation ADJUST = %10.6f\n", ADJUST); if( TWOSTAGE == 1) printf("TWO-STAGE: ADJUST = %10.6f\n", ADJUST); else printf("ONE-STAGE: ADJUST = %10.6f\n", ADJUST); order = 1+NU; ngrid = 101; printf("Which Example (0 -- 4) ?\n Example 0 is real data example: "); scanf("%d", &iexample); if(iexample > 6) {printf("out of example list and exiting ... \n"); exit(1); } if(iexample == 0) { printf("Enter sample size n, ngrid, xgridmin, xgridmax \n"); scanf("%d %d %lf %lf",&n, &ngrid, &xgridmin, &xgridmax); printf("Enter bandwidths hm & hv: NEGATIVE VALUE means using DATA-DRIVEN"); printf("bandwidth: \n"); scanf("%lf %lf", &hmgiven, &hvgiven); printf("Enter input filename: "); scanf("%s", filename0); printf("Enter OUTPUT filename: "); scanf("%s", filename); Nsim = 1; /*dummy variable*/ } else { printf("Enter n, Nsim: \n"); scanf("%d", &n); scanf("%d", &Nsim); sprintf(filename, ".var%d.%d", iexample, n); reg = fopen(filename, "w"); hmgiven = -1.0; hvgiven = -1.0; } /* MEMORY ALLOCATION */ xgrid=vector(0,ngrid); x=vector(0,n+105); y=vector(0,n); resid=vector(0,n); mhat=vector(0,ngrid); mse=vector(0,ngrid); vhat=vector(0,ngrid); vse=vector(0,ngrid); MISE = vector(0,Nsim); MISEideal = vector(0,Nsim); MISEswap = vector(0, Nsim); vtrue = vector(0, ngrid); Tr = matrix(0,4,0,ngrid); /* INITIALIZATION */ switch(iexample) {case 0: in = fopen(filename0, "r"); /*real data*/ reg = fopen(filename,"w"); for(i=0; i < n; i++) fscanf(in, "%lf %lf", x+i, y+i); for(i=0; i < ngrid; i++) xgrid[i] = xgridmin+i* (xgridmax-xgridmin)/(ngrid -1.0); autovar(x, y, n, order, xgrid, ngrid, mhat, &hmopt, mse, vhat, &hvopt, vse, resid,hmgiven,hvgiven); fprintf(reg, "Real data: ngrid=%d,xgridmin=%10.6f,xgridmax=%10.6f", ngrid, xgridmin, xgridmax); fprintf(reg,"\n estimated bandwidths for reg and var are %10.6f %10.6f", hmopt, hvopt); fprintf(reg, "\nGrid Points Regression Its SE Variance Its SE"); fprintf(reg, "\n\n\n\n\n"); for(j=0;j < ngrid; j++) fprintf(reg,"%12.7f %12.7f %12.7f %12.7f %12.7f\n", xgrid[j], mhat[j], mse[j], vhat[j], vse[j]); fprintf(reg, "\n \n x y Residuals after regression fit\n \n"); for(j=0;j < n; j++) fprintf(reg,"%12.7f %12.7f %12.7f\n", x[j],y[j], resid[j]); exit(1); case 1: a=0.5; goto Lab1; case 2: a=1.0; goto Lab1; case 3: a=2.0; goto Lab1; case 4: a=4.0; goto Lab1; Lab1: for(i=0; i < ngrid; i++) { tmp = -1.8 + (double) i * 3.6 /(double) (ngrid -1); xgrid[i] = tmp; vtrue[i] = 0.4*exp(-2*tmp*tmp)+0.2; } break; case 5: case 6: istep = iexample - 3; /* i-step ahead prediction */ n -= istep; in = fopen(".true.fun", "r"); /*read true functions*/ for(i=0; i < ngrid; i++) fscanf(in, "%lf %lf %lf %lf %lf", Tr[0]+i, Tr[1]+i, Tr[2]+i, Tr[3]+i, Tr[4]+i); if(iexample==5) ind = 2; else ind = 4; for(i=0; i < ngrid; i++) { tmp = 2.5 + (double) i * 12 /(double) (ngrid -1); xgrid[i] = tmp; vtrue[i] = sqrt(interp(tmp, Tr[0], ngrid, Tr[ind])); } break; default: printf("Out of Example list:\n"); exit(1); } /* ************************************************************* SIMULATION STARTS ************************************************************ */ for(nsim=0; nsim < Nsim; nsim++) { idum = -10*nsim*nsim-2+nsim-352*iexample; switch(iexample) { case 1: case 2: case 3: case 4: for(i=0; i < n; i++) x[i] = 4.0* ran1(&idum) - 2.0; sort(x,n); /* sorted x-design */ for(i=0; i < n; i++) { tmp = x[i]; y[i]= a*tmp+a*2.0*exp(-16.0*tmp*tmp); resid[i] = (0.2+ 0.4*exp(-2*tmp*tmp) ) * gasdev(&idum); y[i] += resid[i]; /* resid[i] will be used for the ideal estimation*/ } break; case 5: case 6: for(x[0]=8.0, i=0; i < n+101+istep; i++) /* pre-burning and data generation */ {x[i+1] = 0.235*x[i]*(16.0-x[i])+0.3*gasdev(&idum); /* checking if data out of range */ while( x[i+1] < 0.0 || x[i+1] > 16.0 ) { if(x[i+1] < 0.0) x[i+1] = - x[i+1]; if(x[i+1] > 16.0) x[i+1] = 32.0 - x[i+1]; } } if(iexample==5) ind = 1; else ind = 3; for(i=0; i < n; i++) { x[i] = x[i+100]; y[i] = x[i+istep+100]; tmp = x[i]; resid[i] =y[i] - interp(tmp, Tr[0], ngrid, Tr[ind]); } break; } /* compute ideal variance estimate */ for(i=0; i < n; i++) resid[i] = resid[i]*resid[i]; constband(x, resid, n, order, xgrid, ngrid, vhat, &hvopt, vse,hvgiven); tmp = 0.0; for(j=0; j < ngrid; j++) tmp += fabs(sqrt(MAX(vhat[j],0)) - vtrue[j]); /*mean absolute deviation error */ MISEideal[nsim] = tmp/ngrid; autovar(x, y, n, order, xgrid, ngrid, mhat, &hmopt, mse, vhat, &hvopt, vse, resid,hmgiven,hvgiven); tmp = 0.0; for(j=0; j < ngrid; j++) tmp += fabs(sqrt(vhat[j]) - vtrue[j]); MISE[nsim] = tmp/ngrid; printf("Example %d, n = %d, nsim =%d, MISE= %10.6f \n", iexample, n, nsim, MISE[nsim]); } /*finish the loop with nsim */ fprintf(reg, "EXAMPLE %d \n", iexample); fprintf(reg, "sample size n = %d, number of simulations = %d\n", n, Nsim); fprintf(reg, "MISE Ideal MISE: \n"); for(j=0; j < nsim; j++) fprintf(reg, "%10.6f %10.6f\n", MISE[j], MISEideal[j]); if(Nsim == 1) goto Lab2; /* Selecting a seed with the median performance */ for(i=0; i < Nsim; i++) MISEswap[i] = MISE[i]; sort(MISE, Nsim); i =0.5*Nsim; tmp = MISE[i]; /*50th percentile's random seed */ for(i=0; i < Nsim; i++) if(fabs(MISEswap[i] - tmp) < 0.00001) Idum[0] = -10*i*i-2+i; /* NOW RESIMULATE USING THE OLD SEED with the median performance */ idum = Idum[0]; switch(iexample) { case 1: case 2: case 3: case 4: for(i=0; i < n; i++) x[i] = 4.0* ran1(&idum) - 2.0; sort(x,n); /* sorted x-design */ for(i=0; i < n; i++) { tmp = x[i]; y[i]= a*tmp+a*2.0*exp(-16.0*tmp*tmp); resid[i] = (0.2+ 0.4*exp(-2*tmp*tmp) ) * gasdev(&idum); y[i] += resid[i]; } break; case 5: case 6: for(x[0]=8.0, i=0; i < n+101+istep; i++) /* pre-burning and data generation */ {x[i+1] = 0.235*x[i]*(16.0-x[i])+0.3*gasdev(&idum); /* checking if data out of range */ while( x[i+1] < 2.0 || x[i+1] > 16.0 ) { if(x[i+1] < 2.0) x[i+1] = 4.0 - x[i+1]; if(x[i+1] > 16.0) x[i+1] = 32.0 - x[i+1]; } } if(iexample==5) ind = 1; else ind = 3; for(i=0; i < n; i++) { x[i] = x[i+100]; y[i] = x[i+istep+100]; tmp = x[i]; resid[i] =y[i] - interp(tmp, Tr[0], ngrid, Tr[ind]); } break; } autovar(x, y, n, order, xgrid, ngrid, mhat, &hmopt, mse, vhat, &hvopt, vse, resid,hmgiven,hvgiven); Lab2: fprintf(reg, "\n\n Results for the simulation with median performance\n"); fprintf(reg,"\n \n estimated bandwidths for reg and var are %10.6f %10.6f", hmopt, hvopt); fprintf(reg, "\nGrid Points Regression Its SE Variance Its SE"); fprintf(reg, "\n\n\n"); for(j=0;j < ngrid; j++) fprintf(reg,"%12.4f %12.4f %12.4f %12.4f %12.4f\n", xgrid[j], mhat[j], mse[j], vhat[j], vse[j]); fprintf(reg, "\n \n x y Residuals after regression fit\n \n"); for(j=0;j < n; j++) fprintf(reg,"%12.4f %12.4f %12.4f\n", x[j], y[j], resid[j]); } /* ************************ autovar.c ********************* Objective: The program is to compute automatically the variance estimate using an existing automatic local polynomial smoother. In our implementation, we use the constant bandwidth selection method by Fan and Gijbels (1995). The code can easily be adapted to any other package. Approximation: When we want to plot a curve, the function values are often evaluated at grid points. xgrid is the grid points that we want to evaluate the conditional variance function. We also use the xgrid to compute the mean regression function and then use the interpolations and extrapolations to compute residuals. We assume that we use the same order of fit for the raw data and for the residuals. ************************************************************** */ void autovar(x, y, n, order, xgrid, ngrid, mhat, hmopt, mse, vhat, hvopt, vse, resid,hmgiven,hvgiven) double *x, *y, *xgrid, *mhat, *hmopt, *mse, *vhat, *hvopt, *vse, *resid; double hmgiven,hvgiven; int n,order, ngrid; #define MIN(i,j) ( (i) > (j) ? (j):(i) ) #define MAX(i,j) ( (i) > (j) ? (i):(j) ) /* Input : x :n-column vector containing the covariates; y : n-column vector with the responses; order : 0 = local constant approximation 1 = local linear approximation, 2 = local quadratic approximation, etc. ....., xgrid : ngrid-column vector with the grid points in which the mean regression estimator mhat and variance estimator vhat are calculated; hmgiven,hvgiven: given bandwidths for mean and variance smoothing, negative values mean automatic choice Output : mhat ngrid-vector estimated regression function at xgrid mse ngrid-vector estimated SE for the mean-regression hmopt: the optimal bandwidth for the regression function m. vhat: ngrid-vector estimated variance curve vhat at xgrid vse : ngrid-vector estimated variance for the variance curve hvopt: the optimal bandwidth for the variance function v. resid: n-vector the residual after smoothed regression */ { int i, j, k, ind, nm; double *resid1, delta, frac, a1 ,a2, a3, a4, a5; double intc1, intc2, slope1, slope2, *vector(); void constband(), free_vector(); resid1 = vector(0,n); /* compute the mean regression curve and residuals */ constband(x, y, n, order, xgrid, ngrid, mhat, hmopt, mse,hmgiven); delta = xgrid[1]-xgrid[0]; /* Using linear regression of tails ind points to extrapolate */ ind = MIN(5,ngrid-1); /* number of end points used for extrapolation*/ a1 = 0.0; a2=0.0; a3=0.0; a4=0.0; a5=0.0; for(i=0; i < ind; i++) { a1 += xgrid[i]; a2 += xgrid[i]*xgrid[i]; a3 += mhat[i]; a4 += mhat[i]*mhat[i]; a5 += mhat[i]*xgrid[i]; } intc1 = (a2*a3-a1*a5)/( ind* a2-a1*a1); slope1 = (ind*a5-a1*a3)/( ind* a2-a1*a1); a1 = 0; a2=0; a3=0; a4=0; a5=0; for(i=ngrid-ind; i < ngrid; i++) { a1 += xgrid[i]; a2 += xgrid[i]*xgrid[i]; a3 += mhat[i]; a4 += mhat[i]*mhat[i]; a5 += mhat[i]*xgrid[i]; } intc2 = (a2*a3-a1*a5)/( ind* a2-a1*a1); slope2 = (ind*a5-a1*a3)/( ind* a2-a1*a1); /* slopes at both end points */ for(i=0; i < n; i++) { if(x[i] >= xgrid[0] && x[i] <= xgrid[ngrid-1]) { ind = (x[i] - xgrid[0])/delta; /* which bin interval */ frac = (x[i] - xgrid[0])/delta - ind; resid[i] = y[i] - (1-frac)*mhat[ind]- frac*mhat[ind+1]; } else {if(x[i] < xgrid[0]) resid[i] = y[i] - intc1 - slope1*x[i]; else resid[i] = y[i] - intc2- slope2*x[i]; } resid1[i] = resid[i]*resid[i]; /* squared residuals */ } /* automatic estimation of the conditional variance */ constband(x, resid1, n, order, xgrid, ngrid, vhat, hvopt,vse,hvgiven); for(j=0; j < ngrid; j++) vhat[j] = MAX(0,vhat[j]); free_vector(resid1, 0, n); } /* ************************* CONSTBAND.C ****************************** ****************************************************************** Main program: Constant.C : select the bandwidth via a loop through bandwidth and do the fixed-order polynomial approximation. Calls for: LLS.C : which is the program doing the locally weighted least squares regression Other programs called are: GAUSSJ.C : Solves a linear equation using Gauss-Jordan elimination (see e.g. the book "Numerical Recipes in C", by Press,Flannery, Teukolsky and Vetterling, Cambridge University Press (1988)) SORT.C : sorting a vector RAN1.C : random number generator for uniform (0,1) distributed random variables GASDEV.C : random number generator, generates standard normal distributed random variables; relies on the Box-Muller method and uses the random number generator for uniform random variables (i.e calls RAN1.C) Utility Programs: IVECTOR.C : memory allocation for integer-valued vector VECTOR.C : memory allocation for vector MATRIX.C : memory allocation for matrix FREE_MATRIX.C : frees the memory allocated by MATRIC FREE_IVECTOR.C : frees the memory allocated by IVECTOR FREE_VECTOR.C : frees the memory allocated by VECTOR NERROR.C : prints an error message. ************************************************************************** ************************************************************************** */ void constband(x, y, n, order, xgrid, ngrid, mhat, hopt, se,hgiven) double *x, *y, *xgrid, *mhat, *hopt, *se, hgiven; int n,order, ngrid; #define LARGE 1.0e17 #define SPAN 1.1 /*maximum number of iteration*/ #define IUP 3 /*checking conv of iteration*/ #define MIN(i,j) ( (i) > (j) ? (j):(i) ) #define MAX(i,j) ( (i) > (j) ? (i):(j) ) #define ABS(i) ( (i) >= 0 ? (i):-(i)) /* OPTIMAL BANDWIDTH AND REGRESSION ESTIMATION Input : x :n-column vector containing the covariates; y : n-column vector with the responses; order : 0 = local constant approximation 1 = local linear approximation, 2 = local quadratic approximation, 3 = local cubic approximation, etc. ....., xgrid : ngrid-column vector with the grid points in which the final regression estimator mhat is calculated; hgiven: if hgiven > 0, use this bandwidth; otherwise use data driven one. Output : mhat estimated regression curve at grid points; se the standard error of the estimate hopt: the optimal bandwidth. */ { int i, ii, iup, ismooth, j, nrow, nm, a; double **beta, **betatmp, *SE, *RSC; double h, hmin, hmax,hopt1,MASE, MSEnew, MSEold, xx, xmin, xmax, tmp, sigma; double hopt2, MASEopt, *vector(), **matrix(); void free_vector(), free_matrix(); void sort(), lls(); /*MEMORY ALLOCATION */ nm = order+8; beta=matrix(0,nm,0,1); betatmp = matrix(0,nm, 0, ngrid); SE=vector(0,nm); RSC = vector(0,nm); xmin=x[0]; xmax=x[0]; for(i=0; i < n; i++) {if(x[i] < xmin) xmin = x[i]; if(x[i] > xmax) xmax = x[i]; } hmin = (xmax - xmin)*(order+2.0)/n; hmax = (xmax - xmin)/2.0; if(hgiven > 0) {hopt2 = hgiven; a = AORDER; goto lab5;} /* a = AORDER; approximation order*/ if(TWOSTAGE == 1) /* TWO-STAGE PLUGIN */ { a = AORDER; h = (2.0 +NU)* hmin;} else { a = 0; h = hmin;} hopt1 = h; iup = 0; MSEnew = LARGE; MASEopt = LARGE; /*SEARCH FOR OPTIMAL BANDWIDTH FOR LOCAL order+a FIT */ while(iup < IUP && h < hmax) { MASE = 0.0; for(ii = 0; ii < ngrid; ii++) /* for each grid point */ {xx = xgrid[ii]; /* nrow = # of effective data points */ for(nrow = 0, i=0; i < n; i++) if(fabs(xx - x[i]) < h) nrow++; if(nrow > 2*(order+a)) /* NOT Nearly singular case */ { lls(x,y,n,order+a,xx,h, beta, SE, &sigma, RSC, 1); MASE += RSC[0]; } else{iup=0; goto lab1;} } MSEold = MSEnew; MSEnew = MASE; if(MSEnew > MSEold) iup++; else iup = 0; if(MSEnew < MASEopt) {MASEopt = MSEnew; hopt1 = h;} lab1: h *= SPAN; } /*end-loop through h*/ hopt1 *= ADJUST; hopt2 = hopt1; if( TWOSTAGE == 1) { h = (1.0 + NU)*hmin; hopt2 = h; iup = 0; MSEnew = LARGE; MASEopt = LARGE; /* ************* ESTIMATE CURVATIVE FIRST ********** */ for(ii =0; ii < ngrid; ii++) {xx = xgrid[ii]; lls(x,y,n,order+a,xx,hopt1, beta, SE, &sigma, RSC, 0);/*(order+a)-fit */ for(i=0; i <= order+a; i++) betatmp[i][ii] = beta[i][0]; } /*END priliminary fit to learn beta_2 so that it can be passed */ while(iup < IUP && h < hmax) {MASE=0.0; for(ii=0; ii < ngrid; ii++) { xx = xgrid[ii]; for(i=0; i <= order+a; i++) beta[i][1] = betatmp[i][ii]; /*END priliminary fit to learn beta_2 so that it can be passed to compute the bias NOW TRY LOCAL order-FIT, nrow = # of effective data points */ for(nrow = 0, i=0; i < n; i++) if(fabs(xx - x[i]) < h) nrow++; if(nrow <= order+a) /* Nearly singular case */ {iup=0; goto lab2;} else { lls(x,y,n,order,xx,h, beta, SE, &sigma, RSC, a); MASE += beta[NU][1]*beta[NU][1] + SE[NU]*SE[NU]; } } /*end loop through ii */ MSEold = MSEnew; MSEnew = MASE; if(MSEnew > MSEold) iup++; else iup = 0; if(MSEnew < MASEopt) {MASEopt = MSEnew; hopt2 = h;} lab2: h *= SPAN; } /*end loop through h */ /* hopt2 = h * exp(-(IUP+1)*log(SPAN)); */ /* optimal bandwidth for local order-fit*/ } /* end-if with TWOSTAGE == 1 */ /* NOW COMPUTE COMPUTE THE ESTIMATED CURVE */ lab5: for(ii=0; ii < ngrid; ii++) { xx = xgrid[ii]; lls(x,y,n,order,xx,hopt2, beta, SE, &sigma, RSC, a); /* mhat[ii][0] = beta[NU][0]; mhat[ii][1] = beta[NU][1]; mhat[ii][2] = SE[NU]; */ mhat[ii] = beta[NU][0]; se [ii] = SE[NU]; } /*end the loop through ii */ hopt[0] = hopt2; free_matrix(beta,0,nm, 0, 1); free_matrix(betatmp,0,nm,0,ngrid); free_vector(SE,0,nm); free_vector(RSC, 0, nm); } /* ************* GAUSSJ.C ************** */ /* #include #include */ #define SWAP(a,b) {double temp=(a);(a)=(b);(b)=temp;} #define TINY 1.0e-12; void gaussj(a,n,b,m) /* Linear equation solution by Gauss-Jordan elimination Input: a: n by n input matrix b: n by m input matrix containing the m right-hand sides vectors of the equation-system In the present version, b is a column vector (m=1). The program has to be changed slightly for the general case (m > 1). In this case, b should be declared as a matrix and e.g. line 58 should look like: for (l=1; l<=m; l++) SWAP(b[irow-1][l-1], b[icol-1][l-1]) Output: a: the input matrix a is replaced be its matrix inverse b: the input matrix is replaced by the corresponding set of solution vectors. */ double **a,**b; int n,m; { int *indxc,*indxr,*ipiv; int i,icol,irow,j,k,l,ll, *ivector(); double big,dum,pivinv; void nrerror(), free_ivector(); indxc=ivector(1,n); indxr=ivector(1,n); ipiv=ivector(1,n); for (j=1; j <= n; j++) ipiv[j]=0; for (i=1; i <= n; i++) { big=0.0; for (j=1; j <= n; j++) if (ipiv[j] !=1) for (k=1; k<=n; k++) { if (ipiv[k] == 0) { if (fabs(a[j-1][k-1]) >= big) { big=fabs(a[j-1][k-1]); irow=j; icol=k; } } else if(ipiv[k] > 1)printf("GAUSSJ: Singular Matrix-1\nn"); /*else if (ipiv[k] > 1) nrerror("GAUSSJ: Singular Matrix-1");*/ } ++(ipiv[icol]); if (irow !=icol) { for (l=1; l<=n; l++) SWAP(a[irow-1][l-1], a[icol-1][l-1]) for (l=1; l<=m; l++) SWAP(b[irow-1][l-1], b[icol-1][l-1]) } indxr[i]=irow; indxc[i]=icol; if (fabs(a[icol-1][icol-1]) == 0.0) a[icol-1][icol-1] = TINY; pivinv=1.0/a[icol-1][icol-1]; a[icol-1][icol-1]=1.0; for(l=1;l<=n;l++) a[icol-1][l-1] *=pivinv; for(l=1;l<=m;l++) b[icol-1][l-1] *=pivinv; for (ll=1;ll<=n;ll++) if (ll !=icol) { dum=a[ll-1][icol-1]; a[ll-1][icol-1]=0.0; for (l=1;l<=n;l++) a[ll-1][l-1] -= a[icol-1][l-1]*dum; for (l=1;l<=m;l++) b[ll-1][l-1] -= b[icol-1][l-1]*dum; } } for (l=n; l>=1;l--) { if (indxr[l] !=indxc[l]) for(k=1; k <=n; k++) SWAP(a[k-1][indxr[l]-1],a[k-1][indxc[l]-1]); } free_ivector(ipiv,1,n); free_ivector(indxr,1,n); free_ivector(indxc,1,n); } /* ********************* SORT.C ***************** */ void sort(ra,n) int n; double *ra; /*index of ra is 0:n-1*/ { int l,j,ir,i; double rra; l=(n >> 1)+1; ir=n; for (;;) { if (l > 1) rra=ra[--l-1]; else { rra=ra[ir-1]; ra[ir-1]=ra[0]; if (--ir == 1) { ra[0]=rra; return; } } i=l; j=l << 1; while (j <= ir) { if (j < ir && ra[j-1] < ra[j]) ++j; if (rra < ra[j-1]) { ra[i-1]=ra[j-1]; j += (i=j); } else j=ir+1; } ra[i-1]=rra; } } /* ******************** UNIFORM RANDOM NUMBER GENERATOR: RAN1.c ******* */ #include #define M1 259200 #define IA1 7141 #define IC1 54773 #define RM1 (1.0/M1) #define M2 134456 #define IA2 8121 #define IC2 28411 #define RM2 (1.0/M2) #define M3 243000 #define IA3 4561 #define IC3 51349 double ran1(idum) int *idum; /* returns a uniform random deviate between 0.0 and 1.0. Set idum to any negative value to initialize or reinitialize the sequence */ { static long ix1,ix2,ix3; static double r[98]; double temp; static int iff=0; int j; void nrerror(); if (*idum < 0 || iff == 0) { iff=1; ix1=(IC1-(*idum)) % M1; ix1=(IA1*ix1+IC1) % M1; ix2=ix1 % M2; ix1=(IA1*ix1+IC1) % M1; ix3=ix1 % M3; for (j=1;j<=97;j++) { ix1=(IA1*ix1+IC1) % M1; ix2=(IA2*ix2+IC2) % M2; r[j]=(ix1+ix2*RM2)*RM1; } *idum=1; } ix1=(IA1*ix1+IC1) % M1; ix2=(IA2*ix2+IC2) % M2; ix3=(IA3*ix3+IC3) % M3; j=1 + ((97*ix3)/M3); if (j > 97 || j < 1) nrerror("RAN1: This cannot happen."); temp=r[j]; r[j]=(ix1+ix2*RM2)*RM1; return temp; } /* ***************** NORMAL RANDOM GENERATOR: GASDEV.C *********** */ double gasdev(idum) int *idum; /* returns a normally distributed deviate with mean 0 and variance 1, using ran1(idum) as the source of uniform deviates */ { static int iset=0; static double gset; double fac,r,v1,v2; double ran1(); if (iset == 0) { do { v1=2.0*ran1(idum)-1.0; v2=2.0*ran1(idum)-1.0; r=v1*v1+v2*v2; } while (r >= 1.0 || r == 0.0); fac= sqrt(-2.0*log(r)/r); gset=v1*fac; iset=1; return v2*fac; } else { iset=0; return gset; } } /* UTILITY PROGRAMS */ /* ************************************************************************* */ /* *************** IVECTOR.C ********** */ int *ivector(nl, nh) int nl, nh; { int *v; v = (int *)malloc((unsigned) (nh-nl+1) * sizeof(int)); return v-nl; } /* *************** VECTOR.C ********** */ double *vector(nl, nh) int nl, nh; { double *v; v = (double *)malloc((unsigned) (nh-nl+1) * sizeof(double)); return v-nl; } /* ***************MATRIX.C ********** */ double **matrix(nrl,nrh,ncl,nch) int nrl,nrh,ncl,nch; /* Allocates a double matrix with range [nrl..nrh][ncl..nch] */ { int i; double **m; m=(double **) malloc((unsigned) (nrh-nrl+1)*sizeof(double*)); m -=nrl; for(i=nrl;i<=nrh;i++) { m[i]=(double *) malloc((unsigned) (nch-ncl+1)*sizeof(double)); m[i] -= ncl; } return m; } /* ***************FREE_MATRIX.C ********** */ void free_matrix(m,nrl,nrh,ncl,nch) double **m; int nrl,nrh,ncl,nch; /* Frees a matrix allocated with matrix */ { int i; for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl)); free((char*) (m+nrl)); } /* ***************FREE_IVECTOR.C ********** */ void free_ivector(v,nl,nh) int *v,nl,nh; /* Frees an int vector allocated by ivector() */ { free((char*) (v+nl)); } /* ***************FREE_VECTOR.C ********** */ void free_vector(v,nl,nh) double *v; int nl,nh; /* Frees a double vector allocated by vector() */ { free((char*) (v+nl)); } /* *********************NRERROR.C ******************** */ void nrerror(error_text) /* prints an error message */ char error_text[]; { fprintf(stderr,"Numerical recipes run-time error...\n"); fprintf(stderr,"%s\n",error_text); fprintf(stderr,"...now exiting to system...\n"); exit(1); } /* *******************************LLS.C ************************ */ void lls(x,y,n,p,xx,h, beta, SE, sigma, RSC, a) double *x, *y,xx,h, **beta, *SE,*sigma, *RSC; int n, p, a; /*Do locally weighted least-squared by using local polynomial of order p Input: x is n-vector of design y is n-vector of the response xx is the point where the estimated value to be returned h is the bandwidth and the kernel is Epnechnikov; n and p explained above beta: (p+a) by 2 matrix. beta[.][1] estimated coefficients to be used to estimate bias: (p+a)-vector; a: approximation order in the plub-in procedure and > 0, return regression coefficients along with others = 0, return only estimated regression coefficient. Output: beta[.][0]: estimated regression coefficients (0 -- p) beta[.][1]: estimated bias according to (2.7) (0 -- p). SE: p-column vector of estimated SE-values of the estimated regression coefficients for the first response vector. t = beta/SE sigma: the estimated standard error of linear model. RSC: Extrended cross-validated values computed as follows. */ { int i,j,l,nrow, nmax; double tmp, tmp1, *Xdes,*Y, **H, **Ha, *w ; double RSS, sum, sumw, trace; double **matrix(), *vector(), *s, *S; void free_matrix(), free_vector(); /*MEMORY ALLOCATION*/ nmax = MAX(p+a, 2*p+2); /* max moments to be computed */ Xdes = vector(0,n); Y = vector(0,n); H=matrix(0,p,0,p); Ha = matrix(0,p,0,p); w = vector(0, n); s = vector(0,nmax); S = vector(0, nmax); /* DETERMINING THE LOCAL DATA POINTS */ nrow=0; /* Effective number of data points*/ sumw =0.0; /* Total weight */ for(i=0; i < n; i++) {tmp = (xx - x[i])/h; tmp *=tmp; if(tmp < 1.0) {w[nrow] = 1.0 - tmp; /* Epanechnikov kernel */ sumw += w[nrow]; Xdes[nrow] = x[i] - xx; Y[nrow] = y[i]; nrow++; } } /* COMPUTE s[j] = \sum w[i] (X[i]-x)^j, S[j] = \sum w[i]^2 (X[i]-x)^j, beta[j][0] = X'Wy[j] */ for(j=0; j <= nmax; j++) {s[j]=0.0; S[j]=0.0;} for(j=0; j <= p; j++) beta[j][0] =0.0; for(i=0; i < nrow; i++) { tmp = 1.0; /*tmp = Xdes[i]^j */ for(j=0; j <= nmax; j++) {tmp1 = w[i]*tmp; s[j] += tmp1; S[j] += w[i]*tmp1; if(j <= p) beta[j][0] += tmp1*Y[i]; tmp *= Xdes[i]; } } /*computing H= X'WX, Ha = X'W^2X*/ for(i=0; i <=p; i++) for(j=0; j <=p; j++) { H[i][j] = s[i+j]; Ha[i][j] = S[i+j]; } /* compute beta[.][1] according to (2.7) */ if(a > 0) { for(i=0; i <= p; i++) { tmp = 0.0; for(j=p+1; j <= p+a; j++) if(j+i <= p+a) tmp += beta[j][1] * s[j+i]; beta[i][1] = tmp; } } gaussj(H,p+1,beta,2); /* solve H b = beta; output: H <- H^{-1}, beta <- H^{-1} beta */ if(a > 0) { for(RSS=.0, i=0; i < nrow; i++) /* compute weighted residual sum of squares RSS = sum_1^n (y_i - \hat{y}_i)^2 w_i for the 0th column of Y */ { sum=0.0; tmp=1.0; /* tmp = Xdes[i]^l */ for(l=0; l <= p; l++) { sum += tmp * beta[l][0] ; tmp *= Xdes[i];} RSS += (Y[i] - sum) * (Y[i] - sum)*w[i]; } trace = 0.0; /*trace = tr((X'WX)^{-1} X'W^2X )*/ for(i=0; i <= p ; i++) { tmp = 0.0; for(j=0; j= xgrid[0] && x <= xgrid[ngrid-1]) { ind = (x - xgrid[0])/delta; frac = (x - xgrid[0])/delta - ind; res = (1-frac)*funval[ind] + frac*funval[ind+1]; } else {if(x < xgrid[0]) { slope = (funval[2]-funval[0])/(xgrid[2]-xgrid[0]); res = funval[0]+slope*(x-xgrid[0]); } else {slope = (funval[ngrid-1]-funval[ngrid-3])/(xgrid[ngrid-1]-xgrid[ngrid-3]); res = funval[ngrid-1]+slope*(x-xgrid[ngrid-1]); } } return res; }