#Definitons on RSS0, RSS1 and Tn have also been modified below. # 03-16-2001 BY ZONGWU CAI # S-CODE for NONPARAMETRIC BOOTSTRAP TEST # THIS PROGRAM IS DOING TEST. IN THIS PROGRAM, TWO FUNCTIONS ARE DEFINED: # local(...) FOR COMPUTING LOCAL LINEAR ESTIMATION. # bandwidth(...) FOR COMPUTING THE AMS-VALUES DEFINED IN (12) FOR CERTAIN # RANGE OF BANDWIDTHS. # NOTE: THE SAME BANDWIDTH IS USED FOR BOOTSTRAP # THE EXAMPLE USED HERE IS TO TEST MODEL (7) VS MODEL (8). YOU NEED TO MODIFY # THE FUNCTIONS (DEFINED BELOW)) GIVEN IN H_0 # TO RUN THIS PROGRAM, WHAT YOU NEED TO DO IS TO INPUT FOLLOWINGS: # A. You need to set following parameters according to your case ############################################################################## pc<-0 #(1) Set pc=1 if running in PC; otherwise, set pc to be 0 if(pc==1){datafile<-"c:\\res\\far\\scode\\lynx.dat"}else {datafile<-"lynx.dat"} #(2) Set data file name: datafile="lynx.dat", a data file name. p<-2 #(3) set p=2, number of lagged variables in the model d<-2 #(4) Set d=2, the varying variable, say, lagged-two variable p0<--1 #(5) Set p0, number of exogenous variables; set p0=-1 if no exogenous Q<--1 #(6) Set Q, choice of Q in (12); set Q=-1 by default (Q=4) h0<--1 #(7) Set h0, a sequence of the possible values for bandwidths; set h0=-1 as default trans<-1 #(8) Set trans=1, if do log transformation on response. #Note: you need to modify the program if transformation is different nb<-1000 #(9) nb=1000, the number of bootstrap replications # B. Type ("test3.s") ############################################################################## ####################################################################### # YOU MIGHT NOT NEED TO MODIFY THE FOLLOWING CODE IF YOU DO NOT ####################################################################### ########################################################################### # READ DATA FROM DATA FILE. FOR EXAMPLE, lynx data set ########################################################################### if(p0>=1){ data<-matrix(scan(datafile), byrow=T, ncol=p0+1) n<-length(data[,1]) if(trans==1){ data0<-log10(data[,1])}else # do log transformation {data0<-data[,1]} y<-data0[(p+1):n] # response variable x<-rep(0, (p+p0)*(n-p)) dim(x)<-c(n-p,p+p0) for(j in 1:p){ x[,j]<-data0[(p-j+1):(n-j)] # lagged variable } x[,(p+1):(p+p0)]<-data0[(p+1):n,2:(1+p0)] # design matrix u<-data0[(d-1):(n-d)] # varying variable }else {data<-matrix(scan(datafile), byrow=T, ncol=1) n<-length(data[,1]) if(trans==1){data<-log10(data)} # do log transformation y<-data[(p+1):n] x<-rep(0, p*(n-p)) dim(x)<-c(n-p,p) for(j in 1:p){ x[,j]<-data[(p-j+1):(n-j)] # lagged variable } # design matrix u<-data[(d-1):(n-d)] } z<-u # grid points ########################################################################## ########################################################################## # DEFINE FUNCTIONS ########################################################################### ############################################################################ # Define the kernel function and the coefficient functions for TAR model # given in (7), which are needed modifying according to the specific problem ############################################################################# kernel<-function(x){0.75*(1-x^2)*(abs(x)<=1)} a0<-function(x){0.62*(x<=3.25)+2.25*(x>3.25)} a1<-function(x){1.25*(x<=3.25)+1.52*(x>3.25)} a2<-function(x){-0.43*(x<=3.25)-1.24*(x>3.25)} ########################################################################## ########################################################################## # DEFINE a FUNCTION TO COMPUTE the LOCAL LINEAR ESTIMATE ########################################################################## local<-function(y,x,h,u,z){ # y -- response variable; x -- design matrix; u -- varying variable; # h -- bandwidth; z -- grid point. ngrid<-length(z) # number of grid points n<-length(y) # number of data points p<-dim(x)[2] # number of covariates ff<-rep(0,ngrid*p) dim(ff)<-c(ngrid,p) p2<-2*p for (k in 1:ngrid){ dx<-(u-z[k])*x dx<-cbind(x,dx) w0<-kernel((u-z[k])/h)/h s0<-t(dx)%*%(w0*dx) s1<-solve(s0+0.001*diag(p2)) beta<-s1%*%(t(dx)%*%(w0*y)) ff[k,]<-beta[1:p] } return(ff) } ############################################################################ ########################################################################### # DEFINE BANDWIDTH SELECTOR BASED ON (12) ########################################################################## bandwidth<-function(y,x,u,h0,Q){ # y - response; x - design matrix; u - varying variable; # h0 - possible bandwidths; Q - value of folds n<-length(y) # sample size if(Q<=0){Q<-4} if(sum(h0)<=0){ y.range<-max(y)-min(y) aa<-n^{-0.2} h0<-seq(0.5*y.range*aa,2*y.range*aa,by=0.05) # As default, set h0=c*n^{-0.2} with c from half of range of data # to twice of range of data } m<-floor(0.1*n) # define m nh<-length(h0) # number of possible bandwidths ams<-rep(0,Q*nh) dim(ams)<-c(nh,Q) for(qq in 1:Q){ n1<-n-qq*m y0<-y[1:n1] x0<-x[1:n1,] u0<-u[1:n1] z0<-u[(n1+1):(n1+m)] for(j in 1:nh){ h<-h0[j]*(n/n1)^0.2 ff<-local(y0,x0,h,u0,z0) yhat<-apply(x[(n1+1):(n1+m),]*ff,1,sum) ams[j,qq]<-mean((y[(n1+1):(n1+m)]-yhat)^2) } } ams<-apply(ams,1,sum) return(cbind(h0,ams)) } ############################################################################ ########################################################################### # DO COMPUTATION ########################################################################## ############################################################################ beta<-cbind(a1(u),a2(u)) # it needs modifying according to the specific problem yhat0<-a0(u)+apply(beta*x,1,sum) # compute yhat under H_0 n<-length(y) ams<-bandwidth(y,x,u,h0,Q) # compute AMS for certain values of bandwidths h0<-ams[,1] ind<-order(ams[,2])[1] # locate the index of optimal bandwidth h.opt<-h0[ind] # find the optimal bandwidth minimizing AMS print("I AM DONE THE OPTIMAL BANDWIDTH SEARCH") print(c("THE OPTIMAL BANDWIDTH IS", h.opt)) beta<-local(y,x,h.opt,u,z) yhat1<-apply(beta*x,1,sum) # compute yhat under H_1 epsilon0<-y-yhat1 # compute the nonparametric residuals epsilon0<-epsilon0-mean(epsilon0) # centerized rss0<-sum((y-yhat0)^2) # compute RSS under H_0 rss1<-sum((y-yhat1)^2) # compute RSS under H_1 tn<-(rss0-rss1)/rss1*n/2 # compute the statistic T_n print(c("THE OBSERVED VALUE of RSS0 is", rss0)) print(c("THE OBSERVED VALUE of RSS1 is", rss1)) print(c("THE OBSERVED VALUE of T_n is", tn)) ############################################################################ ############################################################################ print(c("NOW, I BEGIN DOING BOOTSTRAP")) p.value<-0 tnstar<-rep(0,nb) for(b in 1:nb){ epsilon.star<-sample(epsilon0, rep=T) # draw Bootstrap sample ystar<-yhat0+epsilon.star # compute Y_i^* rss0<-sum((epsilon.star)^2) # Compute RSS_0^* beta<-local(ystar,x,h.opt,u,z) yhat1<-apply(beta*x,1,sum) rss1<-sum((ystar-yhat1)^2) # compute RSS_1^* tn.star<-(rss0-rss1)/rss1*n/2 # compute T_n^* tnstar[b]<-tn.star if(pc==1){write(t(tnstar),"c:\\temp\\tnstar.out",ncol=1)}else {write(t(tnstar),"tnstar.out",ncol=1)} if(tn.star>=tn){p.value<-p.value+1} print(c(b, "p-value", round(p.value/b, digits=3))) } # the end of the BOOTSTRAP loop p.value<-p.value/nb print(c("THE P-VALUE IS", p.value)) print(c("I AM DONE")) ##########################################################################