# 03-16-2001 BY ZONGWU CAI # "Functional-Coefficient Regression Models for Nonlinear Time Series" # THIS FILE IS FOR SEARCHING BEST MODEL FOR EXAMPLE 4, BASED ON PROCEDURE # DESCRIBED IN SECTION 3.3, IN THE PAPER BY CAI, FAN AND YAO (2000, JASA). # THE STRATEGY IS TO CONSIDER THE ORDER OF MODEL 2 <= p <= 11 AND TO FIND # THE VARYING VARIABLE AT LAG d, 1<= d <= p. # IN THIS PROGRAM, TWO FUNCTIONS ARE DEFINED:: # localfit(...) FOR COMPUTING LOCAL LINEAR ESTIMATION # bandwidth(...) FOR COMPUTING THE AMS-VALUES DEFINED IN (12) FOR CERTAIN # RANGE OF BANDWIDTHS. # TO RUN THIS PROGRAM, WHAT YOU NEED TO DO IS TO INPUT FOLLOWINGS: # A. You need to set following parameters according to your case #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pc<-0 #(1) Set pc=1 if running in PC; otherwise, set pc to be 0 if(pc==1){datafile<-"c:\\res\\far\\scode\\sunspot.dat"}else {datafile<-"../datasets/sunspots.dat"} #NOTE: The first line will be IGNORE #(2) Set data file name: datafile="sunspot.dat", a data file name Q<--1 #(3) Set Q, the number of multi-folds; set Q=-1 for default h0<-seq(3,5,by=0.1) #(4) Set h0, a sequence of the possible values for bandwidths; set h0=-1 for default trans<-1 #(5) Set trans=1, if do squared-root transformation on response #Note: you need to modify the program if transformation is different p.max<-11 #(6) Set p.max, the largest order of model considered # B. Type ("far4-lag.s") #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # YOU MIGHT NOT NEED TO MODIFY THE FOLLOWING CODE IF YOU DO NOT #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #++++++++++++++++++ # DEFINE FUNCTIONS #++++++++++++++++++ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel<-function(x){0.75*(1-x^2)*(abs(x)<=1)} # DEFINE KERNEL FUNCTION #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # DEFINE THE FUNCTION TO COMPUTE the LOCAL LINEAR ESTIMATE #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ localfit<-function(y,x,h,u,z){ # y -- response variable; # x -- design matrix; # u -- varying variable; # h -- bandwidth; # z -- grid point. ngrid<-length(z) # number of grid points n<-length(y) # number of data points p<-dim(x)[2] # number of covariates ff<-rep(0,ngrid*p) dim(ff)<-c(ngrid,p) for (k in 1:ngrid){ dx<-cbind(x,(u-z[k])*x) w0<-kernel((u-z[k])/h)/h s0<-t(dx)%*%(w0*dx) s1<-solve(s0+0.001*diag(2*p)) beta<-s1%*%(t(dx)%*%(w0*y)) ff[k,]<-beta[1:p] } return(ff) } #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #+++++++++++++++++++++++++++++++++++++++++ # DEFINE BANDWIDTH SELECTOR BASED ON (12) #+++++++++++++++++++++++++++++++++++++++++ bandwidth<-function(y,x,h0,u,Q){ # y - response; # x - design matrix; # u - varying variable; # h0 - possible bandwidths; # Q - value of folds n<-length(y) # sample size if(Q<=0){Q<-3} if(sum(h0)<=0){ y.range<-max(y)-min(y) aa<-n^{-0.2} h0<-seq(0.4*y.range*aa,0.8*y.range*aa,by=0.2) # As default, set h0=c*n^{-0.2} with c from 0.4 to 0.8 } m<-floor(0.1*n) # define m nh<-length(h0) # number of possible bandwidths ams<-rep(0,Q*nh) dim(ams)<-c(nh,Q) for(qq in 1:Q){ n1<-n-qq*m y0<-y[1:n1] x0<-x[1:n1,] u0<-u[1:n1] z0<-u[(n1+1):(n1+m)] for(j in 1:nh){ h<-h0[j]*(n/n1)^0.2 ff<-localfit(y0,x0,h,u0,z0) yhat<-apply(ff*x[(n1+1):(n1+m),],1,sum) ams[j,qq]<-mean((y[(n1+1):(n1+m)]-yhat)^2)} } ams<-apply(ams,1,sum) return(ams) } #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # READ DATA FROM DATA FILE. FOR EXAMPLE, sunspot data set #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ data<-matrix(scan(datafile,skip=1), byrow=T, ncol=1) n<-288 # use data from 1700 to 1987 if(trans==1){ data<-2*(sqrt(data+1)-1)} # do squared-root transformation AMS<-rep(0,p.max-1) best.d<-rep(0,p.max-1) order.p<-2:p.max for(p in 2:p.max){ # start the model with two lags to p.max lags y<-data[(p+1):n] x<-rep(0, p*(n-p)) dim(x)<-c(n-p,p) for(j in 1:p){ x[,j]<-data[(p-j+1):(n-j)] # design matrix consisting of lagged variables } print(c("I am working on the model with p lags", p)) aa<-rep(0,p) print("Please wait, I am working on searching for varying variable") for(d in 1:p){ # start to search for varying variable u<-data[(p-d+1):(n-d)] # set varying variable as lag d, 1<= d <= p #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #++++++++++++++++ # DO COMPUTATION #++++++++++++++++ ams1<-bandwidth(y,x,h0,u,Q) # compute AMS for certain values of bandwidth aa[d]<-min(ams1) # Find the smallest AMS for this model } # end of the loop for d ind<-order(aa)[1] # locate the index of varying variable AMS[p-1]<-aa[ind] # the AMS value for the selected model best.d[p-1]<-ind data1<-cbind(order.p,best.d,AMS) print(data1) } # end of the loop for p print(("THE SEARCH RESULTS ARE AS FOLLOWS:")) print(data1) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #+++++++++++++++++++++ print(c("I AM DONE")) #+++++++++++++++++++++