# Load in the splines and searching library
library(splines)
library("svcm")

# Load in support functions (signal.fit and pnormal.der, etc.) and dataset
source("functions.dump")
source("2dfunctions.dump")
source("mixture.dump")

#######################################################################################################
# Single Index Model function
#
# sim.psr: penalized signal regression with a single-index link.
# Alternates between (a) re-estimating the index coefficients alpha plus an
# intercept by penalized least squares on a local linearization of
# y ~ f(int + u %*% alpha), and (b) re-estimating the smooth link f via
# pnormal.der, until the normalized change in alpha drops below 1e-3 or
# max.iter is reached.
#
# Arguments:
#   x          : matrix of unfolded 2-D signal regressors, one row per sample
#   y          : response vector
#   ps.        : 3-vector of B-spline segment counts (signal dim 1, dim 2, link f)
#   lam.       : 3-vector of penalty parameters (signal dim 1, dim 2, link f)
#   ord.       : 3-vector of difference-penalty orders (signal dim 1, dim 2, link f)
#   max.iter   : maximum number of backfitting iterations (loop runs 2:max.iter,
#                so max.iter is assumed >= 2)
#   min., max. : 2-vectors with the domain limits of the two signal dimensions
#   M1.index, M2.index : coordinate grids of the two signal dimensions
#   p1, p2     : number of channels along each signal dimension
#
# NOTE(review): psp2dM, pnormal.der and predict.pnormal are defined in the
# sourced *.dump files; their exact semantics are assumed here -- verify there.
#
# Returns a list: cv error of the initial link fit, iteration count, alpha,
# intercept, signal basis b, link fit f, the tuning vectors, the last lsfit
# object, the alpha convergence history, and the fitted index eta / f(eta).
sim.psr <- function(x, y, ps., lam., ord., max.iter, min., max., M1.index, M2.index, p1, p2){
  #1: Initialization: estimate alpha (from signal.fit) and f (from pnormal)
  x.index <- 1:ncol(x)  # NOTE(review): assigned but never used below
  # Initial 2-D penalized signal regression fit (tensor-product B-splines,
  # degree 3 in both directions).
  psr.fit <- psp2dM(y, x, p1, p2, "unfolded", M1.index, M2.index,
                    Pars=rbind(c(min.[1], max.[1], ps.[1], 3, lam.[1], ord.[1]),
                               c(min.[2], max.[2], ps.[2], 3, lam.[2], ord.[2])),
                    coef.plot=F, se=F, int=T)
  b <- psr.fit$b   # signal basis: maps raw channels to tensor-basis scores
  u <- x %*% b     # reduced regressors, one column per tensor basis function
  # Build the two difference penalties for the tensor-product coefficients.
  n1 <- ps.[1] + 3  # basis size in dimension 1 (cubic B-splines: nseg + 3)
  n2 = ps.[2] + 3   # basis size in dimension 2
  d1 <- ord.[1]
  D1 <- diag(n1)
  if(d1 != 0) { for(j in 1:d1) { D1 <- diff(D1) } }  # d1-th order differences
  lambda1 <- lam.[1]
  P1 <- sqrt(lambda1) * kronecker(D1, diag(n2))  # penalty along dimension 1
  d2 <- ord.[2]
  D2 <- diag(n2)
  if(d2 != 0) { for(j in 1:d2) { D2 <- diff(D2) } }  # d2-th order differences
  lambda2 <- lam.[2]
  P2 <- sqrt(lambda2) * kronecker(diag(n1), D2)  # penalty along dimension 2
  Pen <- rbind(P1, P2)  # stacked penalty rows (data-augmentation form)
  z1 <- rep(0, n2 * (n1 - d1))
  z2 <- rep(0, n1 * (n2 - d2))
  nix <- c(z1, z2)  # zero pseudo-responses matching the penalty rows
  # Starting values for the index and the link function.
  alpha <- as.vector(psr.fit$coef[-1])
  int <- as.vector(psr.fit$coef[1])
  eta <- as.vector(u %*% alpha) + int
  f.fit <- pnormal.der(x=eta, y=y, nseg=ps.[3], bdeg=3, pord=ord.[3],
                       lambda=lam.[3], plot = F, se = F)
  der <- predict.pnormal(f.fit, eta, 1)    # f'(eta)
  f.eta <- predict.pnormal(f.fit, eta, 0)  # f(eta)
  iter <- 1        # Initialize number of iterations
  cv <- f.fit$cv   # Track the cv errors
  d.alpha <- NULL  # Track convergence of alpha
  #2: Start iteration
  for (it in 2:max.iter){
    ## Update alpha and intercept through lsfit
    # Working response from linearizing y ~ f(int + u alpha) at current fit:
    y.star <- y - f.eta + der * eta
    u.tilda <- diag(der) %*% cbind(1, u)  # regressors weighted by f'(eta)
    p <- cbind(0, Pen)  # zero column: intercept is unpenalized  #sqrt(lam.[1])*D)
    #nix <- rep(0,nrow(D))
    u.p <- rbind(u.tilda, p)   # augmented design (data rows + penalty rows)
    y.p <- c(y.star, nix)
    fit1 <- lsfit(u.p, y.p, intercept=F)
    int.new <- as.vector(fit1$coef[1])
    alpha.new <- as.vector(fit1$coef[2:ncol(u.p)])
    ## Check the convergence of alpha
    # Compare only the direction of alpha (scale-normalized first, since the
    # index is identifiable only up to scale).
    tmp1 <- alpha / sqrt(mean(alpha^2))
    tmp2 <- alpha.new / sqrt(mean(alpha.new^2))
    d.alpha <- c(d.alpha, mean((tmp2 - tmp1)^2) / mean(tmp2^2))
    if ( d.alpha[(it-1)] < 10^(-3) ) break  # converged: keep previous alpha/f
    ## Estimate f through pnormal
    alpha <- alpha.new
    int <- int.new
    eta <- as.vector(u %*% alpha) + int
    f.fit <- pnormal.der(x=eta, y=y, nseg=ps.[3], bdeg=3, pord=ord.[3],
                         lambda=lam.[3], plot = F, se = F)
    der <- predict.pnormal(f.fit, eta, 1)
    f.eta <- predict.pnormal(f.fit, eta, 0)
  }
  # Returned (invisibly: last expression is an assignment).
  out <- list(cv=cv, iter=(it-1), alpha=alpha, int=int, b=b, f=f.fit,
              ps.=ps., lam.=lam., ord.=ord., fit1=fit1,
              delta.alpha=d.alpha, eta=eta, f.eta=f.eta)
}

# Predict responses for a new signal matrix x from a fitted sim.psr object:
# project onto the signal basis, form the index, evaluate the fitted link.
predict.sim.psr <- function(obj, x){
  eta <- as.vector(x %*% obj$b %*% obj$alpha) + obj$int
  out <- predict.pnormal(obj$f, eta, 0)
  out
}

# As predict.sim.psr, but also returns the index values eta.ext.
# (List is returned invisibly since the last expression is an assignment.)
predict.ext <- function(obj, x){
  eta.ext <- as.vector(x %*% obj$b %*% obj$alpha) + obj$int
  yhat.ext <- predict.pnormal(obj$f, eta.ext, 0)
  glist = list(yhat.ext=yhat.ext, eta.ext=eta.ext)
}

## Set the model parameters
upper <- c(6,-3,-3)   # upper search bounds for the 3 log-scale lambdas
lower <- c(6,-3,-3)   # equal to upper and ngrid = 1: a single search point
ngrid <- 1
ord. <- c(2,2,2)      # second-order difference penalties throughout
opt.type="greedy"     # "greedy" or "honest"
search.type='grid'    # 'grid' or 'clever'
ps. <- c(7,37,7)      # B-spline segment counts (dim 1, dim 2, link f)
pure.out=F            # drop the pure-substance samples from training?
mask=F
start <- upper        # starting point for the 'clever' search
subset36=F
outliers=F
sort.valid.test=T     # interleave sorted valid/test responses (see below)

## Split the dataset into training, valid and test sets
#1: water; 2: ethanediol; 3: amino-1-propanol
# y.mixture and d.mixture.data come from mixture.dump -- assumed: rows are
# mixture samples, columns of y.mixture are component fractions (verify there).
y <- as.vector(y.mixture[,2])  # response: ethanediol column
x0 <- d.mixture.data
pure = c(1,5,9)                # positions of pure substances within train
train = c(1:13,20,22,24)
if(pure.out){train = train[-pure]}
valid = c(14:19,21,23,25)
test = c(26:34)
x <- x0
p1 <- 12    # channels along signal dimension 1
p2 = 400    # channels along signal dimension 2
x.train <- x[train,]
x.valid <- x[valid,]
x.test <- x[test,]
y.train <- y[train]
y.valid <- y[valid]
y.test <- y[test]
nobs = length(y.train)
# Optionally re-split valid/test so their sorted responses alternate, giving
# both sets comparable coverage of the response range.
if(sort.valid.test){
  oo. = order(c(y.valid,y.test))
  y.comb = c(y.valid,y.test)[oo.]
  x.comb = (rbind(x.valid,x.test))[oo.,]
  evens. = seq(from=2, to=length(y.comb), by=2)
  odds. = seq(from=1, to=length(y.comb), by=2)
  y.valid = y.comb[evens.]
  x.valid = x.comb[evens.,]
  y.test = y.comb[odds.]
x.test=x.comb[odds.,] } ########## MSISR results ############# min.=c(30,700) max.=c(70,1100) M1.index=c(30,35,37.5,40, 45,47.5, 50, 55, 60, 62.5, 65, 70) M2.index=seq(from=701, to=1100, by=1) max.iter <- 100 # Number of iterations int <- T cleverwrapper<-function(vec){ fit<-sim.psr(x.train, y.train, ps., vec, ord., max.iter, min., max.,M1.index,M2.index, p1, p2) if(opt.type=="greedy"){ tmp1 <- predict.sim.psr(fit, x.valid) out <- sqrt(mean(((tmp1-y.valid)^2)))} if(opt.type=="honest"){ h <- hat(fit$fit1$qr, intercept = F)[1:nobs] trace1 <- sum(h) trace2=fit$f$effdim EDsum=trace1+trace2 sig2=sum((fit$fit1$residuals[1:nobs])^2)/(nobs) aic=2*( nobs*log(sqrt(sig2))+(EDsum) ) out=aic } out } #place start after ngrid if(search.type=='clever'){ opt.sim<-cleversearch(cleverwrapper, lower, upper, ngrid,start, logscale=TRUE, clever=TRUE, verbose=FALSE) } if(search.type=='grid'){ opt.sim<-cleversearch(cleverwrapper, lower, upper, ngrid, logscale=TRUE, clever=FALSE, verbose=FALSE) } opt.lam<-c(opt.sim$par) opt.sim$par fit<-sim.psr(rbind(x.train,x.valid), c(y.train,y.valid), ps., opt.lam, ord., max.iter,min., max.,M1.index,M2.index,p1,p2) h <- hat(fit$fit1$qr, intercept = F)[1:nobs] pred.train=predict.sim.psr(fit,x.train) pred<-predict.sim.psr(fit,x.test) #Prediction test.err<-sqrt(mean((y.test-pred)^2)) #Test error test.err