"lms"<- function(x, y, df.lms = c(4, 4, 4), X.mu = NULL, X.sigma = NULL, dist = "Gaussian", quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95), cycle.num = 300, conv.pc = 0.01, x.predict = NULL, new.X.mu = NULL, new.X.sigma = NULL, quantile.plot = T) { # Nonparametric quantile regression using the Gaussian or Gamma distribution # # x=explantory variable (in time or space) # y=response # df=degree of freedom for smooth of lambda, mu, sigma # X.mu= matrix of covariates to model mu (may or may not include x): intercept automatic # X.sigma= matrix of covariates to model sigma (may or may not include x): intercept automatic # dist= "Gaussian" or dist="Gamma" # quantiles= vector of quantiles (between 0 and 1) # x.predict= vector of interesting x locations to get predicted quantiles # new.X.mu= matrix of eta.mu covariates for x.predict: intercept automatic # new.X.sigma= matrix of eta.sigma covariates for x.predict: intercept automatic # # Reference: Cole and Green (1992). Statistics in Medicine. # Reference: A. Lopatatzidis and P. Green, Submitted for Publication (1998) # # (c) Splus Function Written by Brian D. Marx (1999) # #------Initializations-------------------- # lx <- length(x) ones <- rep(1, lx) X.beta <- X.gamma <- 0 * ones mu.coef <- sigma.coef <- NULL mean.y <- mean(y) delta.mu <- delta.sig <- cycle <- beta.pc <- gamma.pc <- 0 xorder <- order(x) x <- x[xorder] y <- y[xorder] if(!missing(X.mu)) { X.mu <- as.matrix(cbind(ones, X.mu)) X.mu <- as.matrix(X.mu)[xorder, ] beta.pc <- 1 beta.new <- beta.old <- rep(0, ncol(as.matrix(X.mu))) } if(!missing(X.sigma)) { X.sigma <- as.matrix(cbind(ones, X.sigma)) X.sigma <- as.matrix(X.sigma)[xorder, ] gamma.pc <- 1 gamma.new <- gamma.old <- rep(0, ncol(as.matrix(X.sigma))) } df.lam <- df.lms[1] df.mu <- df.lms[2] df.sig <- df.lms[3] lam.old <- lam.new <- lam.init <- ones eta.mu <- mu.old <- mu.new <- mu.init <- abs(predict(smooth.spline(x, y, df = df.mu), x)$y) eta.sig <- sig.old <- sig.new <- sig.init <- abs(predict(smooth.spline( x, ((y/mu.old) - 1)^2, df = df.sig), x)$y) mean.sig <- mean(sig.init) lampc.new <- mupc.new <- sigpc.new <- lam.pc <- mu.pc <- sig.pc <- 1 repeat { # #------------Gaussian: Score and Information---------------- # if(dist == "Gaussian") { z <- (((y/eta.mu)^lam.old) - 1)/(lam.old * eta.sig) u.lam <- z/lam.old * (z - (log(abs(y/eta.mu))/eta.sig)) - log(abs(y/eta.mu)) * (z^2 - 1) u.mu <- (z/(eta.mu * eta.sig)) + (lam.old * (z^2 - 1))/ eta.mu u.sig <- (z^2 - 1)/eta.sig # w.lam <- (7 * eta.sig^2)/4 w.lam <- 5/lam.old + 2 * eta.sig^2 - 1/lam.old^2 iw.lam <- 1/w.lam w.mu <- (1 + 2 * lam.old^2 * eta.sig^2)/(eta.mu^2 * eta.sig^2) iw.mu <- 1/w.mu w.sig <- 2/eta.sig^2 iw.sig <- 1/w.sig w.lam.mu <- -1/(2 * eta.mu) w.lam.sig <- lam.old * eta.sig w.mu.sig <- (2 * lam.old)/(eta.mu * eta.sig) } # #----------------Gamma: Score and Information----------------- # if(dist == "Gamma") { z <- (y/eta.mu)^lam.old theta <- (eta.sig * lam.old)^(-2) psi <- log(theta) - 1/(2 * theta) - 1/(12 * theta^2) + 1/(120 * theta^4) - 1/(252 * theta^6) psi.d <- 1/theta + 1/(2 * theta^2) + 1/(6 * theta^3) - 1/(30 * theta^5) + 1/(42 * theta^7) u.lam <- 1/lam.old * (1 + 2 * theta * (psi + z - 1 - log(theta) - (log(z) * (z + 1))/2)) u.mu <- ((lam.old * theta)/eta.mu) * (z - 1) u.sig <- (2 * theta)/eta.sig * (psi + z - log(theta * z ) - 1) w.lam <- 1/lam.old^2 * (1 - 4 * theta + 4 + 2 * theta * lam.old * psi + 2 * lam.old - 2 * theta * lam.old * log(theta) + 4 * theta^2 * psi.d) iw.lam <- 1/w.lam w.mu <- 1/(eta.mu^2 * eta.sig^2) iw.mu <- 1/w.mu w.sig <- (4 * theta)/eta.sig^2 * (theta * psi.d - 1) iw.sig <- 1/w.sig w.lam.mu <- - theta/eta.mu * (psi + theta^(-1) - log( theta)) w.lam.sig <- 2 * theta^1.5 * (2 * theta * psi.d - theta^ (-1) - 2) w.mu.sig <- 0 } # #-----------Lambda Portion----------------- # if(lam.pc > conv.pc | cycle < 4) { if(min(w.lam) < 0) { w.lam[w.lam < 0] <- 1e-005 warning(paste( "w.lam (weight) is negative, have set 1e-5 at cycle", cycle)) } uwy.lam <- u.lam + w.lam * lam.old - (mu.new - mu.old) * w.lam.mu - (sig.new - sig.old) * w.lam.sig if(!missing(X.mu)) { uwy.lam <- uwy.lam - (w.lam.mu * X.mu) %*% as.matrix(beta.new - beta.old) } if(!missing(X.sigma)) { uwy.lam <- uwy.lam - (w.lam.sig * X.sigma) %*% as.matrix(gamma.new - gamma.old) } y.lam <- iw.lam * uwy.lam s.lam <- smooth.spline(x, y.lam, w = w.lam, df = df.lam ) lam.old <- lam.new lam.new <- predict(s.lam, x)$y lampc.old <- lampc.new lampc.new <- s.lam$pen.crit if(max(abs(lam.new)) > 4 | lam.pc > 2000) { warning(paste("|lambda| > 4 at cycle", cycle)) lam.new <- lam.init lam.old <- lam.init + runif(lx) lampc.new <- lampc.old + 999 } } # #------------Mu Portion---------------------- # if(mu.pc > conv.pc | cycle < 4) { uwy.mu <- u.mu + w.mu * mu.old - (sig.new - sig.old) * w.mu.sig - (lam.new - lam.old) * w.lam.mu if(!missing(X.mu)) { uwy.mu <- uwy.mu - (w.mu * X.mu) %*% as.matrix( beta.new - beta.old) } if(!missing(X.sigma)) { uwy.mu <- uwy.mu - (w.mu.sig * X.sigma) %*% as.matrix(gamma.new - gamma.old) } y.mu <- iw.mu * uwy.mu s.mu <- smooth.spline(x, y.mu, w = w.mu, df = df.mu) mu.old <- mu.new mu.new <- predict(s.mu, x)$y if(min(mu.new) < 0) { warning(paste( "mu negative, have set to 1e-5 at cycle", cycle)) } if(missing(X.mu)) { eta.mu <- mu.old } mupc.old <- mupc.new mupc.new <- s.mu$pen.crit } # #---------------Sigma Portion---------------------- # if(sig.pc > conv.pc | cycle < 4) { uwy.sig <- u.sig + w.sig * sig.old - (lam.new - lam.old ) * w.lam.sig - (mu.new - mu.old) * w.mu.sig if(!missing(X.mu)) { uwy.sig <- uwy.sig - (w.mu.sig * X.mu) %*% as.matrix(beta.new - beta.old) } if(!missing(X.sigma)) { uwy.sig <- uwy.sig - (w.sig * X.sigma) %*% as.matrix(gamma.new - gamma.old) } y.sig <- iw.sig * uwy.sig s.sig <- smooth.spline(x, y.sig, w = w.sig, df = df.sig ) sig.old <- sig.new sig.new <- predict(s.sig, x)$y if(min(sig.new) < 0 | min(sig.old) < 0) { warning(paste( "sigma negative, have set 1e-5 at cycle", cycle)) } if(missing(X.sigma)) { eta.sig <- sig.old } sigpc.old <- sigpc.new sigpc.new <- s.sig$pen.crit } # #---------------Eta.X = Mu + Xbeta Portion----------------- # if(!missing(X.mu)) { if(beta.pc > conv.pc | cycle < 4) { uwy.beta <- u.mu + (w.mu * X.mu) %*% as.matrix( beta.old) - w.lam.mu * (lam.new - lam.old) - w.mu * (mu.new - mu.old) - w.mu.sig * ( sig.new - sig.old) if(!missing(X.sigma)) { uwy.beta <- uwy.beta - w.mu.sig * X.sigma %*% as.matrix(gamma.new - gamma.old) } y.beta <- as.vector(iw.mu * uwy.beta) ls.beta <- lsfit(X.mu, y.beta, intercept = F, wt = w.mu) beta.old <- beta.new beta.new <- as.vector(ls.beta$coef) beta.pc <- sum(abs((beta.new[-1] - beta.old[-1] )/(beta.old[-1] + 1e-009))) X.beta <- X.mu %*% as.matrix(beta.old) eta.mu <- as.vector(mu.old + X.beta) } } # #-----------------Eta.W = Sigma + Wgamma Portion------------ # if(!missing(X.sigma)) { if(gamma.pc > conv.pc | cycle < 4) { uwy.gamma <- u.sig + (w.sig * X.sigma) %*% as.matrix(gamma.old) - w.lam.sig * (lam.new - lam.old) - w.mu.sig * (mu.new - mu.old) - w.sig * (sig.new - sig.old) if(!missing(X.mu)) { uwy.gamma <- uwy.gamma - (w.mu.sig * X.mu) %*% as.matrix(beta.new - beta.old) } y.gamma <- as.vector(iw.sig * uwy.gamma) ls.gamma <- lsfit(X.sigma, y.gamma, intercept = F, wt = w.sig) gamma.old <- gamma.new gamma.new <- as.vector(ls.gamma$coef) gamma.pc <- sum(abs((gamma.new[-1] - gamma.old[ -1 ])/(gamma.old[-1] + 1e-009))) X.gamma <- X.sigma %*% as.matrix(gamma.old) eta.sig <- as.vector(sig.old + X.gamma) } } # #--------------Conditon to Ensure Identifiability for beta and gamma------- # if(mu.pc > conv.pc & beta.pc > conv.pc) { delta.mu <- mean(mu.new) - mean.y mu.new <- mu.new - delta.mu beta.new[1] <- beta.new[1] + delta.mu } if(sig.pc > conv.pc & gamma.pc > conv.pc) { delta.sig <- mean(sig.new) - mean.sig sig.new <- sig.new - delta.sig gamma.new[1] <- gamma.new[1] + delta.sig } # #---------------Some Convergence Details-------------------------- # if(cycle > cycle.num) break lam.pc <- abs(lampc.old - lampc.new) mu.pc <- abs(mupc.old - mupc.new) sig.pc <- abs(sigpc.old - sigpc.new) maxx <- max(c(lam.pc, mu.pc, sig.pc)) if(!missing(X.mu)) { maxx <- max(c(maxx, beta.pc)) mu.coef <- ls.print(ls.beta, print.it = F) } if(!missing(X.sigma)) { maxx <- max(c(maxx, gamma.pc)) sigma.coef <- ls.print(ls.gamma, print.it = F) } print(c(round(cycle, 0), round(lam.pc, 4), round(mu.pc, 4), round(sig.pc, 4), round(beta.pc, 4), round(gamma.pc, 4) )) if(maxx < conv.pc) break cycle <- cycle + 1 } # #----------------Checks and Adjustments------------------------------ # if(!missing(X.mu) & !missing(x.predict) & missing(new.X.mu)) { warning(paste("new.X.mu REQUIRED for new predictions")) } if(!missing(X.sigma) & !missing(x.predict) & missing(new.X.sigma)) { warning(paste("new.X.sigma REQUIRED for new predictions")) } if(min(mu.new) < 0) { warning(paste("Converged Mu negative, have set to 1e-5")) mu.new[mu.new < 0] <- 1e-005 } if(min(sig.new) < 0) { warning(paste("Converged Sigma negative, have set to 1e-5")) sig.new[sig.new < 0] <- 1e-005 } # #-----------Plots and New Preditions---------------------------- # if(quantile.plot) { par(mfrow = c(1, 1)) if(dist == "Gaussian") { z <- (((y/eta.mu)^lam.new) - 1)/(lam.new * eta.sig) qqnorm(z, plot = T, ylab = "Z-scores") title(main = "Normal qqplot") abline(0, 1, col = 3) cat("PRESS ENTER to see LMS Smoothes and Quantiles") ans <- readline() } if(dist == "Gamma") { z <- (y/eta.mu)^lam.new z.rand <- rgamma(lx, theta, theta) z.plot <- qqplot(z, z.rand, plot = F) plot(z.plot, xlab = "g.gamma", ylab = "r.gamma") abline(0, 1, col = 3) title(main = "Gamma qqplot") cat("PRESS ENTER to see LMS Smoothes and Quantiles") ans <- readline() } } par(mfrow = c(2, 2)) plot(s.lam, xlab = "lambda", ylab = "smooth(lambda)", type = "b") plot(s.mu, xlab = "mu", ylab = "smooth(mu)", type = "b") plot(s.sig, xlab = "sigma", ylab = "smooth(sigma)", type = "b") plot(x, y, sub = "lms smooth") mmax <- max(y) mmin <- min(y) if(!missing(x.predict)) { n.pred <- nrow(as.matrix(x.predict)) quantile.predict <- matrix(0, n.pred, length(quantiles)) mu.pred <- predict(s.mu, x.predict)$y lam.pred <- predict(s.lam, x.predict)$y sig.pred <- predict(s.sig, x.predict)$y if(!missing(X.mu)) { mu.pred <- mu.pred + as.matrix(cbind(rep(1, n.pred), new.X.mu)) %*% as.matrix(beta.old) } if(!missing(X.sigma)) { sig.pred <- sig.pred + as.matrix(cbind(rep(1, n.pred), new.X.sigma)) %*% as.matrix(gamma.old) } } ave.X.beta <- mean(X.beta) ave.X.gamma <- mean(X.gamma) for(qq in 1:length(quantiles)) { if(dist == "Gaussian") { q.y <- ((mu.new + ave.X.beta) * (1 + (lam.new * ( sig.new + ave.X.gamma) * qnorm(quantiles[qq])))^ (1/lam.new)) } if(dist == "Gamma") { ave.theta <- ((sig.new + ave.X.gamma) * lam.new)^(-2) q.y <- (mu.new + ave.X.beta) * qgamma(quantiles[qq], ave.theta, ave.theta)^(1/lam.new) } q.y[q.y > mmax | q.y < mmin] <- NA lines(x, q.y, col = 2, lty = 1) if(!missing(x.predict)) { if(dist == "Gaussian") { quantile.predict[, qq] <- mu.pred * (1 + ( lam.pred * sig.pred * qnorm(quantiles[qq])))^( 1/lam.pred) } if(dist == "Gamma") { theta.pred <- (sig.pred * lam.pred)^(-2) quantile.predict[, qq] <- mu.pred * qgamma( quantiles[qq], theta.pred, theta.pred)^(1/ lam.pred) } } } q.pred <- NULL if(!missing(x.predict)) { q.pred <- cbind(x.predict, quantile.predict) if(!missing(X.mu)) { q.pred <- cbind(x.predict, new.X.mu, quantile.predict) } } glist <- list(x.pred.info = q.pred, smooth.mu = s.mu, smooth.lam = s.lam, smooth.sig = s.sig, df.lms = df.lms, mu.coef = mu.coef, sigma.coef = sigma.coef, cycle = cycle, dist = dist) glist }