# -----------------------------------------------------------------------------
# P-spline signal regression (PSR) and P-spline scatterplot smoothing.
#
# References:
#   Marx, B.D. and Eilers, P.H.C. (1999). Generalized linear regression for
#     sampled signals and curves: A P-spline approach. Technometrics, 41(1),
#     1-13.
#   Eilers, P.H.C. and Marx, B.D. (1996). Flexible smoothing with B-splines
#     and penalties (with comments and rejoinder). Statistical Science,
#     11(2), 89-121.
#
# (c) 1995 Paul Eilers & Brian Marx
#
# NOTE(review): spline.des() comes from the (base-distribution) 'splines'
# package, which must be attached by the caller.
# -----------------------------------------------------------------------------

# Additive polynomial P-spline signal regression; poly.order = 1 reduces to
# ordinary P-spline signal regression (signal.fit).
#
# Arguments:
#   response      response vector (length n).
#   x.index       abscissae of the signal channels (1:p).
#   x.signal      n x p explanatory signal matrix (p >> n allowed).
#   poly.order    order of the polynomial signal expansion (1 = PSR).
#   importance    per-B-spline importance weights for a varying penalty.
#   m.binomial    binomial totals (default: vector of ones).
#   ps.intervals  number of B-spline intervals. Default 8.
#   degree        B-spline degree. Default 3.
#   order         order of the difference penalty. Default 3.
#   wts           observation weights (default: ones).
#   link          link function (identity, log, sqrt, logit, probit, cloglog,
#                 loglog, recipical); "default" picks the canonical link.
#   family        gaussian, binomial, poisson, or Gamma.
#   r.gamma       Gamma shape parameters (default: ones).
#   lambda.vector one smoothing parameter (>= 0) per polynomial order.
#   y.predicted   external responses matching x.predicted (CV summaries).
#   x.predicted   matrix of new (row) signals to predict.
#   ridge.adj     small ridge constant stabilising near dependencies.
#   int           include an intercept?
#   coef.plot     plot the estimated coefficient curves?
#
# Value: a list including aic (= deviance + 2 * trace(Hat)), deviance,
# dispersion.param, fitted mu, CV summaries, and the fitted basis/coefs.
`poly.signal.fit` <- function(response, x.index, x.signal, poly.order = 1,
    importance = NULL, m.binomial = NULL, ps.intervals = 8, degree = 3,
    order = 3, wts = NULL, link = "default", family = "gaussian",
    r.gamma = NULL, lambda.vector = 0, y.predicted = NULL,
    x.predicted = NULL, ridge.adj = 0, int = TRUE, coef.plot = TRUE) {
  y <- response
  x <- x.index
  vv <- importance
  n <- length(y)
  # is.null() (not missing()) so an explicitly passed NULL also gets defaults.
  if (is.null(wts)) {
    wts <- rep(1, n)
  }
  # The checker only validates here; the real penalties come from
  # lambda.vector, so a dummy lambda = 1 is passed.
  parms <- pspline.checker(family, link, degree, order, ps.intervals,
    lambda = 1, ridge.adj, wts)
  if (length(lambda.vector) != poly.order) {
    lambda.vector <- rep(lambda.vector[1], poly.order)
    warning(paste(
      "Only used first lambda.vector entry: lambda.vec should have length = poly.ord"))
  }
  family <- parms$family
  link <- parms$link
  q <- parms$degree
  d <- parms$order
  ridge.adj <- parms$ridge.adj
  ndx <- parms$ps.intervals
  wts <- parms$wts
  if (is.null(m.binomial)) {
    m.binomial <- rep(1, n)
  }
  if (is.null(r.gamma)) {
    r.gamma <- rep(1, n)
  }
  # B-spline basis over the (slightly padded) channel domain.
  xl <- min(x)
  xr <- max(x)
  xmax <- xr + 0.01 * (xr - xl)
  xmin <- xl - 0.01 * (xr - xl)
  dx <- (xmax - xmin)/ndx
  knots <- seq(xmin - q * dx, xmax + q * dx, by = dx)
  b <- spline.des(knots, x, q + 1, 0 * x)$design
  n.col <- ncol(b)
  if (is.null(vv)) {
    vv <- rep(1, n.col)
  }
  if (d < 0) {
    d <- min(3, (n.col - 1))
    warning(paste("penalty order cannot be negative: have used", d))
  }
  if ((d - n.col + 1) > 0) {
    d <- n.col - 1
    warning(paste("penalty order was too large: have used", d))
  }
  # BUGFIX: nix.ridge previously existed only when ridge.adj > 0 and the
  # unridged path survived solely through lazy evaluation; define it always.
  nix.ridge <- NULL
  p.ridge <- NULL
  if (ridge.adj > 0) {
    nix.ridge <- rep(0, poly.order * n.col)
    p.ridge <- sqrt(ridge.adj) * diag(rep(1, poly.order * n.col))
  }
  # d-th order difference penalty, scaled by sqrt(lambda) and importance.
  p <- diag(n.col)
  if (d != 0) {
    for (j in seq_len(d)) {
      p <- diff(p)
    }
  }
  pdiff <- p
  n.row <- nrow(p)
  p <- sqrt(lambda.vector[1]) * (1/sqrt(vv + 1e-008)) * p
  nix <- rep(0, poly.order * (n.col - d))
  x.signal <- as.matrix(x.signal)
  b <- as.matrix(b)
  xb <- x.signal %*% b
  if (poly.order > 1) {
    # Block-diagonal penalty: one difference block per polynomial order,
    # each with its own lambda.
    PPen <- matrix(0, poly.order * n.row, poly.order * n.col)
    for (iii in seq_len(poly.order)) {
      rows <- ((iii - 1) * n.row + 1):(iii * n.row)
      cols <- ((iii - 1) * n.col + 1):(iii * n.col)
      PPen[rows, cols] <- sqrt(lambda.vector[iii]) *
        (1/sqrt(vv + 1e-8)) * pdiff
    }
    p <- PPen
    # Regressor blocks for the higher powers of the signal.
    for (ii in 2:poly.order) {
      xb <- cbind(xb, (x.signal^ii) %*% b)
    }
  }
  if (int) {
    xb <- cbind(rep(1, n), xb)
    p <- cbind(rep(0, nrow(p)), p)  # the intercept is not penalised
    if (ridge.adj > 0) {
      p.ridge <- cbind(rep(0, nrow(p.ridge)), p.ridge)
    }
  }
  ps.fit <- pspline.fitter(family, link, n.col, m.binomial, r.gamma, y,
    b = xb, p, p.ridge, nix, nix.ridge, ridge.adj, wts)
  mu <- ps.fit$mu
  coef <- ps.fit$coef
  bin.percent.correct <- NULL
  if (family == "binomial") {
    # Fraction classified correctly at the 0.5 threshold.
    p.hat <- mu/m.binomial
    count <- sum(ifelse(p.hat > 0.5, y, m.binomial - y))
    bin.percent.correct <- count/sum(m.binomial)
  }
  w <- ps.fit$w
  e <- 1e-009
  h <- hat(ps.fit$f$qr, intercept = FALSE)[1:n]
  trace <- sum(h)
  if (family == "binomial") {
    dev <- 2 * sum((y + e) * log((y + e)/mu) + (m.binomial - y + e) *
      log((m.binomial - y + e)/(m.binomial - mu)))
    dispersion.parm <- 1
  }
  if (family == "poisson") {
    dev <- 2 * sum(y * log(y + e) - y - y * log(mu) + mu)
    dispersion.parm <- 1
  }
  if (family == "Gamma") {
    dev <- -2 * sum(r.gamma * (log((y + e)/mu) - ((y - mu)/mu)))
    ave.dev <- dev/n
    dispersion.parm <- (ave.dev * (6 + ave.dev))/(6 + 2 * ave.dev)
  }
  cv <- press.mu <- press.e <- NULL
  if (family == "gaussian") {
    dev <- sum(ps.fit$f$residuals[1:n]^2)
    dispersion.parm <- dev/(n - trace)
    # Leave-one-out (PRESS) residuals from the hat diagonal.
    press.e <- ps.fit$f$residuals[1:n]/(1 - h)
    cv <- sqrt(sum((press.e)^2)/(n))
    press.mu <- y - press.e
  }
  aic <- dev + 2 * trace
  yint <- NULL
  if (int) {
    yint <- ps.fit$coef[1]
  }
  off <- as.numeric(int)  # column offset created by the intercept
  if (coef.plot) {
    par(mfrow = c(2, 2))
    for (ii in seq_len(poly.order)) {
      idx <- (off + (ii - 1) * n.col + 1):(off + ii * n.col)
      beta.in <- b %*% (as.vector(ps.fit$f$coef)[idx])
      plot(x.index, beta.in, col = 1, type = "l", lty = 1,
        xlab = "Coefficient Index", ylab = "P-spline Coefficient")
    }
  }
  summary.predicted <- NULL
  cv.predicted <- eta.predicted <- avediff.pred <- NULL
  if (!is.null(x.predicted)) {
    x.predicted <- as.matrix(x.predicted)
    xpb <- x.predicted %*% b
    if (poly.order > 1) {
      for (ii in 2:poly.order) {
        xpb <- cbind(xpb, (x.predicted^ii) %*% b)
      }
    }
    if (!int) {
      if (ncol(x.predicted) > 1) {
        eta.predicted <- xpb %*% as.vector(ps.fit$f$coef)
      }
      if (ncol(x.predicted) == 1) {
        eta.predicted <- t(xpb) %*% as.vector(ps.fit$f$coef)
      }
    }
    if (int) {
      dim.xp <- nrow(x.predicted)
      if (ncol(x.predicted) > 1) {
        one.xpred.b <- cbind(rep(1, dim.xp), (xpb))
        eta.predicted <- one.xpred.b %*% as.vector(ps.fit$f$coef)
      }
      if (ncol(x.predicted) == 1) {
        one.xpred.b <- cbind(1, t(xpb))
        eta.predicted <- t(one.xpred.b) %*% as.vector(ps.fit$f$coef)
      }
    }
    summary.predicted <- eta.predicted
    if (!is.null(y.predicted)) {
      if (family == "gaussian") {
        cv.predicted <- sqrt(sum((y.predicted - eta.predicted)^2)/
          (length(y.predicted)))
        avediff.pred <- (sum(y.predicted - eta.predicted))/
          length(y.predicted)
      }
    }
    # Back-transform the predictions to the response scale.
    bin.percent.correct <- NULL
    if (link == "logit") {
      summary.predicted <- 1/(1 + exp(-summary.predicted))
      p.hat <- exp(eta.predicted)/(1 + exp(eta.predicted))
      if (!is.null(y.predicted)) {
        count <- sum(ifelse(p.hat > 0.5, y.predicted, 1 - y.predicted))
        bin.percent.correct <- count/length(y.predicted)
      }
    }
    if (link == "probit") {
      summary.predicted <- apply(summary.predicted, c(1, 2), pnorm)
    }
    if (link == "cloglog") {
      summary.predicted <- (1 - exp(-exp(summary.predicted)))
    }
    if (link == "loglog") {
      summary.predicted <- exp(-exp(-summary.predicted))
    }
    if (link == "sqrt") {
      summary.predicted <- summary.predicted^2
    }
    if (link == "log") {
      summary.predicted <- exp(summary.predicted)
    }
    # NOTE(review): the original "recipical" branch computed 1/summary and
    # immediately discarded it (a no-op), leaving predictions on the linear
    # predictor scale; that (non-)behaviour is preserved.
  }
  llist <- list()
  llist$b <- b
  llist$coef <- coef
  llist$y.intercept <- yint
  llist$int <- int
  llist$press.mu <- press.mu
  llist$bin.percent.correct <- bin.percent.correct
  llist$family <- family
  llist$link <- link
  llist$ps.intervals <- ndx
  llist$order <- d
  llist$degree <- q
  llist$lambda <- lambda.vector
  llist$aic <- aic
  llist$deviance <- dev
  llist$eff.df <- trace - 1
  llist$df.resid <- n - trace + 1
  llist$dispersion.param <- dispersion.parm
  llist$summary.predicted <- summary.predicted
  llist$eta.predicted <- eta.predicted
  llist$cv.deleteone <- cv
  llist$mu <- mu
  llist$avediff.pred <- avediff.pred
  llist$cv.predicted <- cv.predicted
  llist
}

# Validate and normalise the GLM family, link, and P-spline hyperparameters,
# replacing out-of-range values with safe defaults (with a warning).
# Returns the corrected settings as a named list.
`pspline.checker` <- function(family, link, degree, order, ps.intervals,
    lambda, ridge.adj, wts) {
  # Resolve the canonical link for each family.
  if (link == "default" && family == "gaussian") {
    link <- "identity"
  }
  if (link == "default" && family == "poisson") {
    link <- "log"
  }
  if (link == "default" && family == "binomial") {
    link <- "logit"
  }
  if (link == "default" && family == "Gamma") {
    link <- "log"
  }
  if (family != "binomial" && family != "gaussian" && family != "poisson" &&
      family != "Gamma") {
    warning(paste("Improper FAMILY option. Choose: gaussian, poisson, binomial or Gamma"))
  }
  if ((family == "binomial") && (link != "logit" && link != "probit" &&
      link != "cloglog" && link != "loglog")) {
    warning(paste("Improper LINK option with family=binomial. Choose: logit, probit, loglog, cloglog"))
  }
  if ((family == "Gamma") && (link != "log" && link != "recipical" &&
      link != "identity")) {
    warning(paste("Improper LINK option with family=Gamma. Choose: recipical, log, identity"))
  }
  if ((family == "poisson") && (link != "log" && link != "sqrt" &&
      link != "identity")) {
    warning(paste("Improper LINK option with family=poisson. Choose: log, sqrt, identity"))
  }
  if ((family == "gaussian") && (link != "identity")) {
    warning(paste("Improper LINK option with family=gaussian. Choose: identity"))
  }
  if (degree < 0) {
    degree <- 1
    warning(paste("degree must be non-neg integer: have used 1"))
  }
  if (order < 0) {
    order <- 0
    warning(paste("order must be non-neg integer: have used 0"))
  }
  if (ps.intervals < 2) {
    ps.intervals <- 2
    warning(paste("ps.intervals must be positive integer, > 1: have used 2"))
  }
  if (lambda < 0) {
    lambda <- 0
    warning(paste("lambda cannot be negative: have used 0"))
  }
  if (ridge.adj < 0) {
    ridge.adj <- 0
    warning(paste("ridge.adj cannot be negative: have used 0"))
  }
  if (min(wts) < 0) {
    warning(paste("At least one weight entry is negative"))
  }
  list(family = family, link = link, degree = degree, order = order,
    ps.intervals = ps.intervals, lambda = lambda, ridge.adj = ridge.adj,
    wts = wts)
}

# IRLS engine for penalised GLM fitting.  b is the (possibly intercept-
# augmented) regressor matrix; p and p.ridge are penalty blocks appended as
# pseudo-observations (data augmentation), with nix/nix.ridge the matching
# zero targets.  Returns the coefficients, fitted means, the lsfit object,
# and the final IRLS weights (times the prior weights).
`pspline.fitter` <- function(family, link, n.col, m.binomial, r.gamma, y, b,
    p, p.ridge, nix, nix.ridge, ridge.adj, wts, ...) {
  coef.est <- rep(1, ncol(b))
  # Starting values for the mean (shrunk away from the data boundary).
  if (family == "binomial") {
    mu <- (y + 0.5 * m.binomial)/2
  }
  if (family == "Gamma" || family == "poisson") {
    mu <- log(y + 3)
  }
  if (family == "gaussian") {
    mu <- rep(mean(y), length(y))
  }
  it <- 0
  repeat {
    if (it == 0) {
      # Initial linear predictor from the starting mean.
      if (link == "identity") {
        eta <- mu
      }
      if (link == "log") {
        eta <- log(mu)
      }
      if (link == "sqrt") {
        eta <- sqrt(mu)
      }
      if (link == "logit") {
        eta <- log(mu/(m.binomial - mu))
      }
      if (link == "recipical") {
        eta <- 1/mu
      }
      if (link == "probit") {
        eta <- qnorm(mu/m.binomial)
      }
      if (link == "cloglog") {
        eta <- log(-log(1 - mu/m.binomial))
      }
      if (link == "loglog") {
        eta <- -log(-log(mu/m.binomial))
      }
    }
    it <- it + 1
    if (it > 25) {
      break
    }
    # Inverse link and its derivative h'(eta) = d mu / d eta.
    if (link == "identity") {
      mu <- eta
      h.prime <- 1
    }
    if (link == "log") {
      mu <- exp(eta)
      h.prime <- mu
    }
    if (link == "sqrt") {
      mu <- eta^2
      h.prime <- 2 * eta
    }
    if (link == "logit") {
      mu <- m.binomial/(1 + exp(-eta))
      h.prime <- mu * (1 - mu/m.binomial)
    }
    if (link == "recipical") {
      mu <- 1/eta
      h.prime <- -(mu^2)
    }
    if (link == "probit") {
      mu <- m.binomial * pnorm(eta)
      h.prime <- m.binomial * dnorm(eta)
    }
    if (link == "cloglog") {
      mu <- m.binomial * (1 - exp(-exp(eta)))
      h.prime <- (m.binomial) * exp(eta) * exp(-exp(eta))
    }
    if (link == "loglog") {
      mu <- m.binomial * exp(-exp(-eta))
      h.prime <- m.binomial * exp(-eta) * exp(-exp(-eta))
    }
    # IRLS weights from the variance function of each family.
    if (family == "gaussian") {
      w <- rep(1, length(y))
    }
    if (family == "poisson") {
      w <- h.prime^2/mu
    }
    if (family == "binomial") {
      w <- h.prime^2/(mu * (1 - mu/m.binomial))
    }
    if (family == "Gamma") {
      w <- (r.gamma * h.prime^2)/mu^2
    }
    u <- (y - mu)/h.prime + eta  # working response
    if (ridge.adj > 0) {
      f <- lsfit(rbind(b, p, p.ridge), c(u, nix, nix.ridge),
        wt = c(wts, nix + 1, nix.ridge + 1) *
          c(w, (nix + 1), (nix.ridge + 1)),
        intercept = FALSE)
    }
    if (ridge.adj == 0) {
      f <- lsfit(rbind(b, p), c(u, nix),
        wt = c(wts, nix + 1) * c(w, (nix + 1)), intercept = FALSE)
    }
    coef.old <- coef.est
    coef.est <- as.vector(f$coef)
    # Relative change in the coefficients as the convergence criterion.
    d.coef <- max(abs((coef.est - coef.old)/coef.old))
    if (d.coef < 1e-008) {
      break
    }
    eta <- b %*% coef.est
  }
  if (it > 24) {
    warning(paste("parameter estimates did NOT converge in 25 iterations"))
  }
  list(coef = coef.est, mu = mu, f = f, w = w * wts)
}

# P-spline signal regression with a single (linear) signal term and optional
# twice-standard-error bands for the coefficient curve.  Arguments as in
# poly.signal.fit, but with a single smoothing parameter `lambda` and an
# `se.bands` switch for the coefficient plot.  Returns a list including
# aic, deviance, dispersion.param, the coefficient curve `beta`, fitted
# values, and prediction summaries (with +/- 2 stderr limits).
`signal.fit` <- function(response, x.index, x.signal, importance = NULL,
    m.binomial = NULL, ps.intervals = 8, degree = 3, order = 3, wts = NULL,
    link = "default", family = "gaussian", r.gamma = NULL, lambda = 0,
    y.predicted = NULL, x.predicted = NULL, ridge.adj = 0, int = TRUE,
    coef.plot = TRUE, se.bands = TRUE) {
  y <- response
  x <- x.index
  vv <- importance
  n <- length(y)
  if (is.null(wts)) {
    wts <- rep(1, n)
  }
  parms <- pspline.checker(family, link, degree, order, ps.intervals,
    lambda, ridge.adj, wts)
  family <- parms$family
  link <- parms$link
  q <- parms$degree
  d <- parms$order
  ridge.adj <- parms$ridge.adj
  lambda <- parms$lambda
  ndx <- parms$ps.intervals
  wts <- parms$wts
  if (is.null(m.binomial)) {
    m.binomial <- rep(1, n)
  }
  if (is.null(r.gamma)) {
    r.gamma <- rep(1, n)
  }
  # B-spline basis over the (slightly padded) channel domain.
  xl <- min(x)
  xr <- max(x)
  xmax <- xr + 0.01 * (xr - xl)
  xmin <- xl - 0.01 * (xr - xl)
  dx <- (xmax - xmin)/ndx
  knots <- seq(xmin - q * dx, xmax + q * dx, by = dx)
  b <- spline.des(knots, x, q + 1, 0 * x)$design
  n.col <- ncol(b)
  if (is.null(vv)) {
    vv <- rep(1, n.col)
  }
  if (d < 0) {
    d <- min(3, (n.col - 1))
    warning(paste("penalty order cannot be negative: have used", d))
  }
  if ((d - n.col + 1) > 0) {
    d <- n.col - 1
    warning(paste("penalty order was too large: have used", d))
  }
  # BUGFIX: define nix.ridge unconditionally (was lazy-evaluation dependent).
  nix.ridge <- NULL
  p.ridge <- NULL
  if (ridge.adj > 0) {
    nix.ridge <- rep(0, n.col)
    p.ridge <- sqrt(ridge.adj) * diag(rep(1, n.col))
  }
  # d-th order difference penalty, scaled by sqrt(lambda) and importance.
  p <- diag(n.col)
  if (d != 0) {
    for (j in seq_len(d)) {
      p <- diff(p)
    }
  }
  p <- sqrt(lambda) * (1/sqrt(vv + 1e-008)) * p
  nix <- rep(0, n.col - d)
  x.signal <- as.matrix(x.signal)
  xb <- x.signal %*% as.matrix(b)
  if (int) {
    xb <- cbind(rep(1, n), xb)
    p <- cbind(rep(0, nrow(p)), p)  # the intercept is not penalised
    if (ridge.adj > 0) {
      p.ridge <- cbind(rep(0, nrow(p.ridge)), p.ridge)
    }
  }
  ps.fit <- pspline.fitter(family, link, n.col, m.binomial, r.gamma, y,
    b = xb, p, p.ridge, nix, nix.ridge, ridge.adj, wts)
  mu <- ps.fit$mu
  coef <- ps.fit$coef
  bin.percent.correct <- NULL
  if (family == "binomial") {
    # Fraction classified correctly at the 0.5 threshold.
    p.hat <- mu/m.binomial
    count <- sum(ifelse(p.hat > 0.5, y, m.binomial - y))
    bin.percent.correct <- count/sum(m.binomial)
  }
  w <- ps.fit$w
  e <- 1e-009
  h <- hat(ps.fit$f$qr, intercept = FALSE)[1:n]
  trace <- sum(h)
  if (family == "binomial") {
    dev <- 2 * sum((y + e) * log((y + e)/mu) + (m.binomial - y + e) *
      log((m.binomial - y + e)/(m.binomial - mu)))
    dispersion.parm <- 1
  }
  if (family == "poisson") {
    dev <- 2 * sum(y * log(y + e) - y - y * log(mu) + mu)
    dispersion.parm <- 1
  }
  if (family == "Gamma") {
    dev <- -2 * sum(r.gamma * (log((y + e)/mu) - ((y - mu)/mu)))
    ave.dev <- dev/n
    dispersion.parm <- (ave.dev * (6 + ave.dev))/(6 + 2 * ave.dev)
  }
  cv <- press.mu <- press.e <- NULL
  if (family == "gaussian") {
    dev <- sum(ps.fit$f$residuals[1:n]^2)
    dispersion.parm <- dev/(n - trace)
    # Leave-one-out (PRESS) residuals from the hat diagonal.
    press.e <- ps.fit$f$residuals[1:n]/(1 - h)
    cv <- sqrt(sum((press.e)^2)/(n))
    press.mu <- y - press.e
  }
  aic <- dev + 2 * trace
  # Coefficient curve beta(x.index) and intercept.
  if (int) {
    beta <- b %*% (as.vector(ps.fit$f$coef)[2:(n.col + 1)])
    yint <- ps.fit$coef[1]
  }
  if (!int) {
    yint <- NULL
    beta <- b %*% as.vector(ps.fit$f$coef)
  }
  # Sandwich variance of the smooth coefficient curve.
  half.meat <- sqrt(c(w)) * xb
  meat <- t(half.meat) %*% half.meat
  if (ridge.adj > 0) {
    bread <- solve(meat + t(p) %*% p + t(p.ridge) %*% p.ridge)
  }
  if (ridge.adj == 0) {
    bread <- solve(meat + t(p) %*% p)
  }
  off <- as.numeric(int)  # column offset created by the intercept
  half.sw <- half.meat %*% bread[, (1 + off):(n.col + off)]
  var.c <- t(half.sw) %*% half.sw
  half.lunch <- half.sw %*% t(b)
  ones <- 0 * y + 1
  var.beta <- ones %*% (half.lunch * half.lunch)
  stdev.beta <- sqrt(dispersion.parm) * t(sqrt(var.beta))
  pivot <- 2 * stdev.beta
  summary.beta <- cbind(beta - pivot, beta, beta + pivot)
  if (coef.plot) {
    if (se.bands) {
      matplot(x.index, summary.beta, type = "l", lty = c(3, 1, 3),
        col = rep(1, 3), xlab = "Coefficient Index",
        ylab = "P-spline Coefficient")
    } else {
      plot(x.index, summary.beta[, 2], col = 1, type = "l", lty = 1,
        xlab = "Coefficient Index", ylab = "P-spline Coefficient")
    }
  }
  summary.predicted <- NULL
  cv.predicted <- eta.predicted <- avediff.pred <- NULL
  if (!is.null(x.predicted)) {
    x.predicted <- as.matrix(x.predicted)
    if (!int) {
      if (ncol(x.predicted) > 1) {
        eta.predicted <- x.predicted %*% beta
        var.pred <- x.predicted %*% b %*% var.c %*% t(x.predicted %*% b)
      }
      if (ncol(x.predicted) == 1) {
        eta.predicted <- t(x.predicted) %*% beta
        var.pred <- t(x.predicted) %*% b %*% var.c %*% t(b) %*% x.predicted
      }
    }
    if (int) {
      var.c <- t(bread) %*% t(half.meat) %*% half.meat %*% bread
      dim.xp <- nrow(x.predicted)
      if (ncol(x.predicted) > 1) {
        one.xpred.b <- cbind(rep(1, dim.xp), (x.predicted %*% b))
        eta.predicted <- x.predicted %*% beta + yint
        var.pred <- one.xpred.b %*% var.c %*% t(one.xpred.b)
      }
      if (ncol(x.predicted) == 1) {
        one.xpred.b <- cbind(1, t(x.predicted) %*% b)
        eta.predicted <- t(x.predicted) %*% beta + yint
        var.pred <- (one.xpred.b) %*% var.c %*% t(one.xpred.b)
      }
    }
    stdev.pred <- as.vector(sqrt(diag(var.pred)))
    stdev.pred <- sqrt(dispersion.parm) * stdev.pred
    pivot <- as.vector(2 * stdev.pred)
    summary.predicted <- cbind(eta.predicted - pivot, eta.predicted,
      eta.predicted + pivot)
    if (!is.null(y.predicted)) {
      if (family == "gaussian") {
        cv.predicted <- sqrt(sum((y.predicted - eta.predicted)^2)/
          (length(y.predicted)))
        avediff.pred <- (sum(y.predicted - eta.predicted))/
          length(y.predicted)
      }
    }
    # Back-transform the prediction summary to the response scale.
    bin.percent.correct <- NULL
    if (link == "logit") {
      summary.predicted <- 1/(1 + exp(-summary.predicted))
      p.hat <- exp(eta.predicted)/(1 + exp(eta.predicted))
      if (!is.null(y.predicted)) {
        count <- sum(ifelse(p.hat > 0.5, y.predicted, 1 - y.predicted))
        bin.percent.correct <- count/length(y.predicted)
      }
    }
    if (link == "probit") {
      summary.predicted <- apply(summary.predicted, c(1, 2), pnorm)
    }
    if (link == "cloglog") {
      summary.predicted <- (1 - exp(-exp(summary.predicted)))
    }
    if (link == "loglog") {
      summary.predicted <- exp(-exp(-summary.predicted))
    }
    if (link == "sqrt") {
      summary.predicted <- summary.predicted^2
    }
    if (link == "log") {
      summary.predicted <- exp(summary.predicted)
    }
    if (link == "recipical") {
      # Invert; the bands swap (1/x is decreasing) so lower stays below
      # upper.  BUGFIX: the centre column was previously left on the
      # linear-predictor scale.
      summary.predd <- 1/(summary.predicted)
      summary.predicted[, 1] <- summary.predd[, 3]
      summary.predicted[, 2] <- summary.predd[, 2]
      summary.predicted[, 3] <- summary.predd[, 1]
      summary.predd <- NULL
    }
    summary.predicted <- as.matrix(summary.predicted)
    dimnames(summary.predicted) <- list(NULL,
      c("-2std_Lower", "Predicted", "+2std_Upper"))
  }
  llist <- list()
  llist$b <- b
  llist$coef <- coef
  llist$y.intercept <- yint
  llist$int <- int
  llist$press.mu <- press.mu
  llist$bin.percent.correct <- bin.percent.correct
  llist$family <- family
  llist$link <- link
  llist$ps.intervals <- ndx
  llist$order <- d
  llist$degree <- q
  llist$lambda <- lambda
  llist$aic <- aic
  llist$deviance <- dev
  llist$eff.df <- trace - 1
  llist$df.resid <- n - trace + 1
  llist$dispersion.param <- dispersion.parm
  llist$summary.predicted <- summary.predicted
  llist$eta.predicted <- eta.predicted
  llist$cv.predicted <- cv.predicted
  llist$cv <- cv
  llist$mu <- mu
  llist$avediff.pred <- avediff.pred
  llist$beta <- beta
  llist
}

# Matrix of d-th order differences of the n x n identity (recursive).
`ndiff` <- function(n, d = 1) {
  if (d == 1) {
    diff(diag(n))
  } else {
    diff(ndiff(n, d - 1))
  }
}

# Truncated p-th power function: (x - t)^p for x > t, else 0.
`tpower` <- function(x, t, p) {
  (x - t)^p * (x > t)
}

# B-spline basis of degree `deg` on ndx equal intervals of [xl, xr], built
# by differencing truncated power functions (Eilers & Marx, 1996).
`bbase` <- function(x, xl, xr, ndx, deg) {
  dx <- (xr - xl)/ndx
  knots <- seq(xl - deg * dx, xr + deg * dx, by = dx)
  P <- outer(x, knots, tpower, deg)
  n <- dim(P)[2]
  D <- ndiff(n, deg + 1)/(gamma(deg + 1) * dx^deg)
  (-1)^(deg + 1) * P %*% t(D)
}

# Unnormalised Gaussian bump with centre mu and width sig.
`gauss` <- function(x, mu, sig) {
  u <- (x - mu)/sig
  exp(-u * u/2)
}

# Gaussian basis with centres mus; width = half the centre spacing.
`gbase` <- function(x, mus) {
  sig <- (mus[2] - mus[1])/2
  outer(x, mus, gauss, sig)
}

# Polynomial basis of degree n, with x rescaled to [-1, 1].
`pbase` <- function(x, n) {
  u <- (x - min(x))/(max(x) - min(x))
  u <- 2 * (u - 0.5)
  outer(u, seq(0, n, by = 1), "^")
}

# P-spline scatterplot smoother (normal errors) that also evaluates the
# first derivative of the fitted curve.
#
# Arguments:
#   x, y     data; nseg/bdeg/pord/lambda as in the P-spline literature.
#   plot     draw the data, fitted curve, and derivative curve?
#   se       add twice-standard-error bands to the plot?
#
# Value: an object of class "pspfit" with the fitted curve (muhat), the
# plotting grid (xgrid/ygrid), leave-one-out cv, effective dimension,
# the B-spline coefficients (pcoef) and the basis domain (xmin, xmax).
`pnormal.der` <- function(x, y, nseg, bdeg, pord, lambda, plot = FALSE,
    se = FALSE) {
  # B-spline basis on a domain widened by 50% on each side.
  m <- length(x)
  xl <- min(x)
  xr <- max(x)
  xmax <- xr + 0.5 * (xr - xl)
  xmin <- xl - 0.5 * (xr - xl)
  B <- bbase(x, xmin, xmax, nseg, bdeg)
  # Difference penalty as pseudo-observations.
  n <- dim(B)[2]
  P <- sqrt(lambda) * ndiff(n, pord)
  nix <- rep(0, n - pord)
  # Penalised least squares via data augmentation.
  if (lambda == 0) {
    f <- lsfit(B, y, intercept = FALSE)
  }
  if (lambda > 0) {
    f <- lsfit(rbind(B, P), c(y, nix), intercept = FALSE)
  }
  h <- hat(f$qr)[1:m]
  beta <- as.vector(f$coef)
  mu <- B %*% beta
  # Leave-one-out cross-validation and dispersion.
  r <- (y - mu)/(1 - h)
  cv <- sqrt((sum(r^2))/m)
  s <- sqrt(sum((y - mu)^2)/(m - sum(h)))
  # Fitted curve on a plotting grid.
  u <- seq(xl - 0.1, xr + 0.1, length = 100)
  Bu <- bbase(u, xmin - 0.1, xmax + 0.1, nseg, bdeg)
  zu <- Bu %*% as.vector(f$coef)
  # First derivative at the data x: degree-(bdeg-1) basis with differenced
  # coefficients.  (Currently returned only through the side-effect plot;
  # kept for parity with predict.pnormal, which applies the 1/dx scaling.)
  B.der <- bbase(x, xmin, xmax, nseg, bdeg - 1)
  alpha.der <- diff(f$coef)
  der <- B.der %*% alpha.der
  if (plot) {
    # BUGFIX: zu.der was referenced without ever being computed (its
    # construction was commented out), so plot = TRUE crashed; build the
    # derivative curve on the plotting grid before drawing it.
    Bu.der <- bbase(u, xmin, xmax, nseg, bdeg - 1)
    zu.der <- Bu.der %*% alpha.der
    plot(x, y)
    lines(u, zu, col = 2)
    lines(u, zu.der, col = 4)
    if (se) {
      varf <- diag(Bu %*% solve(t(B) %*% B + t(P) %*% P) %*% t(Bu))
      sef <- s * sqrt(varf)
      lines(u, zu + 2 * sef, lty = 3, col = 3)
      lines(u, zu - 2 * sef, lty = 3, col = 3)
    }
  }
  # (duplicate xmin entry in the original return list removed)
  pp <- list(x = x, y = y, muhat = mu, nseg = nseg, xmin = xmin,
    bdeg = bdeg, pord = pord, lambda = lambda, xgrid = u, ygrid = zu,
    cv = cv, effdim = sum(h), ed.resid = m - sum(h), family = "gaussian",
    link = "identity", sqrt.mse = s, pcoef = beta, xmax = xmax)
  class(pp) <- "pspfit"
  pp
}

# Evaluate a pnormal.der ("pspfit") fit at new x values.
# der = 0 returns the fitted curve; der = 1 its first derivative
# (differenced coefficients on a degree-(bdeg-1) basis, scaled by 1/dx).
# NOTE(review): the object carries class "pspfit", so this is called
# directly, not via S3 dispatch on predict().
`predict.pnormal` <- function(obj, x, der) {
  if (der == 0) {
    bu <- bbase(x, obj$xmin, obj$xmax, obj$nseg, obj$bdeg)
    out <- as.vector(bu %*% obj$pcoef)
  }
  if (der == 1) {
    B.der <- bbase(x, obj$xmin, obj$xmax, obj$nseg, obj$bdeg - 1)
    alpha.der <- diff(obj$pcoef)
    out <- as.vector(B.der %*% alpha.der)/((obj$xmax - obj$xmin)/obj$nseg)
  }
  out
}