Skip to content

Commit b51770a

Browse files
committed
Add factorize.FDboost method, tests, and corresponding plotting and prediction methods
1 parent a700b19 commit b51770a

7 files changed

Lines changed: 1014 additions & 8 deletions

File tree

NAMESPACE

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ S3method("[",hmatrix)
44
S3method(coef,FDboost)
55
S3method(cvrisk,FDboost)
66
S3method(cvrisk,FDboostLSS)
7+
S3method(factorize,FDboost)
78
S3method(fitted,FDboost)
89
S3method(getArgvals,hmatrix)
910
S3method(getArgvalsLab,hmatrix)
@@ -18,6 +19,7 @@ S3method(plot,FDboost)
1819
S3method(plot,bootstrapCI)
1920
S3method(plot,validateFDboost)
2021
S3method(predict,FDboost)
22+
S3method(predict,FDboost_fac)
2123
S3method(print,FDboost)
2224
S3method(print,bootstrapCI)
2325
S3method(print,validateFDboost)
@@ -70,10 +72,13 @@ export(subset_hmatrix)
7072
export(truncateTime)
7173
export(validateFDboost)
7274
export(wide2long)
75+
exportClasses(FDboost_fac)
7376
import(Matrix)
7477
import(mboost)
7578
import(methods)
7679
importFrom(MASS,Null)
80+
importFrom(MASS,ginv)
81+
importFrom(Matrix,rankMatrix)
7782
importFrom(gamboostLSS,GaussianLSS)
7883
importFrom(gamboostLSS,GaussianMu)
7984
importFrom(gamboostLSS,GaussianSigma)
@@ -92,6 +97,7 @@ importFrom(graphics,par)
9297
importFrom(graphics,persp)
9398
importFrom(graphics,plot)
9499
importFrom(graphics,points)
100+
importFrom(methods,setOldClass)
95101
importFrom(mgcv,gam)
96102
importFrom(mgcv,s)
97103
importFrom(parallel,mclapply)

R/factorize.R

Lines changed: 346 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,354 @@
11

2-
#' Factorize tensor product model
#'
#' Factorize an FDboost tensor product model into the response and covariate parts
#' \deqn{h_j(x, t) = \sum_{k} v_j^{(k)}(t) h_j^{(k)}(x), j = 1, ..., J,}
#' for effect visualization as proposed in Stoecker and Greven (2021).
#'
#' @param x a model object of class FDboost.
#' @param ... other arguments passed to methods.
#'
#' @details The mboost infrastructure is used for handling the orthogonal response
#' directions \eqn{v_j^{(k)}(t)} in one \code{mboost}-object
#' (with \eqn{k} running over iteration indices) and the effects into the respective
#' directions \eqn{h_j^{(k)}(t)} in another, both of subclass \code{FDboost_fac}.
#'
#' @return a list of two mboost models of class \code{FDboost_fac} containing basis functions
#' for response and covariates, respectively, as base-learners.
#' @export
#'
#' @name factorize
#' @aliases factorise factorize.FDboost
#' @importFrom MASS ginv
#' @importFrom Matrix rankMatrix
#' @seealso [FDboost_fac-class]
#'
#' @references
#' Stoecker, A. and Greven, S. (2021):
#' Functional additive models on manifolds of planar shapes and forms
#' <arXiv:2109.02624>
#'
#' @example tests/factorize_test_irregular.R
#' @example tests/factorize_test_regular.R
#'
# S3 generic; `factorise` is a British-spelling alias that dispatches on the
# same "factorize" methods.
factorize <- factorise <- function(x, ...) {
  UseMethod("factorize")
}
1338

14-
factorize.default <- function(x, ...) stop("No default factorization.")
39+
#' @param newdata new data the factorization is based on.
#' By default (\code{NULL}), the factorization is carried out on the data used for fitting.
#' @param newweights vector of the length of the data or length one,
#' containing new weights used for factorization.
#' @param blwise logical, should the factorization be carried out base-learner-wise (\code{TRUE}, default)
#' or for the whole model simultaneously.
#'
#' @method factorize FDboost
#' @export
#' @rdname factorize
factorize.FDboost <- function(x, newdata = NULL, newweights = 1, blwise = TRUE, ...) {

  # "regular" = neither scalar response nor long-format response
  FDboost_regular <- !inherits(x, c("FDboostScalar", "FDboostLong"))

  nd <- !is.null(newdata)

  # build sub-datasets for the covariate part and the response part
  dat <- list()
  dat$cov <- if (!nd) x$data else newdata[names(x$data)]
  # dummy response of ones so mboost can set up the design matrices
  dat$cov[[x$yname]] <- rep(1, min(lengths(dat$cov)))
  if (is.list(x$yind)) {
    dat$resp <- if (!nd) x$yind else newdata[names(x$yind)]
  } else {
    dat$resp <- if (!nd)
      setNames(list(x$yind), attr(x$yind, "nameyind")) else
      newdata[attr(x$yind, "nameyind")]
  }
  dat$resp <- as.data.frame(dat$resp)
  dat$resp[[x$yname]] <- 1

  # extract formulae for the two component models
  formulae <- list()
  formulae$cov <- as.formula(x$formulaFDboost)
  formulae$resp <- as.formula(paste(x$yname, x$timeformula))

  # set up component models
  mod <- list()
  # standard mboost model for covariates (zero iterations, just the design)
  mod$cov <- mboost(formulae$cov,
                    data = dat$cov,
                    offset = 0,
                    control = boost_control(mstop = 0, nu = 1))
  # artificial FDboost intercept model for the response
  mod$resp <- mboost(formulae$resp,
                     data = dat$resp,
                     offset = if (FDboost_regular)
                       matrix(x$offset, nrow = x$ydim[1])[1, ] else
                       x$offset,
                     control = boost_control(mstop = 0, nu = 1))
  # copy essential parts from the base model to the response model
  which_vars <- c("yname", "ydim", "predictOffset", "withIntercept",
                  "callEval", "timeformula", "formulaFDboost",
                  "formulaMboost", "family", "(weights)", "id")
  # snapshot of the response model class before components are copied over
  cls <- class(mod$resp)
  mod$resp[which_vars] <- unclass(x)[which_vars]
  if (FDboost_regular) mod$resp$ydim <- c(1, x$ydim[2])
  mod$resp$yind <- range(x$yind)
  attr(mod$resp$yind, "nameyind") <- attr(x$yind, "nameyind")
  if (FDboost_regular) {
    class(mod$resp) <- c("FDboostLong", class(x))
  } else {
    class(mod$resp) <- class(x)
  }

  if (nd) {
    # new weights: a single value is recycled over the data,
    # otherwise one weight per observation is required
    if (length(newweights) == 1) {
      mod$resp[["(weights)"]] <- rep(newweights, length(dat$resp[[x$yname]]))
    } else {
      stopifnot(length(newweights) == length(dat$resp[[x$yname]]))
      mod$resp[["(weights)"]] <- newweights
    }
    mod$resp$id <- newdata[[attr(mod$resp$id, "nameid")]]
  }

  # mark both component models as factorized models
  for (i in names(mod))
    class(mod[[i]]) <- c("FDboost_fac", class(mod[[i]]))

  # get coefficients (only of selected learners)
  bl_selected <- x$which(usedonly = TRUE)
  cf <- coef(x, raw = TRUE, which = bl_selected)

  # extract design matrices and index vectors
  X <- list(
    cov = extract(mod$cov, what = "design", which = bl_selected),
    resp = extract(mod$resp, what = "design", which = 1)
  )
  index <- list(
    cov = extract(mod$cov, what = "index", which = bl_selected),
    resp = extract(mod$resp, what = "index", which = 1)
  )

  wghts <- mod$resp$`(weights)`

  if (is.null(wghts)) {
    wghts <- list(cov = 1, resp = 1)
  } else {
    if (FDboost_regular) {
      # split the weight matrix into a covariate part (row means)
      # and a response part (first row)
      dim(wghts) <- x$ydim
      wghts <- list(
        cov = rowMeans(wghts),
        resp = wghts[1, ]
      )
    } else {
      wghts <- list(cov = as.vector(tapply(wghts, mod$resp$id, mean)))
      wghts$resp <- mod$resp[["(weights)"]] / wghts$cov[mod$resp$id]
    }
  }

  # aggregate weights over the design index where one is present
  wghts <- Map(function(w, idx) {
    lapply(idx, function(i) {
      if (is.null(i)) w else
        c(tapply(w, i, sum))
    })
  }, wghts, index)

  # multiply sqrt(weights) into X to take them into account
  X <- Map(function(x, w) {
    Map(function(.x, .w) sqrt(.w) * .x, x, w)
  }, X, wghts)
  # NOTE: X is now sqrt(w) * X !

  # QR decomposition to achieve an orthonormal basis representation
  QR <- lapply(X, lapply, qr)
  ## extract Q as orthonormal version of X
  # Q <- lapply(QR, lapply, qr.Q) # not necessary

  # transform cf accordingly; undo the column pivoting of the QR
  R <- lapply(QR, lapply, function(x) {
    if (inherits(x, "qr"))
      qr.R(x)[, order(x$pivot)] else
      qrR(x, backPermute = TRUE)
  })

  cf <- Map(matrix, cf, nrow = lapply(X$cov, ncol), byrow = !FDboost_regular)
  cf <- Map(function(r1, o) r1 %*% tcrossprod(o, R$resp[[1]]), R$cov, cf)

  # perform SVD on cf
  if (blwise) {
    # one SVD per base-learner
    SVD <- lapply(cf, svd)
    Ud <- lapply(SVD, function(x) sweep(x$u, 2, x$d, "*"))
    d2 <- list(cov = lapply(SVD, function(x) (x$d)^2))
    V <- lapply(SVD, `[[`, "v")
    rm(SVD)
  } else {
    # one joint SVD over all base-learners stacked row-wise
    cf <- do.call(rbind, cf)
    SVD <- svd(cf)
    cfidx <- relist(seq_len(nrow(cf)),
                    lapply(X$cov, function(x) numeric(ncol(x))))
    Ud <- lapply(cfidx, function(idx)
      sweep(SVD$u[idx, , drop = FALSE], 2, SVD$d, "*"))
    d2 <- list(
      cov = lapply(Ud, function(ud) colSums(ud^2)),
      resp = SVD$d^2
    )
    V <- list(model = SVD$v)
    rm(SVD)
  }

  # compute new coefs, rescaled by the maximal singular value
  d_max <- sqrt(max(unlist(d2)))
  if (d_max == 0) d_max <- 1
  cf <- list()
  # solve(), falling back to the Moore-Penrose pseudoinverse when singular
  my_solve <- function(a, b) {
    ret <- try(solve(a, b), silent = TRUE)
    if (inherits(ret, "try-error")) {
      ret <- ginv(a) %*% b
    }
    ret
  }
  cf$cov <- Map(function(R, du) {
    as.matrix(my_solve(R, du)) / d_max
  }, R$cov, Ud)
  cf$resp <- setNames(
    lapply(V, my_solve, a = R$resp[[1]] / d_max),
    nm = if (blwise)
      paste0(names(X$resp)[1], " [", names(X$cov), "]") else
      names(X$resp)[1]
  )
  .no_mat <- which(!sapply(cf$resp, is.matrix))
  cf$resp[.no_mat] <-
    lapply(cf$resp[.no_mat], as.matrix)
  # drop dimension discrepancies between covariate and response parts
  if (length(cf$cov) == length(cf$resp)) {
    for (bl in seq_along(cf$cov)) {
      nc <- min(NCOL(cf$cov[[bl]]), NCOL(cf$resp[[bl]]))
      cf$cov[[bl]] <- cf$cov[[bl]][, 1:nc, drop = FALSE]
      cf$resp[[bl]] <- cf$resp[[bl]][, 1:nc, drop = FALSE]
    }
  }
  for (bl in seq_along(cf$cov)) {
    d2$cov[[bl]] <- head(d2$cov[[bl]], NCOL(cf$cov[[bl]]))
  }

  # decomposition complete - now prepare output ---------------

  # get model environments
  e <- lapply(mod, function(m) environment(m$predict))

  # clone and equip baselearners
  bl_dims <- lapply(cf, sapply, NCOL)
  # multiplicity vector for cloning base-learners
  bl_mltpl <- list(
    cov = rep(seq_along(bl_dims$cov), bl_dims$cov),
    resp = rep(1, sum(bl_dims$resp))
  )
  bl_names <- Map(function(.cf, .bl_dims)
    unlist(Map(function(name, len) paste0(name, " [", seq_len(len), "]"),
               names(.cf), .bl_dims), use.names = FALSE),
    cf, bl_dims)
  # order of newly generated bls with respect to their explained variance
  d2l <- lapply(d2, unlist)
  bl_order <- Map(function(bmlt, d2) order(bmlt)[order(d2, decreasing = TRUE)],
                  bl_mltpl, d2l)

  for (i in names(mod)) {
    this_select <- if (i == "cov") bl_selected else 1
    mod[[i]]$baselearner <- e[[i]]$blg <- setNames(
      e[[i]]$blg[this_select][bl_mltpl[[i]]], bl_names[[i]])
    mod[[i]]$basemodel <- e[[i]]$bl <- setNames(
      e[[i]]$bl[this_select][bl_mltpl[[i]]], bl_names[[i]])
    e[[i]]$bnames <- bl_names[[i]]
    # fill in coefs with bl order decreasing with explained variance
    e[[i]]$xselect <- bl_order[[i]]
    e[[i]]$ens <- unlist(lapply(cf[[i]], asplit, 2), recursive = FALSE)
    e[[i]]$ens <- Map(function(x, cls) {
      bm <- list(model = x)
      # base-learner class "bl..." becomes base-model class "bm..."
      class(bm) <- gsub("bl", "bm", cls)
      bm
    },
    x = e[[i]]$ens[bl_order[[i]]],
    cls = lapply(mod[[i]]$basemodel, class)[bl_order[[i]]])
    # add risk: remaining variance after each component
    this_d2l <- d2l[[i]]
    if (is.null(this_d2l))
      this_d2l <- d2l[[1]]
    e[[i]]$mrisk <- sum(this_d2l) -
      cumsum(c(0, sort(this_d2l, decreasing = TRUE)))
    # engage full number of components
    mod[[i]]$subset(sum(this_d2l > 0))
  }

  # return factor models
  mod
}
283+
284+
285+
# define class and methods ----------------------------------------------------

#' @importFrom methods setOldClass
#' @exportClass FDboost_fac
# register the S3 class so it can be used in S4 method signatures
setOldClass("FDboost_fac")

#' `FDboost_fac` S3 class for factorized FDboost model components
#'
#' @description Model factorization with `factorize()` decomposes an
#' `FDboost` model into two objects of class `FDboost_fac` - one for the
#' response and one for the covariate predictor. The first is essentially
#' an `FDboost` object and the second an `mboost` object, however,
#' in a 'read-only' mode and slightly adjusted methods (method defaults).
#'
#' @name FDboost_fac-class
#' @seealso [factorize()], [factorize.FDboost()]
NULL
303+
304+
305+
306+
#' Prediction and plotting for factorized FDboost model components
#'
#' @param object,x a model-factor given as a \code{FDboost_fac} object
#' @param newdata optionally, a data frame or list
#' in which to look for variables with which to predict.
#' See \code{\link{predict.mboost}}.
#' @param which a subset of base-learner components to take into
#' account for computing predictions or coefficients. Different
#' components are never aggregated to a joint prediction, but always
#' returned as a matrix or list. Select the k-th component
#' by name in the format \code{bl(x, ...)[k]} or all components of a base-learner
#' by dropping the index or all base-learners of a variable by using
#' the variable name.
#' @param main the plot title. By default, base-learner names are used with
#' component numbers \code{[k]}.
#' @param ... additional arguments passed to underlying methods.
#'
#' @method predict FDboost_fac
#'
#' @export
#' @name predict.FDboost_fac
#' @aliases plot.FDboost_fac
#'
#' @seealso [factorize()], [factorize.FDboost()]
#'
predict.FDboost_fac <- function(object, newdata = NULL, which = NULL, ...) {
  # resolve the base-learner specification to indices
  w <- object$which(which)
  if (any(is.na(w)))
    stop("Don't know 'which' base-learner is meant.")
  names(w) <- names(object$baselearner)[w]
  # predict each selected component separately; components are never
  # aggregated into a joint prediction
  pred_component <- function(bl_idx)
    predict.mboost(object = object, newdata = newdata,
                   which = bl_idx, aggregate = "sum", ...)
  drop(sapply(w, pred_component))
}
342+
343+
#' @method plot FDboost_fac
#' @rdname predict.FDboost_fac
plot.FDboost_fac <- function(x, which = NULL, main = NULL, ...) {
  # resolve the base-learner specification to indices (used learners only)
  w <- x$which(which, usedonly = TRUE)
  if (any(is.na(w))) {
    # BUGFIX: the original used which[which.min(is.na(w))], but which.min() of
    # a logical vector returns the index of the first FALSE, i.e. the first
    # *matched* entry — so the error message named a valid base-learner
    # instead of the unrecognized one. Report the unmatched entries instead.
    stop(paste("Don't know which base-learner is meant by:",
               paste(which[is.na(w)], collapse = ", ")))
  }
  # default titles: base-learner names with component numbers [k]
  if (is.null(main))
    main <- names(x$baselearner)[w]
  # one plot per selected component
  for (i in seq_along(w))
    plot.mboost(x, which = w[i], main = main[i], ...)
}

0 commit comments

Comments
 (0)