#' Factorize tensor product model
#'
#' Factorize an FDboost tensor product model into the response and covariate parts
#' \deqn{h_j(x, t) = \sum_{k} v_j^{(k)}(t) h_j^{(k)}(x), j = 1, ..., J,}
#' for effect visualization as proposed in Stoecker and Greven (2021).
#'
#' @param x a model object of class FDboost.
#' @param ... other arguments passed to methods.
#'
#' @details The mboost infrastructure is used for handling the orthogonal response
#' directions \eqn{v_j^{(k)}(t)} in one \code{mboost}-object
#' (with \eqn{k} running over iteration indices) and the effects into the respective
#' directions \eqn{h_j^{(k)}(x)} in another, both of subclass \code{FDboost_fac}.
#'
#' @return a list of two mboost models of class \code{FDboost_fac} containing basis functions
#' for response and covariates, respectively, as base-learners.
#' @export
#'
#' @name factorize
#' @aliases factorise factorize.FDboost
#' @importFrom MASS ginv
#' @importFrom Matrix rankMatrix
#' @seealso [FDboost_fac-class]
#'
#' @references
#' Stoecker, A. and Greven, S. (2021):
#' Functional additive models on manifolds of planar shapes and forms.
#' <arXiv:2109.02624>
#'
#'
#' @example tests/factorize_test_irregular.R
#' @example tests/factorize_test_regular.R
#'
factorize <- factorise <- function(x, ...) {
  # S3 generic for model factorization; `factorise` is a British-spelling
  # alias bound to the same function. Dispatches on the class of `x`.
  # NOTE: the dispatch name must be exactly "factorize" (no stray spaces),
  # otherwise method lookup fails.
  UseMethod("factorize")
}
14- factorize.default <- function (x , ... ) stop(" No default factorization." )
#' @param newdata new data the factorization is based on.
#' By default (\code{NULL}), the factorization is carried out on the data used for fitting.
#' @param newweights vector of the length of the data or of length one,
#' containing new weights used for factorization.
#' @param blwise logical, should the factorization be carried out base-learner-wise
#' (\code{TRUE}, default) or for the whole model simultaneously.
#'
#' @method factorize FDboost
#' @export
#' @rdname factorize
factorize.FDboost <- function(x, newdata = NULL, newweights = 1, blwise = TRUE, ...) {
  # Decompose a fitted FDboost tensor-product model into two component models
  # of class FDboost_fac: one carrying the orthogonal response (time)
  # directions and one carrying the covariate effects. The decomposition is
  # obtained via QR factorization of the (weighted) design matrices followed
  # by an SVD of the transformed coefficient matrices.
  #
  # Returns a list with elements $resp and $cov (see roxygen docs above).

  # regular response grid unless the model is scalar-on-function or irregular
  FDboost_regular <- !inherits(x, c("FDboostScalar", "FDboostLong"))

  nd <- !is.null(newdata)

  # build sub-datasets for the two component models ----------------------------
  dat <- list()
  dat$cov <- if (!nd) x$data else newdata[names(x$data)]
  # constant pseudo-response: only the design matrices of the models are used
  dat$cov[[x$yname]] <- rep(1, min(lengths(dat$cov)))
  if (is.list(x$yind)) {
    dat$resp <- if (!nd) x$yind else newdata[names(x$yind)]
  } else {
    dat$resp <- if (!nd) setNames(list(x$yind),
                                  attr(x$yind, "nameyind")) else
      newdata[attr(x$yind, "nameyind")]
  }
  dat$resp <- as.data.frame(dat$resp)
  dat$resp[[x$yname]] <- 1

  # extract formulae ------------------------------------------------------------
  formulae <- list()
  formulae$cov <- as.formula(x$formulaFDboost)
  formulae$resp <- as.formula(paste(x$yname, x$timeformula))

  # set up component models (mstop = 0: only base-learner infrastructure) -------
  mod <- list()
  # standard mboost model for covariates
  mod$cov <- mboost(formulae$cov,
                    data = dat$cov,
                    offset = 0,
                    control = boost_control(mstop = 0, nu = 1))
  # artificial FDboost intercept model for the response
  mod$resp <- mboost(formulae$resp,
                     data = dat$resp,
                     offset = if (FDboost_regular)
                       matrix(x$offset, nrow = x$ydim[1])[1, ] else
                         x$offset,
                     control = boost_control(mstop = 0, nu = 1))
  # copy essential parts from the base model to the response model
  which_vars <- c("yname", "ydim", "predictOffset", "withIntercept",
                  "callEval", "timeformula", "formulaFDboost",
                  "formulaMboost", "family", "(weights)", "id")
  cls <- class(mod$resp)  # NOTE(review): class is re-assigned below
  mod$resp[which_vars] <- unclass(x)[which_vars]
  if (FDboost_regular) mod$resp$ydim <- c(1, x$ydim[2])
  mod$resp$yind <- range(x$yind)
  attr(mod$resp$yind, "nameyind") <- attr(x$yind, "nameyind")
  if (FDboost_regular)
    class(mod$resp) <- c("FDboostLong", class(x)) else
      class(mod$resp) <- class(x)

  if (nd) {
    # new data: install new weights (scalar weights are recycled)
    if (length(newweights) == 1)
      mod$resp[["(weights)"]] <- rep(newweights, length(dat$resp[[x$yname]])) else {
        stopifnot(length(newweights) == length(dat$resp[[x$yname]]))
        mod$resp[["(weights)"]] <- newweights
      }
    mod$resp$id <- newdata[[attr(mod$resp$id, "nameid")]]
  }

  # mark both components as (read-only) factor models
  for (i in names(mod))
    class(mod[[i]]) <- c("FDboost_fac", class(mod[[i]]))

  # get coefficients (only of selected learners)
  bl_selected <- x$which(usedonly = TRUE)
  cf <- coef(x, raw = TRUE, which = bl_selected)

  # extract design matrices and index vectors -----------------------------------
  X <- list(
    cov = extract(mod$cov, what = "design", which = bl_selected),
    resp = extract(mod$resp, what = "design", which = 1)
  )
  index <- list(
    cov = extract(mod$cov, what = "index", which = bl_selected),
    resp = extract(mod$resp, what = "index", which = 1)
  )

  wghts <- mod$resp$`(weights)`

  if (is.null(wghts)) {
    wghts <- list(cov = 1, resp = 1)
  } else {
    if (FDboost_regular) {
      # split the weight matrix into covariate (row) and response (column) parts
      dim(wghts) <- x$ydim
      wghts <- list(
        cov = rowMeans(wghts),
        resp = wghts[1, ]
      )
    } else {
      wghts <- list(cov = as.vector(tapply(wghts, mod$resp$id, mean)))
      wghts$resp <- mod$resp[["(weights)"]] / wghts$cov[mod$resp$id]
    }
  }

  # aggregate weights to the rows of the (possibly indexed) design matrices
  wghts <- Map(function(w, idx) {
    lapply(idx, function(i) {
      if (is.null(i)) w else
        c(tapply(w, i, sum))
    })
  }, wghts, index)

  # multiply sqrt(weights) into X to take them into account
  X <- Map(function(x, w) {
    Map(function(.x, .w) sqrt(.w) * .x, x, w)
  }, X, wghts)
  # NOTE: X is now sqrt(w) * X !

  # QR decomposition yields an orthonormal basis representation
  QR <- lapply(X, lapply, qr)

  # extract R factors, undoing the column pivoting of the QR;
  # sparse decompositions (Matrix package) are handled via qrR()
  R <- lapply(QR, lapply, function(x) {
    if (inherits(x, "qr"))
      qr.R(x)[, order(x$pivot)] else
        qrR(x, backPermute = TRUE) })

  # transform cf into the orthonormal bases
  cf <- Map(matrix, cf, nrow = lapply(X$cov, ncol), byrow = !FDboost_regular)
  cf <- Map(function(r1, o) r1 %*% tcrossprod(o, R$resp[[1]]), R$cov, cf)

  # perform SVD on cf ------------------------------------------------------------
  if (blwise) {
    # base-learner-wise factorization: one SVD per coefficient matrix
    SVD <- lapply(cf, svd)
    Ud <- lapply(SVD, function(x) sweep(x$u, 2, x$d, "*"))
    d2 <- list(cov = lapply(SVD, function(x) (x$d)^2))
    V <- lapply(SVD, `[[`, "v")
    rm(SVD)
  } else {
    # joint factorization: stack all coefficient matrices, one global SVD
    cf <- do.call(rbind, cf)
    SVD <- svd(cf)
    cfidx <- relist(seq_len(nrow(cf)),
                    lapply(X$cov, function(x) numeric(ncol(x))))
    Ud <- lapply(cfidx, function(idx)
      sweep(SVD$u[idx, , drop = FALSE], 2, SVD$d, "*"))
    d2 <- list(
      cov = lapply(Ud, function(ud) colSums(ud^2)),
      resp = SVD$d^2
    )
    V <- list(model = SVD$v)
    rm(SVD)
  }

  # compute new coefs ------------------------------------------------------------
  d_max <- sqrt(max(unlist(d2)))
  if (d_max == 0) d_max <- 1  # guard against an all-zero model
  cf <- list()
  # solve(), falling back to the Moore-Penrose pseudoinverse (MASS::ginv)
  # for rank-deficient systems
  my_solve <- function(a, b) {
    ret <- try(solve(a, b), silent = TRUE)
    if (inherits(ret, "try-error")) {
      ret <- ginv(a) %*% b
    }
    ret
  }
  cf$cov <- Map(function(R, du) {
    as.matrix(my_solve(R, du)) / d_max
  }, R$cov, Ud)
  cf$resp <- setNames(
    lapply(V, my_solve, a = R$resp[[1]] / d_max),
    nm = if (blwise)
      paste0(names(X$resp)[1], "[", names(X$cov), "]") else
        names(X$resp)[1]
  )
  .no_mat <- which(!sapply(cf$resp, is.matrix))
  cf$resp[.no_mat] <-
    lapply(cf$resp[.no_mat], as.matrix)
  # drop dimension discrepancies
  if (length(cf$cov) == length(cf$resp)) {
    for (bl in seq_along(cf$cov)) {
      nc <- min(NCOL(cf$cov[[bl]]), NCOL(cf$resp[[bl]]))
      cf$cov[[bl]] <- cf$cov[[bl]][, 1:nc, drop = FALSE]
      cf$resp[[bl]] <- cf$resp[[bl]][, 1:nc, drop = FALSE]
    }
  }
  for (bl in seq_along(cf$cov)) {
    d2$cov[[bl]] <- head(d2$cov[[bl]], NCOL(cf$cov[[bl]]))
  }

  # decomposition complete - now prepare output ----------------------------------

  # get model environments
  e <- lapply(mod, function(m) environment(m$predict))

  # clone and equip base-learners
  bl_dims <- lapply(cf, sapply, NCOL)
  # replication vector for cloning base-learners (one clone per component)
  bl_mltpl <- list(
    cov = rep(seq_along(bl_dims$cov), bl_dims$cov),
    resp = rep(1, sum(bl_dims$resp))
  )
  # component names in the format "bl(...)[k]"
  bl_names <- Map(function(.cf, .bl_dims)
    unlist(Map(function(name, len) paste0(name, "[", seq_len(len), "]"),
               names(.cf), .bl_dims), use.names = FALSE),
    cf, bl_dims)
  # order of newly generated base-learners with respect to their variance
  d2l <- lapply(d2, unlist)
  bl_order <- Map(function(bmlt, d2) order(bmlt)[order(d2, decreasing = TRUE)],
                  bl_mltpl, d2l)

  for (i in names(mod)) {
    this_select <- if (i == "cov") bl_selected else 1
    mod[[i]]$baselearner <- e[[i]]$blg <- setNames(
      e[[i]]$blg[this_select][bl_mltpl[[i]]], bl_names[[i]])
    mod[[i]]$basemodel <- e[[i]]$bl <- setNames(
      e[[i]]$bl[this_select][bl_mltpl[[i]]], bl_names[[i]])
    e[[i]]$bnames <- bl_names[[i]]
    # fill in coefs with base-learner order decreasing with explained variance
    e[[i]]$xselect <- bl_order[[i]]
    e[[i]]$ens <- unlist(lapply(cf[[i]], asplit, 2), recursive = FALSE)
    e[[i]]$ens <- Map(function(x, cls) {
      bm <- list(model = x)
      class(bm) <- gsub("bl", "bm", cls)
      bm
    },
    x = e[[i]]$ens[bl_order[[i]]],
    cls = lapply(mod[[i]]$basemodel, class)[bl_order[[i]]])
    # add risk: remaining variance after each added component
    this_d2l <- d2l[[i]]
    if (is.null(this_d2l))
      this_d2l <- d2l[[1]]
    e[[i]]$mrisk <- sum(this_d2l) -
      cumsum(c(0, sort(this_d2l, decreasing = TRUE)))
    # engage the full number of non-degenerate components
    mod[[i]]$subset(sum(this_d2l > 0))
  }

  # return factor models
  mod
}


# define class and methods ----------------------------------------------------

# register the S3 class "FDboost_fac" so it can be used in S4 dispatch
#' @importFrom methods setOldClass
#' @exportClass FDboost_fac

setOldClass("FDboost_fac")
#' `FDboost_fac` S3 class for factorized FDboost model components
#'
#' @description Model factorization with `factorize()` decomposes an
#' `FDboost` model into two objects of class `FDboost_fac` - one for the
#' response and one for the covariate predictor. The first is essentially
#' an `FDboost` object and the second an `mboost` object, however,
#' in a 'read-only' mode and with slightly adjusted methods (method defaults).
#'
#' @name FDboost_fac-class
#' @seealso [factorize()], [factorize.FDboost()]
NULL



#' Prediction and plotting for factorized FDboost model components
#'
#' @param object,x a model-factor given as a \code{FDboost_fac} object
#' @param newdata optionally, a data frame or list
#' in which to look for variables with which to predict.
#' See \code{\link{predict.mboost}}.
#' @param which a subset of base-learner components to take into
#' account for computing predictions or coefficients. Different
#' components are never aggregated to a joint prediction, but always
#' returned as a matrix or list. Select the k-th component
#' by name in the format \code{bl(x, ...)[k]} or all components of a base-learner
#' by dropping the index, or all base-learners of a variable by using
#' the variable name.
#' @param main the plot title. By default, base-learner names are used with
#' component numbers \code{[k]}.
#' @param ... additional arguments passed to underlying methods.
#'
#' @method predict FDboost_fac
#'
#' @export
#' @name predict.FDboost_fac
#' @aliases plot.FDboost_fac
#'
#' @seealso [factorize()], [factorize.FDboost()]
#'
predict.FDboost_fac <- function(object, newdata = NULL, which = NULL, ...) {
  # Predict each selected factor component separately; components are never
  # aggregated into a joint prediction.
  w <- object$which(which)
  if (any(is.na(w)))
    stop("Don't know 'which' base-learner is meant.")
  names(w) <- names(object$baselearner)[w]
  # one prediction per component, summing only over the iterations belonging
  # to that component ("sum" must match predict.mboost's aggregate choices)
  drop(sapply(w,
              function(x) predict.mboost(which = x,
                                         object = object,
                                         newdata = newdata,
                                         aggregate = "sum", ...)))
}
#' @method plot FDboost_fac
#' @rdname predict.FDboost_fac
plot.FDboost_fac <- function(x, which = NULL, main = NULL, ...) {
  # Plot the selected factor components one after another, defaulting the
  # title to the component's base-learner name.
  w <- x$which(which, usedonly = TRUE)
  if (any(is.na(w)))
    stop(paste("Don't know which base-learner is meant by:",
               # which.max(is.na(w)) picks the first *unmatched* entry
               # (which.min would report a matched one instead)
               which[which.max(is.na(w))]))
  if (is.null(main))
    main <- names(x$baselearner)[w]
  for (i in seq_along(w))
    plot.mboost(x, which = w[i], main = main[i], ...)
}