FDboost/R/FDboost.R at 67a9fb490e70059feab924868091090cf05ffd63 · m-muecke/FDboost · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#################################################################################
#' Model-based Gradient Boosting for Functional Response
#'
#' Gradient boosting for optimizing arbitrary loss functions, where component-wise models
#' are utilized as base-learners in the case of functional responses.
#' Scalar responses are treated as the special case where each functional response has
#' only one observation.
#' This function is a wrapper for \code{mboost}'s \code{\link{mboost}} and its
#' siblings to fit models of the general form
#' \deqn{\xi(Y_i(t) | X_i = x_i) = \sum_{j} h_j(x_i, t), i = 1, ..., N,}
#' with a functional (but not necessarily continuous) response \eqn{Y(t)},
#' transformation function \eqn{\xi}, e.g., the expectation, the median or some quantile,
#' and partial effects \eqn{h_j(x_i, t)} depending on covariates \eqn{x_i}
#' and the current index of the response \eqn{t}. The index of the response can
#' be for example time.
#' Possible effects are, e.g., a smooth intercept \eqn{\beta_0(t)},
#' a linear functional effect \eqn{\int x_i(s)\beta(s,t)ds},
#' potentially with integration limits depending on \eqn{t},
#' smooth and linear effects of scalar covariates \eqn{f(z_i,t)} or \eqn{z_i \beta(t)}.
#' A hands-on tutorial for the package can be found at <doi:10.18637/jss.v094.i10>.
#'
#' @param formula a symbolic description of the model to be fit.
#' Per default no intercept is added, only a smooth offset, see argument \code{offset}.
#' To add a smooth intercept, use 1, e.g., \code{y ~ 1} for a pure intercept model.
#' @param timeformula one-sided formula for the specification of the effect over the index of the response.
#' For functional response \eqn{Y_i(t)} typically use \code{~ bbs(t)} to obtain smooth
#' effects over \eqn{t}.
#' In the limiting case of \eqn{Y_i} being a scalar response,
#' use \code{~ bols(1)}, which sets up a base-learner for the scalar 1.
#' Or use \code{timeformula = NULL}, then the scalar response is treated as scalar.
#' @param id defaults to NULL which means that all response trajectories are observed
#' on a common grid allowing to represent the response as a matrix.
#' If the response is given in long format for observation-specific grids, \code{id}
#' contains the information which observations belong to the same trajectory and must
#' be supplied as a formula, \code{~ nameid}, where the variable \code{nameid} should
#' contain integers 1, 2, 3, ..., N.
#' @param numInt integration scheme for the integration of the loss function.
#' One of \code{c("equal", "Riemann")} meaning equal weights of 1 or
#' trapezoidal Riemann weights.
#' Alternatively a vector of length \code{ncol(response)} containing
#' positive weights can be specified.
#' @param data a data frame or list containing the variables in the model.
#' @param weights only for internal use to specify resampling weights;
#' per default all weights are equal to 1.
#' @param offset_control parameters for the estimation of the offset,
#' defaults to \code{o_control()}, see \code{\link{o_control}}.
#' @param offset a numeric vector to be used as offset over the index of the response (optional).
#' If no offset is specified, per default \code{offset = NULL} which means that a
#' smooth time-specific offset is computed and used before the model fit to center the data.
#' If you do not want to use a time-specific offset, set \code{offset = "scalar"} to get an overall scalar offset,
#' like in \code{mboost}.
#' @param check0 logical, for response in matrix form, i.e. response that is observed on a common grid,
#' check the fitted effects for the sum-to-zero constraint
#' \eqn{h_j(x_i)(t) = 0} for all \eqn{t} and give a warning if it is not fulfilled. Defaults to \code{FALSE}.
#' @param ... additional arguments passed to \code{\link{mboost}},
#' including, \code{family} and \code{control}.
#'
#' @details In matrix representation of functional response and covariates each row
#' represents one functional observation, e.g., \code{Y[i,t_g]} corresponds to \eqn{Y_i(t_g)},
#' giving a <number of curves> by <number of evaluations> matrix.
#' For the model fit, the matrix of the functional
#' response evaluations \eqn{Y_i(t_g)} are stacked internally into one long vector.
#'
#' If it is possible to represent the model as a generalized linear array model
#' (Currie et al., 2006), the array structure is used for an efficient implementation,
#' see \code{\link{mboost}}. This is only possible if the design
#' matrix can be written as the Kronecker product of two marginal design
#' matrices yielding a functional linear array model (FLAM),
#' see Brockhaus et al. (2015) for details.
#' The Kronecker product of two marginal bases is implemented in R-package mboost
#' in the function \code{\%O\%}, see \code{\link[mboost:baselearners]{\%O\%}}.
#'
#' When \code{\%O\%} is called with a specification of \code{df} in both base-learners,
#' e.g., \code{bbs(x1, df = df1) \%O\% bbs(t, df = df2)}, the global \code{df} for the
#' Kroneckered base-learner is computed as \code{df = df1 * df2}.
#' And thus the penalty has only one smoothness parameter lambda resulting in an isotropic penalty.
#' A Kronecker product with anisotropic penalty is \code{\%A\%}, allowing for different
#' amount of smoothness in the two directions, see \code{\link{\%A\%}}.
#' If the formula contains base-learners connected by \code{\%O\%}, \code{\%A\%} or \code{\%A0\%},
#' those effects are not expanded with \code{timeformula}, allowing for model specifications
#' with different effects in time-direction.
#'
#' If the response is observed on curve-specific grids it must be supplied
#' as a vector in long format and the argument \code{id} has
#' to be specified (as formula!) to define which observations belong to which curve.
#' In this case the base-learners are built as row tensor-products of marginal base-learners,
#' see Scheipl et al. (2015) and Brockhaus et al. (2017), for details on how to set up the effects.
#' The row tensor product of two marginal bases is implemented in R-package mboost
#' in the function \code{\%X\%}, see \code{\link[mboost:baselearners]{\%X\%}}.
#'
#' A scalar response can be seen as special case of a functional response with only
#' one time-point, and thus it can be represented as FLAM with basis 1 in
#' time-direction, use \code{timeformula = ~bols(1)}. In this case, a penalty in the
#' time-direction is used, see Brockhaus et al. (2015) for details.
#' Alternatively, the scalar response is fitted as scalar response, like in the function
#' \code{\link{mboost}} in package mboost.
#' The advantage of using \code{FDboost} in that case
#' is that methods for the functional base-learners are available, e.g., \code{plot}.
#'
#' The desired regression type is specified by the \code{family}-argument,
#' see the help-page of \code{\link{mboost}}. For example a mean regression model is obtained by
#' \code{family = Gaussian()} which is the default or median regression
#' by \code{family = QuantReg()};
#' see \code{\link[mboost]{Family}} for a list of implemented families.
#'
#' With \code{FDboost} the following covariate effects can be estimated by specifying
#' the following effects in the \code{formula}
#' (similar to function \code{\link[refund]{pffr}}
#' in R-package refund.
#' The \code{timeformula} is used to expand the effects in \code{t}-direction.
#' \itemize{
#' \item Linear functional effect of scalar (numeric or factor) covariate \eqn{z} that varies
#'   smoothly over \eqn{t}, i.e. \eqn{z_i \beta(t)}, specified as
#'   \code{bolsc(z)}, see \code{\link{bolsc}},
#'   or for a group effect with mean zero use \code{brandomc(z)}.
#' \item Nonlinear effects  of a scalar covariate that vary smoothly over \eqn{t},
#'   i.e. \eqn{f(z_i, t)}, specified as \code{bbsc(z)},
#'   see \code{\link{bbsc}}.
#' \item (Nonlinear) effects of scalar covariates that are constant
#'   over \eqn{t}, e.g., \eqn{f(z_i)}, specified as \code{c(bbs(z))},
#'   or \eqn{\beta z_i}, specified as \code{c(bols(z))}.
#' \item Interaction terms between two scalar covariates, e.g., \eqn{z_i1 zi2 \beta(t)},
#'   are specified as \code{bols(z1) \%Xc\% bols(z2)} and
#'   an interaction \eqn{z_i1 f(zi2, t)} as \code{bols(z1) \%Xc\% bbs(z2)}, as
#'   \code{\%Xc\%} applies the sum-to-zero constraint to the desgin matrix of the tensor product
#'   built by \code{\%Xc\%}, see \code{\link{\%Xc\%}}.
#' \item Function-on-function regression terms of functional covariates \code{x},
#'   e.g., \eqn{\int x_i(s)\beta(s,t)ds}, specified as \code{bsignal(x, s = s)},
#'   using P-splines, see \code{\link{bsignal}}.
#'   Terms given by \code{\link{bfpc}} provide FPC-based effects of functional
#'   covariates, see \code{\link{bfpc}}.
#' \item Function-on-function regression terms of functional covariates \code{x}
#'   with integration limits \eqn{[l(t), u(t)]} depending on \eqn{t},
#'   e.g., \eqn{\int_[l(t), u(t)] x_i(s)\beta(s,t)ds}, specified as
#'   \code{bhist(x, s = s, time = t, limits)}. The \code{limits} argument defaults to
#'   \code{"s<=t"} which yields a historical effect with limits \eqn{[min(t),t]},
#'    see \code{\link{bhist}}.
#' \item Concurrent effects of functional covariates \code{x}
#'   measured on the same grid as the response, i.e., \eqn{x_i(s)\beta(t)},
#'   are specified as \code{bconcurrent(x, s = s, time = t)},
#'   see \code{\link{bconcurrent}}.
#' \item Interaction effects can be estimated as tensor product smooth, e.g.,
#'   \eqn{ z \int x_i(s)\beta(s,t)ds} as \code{bsignal(x, s = s) \%X\% bolsc(z)}
#' \item For interaction effects with historical functional effects, e.g.,
#'   \eqn{ z_i \int_[l(t),u(t)] x_i(s)\beta(s,t)ds} the base-learner
#'   \code{bhistx} should be used instead of \code{bhist},
#'   e.g., \code{bhistx(x, limits) \%X\% bolsc(z)}, see \code{\link{bhistx}}.
#' \item Generally, the \code{c()}-notation can be used to get effects that are
#'   constant over the index of the functional response.
#' \item If the \code{formula} in \code{FDboost} contains base-learners connected by
#' \code{\%O\%}, \code{\%A\%} or \code{\%A0\%}, those effects are not expanded with \code{timeformula},
#' allowing for model specifications with different effects in time-direction.
#' }
#'
#' In order to obtain a fair selection of base-learners, the same degrees of freedom (df)
#' should be specified for all baselearners. If the number of df differs among the base-learners,
#' the selection is biased towards more flexible base-learners with higher df as they are more
#' likely to yield larger improvements of the fit. It is recommended to use
#' a rather small number of df for all base-learners.
#' It is not possible to specify df larger than the rank of the design matrix.
#' For base-learners with rank-deficient penalty, it is not possible to specify df smaller than the
#' rank of the null space of the penalty (e.g., in \code{bbs} unpenalized part of P-splines).
#' The df of the base-learners in an FDboost-object can be checked using \code{extract(object, "df")},
#' see \code{\link[mboost:methods]{extract}}.
#'
#' The most important tuning parameter of component-wise gradient boosting
#' is the number of boosting iterations. It is recommended to use the number of
#' boosting iterations as only tuning parameter,
#' fixing the step-length at a small value (e.g., nu = 0.1).
#' Note that the default number of boosting iterations is 100 which is arbitrary and in most
#' cases not adequate (the optimal number of boosting iterations can considerably exceed 100).
#' The optimal stopping iteration can be determined by resampling methods like
#' cross-validation or bootstrapping, see the function \code{\link{cvrisk.FDboost}} which searches
#' the optimal stopping iteration on a grid, which in many cases has to be extended.
#'
#' @return An object of class \code{FDboost} that inherits from \code{mboost}.
#' Special \code{\link{predict.FDboost}}, \code{\link{coef.FDboost}} and
#' \code{\link{plot.FDboost}} methods are available.
#' The methods of \code{\link{mboost}} are available as well,
#' e.g., \code{\link[mboost:methods]{extract}}.
#' The \code{FDboost}-object is a named list containing:
#' \item{...}{all elements of an \code{mboost}-object}
#' \item{yname}{the name of the response}
#' \item{ydim}{dimension of the response matrix, if the response is represented as such}
#' \item{yind}{the observation (time-)points of the response, i.e. the evaluation points,
#'       with its name as attribute}
#' \item{data}{the data that was used for the model fit}
#' \item{id}{the id variable of the response}
#' \item{predictOffset}{the function to predict the smooth offset}
#' \item{offsetFDboost}{offset as specified in call to FDboost}
#' \item{offsetMboost}{offset as given to mboost}
#' \item{call}{the call to \code{FDboost}}
#' \item{callEval}{the evaluated function call to \code{FDboost} without data}
#' \item{numInt}{value of argument \code{numInt} determining the numerical integration scheme}
#' \item{timeformula}{the time-formula}
#' \item{formulaFDboost}{the formula with which \code{FDboost} was called}
#' \item{formulaMboost}{the formula with which \code{mboost} was called within \code{FDboost}}
#'
#' @author Sarah Brockhaus, Torsten Hothorn
#'
#' @seealso Note that \link{FDboost} calls \code{\link{mboost}} directly.
#' See, e.g., \code{\link[FDboost]{bsignal}} and \code{\link[FDboost]{bbsc}}
#' for possible base-learners.
#'
#' @keywords models regression nonlinear smooth
#'
#' @references
#' Brockhaus, S., Ruegamer, D. and Greven, S. (2017):
#' Boosting Functional Regression Models with FDboost.
#' <doi:10.18637/jss.v094.i10>
#'
#' Brockhaus, S., Scheipl, F., Hothorn, T. and Greven, S. (2015):
#' The functional linear array model. Statistical Modelling, 15(3), 279-300.
#'
#' Brockhaus, S., Melcher, M., Leisch, F. and Greven, S. (2017):
#' Boosting flexible functional regression models with a high number of functional historical effects,
#' Statistics and Computing, 27(4), 913-926.
#'
#' Currie, I.D., Durban, M. and Eilers P.H.C. (2006):
#' Generalized linear array models with applications to multidimensional smoothing.
#' Journal of the Royal Statistical Society, Series B-Statistical Methodology, 68(2), 259-280.
#'
#' Scheipl, F., Staicu, A.-M. and Greven, S. (2015):
#' Functional additive mixed models, Journal of Computational and Graphical Statistics, 24(2), 477-501.
#'
#' @examples
#' ######## Example for function-on-scalar-regression
#' data("viscosity", package = "FDboost")
#' ## set time-interval that should be modeled
#' interval <- "101"
#'
#' ## model time until "interval" and take log() of viscosity
#' end <- which(viscosity$timeAll == as.numeric(interval))
#' viscosity$vis <- log(viscosity$visAll[,1:end])
#' viscosity$time <- viscosity$timeAll[1:end]
#' # with(viscosity, funplot(time, vis, pch = 16, cex = 0.2))
#'
#' ## fit median regression model with 100 boosting iterations,
#' ## step-length 0.4 and smooth time-specific offset
#' ## the factors are coded such that the effects are zero for each timepoint t
#' ## no integration weights are used!
#' mod1 <- FDboost(vis ~ 1 + bolsc(T_C, df = 2) + bolsc(T_A, df = 2),
#'                timeformula = ~ bbs(time, df = 4),
#'                numInt = "equal", family = QuantReg(),
#'                offset = NULL, offset_control = o_control(k_min = 9),
#'                data = viscosity, control=boost_control(mstop = 100, nu = 0.4))
#'
#' \donttest{
#'   #### find optimal mstop over 5-fold bootstrap, small number of folds for example
#'   #### do the resampling on the level of curves
#'
#'   ## possibility 1: smooth offset and transformation matrices are refitted
#'   set.seed(123)
#'   appl1 <- applyFolds(mod1, folds = cv(rep(1, length(unique(mod1$id))), B = 5),
#'                       grid = 1:500)
#'   ## plot(appl1)
#'   mstop(appl1)
#'   mod1[mstop(appl1)]
#'
#'   ## possibility 2: smooth offset is refitted,
#'   ## computes oob-risk and the estimated coefficients on the folds
#'   set.seed(123)
#'   val1 <- validateFDboost(mod1, folds = cv(rep(1, length(unique(mod1$id))), B = 5),
#'                         grid = 1:500)
#'   ## plot(val1)
#'   mstop(val1)
#'   mod1[mstop(val1)]
#'
#'   ## possibility 3: very efficient
#'   ## using the function cvrisk; be careful to do the resampling on the level of curves
#'   folds1 <- cvLong(id = mod1$id, weights = model.weights(mod1), B = 5)
#'   cvm1 <- cvrisk(mod1, folds = folds1, grid = 1:500)
#'   ## plot(cvm1)
#'   mstop(cvm1)
#'
#' ## look at the model
#' summary(mod1)
#' coef(mod1)
#' plot(mod1)
#' plotPredicted(mod1, lwdPred = 2)
#' }
#'
#' ######## Example for scalar-on-function-regression
#' data("fuelSubset", package = "FDboost")
#'
#' ## center the functional covariates per observed wavelength
#' fuelSubset$UVVIS <- scale(fuelSubset$UVVIS, scale = FALSE)
#' fuelSubset$NIR <- scale(fuelSubset$NIR, scale = FALSE)
#'
#' ## to make mboost:::df2lambda() happy (all design matrix entries < 10)
#' ## reduce range of argvals to [0,1] to get smaller integration weights
#' fuelSubset$uvvis.lambda <- with(fuelSubset, (uvvis.lambda - min(uvvis.lambda)) /
#'                                           (max(uvvis.lambda) - min(uvvis.lambda) ))
#' fuelSubset$nir.lambda <- with(fuelSubset, (nir.lambda - min(nir.lambda)) /
#'                                           (max(nir.lambda) - min(nir.lambda) ))
#'
#' ## model fit with scalar response
#' ## include no intercept as all base-learners are centered around 0
#' mod2 <- FDboost(heatan ~ bsignal(UVVIS, uvvis.lambda, knots = 40, df = 4, check.ident = FALSE)
#'                + bsignal(NIR, nir.lambda, knots = 40, df = 4, check.ident = FALSE),
#'                timeformula = NULL, data = fuelSubset, control = boost_control(mstop = 200))
#'
#' ## additionally include a non-linear effect of the scalar variable h2o
#' mod2s <- FDboost(heatan ~ bsignal(UVVIS, uvvis.lambda, knots = 40, df = 4, check.ident = FALSE)
#'                + bsignal(NIR, nir.lambda, knots = 40, df = 4, check.ident = FALSE)
#'                + bbs(h2o, df = 4),
#'                timeformula = NULL, data = fuelSubset, control = boost_control(mstop = 200))
#'
#' ## alternative model fit as FLAM model with scalar response; as timeformula = ~ bols(1)
#' ## adds a penalty over the index of the response, i.e., here a ridge penalty
#' ## thus, mod2f and mod2 have different penalties
#' mod2f <- FDboost(heatan ~ bsignal(UVVIS, uvvis.lambda, knots = 40, df = 4, check.ident = FALSE)
#'                + bsignal(NIR, nir.lambda, knots = 40, df = 4, check.ident = FALSE),
#'                timeformula = ~ bols(1), data = fuelSubset, control = boost_control(mstop = 200))
#'
#' \donttest{
#'   ## bootstrap to find optimal mstop takes some time
#'   set.seed(123)
#'   folds2 <- cv(weights = model.weights(mod2), B = 10)
#'   cvm2 <- cvrisk(mod2, folds = folds2, grid = 1:1000)
#'   mstop(cvm2) ## mod2[327]
#'   summary(mod2)
#'   ## plot(mod2)
#' }
#'
#' ## Example for function-on-function-regression
#' if(require(fda)){
#'
#'   data("CanadianWeather", package = "fda")
#'   CanadianWeather$l10precip <- t(log(CanadianWeather$monthlyPrecip))
#'   CanadianWeather$temp <- t(CanadianWeather$monthlyTemp)
#'   CanadianWeather$region <- factor(CanadianWeather$region)
#'   CanadianWeather$month.s <- CanadianWeather$month.t <- 1:12
#'
#'   ## center the temperature curves per time-point
#'   CanadianWeather$temp <- scale(CanadianWeather$temp, scale = FALSE)
#'   rownames(CanadianWeather$temp) <- NULL ## delete row-names
#'
#'   ## fit model with cyclic splines over the year
#'   mod3 <- FDboost(l10precip ~ bols(region, df = 2.5, contrasts.arg = "contr.dummy")
#'                    + bsignal(temp, month.s, knots = 11, cyclic = TRUE,
#'                              df = 2.5, boundary.knots = c(0.5,12.5), check.ident = FALSE),
#'                   timeformula = ~ bbs(month.t, knots = 11, cyclic = TRUE,
#'                                       df = 3, boundary.knots = c(0.5, 12.5)),
#'                   offset = "scalar", offset_control = o_control(k_min = 5),
#'                   control = boost_control(mstop = 60),
#'                   data = CanadianWeather)
#'
#'  \donttest{
#'    #### find the optimal mstop over 5-fold bootstrap
#'    ## using the function applyFolds
#'    set.seed(123)
#'    folds3 <- cv(rep(1, length(unique(mod3$id))), B = 5)
#'    appl3 <- applyFolds(mod3, folds = folds3, grid = 1:200)
#'
#'    ## use function cvrisk; be careful to do the resampling on the level of curves
#'    set.seed(123)
#'    folds3long <- cvLong(id = mod3$id, weights = model.weights(mod3), B = 5)
#'    cvm3 <- cvrisk(mod3, folds = folds3long, grid = 1:200)
#'    mstop(cvm3) ## mod3[64]
#'
#'    summary(mod3)
#'    ## plot(mod3, pers = TRUE)
#'  }
#' }
#'
#' ######## Example for functional response observed on irregular grid
#' ######## Delete part of observations in viscosity data-set
#' data("viscosity", package = "FDboost")
#' ## set time-interval that should be modeled
#' interval <- "101"
#'
#' ## model time until "interval" and take log() of viscosity
#' end <- which(viscosity$timeAll == as.numeric(interval))
#' viscosity$vis <- log(viscosity$visAll[,1:end])
#' viscosity$time <- viscosity$timeAll[1:end]
#' # with(viscosity, funplot(time, vis, pch = 16, cex = 0.2))
#'
#' ## only keep one eighth of the observation points
#' set.seed(123)
#' selectObs <- sort(sample(x = 1:(64*46), size = 64*46/4, replace = FALSE))
#' dataIrregular <- with(viscosity, list(vis = c(vis)[selectObs],
#'                                       T_A = T_A, T_C = T_C,
#'                                       time = rep(time, each = 64)[selectObs],
#'                                       id = rep(1:64, 46)[selectObs]))
#'
#' ## fit median regression model with 50 boosting iterations,
#' ## step-length 0.4 and smooth time-specific offset
#' ## the factors are in effect coding -1, 1 for the levels
#' ## no integration weights are used!
#' mod4 <- FDboost(vis ~ 1 + bols(T_C, contrasts.arg = "contr.sum", intercept = FALSE)
#'                 + bols(T_A, contrasts.arg = "contr.sum", intercept=FALSE),
#'                 timeformula = ~ bbs(time, lambda = 100), id = ~id,
#'                 numInt = "Riemann", family = QuantReg(),
#'                 offset = NULL, offset_control = o_control(k_min = 9),
#'                 data = dataIrregular, control = boost_control(mstop = 50, nu = 0.4))
#' ## summary(mod4)
#' ## plot(mod4)
#' ## plotPredicted(mod4, lwdPred = 2)
#'
#' \donttest{
#'   ## Find optimal mstop, small grid/low B for a fast example
#'   set.seed(123)
#'   folds4 <- cv(rep(1, length(unique(mod4$id))), B = 3)
#'   appl4 <- applyFolds(mod4, folds = folds4, grid = 1:50)
#'   ## val4 <- validateFDboost(mod4, folds = folds4, grid = 1:50)
#'
#'   set.seed(123)
#'   folds4long <- cvLong(id = mod4$id, weights = model.weights(mod4), B = 3)
#'   cvm4 <- cvrisk(mod4, folds = folds4long, grid = 1:50)
#'   mstop(cvm4)
#' }
#'
#' ## Be careful if you want to predict newdata with irregular response,
#' ## as the argument index is not considered in the prediction of newdata.
#' ## Thus, all covariates have to be repeated according to the number of observations
#' ## in each response trajectroy.
#' ## Predict four response curves with full time-observations
#' ## for the four combinations of T_A and T_C.
#' newd <- list(T_A = factor(c(1,1,2,2), levels = 1:2,
#'                         labels = c("low", "high"))[rep(1:4, length(viscosity$time))],
#'              T_C = factor(c(1,2,1,2), levels = 1:2,
#'                         labels = c("low", "high"))[rep(1:4, length(viscosity$time))],
#'              time = rep(viscosity$time, 4))
#'
#' pred <- predict(mod4, newdata = newd)
#' ## funplot(x = rep(viscosity$time, 4), y = pred, id = rep(1:4, length(viscosity$time)))
#'
#'
#' @export
#' @import methods Matrix mboost
#' @importFrom grDevices heat.colors rgb
#' @importFrom graphics abline barplot contour legend lines matplot par persp plot points
#' @importFrom utils relist getS3method packageDescription
#' @importFrom stats setNames approx as.formula coef complete.cases fitted formula lm median model.matrix model.weights na.omit predict quantile sd terms.formula variable.names
#' @importFrom gamboostLSS GaussianLSS GaussianMu GaussianSigma make.grid cvrisk.mboostLSS mboostLSS_fit
#' @importFrom stabs stabsel stabsel_parameters
#' @importFrom splines bs splineDesign
#' @importFrom mgcv gam s
#' @importFrom zoo na.locf
#' @importFrom MASS Null
#' @importFrom parallel mclapply
FDboost <- function(formula,          ### response ~ xvars
                    timeformula,      ### time
                    id = NULL,          ### id variable if response is in long format
                    numInt = "equal",   ### option for approximation of integral over loss
                    data,             ### list of response, time, xvars
                    weights = NULL,   ### optional
                    offset = NULL,    ### optional
                    offset_control = o_control(), ### optional specification of offset model
                    check0 = FALSE,    ### check sum-to-zero-constraint of the fitted effects?
                    ...)              ### goes directly to mboost
{
  dots <- list(...)

  ### save formula of FDboost before it is changed
  formulaFDboost <- formula

  tf <- terms.formula(formula, specials = "c")
  trmstrings <- attr(tf, "term.labels")
  equalBrackets <- NULL
  if(length(trmstrings) > 0){
    ## insert id at end of each base-learner
    trmstrings2 <- paste0(substr(trmstrings, 1 , nchar(trmstrings)-1), ", index=", id[2],")")
    ## check if number of opening brackets is equal to number of closing brackets
    equalBrackets <- sapply(seq_along(trmstrings2), function(i)
    {
      lengths(regmatches(trmstrings2[i], gregexpr("(", trmstrings2[i], fixed = TRUE))) ==
        lengths(regmatches(trmstrings2[i], gregexpr(")", trmstrings2[i], fixed = TRUE)))
    })
  }

  ## check formulas
  if(inherits(try(id), "try-error")) stop("id must either be NULL or a formula object.")
  if(missing(timeformula) || inherits(try(timeformula),  "try-error"))
    stop("timeformula must either be NULL or a formula object.")
    stopifnot(inherits(formula, "formula"))
  if(!is.null(timeformula)) stopifnot(inherits(timeformula, "formula"))

  ## insert the id variable into the formula, to treat it like the other variables
  if(!is.null(id)){
    stopifnot(inherits(id, "formula"))
    ##tf <- terms.formula(formula, specials = c("c"))
    ##trmstrings <- attr(tf, "term.labels")
    ##equalBrackets <- NULL
    if(length(trmstrings) > 0){
      ## insert index into the other base-learners of the tensor-product as well
      for(i in seq_along(trmstrings)){
        if(grepl( "%X", trmstrings2[i], fixed = TRUE)){
          temp <- unlist(strsplit(trmstrings2[i], "%X", fixed = TRUE))
          temp1 <- temp[-length(temp)]
          ## http://stackoverflow.com/questions/2261079
          ## delete all trailing whitespace
          trim.trailing <- function (x) sub("\\s+$", "", x)
          temp1 <- trim.trailing(temp1)
          temp1 <- paste0(substr(temp1, 1 , nchar(temp1)-1), ", index=", id[2],")")
          trmstrings2[i] <- paste0(paste0(temp1, collapse = " %X"), " %X", temp[length(temp)])
        }
        ## do not add index to base-learners bhistx()
        if( grepl("bhistx", trmstrings[i], fixed = TRUE) ) trmstrings2[i] <- trmstrings[i]
        ##  do not add an index if an index is already part of the formula
        if( grepl("index[[:blank:]]*=", trmstrings[i]) ) trmstrings2[i] <- trmstrings[i]
        ##  do not add an index if an index for %A%, %A0%, %O%
        if( grepl("%A%", trmstrings[i], fixed = TRUE) ) trmstrings2[i] <- trmstrings[i]
        if( grepl("%A0%", trmstrings[i], fixed = TRUE) ) trmstrings2[i] <- trmstrings[i]
        if( grepl("%O%", trmstrings[i], fixed = TRUE) ) trmstrings2[i] <- trmstrings[i]
        ##  do not add an index for base-learner that do not have brackets
        if( i %in% which(!equalBrackets) ) trmstrings2[i] <- trmstrings[i]
      }
      trmstrings <- trmstrings2
    }
    xpart <- paste(as.vector(trmstrings), collapse = " + ")
    if(xpart != ""){
      if(any(substr(tf[[3]], 1, 1) == "1")) xpart <- paste0("1 + ", xpart)
    }else{
      xpart <- 1
    }
    formula <- as.formula(paste(tf[[2]], " ~ ", xpart))
    #print(formula)
    nameid <- paste(id[2])
    id <- data[[nameid]]
  }else{
    nameid <- NULL
  }

  ### extract response; a numeric matrix or a vector
  yname <- all.vars(formula)[1]
  response <- data[[yname]]
  if(is.null(response)) stop("The response <", yname, "> is not contained in data.")
  data[[yname]] <- NULL

  ### for scalar response ~bols(1) or NULL
  scalarResponse <- FALSE
  scalarNoFLAM <- FALSE
  response_factor <- NULL
  if(is.null(timeformula) || timeformula == ~bols(1)){

    scalarResponse <- TRUE
    if(is.null(timeformula)) scalarNoFLAM <- TRUE

    if(grepl("df", formula[3], fixed = TRUE) || !grepl("lambda", formula[3], fixed = TRUE) ){
      timeformula <- ~bols(ONEtime, intercept = FALSE, df = 1)
    }else{
      timeformula <- ~bols(ONEtime, intercept = FALSE)
    }

    data$ONEtime <- 1

    # if response is a matrix with one row, convert it to a vector
    if(is.matrix(response) && dim(response)[2] == 1){
      response <- c(response)
      warning("The scalar response is coerced from a one-column matrix to a vector. ",
              "Specify scalar response as vector.")
    }

  }

  if(scalarResponse && !identical(numInt,"equal"))
    stop("Integration weights numInt must be set to 'equal' for scalar response.")

  ## extract time(s) from timeformula
  yind <- all.vars(timeformula)
  if(length(yind) == 1) {
    yind <- yind[[1]]
    nameyind <- yind
    assign(yind, data[[yind]])
    time <- data[[yind]]
    if(!is.numeric(time)) warning("Non-numeric time variable specified.
                                'plot' and other convenience functions potentially not designed for that, yet.")
    data[[yind]] <- NULL
    attr(time, "nameyind") <- nameyind
  } else {
    warning("More than one variable specified in time formula,
                                'plot' and other convenience functions potentially not designed for that, yet.")
    nameyind <- yind
    for(yind_ in yind) assign(yind_, data[[yind_]])
    time <- data[yind]
  }

  ### extract covariates
  # data <- as.data.frame(data)
  allCovs <- unique(c(nameid, all.vars(formula)))
  if(length(allCovs) > 1){
    data <- data[allCovs[!allCovs %in% c(yname, nameyind)] ]
    if( anyNA(names(data)) ) data <- data[ !is.na(names(data)) ]
  }else{
    data <- list(NULL)  # <SB> intercept-model without covariates
  }

  ### get covariates that are modeled constant over time
  # code of function pffr() of package refund
  terms <- sapply(trmstrings, function(trm) as.call(parse(text = trm))[[1]], simplify = FALSE)
  # ugly, but getTerms(formula)[-1] does not work for terms like I(x1:x2)
  frmlenv <- environment(formula)
  where.c <- attr(tf, "specials")$c - 1    # indices of scalar offset terms

  # transform: c(foo) --> foo
  if(length(where.c)){
    trmstrings[where.c] <- sapply(trmstrings[where.c], function(x){
      sub("\\)$", "", sub("^c\\(", "", x)) #c(BLA) --> BLA
    })
    blconstant <- "bols(ONEtime, intercept = FALSE)"
  }
  assign("ONEtime", rep(1.0, length(time)))

  if(scalarResponse){ ## scalar response

    nr <- NROW(response)
    nobs <- nr # number of observed trajectories
    nc <- 1
    dresponse <- response

  }else{ ## functional response

    if(is.null(id)){
      ### check dimensions
      ### response has trajectories as rows
      stopifnot(is.matrix(response))
      # <SB> dataframe is list and can contain time-points of functional covariates of arbitrary length
      # if (nrow(data) > 0) stopifnot(nrow(response) == nrow(data))
      nr <- nrow(response)
      if(!is.list(time))
        stopifnot(ncol(response) == length(time)) else
          stopifnot(all(ncol(response) == lengths(time[sapply(time, is.vector)])))
      nc <- ncol(response)
      dresponse <- as.vector(response) # column-wise stacking of response
      ## convert characters to factor
      if(is.character(dresponse)) dresponse <- factor(dresponse)
      ## in case of a scalar factor response, use the original factor as response
      if(!is.null(response_factor)) dresponse <- response_factor
      nobs <- nr # number of observed trajectories
    }else{
      stopifnot(is.null(dim(response))) ## stopifnot(is.vector(response))
      # check length of response and its time and index
      if(is.list(time))
        stopifnot(all(length(response) == lengths(time)) & length(response) == length(id)) else
          stopifnot(length(response) == length(time) & length(response) == length(id))

      if(anyNA(response)) warning("For non-grid observations the response should not contain missing values.")
      if( !all(sort(unique(id)) == seq_along(unique(id))) ) stop("id has to be integers 1, 2, 3,..., N.")

      nr <- length(response) # total number of observations
      nc <- length(unique(id)) # number of trajectories
      dresponse <- as.vector(response) # column-wise stacking of response
      nobs <- length(unique(id)) # number of observed trajectories
    }

  }

  ### save original dimensions of response
  ydim <- dim(response)

  ### (pre-)check if length / number of rows of response and
  ### functional covariates match
  ### (only meaningful for models with no hmatrix)
  if(all(!sapply(data, is.hmatrix))){

    functcov <- sapply(data, NCOL) > 1

    if(!scalarResponse || any(functcov))
      if(any(ww <- ydim[1] != sapply(data[functcov], NROW)))
        stop(paste0("The length of the response and number of observations of ",
                    names(ww[1]), " do not match."))

  }

  ### variable to fit smooth intercept
  assign("ONEx", rep(1.0, nobs))

  ##### compose mboost formula

  ## get formula over time
  tfm <- paste(deparse(timeformula), collapse = "")
  tfm <- strsplit(tfm, "~", fixed = TRUE)[[1]]
  tfm <- strsplit(tfm[2], "+", fixed = TRUE)[[1]]

  ## get formula in covariates
  cfm <- paste(deparse(formula), collapse = "")
  cfm <- strsplit(cfm, "~", fixed = TRUE)[[1]]
  cfm0 <- cfm
  #xfm <- strsplit(cfm[2], "\\+")[[1]]
  xfm <- trmstrings

  ## check that the timevariable in timeformula and in the bhistx-base-learners have the same name
  if(any(grepl("bhistx", trmstrings, fixed = TRUE))){
    for(j in seq_along(trmstrings)){
      if(any(grepl("bhistx", trmstrings[j], fixed = TRUE))){
        if(grepl("%X", trmstrings[j], fixed = TRUE) ){
          temp <- strsplit(trmstrings[[j]], "%X.*%")[[1]]
          temp <- temp[ grepl("bhistx", temp, fixed = TRUE) ]
          ## pryr::standardise_call(quote(bhistx(X1h, df=3)))
          temp_name <- all.vars(formula(paste("~", temp)))[1]
        }else{
          temp_name <- all.vars(formula(paste("~", trmstrings[[j]])[1]))[1]
        }
        if(getTimeLab(data[[temp_name]]) != nameyind){
          stop("The timeLab of the hmatrix-object in bhistx(), '", getTimeLab(data[[temp_name]]),
               "', must be euqal to the name of the time-variable in timeformula, '", nameyind, "'.")
        }
        timeLong <- time
        ## for response matrix: expand time accordingly
        if(!is.null(ydim)) timeLong <- rep(time, each = ydim[1] )
        if( any( abs(getTime(data[[temp_name]]) - timeLong) > .Machine$double.eps*10^10) ){
          stop("The time of the hmatrix-object in bhistx() must match the time-variable in timeformula.")
        }
      }
    }
  }

  yfm <- strsplit(cfm[1], "+", fixed = TRUE)[[1]] ## name of response

  ## set up formula for effects constant in time
  if(length(where.c) > 0){
    # set c_df to the df/lambda in timeformula
    if( grepl("lambda", tfm, fixed = TRUE) ||
          ( grepl("bols", tfm, fixed = TRUE) &&  !grepl("df", tfm, fixed = TRUE)) ){
      c_lambda <- eval(parse(text = paste0(tfm, "$dpp(rep(1.0,", length(time), "))$df()")))["lambda"]
      cfm <- paste("bols(ONEtime, intercept = FALSE, lambda = ", c_lambda ,")")
    } else{
      c_df <- eval(parse(text=paste0(tfm, "$dpp(rep(1.0,", length(time), "))$df()")))["df"]
      cfm <- paste("bols(ONEtime, intercept = FALSE, df = ", c_df ,")")
    }
  }

  # make brackets around timeformula if more than one variable is involved
  if(length(all.vars(timeformula)) > 1) tfm <- paste("(", tfm, ")")

  # expand formula as Kronecker or tensor product
  if(is.null(id)){
    tmp <- outer(xfm, tfm, function(x, y) paste(x, y, sep = "%O%"))
  }else{
    ## expand the bl according to id
    # do not expand for terms without brackets, which is equal to having an unequal number of brackets
    # in the generation part of trmstrings
    if(is.null(equalBrackets)){ # for intercept models y ~ 1
      which_equalBrackets <- 0
    } else{
      which_equalBrackets <- which(equalBrackets)
    }
    xfmTemp <- paste0(substr(xfm[which_equalBrackets], 1 ,
                        nchar(xfm[which_equalBrackets]) - 1 ), ")") # , index=id is done in the beginning
    xfm[which_equalBrackets] <- xfmTemp
    rm(xfmTemp)
    tmp <- outer(xfm, tfm, function(x, y) paste(x, y, sep = "%X%"))
  }

  # do not expand an effect bconcurrent() or bhist() with timeformula
  if (any(grepl("bconcurrent|bhis", tmp)))
    tmp[c(grep("bconcurrent", tmp, fixed = TRUE), grep("bhist", tmp, fixed = TRUE))] <- xfm[c(grep("bconcurrent", tmp, fixed = TRUE), grep("bhist", tmp, fixed = TRUE))]

  ## do not expand effects in formula including %A% with timeformula
  if( any(grepl("%A%", xfm, fixed = TRUE)) )
    tmp[grep("%A%", xfm, fixed = TRUE)] <- xfm[grep("%A%", xfm, fixed = TRUE)]

  ## do not expand effects in formula including %A0% with timeformula
  if( any(grepl("%A0%", xfm, fixed = TRUE)) )
    tmp[grep("%A0%", xfm, fixed = TRUE)] <- xfm[grep("%A0%", xfm, fixed = TRUE)]

  ## do not expand effects in formula including %O% with timeformula
  if( any(grepl("%O%", xfm, fixed = TRUE)) )
    tmp[grep("%O%", xfm, fixed = TRUE)] <- xfm[grep("%O%", xfm, fixed = TRUE)]

  ## expand with a constant effect in t-direction
  if(length(where.c) > 0){
    tmp[where.c] <- outer(xfm[where.c], cfm, function(x, y) paste(x, y, sep = "%O%"))
  }

  ## for scalar response without FLAM-model do not use the Kronecker product
  if(scalarNoFLAM){
    tmp <- xfm
  }


  ####### find the number of df for each base-learner
  ## for a fair selection of bl the df must be equal in all bl
  if(is.list(time)) {
    warning("For timeformulas with multiple variables dfs are not checked automatically.
            Please make sure that all base-learner df are equal to ensure a fair selection.")
    bl_df <- NULL
  } else {
    get_df <- function(bl){
      split_bl <- unlist(strsplit(bl, split = "%.{1,3}%"))
      all_df <- c()
      for(i in seq_along(split_bl)){
        parti <- parse(text = split_bl[i])[[1]]
        parti <- expand.call(definition = get(as.character(parti[[1]])), call = parti)
        dfi <- parti$df # df of part i in bl
        if(is.symbol(dfi) || (!is.numeric(dfi) && is.numeric(eval(dfi)))) dfi <- eval(dfi)
        lambdai <- parti$lambda # if lambda is present, df is ignored
        if(is.symbol(lambdai)) lambdai <- eval(lambdai)
        if(!is.null(dfi)){
          all_df[i] <- dfi
        }else{ ## for df = NULL, the value of lambda is used
          if(lambdai == 0){
            all_df[i] <-  NCOL(extract(with(data, eval(parti)), "design"))
          }else{
            all_df[i] <- "" ## dont know df
          }
          if(grepl("%X.{0,3}%", bl)){ ## special behaviour of %X%
            all_df[i] <- 1
          }
        }
      }
      if(any(all_df == "")){
        ret <- NULL
      }else{
        ret <- prod(all_df) # global df for bl is product of all df
        if( identical(ret, numeric(0)) ) ret <- NULL
      }
      return(ret)
    }

    #### get the specified df for each base-learner
    ## does not take into account base-learners that do not have brackets
    if(length(tmp) == 0){
      bl_df <- NULL
    }else{
      bl_df <- vector("list", length(tmp))
      bl_df[equalBrackets] <- lapply(tmp[equalBrackets], function(x) try(get_df(x)))
      bl_df <- unlist(bl_df[equalBrackets & (!sapply(bl_df, function(x) inherits(x,  "try-error")))])
      #print(bl_df)

      if( !is.null(bl_df) && any(abs(bl_df - bl_df[1]) > .Machine$double.eps * 10^10) ){
        warning("The base-learners differ in the degrees of freedom.")
      }

      if(!is.null(bl_df)){
        df_timeformula <- get_df(tfm)
        df_effects <- min(bl_df)
      }
    }
  }

  ### replace "1" with intercept base learner
  formula_intercept <- FALSE
  if ( any( gsub(" ", "", strsplit(cfm0[2], "+", fixed = TRUE)[[1]], fixed = TRUE) ==  "1")){
    formula_intercept <- TRUE
    ## use df or lambda as in timeformula
    if( any(grepl("lambda", deparse(timeformula), fixed = TRUE)) ||
        any(( grepl("bols", deparse(timeformula), fixed = TRUE) &  !grepl("df", deparse(timeformula), fixed = TRUE))) ){
      tmp <- c("bols(ONEx, intercept = FALSE, lambda = 0)", tmp)
    } else{
      tmp <- c("bols(ONEx, intercept = FALSE, df = 1)", tmp)
    }

    ## adjust the df in the timeformula
    call_tfm <- as.call(parse(text = tfm)[[1]])
    if(!is.null(bl_df)) call_tfm$df <- df_effects
    tfm_df <- paste0(deparse(call_tfm), collapse = "")

    ## for FLAM model with %O% use anisotropic Kronecker product for not penalizing in direction of ONEx
    ## use %A0%, as smooth intercept has smooting parameter 0 in 1-direction
    if(is.null(id)){
      if(!scalarNoFLAM) tmp[[1]] <- paste(tmp[[1]], "%A0%", tfm_df)
    }else{ ## response in long format
      tmp[[1]] <- tfm_df
    }
  }

  ####### put together the model formula
  xpart <- paste(as.vector(tmp), collapse = " + ")
  fm <- as.formula(paste("dresponse ~ ", xpart))

  ## find variables that are defined in environment(formula) but not in environment(fm) or in data
  fm_vars <- all.vars(fm) # all variables of fm

  ## for bhist() the limits argument can be a function;
  ## in this case those function arguments should not be included
  terms_fm_bhist <- terms(formula, specials = c("bhist", "bhistx"))

  if(any(! sapply(attr(terms_fm_bhist,  "specials"), is.null))){ ## occurence of bhist or bhistx

    places_bhist <- c(attr(terms_fm_bhist,  "specials")$bhist,
                      attr(terms_fm_bhist,  "specials")$bhistx)

    vars_arg_limits_not_unique <- c()
    for(pl in seq_along(places_bhist)){ ## loop over all bhist-bl

      ## get the limits argument
      current_bl <- attr(terms_fm_bhist, "variables")[[places_bhist[pl] + 1]]
      # for base-learner with interaction, find bhistx / bhist
      if(any(grepl("%X", current_bl, fixed = TRUE))){
        #current_bl <- current_bl[ grepl("bhist", current_bl) ]
        arg_limits <- eval(as.call(as.list(current_bl[grepl("bhist", current_bl, fixed = TRUE)])[[1]])$limits)
      }else{
        # limits argument of bhist / bhistx
        arg_limits <- eval(as.call(current_bl)$limits)
      }

      if(is.function(arg_limits)){
        ## get the names of the arguments of the limits-function
        vars_arg_limits <- names(formals(arg_limits))
        ## check whether the variables uniquely occur in the limits-function
        var_occur <- table(all.vars(attr(terms_fm_bhist, "variables")[[places_bhist[pl] + 1]],
                                    unique = FALSE))[vars_arg_limits] == 1
        vars_arg_limits_not_unique <- c(vars_arg_limits_not_unique, vars_arg_limits[var_occur])
      }
    }

    if(length(vars_arg_limits_not_unique) > 0){
      delete_var <- table(all.vars(fm, unique = FALSE))[unique(vars_arg_limits_not_unique)] <=
        table(vars_arg_limits_not_unique)[unique(vars_arg_limits_not_unique)]
      fm_vars <- fm_vars[! fm_vars %in% names(delete_var)[delete_var]]
    }
    rm(vars_arg_limits_not_unique, places_bhist, arg_limits)

  }

  ## vars_envir_formula <- fm_vars[ ! fm_vars %in% c(names(data), "dresponse" , "ONEx", "ONEtime", yind) ]
  # variables that exist in environment(fm)
  vars1 <- sapply(fm_vars, exists, envir = environment(fm), inherits = FALSE)
  if(!is.null(names(data))){
    vars2 <- sapply(fm_vars, function(x){ x %in% names(data)} ) # variables that exist in data
  }else{
    vars2 <- FALSE
  }

  # variables that exist neither in environment(fm) nor in data...
  vars_envir_formula <- fm_vars[ !(vars1 | vars2) ]
  # ... take those from the environment of the formula with which FDboost was called
  for(i in seq_along(vars_envir_formula)){
    if(! exists(vars_envir_formula[i], envir = environment(formulaFDboost)))
      stop("Variable <", vars_envir_formula[i], "> does not exist.")
    tmp <- get(vars_envir_formula[i], envir = environment(formulaFDboost))
    assign(x = vars_envir_formula[i], value = tmp,  envir = environment(fm))
  }
  rm(tmp)

  # environment(fm)

  ### expand weights for observations
  if (is.null(weights)) weights <- rep(1, nr)
  w <- weights
  if(is.null(id)){
    if (length(w) == nr) w <- rep(w, nc) # expand weights if they are only on the columns
    # check dimensions of w
    if(length(w) != nc*nr) stop("Dimensions of weights do not match the dimensions of the response.")
  }

  ## save the integration weights as data_weights
  ## per default the data_weights are all 1
  data_weights <- 1

  ### multiply integration weights numInt to weights and w
  if(is.numeric(numInt)){
    .numInt_len_check <- if(is.list(time))
      all(length(numInt) == lengths(time))  else
        length(numInt) == length(time)
    if(!.numInt_len_check)
      stop("Length of integration weights and time vector are not equal.")
    data_weights <- numInt
    if(!is.null(ydim)){ ## only blow up for array model
      data_weights <- rep(data_weights, each = nr)
    }
  }else{
    if(!numInt %in% c("equal", "Riemann")) warning("argument numInt is ignored as it is neither numeric nor one of (\"equal\", \"Riemann\")")
    if(numInt == "Riemann"){
      if(!is.numeric(time))
        stop("Riemann integration weights only implemented for a single numeric time variable.")
      data_weights <- as.vector(integrationWeights(X1 = response, time, id = id))
    }
  }
  w <- w * data_weights

  ### set weights of missing values to 0
  if(sum(is.na(dresponse)) > 0){
    w[which(is.na(dresponse))] <- 0
  }

  if(all(w == 0)) stop("All weights are zero!")

  ### offset == "scalar", or offset = numeric of length 1, or scalar response
  ### -> use one scalar/user-specified offset like in mboost

  ### in case of factor or multiple time variables set offset to 0 and give a warning
  if(is.list(time) || !is.numeric(time)) {
    .offsetwarning <- is.null(offset)
    if(!.offsetwarning) {
      .offsetwarning <- (offset == "scalar")
    }
    if(.offsetwarning) {
      offset <- 0
      warning("In case of factor or multiple time variables no default offset implemented, yet.
              offset is set to 0.")
    }
  }

  ## remember the offset-specification of FDboost
  offsetFDboost <- offset

  if( scalarResponse || # scalar response
      !is.null(offset) && length(offset) == 1 ){  # offset == "scalar" / offset = numeric of length 1

    if( !is.null(offset) && !is.numeric(offset) && offset != "scalar" ){
      stop("User-specified offset must be numeric or 'scalar' to get a scalar offset as in mboost.")
    }

    # use one constant offset in mboost(), as default in mboost
    if(!is.null(offset) && offset == "scalar"){
      offsetVec <- NULL