-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDeselectBoost.R
More file actions
680 lines (517 loc) · 37.9 KB
/
DeselectBoost.R
File metadata and controls
680 lines (517 loc) · 37.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
DeselectBoost <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
require('plyr')
# require('dplyr')
tau = ifelse(is.null(tau), 0.01, tau)
if(any(class(object) %in% 'mboost')){
DeselectBoost_1(object, data = data, fam = fam, tau = tau, method = method[1])
}else if (!(any(class(object) %in% 'mboost')) & dim(object[[1]]$response)[2] == 1) {
switch(length(names(object))-1,{
DeselectBoostLSS_2(object, data = data, fam = fam, tau = tau, method = method[1])},{
DeselectBoostLSS_3(object, data = data, fam = fam, tau = tau, method = method[1])},{
DeselectBoostLSS_4(object, data = data, fam = fam, tau = tau, method = method[1])}, {
DeselectBoostLSS_5(object, data = data, fam = fam, tau = tau, method = method[1])
})
} else{
DeselectBoostLSS_multi_5(object, data = data, fam = fam, tau = tau, method = method[1])
}
}
DeselectBoost_1 <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
require('plyr')
if(is.null(data) && class(object$model.frame()) == 'list'){return(stop("Please enter the data."))
} else if(!is.null(data)){
data = data
}else{data <- object$model.frame()}
nameVar <- names(coef(object,which = ''))[-1]
which.response <- which(sapply(1:(dim(data)[2]), function(x){identical(as.numeric(data[,x]), as.numeric(object$response))}))
name.response <- colnames(data)[which.response]
mstop <- object$mstop()
RiskRed <- object$risk()
totalRiskRed <- RiskRed[1] - RiskRed[mstop+1]
diffRiskRed = sapply(seq(1:(mstop)), function(k){RiskRed[k]-RiskRed[k+1]})
if(any(class(object) %in% "glmboost")){
select = selected(object) - 1
diffRiskRed = diffRiskRed[selected(object)-1 != 0]
}else{
select = selected(object)
}
select = select[select != 0]
Var = count(select)[[1]]
Risk.Var <- lapply(1:length(Var),function(j){sum(diffRiskRed[which(count(select)[[1]][j] == select)])})
n.parameter <- c(names(object$coef()))
if('(Intercept)' %in% n.parameter) n.parameter <- n.parameter[-which(n.parameter == '(Intercept)')]
Risk.order <- data.frame(Var,n.parameter, as.numeric(Risk.Var))
Risk.order <- Risk.order[order(Risk.order$as.numeric.Risk.Var.),]
Risk.order$CumRisk <- cumsum(Risk.order$as.numeric.Risk.Var.)
colnames(Risk.order) <- c( 'Var', 'VarName', 'Risk', 'CumRisk')
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- totalRiskRed * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(empty(RiskRedOver)){form2 = as.formula(paste(name.response, "~ 1"))
}else{
help <- vector()
j = 1
for(i in 1:length(colnames(data))){
if(any(grepl(colnames(data)[i], RiskRedOver$VarName, fixed = TRUE))){
help[j] <- colnames(data)[i]
j = j+1
}
}
form2 <-as.formula(paste(name.response, " ~ ", paste(help, collapse= "+")))
#form2 <-as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName, collapse= "+")))
if(!is.null(environment(environment(object[["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["df"]])){
dfbase = environment(environment(object[["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["df"]]
}}
#if(is.null(object$call$family)){ fam <- Gaussian()
#}else fam <- eval(parse(text = object$call$family))
if(any(class(object) %in% "glmboost")){
model_after = glmboost(form2, data = data, weights = model.weights(object), family = fam, control = boost_control(mstop = mstop, nu = object$control$nu, risk = object$control$risk))
}else{
model_after = gamboost(form2, data = data, weights = model.weights(object), family = fam, control = boost_control(mstop = mstop, nu = object$control$nu, risk = object$control$risk))
}
out <- model_after
out$tau = tau
out$deselectmethod = method[1]
class(out) <- c(class(out))
return(out)
}
DeselectBoostLSS_2 <- function(object, data = NULL , fam, tau = NULL, method = c('attributable','cumulative')){
require('plyr')
data = attr(object,'data')
mstop <- ifelse(any(class(object)%in%'nc_mboostLSS'), mstop(object)[1],sum(mstop(object)))
if(any(class(object) %in% "betaboost")){
parameter = names(object)
which.response <- which(sapply(1:dim(data)[2], function(x){identical(as.numeric(data[,x]), as.numeric(object[[1]]$response))}))
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
Risk.Var <- lapply(1:length(parameter),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()))
Risk.order <- data.frame(w.parameter, n.parameter, unlist(Risk.Var))
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- sum(Risk.order$Risk) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(any(RiskRedOver$parameter == parameter[1]) && any(RiskRedOver$parameter == parameter[2])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{
if(any(RiskRedOver$parameter == parameter[1])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
form2 <- as.formula(name.response~1)
}else{
form1 <- as.formula(name.response~1)
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}
}
model_after <- betaboost(form1, form2, data=data,
iterations = mstop , method = "noncycl", sl=c(object[[1]]$control$nu,object[[2]]$control$nu), weights = model.weights(object), form.type="gamboost")
}else{
if (length(risk(object, merge = T)) > (mstop + 2)) return(stop("risk cannot contain more entries than mstop"))
parameter = names(object)
which.response <- which(sapply(1:dim(data)[2], function(x){identical(as.numeric(data[,x]), as.numeric(object$mu$response))}))
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
select1 <- selected(object, parameter = names(object))
if(any(class(object) %in% 'glmboostLSS')){
select[[1]] <- select[[1]]-1
select[[2]] <- select[[2]]-1
select[[1]] <- select[[1]][select[[1]] !=0]
select[[2]] <- select[[2]][select[[2]] !=0]
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
diffRiskRed[[1]] <- diffRiskRed[[1]][select1[[1]]-1 != 0]
diffRiskRed[[2]] <- diffRiskRed[[2]][select1[[2]]-1 != 0]
}else{
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
}
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
zero <- sapply(1:length(parameter),function(i){length(select[[i]])})
Risk.Var <- lapply(which(zero != 0),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()))
if('(Intercept)' %in% n.parameter) n.parameter <- n.parameter[-which(n.parameter == '(Intercept)')]
Risk.order <- data.frame(w.parameter, n.parameter, unlist(Risk.Var))
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau) # 0.01 is default value
percRiskRed <- (risk(object, merge=T)[1]-risk(object, merge=T)[length(risk(object, merge=T))]) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(is.na(parameter[2])) parameter[2]<-0
if(is.na(parameter[1])) parameter[1]<-0
if(any(RiskRedOver$parameter == parameter[1]) && any(RiskRedOver$parameter == parameter[2])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{
if(any(RiskRedOver$parameter == parameter[1])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
form2 <- as.formula(paste(name.response, '~1'))
}else{
form1 <- as.formula(paste(name.response, '~1'))
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}
}
formula <- list(form1,form2)
names(formula)<- names(object)
dfbase <- environment(environment(environment(object[[1]][["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["ret"]][["model.frame"]])[["df"]]
if(any(class(object) %in% 'nc_mboostLSS')){
if(any(class(object) %in% "glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu))))
}
}else{
if(any(class(object) %in% "glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu))))
}
}
}
out <- model_after
deselect_para <- list(tau = perc, deselectmethod = method[1])
out <-append(x = out, values = deselect_para)
class(out) <- c(class(out))
return(out)
}
DeselectBoostLSS_3 <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
data = attr(object,'data')
mstop <- ifelse(any(class(object)%in%'nc_mboostLSS'), mstop(object)[1],sum(mstop(object)))
if (length(risk(object, merge = T)) > (mstop + 3)) stop("risk cannot contain more entries than mstop")
parameter = names(object)
which.response <- which(colnames(data) %in% colnames(object[[1]]$response))# which(sapply(1:dim(data)[2], function(x){identical(as.numeric(data[,x]), as.numeric(object$mu$response))}))
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
select1 <- selected(object, parameter = names(object))
if(any(class(object) %in% 'glmboostLSS')){
select[[1]] <- select[[1]]-1
select[[2]] <- select[[2]]-1
select[[3]] <- select[[3]]-1
select[[1]] <- select[[1]][select[[1]] !=0]
select[[2]] <- select[[2]][select[[2]] !=0]
select[[3]] <- select[[3]][select[[3]] !=0]
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
diffRiskRed[[1]] <- diffRiskRed[[1]][select1[[1]]-1 != 0]
diffRiskRed[[2]] <- diffRiskRed[[2]][select1[[2]]-1 != 0]
diffRiskRed[[3]] <- diffRiskRed[[3]][select1[[3]]-1 != 0]
}else{
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
}
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
# If a parameter is not selected
zero <- sapply(1:length(parameter),function(i){length(select[[i]])})
Risk.Var <- lapply(which(zero != 0),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])),rep(parameter[3],length(count(select[[3]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()),names(object[[3]]$coef()))
if('(Intercept)' %in% n.parameter) n.parameter<-n.parameter[-which(n.parameter == '(Intercept)')]
Risk.order <- data.frame(w.parameter, n.parameter, unlist(Risk.Var))
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- sum(Risk.order$Risk) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(is.na(parameter[2])) parameter[2]<-0
if(is.na(parameter[1])) parameter[1]<-0
if(is.na(parameter[3])) parameter[3]<-0
if(length(name.response) > 1) name.response <- paste("cbind(",paste(name.response, collapse = ","),")")
if(any(RiskRedOver$parameter == parameter[1])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
}else{ form1 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[2])){
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{ form2 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[3])){
form3 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[3]], collapse= "+")))
}else{ form3 <- as.formula(paste(name.response, '~1'))}
formula <- list(form1,form2,form3)
names(formula)<- names(object)
dfbase <- environment(environment(environment(object[[1]][["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["ret"]][["model.frame"]])[["df"]]
if(inherits(object,'nc_mboostLSS')){
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu))))}
}else{
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu))))
}
}
out <- model_after
deselect_para <- list(tau = perc, deselectmethod = method[1])
out <-append(x = out, values = deselect_para)
class(out) <- c(class(out))
return(out)
}
DeselectBoostLSS_4 <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
data = attr(object,'data')
mstop <- ifelse(any(class(object)%in%'nc_mboostLSS'), mstop(object)[1],sum(mstop(object)))
if (length(risk(object, merge = T)) > (mstop + 4)) stop("risk cannot contain more entries than mstop")
parameter = names(object)
which.response <- which(sapply(1:dim(data)[2], function(x){identical(as.numeric(data[,x]), as.numeric(object$mu$response))}))
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
select1 <- selected(object, parameter = names(object))
if(any(class(object) %in% 'glmboostLSS')){
select[[1]] <- select[[1]]-1
select[[2]] <- select[[2]]-1
select[[3]] <- select[[3]]-1
select[[4]] <- select[[4]]-1
select[[1]] <- select[[1]][select[[1]] !=0]
select[[2]] <- select[[2]][select[[2]] !=0]
select[[3]] <- select[[3]][select[[3]] !=0]
select[[4]] <- select[[4]][select[[4]] !=0]
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
diffRiskRed[[1]] <- diffRiskRed[[1]][select1[[1]]-1 != 0]
diffRiskRed[[2]] <- diffRiskRed[[2]][select1[[2]]-1 != 0]
diffRiskRed[[3]] <- diffRiskRed[[3]][select1[[3]]-1 != 0]
diffRiskRed[[4]] <- diffRiskRed[[4]][select1[[4]]-1 != 0]
}else{
RiskRed <- risk(object)
totalRiskRed <- lapply(1:length(parameter),function(i){RiskRed[[i]][1] - RiskRed[[i]][length(RiskRed[[i]])]})
diffRiskRed <- lapply(1:length(parameter),function(j){sapply(seq(1:(length(RiskRed[[j]])-1)),function(k){RiskRed[[j]][k]-RiskRed[[j]][k+1]})})
}
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
# If a parameter is not selected
zero<-sapply(1:length(parameter),function(i){length(select[[i]])})
Risk.Var <- lapply(which(zero != 0),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])),rep(parameter[3],length(count(select[[3]])[[1]])),rep(parameter[4],length(count(select[[4]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()),names(object[[3]]$coef()),names(object[[4]]$coef()))
if('(Intercept)' %in% n.parameter) n.parameter<-n.parameter[-which(n.parameter == '(Intercept)')]
Risk.order <- data.frame(w.parameter, n.parameter, unlist(Risk.Var))
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- sum(Risk.order$Risk) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(is.na(parameter[2])) parameter[2]<-0
if(is.na(parameter[1])) parameter[1]<-0
if(is.na(parameter[3])) parameter[3]<-0
if(is.na(parameter[4])) parameter[4]<-0
if(any(RiskRedOver$parameter == parameter[1])){
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
}else{ form1 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[2])){
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{ form2 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[3])){
form3 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[3]], collapse= "+")))
}else{ form3 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[4])){
form4 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[4]], collapse= "+")))
}else{ form4 <- as.formula(paste(name.response, '~1'))}
formula <- list(form1,form2,form3,form4)
names(formula)<- names(object)
dfbase <- environment(environment(environment(object[[1]][["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["ret"]][["model.frame"]])[["df"]]
if(inherits(object,"nc_mboostLSS")){
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu))))
}
}else{
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu))))
}
}
out <- model_after
deselect_para <- list(tau = perc, deselectmethod = method[1])
out <- append(x = out, values = deselect_para)
class(out) <- c(class(out))
return(out)
}
DeselectBoostLSS_5 <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
data = attr(object,'data')
mstop <- ifelse(inherits(object,'nc_mboostLSS'), mstop(object)[1],sum(mstop(object)))
if (length(risk(object, merge = T)) > (mstop + 5)) stop("risk cannot contain more entries than mstop")
parameter = names(object)
which.response <- colnames(object[[1]]$response) == colnames(data)
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
select1 <- selected(object, parameter = names(object))
RiskRed <- risk(object,merge = T)
totalRiskRed <- (risk(object, merge=T)[1]-risk(object, merge=T)[length(risk(object, merge=T))])
diffRiskRed_all <-sapply(1:(length(RiskRed)-1),function(k){RiskRed[k]-RiskRed[k+1]})
diffRiskRed <- vector("list")
diffRiskRed[[1]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[1]]]
diffRiskRed[[2]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[2]]]
diffRiskRed[[3]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[3]]]
diffRiskRed[[4]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[4]]]
diffRiskRed[[5]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[5]]]
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
# If a parameter is not selected
# zero <- sapply(1:length(parameter),function(i){length(select[[i]])})
# Risk.Var <- lapply(which(zero != 0),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
Risk.Var <- lapply(1:length(parameter),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
# diffRiskRed[[i]]
# which(count(select[[i]])[[1]][j] == select[[i]])
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])),rep(parameter[3],length(count(select[[3]])[[1]])),rep(parameter[4],length(count(select[[4]])[[1]])),rep(parameter[5],length(count(select[[5]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()),names(object[[3]]$coef()),names(object[[4]]$coef()),names(object[[5]]$coef()))
if('(Intercept)' %in% n.parameter) {
w.parameter <- w.parameter[-which(n.parameter == '(Intercept)')]
Risk.Var <- unlist(Risk.Var)[-which(n.parameter == '(Intercept)')]
n.parameter<-n.parameter[-which(n.parameter == '(Intercept)')]
}
Risk.order <- data.frame(w.parameter, n.parameter, unlist(Risk.Var))
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- sum(Risk.order$Risk) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(is.na(parameter[2])) parameter[2]<-0
if(is.na(parameter[1])) parameter[1]<-0
if(is.na(parameter[3])) parameter[3]<-0
if(is.na(parameter[4])) parameter[4]<-0
if(is.na(parameter[5])) parameter[5]<-0
if(length(name.response) > 1) name.response <- paste("cbind(",paste(name.response, collapse = ","),")")
if(any(RiskRedOver$parameter == parameter[1])){
# if(length(name.response) >1) name.response <- paste("cbind(",paste(name.response, collapse = ","),")")
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
}else{ form1 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[2])){
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{ form2 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[3])){
form3 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[3]], collapse= "+")))
}else{ form3 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[4])){
form4 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[4]], collapse= "+")))
}else{ form4 <- as.formula(paste(name.response, '~1'))}
if(any(RiskRedOver$parameter == parameter[5])){
form5 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[5]], collapse= "+")))
}else{ form5 <- as.formula(paste(name.response, '~1'))}
formula <- list(form1,form2,form3,form4,form5)
names(formula)<- names(object)
dfbase <- environment(environment(environment(object[[1]][["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["ret"]][["model.frame"]])[["df"]]
if(inherits(object,"nc_mboostLSS")){
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}
}else{
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}
}
out <- model_after
Coef <- coef(model_after)
deselect_para <- list(coef = Coef, tau = tau, deselectmethod = method[1])
out <- append(x = out, values = deselect_para)
class(out) <- c(class(out))
return(out)
}
DeselectBoostLSS_multi_5 <- function(object, data = NULL, fam, tau = NULL, method = c('attributable','cumulative')){
data = attr(object,'data')
mstop <- ifelse(inherits(object,'nc_mboostLSS'), mstop(object)[1],sum(mstop(object)))
if (length(risk(object, merge = T)) > (mstop + 5)) stop("risk cannot contain more entries than mstop")
parameter = names(object)
which.response <- colnames(object[[1]]$response) == colnames(data)
name.response <- colnames(data)[which.response]
select <- selected(object, parameter = names(object))
select1 <- selected(object, parameter = names(object))
RiskRed <- risk(object,merge = T)
totalRiskRed <- (risk(object, merge=T)[1]-risk(object, merge=T)[length(risk(object, merge=T))])
diffRiskRed_all <-sapply(1:(length(RiskRed)-1),function(k){RiskRed[k]-RiskRed[k+1]})
diffRiskRed <- vector("list")
diffRiskRed[[1]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[1]]]
diffRiskRed[[2]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[2]]]
diffRiskRed[[3]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[3]]]
diffRiskRed[[4]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[4]]]
diffRiskRed[[5]] <- diffRiskRed_all[names(diffRiskRed_all)==parameter[[5]]]
Var = lapply(1:length(parameter), function(i){count(select[[i]])[2]})
# If a parameter is not selected
# zero <- sapply(1:length(parameter),function(i){length(select[[i]])})
# Risk.Var <- lapply(which(zero != 0),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
Risk.Var <- lapply(1:length(parameter),function(i){sapply(1:dim(Var[[i]])[1],function(j){sum(diffRiskRed[[i]][which(count(select[[i]])[[1]][j] == select[[i]])])})})
# diffRiskRed[[i]]
# which(count(select[[i]])[[1]][j] == select[[i]])
w.parameter <- c(rep(parameter[1],length(count(select[[1]])[[1]])),rep(parameter[2],length(count(select[[2]])[[1]])),rep(parameter[3],length(count(select[[3]])[[1]])),rep(parameter[4],length(count(select[[4]])[[1]])),rep(parameter[5],length(count(select[[5]])[[1]])))
n.parameter <- c(names(object[[1]]$coef()),names(object[[2]]$coef()),names(object[[3]]$coef()),names(object[[4]]$coef()),names(object[[5]]$coef()))
if('(Intercept)' %in% n.parameter) {
w.parameter <- w.parameter[-which(n.parameter == '(Intercept)')]
Risk.Var <- unlist(Risk.Var)[-which(n.parameter == '(Intercept)')]
n.parameter<-n.parameter[-which(n.parameter == '(Intercept)')]
}
Risk.order <- data.frame(w.parameter[!is.na(unlist(Risk.Var))], n.parameter, unlist(Risk.Var)[!is.na(unlist(Risk.Var))])
colnames(Risk.order) <- c('parameter', 'VarName', 'Risk')
Risk.order <- Risk.order[order(Risk.order$Risk),]
Risk.order$CumRisk <- cumsum(Risk.order$Risk)
perc <- ifelse(is.null(tau), 0.01, tau)
percRiskRed <- (risk(object, merge=T)[1]-risk(object, merge=T)[length(risk(object, merge=T))]) * perc
if(method[1] == 'attributable'){RiskRedOver <- Risk.order[which(Risk.order$Risk > percRiskRed),]
}else{RiskRedOver <- Risk.order[which(Risk.order$CumRisk > percRiskRed),]}
if(is.na(parameter[2])) parameter[2]<-0
if(is.na(parameter[1])) parameter[1]<-0
if(is.na(parameter[3])) parameter[3]<-0
if(is.na(parameter[4])) parameter[4]<-0
if(is.na(parameter[5])) parameter[5]<-0
if(length(name.response) > 1) name.response <- paste("cbind(",paste(name.response, collapse = ","),")")
if(any(RiskRedOver$parameter == parameter[1])){
# if(length(name.response) >1) name.response <- paste("cbind(",paste(name.response, collapse = ","),")")
form1 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[1]], collapse= "+")))
}else{ form1 <- as.formula(paste(name.response, '~bols(Intercept, intercept=F)'))}
if(any(RiskRedOver$parameter == parameter[2])){
form2 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[2]], collapse= "+")))
}else{ form2 <- as.formula(paste(name.response, '~bols(Intercept, intercept=F)'))}
if(any(RiskRedOver$parameter == parameter[3])){
form3 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[3]], collapse= "+")))
}else{ form3 <- as.formula(paste(name.response, '~bols(Intercept, intercept=F)'))}
if(any(RiskRedOver$parameter == parameter[4])){
form4 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[4]], collapse= "+")))
}else{ form4 <- as.formula(paste(name.response, '~bols(Intercept, intercept=F)'))}
if(any(RiskRedOver$parameter == parameter[5])){
form5 <- as.formula(paste(name.response, " ~ ", paste(RiskRedOver$VarName[RiskRedOver$parameter == parameter[5]], collapse= "+")))
}else{ form5 <- as.formula(paste(name.response, '~bols(Intercept, intercept=F)'))}
formula <- list(form1,form2,form3,form4,form5)
names(formula)<- names(object)
dfbase <- environment(environment(environment(object[[1]][["fitted"]])[["RET"]][["baselearner"]][[1]][["model.frame"]])[["ret"]][["model.frame"]])[["df"]]
if(inherits(object,"nc_mboostLSS")){
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'noncyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}
}else{
if(inherits(object,"glmboostLSS")){
model_after = glmboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}else{
model_after = gamboostLSS(formula, data = data, families = fam, method = 'cyclic', weights = model.weights(object), control = boost_control(mstop = mstop(object), nu = list(as.numeric(object[[1]]$control$nu),as.numeric(object[[2]]$control$nu),as.numeric(object[[3]]$control$nu),as.numeric(object[[4]]$control$nu),as.numeric(object[[5]]$control$nu))))
}
}
out <- model_after
Coef <- coef(model_after)
deselect_para <- list(coef = Coef, tau = perc, deselectmethod = method[1])
out <- append(x = out, values = deselect_para)
class(out) <- c(class(out))
return(out)
}