# Read in glow_bonemed
# Explore dataset
library(aplore3)
bonemed <-glow_bonemed
library(aplore3)
#need to narrow down and understand each one abit more
?glow_bonemed
?glow500
#Look the data for hig level summaries
head(bonemed)
## sub_id site_id phy_id priorfrac age weight height bmi premeno momfrac
## 1 1 1 14 No 62 70.3 158 28.16055 No No
## 2 2 4 284 No 65 87.1 160 34.02344 No No
## 3 3 6 305 Yes 88 50.8 157 20.60936 No Yes
## 4 4 6 309 No 82 62.1 160 24.25781 No No
## 5 5 1 37 No 61 68.0 152 29.43213 No No
## 6 6 5 299 Yes 67 68.0 161 26.23356 No No
## armassist smoke raterisk fracscore fracture bonemed bonemed_fu bonetreat
## 1 No No Same 1 No No No No
## 2 No No Same 2 No No No No
## 3 Yes No Less 11 No No No No
## 4 No No Less 5 No No No No
## 5 No No Same 1 No No No No
## 6 No Yes Same 4 No No No No
summary(bonemed)
## sub_id site_id phy_id priorfrac age
## Min. : 1.0 Min. :1.000 Min. : 1.00 No :374 Min. :55.00
## 1st Qu.:125.8 1st Qu.:2.000 1st Qu.: 57.75 Yes:126 1st Qu.:61.00
## Median :250.5 Median :3.000 Median :182.50 Median :67.00
## Mean :250.5 Mean :3.436 Mean :178.55 Mean :68.56
## 3rd Qu.:375.2 3rd Qu.:5.000 3rd Qu.:298.00 3rd Qu.:76.00
## Max. :500.0 Max. :6.000 Max. :325.00 Max. :90.00
## weight height bmi premeno momfrac armassist
## Min. : 39.90 Min. :134.0 Min. :14.88 No :403 No :435 No :312
## 1st Qu.: 59.90 1st Qu.:157.0 1st Qu.:23.27 Yes: 97 Yes: 65 Yes:188
## Median : 68.00 Median :161.5 Median :26.42
## Mean : 71.82 Mean :161.4 Mean :27.55
## 3rd Qu.: 81.30 3rd Qu.:165.0 3rd Qu.:30.79
## Max. :127.00 Max. :199.0 Max. :49.08
## smoke raterisk fracscore fracture bonemed bonemed_fu
## No :465 Less :167 Min. : 0.000 No :375 No :371 No :361
## Yes: 35 Same :186 1st Qu.: 2.000 Yes:125 Yes:129 Yes:139
## Greater:147 Median : 3.000
## Mean : 3.698
## 3rd Qu.: 5.000
## Max. :11.000
## bonetreat
## No :382
## Yes:118
##
##
##
##
#Convert the variables to appropriate datatype, if applicable
bonemed$Fracture.Num<-ifelse(bonemed$fracture=="Yes",2,1)
#bonemed$Fracture.Num<-as.factor(ifelse(bonemed$fracture=="Yes",2,1))
bonemed$fracscoref <- as.factor(bonemed$fracscore)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
#BMI & Weight are highly correlated. Need to drop one, weight, which more correlated to height than BMI
ggpairs(bonemed,
aes(color=fracture),
columnLabels = c('Age','Weight','Height','BMI'),
columns = 5:8) +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
View(bonemed)
ggsave("output_plot_eda_project_matrix2.png", width = 8, height = 10)
## Bar Chart Risk Fracture Scores
ggplot(bonemed,aes(x=fracscoref,color=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("Fracture Risk Score") + ylab("Number of Fractures") +
ggtitle("Number of Fractures per Fracture Risk Score") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = c(0.9, 0.7))
# Plot age by factor variables
library(gridExtra)
A1<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonetreat)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Bone Meds:Enroll & F-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
A2<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonemed)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Enrollment") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
A3<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonemed_fu)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Follow-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
A4<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=priorfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("History of Prior Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
A5<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=momfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Mother Had Hip Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
A6<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=armassist)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Arms Needed,Stand fm Chair") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
A7<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=smoke)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Former or Current Smoker") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
legend.position = c(1.2, 0.5),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
grid.arrange(A1,A2,A3,A4,A5,A6,A7,ncol=3,nrow=3, top="Age")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Plot bmi by factor variables
B1<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonetreat)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Bone Meds:Enroll & F-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B2<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonemed)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Enrollment") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B3<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonemed_fu)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Follow-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B4<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=priorfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("History of Prior Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B5<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=momfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Mother Had Hip Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B6<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=armassist)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Arms Needed,Stand fm Chair") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
B7<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=smoke)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Former or Current Smoker") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = c(1.2, 0.5),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
grid.arrange(B1,B2,B3,B4,B5,B6,B7,ncol=3,nrow=3, top="BMI")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Plot weight by factor variables
C1<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonetreat)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Bone Meds:Enroll & F-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C2<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonemed)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Enrollment") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C3<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonemed_fu)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Follow-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C4<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=priorfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("History of Prior Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C5<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=momfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Mother Had Hip Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C6<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=armassist)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Arms Needed,Stand fm Chair") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
C7<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=smoke)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Former or Current Smoker") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = c(1.2, 0.5),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
grid.arrange(C1,C2,C3,C4,C5,C6,C7,ncol=3,nrow=3, top="Weight")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Plot height by factor variables
D1<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonetreat)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Bone Meds:Enroll & F-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D2<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonemed)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Enrollment") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D3<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonemed_fu)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Bone Meds:Follow-Up") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D4<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=priorfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("History of Prior Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D5<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=momfrac)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Mother Had Hip Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D6<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=armassist)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("") + labs(color=NULL) +
ggtitle("Arms Needed,Stand fm Chair") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
legend.position = "none",
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
D7<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=smoke)) +
geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
xlab("") + ylab("Any Fracture") + labs(color=NULL) +
ggtitle("Former or Current Smoker") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue")) +
theme(plot.title = element_text(hjust = 0.1,size=10),
legend.position = c(1.2, 0.5),
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
grid.arrange(D1,D2,D3,D4,D5,D6,D7,ncol=3,nrow=3,top="Height")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Self reported risk of fracture
rr1<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=factor(raterisk))) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
facet_wrap(~factor(raterisk)) +
xlab("Age") + ylab("Risk of Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
rr2<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=factor(raterisk))) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
facet_wrap(~factor(raterisk), ncol = 3) +
xlab("BMI") + ylab("Risk of Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
rr3<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=factor(raterisk))) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
facet_wrap(~factor(raterisk)) +
xlab("Weight") + ylab("Risk of Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
rr4<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=factor(raterisk))) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
facet_wrap(~factor(raterisk)) +
xlab("Height") + ylab("Risk of Fracture") +
theme_classic() +
scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
axis.text.x = element_text(angle=45, vjust=1, hjust=1),
panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'))
grid.arrange(rr1,rr2,rr3,rr4,ncol=2,nrow=2,top="Self Reported Risk of Fracture")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#These variables shouldn't be included, identifiers, however need to verify
ggplot(bonemed,aes(x=site_id,y=Fracture.Num)) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
ggtitle("Site ID")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(bonemed,aes(x=phy_id,y=Fracture.Num)) +
geom_point() + geom_smooth(method="loess",size=1,span=1) +
ggtitle("Physician ID")
## `geom_smooth()` using formula = 'y ~ x'
ggpairs(bonemed,
aes(colour=fracture),
axisLabels = "none",
columns = 1:3)
# install.packages("lemon")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lemon)
# Numbers/Proportion of Fractures
# Character variables for review
E1c<-ggplot(bonemed,aes(x=priorfrac,colour=fracture),) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E1 <-bonemed %>%
group_by(priorfrac,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'priorfrac'. You can override using the
## `.groups` argument.
E1p<-ggplot(E1[c(1,2),],aes(x=reorder(priorfrac,-perc),y=perc,colour=priorfrac))+
geom_bar(aes(fill=priorfrac),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E1c,E1p,ncol=2,nrow=1,top="Prior Fractures")
###
E2c<-ggplot(bonemed,aes(x=premeno,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E2 <-bonemed %>%
group_by(premeno,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'premeno'. You can override using the
## `.groups` argument.
E2p<-ggplot(E2[c(1,2),],aes(x=reorder(premeno,-perc),y=perc,colour=premeno))+
geom_bar(aes(fill=premeno),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E2c,E2p,ncol=2,nrow=1,top="Menopause Before Age 45")
###
E3c<-ggplot(bonemed,aes(x=armassist,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E3 <-bonemed %>%
group_by(armassist,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'armassist'. You can override using the
## `.groups` argument.
E3p<-ggplot(E3[c(1,2),],aes(x=reorder(armassist,-perc),y=perc,colour=armassist))+
geom_bar(aes(fill=armassist),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E3c,E3p,ncol=2,nrow=1,top="Arms Needed to Stand from Chair")
###
E4c<-ggplot(bonemed,aes(x=smoke,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E4 <-bonemed %>%
group_by(smoke,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'smoke'. You can override using the
## `.groups` argument.
E4p<-ggplot(E4[c(1,2),],aes(x=reorder(smoke,-perc),y=perc,colour=smoke))+
geom_bar(aes(fill=smoke),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E4c,E4p,ncol=2,nrow=1,top="Former or Current Smoker")
###
E6c<-ggplot(bonemed,aes(x=momfrac,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E6 <-bonemed %>%
group_by(momfrac,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'momfrac'. You can override using the
## `.groups` argument.
E6p<-ggplot(E6[c(1,2),],aes(x=reorder(momfrac,-perc),y=perc,colour=momfrac))+
geom_bar(aes(fill=momfrac),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E6c,E6p,ncol=2,nrow=1,top="Mother had Hip Fracture")
# install.packages("lemon")
library(lemon)
# Numbers/Proportion of Fractures
# Character variables for review
E1c<-ggplot(bonemed,aes(x=priorfrac,colour=fracture),) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E1 <-bonemed %>%
group_by(priorfrac,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'priorfrac'. You can override using the
## `.groups` argument.
E1p<-ggplot(E1[c(3,4),],aes(x=reorder(priorfrac,-perc),y=perc,colour=priorfrac))+
geom_bar(aes(fill=priorfrac),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E1c,E1p,ncol=2,nrow=1,top="Prior Fractures")
###
E2c<-ggplot(bonemed,aes(x=premeno,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E2 <-bonemed %>%
group_by(premeno,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'premeno'. You can override using the
## `.groups` argument.
E2p<-ggplot(E2[c(3,4),],aes(x=reorder(premeno,-perc),y=perc,colour=premeno))+
geom_bar(aes(fill=premeno),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E2c,E2p,ncol=2,nrow=1,top="Menopause Before Age 45")
###
E3c<-ggplot(bonemed,aes(x=armassist,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E3 <-bonemed %>%
group_by(armassist,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'armassist'. You can override using the
## `.groups` argument.
E3p<-ggplot(E3[c(3,4),],aes(x=reorder(armassist,-perc),y=perc,colour=armassist))+
geom_bar(aes(fill=armassist),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E3c,E3p,ncol=2,nrow=1,top="Arms Needed to Stand from Chair")
###
E4c<-ggplot(bonemed,aes(x=smoke,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E4 <-bonemed %>%
group_by(smoke,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'smoke'. You can override using the
## `.groups` argument.
E4p<-ggplot(E4[c(3,4),],aes(x=reorder(smoke,-perc),y=perc,colour=smoke))+
geom_bar(aes(fill=smoke),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E4c,E4p,ncol=2,nrow=1,top="Former or Current Smoker")
###
E6c<-ggplot(bonemed,aes(x=momfrac,colour=fracture)) +
geom_bar(aes(fill=fracture)) +
xlab("") + ylab("Number of Fractures") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
E6 <-bonemed %>%
group_by(momfrac,fracture) %>%
summarise(cnt=n()) %>%
mutate(perc=round(cnt/sum(cnt),4))%>%
arrange(desc(perc))
## `summarise()` has grouped output by 'momfrac'. You can override using the
## `.groups` argument.
E6p<-ggplot(E6[c(3,4),],aes(x=reorder(momfrac,-perc),y=perc,colour=momfrac))+
geom_bar(aes(fill=momfrac),show.legend=T,stat="identity")+
ylab("Proportion of Having Fractures ") + xlab("") +
scale_fill_manual(values=c('darkred','darkblue')) +
scale_colour_manual(values=c('darkred','darkblue')) +
theme_classic() +
theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
strip.background = element_rect(fill = 'grey95', color = 'black'),
legend.position = "none")
grid_arrange_shared_legend(E6c,E6p,ncol=2,nrow=1,top="Mother had Hip Fracture")
#Split our Dataset
# install.packages("pROC")
library(caret)
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
set.seed(1234)
trainIndex<-createDataPartition(bonemed$fracture,p=.70,list=F)
train <- bonemed[trainIndex,]
test <- bonemed[-trainIndex,]
#Model 1 simple additive model- no complexity added: simpleFit versus simpleFit2 (our model dropping bmi, fracscore, bonetreat) on training dataset
simpleFit <- glm(fracture~priorfrac + age + weight + height + bmi + premeno + momfrac + armassist + smoke + raterisk + fracscore+ bonemed + bonemed_fu + bonetreat, data=train,family="binomial")
simpleFit2 <- glm(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu, data=train,family="binomial")
summary(simpleFit)
##
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height +
## bmi + premeno + momfrac + armassist + smoke + raterisk +
## fracscore + bonemed + bonemed_fu + bonetreat, family = "binomial",
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.13403 -0.72278 -0.52107 0.04504 2.23762
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.882227 15.628326 -0.824 0.40978
## priorfracYes 0.693187 0.464558 1.492 0.13566
## age 0.037700 0.066489 0.567 0.57071
## weight -0.092030 0.105323 -0.874 0.38224
## height 0.048330 0.098960 0.488 0.62529
## bmi 0.258150 0.271151 0.952 0.34107
## premenoYes 0.339882 0.358045 0.949 0.34248
## momfracYes 1.092681 0.518016 2.109 0.03491 *
## armassistYes 0.318889 0.728692 0.438 0.66166
## smokeYes -0.174829 0.676671 -0.258 0.79612
## rateriskSame 0.161346 0.347174 0.465 0.64212
## rateriskGreater 0.155500 0.387607 0.401 0.68829
## fracscore -0.006928 0.333286 -0.021 0.98342
## bonemedYes 2.530725 1.160812 2.180 0.02925 *
## bonemed_fuYes 1.559774 0.563121 2.770 0.00561 **
## bonetreatYes -3.851705 1.287125 -2.992 0.00277 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 395.31 on 350 degrees of freedom
## Residual deviance: 340.16 on 335 degrees of freedom
## AIC: 372.16
##
## Number of Fisher Scoring iterations: 4
summary(simpleFit2)
##
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height +
## premeno + momfrac + armassist + smoke + raterisk + bonemed +
## bonemed_fu, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8304 -0.7594 -0.5449 0.1339 2.2374
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.530517 3.786358 0.404 0.68605
## priorfracYes 0.777576 0.298661 2.604 0.00923 **
## age 0.041686 0.017522 2.379 0.01736 *
## weight 0.008035 0.010098 0.796 0.42620
## height -0.042567 0.022434 -1.897 0.05777 .
## premenoYes 0.300518 0.350431 0.858 0.39113
## momfracYes 1.067368 0.380868 2.802 0.00507 **
## armassistYes 0.285040 0.314312 0.907 0.36448
## smokeYes -0.169793 0.559139 -0.304 0.76138
## rateriskSame 0.104113 0.337918 0.308 0.75800
## rateriskGreater 0.155840 0.378042 0.412 0.68017
## bonemedYes -0.291392 0.504787 -0.577 0.56377
## bonemed_fuYes 0.600553 0.495126 1.213 0.22516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 395.31 on 350 degrees of freedom
## Residual deviance: 353.20 on 338 degrees of freedom
## AIC: 379.2
##
## Number of Fisher Scoring iterations: 4
pred <- predict(simpleFit,test,type = "response")
simpleROC <- roc(response = test$fracture, predictor = pred, levels = c("No","Yes"))
## Setting direction: controls < cases
pred2 <- predict(simpleFit2,test,type = "response")
simpleROC2 <- roc(response = test$fracture, predictor = pred2, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(simpleROC)#,print.thres = "best")
plot(simpleROC2,print.thres = "best",col = "red", add = T)
legend("bottomright",
legend=c("simpleFit", "simpleFit2"),
col=c("black", "red"),
lwd=4, cex =1, xpd = TRUE, horiz = FALSE)
threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
##
##
## threshPred No Yes
## No 65 7
## Yes 47 30
##
## Accuracy : 0.6376
## 95% CI : (0.5549, 0.7147)
## No Information Rate : 0.7517
## P-Value [Acc > NIR] : 0.9993
##
## Kappa : 0.2872
##
## Mcnemar's Test P-Value : 1.113e-07
##
## Sensitivity : 0.5804
## Specificity : 0.8108
## Pos Pred Value : 0.9028
## Neg Pred Value : 0.3896
## Prevalence : 0.7517
## Detection Rate : 0.4362
## Detection Prevalence : 0.4832
## Balanced Accuracy : 0.6956
##
## 'Positive' Class : No
##
exp(coef(simpleFit))
## (Intercept) priorfracYes age weight height
## 2.542845e-06 2.000080e+00 1.038419e+00 9.120777e-01 1.049517e+00
## bmi premenoYes momfracYes armassistYes smokeYes
## 1.294533e+00 1.404781e+00 2.982259e+00 1.375598e+00 8.396008e-01
## rateriskSame rateriskGreater fracscore bonemedYes bonemed_fuYes
## 1.175091e+00 1.168242e+00 9.930958e-01 1.256261e+01 4.757748e+00
## bonetreatYes
## 2.124348e-02
exp(coef(simpleFit2))
## (Intercept) priorfracYes age weight height
## 4.6205662 2.1761905 1.0425673 1.0080673 0.9583258
## premenoYes momfracYes armassistYes smokeYes rateriskSame
## 1.3505582 2.9077177 1.3298156 0.8438397 1.1097260
## rateriskGreater bonemedYes bonemed_fuYes
## 1.1686387 0.7472229 1.8231259
#unsure of interpretation
vif(simpleFit)
## GVIF Df GVIF^(1/(2*Df))
## priorfrac 2.636703 1 1.623793
## age 19.803720 1 4.450137
## weight 161.879275 1 12.723179
## height 22.109206 1 4.702043
## bmi 152.626428 1 12.354207
## premeno 1.141684 1 1.068496
## momfrac 2.011768 1 1.418368
## armassist 7.218523 1 2.686731
## smoke 1.486298 1 1.219138
## raterisk 1.370565 2 1.081994
## fracscore 36.742820 1 6.061586
## bonemed 15.523977 1 3.940048
## bonemed_fu 3.992355 1 1.998088
## bonetreat 18.535272 1 4.305261
#square the third column to get the VIF for interpretation
vif_values <- vif(simpleFit)[, 3]
# Square each value in the column
squared_vif <- vif_values^2
# Print or use the squared VIF values as needed
print(squared_vif)
## priorfrac age weight height bmi premeno momfrac
## 2.636703 19.803720 161.879275 22.109206 152.626428 1.141684 2.011768
## armassist smoke raterisk fracscore bonemed bonemed_fu bonetreat
## 7.218523 1.486298 1.170711 36.742820 15.523977 3.992355 18.535272
vif(simpleFit2)
## GVIF Df GVIF^(1/(2*Df))
## priorfrac 1.161462 1 1.077711
## age 1.462038 1 1.209148
## weight 1.615203 1 1.270906
## height 1.199241 1 1.095099
## premeno 1.122091 1 1.059288
## momfrac 1.108781 1 1.052987
## armassist 1.413060 1 1.188722
## smoke 1.086412 1 1.042311
## raterisk 1.368492 2 1.081585
## bonemed 3.134160 1 1.770356
## bonemed_fu 3.239427 1 1.799841
#square the third column to get the VIF for interpretation
vif_values_2 <- vif(simpleFit2)[, 3]
# Square each value in the column
squared_vif_2 <- vif_values_2^2
# Print or use the squared VIF values as needed
print(squared_vif_2)
## priorfrac age weight height premeno momfrac armassist
## 1.161462 1.462038 1.615203 1.199241 1.122091 1.108781 1.413060
## smoke raterisk bonemed bonemed_fu
## 1.086412 1.169825 3.134160 3.239427
exp(confint(simpleFit))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 6.681249e-20 3.927494e+07
## priorfracYes 8.086189e-01 5.025006e+00
## age 9.119311e-01 1.184459e+00
## weight 7.366907e-01 1.117140e+00
## height 8.652955e-01 1.278407e+00
## bmi 7.667621e-01 2.241066e+00
## premenoYes 6.845111e-01 2.805011e+00
## momfracYes 1.087559e+00 8.354658e+00
## armassistYes 3.302160e-01 5.807852e+00
## smokeYes 2.062785e-01 3.031842e+00
## rateriskSame 5.957434e-01 2.336448e+00
## rateriskGreater 5.447745e-01 2.504448e+00
## fracscore 5.142176e-01 1.907485e+00
## bonemedYes 1.719265e+00 2.578341e+02
## bonemed_fuYes 1.593514e+00 1.488796e+01
## bonetreatYes 8.934937e-04 2.074946e-01
exp(confint(simpleFit2))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.002778797 8275.372400
## priorfracYes 1.208986755 3.910684
## age 1.007538173 1.079431
## weight 0.988123408 1.028176
## height 0.916399839 1.001038
## premenoYes 0.668192139 2.657400
## momfracYes 1.369277740 6.139585
## armassistYes 0.715509295 2.460909
## smokeYes 0.254954482 2.371788
## rateriskSame 0.572380717 2.163469
## rateriskGreater 0.555804952 2.459731
## bonemedYes 0.274849345 2.015713
## bonemed_fuYes 0.684619458 4.839544
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
hoslem.test(simpleFit$y,fitted(simpleFit))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: simpleFit$y, fitted(simpleFit)
## X-squared = 13.059, df = 8, p-value = 0.1098
library(ResourceSelection)
hoslem.test(simpleFit2$y,fitted(simpleFit2))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: simpleFit2$y, fitted(simpleFit2)
## X-squared = 5.4021, df = 8, p-value = 0.7139
simpleROC$auc
## Area under the curve: 0.7107
simpleROC2$auc
## Area under the curve: 0.7362
library(sjPlot)
library(sjmisc)
## Learn more about sjmisc with 'browseVignettes("sjmisc")'.
#effects plot
plot_model(simpleFit2,type="pred",terms=c("age"))
## Data were 'prettified'. Consider using `terms="age [all]"` to get smooth
## plots.
plot_model(simpleFit2,type="pred",terms=c("priorfrac"))
plot_model(simpleFit2,type="pred",terms=c("weight"))
## Data were 'prettified'. Consider using `terms="weight [all]"` to get
## smooth plots.
plot_model(simpleFit2,type="pred",terms=c("height"))
## Data were 'prettified'. Consider using `terms="height [all]"` to get
## smooth plots.
# plot_model(simpleFit2,type="pred",terms=c("bmi"))
plot_model(simpleFit2,type="pred",terms=c("premeno"))
plot_model(simpleFit2,type="pred",terms=c("momfrac"))
plot_model(simpleFit2,type="pred",terms=c("armassist"))
plot_model(simpleFit2,type="pred",terms=c("smoke"))
# plot_model(simpleFit2,type="pred",terms=c("raterisk"))
# plot_model(simpleFit2,type="pred",terms=c("fracscore"))
plot_model(simpleFit2,type="pred",terms=c("bonemed"))
plot_model(simpleFit2,type="pred",terms=c("bonemed_fu"))
# plot_model(simpleFit2,type="pred",terms=c("bonetreat"))
#Stepwise feature selection on simpleFit2.
# Using simpleFit we try stepwise feature selection on the full model
# GLMNET and Stepwise feature selection tools exist in caret.
# These can be used to handle situations where the number of predictors is too large to handle manually.
# Like with MLR, you can use these tools to determine candidate models, but it is up to you to decide if interactions should be included, transformations applied, etc.
library(caret)
fitControl<-trainControl(method="repeatedcv",number=5,repeats=1,classProbs=TRUE, summaryFunction=mnLogLoss)
set.seed(1234)
#note CV and error metric are not really used here, but logLoss is reported for the final model.
Step.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
data=train,
method="glmStepAIC",
trControl=fitControl,
metric="logLoss")
## Start: AIC=311.84
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - premenoYes 1 285.86 309.86
## - rateriskSame 1 285.99 309.99
## - smokeYes 1 286.00 310.00
## - rateriskGreater 1 286.10 310.10
## - weight 1 286.33 310.33
## - armassistYes 1 286.81 310.81
## - bonemedYes 1 286.85 310.85
## - age 1 287.57 311.57
## - bonemed_fuYes 1 287.84 311.84
## <none> 285.84 311.84
## - height 1 289.03 313.03
## - priorfracYes 1 290.55 314.55
## - momfracYes 1 292.15 316.15
##
## Step: AIC=309.86
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - smokeYes 1 286.01 308.01
## - rateriskSame 1 286.02 308.02
## - rateriskGreater 1 286.14 308.14
## - weight 1 286.35 308.35
## - armassistYes 1 286.87 308.87
## - bonemedYes 1 286.88 308.88
## - age 1 287.57 309.57
## - bonemed_fuYes 1 287.84 309.84
## <none> 285.86 309.86
## - height 1 289.11 311.11
## - priorfracYes 1 290.61 312.61
## - momfracYes 1 292.18 314.18
##
## Step: AIC=308.01
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## armassistYes + rateriskSame + rateriskGreater + bonemedYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - rateriskSame 1 286.16 306.16
## - rateriskGreater 1 286.28 306.28
## - weight 1 286.57 306.57
## - armassistYes 1 286.94 306.94
## - bonemedYes 1 287.07 307.07
## <none> 286.01 308.01
## - age 1 288.09 308.09
## - bonemed_fuYes 1 288.21 308.21
## - height 1 289.23 309.23
## - priorfracYes 1 290.61 310.61
## - momfracYes 1 292.57 312.57
##
## Step: AIC=306.16
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## armassistYes + rateriskGreater + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskGreater 1 286.29 304.29
## - weight 1 286.76 304.76
## - bonemedYes 1 287.15 305.15
## - armassistYes 1 287.19 305.19
## - age 1 288.15 306.15
## <none> 286.16 306.16
## - bonemed_fuYes 1 288.37 306.37
## - height 1 289.58 307.58
## - priorfracYes 1 290.76 308.76
## - momfracYes 1 293.00 311.00
##
## Step: AIC=304.29
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## armassistYes + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - weight 1 286.79 302.79
## - bonemedYes 1 287.39 303.39
## - armassistYes 1 287.48 303.48
## - age 1 288.17 304.17
## <none> 286.29 304.29
## - bonemed_fuYes 1 289.07 305.07
## - height 1 289.64 305.64
## - priorfracYes 1 291.36 307.36
## - momfracYes 1 293.40 309.40
##
## Step: AIC=302.79
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - bonemedYes 1 288.26 302.26
## - age 1 288.27 302.27
## - armassistYes 1 288.71 302.71
## <none> 286.79 302.79
## - bonemed_fuYes 1 289.62 303.62
## - height 1 289.68 303.68
## - priorfracYes 1 292.03 306.03
## - momfracYes 1 293.58 307.58
##
## Step: AIC=302.26
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - age 1 289.44 301.44
## - bonemed_fuYes 1 289.64 301.64
## <none> 288.26 302.26
## - armassistYes 1 290.38 302.38
## - height 1 291.00 303.00
## - priorfracYes 1 293.38 305.38
## - momfracYes 1 295.48 307.48
##
## Step: AIC=301.44
## .outcome ~ priorfracYes + height + momfracYes + armassistYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - bonemed_fuYes 1 291.02 301.02
## <none> 289.44 301.44
## - height 1 292.74 302.74
## - armassistYes 1 292.95 302.95
## - priorfracYes 1 295.87 305.87
## - momfracYes 1 296.70 306.70
##
## Step: AIC=301.02
## .outcome ~ priorfracYes + height + momfracYes + armassistYes
##
## Df Deviance AIC
## <none> 291.02 301.02
## - armassistYes 1 294.61 302.61
## - height 1 295.26 303.26
## - priorfracYes 1 298.64 306.64
## - momfracYes 1 298.90 306.90
## Start: AIC=301.65
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskGreater 1 275.65 299.65
## - rateriskSame 1 275.73 299.73
## - bonemedYes 1 275.75 299.75
## - smokeYes 1 275.86 299.86
## - armassistYes 1 275.97 299.97
## - premenoYes 1 276.30 300.30
## - weight 1 277.00 301.00
## - bonemed_fuYes 1 277.47 301.47
## <none> 275.65 301.65
## - momfracYes 1 278.91 302.91
## - age 1 280.76 304.76
## - height 1 281.53 305.53
## - priorfracYes 1 282.89 306.89
##
## Step: AIC=299.65
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + bonemedYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - rateriskSame 1 275.75 297.75
## - bonemedYes 1 275.75 297.75
## - smokeYes 1 275.86 297.86
## - armassistYes 1 276.00 298.00
## - premenoYes 1 276.31 298.31
## - weight 1 277.03 299.03
## <none> 275.65 299.65
## - bonemed_fuYes 1 277.65 299.65
## - momfracYes 1 278.94 300.94
## - age 1 280.84 302.84
## - height 1 281.54 303.54
## - priorfracYes 1 283.05 305.05
##
## Step: AIC=297.75
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - bonemedYes 1 275.81 295.81
## - smokeYes 1 275.95 295.95
## - armassistYes 1 276.08 296.08
## - premenoYes 1 276.45 296.45
## - weight 1 277.26 297.26
## - bonemed_fuYes 1 277.66 297.66
## <none> 275.75 297.75
## - momfracYes 1 279.30 299.30
## - age 1 280.95 300.95
## - height 1 281.89 301.89
## - priorfracYes 1 283.08 303.08
##
## Step: AIC=295.82
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + bonemed_fuYes
##
## Df Deviance AIC
## - smokeYes 1 276.01 294.01
## - armassistYes 1 276.15 294.15
## - premenoYes 1 276.54 294.54
## - weight 1 277.49 295.49
## <none> 275.81 295.81
## - bonemed_fuYes 1 279.05 297.05
## - momfracYes 1 279.50 297.50
## - age 1 280.95 298.95
## - height 1 282.01 300.01
## - priorfracYes 1 283.10 301.10
##
## Step: AIC=294.01
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + bonemed_fuYes
##
## Df Deviance AIC
## - armassistYes 1 276.45 292.45
## - premenoYes 1 276.77 292.77
## - weight 1 277.56 293.56
## <none> 276.01 294.01
## - bonemed_fuYes 1 279.08 295.08
## - momfracYes 1 279.58 295.58
## - age 1 280.95 296.95
## - height 1 282.07 298.07
## - priorfracYes 1 283.53 299.53
##
## Step: AIC=292.45
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + bonemed_fuYes
##
## Df Deviance AIC
## - premenoYes 1 277.45 291.45
## <none> 276.45 292.45
## - weight 1 279.08 293.08
## - bonemed_fuYes 1 279.65 293.65
## - momfracYes 1 279.95 293.95
## - height 1 282.72 296.72
## - age 1 283.57 297.57
## - priorfracYes 1 284.55 298.55
##
## Step: AIC=291.45
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## bonemed_fuYes
##
## Df Deviance AIC
## <none> 277.45 291.45
## - weight 1 280.17 292.17
## - bonemed_fuYes 1 280.21 292.21
## - momfracYes 1 280.93 292.93
## - age 1 283.82 295.82
## - height 1 284.11 296.11
## - priorfracYes 1 285.63 297.63
## Start: AIC=303.23
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - weight 1 277.24 301.24
## - smokeYes 1 277.26 301.26
## - bonemedYes 1 277.55 301.55
## - height 1 277.74 301.74
## - rateriskSame 1 278.00 302.00
## - bonemed_fuYes 1 278.58 302.58
## - rateriskGreater 1 279.13 303.13
## <none> 277.23 303.23
## - armassistYes 1 279.56 303.56
## - age 1 279.72 303.72
## - premenoYes 1 280.77 304.77
## - momfracYes 1 281.68 305.68
## - priorfracYes 1 283.18 307.18
##
## Step: AIC=301.24
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - smokeYes 1 277.27 299.27
## - bonemedYes 1 277.55 299.55
## - height 1 277.81 299.81
## - rateriskSame 1 278.01 300.01
## - bonemed_fuYes 1 278.58 300.58
## - rateriskGreater 1 279.20 301.20
## <none> 277.24 301.24
## - armassistYes 1 279.94 301.94
## - age 1 280.34 302.34
## - premenoYes 1 280.78 302.78
## - momfracYes 1 281.81 303.81
## - priorfracYes 1 283.21 305.21
##
## Step: AIC=299.27
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + rateriskSame + rateriskGreater + bonemedYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - bonemedYes 1 277.60 297.60
## - height 1 277.86 297.86
## - rateriskSame 1 278.03 298.03
## - bonemed_fuYes 1 278.67 298.67
## - rateriskGreater 1 279.21 299.21
## <none> 277.27 299.27
## - armassistYes 1 279.94 299.94
## - age 1 280.42 300.42
## - premenoYes 1 280.78 300.78
## - momfracYes 1 281.93 301.93
## - priorfracYes 1 283.22 303.22
##
## Step: AIC=297.6
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + rateriskSame + rateriskGreater + bonemed_fuYes
##
## Df Deviance AIC
## - height 1 278.19 296.19
## - rateriskSame 1 278.32 296.32
## - bonemed_fuYes 1 279.09 297.09
## <none> 277.60 297.60
## - rateriskGreater 1 279.61 297.61
## - armassistYes 1 280.50 298.50
## - age 1 280.67 298.67
## - premenoYes 1 281.18 299.18
## - momfracYes 1 282.51 300.51
## - priorfracYes 1 283.26 301.26
##
## Step: AIC=296.19
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes +
## rateriskSame + rateriskGreater + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskSame 1 279.09 295.09
## - bonemed_fuYes 1 280.06 296.06
## <none> 278.19 296.19
## - rateriskGreater 1 280.48 296.48
## - armassistYes 1 280.82 296.82
## - premenoYes 1 282.05 298.05
## - age 1 282.15 298.15
## - momfracYes 1 282.70 298.70
## - priorfracYes 1 283.56 299.56
##
## Step: AIC=295.09
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes +
## rateriskGreater + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskGreater 1 280.49 294.49
## <none> 279.09 295.09
## - bonemed_fuYes 1 281.61 295.61
## - armassistYes 1 282.12 296.12
## - age 1 282.82 296.82
## - premenoYes 1 283.31 297.31
## - momfracYes 1 284.23 298.23
## - priorfracYes 1 284.47 298.47
##
## Step: AIC=294.49
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes +
## bonemed_fuYes
##
## Df Deviance AIC
## <none> 280.49 294.49
## - age 1 283.57 295.57
## - armassistYes 1 284.26 296.26
## - bonemed_fuYes 1 284.68 296.68
## - premenoYes 1 284.88 296.88
## - momfracYes 1 286.40 298.40
## - priorfracYes 1 286.49 298.49
## Start: AIC=298.97
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskGreater 1 272.98 296.98
## - armassistYes 1 273.00 297.00
## - premenoYes 1 273.03 297.03
## - rateriskSame 1 273.08 297.08
## - bonemedYes 1 273.30 297.30
## - smokeYes 1 273.57 297.57
## <none> 272.97 298.97
## - weight 1 275.24 299.24
## - bonemed_fuYes 1 275.33 299.33
## - priorfracYes 1 278.87 302.87
## - height 1 279.04 303.04
## - age 1 281.54 305.54
## - momfracYes 1 282.02 306.02
##
## Step: AIC=296.98
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + bonemedYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - armassistYes 1 273.00 295.00
## - premenoYes 1 273.05 295.05
## - rateriskSame 1 273.09 295.09
## - bonemedYes 1 273.30 295.30
## - smokeYes 1 273.58 295.58
## <none> 272.98 296.98
## - weight 1 275.38 297.38
## - bonemed_fuYes 1 275.44 297.44
## - priorfracYes 1 278.93 300.93
## - height 1 279.04 301.04
## - age 1 282.09 304.09
## - momfracYes 1 282.15 304.15
##
## Step: AIC=295
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + smokeYes + rateriskSame + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - premenoYes 1 273.06 293.06
## - rateriskSame 1 273.12 293.12
## - bonemedYes 1 273.32 293.32
## - smokeYes 1 273.59 293.59
## <none> 273.00 295.00
## - bonemed_fuYes 1 275.50 295.50
## - weight 1 276.06 296.06
## - priorfracYes 1 279.00 299.00
## - height 1 279.08 299.08
## - momfracYes 1 282.15 302.15
## - age 1 284.00 304.00
##
## Step: AIC=293.06
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## smokeYes + rateriskSame + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskSame 1 273.18 291.18
## - bonemedYes 1 273.37 291.37
## - smokeYes 1 273.65 291.65
## <none> 273.06 293.06
## - bonemed_fuYes 1 275.57 293.57
## - weight 1 276.09 294.09
## - priorfracYes 1 279.00 297.00
## - height 1 279.08 297.08
## - momfracYes 1 282.18 300.18
## - age 1 284.60 302.60
##
## Step: AIC=291.18
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## smokeYes + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - bonemedYes 1 273.55 289.55
## - smokeYes 1 273.75 289.75
## <none> 273.18 291.18
## - bonemed_fuYes 1 275.80 291.80
## - weight 1 276.10 292.10
## - height 1 279.08 295.08
## - priorfracYes 1 279.27 295.27
## - momfracYes 1 282.20 298.20
## - age 1 284.63 300.63
##
## Step: AIC=289.55
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## smokeYes + bonemed_fuYes
##
## Df Deviance AIC
## - smokeYes 1 274.11 288.11
## <none> 273.55 289.55
## - bonemed_fuYes 1 276.94 290.94
## - weight 1 276.98 290.98
## - priorfracYes 1 279.50 293.50
## - height 1 279.60 293.60
## - momfracYes 1 282.56 296.56
## - age 1 284.65 298.65
##
## Step: AIC=288.11
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## bonemed_fuYes
##
## Df Deviance AIC
## <none> 274.11 288.11
## - weight 1 277.88 289.88
## - bonemed_fuYes 1 277.92 289.92
## - priorfracYes 1 279.74 291.74
## - height 1 280.47 292.47
## - momfracYes 1 283.45 295.45
## - age 1 285.80 297.80
## Start: AIC=309.58
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - bonemed_fuYes 1 283.58 307.58
## - weight 1 283.58 307.58
## - bonemedYes 1 283.60 307.60
## - rateriskGreater 1 283.65 307.65
## - rateriskSame 1 283.66 307.66
## - smokeYes 1 284.17 308.17
## - armassistYes 1 284.40 308.40
## - premenoYes 1 284.97 308.97
## - height 1 285.08 309.08
## <none> 283.58 309.58
## - priorfracYes 1 287.94 311.94
## - momfracYes 1 290.71 314.71
## - age 1 290.78 314.78
##
## Step: AIC=307.58
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes
##
## Df Deviance AIC
## - weight 1 283.58 305.58
## - rateriskGreater 1 283.65 305.65
## - rateriskSame 1 283.66 305.66
## - bonemedYes 1 283.67 305.67
## - smokeYes 1 284.17 306.17
## - armassistYes 1 284.41 306.41
## - premenoYes 1 284.99 306.99
## - height 1 285.08 307.08
## <none> 283.58 307.58
## - priorfracYes 1 287.95 309.95
## - momfracYes 1 290.74 312.74
## - age 1 290.79 312.79
##
## Step: AIC=305.58
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes
##
## Df Deviance AIC
## - rateriskGreater 1 283.65 303.65
## - rateriskSame 1 283.66 303.66
## - bonemedYes 1 283.69 303.69
## - smokeYes 1 284.19 304.19
## - armassistYes 1 284.61 304.61
## - premenoYes 1 284.99 304.99
## - height 1 285.21 305.21
## <none> 283.58 305.58
## - priorfracYes 1 287.98 307.98
## - momfracYes 1 290.90 310.90
## - age 1 291.62 311.62
##
## Step: AIC=303.66
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + smokeYes + rateriskSame + bonemedYes
##
## Df Deviance AIC
## - rateriskSame 1 283.68 301.68
## - bonemedYes 1 283.72 301.72
## - smokeYes 1 284.25 302.25
## - armassistYes 1 284.83 302.83
## - premenoYes 1 285.18 303.18
## - height 1 285.34 303.34
## <none> 283.65 303.65
## - priorfracYes 1 288.27 306.27
## - momfracYes 1 291.52 309.52
## - age 1 291.64 309.64
##
## Step: AIC=301.68
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + smokeYes + bonemedYes
##
## Df Deviance AIC
## - bonemedYes 1 283.74 299.74
## - smokeYes 1 284.27 300.27
## - armassistYes 1 284.88 300.88
## - premenoYes 1 285.23 301.23
## - height 1 285.41 301.41
## <none> 283.68 301.68
## - priorfracYes 1 288.30 304.30
## - age 1 291.66 307.66
## - momfracYes 1 291.73 307.73
##
## Step: AIC=299.74
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes + smokeYes
##
## Df Deviance AIC
## - smokeYes 1 284.30 298.30
## - armassistYes 1 284.93 298.93
## - premenoYes 1 285.39 299.39
## - height 1 285.42 299.42
## <none> 283.74 299.74
## - priorfracYes 1 288.33 302.33
## - age 1 291.66 305.66
## - momfracYes 1 291.86 305.86
##
## Step: AIC=298.29
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes +
## armassistYes
##
## Df Deviance AIC
## - armassistYes 1 285.35 297.35
## - premenoYes 1 285.81 297.81
## - height 1 286.03 298.03
## <none> 284.30 298.30
## - priorfracYes 1 288.61 300.61
## - momfracYes 1 292.86 304.86
## - age 1 292.93 304.93
##
## Step: AIC=297.35
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes
##
## Df Deviance AIC
## - height 1 286.83 296.83
## - premenoYes 1 287.22 297.22
## <none> 285.35 297.35
## - priorfracYes 1 290.13 300.13
## - momfracYes 1 293.78 303.78
## - age 1 296.09 306.09
##
## Step: AIC=296.83
## .outcome ~ priorfracYes + age + premenoYes + momfracYes
##
## Df Deviance AIC
## <none> 286.83 296.83
## - premenoYes 1 288.92 296.92
## - priorfracYes 1 291.99 299.99
## - momfracYes 1 294.54 302.54
## - age 1 298.01 306.01
## Start: AIC=379.2
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - smokeYes 1 353.29 377.29
## - rateriskSame 1 353.29 377.29
## - rateriskGreater 1 353.37 377.37
## - bonemedYes 1 353.53 377.53
## - weight 1 353.83 377.83
## - premenoYes 1 353.92 377.92
## - armassistYes 1 354.02 378.02
## - bonemed_fuYes 1 354.66 378.66
## <none> 353.20 379.20
## - height 1 356.86 380.86
## - age 1 358.92 382.92
## - priorfracYes 1 359.89 383.89
## - momfracYes 1 360.81 384.81
##
## Step: AIC=377.29
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + rateriskSame + rateriskGreater +
## bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - rateriskSame 1 353.38 375.38
## - rateriskGreater 1 353.45 375.45
## - bonemedYes 1 353.64 375.64
## - weight 1 353.99 375.99
## - premenoYes 1 353.99 375.99
## - armassistYes 1 354.05 376.05
## - bonemed_fuYes 1 354.87 376.87
## <none> 353.29 377.29
## - height 1 357.01 379.01
## - age 1 359.37 381.37
## - priorfracYes 1 359.89 381.89
## - momfracYes 1 361.10 383.10
##
## Step: AIC=375.38
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + rateriskGreater + bonemedYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - rateriskGreater 1 353.46 373.46
## - bonemedYes 1 353.70 373.70
## - weight 1 354.10 374.10
## - premenoYes 1 354.14 374.14
## - armassistYes 1 354.21 374.21
## - bonemed_fuYes 1 355.00 375.00
## <none> 353.38 375.38
## - height 1 357.29 377.29
## - age 1 359.39 379.39
## - priorfracYes 1 359.98 379.98
## - momfracYes 1 361.63 381.63
##
## Step: AIC=373.46
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + bonemedYes + bonemed_fuYes
##
## Df Deviance AIC
## - bonemedYes 1 353.83 371.83
## - weight 1 354.11 372.11
## - premenoYes 1 354.25 372.25
## - armassistYes 1 354.42 372.42
## - bonemed_fuYes 1 355.43 373.43
## <none> 353.46 373.46
## - height 1 357.31 375.31
## - age 1 359.42 377.42
## - priorfracYes 1 360.46 378.46
## - momfracYes 1 361.83 379.83
##
## Step: AIC=371.83
## .outcome ~ priorfracYes + age + weight + height + premenoYes +
## momfracYes + armassistYes + bonemed_fuYes
##
## Df Deviance AIC
## - premenoYes 1 354.66 370.66
## - weight 1 354.68 370.68
## - armassistYes 1 354.79 370.79
## <none> 353.83 371.83
## - bonemed_fuYes 1 356.07 372.07
## - height 1 357.77 373.77
## - age 1 359.64 375.64
## - priorfracYes 1 360.64 376.64
## - momfracYes 1 362.55 378.55
##
## Step: AIC=370.66
## .outcome ~ priorfracYes + age + weight + height + momfracYes +
## armassistYes + bonemed_fuYes
##
## Df Deviance AIC
## - weight 1 355.47 369.47
## - armassistYes 1 355.91 369.91
## - bonemed_fuYes 1 356.57 370.57
## <none> 354.66 370.66
## - height 1 358.96 372.96
## - age 1 359.78 373.78
## - priorfracYes 1 361.70 375.70
## - momfracYes 1 363.46 377.46
##
## Step: AIC=369.47
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes +
## bonemed_fuYes
##
## Df Deviance AIC
## - bonemed_fuYes 1 356.99 368.99
## <none> 355.47 369.47
## - armassistYes 1 357.97 369.97
## - height 1 359.05 371.05
## - age 1 359.81 371.81
## - priorfracYes 1 362.66 374.66
## - momfracYes 1 363.77 375.77
##
## Step: AIC=368.99
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes
##
## Df Deviance AIC
## <none> 356.99 368.99
## - armassistYes 1 359.46 369.46
## - height 1 361.39 371.39
## - age 1 361.74 371.74
## - priorfracYes 1 365.40 375.40
## - momfracYes 1 365.90 375.90
#GLMNET
glmnet.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
data=train,
method="glmnet",
trControl=fitControl,
metric="logLoss")
summary(Step.fit)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7903 -0.7273 -0.5617 0.1563 2.2008
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.03228 3.63670 0.834 0.40439
## priorfracYes 0.84112 0.28755 2.925 0.00344 **
## age 0.03331 0.01532 2.175 0.02964 *
## height -0.04373 0.02102 -2.081 0.03745 *
## momfracYes 1.11109 0.36538 3.041 0.00236 **
## armassistYes 0.43647 0.27643 1.579 0.11434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 395.31 on 350 degrees of freedom
## Residual deviance: 356.99 on 345 degrees of freedom
## AIC: 368.99
##
## Number of Fisher Scoring iterations: 4
stepfit.fit_coefficients <-coef(Step.fit$finalModel)
glmnet.fit_coefficients <-coef(glmnet.fit$finalModel,glmnet.fit$finalModel$lambdaOpt)
#Examine what predictors are included
stepfit.fit_coefficients
## (Intercept) priorfracYes age height momfracYes armassistYes
## 3.03227756 0.84112254 0.03331054 -0.04373389 1.11108604 0.43647327
#for statistical interpretation use these coefficients.
glmnet.fit_coefficients
## 13 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.01156540
## priorfracYes 0.63664157
## age 0.02672410
## weight .
## height -0.02115462
## premenoYes .
## momfracYes 0.71652505
## armassistYes 0.24017188
## smokeYes .
## rateriskSame .
## rateriskGreater .
## bonemedYes .
## bonemed_fuYes 0.20814097
#simpleFit2_stepwise_selection - see how it performs by re-creating the model with only the variables that stepwise selected in the above code block and then seeing how well it performs on the valdiation dataset
# install.packages("gtsummary")
library(dplyr)
library(caret)
library(pROC)
library(car)
set.seed(1234)
trainIndex<-createDataPartition(bonemed$fracture,p=.70,list=F)
train <- bonemed[trainIndex,]
test <- bonemed[-trainIndex,]
# our model -- interpret
simpleFit2 <- glm(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu, data=train,family="binomial")
summary(simpleFit2)
##
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height +
## premeno + momfrac + armassist + smoke + raterisk + bonemed +
## bonemed_fu, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8304 -0.7594 -0.5449 0.1339 2.2374
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.530517 3.786358 0.404 0.68605
## priorfracYes 0.777576 0.298661 2.604 0.00923 **
## age 0.041686 0.017522 2.379 0.01736 *
## weight 0.008035 0.010098 0.796 0.42620
## height -0.042567 0.022434 -1.897 0.05777 .
## premenoYes 0.300518 0.350431 0.858 0.39113
## momfracYes 1.067368 0.380868 2.802 0.00507 **
## armassistYes 0.285040 0.314312 0.907 0.36448
## smokeYes -0.169793 0.559139 -0.304 0.76138
## rateriskSame 0.104113 0.337918 0.308 0.75800
## rateriskGreater 0.155840 0.378042 0.412 0.68017
## bonemedYes -0.291392 0.504787 -0.577 0.56377
## bonemed_fuYes 0.600553 0.495126 1.213 0.22516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 395.31 on 350 degrees of freedom
## Residual deviance: 353.20 on 338 degrees of freedom
## AIC: 379.2
##
## Number of Fisher Scoring iterations: 4
# based off our model after performing feature selection on it - interpret
simpleFit2_stepwise_selection<- glm(fracture~priorfrac + age + height + momfrac + armassist, data=train,family="binomial")
summary(simpleFit2_stepwise_selection)
##
## Call:
## glm(formula = fracture ~ priorfrac + age + height + momfrac +
## armassist, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7903 -0.7273 -0.5617 0.1563 2.2008
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.03228 3.63670 0.834 0.40439
## priorfracYes 0.84112 0.28755 2.925 0.00344 **
## age 0.03331 0.01532 2.175 0.02964 *
## height -0.04373 0.02102 -2.081 0.03745 *
## momfracYes 1.11109 0.36538 3.041 0.00236 **
## armassistYes 0.43647 0.27643 1.579 0.11434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 395.31 on 350 degrees of freedom
## Residual deviance: 356.99 on 345 degrees of freedom
## AIC: 368.99
##
## Number of Fisher Scoring iterations: 4
pred <- predict(simpleFit2,test,type = "response")
simpleFit2_ROC <- roc(response = test$fracture, predictor = pred, levels = c("No","Yes"))
## Setting direction: controls < cases
pred_simpleFit2_stepwise_selection <- predict(simpleFit2_stepwise_selection,test,type = "response")
simpleFit2_stepwise_selection_ROC<- roc(response = test$fracture, predictor = pred_simpleFit2_stepwise_selection, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(simpleFit2_ROC)#,print.thres = "best")
plot(simpleFit2_stepwise_selection_ROC,print.thres = "best",col = "red", add = T)
legend("bottomright",
legend=c("simpleFit2", "simpleFit2_stepwise_selection"),
col=c("black", "red"),
lwd=4, cex =1, xpd = TRUE, horiz = FALSE)
threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
##
##
## threshPred No Yes
## No 68 8
## Yes 44 29
##
## Accuracy : 0.651
## 95% CI : (0.5687, 0.7272)
## No Information Rate : 0.7517
## P-Value [Acc > NIR] : 0.9977
##
## Kappa : 0.2949
##
## Mcnemar's Test P-Value : 1.212e-06
##
## Sensitivity : 0.6071
## Specificity : 0.7838
## Pos Pred Value : 0.8947
## Neg Pred Value : 0.3973
## Prevalence : 0.7517
## Detection Rate : 0.4564
## Detection Prevalence : 0.5101
## Balanced Accuracy : 0.6955
##
## 'Positive' Class : No
##
exp(coef(simpleFit2))
## (Intercept) priorfracYes age weight height
## 4.6205662 2.1761905 1.0425673 1.0080673 0.9583258
## premenoYes momfracYes armassistYes smokeYes rateriskSame
## 1.3505582 2.9077177 1.3298156 0.8438397 1.1097260
## rateriskGreater bonemedYes bonemed_fuYes
## 1.1686387 0.7472229 1.8231259
exp(coef(simpleFit2_stepwise_selection))
## (Intercept) priorfracYes age height momfracYes armassistYes
## 20.7444255 2.3189686 1.0338715 0.9572086 3.0376556 1.5472409
exp(confint(simpleFit2))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.002778797 8275.372400
## priorfracYes 1.208986755 3.910684
## age 1.007538173 1.079431
## weight 0.988123408 1.028176
## height 0.916399839 1.001038
## premenoYes 0.668192139 2.657400
## momfracYes 1.369277740 6.139585
## armassistYes 0.715509295 2.460909
## smokeYes 0.254954482 2.371788
## rateriskSame 0.572380717 2.163469
## rateriskGreater 0.555804952 2.459731
## bonemedYes 0.274849345 2.015713
## bonemed_fuYes 0.684619458 4.839544
exp(confint(simpleFit2_stepwise_selection))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.01691048 2.787618e+04
## priorfracYes 1.31646496 4.075424e+00
## age 1.00337015 1.065647e+00
## height 0.91791949 9.971164e-01
## momfracYes 1.47466053 6.224112e+00
## armassistYes 0.89757148 2.659839e+00
library(ResourceSelection)
hoslem.test(simpleFit2$y,fitted(simpleFit2))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: simpleFit2$y, fitted(simpleFit2)
## X-squared = 5.4021, df = 8, p-value = 0.7139
library(ResourceSelection)
hoslem.test(simpleFit2_stepwise_selection$y,fitted(simpleFit2_stepwise_selection))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: simpleFit2_stepwise_selection$y, fitted(simpleFit2_stepwise_selection)
## X-squared = 12.857, df = 8, p-value = 0.1169
simpleFit2_ROC$auc
## Area under the curve: 0.7362
simpleFit2_stepwise_selection_ROC$auc
## Area under the curve: 0.7002
simpleFit2_ROC$auc
## Area under the curve: 0.7362
simpleFit2_stepwise_selection_ROC$auc
## Area under the curve: 0.7002
#lower VIF below 10 is better above means you have multicollinearity issue
vif(simpleFit2)
## GVIF Df GVIF^(1/(2*Df))
## priorfrac 1.161462 1 1.077711
## age 1.462038 1 1.209148
## weight 1.615203 1 1.270906
## height 1.199241 1 1.095099
## premeno 1.122091 1 1.059288
## momfrac 1.108781 1 1.052987
## armassist 1.413060 1 1.188722
## smoke 1.086412 1 1.042311
## raterisk 1.368492 2 1.081585
## bonemed 3.134160 1 1.770356
## bonemed_fu 3.239427 1 1.799841
# select the third column
vif_values2 <- vif(simpleFit2)[, 3]
# Square each value in the column
squared_vif_2<- vif_values2^2
print(squared_vif_2)
## priorfrac age weight height premeno momfrac armassist
## 1.161462 1.462038 1.615203 1.199241 1.122091 1.108781 1.413060
## smoke raterisk bonemed bonemed_fu
## 1.086412 1.169825 3.134160 3.239427
vif(simpleFit2_stepwise_selection)
## priorfrac age height momfrac armassist
## 1.089066 1.148113 1.049448 1.040631 1.104911
#lower AIC the better
AIC(simpleFit)
## [1] 372.1604
AIC(simpleFit2)
## [1] 379.1978
AIC(simpleFit2_stepwise_selection)
## [1] 368.9868
#OBJECTIVE 2
#complex our best model based off of simpleFit2 plus adding interactions where we thought necessary #then run penalized regression (feature selection) on our best fit complex model
##penalized complex model, maybe need to adjust alpha/lamda for better performance?
fitControl<-trainControl(method="repeatedcv",number=10,repeats=1,classProbs=TRUE)
set.seed(1234)
logistic.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + fracscore+ bonemed + bonemed_fu + age*raterisk + age*momfrac + age*armassist + age*smoke + age*raterisk + age*weight + weight*priorfrac +weight*premeno + weight*momfrac + weight*raterisk + weight*fracscore + weight*bonemed + weight*bonemed_fu,
data=train,
method="glmnet",
trControl=fitControl,
metric="logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
logistic.fit
## glmnet
##
## 351 samples
## 12 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 316, 317, 315, 315, 316, 316, ...
## Resampling results across tuning parameters:
##
## alpha lambda Accuracy Kappa
## 0.10 0.0002283267 0.7183754 0.04665266
## 0.10 0.0022832674 0.7380626 0.08985573
## 0.10 0.0228326736 0.7551261 0.14107354
## 0.55 0.0002283267 0.7212325 0.05136438
## 0.55 0.0022832674 0.7437768 0.11094007
## 0.55 0.0228326736 0.7466340 0.09074508
## 1.00 0.0002283267 0.7212325 0.05136438
## 1.00 0.0022832674 0.7466340 0.11841413
## 1.00 0.0228326736 0.7521055 0.07046812
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were alpha = 0.1 and lambda = 0.02283267.
coef(logistic.fit$finalModel,logistic.fit$finalModel$lambdaOpt)
## 28 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 2.913217e+00
## priorfracYes 9.481578e-02
## age 1.846307e-02
## weight -9.482080e-04
## height -3.862941e-02
## premenoYes 2.160888e-01
## momfracYes 4.693081e-04
## armassistYes 6.862181e-02
## smokeYes -1.744867e-01
## rateriskSame 9.698252e-02
## rateriskGreater -1.636336e-02
## fracscore 3.511642e-02
## bonemedYes -7.677833e-02
## bonemed_fuYes .
## age:rateriskSame .
## age:rateriskGreater -1.208602e-03
## age:momfracYes 2.202190e-03
## age:armassistYes .
## age:smokeYes .
## age:weight .
## priorfracYes:weight 7.209026e-03
## weight:premenoYes 8.605912e-06
## weight:momfracYes 1.135259e-02
## weight:rateriskSame -1.261643e-04
## weight:rateriskGreater 4.430291e-03
## weight:fracscore 9.839505e-04
## weight:bonemedYes .
## weight:bonemed_fuYes 6.023700e-03
preds <- predict(logistic.fit, test, type = "prob")
# logROC <- roc(response = test$fracture, predictor = preds$Yes)
logROC <- roc(response = test$fracture, predictor = preds$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
logROC$auc
## Area under the curve: 0.7478
plot(simpleFit2_ROC,print.thres = "best")
plot(logROC,print.thres = "best",col = "red", add = T)
legend("bottomright",
legend=c("simpleFit2", "penalized_regression_fit_complex"),
col=c("black", "red"),
lwd=4, cex =1, xpd = TRUE, horiz = FALSE)
threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
##
##
## threshPred No Yes
## No 68 8
## Yes 44 29
##
## Accuracy : 0.651
## 95% CI : (0.5687, 0.7272)
## No Information Rate : 0.7517
## P-Value [Acc > NIR] : 0.9977
##
## Kappa : 0.2949
##
## Mcnemar's Test P-Value : 1.212e-06
##
## Sensitivity : 0.6071
## Specificity : 0.7838
## Pos Pred Value : 0.8947
## Neg Pred Value : 0.3973
## Prevalence : 0.7517
## Detection Rate : 0.4564
## Detection Prevalence : 0.5101
## Balanced Accuracy : 0.6955
##
## 'Positive' Class : No
##
logROC$auc
## Area under the curve: 0.7478
lda.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + fracscore + bonemed + bonemed_fu,
data=train,
method="lda",
trControl=fitControl,
metric="logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
lda.fit
## Linear Discriminant Analysis
##
## 351 samples
## 12 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 316, 316, 316, 316, 316, 316, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7433707 0.1368824
#Computing predicted probabilities on the training data
predictLDA <- predict(lda.fit, test, type = "prob")[,"Yes"]
#Getting confusion matrix
ldaROC <- roc(response = test$fracture, predictor = predictLDA, levels = c("No", "Yes"))
## Setting direction: controls < cases
plot(ldaROC, print.thres = "best")
threshold=0.210
lda.preds<-ifelse(predictLDA>threshold,"Yes","No")
confusionMatrix(table(lda.preds, test$fracture))
## Confusion Matrix and Statistics
##
##
## lda.preds No Yes
## No 74 8
## Yes 38 29
##
## Accuracy : 0.6913
## 95% CI : (0.6105, 0.7643)
## No Information Rate : 0.7517
## P-Value [Acc > NIR] : 0.9617
##
## Kappa : 0.3496
##
## Mcnemar's Test P-Value : 1.904e-05
##
## Sensitivity : 0.6607
## Specificity : 0.7838
## Pos Pred Value : 0.9024
## Neg Pred Value : 0.4328
## Prevalence : 0.7517
## Detection Rate : 0.4966
## Detection Prevalence : 0.5503
## Balanced Accuracy : 0.7222
##
## 'Positive' Class : No
##
ldaROC$auc
## Area under the curve: 0.7288
plot(simpleROC)
plot(ldaROC, add = T, col = "blue", print.thres = "best")
#knn model
#knn model, not sure if can be improved, performance is pretty bad
knn.fit <- train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
data = train,
method = "knn",
trControl = fitControl,
metric = "logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
knn.fit
## k-Nearest Neighbors
##
## 351 samples
## 11 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 316, 316, 316, 315, 317, 316, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.7206536 0.1093987
## 7 0.7407470 0.1015481
## 9 0.7464706 0.1025063
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
knn.preds <- predict(knn.fit, test, type = "prob")
knn.roc <- roc(response = test$fracture, predictor = knn.preds$Yes, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(knn.roc, print.thres = "best")
knn.roc$auc
## Area under the curve: 0.5211
knn.preds.thresh <- ifelse(knn.preds$Yes > 0.211, "Yes", "No")
confusionMatrix(table(knn.preds.thresh, test$fracture))
## Confusion Matrix and Statistics
##
##
## knn.preds.thresh No Yes
## No 39 10
## Yes 73 27
##
## Accuracy : 0.443
## 95% CI : (0.3617, 0.5265)
## No Information Rate : 0.7517
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0496
##
## Mcnemar's Test P-Value : 1.008e-11
##
## Sensitivity : 0.3482
## Specificity : 0.7297
## Pos Pred Value : 0.7959
## Neg Pred Value : 0.2700
## Prevalence : 0.7517
## Detection Rate : 0.2617
## Detection Prevalence : 0.3289
## Balanced Accuracy : 0.5390
##
## 'Positive' Class : No
##
plot(simpleROC)
plot(knn.roc, add = T, col = "blue", print.thres = "best")
knn.roc$auc
## Area under the curve: 0.5211
plot(simpleFit2_ROC)
plot(logROC, add = T, col = "red")
plot(knn.roc, add = T, col = "blue")
plot(ldaROC, add = T, col = "green")
legend("bottomright",legend=c("simpleFit2","complexFit", "kNN", "LDA"),col=c("black","red", "blue", "green"),lwd=3,cex=1,xpd=T,horiz=F)