# Read in glow_bonemed
# Explore dataset
library(aplore3)
bonemed <-glow_bonemed
library(aplore3)
#need to narrow down and understand each one abit more
?glow_bonemed
?glow500
#Look the data for hig level summaries
head(bonemed)
##   sub_id site_id phy_id priorfrac age weight height      bmi premeno momfrac
## 1      1       1     14        No  62   70.3    158 28.16055      No      No
## 2      2       4    284        No  65   87.1    160 34.02344      No      No
## 3      3       6    305       Yes  88   50.8    157 20.60936      No     Yes
## 4      4       6    309        No  82   62.1    160 24.25781      No      No
## 5      5       1     37        No  61   68.0    152 29.43213      No      No
## 6      6       5    299       Yes  67   68.0    161 26.23356      No      No
##   armassist smoke raterisk fracscore fracture bonemed bonemed_fu bonetreat
## 1        No    No     Same         1       No      No         No        No
## 2        No    No     Same         2       No      No         No        No
## 3       Yes    No     Less        11       No      No         No        No
## 4        No    No     Less         5       No      No         No        No
## 5        No    No     Same         1       No      No         No        No
## 6        No   Yes     Same         4       No      No         No        No
summary(bonemed)
##      sub_id         site_id          phy_id       priorfrac      age       
##  Min.   :  1.0   Min.   :1.000   Min.   :  1.00   No :374   Min.   :55.00  
##  1st Qu.:125.8   1st Qu.:2.000   1st Qu.: 57.75   Yes:126   1st Qu.:61.00  
##  Median :250.5   Median :3.000   Median :182.50             Median :67.00  
##  Mean   :250.5   Mean   :3.436   Mean   :178.55             Mean   :68.56  
##  3rd Qu.:375.2   3rd Qu.:5.000   3rd Qu.:298.00             3rd Qu.:76.00  
##  Max.   :500.0   Max.   :6.000   Max.   :325.00             Max.   :90.00  
##      weight           height           bmi        premeno   momfrac   armassist
##  Min.   : 39.90   Min.   :134.0   Min.   :14.88   No :403   No :435   No :312  
##  1st Qu.: 59.90   1st Qu.:157.0   1st Qu.:23.27   Yes: 97   Yes: 65   Yes:188  
##  Median : 68.00   Median :161.5   Median :26.42                                
##  Mean   : 71.82   Mean   :161.4   Mean   :27.55                                
##  3rd Qu.: 81.30   3rd Qu.:165.0   3rd Qu.:30.79                                
##  Max.   :127.00   Max.   :199.0   Max.   :49.08                                
##  smoke        raterisk     fracscore      fracture  bonemed   bonemed_fu
##  No :465   Less   :167   Min.   : 0.000   No :375   No :371   No :361   
##  Yes: 35   Same   :186   1st Qu.: 2.000   Yes:125   Yes:129   Yes:139   
##            Greater:147   Median : 3.000                                 
##                          Mean   : 3.698                                 
##                          3rd Qu.: 5.000                                 
##                          Max.   :11.000                                 
##  bonetreat
##  No :382  
##  Yes:118  
##           
##           
##           
## 
#Convert the variables to appropriate datatype, if applicable
bonemed$Fracture.Num<-ifelse(bonemed$fracture=="Yes",2,1)
#bonemed$Fracture.Num<-as.factor(ifelse(bonemed$fracture=="Yes",2,1))
bonemed$fracscoref <- as.factor(bonemed$fracscore)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggplot2)

#BMI & Weight are highly correlated.  Need to drop one, weight, which more correlated to height than BMI
ggpairs(bonemed,
        aes(color=fracture),
        columnLabels = c('Age','Weight','Height','BMI'),
        columns = 5:8) + 
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

View(bonemed)

ggsave("output_plot_eda_project_matrix2.png", width = 8, height = 10)
## Bar Chart Risk Fracture Scores
ggplot(bonemed,aes(x=fracscoref,color=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("Fracture Risk Score") + ylab("Number of Fractures") +
  ggtitle("Number of Fractures per Fracture Risk Score") +
    scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = c(0.9, 0.7))

# Plot age by factor variables
library(gridExtra)
A1<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonetreat)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enroll & F-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
A2<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonemed)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enrollment") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

A3<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=bonemed_fu)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Follow-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

A4<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=priorfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("History of Prior Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

A5<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=momfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Mother Had Hip Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

A6<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=armassist)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Arms Needed,Stand fm Chair") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

A7<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=smoke)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Former or Current Smoker") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        legend.position = c(1.2, 0.5),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

grid.arrange(A1,A2,A3,A4,A5,A6,A7,ncol=3,nrow=3, top="Age")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Plot bmi by factor variables
B1<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonetreat)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enroll & F-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B2<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonemed)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enrollment") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B3<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=bonemed_fu)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Follow-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B4<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=priorfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("History of Prior Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B5<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=momfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Mother Had Hip Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B6<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=armassist)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Arms Needed,Stand fm Chair") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

B7<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=smoke)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Former or Current Smoker") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = c(1.2, 0.5),
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

grid.arrange(B1,B2,B3,B4,B5,B6,B7,ncol=3,nrow=3, top="BMI")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Plot weight by factor variables
C1<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonetreat)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enroll & F-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C2<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonemed)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enrollment") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C3<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=bonemed_fu)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Follow-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C4<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=priorfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("History of Prior Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C5<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=momfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Mother Had Hip Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C6<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=armassist)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Arms Needed,Stand fm Chair") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

C7<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=smoke)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Former or Current Smoker") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = c(1.2, 0.5),
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

grid.arrange(C1,C2,C3,C4,C5,C6,C7,ncol=3,nrow=3, top="Weight")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Plot height by factor variables
D1<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonetreat)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enroll & F-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D2<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonemed)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Enrollment") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D3<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=bonemed_fu)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Bone Meds:Follow-Up") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D4<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=priorfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("History of Prior Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D5<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=momfrac)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Mother Had Hip Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D6<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=armassist)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) +
  xlab("") + ylab("") + labs(color=NULL) + 
  ggtitle("Arms Needed,Stand fm Chair") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        legend.position = "none",
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

D7<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=smoke)) +
  geom_point(size=.5) + geom_smooth(method="loess",size=1,span=1) + 
  xlab("") + ylab("Any Fracture") + labs(color=NULL) + 
  ggtitle("Former or Current Smoker") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue")) +
  theme(plot.title = element_text(hjust = 0.1,size=10),
        legend.position = c(1.2, 0.5),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

grid.arrange(D1,D2,D3,D4,D5,D6,D7,ncol=3,nrow=3,top="Height")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Self reported risk of fracture
rr1<-ggplot(bonemed,aes(x=age,y=Fracture.Num,color=factor(raterisk))) +
  geom_point() + geom_smooth(method="loess",size=1,span=1)  +
  facet_wrap(~factor(raterisk)) + 
  xlab("Age") + ylab("Risk of Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "none",
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

rr2<-ggplot(bonemed,aes(x=bmi,y=Fracture.Num,color=factor(raterisk))) +
  geom_point() + geom_smooth(method="loess",size=1,span=1)  +
  facet_wrap(~factor(raterisk), ncol = 3) + 
  xlab("BMI") + ylab("Risk of Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "none",
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

rr3<-ggplot(bonemed,aes(x=weight,y=Fracture.Num,color=factor(raterisk))) +
  geom_point() + geom_smooth(method="loess",size=1,span=1)  +
  facet_wrap(~factor(raterisk)) + 
  xlab("Weight") + ylab("Risk of Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "none",
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

rr4<-ggplot(bonemed,aes(x=height,y=Fracture.Num,color=factor(raterisk))) +
  geom_point() + geom_smooth(method="loess",size=1,span=1)  +
  facet_wrap(~factor(raterisk)) + 
  xlab("Height") + ylab("Risk of Fracture") +
  theme_classic() +
  scale_color_manual(values=c("darkred", "darkblue", "darkgreen")) +
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position = "none",
        axis.text.x = element_text(angle=45, vjust=1, hjust=1), 
        panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'))

grid.arrange(rr1,rr2,rr3,rr4,ncol=2,nrow=2,top="Self Reported Risk of Fracture")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#These variables shouldn't be included, identifiers, however need to verify

ggplot(bonemed,aes(x=site_id,y=Fracture.Num)) +
  geom_point() + geom_smooth(method="loess",size=1,span=1) +
  ggtitle("Site ID")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(bonemed,aes(x=phy_id,y=Fracture.Num)) +
  geom_point() + geom_smooth(method="loess",size=1,span=1) +
  ggtitle("Physician ID")
## `geom_smooth()` using formula = 'y ~ x'

ggpairs(bonemed,
        aes(colour=fracture),
        axisLabels = "none",
        columns = 1:3)

# install.packages("lemon")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lemon)
# Numbers/Proportion of Fractures
# Character variables for review
E1c<-ggplot(bonemed,aes(x=priorfrac,colour=fracture),) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")
  
E1 <-bonemed %>% 
  group_by(priorfrac,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'priorfrac'. You can override using the
## `.groups` argument.
E1p<-ggplot(E1[c(1,2),],aes(x=reorder(priorfrac,-perc),y=perc,colour=priorfrac))+
  geom_bar(aes(fill=priorfrac),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E1c,E1p,ncol=2,nrow=1,top="Prior Fractures")

###
E2c<-ggplot(bonemed,aes(x=premeno,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E2 <-bonemed %>% 
  group_by(premeno,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'premeno'. You can override using the
## `.groups` argument.
E2p<-ggplot(E2[c(1,2),],aes(x=reorder(premeno,-perc),y=perc,colour=premeno))+
  geom_bar(aes(fill=premeno),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E2c,E2p,ncol=2,nrow=1,top="Menopause Before Age 45")

###
E3c<-ggplot(bonemed,aes(x=armassist,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E3 <-bonemed %>% 
  group_by(armassist,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'armassist'. You can override using the
## `.groups` argument.
E3p<-ggplot(E3[c(1,2),],aes(x=reorder(armassist,-perc),y=perc,colour=armassist))+
  geom_bar(aes(fill=armassist),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E3c,E3p,ncol=2,nrow=1,top="Arms Needed to Stand from Chair")

###
E4c<-ggplot(bonemed,aes(x=smoke,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E4 <-bonemed %>% 
  group_by(smoke,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'smoke'. You can override using the
## `.groups` argument.
E4p<-ggplot(E4[c(1,2),],aes(x=reorder(smoke,-perc),y=perc,colour=smoke))+
  geom_bar(aes(fill=smoke),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E4c,E4p,ncol=2,nrow=1,top="Former or Current Smoker")

###
E6c<-ggplot(bonemed,aes(x=momfrac,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E6 <-bonemed %>% 
  group_by(momfrac,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'momfrac'. You can override using the
## `.groups` argument.
E6p<-ggplot(E6[c(1,2),],aes(x=reorder(momfrac,-perc),y=perc,colour=momfrac))+
  geom_bar(aes(fill=momfrac),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E6c,E6p,ncol=2,nrow=1,top="Mother had Hip Fracture")

# install.packages("lemon")
library(lemon)
# Numbers/Proportion of Fractures
# Character variables for review
E1c<-ggplot(bonemed,aes(x=priorfrac,colour=fracture),) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")
  
E1 <-bonemed %>% 
  group_by(priorfrac,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'priorfrac'. You can override using the
## `.groups` argument.
E1p<-ggplot(E1[c(3,4),],aes(x=reorder(priorfrac,-perc),y=perc,colour=priorfrac))+
  geom_bar(aes(fill=priorfrac),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E1c,E1p,ncol=2,nrow=1,top="Prior Fractures")

###
E2c<-ggplot(bonemed,aes(x=premeno,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E2 <-bonemed %>% 
  group_by(premeno,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'premeno'. You can override using the
## `.groups` argument.
E2p<-ggplot(E2[c(3,4),],aes(x=reorder(premeno,-perc),y=perc,colour=premeno))+
  geom_bar(aes(fill=premeno),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E2c,E2p,ncol=2,nrow=1,top="Menopause Before Age 45")

###
E3c<-ggplot(bonemed,aes(x=armassist,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E3 <-bonemed %>% 
  group_by(armassist,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'armassist'. You can override using the
## `.groups` argument.
E3p<-ggplot(E3[c(3,4),],aes(x=reorder(armassist,-perc),y=perc,colour=armassist))+
  geom_bar(aes(fill=armassist),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E3c,E3p,ncol=2,nrow=1,top="Arms Needed to Stand from Chair")

###
E4c<-ggplot(bonemed,aes(x=smoke,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E4 <-bonemed %>% 
  group_by(smoke,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'smoke'. You can override using the
## `.groups` argument.
E4p<-ggplot(E4[c(3,4),],aes(x=reorder(smoke,-perc),y=perc,colour=smoke))+
  geom_bar(aes(fill=smoke),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E4c,E4p,ncol=2,nrow=1,top="Former or Current Smoker")

###
E6c<-ggplot(bonemed,aes(x=momfrac,colour=fracture)) + 
  geom_bar(aes(fill=fracture)) + 
  xlab("") + ylab("Number of Fractures") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

E6 <-bonemed %>% 
  group_by(momfrac,fracture) %>%
  summarise(cnt=n()) %>%
  mutate(perc=round(cnt/sum(cnt),4))%>%
  arrange(desc(perc))
## `summarise()` has grouped output by 'momfrac'. You can override using the
## `.groups` argument.
E6p<-ggplot(E6[c(3,4),],aes(x=reorder(momfrac,-perc),y=perc,colour=momfrac))+
  geom_bar(aes(fill=momfrac),show.legend=T,stat="identity")+
  ylab("Proportion of Having Fractures ") + xlab("") +
  scale_fill_manual(values=c('darkred','darkblue')) +
  scale_colour_manual(values=c('darkred','darkblue')) +
  theme_classic() +
  theme(panel.background = element_rect(fill = 'grey95', color = 'grey95'),
        strip.background = element_rect(fill = 'grey95', color = 'black'),
        legend.position = "none")

grid_arrange_shared_legend(E6c,E6p,ncol=2,nrow=1,top="Mother had Hip Fracture")

Objective 1 Multiple Logistic Model

#Split our Dataset

# install.packages("pROC")
library(caret)
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
set.seed(1234)
trainIndex<-createDataPartition(bonemed$fracture,p=.70,list=F)

train <- bonemed[trainIndex,]
test <- bonemed[-trainIndex,]

#Model 1 simple additive model- no complexity added: simpleFit versus simpleFit2 (our model dropping bmi, fracscore, bonetreat) on training dataset

simpleFit <- glm(fracture~priorfrac + age + weight + height + bmi + premeno + momfrac + armassist + smoke + raterisk + fracscore+ bonemed + bonemed_fu + bonetreat, data=train,family="binomial")

simpleFit2 <- glm(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu, data=train,family="binomial")

summary(simpleFit)
## 
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height + 
##     bmi + premeno + momfrac + armassist + smoke + raterisk + 
##     fracscore + bonemed + bonemed_fu + bonetreat, family = "binomial", 
##     data = train)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.13403  -0.72278  -0.52107   0.04504   2.23762  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     -12.882227  15.628326  -0.824  0.40978   
## priorfracYes      0.693187   0.464558   1.492  0.13566   
## age               0.037700   0.066489   0.567  0.57071   
## weight           -0.092030   0.105323  -0.874  0.38224   
## height            0.048330   0.098960   0.488  0.62529   
## bmi               0.258150   0.271151   0.952  0.34107   
## premenoYes        0.339882   0.358045   0.949  0.34248   
## momfracYes        1.092681   0.518016   2.109  0.03491 * 
## armassistYes      0.318889   0.728692   0.438  0.66166   
## smokeYes         -0.174829   0.676671  -0.258  0.79612   
## rateriskSame      0.161346   0.347174   0.465  0.64212   
## rateriskGreater   0.155500   0.387607   0.401  0.68829   
## fracscore        -0.006928   0.333286  -0.021  0.98342   
## bonemedYes        2.530725   1.160812   2.180  0.02925 * 
## bonemed_fuYes     1.559774   0.563121   2.770  0.00561 **
## bonetreatYes     -3.851705   1.287125  -2.992  0.00277 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 395.31  on 350  degrees of freedom
## Residual deviance: 340.16  on 335  degrees of freedom
## AIC: 372.16
## 
## Number of Fisher Scoring iterations: 4
summary(simpleFit2)
## 
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height + 
##     premeno + momfrac + armassist + smoke + raterisk + bonemed + 
##     bonemed_fu, family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8304  -0.7594  -0.5449   0.1339   2.2374  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      1.530517   3.786358   0.404  0.68605   
## priorfracYes     0.777576   0.298661   2.604  0.00923 **
## age              0.041686   0.017522   2.379  0.01736 * 
## weight           0.008035   0.010098   0.796  0.42620   
## height          -0.042567   0.022434  -1.897  0.05777 . 
## premenoYes       0.300518   0.350431   0.858  0.39113   
## momfracYes       1.067368   0.380868   2.802  0.00507 **
## armassistYes     0.285040   0.314312   0.907  0.36448   
## smokeYes        -0.169793   0.559139  -0.304  0.76138   
## rateriskSame     0.104113   0.337918   0.308  0.75800   
## rateriskGreater  0.155840   0.378042   0.412  0.68017   
## bonemedYes      -0.291392   0.504787  -0.577  0.56377   
## bonemed_fuYes    0.600553   0.495126   1.213  0.22516   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 395.31  on 350  degrees of freedom
## Residual deviance: 353.20  on 338  degrees of freedom
## AIC: 379.2
## 
## Number of Fisher Scoring iterations: 4
pred <- predict(simpleFit,test,type = "response")
simpleROC <- roc(response = test$fracture, predictor = pred, levels = c("No","Yes"))
## Setting direction: controls < cases
pred2 <- predict(simpleFit2,test,type = "response")
simpleROC2 <- roc(response = test$fracture, predictor = pred2, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(simpleROC)#,print.thres = "best")
plot(simpleROC2,print.thres = "best",col = "red", add = T)
legend("bottomright",
       legend=c("simpleFit", "simpleFit2"),
       col=c("black", "red"),
       lwd=4, cex =1, xpd = TRUE, horiz = FALSE)

threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
## 
##           
## threshPred No Yes
##        No  65   7
##        Yes 47  30
##                                           
##                Accuracy : 0.6376          
##                  95% CI : (0.5549, 0.7147)
##     No Information Rate : 0.7517          
##     P-Value [Acc > NIR] : 0.9993          
##                                           
##                   Kappa : 0.2872          
##                                           
##  Mcnemar's Test P-Value : 1.113e-07       
##                                           
##             Sensitivity : 0.5804          
##             Specificity : 0.8108          
##          Pos Pred Value : 0.9028          
##          Neg Pred Value : 0.3896          
##              Prevalence : 0.7517          
##          Detection Rate : 0.4362          
##    Detection Prevalence : 0.4832          
##       Balanced Accuracy : 0.6956          
##                                           
##        'Positive' Class : No              
## 
exp(coef(simpleFit))
##     (Intercept)    priorfracYes             age          weight          height 
##    2.542845e-06    2.000080e+00    1.038419e+00    9.120777e-01    1.049517e+00 
##             bmi      premenoYes      momfracYes    armassistYes        smokeYes 
##    1.294533e+00    1.404781e+00    2.982259e+00    1.375598e+00    8.396008e-01 
##    rateriskSame rateriskGreater       fracscore      bonemedYes   bonemed_fuYes 
##    1.175091e+00    1.168242e+00    9.930958e-01    1.256261e+01    4.757748e+00 
##    bonetreatYes 
##    2.124348e-02
exp(coef(simpleFit2))
##     (Intercept)    priorfracYes             age          weight          height 
##       4.6205662       2.1761905       1.0425673       1.0080673       0.9583258 
##      premenoYes      momfracYes    armassistYes        smokeYes    rateriskSame 
##       1.3505582       2.9077177       1.3298156       0.8438397       1.1097260 
## rateriskGreater      bonemedYes   bonemed_fuYes 
##       1.1686387       0.7472229       1.8231259
#unsure of interpretation
vif(simpleFit)
##                  GVIF Df GVIF^(1/(2*Df))
## priorfrac    2.636703  1        1.623793
## age         19.803720  1        4.450137
## weight     161.879275  1       12.723179
## height      22.109206  1        4.702043
## bmi        152.626428  1       12.354207
## premeno      1.141684  1        1.068496
## momfrac      2.011768  1        1.418368
## armassist    7.218523  1        2.686731
## smoke        1.486298  1        1.219138
## raterisk     1.370565  2        1.081994
## fracscore   36.742820  1        6.061586
## bonemed     15.523977  1        3.940048
## bonemed_fu   3.992355  1        1.998088
## bonetreat   18.535272  1        4.305261
#square the third column to get the VIF for interpretation
vif_values <- vif(simpleFit)[, 3]
# Square each value in the column
squared_vif <- vif_values^2
# Print or use the squared VIF values as needed
print(squared_vif)
##  priorfrac        age     weight     height        bmi    premeno    momfrac 
##   2.636703  19.803720 161.879275  22.109206 152.626428   1.141684   2.011768 
##  armassist      smoke   raterisk  fracscore    bonemed bonemed_fu  bonetreat 
##   7.218523   1.486298   1.170711  36.742820  15.523977   3.992355  18.535272
vif(simpleFit2)
##                GVIF Df GVIF^(1/(2*Df))
## priorfrac  1.161462  1        1.077711
## age        1.462038  1        1.209148
## weight     1.615203  1        1.270906
## height     1.199241  1        1.095099
## premeno    1.122091  1        1.059288
## momfrac    1.108781  1        1.052987
## armassist  1.413060  1        1.188722
## smoke      1.086412  1        1.042311
## raterisk   1.368492  2        1.081585
## bonemed    3.134160  1        1.770356
## bonemed_fu 3.239427  1        1.799841
#square the third column to get the VIF for interpretation
vif_values_2 <- vif(simpleFit2)[, 3]
# Square each value in the column
squared_vif_2 <- vif_values_2^2
# Print or use the squared VIF values as needed
print(squared_vif_2)
##  priorfrac        age     weight     height    premeno    momfrac  armassist 
##   1.161462   1.462038   1.615203   1.199241   1.122091   1.108781   1.413060 
##      smoke   raterisk    bonemed bonemed_fu 
##   1.086412   1.169825   3.134160   3.239427
exp(confint(simpleFit))
## Waiting for profiling to be done...
##                        2.5 %       97.5 %
## (Intercept)     6.681249e-20 3.927494e+07
## priorfracYes    8.086189e-01 5.025006e+00
## age             9.119311e-01 1.184459e+00
## weight          7.366907e-01 1.117140e+00
## height          8.652955e-01 1.278407e+00
## bmi             7.667621e-01 2.241066e+00
## premenoYes      6.845111e-01 2.805011e+00
## momfracYes      1.087559e+00 8.354658e+00
## armassistYes    3.302160e-01 5.807852e+00
## smokeYes        2.062785e-01 3.031842e+00
## rateriskSame    5.957434e-01 2.336448e+00
## rateriskGreater 5.447745e-01 2.504448e+00
## fracscore       5.142176e-01 1.907485e+00
## bonemedYes      1.719265e+00 2.578341e+02
## bonemed_fuYes   1.593514e+00 1.488796e+01
## bonetreatYes    8.934937e-04 2.074946e-01
exp(confint(simpleFit2))
## Waiting for profiling to be done...
##                       2.5 %      97.5 %
## (Intercept)     0.002778797 8275.372400
## priorfracYes    1.208986755    3.910684
## age             1.007538173    1.079431
## weight          0.988123408    1.028176
## height          0.916399839    1.001038
## premenoYes      0.668192139    2.657400
## momfracYes      1.369277740    6.139585
## armassistYes    0.715509295    2.460909
## smokeYes        0.254954482    2.371788
## rateriskSame    0.572380717    2.163469
## rateriskGreater 0.555804952    2.459731
## bonemedYes      0.274849345    2.015713
## bonemed_fuYes   0.684619458    4.839544
library(ResourceSelection) 
## ResourceSelection 0.3-6   2023-06-27
hoslem.test(simpleFit$y,fitted(simpleFit))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  simpleFit$y, fitted(simpleFit)
## X-squared = 13.059, df = 8, p-value = 0.1098
library(ResourceSelection) 
hoslem.test(simpleFit2$y,fitted(simpleFit2))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  simpleFit2$y, fitted(simpleFit2)
## X-squared = 5.4021, df = 8, p-value = 0.7139
simpleROC$auc
## Area under the curve: 0.7107
simpleROC2$auc
## Area under the curve: 0.7362

Effects Plots - in appendix of presentation slides

library(sjPlot)
library(sjmisc)
## Learn more about sjmisc with 'browseVignettes("sjmisc")'.
#effects plot 
plot_model(simpleFit2,type="pred",terms=c("age"))
## Data were 'prettified'. Consider using `terms="age [all]"` to get smooth
##   plots.

plot_model(simpleFit2,type="pred",terms=c("priorfrac"))

plot_model(simpleFit2,type="pred",terms=c("weight"))
## Data were 'prettified'. Consider using `terms="weight [all]"` to get
##   smooth plots.

plot_model(simpleFit2,type="pred",terms=c("height"))
## Data were 'prettified'. Consider using `terms="height [all]"` to get
##   smooth plots.

# plot_model(simpleFit2,type="pred",terms=c("bmi"))
plot_model(simpleFit2,type="pred",terms=c("premeno"))

plot_model(simpleFit2,type="pred",terms=c("momfrac"))

plot_model(simpleFit2,type="pred",terms=c("armassist"))

plot_model(simpleFit2,type="pred",terms=c("smoke"))

# plot_model(simpleFit2,type="pred",terms=c("raterisk"))
# plot_model(simpleFit2,type="pred",terms=c("fracscore"))
plot_model(simpleFit2,type="pred",terms=c("bonemed"))

plot_model(simpleFit2,type="pred",terms=c("bonemed_fu"))

# plot_model(simpleFit2,type="pred",terms=c("bonetreat"))

#Stepwise feature selection on simpleFit2.

# Using simpleFit we try stepwise feature selection on the full model
# GLMNET and Stepwise feature selection tools exist in caret. 
# These can be used to handle situations where the number of predictors is too large to handle manually.  
# Like with MLR, you can use these tools to determine candidate models, but it is up to you to decide if interactions should be included, transformations applied, etc. 

library(caret)

fitControl<-trainControl(method="repeatedcv",number=5,repeats=1,classProbs=TRUE, summaryFunction=mnLogLoss)
set.seed(1234)
#note CV and error metric are not really used here, but logLoss is reported for the final model.
Step.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
          data=train,
          method="glmStepAIC",
          trControl=fitControl,
          metric="logLoss")
## Start:  AIC=311.84
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - premenoYes       1   285.86 309.86
## - rateriskSame     1   285.99 309.99
## - smokeYes         1   286.00 310.00
## - rateriskGreater  1   286.10 310.10
## - weight           1   286.33 310.33
## - armassistYes     1   286.81 310.81
## - bonemedYes       1   286.85 310.85
## - age              1   287.57 311.57
## - bonemed_fuYes    1   287.84 311.84
## <none>                 285.84 311.84
## - height           1   289.03 313.03
## - priorfracYes     1   290.55 314.55
## - momfracYes       1   292.15 316.15
## 
## Step:  AIC=309.86
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - smokeYes         1   286.01 308.01
## - rateriskSame     1   286.02 308.02
## - rateriskGreater  1   286.14 308.14
## - weight           1   286.35 308.35
## - armassistYes     1   286.87 308.87
## - bonemedYes       1   286.88 308.88
## - age              1   287.57 309.57
## - bonemed_fuYes    1   287.84 309.84
## <none>                 285.86 309.86
## - height           1   289.11 311.11
## - priorfracYes     1   290.61 312.61
## - momfracYes       1   292.18 314.18
## 
## Step:  AIC=308.01
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     armassistYes + rateriskSame + rateriskGreater + bonemedYes + 
##     bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskSame     1   286.16 306.16
## - rateriskGreater  1   286.28 306.28
## - weight           1   286.57 306.57
## - armassistYes     1   286.94 306.94
## - bonemedYes       1   287.07 307.07
## <none>                 286.01 308.01
## - age              1   288.09 308.09
## - bonemed_fuYes    1   288.21 308.21
## - height           1   289.23 309.23
## - priorfracYes     1   290.61 310.61
## - momfracYes       1   292.57 312.57
## 
## Step:  AIC=306.16
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     armassistYes + rateriskGreater + bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   286.29 304.29
## - weight           1   286.76 304.76
## - bonemedYes       1   287.15 305.15
## - armassistYes     1   287.19 305.19
## - age              1   288.15 306.15
## <none>                 286.16 306.16
## - bonemed_fuYes    1   288.37 306.37
## - height           1   289.58 307.58
## - priorfracYes     1   290.76 308.76
## - momfracYes       1   293.00 311.00
## 
## Step:  AIC=304.29
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     armassistYes + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - weight         1   286.79 302.79
## - bonemedYes     1   287.39 303.39
## - armassistYes   1   287.48 303.48
## - age            1   288.17 304.17
## <none>               286.29 304.29
## - bonemed_fuYes  1   289.07 305.07
## - height         1   289.64 305.64
## - priorfracYes   1   291.36 307.36
## - momfracYes     1   293.40 309.40
## 
## Step:  AIC=302.79
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes + 
##     bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemedYes     1   288.26 302.26
## - age            1   288.27 302.27
## - armassistYes   1   288.71 302.71
## <none>               286.79 302.79
## - bonemed_fuYes  1   289.62 303.62
## - height         1   289.68 303.68
## - priorfracYes   1   292.03 306.03
## - momfracYes     1   293.58 307.58
## 
## Step:  AIC=302.26
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## - age            1   289.44 301.44
## - bonemed_fuYes  1   289.64 301.64
## <none>               288.26 302.26
## - armassistYes   1   290.38 302.38
## - height         1   291.00 303.00
## - priorfracYes   1   293.38 305.38
## - momfracYes     1   295.48 307.48
## 
## Step:  AIC=301.44
## .outcome ~ priorfracYes + height + momfracYes + armassistYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemed_fuYes  1   291.02 301.02
## <none>               289.44 301.44
## - height         1   292.74 302.74
## - armassistYes   1   292.95 302.95
## - priorfracYes   1   295.87 305.87
## - momfracYes     1   296.70 306.70
## 
## Step:  AIC=301.02
## .outcome ~ priorfracYes + height + momfracYes + armassistYes
## 
##                Df Deviance    AIC
## <none>              291.02 301.02
## - armassistYes  1   294.61 302.61
## - height        1   295.26 303.26
## - priorfracYes  1   298.64 306.64
## - momfracYes    1   298.90 306.90
## Start:  AIC=301.65
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   275.65 299.65
## - rateriskSame     1   275.73 299.73
## - bonemedYes       1   275.75 299.75
## - smokeYes         1   275.86 299.86
## - armassistYes     1   275.97 299.97
## - premenoYes       1   276.30 300.30
## - weight           1   277.00 301.00
## - bonemed_fuYes    1   277.47 301.47
## <none>                 275.65 301.65
## - momfracYes       1   278.91 302.91
## - age              1   280.76 304.76
## - height           1   281.53 305.53
## - priorfracYes     1   282.89 306.89
## 
## Step:  AIC=299.65
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + bonemedYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## - rateriskSame   1   275.75 297.75
## - bonemedYes     1   275.75 297.75
## - smokeYes       1   275.86 297.86
## - armassistYes   1   276.00 298.00
## - premenoYes     1   276.31 298.31
## - weight         1   277.03 299.03
## <none>               275.65 299.65
## - bonemed_fuYes  1   277.65 299.65
## - momfracYes     1   278.94 300.94
## - age            1   280.84 302.84
## - height         1   281.54 303.54
## - priorfracYes   1   283.05 305.05
## 
## Step:  AIC=297.75
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemedYes     1   275.81 295.81
## - smokeYes       1   275.95 295.95
## - armassistYes   1   276.08 296.08
## - premenoYes     1   276.45 296.45
## - weight         1   277.26 297.26
## - bonemed_fuYes  1   277.66 297.66
## <none>               275.75 297.75
## - momfracYes     1   279.30 299.30
## - age            1   280.95 300.95
## - height         1   281.89 301.89
## - priorfracYes   1   283.08 303.08
## 
## Step:  AIC=295.82
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - smokeYes       1   276.01 294.01
## - armassistYes   1   276.15 294.15
## - premenoYes     1   276.54 294.54
## - weight         1   277.49 295.49
## <none>               275.81 295.81
## - bonemed_fuYes  1   279.05 297.05
## - momfracYes     1   279.50 297.50
## - age            1   280.95 298.95
## - height         1   282.01 300.01
## - priorfracYes   1   283.10 301.10
## 
## Step:  AIC=294.01
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - armassistYes   1   276.45 292.45
## - premenoYes     1   276.77 292.77
## - weight         1   277.56 293.56
## <none>               276.01 294.01
## - bonemed_fuYes  1   279.08 295.08
## - momfracYes     1   279.58 295.58
## - age            1   280.95 296.95
## - height         1   282.07 298.07
## - priorfracYes   1   283.53 299.53
## 
## Step:  AIC=292.45
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - premenoYes     1   277.45 291.45
## <none>               276.45 292.45
## - weight         1   279.08 293.08
## - bonemed_fuYes  1   279.65 293.65
## - momfracYes     1   279.95 293.95
## - height         1   282.72 296.72
## - age            1   283.57 297.57
## - priorfracYes   1   284.55 298.55
## 
## Step:  AIC=291.45
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## <none>               277.45 291.45
## - weight         1   280.17 292.17
## - bonemed_fuYes  1   280.21 292.21
## - momfracYes     1   280.93 292.93
## - age            1   283.82 295.82
## - height         1   284.11 296.11
## - priorfracYes   1   285.63 297.63
## Start:  AIC=303.23
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - weight           1   277.24 301.24
## - smokeYes         1   277.26 301.26
## - bonemedYes       1   277.55 301.55
## - height           1   277.74 301.74
## - rateriskSame     1   278.00 302.00
## - bonemed_fuYes    1   278.58 302.58
## - rateriskGreater  1   279.13 303.13
## <none>                 277.23 303.23
## - armassistYes     1   279.56 303.56
## - age              1   279.72 303.72
## - premenoYes       1   280.77 304.77
## - momfracYes       1   281.68 305.68
## - priorfracYes     1   283.18 307.18
## 
## Step:  AIC=301.24
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - smokeYes         1   277.27 299.27
## - bonemedYes       1   277.55 299.55
## - height           1   277.81 299.81
## - rateriskSame     1   278.01 300.01
## - bonemed_fuYes    1   278.58 300.58
## - rateriskGreater  1   279.20 301.20
## <none>                 277.24 301.24
## - armassistYes     1   279.94 301.94
## - age              1   280.34 302.34
## - premenoYes       1   280.78 302.78
## - momfracYes       1   281.81 303.81
## - priorfracYes     1   283.21 305.21
## 
## Step:  AIC=299.27
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + rateriskSame + rateriskGreater + bonemedYes + 
##     bonemed_fuYes
## 
##                   Df Deviance    AIC
## - bonemedYes       1   277.60 297.60
## - height           1   277.86 297.86
## - rateriskSame     1   278.03 298.03
## - bonemed_fuYes    1   278.67 298.67
## - rateriskGreater  1   279.21 299.21
## <none>                 277.27 299.27
## - armassistYes     1   279.94 299.94
## - age              1   280.42 300.42
## - premenoYes       1   280.78 300.78
## - momfracYes       1   281.93 301.93
## - priorfracYes     1   283.22 303.22
## 
## Step:  AIC=297.6
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + rateriskSame + rateriskGreater + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - height           1   278.19 296.19
## - rateriskSame     1   278.32 296.32
## - bonemed_fuYes    1   279.09 297.09
## <none>                 277.60 297.60
## - rateriskGreater  1   279.61 297.61
## - armassistYes     1   280.50 298.50
## - age              1   280.67 298.67
## - premenoYes       1   281.18 299.18
## - momfracYes       1   282.51 300.51
## - priorfracYes     1   283.26 301.26
## 
## Step:  AIC=296.19
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes + 
##     rateriskSame + rateriskGreater + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskSame     1   279.09 295.09
## - bonemed_fuYes    1   280.06 296.06
## <none>                 278.19 296.19
## - rateriskGreater  1   280.48 296.48
## - armassistYes     1   280.82 296.82
## - premenoYes       1   282.05 298.05
## - age              1   282.15 298.15
## - momfracYes       1   282.70 298.70
## - priorfracYes     1   283.56 299.56
## 
## Step:  AIC=295.09
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes + 
##     rateriskGreater + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   280.49 294.49
## <none>                 279.09 295.09
## - bonemed_fuYes    1   281.61 295.61
## - armassistYes     1   282.12 296.12
## - age              1   282.82 296.82
## - premenoYes       1   283.31 297.31
## - momfracYes       1   284.23 298.23
## - priorfracYes     1   284.47 298.47
## 
## Step:  AIC=294.49
## .outcome ~ priorfracYes + age + premenoYes + momfracYes + armassistYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## <none>               280.49 294.49
## - age            1   283.57 295.57
## - armassistYes   1   284.26 296.26
## - bonemed_fuYes  1   284.68 296.68
## - premenoYes     1   284.88 296.88
## - momfracYes     1   286.40 298.40
## - priorfracYes   1   286.49 298.49
## Start:  AIC=298.97
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   272.98 296.98
## - armassistYes     1   273.00 297.00
## - premenoYes       1   273.03 297.03
## - rateriskSame     1   273.08 297.08
## - bonemedYes       1   273.30 297.30
## - smokeYes         1   273.57 297.57
## <none>                 272.97 298.97
## - weight           1   275.24 299.24
## - bonemed_fuYes    1   275.33 299.33
## - priorfracYes     1   278.87 302.87
## - height           1   279.04 303.04
## - age              1   281.54 305.54
## - momfracYes       1   282.02 306.02
## 
## Step:  AIC=296.98
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + bonemedYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## - armassistYes   1   273.00 295.00
## - premenoYes     1   273.05 295.05
## - rateriskSame   1   273.09 295.09
## - bonemedYes     1   273.30 295.30
## - smokeYes       1   273.58 295.58
## <none>               272.98 296.98
## - weight         1   275.38 297.38
## - bonemed_fuYes  1   275.44 297.44
## - priorfracYes   1   278.93 300.93
## - height         1   279.04 301.04
## - age            1   282.09 304.09
## - momfracYes     1   282.15 304.15
## 
## Step:  AIC=295
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + smokeYes + rateriskSame + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - premenoYes     1   273.06 293.06
## - rateriskSame   1   273.12 293.12
## - bonemedYes     1   273.32 293.32
## - smokeYes       1   273.59 293.59
## <none>               273.00 295.00
## - bonemed_fuYes  1   275.50 295.50
## - weight         1   276.06 296.06
## - priorfracYes   1   279.00 299.00
## - height         1   279.08 299.08
## - momfracYes     1   282.15 302.15
## - age            1   284.00 304.00
## 
## Step:  AIC=293.06
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     smokeYes + rateriskSame + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - rateriskSame   1   273.18 291.18
## - bonemedYes     1   273.37 291.37
## - smokeYes       1   273.65 291.65
## <none>               273.06 293.06
## - bonemed_fuYes  1   275.57 293.57
## - weight         1   276.09 294.09
## - priorfracYes   1   279.00 297.00
## - height         1   279.08 297.08
## - momfracYes     1   282.18 300.18
## - age            1   284.60 302.60
## 
## Step:  AIC=291.18
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     smokeYes + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemedYes     1   273.55 289.55
## - smokeYes       1   273.75 289.75
## <none>               273.18 291.18
## - bonemed_fuYes  1   275.80 291.80
## - weight         1   276.10 292.10
## - height         1   279.08 295.08
## - priorfracYes   1   279.27 295.27
## - momfracYes     1   282.20 298.20
## - age            1   284.63 300.63
## 
## Step:  AIC=289.55
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     smokeYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - smokeYes       1   274.11 288.11
## <none>               273.55 289.55
## - bonemed_fuYes  1   276.94 290.94
## - weight         1   276.98 290.98
## - priorfracYes   1   279.50 293.50
## - height         1   279.60 293.60
## - momfracYes     1   282.56 296.56
## - age            1   284.65 298.65
## 
## Step:  AIC=288.11
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## <none>               274.11 288.11
## - weight         1   277.88 289.88
## - bonemed_fuYes  1   277.92 289.92
## - priorfracYes   1   279.74 291.74
## - height         1   280.47 292.47
## - momfracYes     1   283.45 295.45
## - age            1   285.80 297.80
## Start:  AIC=309.58
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - bonemed_fuYes    1   283.58 307.58
## - weight           1   283.58 307.58
## - bonemedYes       1   283.60 307.60
## - rateriskGreater  1   283.65 307.65
## - rateriskSame     1   283.66 307.66
## - smokeYes         1   284.17 308.17
## - armassistYes     1   284.40 308.40
## - premenoYes       1   284.97 308.97
## - height           1   285.08 309.08
## <none>                 283.58 309.58
## - priorfracYes     1   287.94 311.94
## - momfracYes       1   290.71 314.71
## - age              1   290.78 314.78
## 
## Step:  AIC=307.58
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes
## 
##                   Df Deviance    AIC
## - weight           1   283.58 305.58
## - rateriskGreater  1   283.65 305.65
## - rateriskSame     1   283.66 305.66
## - bonemedYes       1   283.67 305.67
## - smokeYes         1   284.17 306.17
## - armassistYes     1   284.41 306.41
## - premenoYes       1   284.99 306.99
## - height           1   285.08 307.08
## <none>                 283.58 307.58
## - priorfracYes     1   287.95 309.95
## - momfracYes       1   290.74 312.74
## - age              1   290.79 312.79
## 
## Step:  AIC=305.58
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   283.65 303.65
## - rateriskSame     1   283.66 303.66
## - bonemedYes       1   283.69 303.69
## - smokeYes         1   284.19 304.19
## - armassistYes     1   284.61 304.61
## - premenoYes       1   284.99 304.99
## - height           1   285.21 305.21
## <none>                 283.58 305.58
## - priorfracYes     1   287.98 307.98
## - momfracYes       1   290.90 310.90
## - age              1   291.62 311.62
## 
## Step:  AIC=303.66
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + smokeYes + rateriskSame + bonemedYes
## 
##                Df Deviance    AIC
## - rateriskSame  1   283.68 301.68
## - bonemedYes    1   283.72 301.72
## - smokeYes      1   284.25 302.25
## - armassistYes  1   284.83 302.83
## - premenoYes    1   285.18 303.18
## - height        1   285.34 303.34
## <none>              283.65 303.65
## - priorfracYes  1   288.27 306.27
## - momfracYes    1   291.52 309.52
## - age           1   291.64 309.64
## 
## Step:  AIC=301.68
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + smokeYes + bonemedYes
## 
##                Df Deviance    AIC
## - bonemedYes    1   283.74 299.74
## - smokeYes      1   284.27 300.27
## - armassistYes  1   284.88 300.88
## - premenoYes    1   285.23 301.23
## - height        1   285.41 301.41
## <none>              283.68 301.68
## - priorfracYes  1   288.30 304.30
## - age           1   291.66 307.66
## - momfracYes    1   291.73 307.73
## 
## Step:  AIC=299.74
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes + smokeYes
## 
##                Df Deviance    AIC
## - smokeYes      1   284.30 298.30
## - armassistYes  1   284.93 298.93
## - premenoYes    1   285.39 299.39
## - height        1   285.42 299.42
## <none>              283.74 299.74
## - priorfracYes  1   288.33 302.33
## - age           1   291.66 305.66
## - momfracYes    1   291.86 305.86
## 
## Step:  AIC=298.29
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes + 
##     armassistYes
## 
##                Df Deviance    AIC
## - armassistYes  1   285.35 297.35
## - premenoYes    1   285.81 297.81
## - height        1   286.03 298.03
## <none>              284.30 298.30
## - priorfracYes  1   288.61 300.61
## - momfracYes    1   292.86 304.86
## - age           1   292.93 304.93
## 
## Step:  AIC=297.35
## .outcome ~ priorfracYes + age + height + premenoYes + momfracYes
## 
##                Df Deviance    AIC
## - height        1   286.83 296.83
## - premenoYes    1   287.22 297.22
## <none>              285.35 297.35
## - priorfracYes  1   290.13 300.13
## - momfracYes    1   293.78 303.78
## - age           1   296.09 306.09
## 
## Step:  AIC=296.83
## .outcome ~ priorfracYes + age + premenoYes + momfracYes
## 
##                Df Deviance    AIC
## <none>              286.83 296.83
## - premenoYes    1   288.92 296.92
## - priorfracYes  1   291.99 299.99
## - momfracYes    1   294.54 302.54
## - age           1   298.01 306.01
## Start:  AIC=379.2
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + smokeYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - smokeYes         1   353.29 377.29
## - rateriskSame     1   353.29 377.29
## - rateriskGreater  1   353.37 377.37
## - bonemedYes       1   353.53 377.53
## - weight           1   353.83 377.83
## - premenoYes       1   353.92 377.92
## - armassistYes     1   354.02 378.02
## - bonemed_fuYes    1   354.66 378.66
## <none>                 353.20 379.20
## - height           1   356.86 380.86
## - age              1   358.92 382.92
## - priorfracYes     1   359.89 383.89
## - momfracYes       1   360.81 384.81
## 
## Step:  AIC=377.29
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + rateriskSame + rateriskGreater + 
##     bonemedYes + bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskSame     1   353.38 375.38
## - rateriskGreater  1   353.45 375.45
## - bonemedYes       1   353.64 375.64
## - weight           1   353.99 375.99
## - premenoYes       1   353.99 375.99
## - armassistYes     1   354.05 376.05
## - bonemed_fuYes    1   354.87 376.87
## <none>                 353.29 377.29
## - height           1   357.01 379.01
## - age              1   359.37 381.37
## - priorfracYes     1   359.89 381.89
## - momfracYes       1   361.10 383.10
## 
## Step:  AIC=375.38
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + rateriskGreater + bonemedYes + 
##     bonemed_fuYes
## 
##                   Df Deviance    AIC
## - rateriskGreater  1   353.46 373.46
## - bonemedYes       1   353.70 373.70
## - weight           1   354.10 374.10
## - premenoYes       1   354.14 374.14
## - armassistYes     1   354.21 374.21
## - bonemed_fuYes    1   355.00 375.00
## <none>                 353.38 375.38
## - height           1   357.29 377.29
## - age              1   359.39 379.39
## - priorfracYes     1   359.98 379.98
## - momfracYes       1   361.63 381.63
## 
## Step:  AIC=373.46
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + bonemedYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemedYes     1   353.83 371.83
## - weight         1   354.11 372.11
## - premenoYes     1   354.25 372.25
## - armassistYes   1   354.42 372.42
## - bonemed_fuYes  1   355.43 373.43
## <none>               353.46 373.46
## - height         1   357.31 375.31
## - age            1   359.42 377.42
## - priorfracYes   1   360.46 378.46
## - momfracYes     1   361.83 379.83
## 
## Step:  AIC=371.83
## .outcome ~ priorfracYes + age + weight + height + premenoYes + 
##     momfracYes + armassistYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - premenoYes     1   354.66 370.66
## - weight         1   354.68 370.68
## - armassistYes   1   354.79 370.79
## <none>               353.83 371.83
## - bonemed_fuYes  1   356.07 372.07
## - height         1   357.77 373.77
## - age            1   359.64 375.64
## - priorfracYes   1   360.64 376.64
## - momfracYes     1   362.55 378.55
## 
## Step:  AIC=370.66
## .outcome ~ priorfracYes + age + weight + height + momfracYes + 
##     armassistYes + bonemed_fuYes
## 
##                 Df Deviance    AIC
## - weight         1   355.47 369.47
## - armassistYes   1   355.91 369.91
## - bonemed_fuYes  1   356.57 370.57
## <none>               354.66 370.66
## - height         1   358.96 372.96
## - age            1   359.78 373.78
## - priorfracYes   1   361.70 375.70
## - momfracYes     1   363.46 377.46
## 
## Step:  AIC=369.47
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes + 
##     bonemed_fuYes
## 
##                 Df Deviance    AIC
## - bonemed_fuYes  1   356.99 368.99
## <none>               355.47 369.47
## - armassistYes   1   357.97 369.97
## - height         1   359.05 371.05
## - age            1   359.81 371.81
## - priorfracYes   1   362.66 374.66
## - momfracYes     1   363.77 375.77
## 
## Step:  AIC=368.99
## .outcome ~ priorfracYes + age + height + momfracYes + armassistYes
## 
##                Df Deviance    AIC
## <none>              356.99 368.99
## - armassistYes  1   359.46 369.46
## - height        1   361.39 371.39
## - age           1   361.74 371.74
## - priorfracYes  1   365.40 375.40
## - momfracYes    1   365.90 375.90
#GLMNET 
glmnet.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
            data=train,
            method="glmnet",
            trControl=fitControl,
            metric="logLoss")

summary(Step.fit)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7903  -0.7273  -0.5617   0.1563   2.2008  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   3.03228    3.63670   0.834  0.40439   
## priorfracYes  0.84112    0.28755   2.925  0.00344 **
## age           0.03331    0.01532   2.175  0.02964 * 
## height       -0.04373    0.02102  -2.081  0.03745 * 
## momfracYes    1.11109    0.36538   3.041  0.00236 **
## armassistYes  0.43647    0.27643   1.579  0.11434   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 395.31  on 350  degrees of freedom
## Residual deviance: 356.99  on 345  degrees of freedom
## AIC: 368.99
## 
## Number of Fisher Scoring iterations: 4
stepfit.fit_coefficients <-coef(Step.fit$finalModel)
glmnet.fit_coefficients <-coef(glmnet.fit$finalModel,glmnet.fit$finalModel$lambdaOpt)
#Examine what predictors are included 
stepfit.fit_coefficients 
##  (Intercept) priorfracYes          age       height   momfracYes armassistYes 
##   3.03227756   0.84112254   0.03331054  -0.04373389   1.11108604   0.43647327
#for statistical interpretation use these coefficients. 
glmnet.fit_coefficients
## 13 x 1 sparse Matrix of class "dgCMatrix"
##                          s1
## (Intercept)      0.01156540
## priorfracYes     0.63664157
## age              0.02672410
## weight           .         
## height          -0.02115462
## premenoYes       .         
## momfracYes       0.71652505
## armassistYes     0.24017188
## smokeYes         .         
## rateriskSame     .         
## rateriskGreater  .         
## bonemedYes       .         
## bonemed_fuYes    0.20814097

#simpleFit2_stepwise_selection - see how it performs by re-creating the model with only the variables that stepwise selected in the above code block and then seeing how well it performs on the valdiation dataset

# install.packages("gtsummary")
library(dplyr)
library(caret)
library(pROC)
library(car)
set.seed(1234)
trainIndex<-createDataPartition(bonemed$fracture,p=.70,list=F)

train <- bonemed[trainIndex,]
test <- bonemed[-trainIndex,]

# our model -- interpret
simpleFit2 <- glm(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu, data=train,family="binomial")
summary(simpleFit2)
## 
## Call:
## glm(formula = fracture ~ priorfrac + age + weight + height + 
##     premeno + momfrac + armassist + smoke + raterisk + bonemed + 
##     bonemed_fu, family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8304  -0.7594  -0.5449   0.1339   2.2374  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      1.530517   3.786358   0.404  0.68605   
## priorfracYes     0.777576   0.298661   2.604  0.00923 **
## age              0.041686   0.017522   2.379  0.01736 * 
## weight           0.008035   0.010098   0.796  0.42620   
## height          -0.042567   0.022434  -1.897  0.05777 . 
## premenoYes       0.300518   0.350431   0.858  0.39113   
## momfracYes       1.067368   0.380868   2.802  0.00507 **
## armassistYes     0.285040   0.314312   0.907  0.36448   
## smokeYes        -0.169793   0.559139  -0.304  0.76138   
## rateriskSame     0.104113   0.337918   0.308  0.75800   
## rateriskGreater  0.155840   0.378042   0.412  0.68017   
## bonemedYes      -0.291392   0.504787  -0.577  0.56377   
## bonemed_fuYes    0.600553   0.495126   1.213  0.22516   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 395.31  on 350  degrees of freedom
## Residual deviance: 353.20  on 338  degrees of freedom
## AIC: 379.2
## 
## Number of Fisher Scoring iterations: 4
# based off our model after performing feature selection on it - interpret
simpleFit2_stepwise_selection<- glm(fracture~priorfrac + age + height + momfrac + armassist, data=train,family="binomial")
summary(simpleFit2_stepwise_selection)
## 
## Call:
## glm(formula = fracture ~ priorfrac + age + height + momfrac + 
##     armassist, family = "binomial", data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7903  -0.7273  -0.5617   0.1563   2.2008  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   3.03228    3.63670   0.834  0.40439   
## priorfracYes  0.84112    0.28755   2.925  0.00344 **
## age           0.03331    0.01532   2.175  0.02964 * 
## height       -0.04373    0.02102  -2.081  0.03745 * 
## momfracYes    1.11109    0.36538   3.041  0.00236 **
## armassistYes  0.43647    0.27643   1.579  0.11434   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 395.31  on 350  degrees of freedom
## Residual deviance: 356.99  on 345  degrees of freedom
## AIC: 368.99
## 
## Number of Fisher Scoring iterations: 4
pred <- predict(simpleFit2,test,type = "response")
simpleFit2_ROC <- roc(response = test$fracture, predictor = pred, levels = c("No","Yes"))
## Setting direction: controls < cases
pred_simpleFit2_stepwise_selection <- predict(simpleFit2_stepwise_selection,test,type = "response")
simpleFit2_stepwise_selection_ROC<- roc(response = test$fracture, predictor = pred_simpleFit2_stepwise_selection, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(simpleFit2_ROC)#,print.thres = "best")
plot(simpleFit2_stepwise_selection_ROC,print.thres = "best",col = "red", add = T)
legend("bottomright",
       legend=c("simpleFit2", "simpleFit2_stepwise_selection"),
       col=c("black", "red"),
       lwd=4, cex =1, xpd = TRUE, horiz = FALSE)

threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
## 
##           
## threshPred No Yes
##        No  68   8
##        Yes 44  29
##                                           
##                Accuracy : 0.651           
##                  95% CI : (0.5687, 0.7272)
##     No Information Rate : 0.7517          
##     P-Value [Acc > NIR] : 0.9977          
##                                           
##                   Kappa : 0.2949          
##                                           
##  Mcnemar's Test P-Value : 1.212e-06       
##                                           
##             Sensitivity : 0.6071          
##             Specificity : 0.7838          
##          Pos Pred Value : 0.8947          
##          Neg Pred Value : 0.3973          
##              Prevalence : 0.7517          
##          Detection Rate : 0.4564          
##    Detection Prevalence : 0.5101          
##       Balanced Accuracy : 0.6955          
##                                           
##        'Positive' Class : No              
## 
exp(coef(simpleFit2))
##     (Intercept)    priorfracYes             age          weight          height 
##       4.6205662       2.1761905       1.0425673       1.0080673       0.9583258 
##      premenoYes      momfracYes    armassistYes        smokeYes    rateriskSame 
##       1.3505582       2.9077177       1.3298156       0.8438397       1.1097260 
## rateriskGreater      bonemedYes   bonemed_fuYes 
##       1.1686387       0.7472229       1.8231259
exp(coef(simpleFit2_stepwise_selection))
##  (Intercept) priorfracYes          age       height   momfracYes armassistYes 
##   20.7444255    2.3189686    1.0338715    0.9572086    3.0376556    1.5472409
exp(confint(simpleFit2))
## Waiting for profiling to be done...
##                       2.5 %      97.5 %
## (Intercept)     0.002778797 8275.372400
## priorfracYes    1.208986755    3.910684
## age             1.007538173    1.079431
## weight          0.988123408    1.028176
## height          0.916399839    1.001038
## premenoYes      0.668192139    2.657400
## momfracYes      1.369277740    6.139585
## armassistYes    0.715509295    2.460909
## smokeYes        0.254954482    2.371788
## rateriskSame    0.572380717    2.163469
## rateriskGreater 0.555804952    2.459731
## bonemedYes      0.274849345    2.015713
## bonemed_fuYes   0.684619458    4.839544
exp(confint(simpleFit2_stepwise_selection))
## Waiting for profiling to be done...
##                   2.5 %       97.5 %
## (Intercept)  0.01691048 2.787618e+04
## priorfracYes 1.31646496 4.075424e+00
## age          1.00337015 1.065647e+00
## height       0.91791949 9.971164e-01
## momfracYes   1.47466053 6.224112e+00
## armassistYes 0.89757148 2.659839e+00
library(ResourceSelection) 
hoslem.test(simpleFit2$y,fitted(simpleFit2))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  simpleFit2$y, fitted(simpleFit2)
## X-squared = 5.4021, df = 8, p-value = 0.7139
library(ResourceSelection) 
hoslem.test(simpleFit2_stepwise_selection$y,fitted(simpleFit2_stepwise_selection))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  simpleFit2_stepwise_selection$y, fitted(simpleFit2_stepwise_selection)
## X-squared = 12.857, df = 8, p-value = 0.1169
simpleFit2_ROC$auc
## Area under the curve: 0.7362
simpleFit2_stepwise_selection_ROC$auc
## Area under the curve: 0.7002
simpleFit2_ROC$auc
## Area under the curve: 0.7362
simpleFit2_stepwise_selection_ROC$auc
## Area under the curve: 0.7002
#lower VIF below 10 is better above means you have multicollinearity issue 
vif(simpleFit2)
##                GVIF Df GVIF^(1/(2*Df))
## priorfrac  1.161462  1        1.077711
## age        1.462038  1        1.209148
## weight     1.615203  1        1.270906
## height     1.199241  1        1.095099
## premeno    1.122091  1        1.059288
## momfrac    1.108781  1        1.052987
## armassist  1.413060  1        1.188722
## smoke      1.086412  1        1.042311
## raterisk   1.368492  2        1.081585
## bonemed    3.134160  1        1.770356
## bonemed_fu 3.239427  1        1.799841
# select the third column 
vif_values2 <- vif(simpleFit2)[, 3]
# Square each value in the column
squared_vif_2<- vif_values2^2
print(squared_vif_2)
##  priorfrac        age     weight     height    premeno    momfrac  armassist 
##   1.161462   1.462038   1.615203   1.199241   1.122091   1.108781   1.413060 
##      smoke   raterisk    bonemed bonemed_fu 
##   1.086412   1.169825   3.134160   3.239427
vif(simpleFit2_stepwise_selection)
## priorfrac       age    height   momfrac armassist 
##  1.089066  1.148113  1.049448  1.040631  1.104911
#lower AIC the better 
AIC(simpleFit)
## [1] 372.1604
AIC(simpleFit2)
## [1] 379.1978
AIC(simpleFit2_stepwise_selection)
## [1] 368.9868

#OBJECTIVE 2

#complex our best model based off of simpleFit2 plus adding interactions where we thought necessary #then run penalized regression (feature selection) on our best fit complex model

##penalized complex model, maybe need to adjust alpha/lamda for better performance?
fitControl<-trainControl(method="repeatedcv",number=10,repeats=1,classProbs=TRUE)
set.seed(1234)

logistic.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + fracscore+ bonemed + bonemed_fu + age*raterisk + age*momfrac + age*armassist + age*smoke + age*raterisk + age*weight + weight*priorfrac +weight*premeno + weight*momfrac + weight*raterisk + weight*fracscore + weight*bonemed + weight*bonemed_fu,
               data=train,
               method="glmnet",
               trControl=fitControl,
               metric="logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
logistic.fit
## glmnet 
## 
## 351 samples
##  12 predictor
##   2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 316, 317, 315, 315, 316, 316, ... 
## Resampling results across tuning parameters:
## 
##   alpha  lambda        Accuracy   Kappa     
##   0.10   0.0002283267  0.7183754  0.04665266
##   0.10   0.0022832674  0.7380626  0.08985573
##   0.10   0.0228326736  0.7551261  0.14107354
##   0.55   0.0002283267  0.7212325  0.05136438
##   0.55   0.0022832674  0.7437768  0.11094007
##   0.55   0.0228326736  0.7466340  0.09074508
##   1.00   0.0002283267  0.7212325  0.05136438
##   1.00   0.0022832674  0.7466340  0.11841413
##   1.00   0.0228326736  0.7521055  0.07046812
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were alpha = 0.1 and lambda = 0.02283267.
coef(logistic.fit$finalModel,logistic.fit$finalModel$lambdaOpt)
## 28 x 1 sparse Matrix of class "dgCMatrix"
##                                   s1
## (Intercept)             2.913217e+00
## priorfracYes            9.481578e-02
## age                     1.846307e-02
## weight                 -9.482080e-04
## height                 -3.862941e-02
## premenoYes              2.160888e-01
## momfracYes              4.693081e-04
## armassistYes            6.862181e-02
## smokeYes               -1.744867e-01
## rateriskSame            9.698252e-02
## rateriskGreater        -1.636336e-02
## fracscore               3.511642e-02
## bonemedYes             -7.677833e-02
## bonemed_fuYes           .           
## age:rateriskSame        .           
## age:rateriskGreater    -1.208602e-03
## age:momfracYes          2.202190e-03
## age:armassistYes        .           
## age:smokeYes            .           
## age:weight              .           
## priorfracYes:weight     7.209026e-03
## weight:premenoYes       8.605912e-06
## weight:momfracYes       1.135259e-02
## weight:rateriskSame    -1.261643e-04
## weight:rateriskGreater  4.430291e-03
## weight:fracscore        9.839505e-04
## weight:bonemedYes       .           
## weight:bonemed_fuYes    6.023700e-03
preds <- predict(logistic.fit, test, type = "prob")
# logROC <- roc(response = test$fracture, predictor = preds$Yes)
logROC <- roc(response = test$fracture, predictor = preds$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
logROC$auc
## Area under the curve: 0.7478
plot(simpleFit2_ROC,print.thres = "best")

plot(logROC,print.thres = "best",col = "red", add = T)
legend("bottomright",
       legend=c("simpleFit2", "penalized_regression_fit_complex"),
       col=c("black", "red"),
       lwd=4, cex =1, xpd = TRUE, horiz = FALSE)

threshPred <- ifelse(pred > 0.199, "Yes", "No")
confusionMatrix(table(threshPred,test$fracture))
## Confusion Matrix and Statistics
## 
##           
## threshPred No Yes
##        No  68   8
##        Yes 44  29
##                                           
##                Accuracy : 0.651           
##                  95% CI : (0.5687, 0.7272)
##     No Information Rate : 0.7517          
##     P-Value [Acc > NIR] : 0.9977          
##                                           
##                   Kappa : 0.2949          
##                                           
##  Mcnemar's Test P-Value : 1.212e-06       
##                                           
##             Sensitivity : 0.6071          
##             Specificity : 0.7838          
##          Pos Pred Value : 0.8947          
##          Neg Pred Value : 0.3973          
##              Prevalence : 0.7517          
##          Detection Rate : 0.4564          
##    Detection Prevalence : 0.5101          
##       Balanced Accuracy : 0.6955          
##                                           
##        'Positive' Class : No              
## 
logROC$auc
## Area under the curve: 0.7478

LDA Model

lda.fit<-train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + fracscore + bonemed + bonemed_fu,
               data=train,
               method="lda",
               trControl=fitControl,
               metric="logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
lda.fit
## Linear Discriminant Analysis 
## 
## 351 samples
##  12 predictor
##   2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 316, 316, 316, 316, 316, 316, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7433707  0.1368824
#Computing predicted probabilities on the training data
predictLDA <- predict(lda.fit, test, type = "prob")[,"Yes"]

#Getting confusion matrix
ldaROC <- roc(response = test$fracture, predictor = predictLDA, levels = c("No", "Yes"))
## Setting direction: controls < cases
plot(ldaROC, print.thres = "best")

threshold=0.210
lda.preds<-ifelse(predictLDA>threshold,"Yes","No")
confusionMatrix(table(lda.preds, test$fracture))
## Confusion Matrix and Statistics
## 
##          
## lda.preds No Yes
##       No  74   8
##       Yes 38  29
##                                           
##                Accuracy : 0.6913          
##                  95% CI : (0.6105, 0.7643)
##     No Information Rate : 0.7517          
##     P-Value [Acc > NIR] : 0.9617          
##                                           
##                   Kappa : 0.3496          
##                                           
##  Mcnemar's Test P-Value : 1.904e-05       
##                                           
##             Sensitivity : 0.6607          
##             Specificity : 0.7838          
##          Pos Pred Value : 0.9024          
##          Neg Pred Value : 0.4328          
##              Prevalence : 0.7517          
##          Detection Rate : 0.4966          
##    Detection Prevalence : 0.5503          
##       Balanced Accuracy : 0.7222          
##                                           
##        'Positive' Class : No              
## 
ldaROC$auc
## Area under the curve: 0.7288
plot(simpleROC)
plot(ldaROC, add = T, col = "blue", print.thres = "best")

#knn model 
#knn model, not sure if can be improved, performance is pretty bad 
knn.fit <- train(fracture~priorfrac + age + weight + height + premeno + momfrac + armassist + smoke + raterisk + bonemed + bonemed_fu,
                 data = train,
                 method = "knn",
                 trControl = fitControl,
                 metric = "logLoss")
## Warning in train.default(x, y, weights = w, ...): The metric "logLoss" was not
## in the result set. Accuracy will be used instead.
knn.fit
## k-Nearest Neighbors 
## 
## 351 samples
##  11 predictor
##   2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 316, 316, 316, 315, 317, 316, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.7206536  0.1093987
##   7  0.7407470  0.1015481
##   9  0.7464706  0.1025063
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
knn.preds <- predict(knn.fit, test, type = "prob")
knn.roc <- roc(response = test$fracture, predictor = knn.preds$Yes, levels = c("No","Yes"))
## Setting direction: controls < cases
plot(knn.roc, print.thres = "best")

knn.roc$auc
## Area under the curve: 0.5211
knn.preds.thresh <- ifelse(knn.preds$Yes > 0.211, "Yes", "No")
confusionMatrix(table(knn.preds.thresh, test$fracture))
## Confusion Matrix and Statistics
## 
##                 
## knn.preds.thresh No Yes
##              No  39  10
##              Yes 73  27
##                                           
##                Accuracy : 0.443           
##                  95% CI : (0.3617, 0.5265)
##     No Information Rate : 0.7517          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0496          
##                                           
##  Mcnemar's Test P-Value : 1.008e-11       
##                                           
##             Sensitivity : 0.3482          
##             Specificity : 0.7297          
##          Pos Pred Value : 0.7959          
##          Neg Pred Value : 0.2700          
##              Prevalence : 0.7517          
##          Detection Rate : 0.2617          
##    Detection Prevalence : 0.3289          
##       Balanced Accuracy : 0.5390          
##                                           
##        'Positive' Class : No              
## 
plot(simpleROC)
plot(knn.roc, add = T, col = "blue", print.thres = "best")

knn.roc$auc
## Area under the curve: 0.5211
plot(simpleFit2_ROC)
plot(logROC, add = T, col = "red")
plot(knn.roc, add = T, col = "blue")
plot(ldaROC, add = T, col = "green")
legend("bottomright",legend=c("simpleFit2","complexFit", "kNN", "LDA"),col=c("black","red", "blue", "green"),lwd=3,cex=1,xpd=T,horiz=F)