library(simstudy) library(tidyverse) library(epitools) library(compareGroups) library(formattable) library(ggpubr) library(faraway) ############## Cas 1 remove(def) def <- defData(varname = "nr", dist = "nonrandom", formula = 10, id = "idnum") def <- defData(def, varname = "Group", formula = "0.3;0.2;0.5", dist = "categorical") def <- defData(def, varname = "Sex", formula = "0.5;0.5", dist = "categorical") def <- defData(def, varname = "Biomarker", formula = "10 + Group * 2 - Sex*3", variance = 4,dist='normal') def <- defData(def,varname='Status',formula='log(3)*Sex+log(2)*Group+log(1.2)*Biomarker',dist='binary',link='logit') def dt <- genData(10000, def) head(dt) dt$Sex <- factor(dt$Sex,labels=c('Male','Female')) dt$Group <- factor(dt$Group,labels=c('Control','A','B')) dt$Status <- factor(dt$Status,labels=c('Death','Alive')) dt<-dt %>% dplyr::select(Group,Sex,Biomarker,Status) saveRDS(dt,'RLogistica(1).R') ggplot(dt,aes(x=Sex,y=Biomarker,fill=Group))+geom_boxplot() + facet_wrap(.~Status) ggplot(dt,aes(x=Sex,y=Biomarker,fill=Status))+geom_boxplot() dt %>% group_by(Group,Sex,Status) %>% summarise(m=mean(Biomarker),sd=sd(Biomarker),n=n()) compareGroups(Status~.,data=dt) %>% createTable() res <- compareGroups(Sex~.-Status,data=dt) %>% createTable() strataTable(res,strata='Status') res <- compareGroups(Sex~.-Group,data=dt) %>% createTable(show.ratio = T,show.p.overall = F,type=1) strataTable(res,strata='Group') res <- glm(Status~Sex+Group+Biomarker,family='binomial',data=dt) summary(res) exp(confint(res)) ############################# remode(def) def <- defData(varname = "Sex", dist = "binary", formula = .5 , id="cid") def <- defData(def, varname = "Age", dist = "binary", formula = "-1.7 + .8*Sex", link="logit") def <- defData(def, varname = "baseDBP", dist = "normal", formula = 70, variance = 40) dtstudy <- genData(330, def) study1 <- trtAssign(dtstudy, n = 3, balanced = TRUE, strata = c("Sex", "Age"), grpName = "Group") study1$Sex <- factor(study1$Sex,labels=c('Male','Female')) study1$Age <- factor(study1$Sex,labels=c('Under65','Over65')) study1$baseDBP <- round(study1$baseDBP,1) study1$Group <- factor(study1$Group,labels=c('Control','A','B')) study1 %>% dplyr::select(-cid) formula1 <- c("-2 + 2*Sex - .5*Age", "-1 + 2*Sex + .5*Age") dtExp <- trtObserve(dtstudy, formulas = formula1, logit.link = TRUE, grpName = "exposure") dtstudy ############################# ANOVA 2F Exemple correcte effecte del grup i interacció amb sex ###################### remove(def) set.seed(54332) def <- defData(varname = "Sex", dist = "binary", formula = .5 , id="cid") def <- defData(def, varname = "Age", dist = "normal", formula = "50 + 0.5*(Sex+1)", variance=3) def <- defData(def,varname='Group',formula='1/3;1/3;1/3',dist='categorical') def <- defData(def,varname='Biomarker',formula='10+.5*Group-0.5*Sex+.3*Sex*Group+.02*Age', dist='normal',variance=3) df <- genData(300, def) df$Age <- round(df$Age,1) df$Biomarker <- round(df$Biomarker,1) df$Sex <- factor(df$Sex,labels=c('Male','Female')) df$Group <- factor(df$Group,labels=c('A','B','C')) df %>% head() ggplot(df,aes(x=Group,y=Biomarker,fill=Sex))+geom_boxplot() ggplot(df,aes(x=Age,y=Biomarker,group=Sex))+ geom_point(size=3,aes(color=Sex))+ geom_smooth(method = lm,se=F,aes(color=Sex)) res <- lm(Biomarker~Sex*Group,data=df) summary(res) anova(res) TukeyHSD(aov(res),'Group') %>% plot() confint(res) res <- lm(Biomarker~Sex*Group+Age,data=df) summary(res) anova(res) TukeyHSD(aov(res),'Group') %>% plot() confint(res) res <- df %>% group_by(Sex,Group) %>% summarise(m=mean(Biomarker), low=t.test(Biomarker)$conf.int[1], upper=t.test(Biomarker)$conf.int[2]) ggplot(res,aes(x=Group,y=m))+ geom_errorbar(aes(x=Group,ymin=low,ymax=upper,width=.1),color='blue',size=1)+ geom_point(size=4,shape=21,aes(fill=Sex))+ geom_line(aes(x=Group,y=m,group=Sex,color=Sex),size=1)+ ylab('95% CI of Mean') ############################ control y 2 tractaments ########################### simuC2T <- function(n=60,m=50,ef1=c(0,0),ef2=c(0,0),ef3=c(0,0),var=3) { df <- defData(varname='mu',dist='nonrandom',formula=m,id='cid') df <- defData(df,varname='e11',dist='nonrandom',formula=m+ef1[1],id='ef11') df <- defData(df,varname='e12',dist='nonrandom',formula=m+ef1[2],id='ef12') df <- defData(df,varname='e21',dist='nonrandom',formula=m+ef2[1],id='ef21') df <- defData(df,varname='e22',dist='nonrandom',formula=m+ef2[2],id='ef22') df <- defData(df,varname='e31',dist='nonrandom',formula=m+ef3[1],id='ef31') df <- defData(df,varname='e32',dist='nonrandom',formula=m+ef3[2],id='ef32') df <- defData(df,varname='group',formula='1/3;1/3;1/3',dist='categorical') df <- defData(df,varname='sex',formula='1/2;1/2',dist='categorical') dt<- genData(n,df) defC <- defCondition(condition = "group==1 & sex==1", formula = 'mu+e11',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==1 & sex==2", formula = 'mu+e12',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==2 & sex==1", formula = 'mu+e21',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==2 & sex==2", formula = 'mu+e22',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==3 & sex==1", formula = 'mu+e31',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==3 & sex==2", formula = 'mu+e32',variance=var,dist='normal') dt <- addCondition(defC, dt, "biomarker") dt$biomarker <- round(dt$biomarker,1) dt <- dt %>% dplyr::select(-c(cid,mu)) dt <- dt %>% arrange(group) %>% dplyr::select(group,biomarker,sex) dt$group <- factor(dt$group,labels=c('Control','Treatment 1','Treatment 2')) dt$sex <- factor(dt$sex,labels=c('Male','Female')) res <- dt %>% group_by(sex,group) %>% summarise(m=mean(biomarker), low=t.test(biomarker)$conf.int[1], upper=t.test(biomarker)$conf.int[2]) list( plot = ggplot(res,aes(x=group,y=m))+ geom_errorbar(aes(x=group,ymin=low,ymax=upper,width=.1),color='blue',size=1)+ geom_point(size=4,shape=21,aes(fill=sex))+ geom_line(aes(x=group,y=m,group=sex,color=sex),size=1)+ ylab('95% CI of Mean'), data = dt) } simu <- simuC2T(ef2=c(3,7)) df <- simu$data df PlotCI(df,biomarker,f1=group,f2=sex) PlotCI(df,biomarker,sex,group) res <- lm(biomarker~sex*group,data=df) summary(res) t<-compareGroups(group~biomarker,data=df) tt<-createTable(t) strataTable(tt,strata='sex') ########################################### Function PlotCI ####################### PlotCI <- function(df,xx,f1,f2) { dt <- data.frame(val=eval(substitute(xx),df), a=eval(substitute(f1),df), b=eval(substitute(f2),df)) res <- dt %>% group_by(a,b) %>% summarise(m=mean(val), low=t.test(val)$conf.int[1], upper=t.test(val)$conf.int[2]) ggplot(res,aes(x=a,y=m))+ geom_errorbar(aes(x=a,ymin=low,ymax=upper,width=.1),color='blue',size=1)+ geom_point(size=4,shape=21,aes(fill=b))+ geom_line(aes(x=a,y=m,group=b,color=b),size=1)+ ylab('95% CI of Mean')+ xlab('')+theme(legend.title=element_blank()) } head(df) p1=PlotCI(df,biomarker,f1=group,f2=sex) p2=PlotCI(df,biomarker,sex,group) ggarrange(p1,p2,ncol=2) ####################### Comparacion de recta de regression set.seed(2011) n <- 100 var <- 3 df <- defData(varname='mu',dist='nonrandom',formula=m,id='cid') df <- defData(df,varname='a',formula=-10,dist='nonrandom') df <- defData(df,varname='b',formula=0.5,dist='nonrandom') df <- defData(df,varname='group',formula='1/3;1/3;1/3',dist='categorical') df <- defData(df,varname='sex',formula='1/2;1/2',dist='categorical') df <- defData(df,varname='age',formula='50',var=5,dist='normal') dt<- genData(n,df) dt$age <- round(dt$age,1) dt defC <- defCondition(condition = "group==1 & sex==1", formula = 'a + b*age',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==1 & sex==2", formula = 'a + 0.7*age',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==2 & sex==1", formula = 'a + 0.3*age',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==2 & sex==2", formula = 'a + 0.7*age',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==3 & sex==1", formula = 'a + 0.3*age',variance=var,dist='normal') defC <- defCondition(defC,condition = "group==3 & sex==2", formula = 'a + 0.7*age',variance=var,dist='normal') dt <- addCondition(defC, dt, "biomarker") dt$biomarker <- dt$biomarker %>% round(1) dt <- dt %>% dplyr::select(-c(cid,a,b,mu)) dt$sex <- factor(dt$sex,labels=c('Male','Female')) dt$group <- factor(dt$group,labels=c('A','B','C')) dt saveRDS(dt,'SeminarioReg (3).R') dt <- readRDS('SeminarioReg (3).R') ggplot(dt,aes(x=age,y=biomarker,group=sex))+ geom_point(aes(color=sex,shape=sex),size=3)+ facet_wrap(.~group) ggplot(dt,aes(x=age,y=biomarker,group=sex))+ geom_point(aes(color=sex,shape=sex),size=3)+ geom_smooth(aes(color=sex),method=lm,se=F)+ facet_wrap(.~group) dd <- dt %>% group_by(sex,group) %>% summarise(m=mean(biomarker), low=t.test(biomarker)$conf.int[1], upper=t.test(biomarker)$conf.int[2]) ggplot(dd,aes(x=group,y=m))+ geom_errorbar(aes(x=group,ymin=low,ymax=upper,width=.1),color='blue',size=1)+ geom_point(size=4,shape=21,aes(fill=sex))+ geom_line(aes(x=group,y=m,group=sex,color=sex),size=1)+ ylab('95% CI of Mean') m.1 <- lm(biomarker~sex*group*age,data=dt) summary(m.1) m.2 <- lm(biomarker~sex*group+age,data=dt) summary(m.2) m.3 <- lm(biomarker~sex+group+age,data=dt) summary(m.3) m.4 <- lm(biomarker~sex+age,data=dt) summary(m.4) m.4.1 <- lm(biomarker~sex*age,data=dt) summary(m.4.1) anova(m.4,m.4.1,m.3,m.2,m.1) ggplot(dt,aes(x=age,y=biomarker))+ geom_point(aes(color=group,shape=sex),size=3)+ geom_abline(slope=0.65,intercept=-17.67,color='red')+ geom_abline(slope=0.65,intercept=-17.67+10.12,color='turquoise') ggplot(dt,aes(x=age,y=biomarker,group=sex))+ geom_point(aes(color=sex,shape=sex),size=3)+ geom_smooth(aes(color=sex),method=lm,se=F)+ facet_wrap(.~group) ################### library(car) data(Robey) Robey[1:20,] ggplot(Robey,aes(x=contraceptors, y=tfr, shape=region))+ geom_point(aes(color=region),size=5)+ geom_smooth(method=lm,se=T)+ facet_wrap(.~region) res.1 <- lm(tfr~contraceptors,Robey) summary(res.1) anova(res.1) #################### Animals data("Animals") ggplot(Animals,aes(x=log(body),y=log(brain)))+ geom_point(size=3) subset(Animals,body>5000) df <- Animals %>% filter(body<5000) p0 <- ggplot(df,aes(x=body,y=brain))+ geom_point(size=3) p1 <- ggplot(df,aes(x=log(body),y=log(brain)))+ geom_point(size=3) ggarrange(p0,p1) model <- lm(log(brain)~log(body),data=df) summary(model) ggplot(df,aes(x=log(body),y=log(brain)))+ geom_point(size=3)+ geom_smooth(method=lm)