#Seth A. Berkowitz, MD MPH" #Code adapted from Kara Rudolph https://rss.onlinelibrary.wiley.com/doi/full/10.1111/rssb.12213# #Code cannot be used for commericial purposes and all use must attribute original authors# ## a variable needs to be named a and have values 0/1 ## site variable needs to be named 'site' and needs to have value 0 for the site where the outcome data is not used and value 1 for the site where the outcome data is used ## z variable needs to be named z and have values 0/1 ## y variable needs to be named y and have values 0/1 ## w variables in a dataframe named w and with names w1:wx ittatetmle<-function(a, z, y, site, w, aamodel, asitemodel, azmodel, aoutmodel, aq2model){ datw<-w n.dat<-nrow(datw) #calculate components of clever covariate cpa <- predict(glm(formula=aamodel, family="binomial", data=data.frame(cbind(datw, a=a))), newdata=datw, type="response") cps <- predict(glm(formula=asitemodel, data=data.frame(cbind(site=site, datw)), family="binomial"), type="response") zmodels0 <- glm(formula=azmodel, data=data.frame(cbind(a=a, z=z, site=site, datw)), subset=site==0, family="binomial") zmodels1 <- glm(formula=azmodel, data=data.frame(cbind(a=a, z=z, site=site, datw)),subset=site==1, family="binomial") data_new0<-data_new1<-datw data_new0$a<-0 data_new1$a<-1 dga1s0<-dbinom(z, 1,prob=predict(zmodels0, newdata=data_new1, type="response")) dga1s1<-dbinom(z, 1,prob=predict(zmodels1, newdata=data_new1, type="response")) dga0s0<-dbinom(z, 1,prob=predict(zmodels0, newdata=data_new0, type="response")) dga0s1<-dbinom(z, 1,prob=predict(zmodels1, newdata=data_new0, type="response")) #calculate clever covariate g0w<-(1-cpa)*(dga0s1/dga0s0)*(cps/(1-cps)) g1w<-cpa*(dga1s1/dga1s0)*(cps/(1-cps)) h0w<-((1-a)*I(site==1))/g0w h1w<-(a*I(site==1))/g1w ymodel<-glm(formula=aoutmodel,family="binomial", data=data.frame(cbind(datw, a=a,z=z, site=site, y=y)), subset=site==1) #initial prediciton q<-cbind(predict(ymodel, type="link", newdata=data.frame(cbind(datw, a=a,z=z))), predict(ymodel, type="link", newdata=data.frame(cbind(datw, a=0,z=z))), predict(ymodel, type="link", newdata=data.frame(cbind(datw, a=1,z=z)))) epsilon<-coef(glm(y ~ -1 + offset(q[,1]) + h0w + h1w , family="binomial", subset=site==1 )) #update initial prediction q1<- q + c((epsilon[1]*h0w + epsilon[2]*h1w), epsilon[1]/g0w, epsilon[2]/g1w) predmodela0<-suppressWarnings(glm(formula=paste("plogis(q1)", aq2model, sep="~"), data=data.frame(cbind(w, a=a,site=site, q1=q1[,2])), subset=site==0 & a==0 , family="binomial")) predmodela1<-suppressWarnings(glm(formula=paste("plogis(q1)", aq2model, sep="~"), data=data.frame(cbind(w,a=a, site=site, q1=q1[,3])), subset=site==0 & a==1 , family="binomial")) predmodelaa<-suppressWarnings(glm(formula=paste("plogis(q1) ~", aq2model, "+a", sep=""), data=data.frame(cbind(w, site=site, q1=q1[,1], a=a)), subset=site==0, family="binomial")) #get initial prediction for second regression model q2pred<-cbind(predict(predmodelaa, type="link", newdata=data.frame(cbind(datw, a=a))), predict(predmodela0, type="link", newdata=datw), predict(predmodela1, type="link", newdata=datw)) cz<-cbind(ifelse(a==0,I(site==0)/(1-cpa), I(site==0)/cpa), I(site==0)/(1-cpa), I(site==0)/cpa) epsilon2<-suppressWarnings(coef(glm(plogis(q1[,1]) ~ -1 + offset(q2pred[,1]) + cz[,2] + cz[,3], family="binomial", subset= site==0))) for(k in 1:2){ epsilon2[k]<-ifelse(is.na(epsilon2[k]), 0, epsilon2[k]) } q2<- q2pred + c((epsilon2[1]*cz[,2] + epsilon2[2]*cz[,3]), epsilon2[1]/(1-cpa), epsilon2[2]/cpa) tmleest<-mean(plogis(q2[,3][site==0]))-mean(plogis(q2[,2][site==0])) ps0<-mean(I(site==0)) eic<-(((h1w/ps0) - (h0w/ps0))*(y - plogis(q[,1]))) + (((a*cz[,3]/ps0) - ((1-a)*cz[,2]/ps0))* (plogis(q[,1]) - plogis(q2pred[,1]))) + ((I(site==0)/ps0)*((plogis(q2pred[,3]) - plogis(q2pred[,2])) - tmleest)) #seth additions tmle_treated <- mean(plogis(q2[,3] [site==0])) tmle_untreated <- mean(plogis(q2[,2] [site==0])) variance_rd <- var(eic)/n.dat rd_pvalue <- 2 * pnorm(-abs(tmleest/sqrt(variance_rd))) tmleest_95CI_rd <- c(tmleest - (1.96*(sqrt(variance_rd))), tmleest + (1.96*(sqrt(variance_rd)))) return(list("est"=tmleest, "var"=var(eic)/n.dat, "eic"=eic, "untreated"=tmle_untreated, "treated"=tmle_treated, "h1w"=h1w, "h0w"=h0w, "confint_rd"= tmleest_95CI_rd, "rd_pvalue"=rd_pvalue, "q2" = q2)) } library(sas7bdat) #set directory where your cleaned and formatted data is located# setwd ("H:TOPCAT/data/created data") #main model, PO dat <- read.sas7bdat("transport_s1.sas7bdat") dat1 <- dat[complete.cases(dat),] wmat <- dat1[, 9:22] names(wmat)[1:14] <- sprintf("w%d", 1:14) amodel<-"a ~ 1" sitemodel<-"site ~ w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " zmodel<-"z ~ 1" outmodel<-"y ~ z + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " outmodelnoz<-"y ~ a + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " q2model<-"w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " ittatetmletransport<-ittatetmle(a=dat1$treatment, z=dat1$instrument, y=dat1$po, site=dat1$site, w=wmat, aamodel=amodel, asitemodel=sitemodel, azmodel=zmodel, aoutmodel=outmodel, aq2model=q2model ) ittatetmletransport$est ittatetmletransport$var ittatetmletransport$treated ittatetmletransport$untreated ittatetmletransport$confint_rd ittatetmletransport$rd_pvalue #HL test q2 <- as.data.frame(ittatetmletransport$q2) dat1$expected <- plogis(q2$V1) dat2 <- subset(dat1, site==0) dat_treated <- subset(dat1, site==0 & treatment==1) dat_untreated <- subset(dat1, site==0 & treatment==0) dat_untransported <- subset(dat1, site==1) library(ResourceSelection) test <- hoslem.test(dat2$po, dat2$expected, g = 10) test #Plot library(givitiR) cb <- givitiCalibrationBelt(dat2$po, dat2$expected, "external") plot(cb, main = "TMLE calibration--Primary Outcome", xlab = "TMLE predicted risk of primary outcome", ylab = "Observed risk of primary outcome") #untreated library(ResourceSelection) test <- hoslem.test(dat_untreated$po, dat_untreated$expected, g = 10) test cb <- givitiCalibrationBelt(dat_untreated$po, dat_untreated$expected, "external") plot(cb, main = "TMLE calibration--Primary Outcome in Untreated", xlab = "TMLE predicted probability of mortality", ylab = "Observed risk of mortality") #treated library(ResourceSelection) test <- hoslem.test(dat_treated$po, dat_treated$expected, g = 10) test cb <- givitiCalibrationBelt(dat_treated$po, dat_treated$expected, "external") plot(cb, main = "TMLE calibration--Primary Outcome in Treated", xlab = "TMLE predicted probability of mortality", ylab = "Observed risk of mortality") #main model, Mortality dat <- read.sas7bdat("transport_s1.sas7bdat") dat1 <- dat[complete.cases(dat),] wmat <- dat1[, 9:22] names(wmat)[1:14] <- sprintf("w%d", 1:14) amodel<-"a ~ 1" sitemodel<-"site ~ w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " zmodel<-"z ~ 1" outmodel<-"y ~ z + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " outmodelnoz<-"y ~ a + w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " q2model<-"w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12 + w13 + w14 " ittatetmletransport<-ittatetmle(a=dat1$treatment, z=dat1$instrument, y=dat1$died, site=dat1$site, w=wmat, aamodel=amodel, asitemodel=sitemodel, azmodel=zmodel, aoutmodel=outmodel, aq2model=q2model ) ittatetmletransport$est ittatetmletransport$var ittatetmletransport$treated ittatetmletransport$untreated ittatetmletransport$confint_rd ittatetmletransport$rd_pvalue #HL test q2 <- as.data.frame(ittatetmletransport$q2) dat1$expected <- plogis(q2$V1) dat2 <- subset(dat1, site==0) dat_treated <- subset(dat1, site==0 & treatment==1) dat_untreated <- subset(dat1, site==0 & treatment==0) dat_untransported <- subset(dat1, site==1) library(ResourceSelection) test <- hoslem.test(dat2$died, dat2$expected, g = 10) test #Plot library(givitiR) cb <- givitiCalibrationBelt(dat2$died, dat2$expected, "external") plot(cb, main = "TMLE calibration--Total Mortality", xlab = "TMLE predicted probability of mortality", ylab = "Observed risk of mortality") #untreated library(ResourceSelection) test <- hoslem.test(dat_untreated$died, dat_untreated$expected, g = 10) test cb <- givitiCalibrationBelt(dat_untreated$died, dat_untreated$expected, "external") plot(cb, main = "TMLE calibration--Total Mortality in Untreated", xlab = "TMLE predicted probability of mortality", ylab = "Observed risk of mortality") #treated library(ResourceSelection) test <- hoslem.test(dat_treated$died, dat_treated$expected, g = 10) test cb <- givitiCalibrationBelt(dat_treated$died, dat_treated$expected, "external") plot(cb, main = "TMLE calibration--Total Mortality in Treated", xlab = "TMLE predicted probability of mortality", ylab = "Observed risk of mortality")