#Copyright Seth A. Berkowitz MD MPH. #Code cannot be used for commericial purposes and all use must attribute original authors# library(nearfar) library(sas7bdat) library(MASS) setwd("H:/Community Servings/APCD Project/Data") data <- read.sas7bdat("analysis_hdps_matched.sas7bdat") names(data) data1 <- data[complete.cases(data),] names(matched_sample) covs <- c("age", "disability", "race_eth", "insurance", "english", "female", "index_date", "ip", "snf", "hha", "te", "pre_total_costs", "combinedscore", "hiv", "cancer", "renal", "diabetes", "chf", "medicare_spending", "pct_poverty") set.seed(12345) matched <- opt_nearfar(dta=data1, trt="cs", covs=covs, trt.type="bin", iv="distance15", imp.var=NA, tol.var=NA, adjust.IV=TRUE, max.time.seconds=2700) summary(matched) data1$ID <- seq.int(nrow(data1)) results <- matched$match; indices1 <- as.data.frame(results[,1]) colnames(indices1) <- "index" indices2 <- as.data.frame(results[,2]) colnames(indices2) <- "index" matches1 <- rbind(indices1, indices2) match_indices <- as.list(matches1$index) matched_sample <- data1[data1$ID %in% match_indices, ] indices1 <- as.list(indices1$index) matched_sample$encouraged <- ifelse(matched_sample$ID %in% indices1, 1,0) write.csv(matched_sample, "match2_results.csv") matched_sample <- read.csv("match2_results.csv") matched_sample$disability <- as.factor(matched_sample$disability) matched_sample$female <- as.factor(matched_sample$female) matched_sample$race_eth <- as.factor(matched_sample$race_eth) matched_sample$insurance <- as.factor(matched_sample$insurance) matched_sample$post_fu_mo <- (matched_sample$post_fu/30) matched_sample$post_fu_yrs <- (matched_sample$post_fu/365.25) sum(matched_sample$post_fu_yrs) #total cost analyses # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100) res <- residuals(first) # xuhat # TSRI second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=Gamma(link=log)) summary(second) data_a0 <- matched_sample data_a0$cs <- 0 data_a1 <- matched_sample data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- (predict(second, newdata=data_a1, type='response') +350) est_a1 <- mean(pred_a1) est_a0 <- mean(pred_a0) psi_tc <- mean(pred_a1) - mean(pred_a0) psiRR_tc <- mean(pred_a1) / mean(pred_a0) psi_tc psiRR_tc est_a1 est_a0 #ci for total cost #rd library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit), maxit=1000) res <- residuals(first) # xuhat # TSRI second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=Gamma(link=log), maxit=1000) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- (predict(second, newdata=data_a1, type='response') +350) psi <- mean(pred_a1) - mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rd_tc <- c(lb, ub) ci_wald_rd_tc #rr library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit), maxit=1000) res <- residuals(first) # xuhat # TSRI second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=Gamma(link=log), maxit=1000) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- (predict(second, newdata=data_a1, type='response') +350) psi <- mean(pred_a1) / mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rr_tc <- c(lb, ub) ci_wald_rr_tc #ip and snf costs library(DescTools) snf_costs <- read.sas7bdat('matched_sample_with_snf.sas7bdat') snf_costs$disability <- as.factor(snf_costs$disability) snf_costs$female <- as.factor(snf_costs$female) snf_costs$race_eth <- as.factor(snf_costs$race_eth) snf_costs$insurance <- as.factor(snf_costs$insurance) snf_costs$post_fu_mo <- (snf_costs$post_fu/30) snf_costs$post_snf_costs_mon <- (snf_costs$post_snf_costs/snf_costs$post_fu_mo) snf_costs$post_snf_costs_mon <- (snf_costs$post_snf_costs_mon + 1) snf_costs$post_snf_costs_mon_w <- Winsorize(snf_costs$post_snf_costs_mon, probs = c(.01, .99)) snf_costs$post_ip_costs_mon <- (snf_costs$post_ip_costs/snf_costs$post_fu_mo) snf_costs$post_ip_costs_mon <- (snf_costs$post_ip_costs_mon + 1) snf_costs$post_ip_costs_mon_w <- Winsorize(snf_costs$post_ip_costs_mon, probs = c(.01, .99)) snf_costs$post_ipsnf_costs_mon_w <- snf_costs$post_ip_costs_mon_w + snf_costs$post_snf_costs_mon_w # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=snf_costs, family=binomial(link=logit), maxit=100) res <- residuals(first) # xuhat # TSRI library('glm2') second <- glm2(post_ipsnf_costs_mon_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty , data=snf_costs, family=Gamma(link=log), maxit=1000) summary(second) data_a0 <- snf_costs data_a0$cs <- 0 data_a1 <- snf_costs data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- (predict(second, newdata=data_a1, type='response')) est_a1 <- mean(pred_a1) est_a0 <- mean(pred_a0) psi_tc <- mean(pred_a1) - mean(pred_a0) psiRR_tc <- mean(pred_a1) / mean(pred_a0) psi_tc psiRR_tc est_a1 est_a0 #ci for ip and snf cost #rd library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit), maxit=1000) # TSRI library('glm2') second <- glm2(post_ipsnf_costs_mon_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty , data=d, family=Gamma(link=log), maxit=1000) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') psi <- mean(pred_a1) - mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=snf_costs, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rd_tc <- c(lb, ub) ci_wald_rd_tc #pharm costs matched_sample$post_pharm_costs <- matched_sample$post_pharm_costs + .01 # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100) res <- residuals(first) # xuhat # TSRI second <- glm(post_pharm_costs ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + post_fu + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=Gamma(link=log)) summary(second) data_a0 <- matched_sample data_a0$cs <- 0 data_a1 <- matched_sample data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- (predict(second, newdata=data_a1, type='response') +350) est_a1 <- mean(pred_a1) est_a0 <- mean(pred_a0) psi_tc <- mean(pred_a1) - mean(pred_a0) psiRR_tc <- mean(pred_a1) / mean(pred_a0) psi_tc psiRR_tc est_a1 est_a0 #ip analyses library(DescTools) quantile(matched_sample$post_ip) matched_sample$post_ip_w <- Winsorize(matched_sample$post_ip, probs = c(.01, .99)) matched_sample$post_ip_w <- round(matched_sample$post_ip_w) quantile(matched_sample$post_ip_w) sum(matched_sample$post_ip_w) # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100) res <- residuals(first) # xuhat # TSRI second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=poisson(link=log)) summary(second) data_a0 <- matched_sample data_a0$cs <- 0 data_a1 <- matched_sample data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') est_a1 <- mean(pred_a1) est_a0 <- mean(pred_a0) psi_ip <- mean(pred_a1) - mean(pred_a0) psiRR_ip <- mean(pred_a1) / mean(pred_a0) psi_ip psiRR_ip est_a1 est_a0 #ci for ip #rd library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit)) res <- residuals(first) # xuhat # TSRI second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=poisson(link=log)) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') psi <- mean(pred_a1) - mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rd_ip <- c(lb, ub) ci_wald_rd_ip #rr library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit)) res <- residuals(first) # xuhat # TSRI second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=poisson(link=log)) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') psi <- mean(pred_a1) / mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rr_ip <- c(lb, ub) ci_wald_rr_ip #snf analyses library(DescTools) quantile(matched_sample$post_snf) matched_sample$post_snf_w <- Winsorize(matched_sample$post_snf, probs = c(.01, .99)) matched_sample$post_snf_w <- round(matched_sample$post_snf_w) quantile(matched_sample$post_snf_w) sum(matched_sample$post_snf_w) # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=1000) res <- residuals(first) # xuhat # TSRI second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + post_fu_mo + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=matched_sample, family=poisson(link=log), maxit=1000) summary(second) data_a0 <- matched_sample data_a0$cs <- 0 data_a1 <- matched_sample data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') est_a1 <- mean(pred_a1) est_a0 <- mean(pred_a0) psi_snf <- mean(pred_a1) - mean(pred_a0) psiRR_snf <- mean(pred_a1) / mean(pred_a0) psi_snf psiRR_snf est_a1 est_a0 #ci for snf #rd library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit), maxit=1000) res <- residuals(first) # xuhat # TSRI second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=poisson(link=log)) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') psi <- mean(pred_a1) - mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rd_snf <- c(lb, ub) ci_wald_rd_snf #rr library(boot) set.seed(999) rd <- function(data, indices) { d <- data[indices,] # allows boot to select sample # first stage first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=binomial(link=logit)) res <- residuals(first) # xuhat # TSRI second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha + te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending + pct_poverty, data=d, family=poisson(link=log)) data_a0 <- d data_a0$cs <- 0 data_a1 <- d data_a1$cs <- 1 pred_a0 <- predict(second, newdata=data_a0, type='response') pred_a1 <- predict(second, newdata=data_a1, type='response') psi <- mean(pred_a1) / mean(pred_a0) return(psi) } # bootstrapping with 1000 replications results <- boot(data=matched_sample, statistic=rd, R=1000) # view results results # get 95% confidence interval se <- sd(results$t) ub <- results$t0 + 1.96*(se) lb <- results$t0 - 1.96*(se) ci_wald_rr_snf <- c(lb, ub) ci_wald_rr_snf