#Copyright Seth A. Berkowitz MD MPH. 
#Code cannot be used for commericial purposes and all use must attribute original authors#

library(nearfar)
library(sas7bdat)
library(MASS)


setwd("H:/Community Servings/APCD Project/Data")

data <- read.sas7bdat("analysis_hdps_matched.sas7bdat")
names(data)
data1 <- data[complete.cases(data),]

names(matched_sample)

covs <- c("age", "disability", "race_eth", "insurance", "english", "female", "index_date", "ip",  "snf", "hha", "te", 
          "pre_total_costs", "combinedscore", "hiv", "cancer", "renal", "diabetes", "chf", "medicare_spending", "pct_poverty")
set.seed(12345)
matched <-  opt_nearfar(dta=data1, trt="cs", covs=covs,
                        trt.type="bin", iv="distance15", imp.var=NA, tol.var=NA, adjust.IV=TRUE,
                        max.time.seconds=2700)

summary(matched)


data1$ID <- seq.int(nrow(data1))

results <- matched$match;

indices1 <- as.data.frame(results[,1])
colnames(indices1) <- "index"
indices2 <- as.data.frame(results[,2])
colnames(indices2) <- "index"
matches1 <- rbind(indices1, indices2)

match_indices <- as.list(matches1$index)

matched_sample <-  data1[data1$ID %in% match_indices, ]
indices1 <- as.list(indices1$index)

matched_sample$encouraged <-  ifelse(matched_sample$ID %in% indices1, 1,0)

write.csv(matched_sample, "match2_results.csv")






matched_sample <- read.csv("match2_results.csv")

matched_sample$disability <- as.factor(matched_sample$disability)
matched_sample$female <- as.factor(matched_sample$female)
matched_sample$race_eth <- as.factor(matched_sample$race_eth)
matched_sample$insurance <- as.factor(matched_sample$insurance)
matched_sample$post_fu_mo <- (matched_sample$post_fu/30)
matched_sample$post_fu_yrs <- (matched_sample$post_fu/365.25)

sum(matched_sample$post_fu_yrs)

#total cost analyses

# first stage
first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
              te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
            + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100)

res <- residuals(first) # xuhat

# TSRI
second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance  + english + female + index_date + 
                ip + snf+ hha + 
                te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
              + pct_poverty, data=matched_sample, family=Gamma(link=log))

summary(second)


data_a0 <- matched_sample
data_a0$cs <- 0

data_a1 <- matched_sample
data_a1$cs <- 1

pred_a0 <- predict(second, newdata=data_a0, type='response')
pred_a1 <- (predict(second, newdata=data_a1, type='response') +350)


est_a1 <- mean(pred_a1)
est_a0 <- mean(pred_a0)
psi_tc <- mean(pred_a1) - mean(pred_a0)
psiRR_tc <- mean(pred_a1) / mean(pred_a0)

psi_tc 
psiRR_tc
est_a1
est_a0






#ci for total cost
#rd
library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit),  maxit=1000)
  res <- residuals(first) # xuhat 
  # TSRI
  second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d, family=Gamma(link=log), maxit=1000)
  

  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- (predict(second, newdata=data_a1, type='response') +350)
  
  psi <- mean(pred_a1) - mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 



# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rd_tc <- c(lb, ub)
ci_wald_rd_tc



#rr

library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit),  maxit=1000)
  res <- residuals(first) # xuhat
  # TSRI
  second <- glm(post_avg_monthly_costs_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d, family=Gamma(link=log), maxit=1000)
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- (predict(second, newdata=data_a1, type='response') +350)
  
  psi <- mean(pred_a1) / mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 


# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rr_tc <- c(lb, ub)
ci_wald_rr_tc





#ip and snf costs
library(DescTools)
snf_costs <- read.sas7bdat('matched_sample_with_snf.sas7bdat')
snf_costs$disability <- as.factor(snf_costs$disability)
snf_costs$female <- as.factor(snf_costs$female)
snf_costs$race_eth <- as.factor(snf_costs$race_eth)
snf_costs$insurance <- as.factor(snf_costs$insurance)
snf_costs$post_fu_mo <- (snf_costs$post_fu/30)
snf_costs$post_snf_costs_mon <- (snf_costs$post_snf_costs/snf_costs$post_fu_mo)
snf_costs$post_snf_costs_mon <- (snf_costs$post_snf_costs_mon + 1)
snf_costs$post_snf_costs_mon_w <- Winsorize(snf_costs$post_snf_costs_mon, probs = c(.01, .99))
snf_costs$post_ip_costs_mon <- (snf_costs$post_ip_costs/snf_costs$post_fu_mo)
snf_costs$post_ip_costs_mon <- (snf_costs$post_ip_costs_mon + 1)
snf_costs$post_ip_costs_mon_w <- Winsorize(snf_costs$post_ip_costs_mon, probs = c(.01, .99))
snf_costs$post_ipsnf_costs_mon_w <- snf_costs$post_ip_costs_mon_w + snf_costs$post_snf_costs_mon_w 

# first stage
first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
               te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending 
             + pct_poverty, data=snf_costs, family=binomial(link=logit), maxit=100)

res <- residuals(first) # xuhat

# TSRI
library('glm2')

second <- glm2(post_ipsnf_costs_mon_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                 ip + snf+ hha + 
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty , data=snf_costs, family=Gamma(link=log), maxit=1000)


summary(second)


data_a0 <- snf_costs
data_a0$cs <- 0

data_a1 <- snf_costs
data_a1$cs <- 1

pred_a0 <- predict(second, newdata=data_a0, type='response')
pred_a1 <- (predict(second, newdata=data_a1, type='response'))


est_a1 <- mean(pred_a1)
est_a0 <- mean(pred_a0)
psi_tc <- mean(pred_a1) - mean(pred_a0)
psiRR_tc <- mean(pred_a1) / mean(pred_a0)

psi_tc 
psiRR_tc
est_a1
est_a0





#ci for ip and snf cost
#rd
library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit),  maxit=1000)
   
  # TSRI
  library('glm2')
  
  second <- glm2(post_ipsnf_costs_mon_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                   ip + snf+ hha + 
                   te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                 + pct_poverty , data=d, family=Gamma(link=log), maxit=1000)
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- predict(second, newdata=data_a1, type='response')
  
  psi <- mean(pred_a1) - mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=snf_costs, statistic=rd, 
                R=1000)

# view results
results 



# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rd_tc <- c(lb, ub)
ci_wald_rd_tc









#pharm costs
matched_sample$post_pharm_costs <- matched_sample$post_pharm_costs + .01

# first stage
first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
               te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending 
             + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100)

res <- residuals(first) # xuhat

# TSRI
second <- glm(post_pharm_costs ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                ip + snf+ hha + post_fu +
                te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
              + pct_poverty, data=matched_sample, family=Gamma(link=log))

summary(second)


data_a0 <- matched_sample
data_a0$cs <- 0

data_a1 <- matched_sample
data_a1$cs <- 1

pred_a0 <- predict(second, newdata=data_a0, type='response')
pred_a1 <- (predict(second, newdata=data_a1, type='response') +350)


est_a1 <- mean(pred_a1)
est_a0 <- mean(pred_a0)
psi_tc <- mean(pred_a1) - mean(pred_a0)
psiRR_tc <- mean(pred_a1) / mean(pred_a0)

psi_tc 
psiRR_tc
est_a1
est_a0







#ip analyses

library(DescTools)
quantile(matched_sample$post_ip)
matched_sample$post_ip_w <- Winsorize(matched_sample$post_ip, probs = c(.01, .99))
matched_sample$post_ip_w <- round(matched_sample$post_ip_w)
quantile(matched_sample$post_ip_w)

sum(matched_sample$post_ip_w)




# first stage
first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
               te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
             + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=100)
res <- residuals(first) # xuhat
# TSRI
second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                ip + snf+ hha + 
                te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
              + pct_poverty,  data=matched_sample, family=poisson(link=log))

summary(second)


data_a0 <- matched_sample
data_a0$cs <- 0

data_a1 <- matched_sample
data_a1$cs <- 1

pred_a0 <- predict(second, newdata=data_a0, type='response')
pred_a1 <- predict(second, newdata=data_a1, type='response')


est_a1 <- mean(pred_a1)
est_a0 <- mean(pred_a0)

psi_ip <- mean(pred_a1) - mean(pred_a0)
psiRR_ip <- mean(pred_a1) / mean(pred_a0)

psi_ip 
psiRR_ip
est_a1
est_a0





#ci for ip
#rd
library(boot)
set.seed(999)


rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit))
  res <- residuals(first) # xuhat   
  # TSRI
  second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d, family=poisson(link=log))
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- predict(second, newdata=data_a1, type='response')
  
  psi <- mean(pred_a1) - mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 


# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rd_ip <- c(lb, ub)
ci_wald_rd_ip



#rr

library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit))
  res <- residuals(first) # xuhat
  # TSRI
  second <- glm(post_ip_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d, family=poisson(link=log))
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- predict(second, newdata=data_a1, type='response')
  
  psi <- mean(pred_a1) / mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 


# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rr_ip <- c(lb, ub)
ci_wald_rr_ip




#snf analyses


library(DescTools)
quantile(matched_sample$post_snf)
matched_sample$post_snf_w <- Winsorize(matched_sample$post_snf, probs = c(.01, .99))
matched_sample$post_snf_w <- round(matched_sample$post_snf_w)
quantile(matched_sample$post_snf_w)

sum(matched_sample$post_snf_w)

# first stage
first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
               te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
             + pct_poverty, data=matched_sample, family=binomial(link=logit), maxit=1000)
res <- residuals(first) # xuhat
# TSRI
second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                ip + snf+ hha + post_fu_mo +
                te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
              + pct_poverty, data=matched_sample, family=poisson(link=log), maxit=1000)

summary(second)


data_a0 <- matched_sample
data_a0$cs <- 0

data_a1 <- matched_sample
data_a1$cs <- 1

pred_a0 <- predict(second, newdata=data_a0, type='response')
pred_a1 <- predict(second, newdata=data_a1, type='response')


est_a1 <- mean(pred_a1)
est_a0 <- mean(pred_a0)
psi_snf <- mean(pred_a1) - mean(pred_a0)
psiRR_snf <- mean(pred_a1) / mean(pred_a0)

psi_snf 
psiRR_snf
est_a1
est_a0




#ci for snf
#rd
library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit),  maxit=1000)
  res <- residuals(first) # xuhat
  # TSRI
  second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d, family=poisson(link=log))
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- predict(second, newdata=data_a1, type='response')
  
  psi <- mean(pred_a1) - mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 


# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rd_snf <- c(lb, ub)
ci_wald_rd_snf



#rr

library(boot)
set.seed(999)

rd <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  # first stage
  first <- glm(cs ~ distance15 + age + disability + race_eth + insurance + english + female + index_date + ip + snf+ hha +
                 te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
               + pct_poverty, data=d, family=binomial(link=logit))
  res <- residuals(first) # xuhat
  # TSRI
  second <- glm(post_snf_w ~ cs + res + age + disability + race_eth + insurance + english + female + index_date + 
                  ip + snf+ hha +
                  te + pre_total_costs + combinedscore + hiv + cancer + renal + diabetes + chf + medicare_spending
                + pct_poverty, data=d,  family=poisson(link=log))
  
  
  
  
  data_a0 <- d
  data_a0$cs <- 0
  
  data_a1 <- d
  data_a1$cs <- 1
  
  pred_a0 <- predict(second, newdata=data_a0, type='response')
  pred_a1 <- predict(second, newdata=data_a1, type='response')
  
  psi <- mean(pred_a1) / mean(pred_a0)
  return(psi)
} 
# bootstrapping with 1000 replications 
results <- boot(data=matched_sample, statistic=rd, 
                R=1000)

# view results
results 


# get 95% confidence interval
se <- sd(results$t)
ub <- results$t0 + 1.96*(se)
lb <-  results$t0 - 1.96*(se)
ci_wald_rr_snf <- c(lb, ub)
ci_wald_rr_snf