Project Code

The Effect of Chile’s Neoliberal Reforms in the 1970s

This page contains the complete R code used for the analysis in this project. The code is presented here for reference and is not executed live.


{r setup, include=FALSE}
library(tidyverse)
library(xlsx)
library(Synth)
library(WDI)

Data Setup

# Download the World Bank WDI series used below for 1960-2019.
# `extra = TRUE` requests the additional country metadata; the `region`
# column used in the next step to drop aggregates presumably comes from it.
# The "KD" series are constant-price series; plot labels in this file
# describe them as 2015 USD.
wdi_dat <- WDI(
  indicator = c(
    "NY.GDP.PCAP.KD",    # GDP per capita (constant prices)
    "SP.DYN.LE00.IN",    # life expectancy at birth
    "SE.TER.ENRR",       # tertiary (college) enrollment, %
    "NV.AGR.TOTL.ZS",    # agriculture, % of GDP
    "NV.IND.TOTL.ZS",    # industry, % of GDP
    "NY.GDP.TOTL.RT.ZS", # natural resource rents, % of GDP
    "NE.IMP.GNFS.KD",    # imports (constant prices)
    "NE.EXP.GNFS.KD",    # exports (constant prices)
    "NY.GDP.MKTP.KD",    # GDP (constant prices)
    "NE.GDI.TOTL.ZS"     # gross capital formation, % of GDP
  ),
  start = 1960,
  end = 2019,
  extra = TRUE
)

# Clean the raw download and build the analysis variables.
#  * Rows whose `region` is "Aggregates" are dropped.
#  * `unit.num` is the alphabetical rank of `iso3c` among the remaining rows,
#    so the numeric id of a country changes if the set of countries changes.
#  * `openness` is (imports + exports) / GDP.
# The final column order matters: later code drops columns by position.
wdi_dat <- wdi_dat %>%
  subset(region != "Aggregates") %>%
  mutate(
    unit.num = as.numeric(factor(iso3c)),
    gdppercap = NY.GDP.PCAP.KD,
    loggdppercap = log(gdppercap),
    lifeexp = SP.DYN.LE00.IN,
    agriculture = NV.AGR.TOTL.ZS,
    industry = NV.IND.TOTL.ZS,
    resources = NY.GDP.TOTL.RT.ZS,
    imports = NE.IMP.GNFS.KD,
    exports = NE.EXP.GNFS.KD,
    gdp = NY.GDP.MKTP.KD,
    openness = (imports + exports) / gdp
  ) %>%
  select(
    unit.num, year, iso3c,
    gdppercap, loggdppercap, lifeexp,
    agriculture, industry, resources, openness
  )

Synth

Latin America Donors

The pre-treatment period is 1960–1973: the predictors are averaged over 1967–1973 (the training window), and the fit of the outcome is optimized over 1960–1973.

South America: ARG, BOL, BRA, COL, ECU, PER, PRY (URY is excluded).

Central America and Mexico: CRI, GTM, HND, MEX, PAN.

# Baseline synthetic control for Chile with GDP per capita in levels.
# Donors are identified by ISO3 code (resolved through `unit.names.variable`)
# rather than by `unit.num`: `unit.num` is a factor rank that shifts whenever
# the set of countries returned by WDI changes, which would silently swap
# donors. The codes below correspond to the former numeric ids
# 7, 26, 27, 42, 45, 57, 80, 84, 126, 152, 153, 160, in that order.
dataprep.out0 <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  # predictors are averaged over the training window ...
  time.predictors.prior = 1967:1973,
  # ... and the outcome fit is optimized over the whole pre-treatment period
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out0 <- synth(dataprep.out0)

## results

# Summary tables for the baseline fit.
synth.tables <- synth.tab(
  synth.res = synth.out0,
  dataprep.res = dataprep.out0
)
print(synth.tables)

#par(mfrow=c(1,2))
# Chile against synthetic Chile, with a marker at the 1973 treatment year.
path.plot(
  synth.res = synth.out0,
  dataprep.res = dataprep.out0,
  tr.intake = 1973,
  Ylab = "GDP per capita (2015, $)",
  Ylim = c(2500, 15000),
  Legend.position = "topleft"
)

# goodness of fit: Chile's predictor values next to the donor-weighted ones

cbind(dataprep.out0$X1, dataprep.out0$X0 %*% synth.out0$solution.w)

# gaps: difference between Chile and synthetic Chile over time

gaps.plot(
  synth.res = synth.out0,
  dataprep.res = dataprep.out0,
  tr.intake = 1973,
  Main = "",
  Xlab = "Year",
  Ylab = "Gap in GDP per Capita (2015 USD)",
  Ylim = c(-7000, 7000)
)


########## log scale ##########


# Same specification as the baseline, but with log GDP per capita as both
# the outcome and the lagged-outcome predictor. Donor order is kept as in
# the original call.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "loggdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "loggdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BRA", "BOL", "CRI", "COL", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

# Summary tables for the log-scale fit.
synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

# Chile against synthetic Chile on the log scale.
path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  tr.intake = 1973,
  Ylab = "GDP per capita (2015, log)",
  Ylim = c(8, 9.75),
  Legend.position = "topleft"
)

# goodness of fit: Chile's predictor values next to the donor-weighted ones

cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)

Robustness Checks

Comparison to simple average

# Robustness check: compare Chile with the unweighted mean of the donor pool.
# Countries are selected by ISO3 code instead of `unit.num` (formerly 36 for
# Chile and 7, 26, 27, 42, 45, 57, 80, 84, 126, 152, 153, 160 for the
# donors), because `unit.num` is a factor rank that depends on which
# countries are present in the download.
wdi_chile <- wdi_dat %>%
  filter(iso3c == "CHL") %>%
  arrange(year)

wdi_latam <- wdi_dat %>%
  filter(iso3c %in% c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ))

# The formula interface of aggregate() drops rows with missing GDP per
# capita, so each year's mean is taken over the donors with data that year.
gdppercap_la <- aggregate(gdppercap ~ year, wdi_latam, mean)

plot(gdppercap ~ year, data = wdi_chile,
     type = "l",
     xlab = "Year",
     ylim = c(2500, 15000),
     ylab = "GDP per capita (2015 USD)")
lines(gdppercap ~ year, data = gdppercap_la, lty = 2)

legend(x = "topleft",
       legend = c("Chile", "Donor Pool Average"),
       col = c("black", "black"),
       lty = c(1, 2),
       cex = 0.7)

Leave-one-out donor pool

Create graph

# Base figure for the leave-one-out donor check: observed Chile (solid),
# then the legend, then baseline synthetic Chile (dashed). The gray
# leave-one-out paths are added by the chunks that follow.
plot(
  x = 1960:2019,
  y = dataprep.out0$Y1plot,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)

legend(
  "topleft",
  legend = c("Chile", "Synthetic Chile", "Synthetic Chile (leave-one-out)"),
  col = c("black", "black", "gray"),
  lty = c(1, 2, 1),
  cex = 0.7
)

lines(
  x = 1960:2019,
  y = dataprep.out0$Y0plot %*% synth.out0$solution.w,
  lty = 2
)

w/o ARG

# Leave-one-out donor check: baseline specification with ARG removed from
# the donor pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "BRA", "BOL", "CRI", "COL", "ECU", "GTM",
    "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

## plot: add this run's synthetic path to the open figure

lines(
  dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019),
  col = "gray"
)

w/o CRI

# Leave-one-out donor check: baseline specification with CRI removed from
# the donor pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BRA", "BOL", "COL", "ECU", "GTM",
    "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

## plot: add this run's synthetic path to the open figure

lines(
  dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019),
  col = "gray"
)

w/o PER

# Leave-one-out donor check: baseline specification with PER removed from
# the donor pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BRA", "BOL", "CRI", "COL", "ECU",
    "GTM", "HND", "MEX", "PAN", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

## plot: add this run's synthetic path to the open figure

lines(
  dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019),
  col = "gray"
)

Leave-one-out predictors

Create graph

# Base figure for the leave-one-out predictor check: observed Chile (solid),
# then the legend, then baseline synthetic Chile (dashed). Each following
# chunk adds one gray path.
plot(
  x = 1960:2019,
  y = dataprep.out0$Y1plot,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)

legend(
  "topleft",
  legend = c("Chile", "Synthetic Chile", "Synthetic Chile (leave-one-out)"),
  col = c("black", "black", "gray"),
  lty = c(1, 2, 1),
  cex = 0.7
)

lines(
  x = 1960:2019,
  y = dataprep.out0$Y0plot %*% synth.out0$solution.w,
  lty = 2
)

w/o GDP per capita

# Leave-one-out predictor check: re-estimate synthetic Chile WITHOUT lagged
# GDP per capita among the predictors. The original call still listed
# "gdppercap", which made this run a duplicate of the baseline.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "lifeexp", "resources", "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

w/o life expectancy

# Leave-one-out predictor check: baseline specification without life
# expectancy among the predictors.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "resources", "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

w/o resources

# Leave-one-out predictor check: baseline specification without resource
# rents among the predictors.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

w/o agriculture

# Leave-one-out predictor check: baseline specification without the
# agriculture share among the predictors.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

w/o industry

# Leave-one-out predictor check: baseline specification without the
# industry share among the predictors.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources", "agriculture", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

w/o openness

# Leave-one-out predictor check: re-estimate synthetic Chile WITHOUT trade
# openness among the predictors. The original call still listed "openness",
# which made this run a duplicate of the baseline.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42, 45, 57,
# 80, 84, 126, 152, 153, 160) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources", "agriculture", "industry"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c(
    "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
    "GTM", "HND", "MEX", "PAN", "PER", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure

lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

Changing donor pools

Create graph

# Base figure for the alternative-donor-pool check: observed Chile (solid),
# then the legend, then baseline synthetic Chile (dashed). Each following
# chunk adds one gray path for a different donor pool.
plot(
  x = 1960:2019,
  y = dataprep.out0$Y1plot,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)

legend(
  "topleft",
  legend = c(
    "Chile",
    "Synthetic Chile",
    "Synthetic Chile (different donor pools)"
  ),
  col = c("black", "black", "gray"),
  lty = c(1, 2, 1),
  cex = 0.7
)

lines(
  x = 1960:2019,
  y = dataprep.out0$Y0plot %*% synth.out0$solution.w,
  lty = 2
)

South America

# Alternative donor pool: only the South American donors of the baseline
# pool (ARG, BOL, BRA, COL, ECU, PER, PRY); CRI, GTM, HND, MEX and PAN are
# dropped. Donors are given by ISO3 code (formerly unit.num 7, 26, 27, 42,
# 57, 153, 160) so the pool does not depend on factor ranks. The earlier
# comment listing all twelve baseline donors did not match this call and
# has been replaced.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "ECU", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out
)
print(synth.tables)

## plot

# path.plot(dataprep.res = dataprep.out,
#           synth.res = synth.out,
#           tr.intake = 1973,
#           Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
#           Legend.position = c("topleft"))

# add this pool's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

Neighbors

# Alternative donor pool: ARG, BOL, PER and PRY only.
# Donors are given by ISO3 code (formerly unit.num 7, 26, 153, 160) so the
# pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out
)
print(synth.tables)

## plot

# path.plot(dataprep.res = dataprep.out,
#           synth.res = synth.out,
#           tr.intake = 1973,
#           Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
#           Legend.position = c("topleft"))

# add this pool's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

Most similar countries

## calculating means of latam countries

# Pre-treatment (year < 1974) country means for Chile and the twelve
# baseline donors. Countries are selected by ISO3 code instead of factor
# rank, and the deprecated summarize_each(funs(...)) is replaced by
# summarise(across(...)); the resulting columns and their order are the same.
wdi_latam_pre <- wdi_dat %>%
  filter(
    iso3c %in% c(
      "CHL", "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
      "GTM", "HND", "MEX", "PAN", "PER", "PRY"
    ),
    year < 1974
  ) %>%
  group_by(iso3c) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

## k-means clustering to find most similar countries

# kmeans() starts from random centers; fix the seed so the elbow values and
# the cluster assignment are reproducible between runs.
# NOTE(review): the cluster membership quoted in the text should be
# re-checked against the assignment produced with this seed.
set.seed(1973)

# Cluster on the predictor means only (same columns as the former
# positional selection, which dropped iso3c, unit.num, year, loggdppercap).
mydata <- wdi_latam_pre[, c(
  "gdppercap", "lifeexp", "agriculture", "industry", "resources", "openness"
)]

# Within-group sum of squares for 1 to 5 clusters (elbow criterion).
wss <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
for (i in 2:5) {
  wss[i] <- sum(kmeans(mydata, centers = i)$withinss)
}
# plot(1:5, wss, type = "b", xlab = "Number of Clusters",
#      ylab = "Within groups sum of squares")

wdi_latam_pre$cluster <- kmeans(mydata, centers = 3)$cluster

The optimal number of clusters is 3, which puts Chile in a cluster with Brazil, Costa Rica, Mexico, Panama, and Peru.

# Alternative donor pool: the countries clustered with Chile (BRA, CRI, MEX,
# PAN, PER). Donors are given by ISO3 code (formerly unit.num 27, 45, 126,
# 152, 153) so the pool does not depend on factor ranks.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CHL",
  controls.identifier = c("BRA", "CRI", "MEX", "PAN", "PER"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out
)
print(synth.tables)

## plot

# path.plot(dataprep.res = dataprep.out,
#           synth.res = synth.out,
#           tr.intake = 1973,
#           Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
#           Legend.position = c("topleft"))

# add this pool's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

# gdppercap in 2019: element 60 is the last year of time.plot = 1960:2019

(dataprep.out$Y0plot %*% synth.out$solution.w)[60]

Placebos

ARG

# Placebo test: treat ARG as if it had received the 1973 intervention.
# CHL is excluded from the donor pool. It is the unit that was actually
# treated, so its post-1973 outcomes would contaminate the placebo's
# synthetic path. The CRI and PER placebos already exclude it; the original
# ARG call included it.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "ARG",
  controls.identifier = c(
    "CRI", "BOL", "PER", "COL", "ECU", "GTM",
    "HND", "MEX", "PAN", "BRA", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out
)
print(synth.tables)

path.plot(
  dataprep.res = dataprep.out,
  synth.res = synth.out,
  Ylab = c("GDP per capita (2015, $)"),
  Legend.position = c("topleft")
)

# goodness of fit: ARG's predictor values next to the donor-weighted ones

cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)

CRI

# Placebo test: treat CRI as if it had received the intervention, using the
# remaining baseline donors as controls (CHL is not in the pool).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "CRI",
  controls.identifier = c(
    "PER", "BOL", "ARG", "COL", "ECU", "GTM",
    "HND", "MEX", "PAN", "BRA", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Ylab = "GDP per capita (2015, $)",
  Legend.position = "topleft"
)

# goodness of fit: CRI's predictor values next to the donor-weighted ones

cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)

PER

# Placebo test: treat PER as if it had received the intervention, using the
# remaining baseline donors as controls (CHL is not in the pool).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c(
    "gdppercap", "lifeexp", "resources",
    "agriculture", "industry", "openness"
  ),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  unit.names.variable = "iso3c",
  treatment.identifier = "PER",
  controls.identifier = c(
    "CRI", "BOL", "ARG", "COL", "ECU", "GTM",
    "HND", "MEX", "PAN", "BRA", "PRY"
  ),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)

synth.out <- synth(dataprep.out)

## results

synth.tables <- synth.tab(
  synth.res = synth.out,
  dataprep.res = dataprep.out
)
print(synth.tables)

path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Ylab = "GDP per capita (2015, $)",
  Legend.position = "topleft"
)

# goodness of fit: PER's predictor values next to the donor-weighted ones

cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)