Project Code
The Effect of Chile’s Neoliberal Reforms in the 1970s
This page contains the complete R code used for the analysis in this project. The code is presented here for reference and is not executed live.
{r setup, include=FALSE} library(tidyverse) library(xlsx) library(Synth) library(WDI)
Data Setup
# Build the country-year panel used by every analysis below.
# World Bank indicators requested for 1960-2019 (constant-price "KD" series
# and percent-of-GDP "ZS" series); `extra = TRUE` adds country metadata such
# as `region`, which is used to drop the aggregate rows.
wdi_dat <- WDI(
  indicator = c(
    "NY.GDP.PCAP.KD",    # GDP per capita (constant US$)
    "SP.DYN.LE00.IN",    # life expectancy at birth
    "SE.TER.ENRR",       # tertiary (college) enrollment, % -- not kept below
    "NV.AGR.TOTL.ZS",    # agriculture, % of GDP
    "NV.IND.TOTL.ZS",    # industry, % of GDP
    "NY.GDP.TOTL.RT.ZS", # resource rents, % of GDP
    "NE.IMP.GNFS.KD",    # imports (constant US$)
    "NE.EXP.GNFS.KD",    # exports (constant US$)
    "NY.GDP.MKTP.KD",    # GDP (constant US$)
    "NE.GDI.TOTL.ZS"     # gross capital formation, % of GDP -- not kept below
  ),
  start = 1960,
  end = 2019,
  extra = TRUE
) %>%
  # keep countries only
  subset(region != "Aggregates") %>%
  mutate(
    # numeric unit id required by Synth: position of the ISO3 code among the
    # sorted codes present in the download
    unit.num = as.numeric(factor(iso3c)),
    gdppercap = NY.GDP.PCAP.KD,
    loggdppercap = log(gdppercap),
    lifeexp = SP.DYN.LE00.IN,
    agriculture = NV.AGR.TOTL.ZS,
    industry = NV.IND.TOTL.ZS,
    resources = NY.GDP.TOTL.RT.ZS,
    # trade openness: (imports + exports) as a share of GDP
    openness = (NE.IMP.GNFS.KD + NE.EXP.GNFS.KD) / NY.GDP.MKTP.KD
  ) %>%
  select(
    unit.num, year, iso3c,
    gdppercap, loggdppercap, lifeexp,
    agriculture, industry, resources, openness
  )
Synth
Latin America Donors
The pre-treatment period is 1960–1973. Predictors are averaged over 1967–1973, and the fit of the outcome variable is optimized over the full 1960–1973 window.
Donor pool — South America: ARG, BOL, BRA, COL, ECU, PER, PRY (URY, among others, is excluded). Central America and Mexico: CRI, GTM, HND, MEX, PAN.
# Baseline synthetic control for Chile, GDP per capita in levels.
# Donors are named by ISO3 code rather than by their position in
# factor(iso3c): the positions move whenever the set of countries returned
# by WDI changes, while the codes are stable.
dataprep.out0 <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  # window over which the predictors are averaged
  time.predictors.prior = 1967:1973,
  # window over which the pre-treatment outcome fit is optimized
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out0 <- synth(dataprep.out0)
## results: weight and predictor-balance tables for the baseline fit
synth.tables <- synth.tab(synth.res = synth.out0, dataprep.res = dataprep.out0)
print(synth.tables)

#par(mfrow=c(1,2))
# Chile vs. synthetic Chile, with the treatment year marked
path.plot(
  synth.res = synth.out0,
  dataprep.res = dataprep.out0,
  Ylab = "GDP per capita (2015, $)",
  Ylim = c(2500, 15000),
  Legend.position = "topleft",
  tr.intake = 1973
)

# goodness of fit: treated predictor values next to the weighted donor values
cbind(dataprep.out0$X1, dataprep.out0$X0 %*% synth.out0$solution.w)

# gaps: Chile minus synthetic Chile over time
gaps.plot(
  synth.res = synth.out0,
  dataprep.res = dataprep.out0,
  tr.intake = 1973,
  Main = "",
  Xlab = "Year",
  Ylab = "Gap in GDP per Capita (2015 USD)",
  Ylim = c(-7000, 7000)
)
########## log scale ##########
# Same specification as the baseline, with log GDP per capita as both the
# outcome and a predictor.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "loggdppercap",
  predictors = c("loggdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BRA", "BOL", "CRI", "COL", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Ylab = "GDP per capita (2015, log)",
  Ylim = c(8, 9.75),
  Legend.position = "topleft",
  tr.intake = 1973
)

# goodness of fit
cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)
Robustness Checks
Comparison to simple average
# Chile's own series, ordered by year for plotting.
# Units are selected by ISO3 code; the numeric unit.num values are positions
# in factor(iso3c) and change with the set of countries WDI returns.
wdi_chile <- wdi_dat %>%
  filter(iso3c == "CHL") %>%
  arrange(year)

# The 12 donor countries of the baseline specification.
wdi_latam <- wdi_dat %>%
  filter(iso3c %in% c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                      "GTM", "HND", "MEX", "PAN", "PER", "PRY"))

# Unweighted donor mean per year (rows with missing gdppercap are dropped
# by the formula interface before averaging).
gdppercap_la <- aggregate(gdppercap ~ year, wdi_latam, mean)

plot(gdppercap ~ year, data = wdi_chile,
     type = "l",
     xlab = "Year",
     ylim = c(2500, 15000),
     ylab = "GDP per capita (2015 USD)")
lines(gdppercap ~ year, data = gdppercap_la, lty = 2)
legend(x = "topleft",
       legend = c("Chile", "Donor Pool Average"),
       col = c("black", "black"),
       lty = c(1, 2),
       cex = 0.7)
Leave-one-out donor pool
Create graph
# Base figure for the leave-one-out donor runs: observed Chile as a solid
# line; each run below adds its synthetic path in gray.
plot(
  dataprep.out0$Y1plot ~ 1960:2019,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)
legend(
  "topleft",
  legend = c("Chile", "Synthetic Chile", "Synthetic Chile (leave-one-out)"),
  lty = c(1, 2, 1),
  col = c("black", "black", "gray"),
  cex = 0.7
)
# baseline synthetic Chile (dashed)
lines(dataprep.out0$Y0plot %*% synth.out0$solution.w ~ 1960:2019, lty = 2)
w/o ARG
# Leave-one-out donor pool: baseline specification with ARG removed.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "gdppercap",
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "CHL",
  controls.identifier = c("BRA", "BOL", "CRI", "COL", "ECU", "GTM",
                          "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ 1960:2019, col = "gray")
w/o CRI
# Leave-one-out donor pool: baseline specification with CRI removed.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "gdppercap",
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BRA", "BOL", "COL", "ECU", "GTM",
                          "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ 1960:2019, col = "gray")
w/o PER
# Leave-one-out donor pool: baseline specification with PER removed.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "gdppercap",
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BRA", "BOL", "CRI", "COL", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ 1960:2019, col = "gray")
Leave-one-out predictors
Create graph
# Base figure for the leave-one-out predictor runs: observed Chile as a
# solid line; each run below adds its synthetic path in gray.
plot(
  dataprep.out0$Y1plot ~ 1960:2019,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)
legend(
  "topleft",
  legend = c("Chile", "Synthetic Chile", "Synthetic Chile (leave-one-out)"),
  lty = c(1, 2, 1),
  col = c("black", "black", "gray"),
  cex = 0.7
)
# baseline synthetic Chile (dashed)
lines(dataprep.out0$Y0plot %*% synth.out0$solution.w ~ 1960:2019, lty = 2)
w/o GDP per capita
# Leave-one-out predictors: GDP per capita removed from the predictor set.
# "gdppercap" is deliberately absent from `predictors`; it remains the
# dependent variable. Donors are named by ISO3 code so the pool does not
# depend on positions in factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("lifeexp", "resources", "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
w/o life expectancy
# Leave-one-out predictors: life expectancy removed from the predictor set.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "resources", "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
w/o resources
# Leave-one-out predictors: resource rents removed from the predictor set.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
w/o agriculture
# Leave-one-out predictors: agriculture share removed from the predictor set.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
w/o industry
# Leave-one-out predictors: industry share removed from the predictor set.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources", "agriculture", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
w/o openness
# Leave-one-out predictors: trade openness removed from the predictor set.
# "openness" is deliberately absent from `predictors`. Donors are named by
# ISO3 code so the pool does not depend on positions in factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources", "agriculture", "industry"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                          "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## plot: add this run's synthetic path to the open figure
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
Changing donor pools
Create graph
# Base figure for the alternative donor pools: observed Chile as a solid
# line; each run below adds its synthetic path in gray.
plot(
  dataprep.out0$Y1plot ~ 1960:2019,
  type = "l",
  xlab = "Year",
  ylab = "GDP per capita (2015 USD)",
  ylim = c(2500, 15000)
)
legend(
  "topleft",
  legend = c("Chile", "Synthetic Chile", "Synthetic Chile (different donor pools)"),
  lty = c(1, 2, 1),
  col = c("black", "black", "gray"),
  cex = 0.7
)
# baseline synthetic Chile (dashed)
lines(dataprep.out0$Y0plot %*% synth.out0$solution.w ~ 1960:2019, lty = 2)
South America
# Alternative donor pool: the seven South American donors of the baseline
# (the Central American countries and Mexico are left out).
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "BRA", "COL", "ECU", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## results
synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)
print(synth.tables)

## plot
# path.plot(dataprep.res = dataprep.out,
# synth.res = synth.out,
# tr.intake = 1973,
# Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
# Legend.position = c("topleft"))
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
Neighbors
# Alternative donor pool: Chile's neighbors.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
# NOTE(review): PRY does not share a border with Chile -- confirm that it is
# meant to be part of the "neighbors" pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("ARG", "BOL", "PER", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## results
synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)
print(synth.tables)

## plot
# path.plot(dataprep.res = dataprep.out,
# synth.res = synth.out,
# tr.intake = 1973,
# Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
# Legend.position = c("topleft"))
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")
Most similar countries
## calculating means of latam countries
# Pre-treatment (year < 1974) mean of every variable for Chile and the 12
# baseline donors. Countries are selected by ISO3 code, and across() replaces
# the deprecated summarize_each(funs(...)).
wdi_latam_pre <- wdi_dat %>%
  filter(
    iso3c %in% c("CHL", "ARG", "BOL", "BRA", "COL", "CRI", "ECU",
                 "GTM", "HND", "MEX", "PAN", "PER", "PRY"),
    year < 1974
  ) %>%
  group_by(iso3c) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

## k-means clustering to find most similar countries
# Cluster on the six synth predictors only (selected by name, not position).
# NOTE(review): the variables are not standardized, so gdppercap (in dollars)
# dominates the distances -- confirm this is intended.
mydata <- wdi_latam_pre %>%
  select(gdppercap, lifeexp, agriculture, industry, resources, openness)

# k-means starts from random centers; fix the seed and use several starts so
# the elbow values and the cluster assignment are reproducible.
set.seed(1973)

# Within-group sum of squares for k = 1..5 (elbow criterion).
wss <- (nrow(mydata) - 1) * sum(vapply(mydata, var, numeric(1)))
for (i in 2:5) {
  wss[i] <- sum(kmeans(mydata, centers = i, nstart = 25)$withinss)
}
# plot(1:5, wss, type = "b", xlab = "Number of Clusters",
# ylab = "Within groups sum of squares")
wdi_latam_pre$cluster <- kmeans(mydata, centers = 3, nstart = 25)$cluster
The optimal number of clusters is 3, which puts Chile in a cluster with Brazil, Costa Rica, Mexico, Panama, and Peru.
# Alternative donor pool: Brazil, Costa Rica, Mexico, Panama and Peru.
# Donors are named by ISO3 code so the pool does not depend on positions in
# factor(iso3c).
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "CHL",
  controls.identifier = c("BRA", "CRI", "MEX", "PAN", "PER"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## results
synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)
print(synth.tables)

## plot
# path.plot(dataprep.res = dataprep.out,
# synth.res = synth.out,
# tr.intake = 1973,
# Ylab = c("GDP per capita (2015, $)"), Ylim = c(2500,15000),
# Legend.position = c("topleft"))
lines(dataprep.out$Y0plot %*% synth.out$solution.w ~ c(1960:2019), col = "gray")

# gdppercap in 2019 (60th entry, since time.plot = 1960:2019)
(dataprep.out$Y0plot %*% synth.out$solution.w)[60]
Placebos
ARG
# In-space placebo: treat ARG as if it had received the intervention and
# build its synthetic control from the remaining baseline donors.
# CHL (the actually treated unit) is kept out of the donor pool, as in the
# CRI and PER placebo runs, so the placebo is not built from a series that
# itself reflects the post-1973 treatment.
dataprep.out <- dataprep(
  foo = wdi_dat,
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  dependent = "gdppercap",
  unit.variable = "unit.num",
  time.variable = "year",
  treatment.identifier = "ARG",
  controls.identifier = c("CRI", "BOL", "PER", "COL", "ECU", "GTM",
                          "HND", "MEX", "PAN", "BRA", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  unit.names.variable = "iso3c",
  time.plot = 1960:2019
)
synth.out <- synth(dataprep.out)

## results
synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)
print(synth.tables)

path.plot(dataprep.res = dataprep.out,
          synth.res = synth.out,
          Ylab = c("GDP per capita (2015, $)"),
          Legend.position = c("topleft"))

# goodness of fit
cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)
CRI
# In-space placebo: CRI takes the role of the treated unit; the other eleven
# baseline donors form its donor pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "gdppercap",
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "CRI",
  controls.identifier = c("PER", "BOL", "ARG", "COL", "ECU", "GTM",
                          "HND", "MEX", "PAN", "BRA", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Legend.position = "topleft",
  Ylab = "GDP per capita (2015, $)"
)

# goodness of fit
cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)
PER
# In-space placebo: PER takes the role of the treated unit; the other eleven
# baseline donors form its donor pool.
dataprep.out <- dataprep(
  foo = wdi_dat,
  unit.variable = "unit.num",
  unit.names.variable = "iso3c",
  time.variable = "year",
  dependent = "gdppercap",
  predictors = c("gdppercap", "lifeexp", "resources",
                 "agriculture", "industry", "openness"),
  treatment.identifier = "PER",
  controls.identifier = c("CRI", "BOL", "ARG", "COL", "ECU", "GTM",
                          "HND", "MEX", "PAN", "BRA", "PRY"),
  time.predictors.prior = 1967:1973,
  time.optimize.ssr = 1960:1973,
  time.plot = 1960:2019
)
synth.out <- synth(data.prep.obj = dataprep.out)

## results
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)
print(synth.tables)

path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Legend.position = "topleft",
  Ylab = "GDP per capita (2015, $)"
)

# goodness of fit
cbind(dataprep.out$X1, dataprep.out$X0 %*% synth.out$solution.w)