# ---- readme ----

# A "Common" European Interest? Explaining Variation in IPCEI State Aid Between EU Member States
# R script used for the qualitative comparative analysis (QCA)

# Ruben De La Cruz, PhD fellow, Ghent Institute for International and European Studies (GIES), Ghent University
# This research was funded by the Research Foundation – Flanders (FWO) under PhD fellowship no. 11A8626N

# Issue: "Doing Industrial Policy in a Geotech World: Challenges and Opportunities", Politics and Governance, Vol. 14
# Editors: Salih Işık Bora (Vrije Universiteit Brussel), Fabio Bulfone (Leiden University), and Timo Seidl (Technical University of Munich)
# Fully open access at https://doi.org/10.17645/pag.i482 
# Published by Cogitatio Press

# Please cite as: De La Cruz, R. (2026) A “Common” European Interest? Explaining Variation in IPCEI State Aid Between EU Member States. Politics and Governance, 14. https://doi.org/10.17645/pag.11453

# IMPORTANT NOTE: In order to properly load the data of the .xlsx file in R, please follow the instructions below.
# 1. Save the second sheet containing raw data as a .csv file named "raw-data.csv".
# 2. Save the third sheet containing calibrated data as .csv file named "calibrated-data.csv".

# ---- 0. Load libraries ----

library(readr)
library(dplyr)
library(tidyr)
library(QCA)
library(SetMethods)
library(ggplot2)
library(stringr)
library(tibble)

# ---- 1. Load and calibrate data ----
# Note: The raw and calibrated data are uploaded as one supplementary file on the publisher's website: https://www.cogitatiopress.com/politicsandgovernance
# In order to properly load the data, download the supplementary .xlsx file containing the raw and calibrated data.
# Save the sheet containing the raw data as a .csv file named "raw-data.csv".
# Save the sheet containing the calibrated data as a .csv file named "calibrated-data.csv".

# Read the raw (uncalibrated) data from the working directory.
data.raw <- read.csv("raw-data.csv")

# Thresholds for the direct fuzzy calibration, one numeric triple per column.
# Keeping them in a single named list avoids repeating the calibrate() call
# for every column and gives one place to look the values up.
# NOTE(review): the triples are presumably ordered as (full exclusion,
# crossover, full inclusion) -- confirm against the QCA::calibrate docs.
calib.thresholds <- list(
  outcome    = c(0, 0.1, 1),
  gdp        = c(0, 100000, 1000000),
  export     = c(0, 50, 100),
  fdi        = c(0, 0.1, 0.3),
  growenv    = c(0, 5, 10),
  leader     = c(0, 10, 50),
  aid        = c(0, 0.6, 2),
  adm        = c(0, 50, 100),
  fiscstress = c(0, 2, 6)
)

# Calibrate each listed column in place with its own threshold triple.
# all_of() makes a missing column an explicit error instead of a silent skip.
data.cal <- data.raw %>%
  mutate(across(
    all_of(names(calib.thresholds)),
    ~ calibrate(.x,
                type = "fuzzy",
                method = "direct",
                thresholds = calib.thresholds[[cur_column()]])
  ))

# Nudge all exact 0.5 scores to >0.5.
# Only numeric columns are touched, so identifier/label columns pass through
# unchanged. The comparison is deliberately exact: only values that are
# precisely 0.5 are replaced.
data.cal <- data.cal %>%
  mutate(across(where(is.numeric), ~ ifelse(.x == 0.5, 0.500001, .x)))

# Write the calibrated data; this overwrites any existing "calibrated-data.csv"
# in the working directory.
write.csv(data.cal, "calibrated-data.csv", row.names = FALSE)

# ---- 2. Truth table analysis: presence of outcome ----
# Re-read the calibrated data, this time using the first column of the file
# as row names.
data.cal.tt <- read.csv("calibrated-data.csv", row.names = 1)

# Truth table for the presence of the outcome.
# No `conditions` argument is passed.
# NOTE(review): presumably every column other than "outcome" is then used as a
# condition -- confirm against the QCA::truthTable documentation.
TT <- truthTable(data.cal.tt,
                 outcome = "outcome",
                 complete = TRUE,
                 show.cases = TRUE,
                 incl.cut = 0.865,
                 sort.by = "incl, n")

TT

# Produce csv without logical remainders for observation:
# keep only the truth table rows with at least one case (n > 0).
TT.obs <- subset(TT$tt, n > 0)
TT.obs
write.csv(TT.obs, "truth-table.csv", row.names = FALSE)

# Parsimonious solution (include = "?" is passed to the minimization).
# NOTE(review): `use.tilde` may no longer be a formal argument in recent QCA
# releases -- confirm it is still needed for the installed version.
sol.pars <- minimize(TT,
                     include = "?",
                     details = TRUE,
                     use.tilde = TRUE)

sol.pars

# ---- 3. Select most fitting model based on theoretical expectations ----

# ---- 3.1 Justification for choice of model ----

### Rows 5-6: exactly 1 of these 2 combinations should be present, because every model contains one of the two
### Row 5 (growenv*fiscstress): both conditions contradict theoretical expectations
##### However, literature on growenv is ambiguous
### Row 6 (~aid*fiscstress): both conditions contradict theoretical expectations
### Conclusion: row 5 (growenv*fiscstress) theoretically more plausible
##### Therefore, M1, M3, or M5 most fitting

### Rows 1-4: 2 combinations should be present, because M1, M3, and M5 contain 2 of these combinations
### Rows 1-3: all conditions follow theoretical expectations
#### Row 1 (gdp*~fdi)
#### Row 2 (gdp*aid)
#### Row 3 (~fdi*leader)
### Row 4 (fdi*aid): fdi contradicts theoretical expectations
#### Therefore, models containing rows 1-3 (M1, M2, M5, M6) most fitting

### Combining theoretical analyses above: M1 or M5 most fitting
### inclS, PRI, and covS higher for M1 than M5.
### Conclusion: M1 theoretically fits best

# ---- 3.2 Output M1 from parsimonious solution ----

# Terms that make up the first model (M1) of the parsimonious solution
terms.M1 <- sol.pars$solution[[1]]

terms.M1

# Overall fit table of the solution object (inclS, PRI, covS, covU, and cases).
# rownames = NA keeps the row names, which are moved into a column below.
# NOTE(review): covU is taken from the overall table here; confirm that it is
# the intended value rather than the one in sol.pars$IC$individual[[1]].
terms <- as_tibble(sol.pars$IC$overall$incl.cov, rownames = NA)

terms

# Restrict the fit table to the terms that belong to M1;
# the former row names become the "term" column.
terms.filtered <- filter(
  rownames_to_column(terms, var = "term"),
  term %in% terms.M1
)

terms.filtered

# Solution-level fit (inclS, PRI, and covS) of M1
solution.M1 <- as_tibble(sol.pars$IC$individual[[1]]$sol.incl.cov, rownames = NA)

solution.M1

# Shape the solution-level row like the term rows so both can be stacked:
# label it "solution M1" and fill the columns it does not have with NA.
solution.M1 <- mutate(
  rownames_to_column(solution.M1, var = "term"),
  term = "solution M1",
  covU = NA,
  cases = NA
)

# Stack term rows and the solution row, then turn the "term" column
# (the first column of the stacked table) back into row names.
sol.pars.M1 <- column_to_rownames(
  bind_rows(terms.filtered, solution.M1),
  var = "term"
)
sol.pars.M1

write.csv(sol.pars.M1, "parsimonious-solution.csv")

# ---- 4. Robustness checks: sensitivity ranges for conditions and consistency cut-off ----
# Workflow adapted from Oana, I.-.E., Schneider, C.Q. & Thomann, E. (2021). Qualitative Comparative Analysis Using R: A Beginner's Guide. Cambridge University Press. pp. 151-154

# Create an object storing the conditions' names
conds <- c("gdp", "export", "fdi", "growenv", "leader", "aid", "adm", "fiscstress")

# Settings shared by every sensitivity run. They are defined once so that all
# conditions are tested under the same outcome, consistency cut-off,
# frequency cut-off and treatment of remainders.
rob.outcome  <- "outcome"
rob.incl.cut <- 0.865
rob.n.cut    <- 1
rob.include  <- "?"

# Per-condition settings for rob.calibrange():
#   thresholds - calibration thresholds around which the ranges are tested
#   step       - value passed as `step` for that condition
#   max.runs   - value passed as `max.runs` for that condition
calibrange.specs <- list(
  gdp        = list(thresholds = c(0, 100000, 1000000), step = 1000,  max.runs = 500),
  export     = list(thresholds = c(0, 50, 100),         step = 5,     max.runs = 30),
  fdi        = list(thresholds = c(0, 0.1, 0.3),        step = 0.005, max.runs = 25),
  growenv    = list(thresholds = c(0, 5, 10),           step = 0.1,   max.runs = 50),
  leader     = list(thresholds = c(0, 10, 50),          step = 1,     max.runs = 50),
  aid        = list(thresholds = c(0, 0.6, 2),          step = 0.05,  max.runs = 50),
  adm        = list(thresholds = c(0, 50, 100),         step = 1,     max.runs = 50),
  fiscstress = list(thresholds = c(0, 2, 6),            step = 0.1,   max.runs = 60)
)

# Sensitivity ranges for every condition, in the order listed above.
# Map() names the result after the condition, so each range can be looked up
# afterwards as calib.ranges$gdp, calib.ranges$export, ...
calib.ranges <- Map(
  function(cond, spec) {
    rob.calibrange(raw.data = data.raw,
                   calib.data = data.cal,
                   test.cond.raw = cond,
                   test.cond.calib = cond,
                   test.thresholds = spec$thresholds,
                   step = spec$step,
                   max.runs = spec$max.runs,
                   outcome = rob.outcome,
                   conditions = conds,
                   incl.cut = rob.incl.cut,
                   n.cut = rob.n.cut,
                   include = rob.include)
  },
  names(calibrange.specs),
  calibrange.specs
)

calib.ranges