# ---- readme ----
# 'A "Common" European Interest? Explaining Variation in IPCEI State Aid
# Between EU Member States'
# R script used for the qualitative comparative analysis (QCA)
# Ruben De La Cruz, PhD fellow, Ghent Institute for International and European
# Studies (GIES), Ghent University
# This research was funded by the Research Foundation – Flanders (FWO) under
# PhD fellowship no. 11A8626N
# Issue: "Doing Industrial Policy in a Geotech World: Challenges and
# Opportunities", Politics and Governance, Vol. 14
# Editors: Salih Işık Bora (Vrije Universiteit Brussel), Fabio Bulfone (Leiden
# University), and Timo Seidl (Technical University of Munich)
# Fully open access at https://doi.org/10.17645/pag.i482
# Published by Cogitatio Press
# Please cite as: De La Cruz, R. (2026) A “Common” European Interest?
# Explaining Variation in IPCEI State Aid Between EU Member States. Politics
# and Governance, 14. https://doi.org/10.17645/pag.11453

# IMPORTANT NOTE: In order to properly load the data of the .xlsx file in R,
# please follow the instructions below.
# 1. Save the second sheet containing raw data as a .csv file named
#    "raw-data.csv".
# 2. Save the third sheet containing calibrated data as a .csv file named
#    "calibrated-data.csv".

# ---- 0. Load libraries ----
# Each package is attached on its own line so that every call is executed.
library(readr)
library(dplyr)
library(tidyr)
library(QCA)
library(SetMethods)
library(ggplot2)
library(stringr)
library(tibble)

# ---- 1. Load and calibrate data ----
# Note: The raw and calibrated data are uploaded as one supplementary file on
# the publisher's website: https://www.cogitatiopress.com/politicsandgovernance
# In order to properly load the data, download the supplementary .xlsx file
# containing the raw and calibrated data.
# Save the first sheet containing raw data as a .csv file named "raw-data.csv".
# Save the second sheet containing calibrated data as a .csv file named
# "calibrated-data.csv".
# NOTE(review): the readme above refers to the second and third sheets, while
# this note refers to the first and second sheets -- confirm the sheet
# positions against the supplementary .xlsx file.
# Read the raw (uncalibrated) data from the working directory.
data.raw <- read.csv("raw-data.csv")

# Calibrate the outcome and every condition into fuzzy sets using the direct
# method. NOTE(review): the three thresholds are presumably given in the order
# full exclusion, crossover, full inclusion -- confirm against the QCA
# documentation for calibrate().
data.cal <- data.raw %>%
  mutate(
    outcome = calibrate(outcome, type = "fuzzy", method = "direct",
                        thresholds = c(0, 0.1, 1)),
    gdp = calibrate(gdp, type = "fuzzy", method = "direct",
                    thresholds = c(0, 100000, 1000000)),
    export = calibrate(export, type = "fuzzy", method = "direct",
                       thresholds = c(0, 50, 100)),
    fdi = calibrate(fdi, type = "fuzzy", method = "direct",
                    thresholds = c(0, 0.1, 0.3)),
    growenv = calibrate(growenv, type = "fuzzy", method = "direct",
                        thresholds = c(0, 5, 10)),
    leader = calibrate(leader, type = "fuzzy", method = "direct",
                       thresholds = c(0, 10, 50)),
    aid = calibrate(aid, type = "fuzzy", method = "direct",
                    thresholds = c(0, 0.6, 2)),
    adm = calibrate(adm, type = "fuzzy", method = "direct",
                    thresholds = c(0, 50, 100)),
    fiscstress = calibrate(fiscstress, type = "fuzzy", method = "direct",
                           thresholds = c(0, 2, 6))
  )

# Nudge all exact 0.5 scores to >0.5. Only numeric columns are touched, so
# identifier/text columns are passed through unchanged.
data.cal <- data.cal %>%
  mutate(across(where(is.numeric), ~ ifelse(.x == 0.5, 0.500001, .x)))

# Persist the calibrated data. This overwrites any existing
# "calibrated-data.csv" in the working directory.
write.csv(data.cal, "calibrated-data.csv", row.names = FALSE)

# ---- 2. Truth table analysis: presence of outcome ----
# Re-read the calibrated data, using the first column as row names.
data.cal.tt <- read.csv("calibrated-data.csv", row.names = 1)

# Truth table (including logical remainders, with cases shown per row)
TT <- truthTable(
  data.cal.tt,
  outcome = "outcome",
  complete = TRUE,
  show.cases = TRUE,
  incl.cut = 0.865,
  sort.by = "incl, n"
)
TT

# Produce csv without logical remainders for observation
TT.obs <- subset(TT$tt, n > 0)
TT.obs
write.csv(TT.obs, "truth-table.csv", row.names = FALSE)

# Parsimonious solution (logical remainders included in the minimization)
sol.pars <- minimize(TT, include = "?", details = TRUE, use.tilde = TRUE)
sol.pars

# ---- 3.
# Select most fitting model based on theoretical expectations ----

# ---- 3.1 Justification for choice of model ----
### Rows 5-6: 1 combination should be present, because every model contains one of the two
### Row 5 (growenv*fiscstress): both conditions contradict theoretical expectations
##### However, the literature on growenv is ambiguous
### Row 6 (~aid*fiscstress): both conditions contradict theoretical expectations
### Conclusion: row 5 (growenv*fiscstress) is theoretically more plausible
##### Therefore, M1, M3, or M5 are most fitting
### Rows 1-4: 2 combinations should be present, because M1, M3, and M5 contain 2 of these combinations
### Rows 1-3: all conditions follow theoretical expectations
#### Row 1 (gdp*~fdi)
#### Row 2 (gdp*aid)
#### Row 3 (~fdi*leader)
### Row 4 (fdi*aid): fdi contradicts theoretical expectations
#### Therefore, models containing rows 1-3 (M1, M2, M5, M6) are most fitting
### Combining the theoretical analyses above: M1 or M5 is most fitting
### inclS, PRI, and covS are higher for M1 than for M5.
### Conclusion: M1 theoretically fits best

# ---- 3.2 Output M1 from parsimonious solution ----
# Select terms of M1 (the first model of the parsimonious solution)
terms.M1 <- sol.pars$solution[[1]]
terms.M1

# Select inclS, PRI, covS, covU, and cases for all terms; row names (the term
# labels) are retained on the tibble so they can be turned into a column below
terms <- as_tibble(sol.pars$IC$overall$incl.cov, rownames = NA)
terms

# Only keep inclS, PRI, covS, covU, and cases of M1
terms.filtered <- terms %>%
  rownames_to_column(var = "term") %>%
  filter(term %in% terms.M1)
terms.filtered

# Select inclS, PRI, and covS of M1 solution
solution.M1 <- as_tibble(
  sol.pars$IC$individual[[1]]$sol.incl.cov,
  rownames = NA
)
solution.M1

# Add inclS, PRI, and covS of M1 solution to df containing terms
### Prepare solution.M1 for merging with terms.filtered: label the row and add
### the columns that exist for terms but are filled with NA for the solution
solution.M1 <- solution.M1 %>%
  rownames_to_column(var = "term") %>%
  mutate(term = "solution M1", covU = NA, cases = NA)

### Merge M1 solution inclS, PRI, and covS with terms
sol.pars.M1 <- bind_rows(terms.filtered, solution.M1)
# "term" is the first column (added by rownames_to_column); move it back to
# row names so it is written as the row label of the csv
sol.pars.M1 <- column_to_rownames(sol.pars.M1, var = "term")
sol.pars.M1
write.csv(sol.pars.M1, "parsimonious-solution.csv")

# ---- 4. Robustness checks: sensitivity ranges for conditions and consistency cut-off ----
# Workflow adapted from Oana, I.-E., Schneider, C.Q. & Thomann, E. (2021).
# Qualitative Comparative Analysis Using R: A Beginner's Guide.
# Cambridge University Press. pp.
# 151-154

# Create an object storing the conditions' names
conds <- c("gdp", "export", "fdi", "growenv", "leader", "aid", "adm",
           "fiscstress")

# Run rob.calibrange() for one condition with the settings shared by all
# checks: same raw/calibrated data, outcome, conditions, consistency cut-off
# (0.865), frequency cut-off (1), and inclusion of logical remainders ("?").
#   cond       name of the condition (same column name in raw and calibrated
#              data)
#   thresholds the three calibration thresholds used for that condition
#   step       step size with which the thresholds are varied
#   max.runs   maximum number of runs
# NOTE(review): unlike the truth table analysis, data.raw and data.cal are not
# re-read with row.names = 1 -- confirm that the extra leading column is
# ignored because `conditions` is given explicitly.
run.calibrange <- function(cond, thresholds, step, max.runs) {
  rob.calibrange(
    raw.data = data.raw,
    calib.data = data.cal,
    test.cond.raw = cond,
    test.cond.calib = cond,
    test.thresholds = thresholds,
    step = step,
    max.runs = max.runs,
    outcome = "outcome",
    conditions = conds,
    incl.cut = 0.865,
    n.cut = 1,
    include = "?"
  )
}

# Sensitivity ranges for condition gdp
run.calibrange("gdp", c(0, 100000, 1000000), step = 1000, max.runs = 500)

# Sensitivity ranges for condition export
run.calibrange("export", c(0, 50, 100), step = 5, max.runs = 30)

# Sensitivity ranges for condition fdi
run.calibrange("fdi", c(0, 0.1, 0.3), step = 0.005, max.runs = 25)

# Sensitivity ranges for condition growenv
run.calibrange("growenv", c(0, 5, 10), step = 0.1, max.runs = 50)

# Sensitivity ranges for condition leader
run.calibrange("leader", c(0, 10, 50), step = 1, max.runs = 50)

# Sensitivity ranges for condition aid
run.calibrange("aid", c(0, 0.6, 2), step = 0.05, max.runs = 50)

# Sensitivity ranges for condition adm
run.calibrange("adm", c(0, 50, 100), step = 1, max.runs = 50)

# Sensitivity ranges for condition fiscstress
run.calibrange("fiscstress", c(0, 2, 6), step = 0.1, max.runs = 60)