Compute Dissent Scores

We begin by setting up the data for Stan.

# load packages
library(tidyverse)
library(cmdstanr)
library(countrycode)
library(tidybayes)

# load events data 
counts <- read.csv("output/idea-counts.csv") %>%
  arrange(ccode, year) %>%
  mutate(observation_index = 1:n()) %>%
  glimpse()
Rows: 2,775
Columns: 6
$ year              <int> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998…
$ ccode             <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, 20,…
$ idea_code         <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…
$ n_dissent_events  <int> 118, 109, 124, 138, 127, 117, 123, 130, 147, 121, 14…
$ n_events          <int> 135537, 140735, 151180, 177717, 194368, 195981, 2056…
$ observation_index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
# create data set with lengths of time-series for each country
s <- counts %>%
  group_by(ccode) %>%
  summarize(size = n()) 

# setup data for stan
N <- nrow(counts)
J <- nrow(s)
s <- s$size
stan_data <- list(N = N,
                  J = J, 
                  s = s,
                  n_dissent_events = counts$n_dissent_events, 
                  n_events = counts$n_events)

Next, we fit the model in Stan.

data {
  int<lower=1> N; // number of observations
  int<lower=1> J; // number of groups
  array[J] int s; // number of time periods for each group; trick for ragged arrays
  array[N] int<lower=0> n_dissent_events; // number of dissent events
  array[N] int<lower=0> n_events; // total number of events
}

parameters {
  real Mu;
  real<lower=0> sigma_mu;
  real<lower=0> sigma_alpha;
  real<lower=1> nu_mu;
  real<lower=1> nu_alpha;
  array[N] real alpha; // innovations on logit scale
  array[J] real mu;    // group-level mean intensity on logit scale
}

transformed parameters {
  array[N] real eta;
  {  // bracket is a trick to "hide" this unallowed integer from the block
  // below, I use a trick for ragged arrays
  int pos;
  pos = 1;
  for (j in 1:J) {
    for (t in 1:s[j]) {
      eta[(pos - 1) + t] = mu[j] + alpha[(pos - 1) + t]; 
    }
    pos = pos + s[j];
  }
  }
}

model {
  sigma_mu ~ cauchy(0, 3);
  sigma_alpha ~ cauchy(0, 3);
  nu_mu ~ gamma(2, 0.1);
  nu_alpha ~ gamma(2, 0.1);
  mu ~ student_t(nu_mu, Mu, sigma_mu);
  alpha ~ student_t(nu_alpha, 0, sigma_alpha);
  n_dissent_events ~ binomial_logit(n_events, eta);
}

generated quantities {
  vector[N] log_lik;
  for (i in 1:N) {
    log_lik[i] = binomial_logit_lpmf(n_dissent_events[i] | n_events[i], eta[i]);
  }
}
# fit model
mod <- cmdstan_model("src/dissent.stan")
fit <- mod$sample(
  data = stan_data, 
  seed = 97854, 
  iter_sampling = 15000,
  iter_warmup = 5000,
  chains = 10, 
  parallel_chains = 10,
  refresh = 100 # print update every 500 iters
)

Finally, we do the post-processing of the simulations.

# extract posterior simulations
set.seed(4238)
draws <- fit$draws(variables = "eta") %>%
  posterior::as_draws_df() %>% 
  sample_n(5000) 

# test that seed worked properly
as.numeric(draws[1, 1])
[1] -7.0471
as.numeric(draws[1, 1]) == -7.0471
[1] TRUE
# compute dissent scores
dissent_scores <- draws %>% 
  # pivot data set into long format with columns for the eta index
  pivot_longer(cols = starts_with("eta["), names_to = "par", values_to = "eta") %>% 
  # extract observation index (i.e., country-year) from par
  mutate(observation_index = str_remove_all(par, "[^[:digit:]]"),
         observation_index = as.numeric(observation_index)) %>% 
  # compute posterior averages and se
  group_by(observation_index) %>%
  summarize(avg_eta = mean(eta), 
            se_eta = sd(eta),
            avg_pi = mean(plogis(eta))) %>%
  ungroup() %>%
  # rescale posterior average of eta to create dissent score
  mutate(dissent_score = (avg_eta - mean(avg_eta))/(2*sd(avg_eta)),
         se_dissent_score = se_eta/(2*sd(avg_eta))) %>%
  # join in raw counts data
  left_join(counts) %>% 
  mutate(frac_dissent_events = ifelse(n_events == 0, 0, n_dissent_events/n_events)) %>%
  # add in additional country names
  #   - country_name: country.name from {countrycode}
  #   - stateabb: COW 3-digit alpha from {countrycode} 
  mutate(stateabb = countrycode(ccode, "cown", "cowc"),
         country_name = countrycode(ccode, "cown", "country.name.en")) %>%
  # make repairs for german case
  mutate(stateabb = ifelse(ccode == 260, "GFR", stateabb),
         country_name = ifelse(ccode == 260, "Germany (GFR)", country_name)) %>%
  # add version info
  # add version info
  mutate(release = "V0.1 (Preprint)", 
         release_date = "2023-12-18", 
         created_date = Sys.Date(), 
         data_source = "IDEA") %>%
  # select variables for final data set
  select(release, release_date, data_source, created_date, country_name, ccode, stateabb, year, n_events, n_dissent_events, frac_dissent_events, avg_pi, avg_eta, dissent_score, se_dissent_score)

# write latent measures to file
write_csv(dissent_scores, "output/dissent-scores.csv")
haven::write_dta(dissent_scores, "output/dissent-scores.dta")
write_rds(dissent_scores, "output/dissent-scores.rds")
# verify that just-created version matches dataverse version
new <- read_csv("output/dissent-scores.csv") %>% 
  select(release, release_date, ccode, year, dissent_score)
dv <- dataverse::get_dataframe_by_name(
  filename = "dissent-scores.tab",
  dataset  = "doi:10.7910/DVN/CL4CA8",
  server   = "dataverse.harvard.edu", 
  original = TRUE, 
  .f = readr::read_csv) %>% 
  select(release, release_date, ccode, year, dissent_score)

all.equal(new, dv)
[1] TRUE