Compute Dissent Scores

We begin by setting up the data for Stan.

``````# load packages
library(tidyverse)
library(cmdstanr)
library(countrycode)
library(tidybayes)

arrange(ccode, year) %>%
mutate(observation_index = 1:n()) %>%
glimpse()``````
``````Rows: 2,775
Columns: 6
\$ year              <int> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998…
\$ ccode             <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, 20,…
\$ idea_code         <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…
\$ n_dissent_events  <int> 118, 109, 124, 138, 127, 117, 123, 130, 147, 121, 14…
\$ n_events          <int> 135537, 140735, 151180, 177717, 194368, 195981, 2056…
\$ observation_index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…``````
``````# create data set with lengths of time-series for each country
s <- counts %>%
group_by(ccode) %>%
summarize(size = n())

# setup data for stan
N <- nrow(counts)
J <- nrow(s)
s <- s\$size
stan_data <- list(N = N,
J = J,
s = s,
n_dissent_events = counts\$n_dissent_events,
n_events = counts\$n_events)``````

Next, we fit the model in Stan.

``````data {
int<lower=1> N; // number of observations
int<lower=1> J; // number of groups
array[J] int s; // number of time periods for each group; trick for ragged arrays
array[N] int<lower=0> n_dissent_events; // number of dissent events
array[N] int<lower=0> n_events; // total number of events
}

parameters {
real Mu;
real<lower=0> sigma_mu;
real<lower=0> sigma_alpha;
real<lower=1> nu_mu;
real<lower=1> nu_alpha;
array[N] real alpha; // innovations on logit scale
array[J] real mu;    // group-level mean intensity on logit scale
}

transformed parameters {
array[N] real eta;
{  // bracket is a trick to "hide" this unallowed integer from the block
// below, I use a trick for ragged arrays
int pos;
pos = 1;
for (j in 1:J) {
for (t in 1:s[j]) {
eta[(pos - 1) + t] = mu[j] + alpha[(pos - 1) + t];
}
pos = pos + s[j];
}
}
}

model {
sigma_mu ~ cauchy(0, 3);
sigma_alpha ~ cauchy(0, 3);
nu_mu ~ gamma(2, 0.1);
nu_alpha ~ gamma(2, 0.1);
mu ~ student_t(nu_mu, Mu, sigma_mu);
alpha ~ student_t(nu_alpha, 0, sigma_alpha);
n_dissent_events ~ binomial_logit(n_events, eta);
}

generated quantities {
vector[N] log_lik;
for (i in 1:N) {
log_lik[i] = binomial_logit_lpmf(n_dissent_events[i] | n_events[i], eta[i]);
}
}``````
``````# fit model
mod <- cmdstan_model("src/dissent.stan")
fit <- mod\$sample(
data = stan_data,
seed = 97854,
iter_sampling = 15000,
iter_warmup = 5000,
chains = 10,
parallel_chains = 10,
refresh = 100 # print update every 500 iters
)``````

Finally, we do the post-processing of the simulations.

``````# extract posterior simulations
set.seed(4238)
draws <- fit\$draws(variables = "eta") %>%
posterior::as_draws_df() %>%
sample_n(5000)

# test that seed worked properly
as.numeric(draws[1, 1])``````
``[1] -7.0471``
``as.numeric(draws[1, 1]) == -7.0471``
``[1] TRUE``
``````# compute dissent scores
dissent_scores <- draws %>%
# pivot data set into long format with columns for the eta index
pivot_longer(cols = starts_with("eta["), names_to = "par", values_to = "eta") %>%
# extract observation index (i.e., country-year) from par
mutate(observation_index = str_remove_all(par, "[^[:digit:]]"),
observation_index = as.numeric(observation_index)) %>%
# compute posterior averages and se
group_by(observation_index) %>%
summarize(avg_eta = mean(eta),
se_eta = sd(eta),
avg_pi = mean(plogis(eta))) %>%
ungroup() %>%
# rescale posterior average of eta to create dissent score
mutate(dissent_score = (avg_eta - mean(avg_eta))/(2*sd(avg_eta)),
se_dissent_score = se_eta/(2*sd(avg_eta))) %>%
# join in raw counts data
left_join(counts) %>%
mutate(frac_dissent_events = ifelse(n_events == 0, 0, n_dissent_events/n_events)) %>%
#   - country_name: country.name from {countrycode}
#   - stateabb: COW 3-digit alpha from {countrycode}
mutate(stateabb = countrycode(ccode, "cown", "cowc"),
country_name = countrycode(ccode, "cown", "country.name.en")) %>%
# make repairs for german case
mutate(stateabb = ifelse(ccode == 260, "GFR", stateabb),
country_name = ifelse(ccode == 260, "Germany (GFR)", country_name)) %>%
mutate(release = "V0.1 (Preprint)",
release_date = "2023-12-18",
created_date = Sys.Date(),
data_source = "IDEA") %>%
# select variables for final data set
select(release, release_date, data_source, created_date, country_name, ccode, stateabb, year, n_events, n_dissent_events, frac_dissent_events, avg_pi, avg_eta, dissent_score, se_dissent_score)

# write latent measures to file
write_csv(dissent_scores, "output/dissent-scores.csv")
haven::write_dta(dissent_scores, "output/dissent-scores.dta")
write_rds(dissent_scores, "output/dissent-scores.rds")``````
``````# verify that just-created version matches dataverse version
``[1] TRUE``