Fill the Country-Years with Counts

First, load the empty country-years to fill with the count data.

# load packages
library(tidyverse)
library(lubridate)

# Read the empty country-year skeleton that the event counts are merged into
# below, then print a compact overview of its columns.
country_year <- read_csv("output/empty-country-years.csv")
glimpse(country_year)
Rows: 2,775
Columns: 3
$ year      <dbl> 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, …
$ ccode     <dbl> 2, 20, 31, 40, 41, 42, 51, 52, 53, 54, 55, 56, 57, 58, 60, 7…
$ idea_code <chr> "USA", "CAN", "BHM", "CUB", "HAI", "DOM", "JAM", "TRI", "BAR…

Second, load the dissent events data and count the events for each country-year.

# Load the dissent events and count them for each country-year.
#
# `count()` is used instead of `group_by()` + `summarize()` because the latter
# drops only the last grouping level and leaves the result grouped by
# `idea_code`; `count()` on an ungrouped input returns an ungrouped tibble with
# one row per observed (idea_code, year) pair.
dissent_events <- read_csv("output/idea-dissent-events.csv") %>%
  # reduce each event's date to its calendar year
  mutate(year = year(date)) %>%
  # rename `where_idea` to `idea_code` so it matches the key used in the
  # country-year data
  select(idea_code = where_idea, year) %>%
  count(idea_code, year, name = "n_dissent_events") %>%
  glimpse()
Rows: 1,977
Columns: 3
Groups: idea_code [203]
$ idea_code        <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG…
$ year             <dbl> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,…
$ n_dissent_events <int> 49, 44, 47, 47, 40, 32, 31, 30, 9, 10, 4, 68, 39, 55,…

Third, load all events and count the events for each country-year.

# Load all events and count them for each country-year.
#
# `count()` is used instead of `group_by()` + `summarize()` because the latter
# drops only the last grouping level and leaves the result grouped by
# `idea_code`; `count()` on an ungrouped input returns an ungrouped tibble with
# one row per observed (idea_code, year) pair.
all_events <- read_csv("output/idea-all-events.csv") %>%
  # split EVENTDAT at the space into a date part and a time part;
  # `fill = "right"` pads a missing time with NA instead of warning
  separate(EVENTDAT, c("date", "time"), sep = " ", fill = "right") %>%
  # the date part is parsed as month-day-year and reduced to its calendar year
  mutate(year = year(mdy(date))) %>%
  # rename `PLACE` to `idea_code` so it matches the key used in the
  # country-year data
  select(idea_code = PLACE, year) %>%
  count(idea_code, year, name = "n_events") %>%
  glimpse()
Rows: 4,228
Columns: 3
Groups: idea_code [369]
$ idea_code <chr> "ACI", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG…
$ year      <dbl> 1996, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, …
$ n_events  <int> 1, 489, 605, 1298, 957, 711, 849, 1572, 1268, 1249, 868, 763…

Finally, join the counts with the country-year data set and replace NAs with zeros.

# Join both sets of counts onto the country-year skeleton, write the result to
# disk, and print an overview. A left join keeps every country-year; those
# with no matching row in a count table come back as NA.
counts <- country_year %>%
  # name the join keys explicitly rather than relying on the natural join
  # (which also prints a "Joining with `by = ...`" message)
  left_join(dissent_events, by = c("idea_code", "year")) %>%
  left_join(all_events, by = c("idea_code", "year")) %>%
  # fill missing values with zero (i.e., no events); integer zeros keep the
  # counts as integers, whereas `ifelse(is.na(x), 0, x)` promotes them to double
  replace_na(list(n_dissent_events = 0L, n_events = 0L)) %>%
  # `write_csv()` passes its input through, so the pipe can continue
  write_csv("output/idea-counts.csv") %>%
  glimpse()
Rows: 2,775
Columns: 5
$ year             <dbl> 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990,…
$ ccode            <dbl> 2, 20, 31, 40, 41, 42, 51, 52, 53, 54, 55, 56, 57, 58…
$ idea_code        <chr> "USA", "CAN", "BHM", "CUB", "HAI", "DOM", "JAM", "TRI…
$ n_dissent_events <dbl> 118, 12, 0, 6, 5, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 9, 1,…
$ n_events         <dbl> 135537, 7781, 22, 979, 310, 121, 140, 220, 131, 6, 1,…