## Uncomment the following lines if you don't have the packages installed already
# packages <- c("readr", "dplyr", "stringr", "purrr", "rvest")
# packages_to_install <- packages[!(packages %in% installed.packages())]
# if (length(packages_to_install) > 0) install.packages(packages_to_install)
library(readr)
library(dplyr)
library(stringr)
library(purrr)
library(rvest)
Knock Knock…Who’s There? It’s me, Canada!
Background
There is a beeswarm plot on Observable HQ that shows photos of American presidents ordered by the year they were inaugurated. As a proud Canadian and amateur chart maker, I decided to re-create the beeswarm plot, but this time with a distinctly Canadian flavour!
Setup
First, we load some packages from the tidyverse.
If you’re new to R or the tidyverse eco-system, here is a simple breakdown of the packages we’ve loaded:
- rvest for web-scraping.
- dplyr for data cleaning.
- purrr for functional programming tools.
- stringr for string manipulation.
- readr for reading/writing csv files.
Note: one should be careful about equating base R syntax and tidyverse syntax. It is best to think of R as a big sandbox and the tidyverse as a very mighty sandcastle inside its walls. The tidyverse has its own king, judicial system, and charter, but more importantly, it pays taxes to the mighty R king. So, if you prefer to perform this analysis with base R, please feel free!
Historical Rankings
Since the original plot has the inauguration date on the x-axis, we need to get this information from Wikipedia along with the images. To accomplish this feat, we download the table from the Wikipedia page “Historical rankings of prime ministers of Canada”, under the section “Scholar survey results”.
## Wikipedia site that stores historical rankings of Canadian Prime Ministers
prime_minister_rankings_webpage <- read_html(
  "https://en.wikipedia.org/wiki/Historical_rankings_of_prime_ministers_of_Canada"
)

## Get the rankings table (the first table on the page) and standardize the
## column names to match the image table (coming later).
##
## Only the four columns used downstream are picked, and they are renamed in
## the same `select()` call. The name column and the per-year Maclean's columns
## (e.g. `Maclean's 1997[2]`) are not referenced at all: they were dropped
## anyway, so a change to their footnote-numbered headers on Wikipedia should
## not be able to break this step.
prime_minister_rankings <-
  prime_minister_rankings_webpage |>
  html_table() |>
  _[[1]] |>
  select(
    pm_id = Sequence,
    party = `Political party`,
    macleans_ranking = `Aggr.[5]`,
    time_in_office_ranking = `Time in office (rank)`
  )
Portraits
The next step is to collect a portrait of each prime minister. Below we fetch the portrait URLs from Wikipedia’s list of prime ministers of Canada, then download each portrait image file into a local directory (path). If you’re following along, set the path variable to the folder where you’d like to save the portraits.
## Wikipedia site that lists all Canadian Prime Ministers and provides a URL to
## their portrait
prime_minister_webpage <- read_html(
  "https://en.wikipedia.org/wiki/List_of_prime_ministers_of_Canada"
)

## Get the portrait urls from the retrieved `Prime Ministers` html.
## Result: a tibble with one row per image and the columns `image_id`, `urls`.
prime_minister_portrait_urls <-
  prime_minister_webpage |>
  html_elements(css = "img") |>
  html_attr(name = "src") |>
  as_tibble() |>
  ## Keep only `src` values containing "thumb" and "commons", and skip the
  ## "Maple_Leaf" image.
  filter(
    str_detect(value, "thumb"),
    str_detect(value, "commons"),
    !str_detect(value, "Maple_Leaf")
  ) |>
  ## Strip a leading "//" from each `src` and prefix "https://" to obtain a
  ## full URL; `.keep = "none"` drops the raw `value` column.
  mutate(
    urls = sprintf("https://%s", str_remove(value, "^//")),
    .keep = "none"
  ) |>
  ## Drop the first remaining image.
  ## NOTE(review): presumably this one is not a portrait -- confirm on the page.
  filter(row_number() > 1) |>
  ## `row_number()` rather than `1:length(urls)`: the latter yields c(1, 0) when
  ## no rows are left, which makes `mutate()` fail instead of returning an
  ## empty table.
  mutate(image_id = row_number(), .before = 1)
## Download each portrait to `path`, saved as "<image_id>.jpg"
path <- "< Insert your local directory (folder) here to store the portraits >"

## Fail fast with a clear message when `path` is still the placeholder above or
## points to a folder that does not exist; otherwise the first download would
## stop with a much less helpful "cannot open destfile" error.
if (!dir.exists(path)) {
  stop(
    "`path` must be an existing directory to store the portraits: ", path,
    call. = FALSE
  )
}

## One download per row; files are numbered by `image_id` so they line up with
## the ids in `prime_minister_portrait_urls`.
walk2(
  .x = prime_minister_portrait_urls$urls,
  .y = prime_minister_portrait_urls$image_id,
  \(url, id) download.file(
    url = url,
    destfile = file.path(path, sprintf("%s.jpg", id)),
    mode = "wb" # write in binary mode so the image bytes are stored unchanged
  )
)
Term Information
To complete the chart, we need to know each prime minister’s inauguration date. Using the same webpage as the portraits (Wikipedia), we fetch their birth, death, term start, and term end dates.
The Big Reveal
Voila! Here is the final result with all 23 of Canada’s past and current prime ministers accounted for.
Using the Maclean’s rankings data, we can re-create another plot from Observable HQ. Our data is slightly different since it is not a percentage; for plotting purposes, we use each prime minister’s historical Maclean’s magazine ranking instead. The plot shows no discernible pattern: every period of Canadian history has beloved and loathed prime ministers!