Ch. 6 Working with Dates

6.1 Overview

At some point in time (:p), you’ll need to work with date data. This tutorial illustrates concepts including converting date formats, extracting date information (e.g., day of week, year), and calculating a date relative to another.

6.2 Load libs

# Create the date table: one row per valid calendar date, 1930 through 2030.
# expand.grid() yields every month/day/year combination, including impossible
# ones (Feb 30-31, Feb 29 in non-leap years, and the 31st of Apr/Jun/Sep/Nov).
# lubridate::mdy() returns NA for those (with a "failed to parse" warning),
# and they are dropped by the final filter.
df <- expand.grid(month = seq(1, 12, 1),
                  day = seq(1, 31, 1),
                  year = seq(1930, 2030)) %>%
  mutate(mdy = lubridate::mdy(paste0(month, "/", day, "/", year))) %>%
  # extract date components from the parsed Date column
  mutate(dt_month = lubridate::month(mdy),
         dt_day = lubridate::day(mdy),
         dt_year = lubridate::year(mdy),
         dt_week = lubridate::week(mdy),
         dt_yday = lubridate::yday(mdy),
         dt_weekday = lubridate::wday(mdy),
         dt_weekday_l = lubridate::wday(mdy, label = TRUE)) %>%
  filter(!is.na(mdy)) # drop combinations that are not real dates
## Warning: 682 failed to parse.
# preview the first rows of the date table as a formatted table
df %>%
  head() %>%
  knitr::kable()
month day year mdy dt_month dt_day dt_year dt_week dt_yday dt_weekday dt_weekday_l
1 1 1930 1930-01-01 1 1 1930 1 1 4 Wed
2 1 1930 1930-02-01 2 1 1930 5 32 7 Sat
3 1 1930 1930-03-01 3 1 1930 9 60 7 Sat
4 1 1930 1930-04-01 4 1 1930 13 91 3 Tue
5 1 1930 1930-05-01 5 1 1930 18 121 5 Thu
6 1 1930 1930-06-01 6 1 1930 22 152 1 Sun
# flag the dates of interest with 1/0 indicator columns
df_marked <- mutate(
  df,
  # May 5th that falls on a Tuesday
  cinco_de_mayo_taco_tuesday = as.numeric(
    dt_month == 5 & dt_day == 5 & dt_weekday_l == "Tue"
  ),
  # the 13th of any month that falls on a Friday
  friday_13th = as.numeric(dt_day == 13 & dt_weekday_l == "Fri"),
  # April 4th of every year
  erica_bday = as.numeric(dt_month == 4 & dt_day == 4)
)

# split out the flagged days (the ones Alexa mentioned) into their own tables
all_cinco_de_mayo_taco_tuesday <- filter(df_marked,
                                         cinco_de_mayo_taco_tuesday == 1)

all_friday13th <- filter(df_marked, friday_13th == 1)

# birthdays only, restricted to 1991 onward
all_erica <- filter(df_marked, erica_bday == 1, year >= 1991)

6.3 Count wife’s birthdays by day of week

# number of birthdays falling on each day of the week
erica_wkday <- all_erica %>%
  group_by(dt_weekday_l) %>%
  tally()

6.4 Plot time series of Cinco de Mayo landing on a Tuesday (Taco Tuesday!)

# Taco Tuesday indicator (0/1) against year, drawn as tiles
ggplot(data = df_marked,
       mapping = aes(x = year, y = cinco_de_mayo_taco_tuesday)) +
  geom_tile() +
  theme_minimal()
# same mapping, drawn as a path connecting the rows in data order
ggplot(data = df_marked,
       mapping = aes(x = year, y = cinco_de_mayo_taco_tuesday)) +
  geom_path() +
  theme_minimal()
# bar chart of birthday counts per weekday; bar heights come straight from n
ggplot(data = erica_wkday,
       mapping = aes(x = dt_weekday_l, y = n,
                     group = dt_weekday_l, fill = dt_weekday_l)) +
  geom_col()

6.5 Calculating dates relative to other dates

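These operations work on Date objects (created with as.Date()) and on the related POSIXct date-time objects, not on character data. Adding or subtracting a number shifts a Date by that many days (for POSIXct, the unit is seconds).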

# Date arithmetic: adding or subtracting a number moves a Date by that many days
ref_date <- as.Date("2020-10-11")
day_offset <- 90
paste0("Reference date: ", ref_date)
## [1] "Reference date: 2020-10-11"
paste0("Reference date - 90 days: ", ref_date - day_offset)
## [1] "Reference date - 90 days: 2020-07-13"
paste0("Reference date + 90 days: ", ref_date + day_offset)
## [1] "Reference date + 90 days: 2021-01-09"