Ch. 10 Working with JSON data

As part of my research program, I am developing a chatbot to assess cognitive function. For this example, we will work with a sample output file from the chatbot, in JSON format.

10.1 Load data

library(tidyverse)
# Download the sample chatbot output and parse it from JSON into an R object.
cogbot_d <- jsonlite::read_json(
  path = "https://cogtasks.com//x/cogbot/data.json"
)

10.2 Pre-process data (tidy it)

# separate data ----
# Put each top-level element of the parsed JSON into its own one-column
# tibble. The column names are spelled out explicitly; they are the names the
# later `separate()` calls refer to.
message_df <- tibble(`cogbot_d$messages` = cogbot_d$messages)
keystroke_df <- tibble(`cogbot_d$keystrokes` = cogbot_d$keystrokes)

# pre-process data ----
# Split each raw message record into its six comma-separated fields.
# `extra = "merge"` stops splitting after the sixth field, so a comma inside
# the message text no longer truncates `msg` (the default behaviour discarded
# everything after that comma and emitted a warning).
message_pp <- message_df %>%
  separate(`cogbot_d$messages`,
           sep = ",",
           into = c("ts", "session_uuid", "msg_uuid", "msg_id", "sender", "msg"),
           extra = "merge") %>%
  # strip "[" and double-quote characters from the timestamp text, then parse
  # it into a date-time
  mutate(ts_c = anytime::anytime(gsub('"', "", gsub("\\[", "", ts)))) %>%
  # replace the raw timestamp with the parsed one and move it to the front
  select(-ts) %>%
  select(ts_c, everything())
head(message_pp)
## # A tibble: 6 × 6
##   ts_c                session_uuid                 msg_uuid  msg_id sender msg  
##   <dttm>              <chr>                        <chr>     <chr>  <chr>  <chr>
## 1 2022-02-21 01:13:24 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"icmeu… 0      "\"bo… "\"W…
## 2 2022-02-21 01:14:01 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"z9wa6… 1      "\"us… "\"a…
## 3 2022-02-21 01:14:02 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"ei2xk… 2      "\"bo… "\"W…
## 4 2022-02-21 01:14:03 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"74jz1… 3      "\"us… "\"3…
## 5 2022-02-21 01:14:04 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"bad8d… 4      "\"bo… "\"O…
## 6 2022-02-21 01:14:05 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"m0kb3… 5      "\"us… "\"3…
# Split each raw keystroke record into its 15 comma-separated fields, add a
# numeric copy of the event timestamp and a parsed date-time for the record
# timestamp, then put the parsed timestamp first and drop the raw one.
keystroke_pp <- keystroke_df %>%
  separate(
    `cogbot_d$keystrokes`,
    into = c(
      "ts", "session_uuid", "msg_uuid", "event_uuid",
      "e_ts", "e_loc", "e_key", "e_type", "e_keycode",
      "e_which", "e_ctrlkey", "e_shiftkey", "e_altkey", "e_metakey",
      "e_repeat"
    ),
    sep = ","
  ) %>%
  mutate(
    e_ts_c = as.numeric(e_ts),
    # remove "[" and double-quote characters before parsing the timestamp
    ts_c = anytime::anytime(
      gsub('"', "", gsub("[", "", ts, fixed = TRUE), fixed = TRUE)
    )
  ) %>%
  select(ts_c, everything(), -ts)

head(keystroke_pp)
## # A tibble: 6 × 16
##   ts_c                session_uuid  msg_uuid event_uuid e_ts  e_loc e_key e_type
##   <dttm>              <chr>         <chr>    <chr>      <chr> <chr> <chr> <chr> 
## 1 2022-02-21 01:13:24 "\"\""        "\"\""   "\"l4j6w2… 6059… null  "nul… "\"cl…
## 2 2022-02-21 01:14:00 "\"5ihdywx2o… "\"icme… "\"o1kbj1… 4223… 0     "\"a… "\"ke…
## 3 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"4mb7bw… 4267… 0     "\"p… "\"ke…
## 4 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"e9f346… 4282… 0     "\"p… "\"ke…
## 5 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"06c6qa… 4296… 0     "\"l… "\"ke…
## 6 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"2ezwmt… 4307… 0     "\"e… "\"ke…
## # … with 8 more variables: e_keycode <chr>, e_which <chr>, e_ctrlkey <chr>,
## #   e_shiftkey <chr>, e_altkey <chr>, e_metakey <chr>, e_repeat <chr>,
## #   e_ts_c <dbl>
# merge dataframes ----

# Full join on the session and message identifiers: rows from either table
# that have no match in the other are kept.
both_pp <- full_join(
  keystroke_pp,
  message_pp,
  by = c("session_uuid", "msg_uuid")
)

10.3 Visualize data

10.3.1 Keypresses over time

# Scatter plot of the key (y) against the numeric event timestamp (x), with
# the x-axis tick labels rotated by 90 degrees.
both_pp %>%
  ggplot(mapping = aes(x = e_ts_c, y = e_key)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 8 rows containing missing values (geom_point).

10.3.2 Keypresses per question asked by the chatbot

# Same scatter plot of key against numeric event timestamp, split into one
# column of panels per value of `msg`.
both_pp %>%
  ggplot(mapping = aes(x = e_ts_c, y = e_key)) +
  geom_point() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(. ~ msg)
## Warning: Removed 8 rows containing missing values (geom_point).