Link Search Menu Expand Document

P3D4: Programming with spatial data (part 2)

These slides map to the R example slides on Day 24 or P3D10

library(tidyverse)
library(sf)
library(USAboundaries)
library(leaflet)

httpgd::hgd() # for VSCode
httpgd::hgd_browse() # for VSCode

dat <- read_rds("chipotle_nested.rds") %>%
    st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

cal <- us_counties(states = "California") %>%
    select(countyfp, countyns, name, aland, awater, state_abbr, geometry)

Calculating areas

  • What units are the awater and aland variables in?
  • Does aland include awater?

  • st_area
cal %>%
    mutate(
        states_area = aland + awater,
        sf_area = st_area(geometry)) %>%
    select(name, states_area, aland, sf_area, awater) %>%
    filter(name == "Santa Barbara")

Other spatial calculations

ksu <- tibble(latitude = 34.037876, longitude =  -84.58102) %>%
    st_as_sf(coords = c("longitude", "latitude"), crs = 3310)
# https://epsg.io/4326 units are degrees
calw <- cal %>%
    st_transform(3310) %>% # search https://spatialreference.org/ref/?search=california&srtext=Search. Units are in meters for buffer.
    filter(name != "San Francisco") %>%
    mutate(
        aland_acres = aland * 0.000247105,
        awater_acres = awater * 0.000247105,
        percent_water = 100 * (awater / aland),
        sf_area = st_area(geometry),
        sf_center = st_centroid(geometry),
        sf_length = st_length(geometry),
        sf_distance = st_distance(sf_center, ksu),
        sf_buffer = st_buffer(sf_center, 24140.2), # 24140.2 is 15 miles
        sf_intersects = st_intersects(., filter(., name == "Los Angeles"), sparse = FALSE)
        )

Now let’s figure out how to plot the varied spatial layers with ggplot2

ggplot(data = calw) +
    geom_sf(aes(fill = sf_intersects)) + 
    geom_sf(aes(geometry = sf_buffer), fill = "white") +
    geom_sf(aes(geometry = sf_center), color = "darkgrey") +
    geom_sf_text(aes(label = name), color = "lightgrey") +
    geom_sf(data = filter(dat, region == "CA"), color = "black") + # our chipotle locations
    theme_bw() 

Counting Chipotle stores by county

Let’s create this chart in ggplot2.

The joining steps

First thing we are going to do is label each Chipotle with its respective county.

store_in_county <- st_join(dat, cal, join = st_within) %>%
    select(placekey, city, region, geometry, countyfp, name)

Notice how I have kepts the countyfp column to use for my join. The name column has the county name from the cal sf object.

placekey city region geometry countyfp name
227-222@627-sd6-7dv White Plains NY POINT (-73.76443 41.03327) NA NA
zzw-223@5r8-fqv-xkf Tulsa OK POINT (-95.96862 36.14081) NA NA
227-222@5z6-96f-btv Palm Desert CA POINT (-116.4031 33.72817) 065 Riverside
222-222@63v-v66-nqz Erie PA POINT (-80.13165 42.1054) NA NA
224-222@5qw-jn9-3yv Fort Worth TX POINT (-97.31382 32.91515) NA NA
22r-222@5z4-zwb-q4v Los Angeles CA POINT (-118.4463 34.06106) 037 Los Angeles

Now we can group them and count by our county keys.

store_in_county_count <- store_in_county %>%
    as_tibble() %>% 
    count(countyfp, name) %>%
    filter(!is.na(countyfp)) # drop the NA counts.
countyfp name n
001 Alameda 21
007 Butte 4
013 Contra Costa 13
017 El Dorado 3
019 Fresno 9
023 Humboldt 1

Now we can join the new feature we calculated by county back with our wrangled California sf object calw.

calw <- calw %>%
    left_join(store_in_county_count, fill = 0) %>%
    replace_na(list(n = 0)) 

Finally we can create our map.

calw %>%
ggplot() +
    geom_sf(aes(fill = n)) + 
    scale_fill_continuous(trans = "sqrt") +
    geom_sf(data = filter(dat, region == "CA"), color = "white", shape = "x") +
    theme_bw() +
    theme(legend.position = "bottom") +
    labs(fill = "Number of Chipotle\nstores")

Unnesting, calculating, then joining

Creating our county group columns

Notice how the sf object structure is kept for the left object dat and the non-geometry columns are brought into dat from the cal object.

dat_wc <- st_join(dat, cal, join = st_within) 

Unnest and pivot

days_week_long <- dat_wc %>%
    filter(region == "CA") %>%
    as_tibble() %>% # notice this line to break the sf object rules.
    rename(name_county = name) %>%
    unnest(popularity_by_day) %>%
    select(placekey, city, region, contains("raw"),
        name, value, geometry, countyfp, name_county)

Now we have our table of popularity by day in long format. We want to move the days to columns with the counts by day in each column (pivot_wider())

days_week <-  days_week_long %>%
    pivot_wider(names_from = name, values_from = value)

Calculating summaries

Now we can create summaries using our new pivoted columns

# average visits per day over the n stores by county.
visits_day_join <- days_week %>%
    group_by(countyfp, name_county) %>%
    summarise(
        count = n(),
        Monday = sum(Monday, na.rm = TRUE) / count,
        Tuesday = sum(Tuesday, na.rm = TRUE) / count,
        Wednesday = sum(Wednesday, na.rm = TRUE) / count,
        Thursday = sum(Thursday, na.rm = TRUE) / count,
        Friday = sum(Friday, na.rm = TRUE) / count,
        Saturday = sum(Saturday, na.rm = TRUE) / count,
        Sunday = sum(Sunday, na.rm = TRUE) / count,
    ) %>%
    ungroup()

Combining summaries back into our calw sf object.

calw <- calw %>%
    left_join(visits_day_join %>% select(-name_county)) %>%
    replace_na(list(Monday = 0, Tuesday = 0, Wednesday = 0,
      Thursday = 0, Friday = 0, Saturday = 0, Sunday = 0)) 

Plotting Saturday use

Let’s make a plot similar to our previous chart.

calw %>%
ggplot() +
    geom_sf(aes(fill = Saturday)) + 
    geom_sf(data = filter(dat, region == "CA"), color = "white", shape = "x") +
    theme_bw() +
    theme(legend.position = "bottom") +
    labs(fill = "Average store traffic", 
      title = "Saturday traffic for Chipotle")

We can add our count variable to provide some additional insight

calw %>%
ggplot() +
    geom_sf(aes(fill = Saturday)) + 
    geom_sf(aes(geometry = sf_center, size = count), color = "grey") +
    theme_bw() +
    scale_size_continuous(breaks = c(1, 5, 10, 25, 50, 75),
      trans = "sqrt", range = c(2, 15)) +
    labs(
        fill = "Average store traffic",
        size = "Number of stores",
        title = "Saturday traffic for Chipotle")

Plotting with leaflet

calw_4326 <- st_transform(calw, 4326) # will need in 4326 for leaflet 

bins <- c(0, 10, 20, 30, 50, 70, 90, 110)
pal <- colorBin("YlOrRd", domain = calw_4326$n)

m <- leaflet(calw_4326) %>%
    addPolygons(
        data = calw_4326,
        fillColor = ~pal(n),
        fillOpacity = .5,
        color = "darkgrey",
        weight = 2) %>%
    addCircleMarkers(
        data = filter(dat, region == "CA"),
        radius = 3,
        color = "grey") %>%
    addProviderTiles(providers$CartoDB.Positron)