Risk categorization REMOVE?

Step-by-Step Instructions

Loading required libraries

library(dplyr)
library(purrr)

Combining all 4 incidence estimates into 1 dataframe

# Use crude incidence dataset and add the 3 other incidence adjustments
all_inc <- crude_inc %>%
      left_join(adj_inc1, by = c("adm1", "adm2", "adm3", "year")) %>%
      left_join(adj_inc2, by = c("adm1", "adm2", "adm3", "year")) %>%
      left_join(adj_inc3, by = c("adm1", "adm2", "adm3", "year"))

# Now create categories using cut_off values for each incidence
# Create an object for the incidence variables

vars <- c("crudeinc", "adjinc1", "adjinc2", "adjinc3")

# Define breaks and labels for the categories
cut_offs <- c(-Inf, 5, 50, 100, 250, 450, 750, Inf)
cut_lab <- c("<5", "5-50", "50-100", "100-250", "250-450", "450-750", ">750")

# create numerical values for the categories
all_inc <- all_inc %>%
  mutate(across(all_of(vars), ~ cut(., breaks = cut_offs,
                                             labels = cut_lab),
                 .names = "{.col}_cat"))

# Categorization of prevalence estimates. This assumes we have prevalence estimates for multiple years, just as we have for incidence

vars_prev <- c("prev_y1", "prev_y2", "prev_y3")

# Define breaks and labels for prevalence estimates
cut_offs <- c(-Inf, 1, 5, 10, 20, 35, 50, Inf)
cut_lab <- c("<1", "1-5", "5-10", "10-20", "20-35", "35-50", ">50")

# create numerical values for the categories
prev_df <- prev_df %>%
  mutate(across(all_of(vars_prev), ~ cut(., breaks = cut_offs,
                                                   labels = cut_lab),
                 .names = "{.col}_cat"))

We will first combine the incidence dataset and the prevalence dataset. Based on the discussion with the SNT team, the incidence and prevalence estimates for the most current year may be used for the risk stratification.

# First combine incidence and prevalence datasets and filter for most current year
year = "most current year" # place holder for most current year

inc_prev <- all_inc %>%
      left_join(prev_df, by = c("adm1", "adm2", "adm3", "year")) %>%
  dplyr::filter(year == year)

# Conducting first risk stratification for the most current year by summing strata values in inc and prev
# let's get the risk stratification combine each of the incidence estimates to the prevalence (assuming prev_y3 is the most current prevalence estimats)

inc_prev <- inc_prev %>%
  mutate(sumcat_crude = crudeinc_cat + prev_y3_cat,
         sumcat_adj1 = adjinc1_cat + prev_y3_cat,
         sumcat_adj2 = adjinc2_cat + prev_y3_cat,
         sumcat_adj3 = adjinc3_cat + prev_y3_cat)

# Next we will recode the values of the sumcat variables into meaningful thresholds based on the range of values from the sumcat variables. The figures provided here are for illustrative purposes

inc_prev <- inc_prev %>%
  mutate( # generating recode values for crude estimates
    sumcat_crude_rec = case_match(
      sumcat_crude %in% 6:7 ~ 1,
      sumcat_crude == 8 ~ 2,
      sumcat_crude == 9 ~ 3,
      sumcat_crude == 10 ~ 4,
      sumcat_crude %in% 11:13 ~ 5,
      TRUE ~ NA_real_
    ),
    # generating recode values for adjusted 1 estimates
    sumcat_adj1_rec = case_when(
      sumcat_adj1 %in% 6:7 ~ 1,
      sumcat_adj1 == 8 ~ 2,
      sumcat_adj1 == 9 ~ 3,
      sumcat_adj1 == 10 ~ 4,
      sumcat_adj1 %in% 11:13 ~ 5,
      TRUE ~ NA_real_
    ),
    # generating recode values for adjusted 2 estimates
    sumcat_adj2_rec = case_when(
      sumcat_adj2 %in% 6:7 ~ 1,
      sumcat_adj2 == 8 ~ 2,
      sumcat_adj2 == 9 ~ 3,
      sumcat_adj2 == 10 ~ 4,
      sumcat_adj2 %in% 11:13 ~ 5,
      TRUE ~ NA_real_
    ),

    # generating recode values for adjusted 3 estimates
    sumcat_adj3_rec = case_when(
      sumcat_adj3 %in% 6:7 ~ 1,
      sumcat_adj3 == 8 ~ 2,
      sumcat_adj3 == 9 ~ 3,
      sumcat_adj3 == 10 ~ 4,
      sumcat_adj3 %in% 11:13 ~ 5,
      TRUE ~ NA_real_
    )
  )

# define labels and apply it to the recoded values
# define labels
labels <- c(
  "1" = "Very low",
  "2" = "Low",
  "3" = "Moderate",
  "4" = "High",
  "5" = "Very High"
)

# apply to the recoded values
inc_prev <- inc_prev %>%
  mutate(
    sumcat_crude_rec = factor(sumcat_crude_rec, levels = 1:5, labels = labels),
    sumcat_adj1_rec  = factor(sumcat_adj1_rec, levels = 1:5, labels = labels),
    sumcat_adj2_rec  = factor(sumcat_adj2_rec, levels = 1:5, labels = labels),
    sumcat_adj3_rec  = factor(sumcat_adj3_rec, levels = 1:5, labels = labels)
  )

Next steps of codes plot each of the risk strata on a map at the appropriate adm level. in this code we plot it at admin 3 level.

this first set of codes plots each map separately

# first join the final dataset with the adm3 shapefile

inc_prev_shp <- adm3_sf %>%
  left_join(inc_prev, by = c("adm1", "adm2", "adm3"))

# Plot for each of the sum cat variables
# Map for Crude incidence stratification

ggplot(inc_prev_shp) +
  geom_sf(aes(fill = sumcat_crude_rec), color = "gray80", size = 0.2) +  # inner borders
  geom_sf(data = adm1_sf, fill = NA, color = "black", size = 0.3) + # adm1 borders
  scale_fill_manual(
    name = "Strata",
    values = c(
      "Very low" = "#c6dbef",
      "Low" = "#6baed6",
      "Moderate" = "#fdd0a2",
      "High" = "#e6550d",
      "Very High" = "#de2d26"
    ),
    drop = FALSE
  ) +
  theme_minimal() +
  labs(
    title = "Strata - Prev+Crudeinc",
    subtitle = "Country",
    fill = "Strata"
  ) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )

# Map for Adjusted incidence1 stratification

ggplot(inc_prev_shp) +
  geom_sf(aes(fill = sumcat_adj1_rec), color = "gray80", size = 0.2) +  # inner borders
  geom_sf(data = adm1_sf, fill = NA, color = "black", size = 0.3) + # adm1 borders
  scale_fill_manual(
    name = "Strata",
    values = c(
      "Very low" = "#c6dbef",
      "Low" = "#6baed6",
      "Moderate" = "#fdd0a2",
      "High" = "#e6550d",
      "Very High" = "#de2d26"
    ),
    drop = FALSE
  ) +
  theme_minimal() +
  labs(
    title = "Strata - Prev+Adjinc1",
    subtitle = "Country",
    fill = "Strata"
  ) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )


# Map for Adjusted incidence 2 stratification

ggplot(inc_prev_shp) +
  geom_sf(aes(fill = sumcat_adj2_rec), color = "gray80", size = 0.2) +  # inner borders
  geom_sf(data = adm1_sf, fill = NA, color = "black", size = 0.3) + # adm1 borders
  scale_fill_manual(
    name = "Strata",
    values = c(
      "Very low" = "#c6dbef",
      "Low" = "#6baed6",
      "Moderate" = "#fdd0a2",
      "High" = "#e6550d",
      "Very High" = "#de2d26"
    ),
    drop = FALSE
  ) +
  theme_minimal() +
  labs(
    title = "Strata - Prev+Adjinc2",
    subtitle = "Country",
    fill = "Strata"
  ) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )


# Map for Adjusted incidence 3 stratification

ggplot(inc_prev_shp) +
  geom_sf(aes(fill = sumcat_adj3_rec), color = "gray80", size = 0.2) +  # inner borders
  geom_sf(data = adm1_sf, fill = NA, color = "black", size = 0.3) + # adm1 borders
  scale_fill_manual(
    name = "Strata",
    values = c(
      "Very low" = "#c6dbef",
      "Low" = "#6baed6",
      "Moderate" = "#fdd0a2",
      "High" = "#e6550d",
      "Very High" = "#de2d26"
    ),
    drop = FALSE
  ) +
  theme_minimal() +
  labs(
    title = "Strata - Prev+Adjinc3",
    subtitle = "Country",
    fill = "Strata"
  ) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )

The alternative will be to show the maps on a facet grid

# first reshape data into long format
inc_prev_shp_long <- inc_prev_shp %>%
  pivot_longer(
    cols = c(sumcat_crude_rec, sumcat_adj1_rec, sumcat_adj2_rec, sumcat_adj3_rec),
    names_to = "indicator",
    values_to = "strata"
  ) %>%
  mutate(indicator = recode(indicator,
                            sumcat_crude_rec = "Prev+Crude",
                            sumcat_adj1_rec = "Prev+Adjinc1",
                            sumcat_adj2_rec = "Prev+Adjinc2",
                            sumcat_adj3_rec = "Prev+Adjinc3"))

# Next we plot all maps with facets
ggplot(inc_prev_shp_long) +
  geom_sf(aes(fill = strata), color = "gray80", size = 0.2) +
  geom_sf(data = adm1_sf, fill = NA, color = "black", size = 0.3) +
  scale_fill_manual(
    values = c(
      "Very low" = "#c6dbef",
      "Low" = "#6baed6",
      "Moderate" = "#fdd0a2",
      "High" = "#e6550d",
      "Very High" = "#de2d26"
    ),
    name = "Strata",
    drop = FALSE
  ) +
  facet_wrap(~indicator) +
  theme_minimal() +
  labs(
    title = "Strata by Indicator (Prevalence + Incidence)",
    subtitle = "Country",
    fill = "Strata"
  ) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9)
  )

Risk categorization REMOVE?

Step-by-Step Instructions

Combining all 4 incidence estimates into 1 dataframe

Full code

Output