# Install `pacman` if not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# Load required packages using pacman
pacman::p_load(
readxl, # Read Excel files
tidyr, # data organization
sf, # handling shapefile data
dplyr, # data manipulation
ggplot2, # plotting]
viridis, # color palettes
shadowtext, # plot labels
nngeo, # spatial analysis
here, # file path management
stringr # cleaning up character strings
)Basic shapefile use and visualization
Beginner
Overview
In the context of SNT, having official, accurate, and current shapefiles is crucial. These files form the backbone for linking data to the geographic units. This page provides a step-by-step guide on how to load, visualize, and use shapefile data effectively.
Effective visualization of shapefiles serves two critical purposes in SNT:
- Validating the geometric integrity of the boundaries themselves
- Providing the spatial framework for all subsequent data analysis
A well-prepared shapefile should render cleanly at both national and subnational scales, with boundaries that align precisely with known geographic features and administrative divisions.
For background information on shapefiles and links to all the spatial data content in the SNT code library, including rasters and point data, please visit Spatial data overview. For suggestions on troubleshooting your shapefile, please visit Shapefile management and customization.
- Import all required shapefile components
- Set a consistent Coordinate Reference System (CRS)
- Create basic maps from shapefile data
- Overlay multiple administrative levels
- Use shapefiles to visualize different types of data
Using the right shapefiles
Not every shapefile is the correct one to use for SNT.
One of the key first steps prior to the initiation of any SNT analysis is the discussion amongst the SNT team regarding the lowest operational administrative unit for decision making in the country where specific interventions can be feasibly implemented. Analysts must always confirm this decision before embarking on the analysis, or initiate discussion if this has not already taken place. The unit of analysis affects the unit for data collection and the geographical scale at which the analysis is conducted. The latter will determine the shapefile used for SNT.
All shapefiles used in SNT must be reviewed and validated by the SNT team and must represent the official national boundary set. This ensures alignment with national standards, guarantees boundary accuracy, and avoids discrepancies in spatial data.
Don’t forget to also request all the official shapefiles of higher (less granular levels) than the shapefile of the chosen unit of analysis. For example, if the SNT team has selected adm2 as the unit of analysis, you will also need adm1 shapefiles. This is important because output maps for SNT should always include all the official boundaries for ease of interpretation.
If a custom mix of shapefiles (for example, a mix of adm2 and adm3) is required for SNT, please see Shapefile management and customization.
Step-by-Step
In this section, we guide you through the essential steps to load and view shapefile data.
The example uses administrative boundary shapefiles from Sierra Leone, with focus on chiefdom (adm3) and district (adm2) levels. The principles can be applied to shapefiles from any country.
To skip the step-by-step explanation, jump to the full code at the end of this page.
Step 1: Install and load packages
First, install and load the necessary packages for handling spatial data, data manipulation, and visualization. These libraries provide essential functions for working with spatial data.
The sf package is particularly important as it implements simple features standards for handling geographic vector data in R.
To adapt the code:
- Do not modify anything in the code above
Step 2: Import shapefiles and set the CRS
In this step, we define file paths to our shapefile components and read them as spatial objects. We read in both an adm3 shapefile (Chiefdom) and an adm2 shapefile (District), then print the top 10 rows of the adm3 spatial object to examine its contents.
Properly loading all components of a shapefile and setting a consistent coordinate reference system (CRS) ensures accurate spatial representation and compatibility with other geospatial datasets. The CRS is the way in which the spatial data is mapped onto the earth’s curved surface, and when working with multiple datasets, it is essential that they use the same CRS. More information on CRS can be found here.
# Define file paths using `here` for reproducibility
shapefile_shp <- here::here("english/data_r/shapefiles", "Chiefdom2021.shp")
shapefile_shx <- here::here("english/data_r/shapefiles", "Chiefdom2021.shx")
shapefile_dbf <- here::here("english/data_r/shapefiles", "Chiefdom2021.dbf")
# Administrative level 1 (e.g., districts)
adm2_shapefile <- here::here("english/data_r/shapefiles", "District 2021.shp")
# Load chiefdom shapefile into an sf object
gdf <- sf::st_read(shapefile_shp, quiet = TRUE)
# Load adm1 shapefile
adm2_gdf <- sf::st_read(adm2_shapefile, quiet = TRUE)
# Set the Coordinate Reference System (CRS)
# Check current CRS
print(paste("Original CRS:", st_crs(gdf)$input))[1] "Original CRS: NA"
To adapt the code:
- Line 2-4: Change
"english/data_r/shapefiles"to match the"folder/subfolder"where your shapefile components are saved - Line 2-4: Replace
"Chiefdom2021"with the name of your.shp,.shx, and.dbffiles - Line 7: Replace
"english/data_r/shapefiles"and"District2021.shp"with your adm1 level shapefile"folder/subfolder"and"filename.shp"respectively
Step 3: Visualize shapefile contents
Thorough inspection of both attribute data and geometries helps identify issues before they affect analysis, such as:
- Administrative mismatches: Discrepancies between admin unit names in shapefile vs in other data
- Geometric artifacts: Gaps, overlaps, or invalid polygons that may disrupt spatial operations
- CRS inconsistencies: Coordinate systems that don’t match other program datasets
This page assumes you are working with a clean and well-formatted shapefile. While a few troubleshooting tips are shown below, please see Shapefile management and customization for additional suggestions on inspecting, troubleshooting, and cleaning your shapefile.
Initial mapping serves as a diagnostic tool. Visual inspection can reveal issues like implausible boundary shapes or misaligned enclaves. A clean shapefile should:
- Maintain clean boundaries at all levels
- Preserve topological relationships (no overlapping polygons)
- Align with known geographic boundaries
- Display attribute data without rendering artifacts
In this step we generate maps to visually represent the spatial data, starting with a basic map of a single administrative level. This code generates the chiefdom-level (adm3) administrative boundaries for Sierra Leone.
Step 3.1: Basic admin unit map
# Plot the chiefdom shapefile
basic_map <- ggplot2::ggplot() +
ggplot2::geom_sf(data = gdf, fill = "lightblue", color = "black") +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()
) +
ggplot2::labs(
title = "Map of Sierra Leone Chiefdoms (adm3)",
subtitle = "Administrative level 3 boundaries"
) +
ggplot2::theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = basic_map,
here::here("03_output/3a_figures/basic_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 3: Adjust
colorandfillto fit your needs and preferences - Line 12-13: Modify the plot
titleandsubtitlebased on the country’s data you are using - Line 18-22: Adjust
width,height, anddpiinggsavebased on your output needs
Before proceeding with further analysis, share your shapefile maps with the SNT team for validation. The team will help ensure that:
- The shapefiles accurately represent the current administrative boundaries
- The boundaries align with the operational units for decision-making
- There are no discrepancies between the shapefile data and official boundaries
- Any mapping decisions (colors, labels, symbology) are consistent with program standards
This validation step is crucial for maintaining data integrity throughout the SNT process.
Step 3.2: Overlay multiple administrative levels
To add more contextual information onto the previous map, we overlay adm2 boundaries and labels on top of the existing adm3 map.
This plot uses shadowtext::geom_shadowtext to improve adm2 label legibility.
Show the code
overlay_map <- ggplot2::ggplot() +
ggplot2::geom_sf(data = gdf, fill = "#628DA5", color = "white") +
shadowtext::geom_shadowtext(
data = adm2_gdf,
ggplot2::aes(x = sf::st_coordinates(sf::st_point_on_surface(geometry))[,1],
y = sf::st_coordinates(sf::st_point_on_surface(geometry))[,2],
label = FIRST_DNAM),
bg.color = "white",
color = "black",
size = 2,
fontface = "bold"
) +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "black", linewidth = 0.5) +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid = ggplot2::element_blank(),
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank()
) +
ggplot2::labs(
title = "Overlay of Sierra Leone administrative boundaries",
subtitle = "Districts (adm2) and Chiefdoms (adm3)"
) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = overlay_map,
filename = here::here("03_output/3a_figures/overlay_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 2-4: Adjust
colorandfillto fit your needs and preferences - Line 8: Adjust the
labelparameter ingeom_shadowtext()to match the column in your data containing unit names - Line 22-23: Modify the plot
titleandsubtitlebased on the country’s data you are using - To visualize more than two administrative levels, you can add additional
geom_sf()layers
Step 3.3: Troubleshooting visualization issues
The table below outlines problems that may be encountered when attempting a visualization, their likely causes, and recommended fixes. please see Shapefile management and customization for additional suggestions on inspecting, troubleshooting, and cleaning your shapefile.
Common Visualization Problems and Solutions| Symptom | Likely Cause | Diagnostic Check | Immediate Solution |
|---|---|---|---|
| Missing polygons | Invalid geometries | sum(!sf::st_is_valid(gdf)) |
gdf <- sf::st_make_valid(gdf) |
| Distorted country shape | Incorrect CRS | sf::st_crs(gdf)$input |
sf::st_transform(gdf, 4326) |
| Blank output map | Empty geometries | sum(sf::st_is_empty(gdf)) |
gdf <- gdf[!sf::st_is_empty(gdf), ] |
| Misaligned labels | CRS mismatch | Compare sf::st_crs() of layers |
Reproject both to same CRS |
| Jagged boundaries | Over-simplified geometries | Check mapview::npts(gdf) |
Reduce dTolerance in simplification |
When troubleshooting shapefiles, be sure to:
- Create a backup copy of the original shapefile data before modifying geometries
- Document all changes made to the original shapefile
- Verify repairs and the resulting shapefile with the SNT team before proceeding to analysis
Step 4:Advanced shapefile use and visualization
Advanced techniques to visualize shapefiles can allow you to convey more information through additional plot elements. For this example, we use Sierra Leone’s merged shapefile-tabular data. See Merging Spatial Vectors with Tabular Data on how to merge admin unit data tables with your shapefile to prepare for visualization.
The merged dataset examples demonstrate how shapefiles transition from basic maps to analytical tools. When visualizing indicator data:
- Administrative boundaries provide the spatial framework
- Color scales represent indicator values
- Hierarchy is maintained through careful layer ordering
Step 4.1: Load and prepare merged data for advanced visualizations
We first load the data required to create advanced shapefile visualizations. This step loads both categorical intervention data and DHIS2 data, each of which is then merged with the shapefile. The steps for merging have been adapted from the Merging Spatial Vectors with Tabular Data page of this library.
We use dplyr::inner_join which keeps only perfectly matching records. If you need to keep all shapefile units even when data is missing, consider dplyr::left_join and handle NA values appropriately.
Show the code
categorical_intervention_data <- read_excel(
here::here(
"1.2_epidemiology",
"1.2a_routine_surveillance",
"processed",
"scenario_with_irs_no_smc_06_20_2025.xlsx"
)
)
cat_intervention_admins <- categorical_intervention_data |>
dplyr::distinct(FIRST_DNAM, FIRST_CHIE) # Using adm2 and adm3 equivalents
# Drop geometry from gdf for name-only joins
shp_names_cat <- gdf |>
sf::st_drop_geometry() |>
dplyr::distinct(FIRST_REGI, FIRST_DNAM, FIRST_CHIE) # adm1, adm2, adm3
# Inner join to count exact matches across admin levels 2-3
shp_with_cat <- shp_names_cat |>
dplyr::inner_join(
cat_intervention_admins,
by = c("FIRST_DNAM", "FIRST_CHIE")
)
print(paste("Number of exact matches across admin levels 2-3:", nrow(shp_with_cat)))
# Now perform the actual merge with admin levels 2-3
gdf_cat_joined <- gdf |>
dplyr::inner_join(
categorical_intervention_data,
by = c("FIRST_DNAM", "FIRST_CHIE")
) |>
sf::st_as_sf() # Ensure it remains spatial
print(paste("Final merged row count for intervention data:", nrow(gdf_cat_joined)))
# Check the structure of the merged data
colnames(gdf_cat_joined)
#==============================================================================#
sle_dhis2_df_coord_spatial_adm3 <- readRDS(
here::here(
"1.2_epidemiology",
"1.2a_routine_surveillance",
"processed",
"sle_dhis2_df_coord_spatial_adm3.rds"
)
)
# Get distinct admin names from sle_dhis2 (excluding adm0)
dhis2_admins <- sle_dhis2_df_coord_spatial_adm3 |>
dplyr::distinct(adm1, adm2, adm3)
# Drop geometry from gdf for name-only joins (excluding country level)
shp_names <- gdf |>
sf::st_drop_geometry() |>
dplyr::distinct(FIRST_REGI, FIRST_DNAM, FIRST_CHIE) # adm1, adm2, adm3 equivalents
# Inner join to count exact matches across admin levels 1-3
shp_with_dhis2 <- shp_names |>
dplyr::inner_join(
dhis2_admins,
by = c("FIRST_REGI" = "adm1", "FIRST_DNAM" = "adm2", "FIRST_CHIE" = "adm3")
)
print(paste("Number of exact matches across admin levels 1-3:", nrow(shp_with_dhis2)))
# Now perform the actual merge with admin levels 1-3 only
tabshp <- gdf |>
dplyr::inner_join(
sle_dhis2_df_coord_spatial_adm3,
by = c("FIRST_REGI" = "adm1", "FIRST_DNAM" = "adm2", "FIRST_CHIE" = "adm3")
) |>
sf::st_as_sf() # Ensure it remains spatial
print(paste("Final merged row count:", nrow(tabshp)))
# Check the structure of the merged data
colnames(tabshp)To adapt the code:
- Update file and column names based on the data you are merging
Step 4.2: Categorical color mapping
Categorical mapping is for discrete, non-numeric data or distinct groups. This example uses planned IRS coverage in Sierra Leone by Chiefdom for 2026-2030. The mapping assigns distinct colors or shapes to differentiate between categories.
Show the code
categorical_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = gdf_cat_joined,
ggplot2::aes(fill = irs),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_brewer(
name = "Planned IRS Coverage",
palette = "Accent",
na.value = "grey90"
) +
ggplot2::geom_sf(
data = gdf_cat_joined,
fill = NA,
color = "grey30",
linewidth = 0.3
) +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "black", linewidth = 0.5) +
ggplot2::theme_minimal() +
ggplot2::labs(
title = "Planned Indoor Residual Spraying (IRS)\n Coverage By Chiefdom: 2026-2030"
) +
ggplot2::theme(
legend.position = "bottom",
legend.key.size = ggplot2::unit(0.5, "cm"),
panel.grid = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = categorical_map,
here::here("03_output/3a_figures/categorical_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 4: Replace
irswith the desired name of the categorical variable you wish to plot - Line 9: Modify contents of the
nameparameter to reflect the categorical variable you are plotting - Line 23: Modify contents of the
titleparameter based on the data you are plotting
Step 4.3 Binned color mapping
Binned mapping works well for numeric data that benefits from grouping into intervals, such as incidence levels, proportion of suspected cases that are tested, or proportion of people using an ITN. This allows the SNT team to immediately identify which admin units have met a meaningful quantitative threshold and thus facilitates decision-making. This example creates bins for the proportion of positive tests, also known as test positivity rate, by Chiefdom. Test positivity rate is first calculated and then plotted.
Show the code
# Calculate test positivity rates as percentages
tabshp_with_rates <- tabshp |>
dplyr::filter(
!is.na(conf) & !is.na(test) &
!is.na(conf_u5) & !is.na(test_u5) &
!is.na(conf_5_14) & !is.na(test_5_14) &
!is.na(conf_ov15) & !is.na(test_ov15)
) |>
dplyr::mutate(
# Overall positivity rate
tpr_overall_pct = dplyr::case_when(
test > 0 ~ (conf / test) * 100,
TRUE ~ NA_real_
),
# Age-specific calculations
tpr_u5_pct = dplyr::case_when(
test_u5 > 0 ~ (conf_u5 / test_u5) * 100,
TRUE ~ NA_real_
),
tpr_5_14_pct = dplyr::case_when(
test_5_14 > 0 ~ (conf_5_14 / test_5_14) * 100,
TRUE ~ NA_real_
),
tpr_ov15_pct = dplyr::case_when(
test_ov15 > 0 ~ (conf_ov15 / test_ov15) * 100,
TRUE ~ NA_real_
)
)
binned_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = tabshp_with_rates,
ggplot2::aes(fill = base::cut(tpr_overall_pct,
breaks = c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100))),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_brewer(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1,
labels = c("0-10", "10-20","20-30","30-40", "40-50", "50-60",
"60-70", "70-80", "80-90", "90-100")) +
ggplot2::geom_sf(
data = tabshp,
fill = NA,
color = "grey30",
linewidth = 0.3 # Thinner boundaries
) +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "black", linewidth = 0.5) +
ggplot2::theme_minimal() +
ggplot2::labs(title = "All-Age Test Positivity Rate in Sierra Leone\n by Chiefdom (2023)") +
ggplot2::theme(
legend.position = "bottom",
panel.grid = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14)
) +
guides(fill = guide_legend(nrow = 2))
# Save plot
ggplot2::ggsave(
plot = binned_map,
here::here("03_output/3a_figures/binned_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 5-23: Modify continuous indicator calculation based on your data and desired plots. If plotting an existing continuous calculator, remove this chunk entirely.
- Line 28: Replace
tpr_overall_pctwith the continuous variable you wish to bin and plot - Line 29: Modify the
breaksbased on the range and distribution of your continuous variable - Line 34: Modify contents of the
nameparameter based on the continuous variable you are plotting - Line 38-39: Modify
labelsbased on the breaks defined in Line 29 - Line 50: Modify the
titlebased on the continuous variable you are plotting
Step 4.4: Continuous color mapping
Continuous mapping is appropriate for visualizing numeric data with a smooth, uninterrupted range. It can be a helpful first step before re-making a binned version of the map, or may be appropriate on its own. In this example, we show a continuous version of the same test positivity indicator in the previous step.
Show the code
continuous_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = tabshp_with_rates,
ggplot2::aes(fill = tpr_overall_pct),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_distiller(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1,
limits = c(0,100),
na.value = "grey90"
) +
ggplot2::geom_sf(
data = tabshp,
fill = NA,
color = "grey40",
linewidth = 0.5
) +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "black", linewidth = 0.5) +
ggplot2::theme_minimal() +
ggplot2::labs(title = "All-Age Test Positivity Rate in Sierra Leone\n by Chiefdom (2023)") +
ggplot2::theme(
legend.position = "bottom",
panel.grid = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14),
axis.text = ggplot2::element_blank()
)
# Save plot
ggplot2::ggsave(
plot = continuous_map,
here::here("03_output/3a_figures/continuous_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 4: Replace
tpr_overall_pctwith the continuous variable you wish to plot - Line 9: Modify contents of the
nameparameter based on the continuous variable you are plotting - Line 12: Modify
limitsbased on the range of the continuous variable you are plotting - Line 24: Modify the
titlebased on the data you are plotting
Step 4.5: Plot subdivisions by larger regions
You may be asked to produce a map that shows shapes of one adminstrative level with coloring and labelling of another administrative level. This example plots Sierra Leone’s adm2 and adm3 shapes (black and white boundaries respectively) with adm1 labels and coloring.
Show the code
# Clean geometries
gdf <- gdf |>
sf::st_make_valid() |> # Fixes any invalid geometries
nngeo::st_remove_holes() |> # Removes any holes in polygons
sf::st_buffer(dist = 0) # Fixes potential self-intersections
# ADM1 labels with error handling
adm1_labels <- tryCatch({
gdf |>
dplyr::group_by(FIRST_REGI) |>
dplyr::summarise(geometry = sf::st_union(geometry)) |>
sf::st_make_valid() |>
dplyr::mutate(
centroid = sf::st_point_on_surface(geometry),
coords = sf::st_coordinates(centroid),
x = coords[,1],
y = coords[,2]
)
}, error = function(e) {
adm2_gdf |>
dplyr::mutate(
centroid = sf::st_point_on_surface(geometry),
coords = sf::st_coordinates(centroid),
x = coords[,1],
y = coords[,2]
)
})
# Automatic color palette
n_adm1 <- length(unique(gdf$FIRST_REGI))
adm1_colors <- viridis::plasma(n_adm1)
names(adm1_colors) <- unique(gdf$FIRST_REGI)
# Plot
subdivided_plot <- ggplot() +
geom_sf(
data = gdf,
aes(fill = FIRST_REGI), # Map directly to the region column
color = "white",
linewidth = 0.35
) +
scale_fill_manual(values = adm1_colors) +
geom_sf(
data = adm2_gdf,
fill = NA,
color = "black",
linewidth = 0.8
) +
geom_shadowtext(
data = adm1_labels,
aes(x = x, y = y, label = FIRST_REGI),
size = 3,
fontface = "bold",
color = "black",
bg.color = "white",
bg.r = 0.25
) +
theme_void() +
labs(title = "Sierra Leone Subdivded Adm1 and Adm2 Boundaries")
# Save plot
ggplot2::ggsave(
plot = subdivided_plot,
here::here("03_output/3a_figures/subdivided_map.png"),
width = 10,
height = 8,
dpi = 300)To adapt the code:
- Line 10, 30, 32, 37, 50: Replace
FIRST_REGIwith the column of your data that has ADM1 names - Line 59: Modify the
titlebased on the data you are plotting
Step 4.6: Faceted maps
Faceted plots enable comparison across administrative units. This example demonstrates continuous test positivity indicators and displays each map in a separate panel with identical geography, applies consistent coloring and legends across facets, and maintains uniform scales for direct comparison. This requires data to be reshaped into long format to simplify plotting multiple indicators in faceted visualizations.
Show the code
# Select the TPR percentage columns
tpr_cols <- c("tpr_u5_pct", "tpr_5_14_pct", "tpr_ov15_pct", "tpr_overall_pct")
# Convert data to long format and create binned categories
tpr_long_data <- tabshp_with_rates |>
dplyr::select(geometry, all_of(tpr_cols)) |>
tidyr::pivot_longer(
cols = -geometry,
names_to = "age_group",
values_to = "tpr_percentage"
) |>
dplyr::mutate(
age_group = dplyr::recode(age_group,
"tpr_u5_pct" = "Under 5 years",
"tpr_5_14_pct" = "5-14 years",
"tpr_ov15_pct" = "Over 15 years",
"tpr_overall_pct" = "Overall"
),
age_group = factor(age_group,
levels = c("Under 5 years", "5-14 years", "Over 15 years", "Overall"),
ordered = TRUE),
# Create binned categories (same breaks as your previous code)
tpr_binned = base::cut(tpr_percentage,
breaks = c(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100),
labels = c("0-10", "10-20","20-30","30-40", "40-50",
"50-60", "60-70", "70-80", "80-90", "90-100"),
include.lowest = TRUE
)
)
# Create faceted plot with binned data
ggplot2::ggplot(tpr_long_data) +
ggplot2::geom_sf(ggplot2::aes(fill = tpr_binned), color = "grey30", linewidth = 0.2) +
ggplot2::facet_wrap(~ age_group, ncol = 2) +
ggplot2::scale_fill_brewer(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1
) +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "black", linewidth = 0.5) +
ggplot2::labs(
title = "Test Positivity Rates by Age Group and Chiefdom (2023)",
subtitle = "Proportion of positive tests"
) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 10),
plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
panel.spacing = unit(0.5, "cm"),
strip.text = ggplot2::element_text(face = "bold", size = 11),
legend.position = "bottom",
legend.key.width = ggplot2::unit(1.5, "cm"),
legend.title = ggplot2::element_text(size = 10),
legend.text = ggplot2::element_text(size = 9)
) +
guides(fill = guide_legend(nrow = 2))
# Save plot
ggplot2::ggsave(
plot = faceted_tpr_plot,
here::here("03_output/3a_figures/faceted_tpr_plot.png"),
width = 10,
height = 8,
dpi = 300
)To adapt the code:
- Line 2: Define the vector of columns corresponding to plot facets
- Lines 14-21: Modify age groups based on the number and names of columns selected for plot facets
- Lines 24-25: Modify scale
breaksandlabelsbaSed on your needs and preferences - Line 35: Modify the legend
namebased on the data you are plotting - Lines 43-44: Modify the
titleand ’subtitle` based on the data you are plotting - More plot facets variables can be added by extending the initial vector with additional column names. Adjust facet layout by modifying the
ncolparameter infacet_wrapto accommodate additional variables
Summary
This page has outlined the process for using and visualizing shapefile data. This begins with importing and validating spatial data, ensuring proper coordinate systems are established for accurate geographic alignment.
The visualization techniques demonstrated can be used for different purposes. Basic boundary mapping verifies proper data loading, while layered maps create the geographic framework needed for stratified analysis. These outputs become the foundation for key SNT applications: calculating intervention coverage across jurisdictions, identifying gaps in service delivery, and monitoring performance against geographic targets.
While implementation details vary across analytical environments, the core principles apply universally: validate before analysis, maintain consistent spatial references, and visualize at multiple scales to confirm operational relevance. The resulting spatial datasets enable evidence-based decision-making tailored to subnational contexts.
Full code
Find the full code script for visualizing and using shapefile data below.
Show the code
# ----------------------------------
# Step 1: Set up the environment
# ----------------------------------
# Install `pacman` if not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# Load required packages using pacman
pacman::p_load(
readxl, # Read Excel files
tidyr, # data organization
sf, # handling shapefile data
dplyr, # data manipulation
ggplot2, # plotting]
viridis, # color palettes
shadowtext, # plot labels
nngeo, # spatial analysis
here, # file path management
stringr # cleaning up character strings
)
# ----------------------------------------------
# Step 2: Import and prepare the shapefiles
#-----------------------------------------------
# Define file paths using `here` for reproducibility
shapefile_shp <- here::here("english/data_r/shapefiles", "Chiefdom2021.shp")
shapefile_shx <- here::here("english/data_r/shapefiles", "Chiefdom2021.shx")
shapefile_dbf <- here::here("english/data_r/shapefiles", "Chiefdom2021.dbf")
# Administrative level 1 (e.g., districts)
adm2_shapefile <- here::here("english/data_r/shapefiles", "District 2021.shp")
# Load chiefdom shapefile into an sf object
gdf <- sf::st_read(shapefile_shp, quiet = TRUE)
# Load adm1 shapefile
adm2_gdf <- sf::st_read(adm2_shapefile, quiet = TRUE)
# Set the Coordinate Reference System (CRS)
sf::st_crs(gdf) <- 4326
sf::st_crs(adm2_gdf) <- 4326
# --------------------------------------------------
# Step 3.1: Visualize shapefile contents
# --------------------------------------------------
# Plot the chiefdom shapefile
basic_map <- ggplot2::ggplot() +
ggplot2::geom_sf(data = gdf, fill = "lightblue", color = "black") +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()
) +
ggplot2::labs(
title = "Map of Sierra Leone Chiefdoms (adm3)",
subtitle = "Administrative level 3 boundaries"
) +
ggplot2::theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = basic_map,
here::here("03_output/3a_figures/basic_map.png"),
width = 10,
height = 8,
dpi = 300)
# --------------------------------------------------
# Step 3.2: Overlay multiple adminstrative levels
# --------------------------------------------------
overlay_map <- ggplot2::ggplot() +
ggplot2::geom_sf(data = gdf, fill = "#628DA5", color = "gray") +
ggplot2::geom_sf(data = adm2_gdf, fill = NA,
color = "#0F2333", linewidth = 0.5) +
shadowtext::geom_shadowtext(
data = adm2_gdf,
ggplot2::aes(x = sf::st_coordinates(sf::st_centroid(geometry))[,1],
y = sf::st_coordinates(sf::st_centroid(geometry))[,2],
label = FIRST_DNAM),
bg.color = "white",
color = "black",
size = 2,
fontface = "bold"
) +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.grid = ggplot2::element_blank(),
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank()
) +
ggplot2::labs(
title = "Overlay of Sierra Leone administrative boundaries",
subtitle = "Districts (adm2) and Chiefdoms (adm3)"
) +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = overlay_map,
filename = here::here("03_output/3a_figures/overlay_map.png"),
width = 10,
height = 8,
dpi = 300)
# --------------------------------------------------
# Step 4: Advanced shapefile visualization
# --------------------------------------------------
# Load and merge data
categorical_intervention_data <- read_excel(
here::here(
"1.2_epidemiology",
"1.2a_routine_surveillance",
"processed",
"scenario_with_irs_no_smc_06_20_2025.xlsx"
)
)
cat_intervention_admins <- categorical_intervention_data |>
dplyr::distinct(FIRST_DNAM, FIRST_CHIE) # Using adm2 and adm3 equivalents
# Drop geometry from gdf for name-only joins
shp_names_cat <- gdf |>
sf::st_drop_geometry() |>
dplyr::distinct(FIRST_REGI, FIRST_DNAM, FIRST_CHIE) # adm1, adm2, adm3
# Inner join to count exact matches across admin levels 2-3
shp_with_cat <- shp_names_cat |>
dplyr::inner_join(
cat_intervention_admins,
by = c("FIRST_DNAM", "FIRST_CHIE")
)
print(paste("Number of exact matches across admin levels 2-3:", nrow(shp_with_cat)))
# Now perform the actual merge with admin levels 2-3
gdf_cat_joined <- gdf |>
dplyr::inner_join(
categorical_intervention_data,
by = c("FIRST_DNAM", "FIRST_CHIE")
) |>
sf::st_as_sf() # Ensure it remains spatial
print(paste("Final merged row count for intervention data:", nrow(gdf_cat_joined)))
# Check the structure of the merged data
colnames(gdf_cat_joined)
#==============================================================================#
sle_dhis2_df_coord_spatial_adm3 <- readRDS(
here::here(
"1.2_epidemiology",
"1.2a_routine_surveillance",
"processed",
"sle_dhis2_df_coord_spatial_adm3.rds"
)
)
# Get distinct admin names from sle_dhis2 (excluding adm0)
dhis2_admins <- sle_dhis2_df_coord_spatial_adm3 |>
dplyr::distinct(adm1, adm2, adm3)
# Drop geometry from gdf for name-only joins (excluding country level)
shp_names <- gdf |>
sf::st_drop_geometry() |>
dplyr::distinct(FIRST_REGI, FIRST_DNAM, FIRST_CHIE) # adm1, adm2, adm3 equivalents
# Inner join to count exact matches across admin levels 1-3
shp_with_dhis2 <- shp_names |>
dplyr::inner_join(
dhis2_admins,
by = c("FIRST_REGI" = "adm1", "FIRST_DNAM" = "adm2", "FIRST_CHIE" = "adm3")
)
print(paste("Number of exact matches across admin levels 1-3:", nrow(shp_with_dhis2)))
# Now perform the actual merge with admin levels 1-3 only
tabshp <- gdf |>
dplyr::inner_join(
sle_dhis2_df_coord_spatial_adm3,
by = c("FIRST_REGI" = "adm1", "FIRST_DNAM" = "adm2", "FIRST_CHIE" = "adm3")
) |>
sf::st_as_sf() # Ensure it remains spatial
print(paste("Final merged row count:", nrow(tabshp)))
#Binned map
categorical_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = gdf_cat_joined,
ggplot2::aes(fill = irs),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_brewer(
name = "Planned IRS Coverage",
palette = "Accent",
na.value = "grey90"
) +
ggplot2::geom_sf(
data = gdf_cat_joined,
fill = NA,
color = "black",
linewidth = 0.3
) +
ggplot2::theme_minimal() +
ggplot2::labs(
title = "Planned Indoor Residual Spraying (IRS)\n Coverage By Chiefdom: 2026-2030"
) +
ggplot2::theme(
legend.position = "bottom",
legend.key.size = ggplot2::unit(0.5, "cm"),
panel.grid = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(size = 12)
)
# Save plot
ggplot2::ggsave(
plot = categorical_map,
here::here("03_output/3a_figures/categorical_map.png"),
width = 10,
height = 8,
dpi = 300)
# Categorical map
# Calculate test positivity rates as percentages only
tabshp_with_rates <- tabshp |>
dplyr::mutate(
# Age-specific positivity rates as percentages
tpr_u5_pct = dplyr::case_when(
test_u5 > 0 ~ (conf_u5 / test_u5) * 100,
TRUE ~ NA_real_
),
tpr_5_14_pct = dplyr::case_when(
test_5_14 > 0 ~ (conf_5_14 / test_5_14) * 100,
TRUE ~ NA_real_
),
tpr_ov15_pct = dplyr::case_when(
test_ov15 > 0 ~ (conf_ov15 / test_ov15) * 100,
TRUE ~ NA_real_
),
# Overall positivity rate as percentage
tpr_overall_pct = dplyr::case_when(
tests_total > 0 ~ (confirmed_total / tests_total) * 100,
TRUE ~ NA_real_
)
)
binned_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = tabshp_with_rates,
ggplot2::aes(fill = base::cut(tpr_overall_pct,
breaks = c(0, 20, 40, 60, 80, 100))),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_brewer(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1,
na.value = "grey90",
labels = c("0-20", "20-40", "40-60", "60-80", "80-100")
) +
ggplot2::geom_sf(
data = tabshp,
fill = NA,
color = "black",
linewidth = 0.5
) +
ggplot2::theme_minimal() +
ggplot2::labs(title = "All-Age Test Positivity Rate in Sierra Leone\n by Chiefdom (2023)") +
ggplot2::theme(
legend.position = "bottom",
panel.grid = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14),
axis.text = ggplot2::element_blank()
)
# Save plot
ggplot2::ggsave(
plot = binned_map,
here::here("03_output/3a_figures/binned_map.png"),
width = 10,
height = 8,
dpi = 300)
#Continuous map
continuous_map <- ggplot2::ggplot() +
ggplot2::geom_sf(
data = tabshp_with_rates,
ggplot2::aes(fill = tpr_overall_pct),
color = "white",
size = 0.2
) +
ggplot2::scale_fill_distiller(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1,
limits = c(0,100),
na.value = "grey90"
) +
ggplot2::geom_sf(
data = tabshp,
fill = NA,
color = "black",
linewidth = 0.3
) +
ggplot2::theme_minimal() +
ggplot2::labs(title = "All-Age Test Positivity Rate in Sierra Leone by Chiefdom (2023)") +
ggplot2::theme(
legend.position = "bottom",
panel.grid = ggplot2::element_blank(),
plot.title = ggplot2::element_text(face = "bold", size = 14),
axis.text = ggplot2::element_blank()
)
# Save plot
ggplot2::ggsave(
plot = continuous_map,
here::here("03_output/3a_figures/continuous_map.png"),
width = 10,
height = 8,
dpi = 300)
#Subdivided map
gdf <- gdf |>
sf::st_make_valid() |> # Fixes any invalid geometries
nngeo::st_remove_holes() |> # Removes any holes in polygons
sf::st_buffer(dist = 0) # Fixes potential self-intersections
# ADM1 labels with error handling
adm1_labels <- tryCatch({
gdf |>
dplyr::group_by(FIRST_REGI) |>
dplyr::summarise(geometry = sf::st_union(geometry)) |>
sf::st_make_valid() |>
dplyr::mutate(
centroid = sf::st_point_on_surface(geometry),
coords = sf::st_coordinates(centroid),
x = coords[,1],
y = coords[,2]
)
}, error = function(e) {
adm2_gdf |>
dplyr::mutate(
centroid = sf::st_point_on_surface(geometry),
coords = sf::st_coordinates(centroid),
x = coords[,1],
y = coords[,2]
)
})
# Automatic color palette
n_adm1 <- length(unique(gdf$FIRST_REGI))
adm1_colors <- viridis::plasma(n_adm1)
names(adm1_colors) <- unique(gdf$FIRST_REGI)
# Plot
subdivided_plot <- ggplot() +
geom_sf(
data = gdf |> mutate(fill_col = adm1_colors[FIRST_REGI]),
aes(fill = fill_col),
color = "white",
linewidth = 0.35
) +
geom_sf(
data = adm2_gdf,
fill = NA,
color = "black",
linewidth = 0.8
) +
geom_shadowtext(
data = adm1_labels,
aes(x = x, y = y, label = FIRST_REGI),
size = 3,
fontface = "bold",
color = "black",
bg.color = "white",
bg.r = 0.25
) +
scale_fill_identity() +
theme_void() +
labs(title = "Sierra Leone Subdivded Adm1 and Adm2 Boundaries")
# Save plot
ggplot2::ggsave(
plot = subdivided_plot,
here::here("03_output/3a_figures/subdivided_map.png"),
width = 10,
height = 8,
dpi = 300)
# Faceted map
# Select the TPR percentage columns
tpr_cols <- c("tpr_u5_pct", "tpr_5_14_pct", "tpr_ov15_pct", "tpr_overall_pct")
# Convert data to long format
tpr_long_data <- tabshp_with_rates |>
dplyr::select(geometry, all_of(tpr_cols)) |>
tidyr::pivot_longer(
cols = -geometry,
names_to = "age_group",
values_to = "tpr_percentage"
) |>
dplyr::mutate(
age_group = dplyr::recode(age_group,
"tpr_u5_pct" = "Under 5 years",
"tpr_5_14_pct" = "5-14 years",
"tpr_ov15_pct" = "Over 15 years",
"tpr_overall_pct" = "Overall"
),
age_group = factor(age_group,
levels = c("Under 5 years", "5-14 years", "Over 15 years", "Overall"),
ordered = TRUE)
)
# Create faceted plot with shared continuous legend
faceted_tpr_plot <- ggplot2::ggplot(tpr_long_data) +
ggplot2::geom_sf(ggplot2::aes(fill = tpr_percentage), color = "white", linewidth = 0.2) +
ggplot2::facet_wrap(~ age_group, ncol = 4) +
ggplot2::scale_fill_distiller(
name = "Test Positivity Rate (%)",
palette = "RdYlBu",
direction = -1,
na.value = "grey90",
limits = c(0, 100)
) +
ggplot2::labs(
title = "Test Positivity Rates by Age Group and Chiefdom (2023)",
subtitle = "Proportion of positive tests"
) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, face = "bold", size = 14),
plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 10),
strip.text = ggplot2::element_text(face = "bold", size = 11),
legend.position = "bottom",
legend.key.width = ggplot2::unit(1.5, "cm"),
legend.title = ggplot2::element_text(size = 10),
legend.text = ggplot2::element_text(size = 9)
)
# Save plot
ggplot2::ggsave(
plot = faceted_tpr_plot,
here::here("03_output/3a_figures/faceted_tpr_plot.png"),
width = 10,
height = 8,
dpi = 300
)