# Install pacman if not already installed
if (!require("pacman")) install.packages("pacman")
# Load all required packages using pacman
pacman::p_load(
here, # For file path management
readxl, # For reading Excel files
dplyr, # For data manipulation
tidyr, # For data reshaping
openxlsx, # For writing Excel files
knitr, # For creating tables
sf, # For spatial data
ggplot2 # For visualization
)Seasonal Malaria Chemoprevention (SMC) Eligibility Analysis
Overview
What is Seasonal Malaria Chemoprevention (SMC)?
Seasonal Malaria Chemoprevention (SMC) protects young children from malaria during months when malaria transmission is highest. The World Health Organization (WHO) recommends SMC for areas with strong seasonal malaria patterns.
Health workers give children aged 3 months to 5 years malaria medicine once each month during the rainy season, usually for 3 to 5 months.
Importance of SMC
Reduce Malaria Cases
SMC lowers malaria cases in children by about 75 percent during the malaria season. The intervention also reduces malaria deaths among children under five.
Provide High Value for Cost
SMC delivers strong health results at low cost, making it one of the most efficient malaria control tools in seasonal transmission areas.
Strengthen Other Interventions
SMC works best when combined with other preventive measures such as: - Sleeping under insecticide treated nets
- Spraying homes with insecticide
Target the Right Locations
SMC focuses on places where most malaria cases occur in a few months each year.
Example:
If a district records 70 percent of malaria cases between June and September, it qualifies for SMC.
Key Terms
Seasonality
Seasonality means malaria transmission happens mainly during certain months rather than throughout the year.
Example: In some areas, malaria increases during the rainy season and declines during the dry season.
Rolling Window Analysis
Rolling window analysis checks every possible 3, 4, or 5 month group in a year to find which one has the highest rainfall or malaria activity.
Example: Test 4 month groups (Jan–Apr, Feb–May, Mar–Jun, etc.) to identify the period with the most rainfall.
Peak Transmission Period
The peak transmission period refers to the consecutive months when malaria reaches its highest level. This period guides the timing of SMC delivery.
Example: If malaria peaks between August and November, the program schedules SMC during these months.
SMC Eligibility
SMC eligibility confirms whether an area meets the criteria for SMC. Analysts check: 1. If malaria follows a seasonal pattern.
2. If malaria incidence is high enough to justify implementation.
Rainfall as a Proxy
Rainfall data helps estimate malaria risk because mosquito breeding increases after rainfall.
Example: When rains start in June, mosquito breeding increases, and malaria cases usually rise in July.
Analytical Approach
Rolling Window Approach
Analysts test every possible 3, 4, or 5 month period in a year instead of using fixed calendar months.
They identify the period that captures the largest share of total rainfall or malaria cases.
Example:
- Jan–Apr: 30% of yearly rain
- Feb–May: 50%
- Mar–Jun: 70% → This is the main rainfall period.
Selecting the Duration (4 or 5 Months)
The analysis compares how much more rainfall or malaria transmission appears when extending the period from 4 to 5 months.
If the fifth month adds a large increase (more than 10 percentage points), the 5 month window becomes the preferred option.
If the increase is small, the 4 month window remains sufficient and more efficient.
Reason for the 10 Percent Rule:
The 10 percent rule serves as a practical guide to balance two priorities: - Capturing the majority of seasonal malaria activity
- Avoiding unnecessary use of time, medicine, and logistics
The 10 percent threshold is flexible, not mandatory.
Each program can change it based on local needs and operational capacity.
Examples:
A program with enough resources may use a 5 percent rule to extend coverage.
A program with limited staff or funding may use 15 percent to focus only on the strongest malaria months.
This flexibility allows decision makers to tailor SMC plans to their local realities while keeping data driven justification.
Combining Rainfall and Malaria Data
Analysts combine rainfall data with malaria incidence data to identify eligible areas.
- Rainfall shows when malaria risk increases.
- Incidence confirms whether the disease burden is high enough to warrant intervention.
Only locations with sufficient malaria burden, such as more than 250 cases per 1,000 people, qualify for SMC.
Spatial Analysis
Analysts calculate results at local administrative levels such as chiefdoms or districts.
Each location receives its own analysis on:
Seasonal pattern
Optimal SMC timing
Malaria burden
Example:
Bo district may need SMC from July to October, while Koinadugu district may need it from August to November.
Multi Year Analysis
Analysts use rainfall and malaria data from multiple years (for example, 6 or more years) to produce consistent results.
This approach reduces the effect of abnormal years and reveals the true seasonal pattern.
Example:
If rainfall levels vary from year to year, using multi year averages provides a stable estimate of the malaria season.
Prerequisites
Before starting the SMC eligibility analysis, confirm the following conditions:
Complete the Seasonality Eligibility step.
Refer to the Seasonality page here:
Seasonality Eligibility AnalysisConfirm that rainfall data and malaria data are both available for the target locations.
- Rainfall data helps identify seasonal patterns.
- Malaria incidence or prevalence data ensures the area has enough disease burden for SMC.
- Rainfall data helps identify seasonal patterns.
Check that data sources are consistent across all administrative units to allow accurate comparisons.
The analysis framework aims to identify and guide effective SMC implementation by:
- Detecting areas with seasonal malaria transmission
- Determining the best 3–5 month window for intervention
- Confirming malaria burden using incidence data
- Producing clear recommendations for timing and location
In summary, the analysis links rainfall patterns and malaria data to determine where to implement SMC, when to start it, and how long to maintain it each year.
Step-by-Step
Step 1: Import packages
To adapt the code:
- Lines 11-19: Add or remove packages based on your specific needs.
Step 2: Rainfall peak analysis setup
Step 2.1: Configure peak analysis parameters
# Peak analysis configuration
INPUT_FILE <- here::here("english/data_r/modeled", "chirps_data_2015_2023_lastest.xls")
PEAK_ANALYSIS_OUTPUT <- "rainfall_max_percentages_by_location.xlsx"
# Analysis time range
PEAK_ANALYSIS_START_YEAR <- 2018
PEAK_ANALYSIS_END_YEAR <- 2023
# Column mappings
LOCATION_COL <- c("FIRST_DNAM", "FIRST_CHIE")
RAINFALL_COL <- "mean_rain"
YEAR_COL <- "Year"
MONTH_COL <- "Month"To adapt the code:
- Lines 7-18: Update parameters to match your dataset context.
Step 2.2: Load and process peak analysis data
# Load data for peak analysis
df <- readxl::read_excel(INPUT_FILE)
# Filter to analysis period
df <- df |>
dplyr::filter(
!!sym(YEAR_COL) >= PEAK_ANALYSIS_START_YEAR &
!!sym(YEAR_COL) <= PEAK_ANALYSIS_END_YEAR
)
# Pivot data - each month + location is a row, columns are years
df_pivot <- df |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
!!sym(MONTH_COL),
!!sym(YEAR_COL),
!!sym(RAINFALL_COL)
) |>
tidyr::pivot_wider(
names_from = !!sym(YEAR_COL),
values_from = !!sym(RAINFALL_COL),
names_sort = TRUE
)
# Calculate mean rainfall across years
year_columns <- as.character(PEAK_ANALYSIS_START_YEAR:PEAK_ANALYSIS_END_YEAR)
df_pivot <- df_pivot |>
dplyr::rowwise() |>
dplyr::mutate(
Mean = mean(dplyr::c_across(dplyr::all_of(year_columns)), na.rm = TRUE)
) |>
dplyr::ungroup()To adapt the code:
- Do not change anything in the code.
Step 3: Generate 3-month rolling windows
# 3-month windows starting from each month (January through December)
w1_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 3)
w2_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 3)
w3_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 3)
w4_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 3)
w5_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 3)
w6_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 3)
w7_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 3)
w8_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 3)
w9_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 3)
w10_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 3)
w11_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 3)
w12_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 3)To adapt the code:
- Do not change anything in the code
Step 4: Generate 4-month rolling windows
# 4-month windows starting from each month (January through December)
w1_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 4)
w2_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 4)
w3_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 4)
w4_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 4)
w5_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 4)
w6_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 4)
w7_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 4)
w8_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 4)
w9_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 4)
w10_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 4)
w11_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 4)
w12_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 4)To adapt the code:
- Do not change anything in the code
Step 5: Generate 5-month rolling windows
# 5-month windows starting from each month (January through December)
w1_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 5)
w2_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 5)
w3_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 5)
w4_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 5)
w5_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 5)
w6_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 5)
w7_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 5)
w8_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 5)
w9_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 5)
w10_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 5)
w11_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 5)
w12_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 5)To adapt the code:
- Do not change anything in the code
Step 6: Window analysis consolidation
Step 6.1: Combine all windows
# Combine all windows into one DataFrame
all_windows <- dplyr::bind_rows(
# 3-month windows
w1_3, w2_3, w3_3, w4_3, w5_3, w6_3,
w7_3, w8_3, w9_3, w10_3, w11_3, w12_3,
# 4-month windows
w1_4, w2_4, w3_4, w4_4, w5_4, w6_4,
w7_4, w8_4, w9_4, w10_4, w11_4, w12_4,
# 5-month windows
w1_5, w2_5, w3_5, w4_5, w5_5, w6_5,
w7_5, w8_5, w9_5, w10_5, w11_5, w12_5
)To adapt the code:
- Do not change anything in the code
Step 6.2: Reshape and calculate percentages
# Pivot wider so each window size is a column
final_result <- all_windows |>
tidyr::pivot_wider(
names_from = window_size,
values_from = rainfall_mean,
names_glue = "window_{window_size}_month_mean"
)
# Calculate annual total rainfall for each location
annual_total <- df_pivot |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(annual_total = sum(Mean, na.rm = TRUE), .groups = "drop")
# Merge annual totals and calculate percentages
final_result <- final_result |>
dplyr::left_join(annual_total, by = LOCATION_COL) |>
dplyr::mutate(
window_3_month_pct = ifelse(
annual_total == 0,
0,
(window_3_month_mean / annual_total) * 100
),
window_4_month_pct = ifelse(
annual_total == 0,
0,
(window_4_month_mean / annual_total) * 100
),
window_5_month_pct = ifelse(
annual_total == 0,
0,
(window_5_month_mean / annual_total) * 100
)
)To adapt the code:
- Do not change anything in the code
Step 7: Peak identification and SMC window selection
Step 7.1: Find maximum percentages
# Find maximum percentages for each window size
max_3_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_3_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_3_month_pct = window_3_month_pct,
max_3_month_start = start_month
) |>
dplyr::ungroup()
max_4_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_4_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_4_month_pct = window_4_month_pct,
max_4_month_start = start_month
) |>
dplyr::ungroup()
max_5_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_5_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_5_month_pct = window_5_month_pct,
max_5_month_start = start_month
) |>
dplyr::ungroup()To adapt the code:
- Do not change anything in the code
Step 7.2: Create consolidated peak summary
To adapt the code:
- Do not change anything in the code
Step 7.3: Calculate difference between 5-month and 4-month windows
# Calculate the percentage point difference between 5-month and 4-month windows
max_summary <- max_summary |>
dplyr::mutate(
difference_5mo_vs_4mo_percent = max_5_month_pct - max_4_month_pct
)
# Display the difference for review
knitr::kable(
head(max_summary |>
dplyr::select(dplyr::all_of(LOCATION_COL),
max_4_month_pct, max_5_month_pct,
difference_5mo_vs_4mo_percent), 10),
caption = "Comparison of 4-Month vs 5-Month Window Coverage (First 10 locations)"
)| FIRST_DNAM | FIRST_CHIE | max_4_month_pct | max_5_month_pct | difference_5mo_vs_4mo_percent |
|---|---|---|---|---|
| BO | BADJIA | 65.47582 | 77.10272 | 11.62689 |
| BO | BAGBO | 68.87826 | 79.53138 | 10.65312 |
| BO | BAGBWE(BAGBE) | 65.84418 | 77.74460 | 11.90043 |
| BO | BO TOWN | 67.28930 | 79.58617 | 12.29687 |
| BO | BOAMA | 65.57453 | 76.79504 | 11.22050 |
| BO | BONGOR | 66.65230 | 77.59831 | 10.94602 |
| BO | BUMPE NGAO | 69.65803 | 80.90015 | 11.24212 |
| BO | GBO | 68.53724 | 81.03535 | 12.49811 |
| BO | JAIAMA | 66.48878 | 77.33371 | 10.84493 |
| BO | KAKUA | 67.32694 | 79.16680 | 11.83985 |
To adapt the code:
- Do not change anything in the code
Step 8: Apply window selection logic and determine SMC cycles
Step 8.1: Select optimal window size based on 10% threshold
# Helper data for month conversion
month_names <- c(
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
)
# Apply the 10% threshold rule to select window size
max_summary <- max_summary |>
dplyr::rowwise() |>
dplyr::mutate(
# Decision: if difference > 10%, use 5-month; otherwise use 4-month
SMC_window_size_selected = ifelse(difference_5mo_vs_4mo_percent > 10, 5, 4),
# Select the appropriate start month based on chosen window size
best_start_month = dplyr::case_when(
SMC_window_size_selected == 4 ~ max_4_month_start,
SMC_window_size_selected == 5 ~ max_5_month_start,
TRUE ~ NA_real_
),
# Calculate end month
end_month_num = ((best_start_month - 1 + SMC_window_size_selected - 1) %% 12) + 1,
# Create human-readable SMC cycle description
SMC_cycles = paste0(
month_names[best_start_month],
" - ",
month_names[end_month_num]
)
) |>
dplyr::ungroup() |>
dplyr::select(-end_month_num)
# Save peak analysis results
openxlsx::write.xlsx(max_summary, PEAK_ANALYSIS_OUTPUT, rowNames = FALSE)
# Display results showing the window selection
knitr::kable(
tail(max_summary |>
dplyr::select(dplyr::all_of(LOCATION_COL),
difference_5mo_vs_4mo_percent,
SMC_window_size_selected,
SMC_cycles), 10),
caption = "Window Selection Results (Last 10 locations)"
)| FIRST_DNAM | FIRST_CHIE | difference_5mo_vs_4mo_percent | SMC_window_size_selected | SMC_cycles |
|---|---|---|---|---|
| WESTERN RURAL | WATERLOO RURAL | 9.677518 | 4 | June - September |
| WESTERN RURAL | YORK RURAL | 9.243874 | 4 | June - September |
| WESTERN URBAN | CENTRAL I | 9.695110 | 4 | July - October |
| WESTERN URBAN | CENTRAL II | 0.000000 | 4 | January - April |
| WESTERN URBAN | EAST I | 0.000000 | 4 | January - April |
| WESTERN URBAN | EAST II | 0.000000 | 4 | January - April |
| WESTERN URBAN | EAST III | 9.138793 | 4 | June - September |
| WESTERN URBAN | WEST I | 0.000000 | 4 | January - April |
| WESTERN URBAN | WEST II | 0.000000 | 4 | January - April |
| WESTERN URBAN | WEST III | 8.786952 | 4 | July - October |
To adapt the code:
- Do not change anything in the code
Step 9: Integrate incidence data and determine final eligibility
Step 9.1: Configure final analysis parameters
To adapt the code:
- Lines 11-12: Update the seasonality and incidence column names to match the dataset.
Step 9.2: Load incidence data and merge all results
# Load location summary data (seasonality classifications)
location_summary_file <- here::here("english/library/stratification/other", "location_seasonality_summary.xlsx")
location_summary <- readxl::read_excel(location_summary_file)
# Load incidence data
incidence_data <- readxl::read_excel(incidence_file)
# Merge all datasets: seasonality classification + peak timing + incidence
final_smc_data <- location_summary |>
dplyr::left_join(max_summary, by = LOCATION_COL) |>
dplyr::left_join(incidence_data, by = LOCATION_COL)To adapt the code:
- Line 7: Update the file path to match your location_summary Excel file location.
Step 9.3: Apply final SMC eligibility criteria
# Determine final SMC eligibility and cycles
final_output <- final_smc_data |>
dplyr::mutate(
SMC_cycles = dplyr::case_when(
.data[[SEASONALITY_COL]] == SEASONALITY_REQUIRED &
.data[[INCIDENCE_COL]] > INCIDENCE_THRESHOLD ~ SMC_cycles,
TRUE ~ "Not Eligible"
)
) |>
dplyr::arrange(dplyr::across(dplyr::all_of(LOCATION_COL)))
# Display final summary
knitr::kable(head(final_output, 10), caption = "Final SMC Eligibility Results")| FIRST_DNAM | FIRST_CHIE | SeasonalYears | TotalYears | Seasonality | max_3_month_pct | max_3_month_start | max_4_month_pct | max_4_month_start | max_5_month_pct | max_5_month_start | difference_5mo_vs_4mo_percent | SMC_window_size_selected | best_start_month | SMC_cycles | adm3 | incidence |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BO | BADJIA | 8 | 8 | Seasonal | 53.01616 | 7 | 65.47582 | 6 | 77.10272 | 6 | 11.62689 | 5 | 6 | June - October | Badjia Chiefdom | 376.9948 |
| BO | BAGBO | 8 | 8 | Seasonal | 54.44574 | 7 | 68.87826 | 6 | 79.53138 | 6 | 10.65312 | 5 | 6 | June - October | Bargbo Chiefdom | 371.6494 |
| BO | BAGBWE(BAGBE) | 8 | 8 | Seasonal | 52.29493 | 7 | 65.84418 | 6 | 77.74460 | 6 | 11.90043 | 5 | 6 | June - October | Bagbwe Chiefdom | 343.2675 |
| BO | BO TOWN | 8 | 8 | Seasonal | 52.84036 | 7 | 67.28930 | 6 | 79.58617 | 6 | 12.29687 | 5 | 6 | Not Eligible | Bo City | 225.9378 |
| BO | BOAMA | 8 | 8 | Seasonal | 51.94043 | 7 | 65.57453 | 6 | 76.79504 | 6 | 11.22050 | 5 | 6 | June - October | Baoma Chiefdom | 432.4502 |
| BO | BONGOR | 8 | 8 | Seasonal | 52.98731 | 7 | 66.65230 | 6 | 77.59831 | 6 | 10.94602 | 5 | 6 | June - October | Bongor Chiefdom | 266.0600 |
| BO | BUMPE NGAO | 8 | 8 | Seasonal | 56.58911 | 7 | 69.65803 | 6 | 80.90015 | 6 | 11.24212 | 5 | 6 | June - October | Bumpe Ngao Chiefdom | 433.4976 |
| BO | GBO | 8 | 8 | Seasonal | 55.46016 | 7 | 68.53724 | 6 | 81.03535 | 6 | 12.49811 | 5 | 6 | June - October | Gbo Chiefdom | 417.0264 |
| BO | JAIAMA | 8 | 8 | Seasonal | 52.40935 | 7 | 66.48878 | 6 | 77.33371 | 6 | 10.84493 | 5 | 6 | June - October | Jaiama Chiefdom | 263.0465 |
| BO | KAKUA | 8 | 8 | Seasonal | 53.77728 | 7 | 67.32694 | 6 | 79.16680 | 6 | 11.83985 | 5 | 6 | Not Eligible | Kakua Chiefdom | 234.4795 |
# Generate summary statistics
smc_summary <- final_output |>
dplyr::count(SMC_cycles, name = "Number_of_Locations") |>
dplyr::arrange(desc(Number_of_Locations))
knitr::kable(smc_summary, caption = "SMC Eligibility Summary by Cycle")| SMC_cycles | Number_of_Locations |
|---|---|
| June - October | 93 |
| Not Eligible | 91 |
| June - September | 24 |
To adapt the code:
- Do not change anything in the code
Step 10: Load and merge spatial data
Show the code
Reading layer `Chiefdom2021' from data source
`C:\Users\User\snt-code-library\english\data_r\shapefiles\Chiefdom2021.shp'
using driver `ESRI Shapefile'
Simple feature collection with 208 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -13.30736 ymin: 6.923379 xmax: -10.27056 ymax: 10.00989
CRS: NA
To adapt the code:
- Line 10: Update the file path to match your shapefile location (e.g., change “Chiefdom2021.shp” to your shapefile name)
- Line 16: Update the join keys (“FIRST_DNAM”, “FIRST_CHIE”) to match the column names in your shapefile that correspond to the location columns used throughout the analysis
Step 11: Maximum window percentage maps
Show the code
# Function: Categorize rainfall
categorize <- function(val) {
if (is.na(val)) {
return("No data")
} else if (val < 60) {
return("<60")
} else if (val >= 60 && val < 70) {
return("60-<70")
} else if (val >= 70 && val < 80) {
return("70-<80")
} else if (val >= 80 && val < 90) {
return("80-<90")
} else if (val >= 90 && val <= 95) {
return("90-95")
} else {
return("No data")
}
}
# Define colors using tab10 palette
colors <- c(
"<60" = "#4E79A7", # Blue
"60-<70" = "#F28E2B", # Orange
"70-<80" = "#E15759", # Red
"80-<90" = "#59A14F", # Green
"90-95" = "#EDC948", # Yellow
"No data" = "white"
)
# Boundaries for chiefdoms
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Loop for 3, 4, and 5-month windows
month_windows <- list(
list(months = 3, colname = "max_3_month_pct"),
list(months = 4, colname = "max_4_month_pct"),
list(months = 5, colname = "max_5_month_pct")
)
for (window in month_windows) {
months <- window$months
colname <- window$colname
# Create category column
merged$Category <- sapply(merged[[colname]], categorize)
# Convert Category to factor with specific order
merged$Category <- factor(merged$Category,
levels = c("<60", "60-<70", "70-<80",
"80-<90", "90-95", "No data"))
# Count categories for legend (excluding zero counts)
cat_counts <- table(merged$Category)
cat_counts <- cat_counts[cat_counts > 0]
# Create labels with counts
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
# Filter colors to only include categories that exist
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
# Plot
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = Category),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = "max %"
) +
ggplot2::labs(title = paste0("Rainfall ", months, "-month window")) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
# Save plot
ggplot2::ggsave(
filename = paste0("rainfall_", months, "_month_window_max_percentage.png"),
plot = p,
width = 7,
height = 5,
dpi = 300
)
# Display plot
print(p)
}To adapt the code:
- Lines 11-25: Modify the rainfall percentage categories and thresholds to match your analysis requirements (e.g., change 60, 70, 80, 90, 95 to different values)
- Lines 28-34: Update the color palette if you want different colors for each category
- Line 38: Change “FIRST_DNAM” to match the administrative boundary column name in your shapefile for creating district/regional boundaries
- Lines 42-46: Update column names if your analysis uses different names for the maximum percentage columns (e.g., change “max_3_month_pct” to match your naming convention)
Step 12: Maximum month window start maps
Show the code
# Function: Recode months
recode_month <- function(val) {
month_map <- c(
"3" = "March", "4" = "April", "5" = "May", "6" = "June",
"7" = "July", "8" = "August", "9" = "September",
"10" = "October", "11" = "November"
)
if (is.na(val)) {
return("No data")
}
result <- month_map[as.character(val)]
if (is.na(result)) {
return(as.character(val))
}
return(result)
}
# Color palette (Tableau 10)
colors <- c(
"March" = "#4E79A7", # Blue
"April" = "#F28E2B", # Orange
"May" = "#E15759", # Red
"June" = "#AF7AA1", # Mauve
"July" = "#59A14F", # Green
"August" = "#EDC948", # Yellow
"September" = "#B07AA1", # Purple
"October" = "#9C755F", # Brown
"November" = "#76B7B2", # Cyan
"No data" = "white"
)
# Boundaries for FIRST_DNAM
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Month-window columns to loop over
month_windows <- list(
list(months = 3, colname = "max_3_month_start"),
list(months = 4, colname = "max_4_month_start"),
list(months = 5, colname = "max_5_month_start")
)
# Loop for plotting
for (window in month_windows) {
months <- window$months
colname <- window$colname
# Recode numeric month to names
merged$Category <- sapply(merged[[colname]], recode_month)
# Convert Category to factor with specific order
month_order <- c("March", "April", "May", "June", "July",
"August", "September", "October", "November", "No data")
merged$Category <- factor(merged$Category, levels = month_order)
# Count categories (excluding zero counts)
cat_counts <- table(merged$Category)
cat_counts <- cat_counts[cat_counts > 0]
# Create labels with counts
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
# Filter colors to only include categories that exist
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
# Plot
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = Category),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = paste0(months, "-month start")
) +
ggplot2::labs(title = paste0("Rainfall ", months, "-month window start month")) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
# Save plot
ggplot2::ggsave(
filename = paste0("rainfall_", months, "_month_window_start.png"),
plot = p,
width = 7,
height = 5,
dpi = 300
)
# Display plot
print(p)
}To adapt the code:
- Lines 12-15: Update the month_map to include all months that appear in your data (add or remove months as needed, e.g., add “1” = “January”, “2” = “February” if your data includes those months)
- Lines 27-37: Modify the color palette if you want different colors for each month
- Line 41: Change “FIRST_DNAM” to match your administrative boundary column name
- Lines 46-49: Update column names if your analysis uses different names for the start month columns (e.g., change “max_3_month_start” to match your naming convention)
- Line 60: Update month_order list to match the months included in your month_map (lines 12-15)
Step 13: SMC cycles map
Show the code
# Replace NA with "No data" if needed
merged$SMC_cycles[is.na(merged$SMC_cycles)] <- "No data"
# Color palette
colors <- c(
"June - October" = "#4E79A7", # Blue (Tableau 10)
"June - September" = "#59A14F", # Green (Tableau 10)
"Not Eligible" = "gray80", # Gray
"No data" = "white"
)
# Boundaries for FIRST_DNAM
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Convert to factor
merged$SMC_cycles <- factor(merged$SMC_cycles,
levels = c("June - October", "June - September",
"Not Eligible", "No data"))
# Count categories (excluding zero counts)
cat_counts <- table(merged$SMC_cycles)
cat_counts <- cat_counts[cat_counts > 0]
# Create labels with counts
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
# Filter colors to only include categories that exist
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
# Plot
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = SMC_cycles),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = "SMC Cycles"
) +
ggplot2::labs(title = "Seasonal Malaria Chemoprevention (SMC) Cycles") +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
# Display plot
print(p)
# Save plot
ggplot2::ggsave(
filename = "smc_cycles_map.png",
plot = p,
width = 7,
height = 5,
dpi = 300
)To adapt the code:
- Lines 14-18: Update the color palette and SMC cycle categories to match your final eligibility results (e.g., if your analysis produces different cycle periods like “July - November” or “May - September”, add or modify these entries)
- Line 22: Change “FIRST_DNAM” to match your administrative boundary column name
- Lines 27-28: Update the factor levels to match the SMC cycle categories from lines 14-18 (must be identical)
- Line 51: Modify the plot title if desired
Summary
This comprehensive analysis provides a systematic framework for determining SMC eligibility based on:
- Seasonality Assessment: Using time-block analysis to identify consistent seasonal patterns in malaria transmission
- Peak Period Identification: Determining optimal intervention timing through rolling window analysis (3, 4, and 5-month windows)
- Window Selection: Comparing 4-month and 5-month windows to select the most appropriate intervention duration
- Incidence Integration: Incorporating malaria burden data to refine targeting and ensure resources reach high-burden areas
- Final Eligibility Determination: Combining all criteria to determine SMC-eligible areas and optimal implementation periods
- Visualization: Creating maps to display maximum window percentages, window start months, and final SMC cycles for spatial understanding of results
Full Code
# ==============================================================================
# STEP 1: IMPORT PACKAGES
# ==============================================================================
# Install pacman if not already installed
if (!require("pacman")) install.packages("pacman")
# Load all required packages using pacman
pacman::p_load(
here, # For file path management
readxl, # For reading Excel files
dplyr, # For data manipulation
tidyr, # For data reshaping
openxlsx, # For writing Excel files
knitr, # For creating tables
sf, # For spatial data
ggplot2 # For visualization
)
# ==============================================================================
# STEP 2: RAINFALL PEAK ANALYSIS SETUP
# ==============================================================================
# Step 2.1: Configure peak analysis parameters
# ------------------------------------------------------------------------------
# Peak analysis configuration
INPUT_FILE <- here::here("english/data_r/modeled", "chirps_data_2015_2023_lastest.xls")
PEAK_ANALYSIS_OUTPUT <- "rainfall_max_percentages_by_location.xlsx"
# Analysis time range
PEAK_ANALYSIS_START_YEAR <- 2018
PEAK_ANALYSIS_END_YEAR <- 2023
# Column mappings
LOCATION_COL <- c("FIRST_DNAM", "FIRST_CHIE")
RAINFALL_COL <- "mean_rain"
YEAR_COL <- "Year"
MONTH_COL <- "Month"
# Step 2.2: Load and process peak analysis data
# ------------------------------------------------------------------------------
# Load data for peak analysis
df <- readxl::read_excel(INPUT_FILE)
# Filter to analysis period
df <- df |>
dplyr::filter(
!!sym(YEAR_COL) >= PEAK_ANALYSIS_START_YEAR &
!!sym(YEAR_COL) <= PEAK_ANALYSIS_END_YEAR
)
# Pivot data - each month + location is a row, columns are years
df_pivot <- df |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
!!sym(MONTH_COL),
!!sym(YEAR_COL),
!!sym(RAINFALL_COL)
) |>
tidyr::pivot_wider(
names_from = !!sym(YEAR_COL),
values_from = !!sym(RAINFALL_COL),
names_sort = TRUE
)
# Calculate mean rainfall across years
year_columns <- as.character(PEAK_ANALYSIS_START_YEAR:PEAK_ANALYSIS_END_YEAR)
df_pivot <- df_pivot |>
dplyr::rowwise() |>
dplyr::mutate(
Mean = mean(dplyr::c_across(dplyr::all_of(year_columns)), na.rm = TRUE)
) |>
dplyr::ungroup()
# ==============================================================================
# STEP 3: GENERATE 3-MONTH ROLLING WINDOWS
# ==============================================================================
# 3-month windows starting from each month (January through December)
w1_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 3)
w2_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 3)
w3_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 3)
w4_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 3)
w5_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 3)
w6_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 3)
w7_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 3)
w8_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 3)
w9_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 3)
w10_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 3)
w11_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 3)
w12_3 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 3)
# ==============================================================================
# STEP 4: GENERATE 4-MONTH ROLLING WINDOWS
# ==============================================================================
# 4-month windows starting from each month (January through December)
w1_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 4)
w2_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 4)
w3_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 4)
w4_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 4)
w5_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 4)
w6_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 4)
w7_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 4)
w8_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 4)
w9_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 4)
w10_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 4)
w11_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 4)
w12_4 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 4)
# ==============================================================================
# STEP 5: GENERATE 5-MONTH ROLLING WINDOWS
# ==============================================================================
# 5-month windows starting from each month (January through December)
w1_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(1, 2, 3, 4, 5)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 1, window_size = 5)
w2_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(2, 3, 4, 5, 6)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 2, window_size = 5)
w3_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(3, 4, 5, 6, 7)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 3, window_size = 5)
w4_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(4, 5, 6, 7, 8)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 4, window_size = 5)
w5_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(5, 6, 7, 8, 9)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 5, window_size = 5)
w6_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(6, 7, 8, 9, 10)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 6, window_size = 5)
w7_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(7, 8, 9, 10, 11)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 7, window_size = 5)
w8_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(8, 9, 10, 11, 12)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 8, window_size = 5)
w9_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(9, 10, 11, 12, 1)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 9, window_size = 5)
w10_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(10, 11, 12, 1, 2)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 10, window_size = 5)
w11_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(11, 12, 1, 2, 3)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 11, window_size = 5)
w12_5 <- df_pivot |>
dplyr::filter(!!sym(MONTH_COL) %in% c(12, 1, 2, 3, 4)) |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(rainfall_mean = sum(Mean, na.rm = TRUE), .groups = "drop") |>
dplyr::mutate(start_month = 12, window_size = 5)
# ==============================================================================
# STEP 6: WINDOW ANALYSIS CONSOLIDATION
# ==============================================================================
# Step 6.1: Combine all windows
# ------------------------------------------------------------------------------
# Combine all windows into one DataFrame
all_windows <- dplyr::bind_rows(
# 3-month windows
w1_3, w2_3, w3_3, w4_3, w5_3, w6_3,
w7_3, w8_3, w9_3, w10_3, w11_3, w12_3,
# 4-month windows
w1_4, w2_4, w3_4, w4_4, w5_4, w6_4,
w7_4, w8_4, w9_4, w10_4, w11_4, w12_4,
# 5-month windows
w1_5, w2_5, w3_5, w4_5, w5_5, w6_5,
w7_5, w8_5, w9_5, w10_5, w11_5, w12_5
)
# Step 6.2: Reshape and calculate percentages
# ------------------------------------------------------------------------------
# Pivot wider so each window size is a column
final_result <- all_windows |>
tidyr::pivot_wider(
names_from = window_size,
values_from = rainfall_mean,
names_glue = "window_{window_size}_month_mean"
)
# Calculate annual total rainfall for each location
annual_total <- df_pivot |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::summarise(annual_total = sum(Mean, na.rm = TRUE), .groups = "drop")
# Merge annual totals and calculate percentages
final_result <- final_result |>
dplyr::left_join(annual_total, by = LOCATION_COL) |>
dplyr::mutate(
window_3_month_pct = ifelse(
annual_total == 0,
0,
(window_3_month_mean / annual_total) * 100
),
window_4_month_pct = ifelse(
annual_total == 0,
0,
(window_4_month_mean / annual_total) * 100
),
window_5_month_pct = ifelse(
annual_total == 0,
0,
(window_5_month_mean / annual_total) * 100
)
)
# ==============================================================================
# STEP 7: PEAK IDENTIFICATION AND SMC WINDOW SELECTION
# ==============================================================================
# Step 7.1: Find maximum percentages
# ------------------------------------------------------------------------------
# Find maximum percentages for each window size
max_3_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_3_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_3_month_pct = window_3_month_pct,
max_3_month_start = start_month
) |>
dplyr::ungroup()
max_4_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_4_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_4_month_pct = window_4_month_pct,
max_4_month_start = start_month
) |>
dplyr::ungroup()
max_5_month_details <- final_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(LOCATION_COL))) |>
dplyr::slice_max(window_5_month_pct, n = 1, with_ties = FALSE) |>
dplyr::select(
dplyr::all_of(LOCATION_COL),
max_5_month_pct = window_5_month_pct,
max_5_month_start = start_month
) |>
dplyr::ungroup()
# Step 7.2: Create consolidated peak summary
# ------------------------------------------------------------------------------
# Merge all maximum details
max_summary <- max_3_month_details |>
dplyr::left_join(max_4_month_details, by = LOCATION_COL) |>
dplyr::left_join(max_5_month_details, by = LOCATION_COL)
# Step 7.3: Calculate difference between 5-month and 4-month windows
# ------------------------------------------------------------------------------
# Calculate the percentage point difference between 5-month and 4-month windows
max_summary <- max_summary |>
dplyr::mutate(
difference_5mo_vs_4mo_percent = max_5_month_pct - max_4_month_pct
)
# ==============================================================================
# STEP 8: APPLY WINDOW SELECTION LOGIC AND DETERMINE SMC CYCLES
# ==============================================================================
# Step 8.1: Select optimal window size based on 10% threshold
# ------------------------------------------------------------------------------
# Helper data for month conversion
month_names <- c(
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
)
# Apply the 10% threshold rule to select window size
max_summary <- max_summary |>
dplyr::rowwise() |>
dplyr::mutate(
# Decision: if difference > 10%, use 5-month; otherwise use 4-month
SMC_window_size_selected = ifelse(difference_5mo_vs_4mo_percent > 10, 5, 4),
# Select the appropriate start month based on chosen window size
best_start_month = dplyr::case_when(
SMC_window_size_selected == 4 ~ max_4_month_start,
SMC_window_size_selected == 5 ~ max_5_month_start,
TRUE ~ NA_real_
),
# Calculate end month
end_month_num = ((best_start_month - 1 + SMC_window_size_selected - 1) %% 12) + 1,
# Create human-readable SMC cycle description
SMC_cycles = paste0(
month_names[best_start_month],
" - ",
month_names[end_month_num]
)
) |>
dplyr::ungroup() |>
dplyr::select(-end_month_num)
# Save peak analysis results
openxlsx::write.xlsx(max_summary, PEAK_ANALYSIS_OUTPUT, rowNames = FALSE)
# ==============================================================================
# STEP 9: INTEGRATE INCIDENCE DATA AND DETERMINE FINAL ELIGIBILITY
# ==============================================================================
# Step 9.1: Configure final analysis parameters
# ------------------------------------------------------------------------------
# Final analysis configuration
incidence_file <- here::here("english/data_r/modeled", "incidence.xlsx")
# SMC Eligibility criteria
SEASONALITY_REQUIRED <- "Seasonal"
INCIDENCE_THRESHOLD <- 250
# Column names
SEASONALITY_COL <- "Seasonality"
INCIDENCE_COL <- "incidence"
# Step 9.2: Load incidence data and merge all results
# ------------------------------------------------------------------------------
# Load location summary data (seasonality classifications)
location_summary_file <- here::here("english/library/stratification/other", "location_seasonality_summary.xlsx")
location_summary <- readxl::read_excel(location_summary_file)
# Load incidence data
incidence_data <- readxl::read_excel(incidence_file)
# Merge all datasets: seasonality classification + peak timing + incidence
final_smc_data <- location_summary |>
dplyr::left_join(max_summary, by = LOCATION_COL) |>
dplyr::left_join(incidence_data, by = LOCATION_COL)
# Step 9.3: Apply final SMC eligibility criteria
# ------------------------------------------------------------------------------
# Determine final SMC eligibility and cycles
final_output <- final_smc_data |>
dplyr::mutate(
SMC_cycles = dplyr::case_when(
.data[[SEASONALITY_COL]] == SEASONALITY_REQUIRED &
.data[[INCIDENCE_COL]] > INCIDENCE_THRESHOLD ~ SMC_cycles,
TRUE ~ "Not Eligible"
)
) |>
dplyr::arrange(dplyr::across(dplyr::all_of(LOCATION_COL)))
# ==============================================================================
# STEP 10: LOAD AND MERGE SPATIAL DATA
# ==============================================================================
# Define file paths
file_path <- here::here("english/data_r/shapefiles", "Chiefdom2021.shp")
# Load shapefile
spatial_data <- sf::st_read(file_path)
# Merge datasets on matching columns
merged <- spatial_data |>
dplyr::left_join(final_output, by = c("FIRST_DNAM", "FIRST_CHIE"))
# ==============================================================================
# STEP 11: MAXIMUM WINDOW PERCENTAGE MAPS
# ==============================================================================
# Function: Categorize rainfall
categorize <- function(val) {
if (is.na(val)) {
return("No data")
} else if (val < 60) {
return("<60")
} else if (val >= 60 && val < 70) {
return("60-<70")
} else if (val >= 70 && val < 80) {
return("70-<80")
} else if (val >= 80 && val < 90) {
return("80-<90")
} else if (val >= 90 && val <= 95) {
return("90-95")
} else {
return("No data")
}
}
# Define colors using tab10 palette
colors <- c(
"<60" = "#4E79A7",
"60-<70" = "#F28E2B",
"70-<80" = "#E15759",
"80-<90" = "#59A14F",
"90-95" = "#EDC948",
"No data" = "white"
)
# Boundaries for chiefdoms
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Loop for 3, 4, and 5-month windows
month_windows <- list(
list(months = 3, colname = "max_3_month_pct"),
list(months = 4, colname = "max_4_month_pct"),
list(months = 5, colname = "max_5_month_pct")
)
for (window in month_windows) {
months <- window$months
colname <- window$colname
merged$Category <- sapply(merged[[colname]], categorize)
merged$Category <- factor(merged$Category,
levels = c("<60", "60-<70", "70-<80",
"80-<90", "90-95", "No data"))
cat_counts <- table(merged$Category)
cat_counts <- cat_counts[cat_counts > 0]
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = Category),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = "max %"
) +
ggplot2::labs(title = paste0("Rainfall ", months, "-month window")) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
ggplot2::ggsave(
filename = paste0("rainfall_", months, "_month_window_max_percentage.png"),
plot = p,
width = 7,
height = 5,
dpi = 300
)
print(p)
}
# ==============================================================================
# STEP 12: MAXIMUM MONTH WINDOW START MAPS
# ==============================================================================
# Function: Recode months
recode_month <- function(val) {
month_map <- c(
"3" = "March", "4" = "April", "5" = "May", "6" = "June",
"7" = "July", "8" = "August", "9" = "September",
"10" = "October", "11" = "November"
)
if (is.na(val)) {
return("No data")
}
result <- month_map[as.character(val)]
if (is.na(result)) {
return(as.character(val))
}
return(result)
}
# Color palette (Tableau 10)
colors <- c(
"March" = "#4E79A7",
"April" = "#F28E2B",
"May" = "#E15759",
"June" = "#AF7AA1",
"July" = "#59A14F",
"August" = "#EDC948",
"September" = "#B07AA1",
"October" = "#9C755F",
"November" = "#76B7B2",
"No data" = "white"
)
# Boundaries for FIRST_DNAM
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Month-window columns to loop over
month_windows <- list(
list(months = 3, colname = "max_3_month_start"),
list(months = 4, colname = "max_4_month_start"),
list(months = 5, colname = "max_5_month_start")
)
# Loop for plotting
for (window in month_windows) {
months <- window$months
colname <- window$colname
merged$Category <- sapply(merged[[colname]], recode_month)
month_order <- c("March", "April", "May", "June", "July",
"August", "September", "October", "November", "No data")
merged$Category <- factor(merged$Category, levels = month_order)
cat_counts <- table(merged$Category)
cat_counts <- cat_counts[cat_counts > 0]
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = Category),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = paste0(months, "-month start")
) +
ggplot2::labs(title = paste0("Rainfall ", months, "-month window start month")) +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
ggplot2::ggsave(
filename = paste0("rainfall_", months, "_month_window_start.png"),
plot = p,
width = 7,
height = 5,
dpi = 300
)
print(p)
}
# ==============================================================================
# STEP 13: SMC CYCLES MAP
# ==============================================================================
# Replace NA with "No data" if needed
merged$SMC_cycles[is.na(merged$SMC_cycles)] <- "No data"
# Color palette
colors <- c(
"June - October" = "#4E79A7",
"June - September" = "#59A14F",
"Not Eligible" = "gray80",
"No data" = "white"
)
# Boundaries for FIRST_DNAM
boundaries <- merged |>
dplyr::group_by(FIRST_DNAM) |>
dplyr::summarise(geometry = sf::st_union(geometry))
# Convert to factor
merged$SMC_cycles <- factor(merged$SMC_cycles,
levels = c("June - October", "June - September",
"Not Eligible", "No data"))
# Count categories (excluding zero counts)
cat_counts <- table(merged$SMC_cycles)
cat_counts <- cat_counts[cat_counts > 0]
# Create labels with counts
legend_labels <- paste0(names(cat_counts), " (", cat_counts, ")")
names(legend_labels) <- names(cat_counts)
# Filter colors to only include categories that exist
colors_filtered <- colors[names(colors) %in% names(cat_counts)]
# Plot
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = merged, ggplot2::aes(fill = SMC_cycles),
color = "black", linewidth = 0.3) +
ggplot2::geom_sf(data = boundaries, fill = NA,
color = "black", linewidth = 0.8) +
ggplot2::scale_fill_manual(
values = colors_filtered,
labels = legend_labels,
name = "SMC Cycles"
) +
ggplot2::labs(title = "Seasonal Malaria Chemoprevention (SMC) Cycles") +
ggplot2::theme_void() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right",
legend.background = ggplot2::element_blank(),
legend.box.background = ggplot2::element_blank(),
legend.key = ggplot2::element_rect(color = "black", linewidth = 0.3)
)
print(p)
ggplot2::ggsave(
filename = "smc_cycles_map.png",
plot = p,
width = 7,
height = 5,
dpi = 300
)