# Install pacman only if it's not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")}
# Install (if missing) and load all packages used on this page
pacman::p_load(
dplyr, # Data manipulation
tidyr, # Data reshaping
readxl, # Read in Excel files
lubridate, # Date handling
season, # Seasonal trend analysis
data.table, # Efficient data handling (rbindlist)
zoo, # Time series analysis (scale_x_yearmon)
stlplus, # Seasonal decomposition
trend, # Mann-Kendall trend tests (smk.test, sea.sens.slope)
tmap, # Thematic mapping
sf, # Spatial data handling
ggplot2, # Data visualization
openxlsx # Write Excel files
)

6.1: Trend analysis
Advanced
Overview
This page provides the full code for retrospective trend analysis of malaria incidence. It includes statistical methods for detecting trends, spatial visualization of results, and multiple adjustment approaches to account for testing completeness, notification rates, and healthcare-seeking behavior. The analysis supports evidence-based decision making for subnational targeting of malaria interventions.
- Quantify temporal trends in malaria incidence using Mann-Kendall tests and Sen’s slope analysis
- Compare crude and adjusted incidence rates across three methodological approaches
- Identify districts with significant trends (increasing/decreasing) for targeted interventions
- Visualize spatial patterns of malaria burden and trends across national and subnational boundaries
- Calculate rate variations to measure progress over time
- Provide reproducible code for ongoing malaria surveillance and trend monitoring
This page uses code for the Burkina Faso retrospective trend analysis of malaria incidence between 2020 and 2024. The analysis uses code adapted by Ousmane Diallo and Safa Siddiqui in the 2025 analysis.
Step-by-step
To skip the step-by-step explanation, jump to the full code at the end of this page.
Step 1: Install and load packages
Install and load all packages necessary for retrospective trend analysis.
Step 2: Load and prepare monthly incidence data
This step loads the required data for retrospective trend analysis — monthly incidence rates using multiple adjustment methods for crude and adjusted incidence.
# Load data
# NOTE(review): the file name suggests data for 2015-2024 although the page
# describes a 2020-2024 analysis — confirm the analysis window.
adj_incidence_all_pop <- read_excel(
here::here(
"english/data_r/retrospective",
"20250808_updated_adjusted_incidence_plotting_data_2015_2024.xlsx"
)
)
# Ensure district coding consistency
# NOTE(review): the full script at the end of this page derives adm2 from
# `districts` rather than `adm2_DHIS2` — confirm which column is canonical.
adj_incidence_all_pop$adm2 <- adj_incidence_all_pop$adm2_DHIS2
# Calculate monthly incidence rates with multiple adjustment methods
monthly_DS_incide <- adj_incidence_all_pop %>%
dplyr::mutate(
Date = make_date(year = Year, month = Month, day = 1), # Create date object
TPR = `cas positif` / `test realises`, # Test positivity rate
pop_monthly = `Population totale`/12, # Monthly population estimate
total_cases_report_public_sector = `cas positif`,
# Four incidence calculation methods:
mal_cases = (Nbrut / pop_monthly) * 1000, # Crude incidence per 1000
incidence_adj_presumed_cases = (N1 / pop_monthly) * 1000, # Adjustment 1: Testing rates
incidence_adj_presumed_cases_RR = (N2 / pop_monthly) * 1000, # Adjustment 2: Notification rates
incidence_adj_presumed_cases_RR_TSR = (N3 / pop_monthly) * 1000 # Adjustment 3: Healthcare-seeking behavior
) %>%
# Handle mathematical errors
# NOTE(review): the NaN replacement in the final mutate() catches 0/0 but not
# Inf produced by x/0 (e.g. TPR when `test realises` is 0) — confirm intended.
dplyr::mutate(across(where(is.numeric), ~ ifelse(is.nan(.), 0, .)))

To adapt the code: Modify incidence calculation formulas based on your specific adjustment methods.
Step 3: Visualize temporal trends
We begin by generating time series plots showing malaria incidence trends across all districts for the period of analysis.
Show the code
# Plot incidence trends for all districts (2020-2024)
# Each plot draws one semi-transparent line per district (adm2).
# NOTE(review): p1 indexes 60 months (seq(1,60,6)) while p2-p4 index 96 —
# confirm the actual number of months in the series and make these consistent.
# NOTE(review): geom_line(size = ...) is deprecated in recent ggplot2
# (>= 3.4) in favour of `linewidth` — still works but warns.
# NOTE(review): scale_x_yearmon() (zoo) is designed for yearmon-class axes;
# here Date objects are supplied — confirm labels render as intended.
# Crude incidence plot
p1 = ggplot(monthly_DS_incide, aes(x = Date, y = mal_cases, group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") +
ggtitle("Monthly crude incidence (2020-2024)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,60,6), 60)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,60,6), 60)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 1 plot (testing rates)
p2 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjusted 1 (testing rates)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 2 plot (notification rates)
p3 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases_RR,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjust 2 (notification rates)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 3 plot (healthcare-seeking behavior)
p4 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases_RR_TSR,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjusted 3 (healthcare-seeking behavior)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))

To adapt the code: Adjust date breaks, colors, and titles to match your time period and aesthetic preferences.
Step 4: Annual incidence calculation
Next we convert monthly data to annual incidence rates for trend comparison across several years.
# ANNUAL INCIDENCE CALCULATION
# Aggregate monthly data to annual incidence for trend comparison
# NOTE(review): this groups by `districts` while the trend loop below uses
# `adm2` — confirm the two identifiers match one-to-one.
annual_incidence <- monthly_DS_incide %>%
dplyr::mutate(year = year(Date)) %>%
dplyr::group_by(districts, year) %>%
dplyr::summarise(
# Data-quality diagnostics: n_months should be 12 for a complete year, and
# pop_vals should be 1 if the annual population is constant within a year.
n_months = n_distinct(floor_date(Date, "month")),
pop_vals = n_distinct(`Population totale`, na.rm = TRUE),
annual_pop = first(`Population totale`), # Annual population estimate
# Case totals for each adjustment method
cases_brut = sum(Nbrut, na.rm = TRUE),
cases_N1 = sum(N1, na.rm = TRUE),
cases_N2 = sum(N2, na.rm = TRUE),
cases_N3 = sum(N3, na.rm = TRUE),
# Annual incidence rates per 1000 (later summarise args may reference
# earlier ones, so cases_* are available here)
inc_brut_yr = (cases_brut / annual_pop) * 1000,
inc_N1_yr = (cases_N1 / annual_pop) * 1000,
inc_N2_yr = (cases_N2 / annual_pop) * 1000,
inc_N3_yr = (cases_N3 / annual_pop) * 1000,
.groups = "drop"
)

To adapt the code: Modify grouping variables and incidence calculations based on your data structure.
Step 5: Rate variation analysis
This code calculates rate variation — a simple calculation of the percentage change in incidence between the first and last year of analysis.
# RATE VARIATION ANALYSIS (2020 vs 2024)
# Calculate percentage change between first and last year
# NOTE(review): if a district's first-year incidence is 0, the percentage
# change below is Inf (or NaN when both are 0) — confirm how to handle.
Augment_dimuni <- annual_incidence %>%
dplyr::group_by(districts) %>%
dplyr::arrange(year) %>%
dplyr::summarise(
# Extract first and last year values for each indicator
# (arrange(year) above guarantees first()/last() are chronological)
first_crude = first(inc_brut_yr),
last_crude = last(inc_brut_yr),
first_N3 = first(inc_N3_yr),
last_N3 = last(inc_N3_yr),
first_N2 = first(inc_N2_yr),
last_N2 = last(inc_N2_yr),
first_N1 = first(inc_N1_yr),
last_N1 = last(inc_N1_yr),
# Calculate percentage change
rate = (last_crude - first_crude) / first_crude * 100,
rateN3 = (last_N3 - first_N3) / first_N3 * 100,
rateN2 = (last_N2 - first_N2) / first_N2 * 100,
rateN1 = (last_N1 - first_N1) / first_N1 * 100
) %>%
# NOTE(review): summarise() already yields one row per district, so the
# trailing distinct() is redundant.
dplyr::ungroup() %>%
dplyr::distinct(districts, .keep_all = TRUE)

To adapt the code: Adjust the years being compared and add/remove adjustment methods as needed.
Step 6: Statistical trend analysis
Step 6.1: Normalization function
The normalization function getNormalized standardizes time series data for trend detection.
# TREND ANALYSIS: MANN-KENDALL AND SEN'S SLOPE
# Normalization function for time series analysis
getNormalized <- function(vec) {
# Z-score normalization: (x - mean) / sd, NA values ignored in the
# mean/sd but preserved in the output.
# Returns the input unchanged (with a warning) when it is non-numeric,
# entirely NA, or has zero/undefined standard deviation.
if (!is.numeric(vec) || all(is.na(vec))) {
warning("Input vector is non-numeric or all NA; returning original vector")
return(vec)
}
vec_mean <- mean(vec, na.rm = TRUE)
vec_sd <- sd(vec, na.rm = TRUE)
if (is.na(vec_sd) || vec_sd == 0) {
warning("Standard deviation is 0 or NA; returning original vector")
return(vec)
}
return((vec - vec_mean) / vec_sd) # Z-score normalization
}

To adapt the code: Modify the normalization method if you prefer min-max scaling or other techniques.
Step 6.2: Perform trend analysis by district
Next we use the previously defined getNormalized to apply Mann-Kendall tests of significance and Sen’s slope analysis to each district and each adjustment method.
Show the code
# Prepare the monthly data for trend analysis
monthly_DS_incidence1 <- monthly_DS_incide
# Initialize results as a list for flexibility
STL_result_DF_norm_list <- list()
# Define indicators for trend analysis: column name + human-readable label
indicators <- list(
list(col = "mal_cases", type = "Crude Incidence"),
list(col = "incidence_adj_presumed_cases", type = "Adjusted Presumed Cases"),
list(col = "incidence_adj_presumed_cases_RR", type = "Adjusted Presumed Cases RR"),
list(col = "incidence_adj_presumed_cases_RR_TSR", type = "Adjusted Presumed Cases RR TSR")
)
# Loop over districts for trend analysis: for each district and indicator,
# normalize the series, decompose it with STL, then run the seasonal
# Mann-Kendall test and seasonal Sen's slope.
for (DS in sort(unique(monthly_DS_incidence1$adm2))) {
cases_dist <- monthly_DS_incidence1 %>%
filter(adm2 == DS) %>%
arrange(Date)
if (nrow(cases_dist) == 0) {
warning(paste("No data for district:", DS, "- skipping"))
next
}
for (ind in indicators) {
if (!ind$col %in% names(cases_dist)) {
warning(paste("Column", ind$col, "not found in district", DS, "- skipping"))
next
}
ind_values <- cases_dist[[ind$col]]
if (!is.numeric(ind_values) || all(is.na(ind_values)) || length(ind_values) == 0) {
warning(paste("Invalid data for", ind$col, "in district", DS, "- skipping"))
next
}
ind_norm <- getNormalized(ind_values)
valid_idx <- !is.na(ind_values) # Logical index of non-NA values
valid_ind_norm <- ind_norm[valid_idx]
valid_dates <- cases_dist$Date[valid_idx]
if (length(valid_ind_norm) < 2) {
warning(paste("Fewer than 2 valid observations for", ind$col, "in district", DS, "- skipping"))
next
}
# Create time series
# NOTE(review): dropping NA months makes the ts() contiguous, so any gap
# shifts the month labels of all later observations — confirm acceptable.
start_year <- as.numeric(format(valid_dates[1], "%Y"))
start_month <- as.numeric(format(valid_dates[1], "%m"))
ind_ts <- ts(valid_ind_norm, start = c(start_year, start_month), deltat = 1/12)
# Perform STL decomposition
ind_stl <- stlplus(ind_ts, s.window = "periodic")
# Ensure STL output matches valid dates
ind_stl_ts <- as.data.frame(ind_stl$data[, 1:4])
if (nrow(ind_stl_ts) != length(valid_dates)) {
warning(paste("STL output length mismatch for", ind$col, "in district", DS))
next
}
# Perform Mann-Kendall test (seasonal variant; falls back to NA p-value)
mk_result <- tryCatch({
smk.test(ind_ts)
}, error = function(e) {
warning(paste("Mann-Kendall test failed for", ind$col, "in district", DS, ":", e$message))
list(p.value = NA)
})
# Perform Sen's slope (seasonal variant; NA on failure)
sens_result <- tryCatch({
sea.sens.slope(ind_ts)
}, error = function(e) {
warning(paste("Sen's slope failed for", ind$col, "in district", DS, ":", e$message))
NA
})
# Prepare results: one data frame per district x indicator, carrying the
# STL components plus the test statistics (constant per data frame)
ind_stl_ts$type <- ind$type
ind_stl_ts$dates <- valid_dates
ind_stl_ts$MK_p <- ifelse(!is.null(mk_result$p.value), mk_result$p.value, NA)
ind_stl_ts$sens_slope <- sens_result
ind_stl_ts$District <- DS
# Append to list
STL_result_DF_norm_list[[paste(DS, ind$col, sep = "_")]] <- ind_stl_ts
}
}
# Combine results
STL_result_DF_norm <- rbindlist(STL_result_DF_norm_list, fill = TRUE)

To adapt the code: Modify the indicators list to include your specific variables and adjust error handling as needed.
Step 7: Categorize trend results
Trends must then be categorized. For simplicity, the categories used here are increasing, decreasing, or non-significant based on statistical tests.
# Extract unique slope results (one row per district x adjustment method)
STL_result_DF_slopes <- unique(STL_result_DF_norm[, c("type", "MK_p", "sens_slope", "District")])
# Categorize slopes based on significance
STL_result_DF_slopes$plotting_sens_slope <- STL_result_DF_slopes$sens_slope
# NOTE(review): which() drops NA, so rows where the Mann-Kendall test failed
# (MK_p == NA) keep their slope and are later labelled as significant —
# confirm whether NA p-values should be treated as non-significant.
STL_result_DF_slopes[which(STL_result_DF_slopes$MK_p > .05), "plotting_sens_slope"] <- NA
# Create slope categories (French labels: Diminution = decrease,
# augmentation = increase, non significatif = not significant)
STL_result_DF_slope = STL_result_DF_slopes %>%
dplyr::mutate(slope = ifelse(plotting_sens_slope < 0, 'Diminution',
ifelse(plotting_sens_slope > 0, 'augmentation',
ifelse(is.na(plotting_sens_slope), 'non significatif', ''))))
# The ifelse chain yields NA (not 'non significatif') when the slope is NA;
# the recode below fixes that.
STL_result_DF_slope = STL_result_DF_slope %>%
dplyr::mutate(slope = ifelse(is.na(slope), 'non significatif', slope), adm2 = District)

To adapt the code: Adjust significance threshold (0.05) or category names based on your analysis needs.
Step 8: Spatial mapping
Step 8.1: Trend direction plots
The first visualization we’ll create is a map showing spatial pattern in malaria trends over time.
# SPATIAL ANALYSIS: SHAPEFILE INTEGRATION
# Load and prepare district shapefiles for mapping
# NOTE(review): both objects read the identical file, and the path is an
# absolute, machine-specific location — replace with a project-relative path.
HDsh <- st_read("C:/Users/exh1349/NU-malaria-team Dropbox/projects/hbhi_burkina/project_notes/publication/retrospective analysis 2023/Shapefiles/BFA_Districts_Sanitaires_2022.shp")
nlleshapefile_sf = st_read('C:/Users/exh1349/NU-malaria-team Dropbox/projects/hbhi_burkina/project_notes/publication/retrospective analysis 2023/Shapefiles/BFA_Districts_Sanitaires_2022.shp')
# Merge trend results with spatial data
# NOTE(review): full_join keeps districts present in only one side, producing
# rows with missing geometry or missing trend values if names mismatch.
nvlle_HD_slope = STL_result_DF_slope %>%
full_join(nlleshapefile_sf, ., by = c('adm2'))
# CREATE TREND MAPS
# Sen's slope map with significance coloring
sens_slope = nvlle_HD_slope %>%
tm_shape() +
tm_polygons("slope",
# Colors are assigned to the slope categories in their level order —
# verify the red/blue/gray mapping matches the legend text.
palette = c("#D73027", "#4575B4", "#999999"), # Red for decrease, blue for increase, gray for non-significant
title = "Trend Direction\n(Red=Decrease, Blue=Increase, Gray=Non-significant)") +
tm_facets("type", ncol = 2) + # Separate maps for each adjustment method
tm_layout(legend.outside = TRUE,
legend.title.size = 0.6,
legend.text.size = 0.6,
frame = FALSE)

To adapt the code: Update the shapefile path and adjust color scheme or layout parameters.
Step 8.2: Rate variation plots
The next visualization shows the percentage change in incidence between the first and last year using a defined function.
Show the code
# Rate variation maps (2020-2024 comparison)
# Define a function to create the tmap plot: builds a choropleth of a
# rate-variation column, writes it to a JPEG, and returns the tmap object.
create_rate_map <- function(data, rate_col, file_name, breaks, title = "Variation de taux") {
tm <- tm_shape(data) +
tm_polygons(
col = rate_col,
title = title,
breaks = breaks,
palette = "-RdYlBu", # Red-Yellow-Blue diverging palette
legend.show = TRUE
) +
tm_layout(
main.title = title,
main.title.size = 1,
main.title.position = "center",
legend.outside = TRUE,
legend.title.size = 0.6,
legend.text.size = 0.6,
frame = FALSE
)
# Save to JPEG
# NOTE(review): if print(tm) errors the device is never closed — consider
# on.exit(dev.off(), add = TRUE) immediately after jpeg().
jpeg(file_name, width = 1600, height = 1200, res = 300)
print(tm)
dev.off()
return(tm)
}
# Define parameters for rate variation maps
rate_configs <- list(
list(col = "rate", file = "N_rate_crude.jpg", breaks = c(-100, 0, 50, 100, 250), title = "Taux brut de variation"),
list(col = "rateN1", file = "N1_rate.jpg", breaks = c(-100, 0, 50, 100, 250), title = "Taux de variation N1"),
list(col = "rateN2", file = "N2_rate.jpg", breaks = c(-100, 0, 50, 100, 250), title = "Taux de variation N2"),
list(col = "rateN3", file = "N3_rate.jpg", breaks = c(-100, 0, 50, 100, 250), title = "Taux de variation N3")
)
# Apply the function to all configurations
lapply(rate_configs, function(config) {
create_rate_map(
# NOTE(review): `HD` is not defined on this page; the rate columns live in
# Augment_dimuni and would need to be joined onto the shapefile — confirm.
data = HD,
rate_col = config$col,
file_name = config$file,
breaks = config$breaks,
title = config$title
)
})

To adapt the code: Modify the break points, color palette, or file naming convention based on your data distribution and output requirements.
Step 9: Export and save results
Produce comprehensive summary statistics and trend categorization counts.
# EXPORT ANALYTICAL RESULTS
# Save comprehensive trend analysis results to Excel files.
# Create a workbook for trend results
trend_wb <- createWorkbook()
# Sheet 1: Complete Sen's slope and Mann-Kendall results
addWorksheet(trend_wb, "Trend_Analysis_Detailed")
writeData(trend_wb, "Trend_Analysis_Detailed", STL_result_DF_slope, startRow = 1)
# Sheet 2: Summary statistics by adjustment method
trend_summary <- STL_result_DF_slope %>%
  group_by(type, slope) %>%
  summarise(
    count = n(),
    mean_sens_slope = mean(sens_slope, na.rm = TRUE),
    median_sens_slope = median(sens_slope, na.rm = TRUE),
    min_sens_slope = min(sens_slope, na.rm = TRUE),
    max_sens_slope = max(sens_slope, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  # FIX: compute percentages within each adjustment method. The original
  # divided by sum(count) over the whole table (all methods pooled), which
  # disagreed with the per-method console summary printed later.
  group_by(type) %>%
  mutate(percentage = (count / sum(count)) * 100) %>%
  ungroup()
addWorksheet(trend_wb, "Trend_Summary")
writeData(trend_wb, "Trend_Summary", trend_summary, startRow = 1)
# Sheet 3: Rate variation results
addWorksheet(trend_wb, "Rate_Variation")
writeData(trend_wb, "Rate_Variation", Augment_dimuni, startRow = 1)
# Sheet 4: Annual incidence data
addWorksheet(trend_wb, "Annual_Incidence")
writeData(trend_wb, "Annual_Incidence", annual_incidence, startRow = 1)
# Save the workbook
saveWorkbook(trend_wb, "malaria_trend_analysis_results.xlsx", overwrite = TRUE)

To adapt the code: Modify the summary format or add additional statistics like mean Sen’s slope values or confidence intervals.
Full Code
Find the full code script for retrospective trend analysis below.
Show the code
# MALARIA TREND ANALYSIS FOR SUB-NATIONAL TAILORING
# =================================================
# This script performs retrospective trend analysis of malaria incidence data
# from Burkina Faso's DHIS2 system (2020-2024). It includes:
# 1. Incidence calculation with multiple adjustment methods
# 2. Mann-Kendall trend tests and Sen's slope analysis
# 3. Spatial mapping of trends and rate variations
# 4. Visualization of results for intervention planning
# SETUP AND DEPENDENCIES
# ----------------------
# Install pacman only if it's not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# Install (if missing) and load all packages the script uses
pacman::p_load(
  dplyr,      # Data manipulation
  readxl,     # Read in Excel files
  lubridate,  # Date handling
  season,     # Seasonal trend analysis
  data.table, # Efficient data handling (rbindlist)
  zoo,        # Time series analysis (scale_x_yearmon)
  stlplus,    # Seasonal decomposition
  trend,      # Mann-Kendall trend tests (smk.test, sea.sens.slope)
  tmap,       # Thematic mapping
  sf,         # Spatial data handling
  ggplot2,    # Data visualization
  gridExtra   # FIX: grid.arrange() is called below but was never loaded
)
# DATA PREPARATION
# ----------------
# adj_incidence_all_pop contains annual incidence data for each district of Burkina Faso
# with the following adjustment methods:
# - Incidence brute: Crude incidence
# - Incidence ajustement 1: Adjusted for incomplete and heterogeneous testing rates
# - Incidence ajustement 2: Adjusted for variable notification rates (includes ajustement 1)
# - Incidence ajustement 3: Adjusted for healthcare-seeking behavior (includes ajustements 1 and 2)
# Load data
adj_incidence_all_pop <- read_excel(
here::here(
"english/data_r/retrospective",
"20250808_updated_adjusted_incidence_plotting_data_2015_2024.xlsx"
)
)
# Ensure district coding consistency
# NOTE(review): the step-by-step version earlier on this page derives adm2
# from `adm2_DHIS2`, not `districts` — confirm which column is canonical.
adj_incidence_all_pop$adm2 <- adj_incidence_all_pop$districts
# Calculate monthly incidence rates with multiple adjustment methods
monthly_DS_incide <- adj_incidence_all_pop %>%
dplyr::mutate(
Date = make_date(year = Year, month = Month, day = 1), # Create date object
TPR = `cas positif` / `test realises`, # Test positivity rate
pop_monthly = `Population totale`/12, # Monthly population estimate
total_cases_report_public_sector = `cas positif`,
# Four incidence calculation methods:
mal_cases = (Nbrut / pop_monthly) * 1000, # Crude incidence per 1000
incidence_adj_presumed_cases = (N1 / pop_monthly) * 1000, # Adjustment 1: Testing rates
incidence_adj_presumed_cases_RR = (N2 / pop_monthly) * 1000, # Adjustment 2: Notification rates
incidence_adj_presumed_cases_RR_TSR = (N3 / pop_monthly) * 1000 # Adjustment 3: Healthcare-seeking behavior
) %>%
# Handle mathematical errors
# NOTE(review): replaces NaN (0/0) but not Inf (x/0, e.g. TPR with 0 tests).
dplyr::mutate(across(where(is.numeric), ~ ifelse(is.nan(.), 0, .)))
# VISUALIZATION: TEMPORAL TRENDS
# ------------------------------
# Plot incidence trends for all districts (2020-2024)
# One semi-transparent line per district (adm2) in each panel.
# NOTE(review): p1 indexes 60 months while p2-p4 index 96 — confirm the
# series length and make the break indices consistent.
# NOTE(review): geom_line(size = ...) is deprecated in recent ggplot2
# (>= 3.4) in favour of `linewidth`.
# Crude incidence plot
p1 = ggplot(monthly_DS_incide, aes(x = Date, y = mal_cases, group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") +
ggtitle("Monthly crude incidence (2020-2024)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,60,6), 60)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,60,6), 60)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 1 plot (testing rates)
p2 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjusted 1 (testing rates)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 2 plot (notification rates)
p3 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases_RR,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjust 2 (notification rates)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# Adjusted incidence 3 plot (healthcare-seeking behavior)
p4 = ggplot(monthly_DS_incide, aes(x = Date, y = incidence_adj_presumed_cases_RR_TSR,
group = as.factor(adm2))) +
geom_line(alpha = 0.25, size = 1, show.legend = FALSE, color = "blue") + ylab("") +
ggtitle("Monthly adjusted 3 (healthcare-seeking behavior)") +
scale_x_yearmon("Date", breaks = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)],
labels = sort(unique(monthly_DS_incide$Date))[c(seq(1,96,6), 96)]) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
# ANNUAL INCIDENCE CALCULATION
# ----------------------------
# Aggregate monthly data to annual incidence for trend comparison
annual_incidence <- monthly_DS_incide %>%
dplyr::mutate(year = year(Date)) %>%
dplyr::group_by(districts, year) %>%
dplyr::summarise(
# Data-quality diagnostics: n_months should be 12 for a complete year;
# pop_vals should be 1 if the annual population is constant within a year.
n_months = n_distinct(floor_date(Date, "month")),
pop_vals = n_distinct(`Population totale`, na.rm = TRUE),
annual_pop = first(`Population totale`), # Annual population estimate
# Case totals for each adjustment method
cases_brut = sum(Nbrut, na.rm = TRUE),
cases_N1 = sum(N1, na.rm = TRUE),
cases_N2 = sum(N2, na.rm = TRUE),
cases_N3 = sum(N3, na.rm = TRUE),
# Annual incidence rates per 1000
inc_brut_yr = (cases_brut / annual_pop) * 1000,
inc_N1_yr = (cases_N1 / annual_pop) * 1000,
inc_N2_yr = (cases_N2 / annual_pop) * 1000,
inc_N3_yr = (cases_N3 / annual_pop) * 1000,
.groups = "drop"
)
# RATE VARIATION ANALYSIS (2020 vs 2024)
# --------------------------------------
# Calculate percentage change between first and last year
Augment_dimuni <- annual_incidence %>%
dplyr::group_by(districts) %>%
dplyr::arrange(year) %>%
dplyr::summarise(
# Extract first and last year values for each indicator
# (arrange(year) guarantees first()/last() are chronological)
first_crude = first(inc_brut_yr),
last_crude = last(inc_brut_yr),
first_N3 = first(inc_N3_yr),
last_N3 = last(inc_N3_yr),
first_N2 = first(inc_N2_yr),
last_N2 = last(inc_N2_yr),
first_N1 = first(inc_N1_yr),
last_N1 = last(inc_N1_yr),
# Calculate percentage change
# NOTE(review): Inf/NaN results when a district's first-year value is 0.
rate = (last_crude - first_crude) / first_crude * 100,
rateN3 = (last_N3 - first_N3) / first_N3 * 100,
rateN2 = (last_N2 - first_N2) / first_N2 * 100,
rateN1 = (last_N1 - first_N1) / first_N1 * 100
) %>%
dplyr::ungroup() %>%
# NOTE(review): redundant — summarise() already yields one row per district.
dplyr::distinct(districts, .keep_all = TRUE)
# TREND ANALYSIS: MANN-KENDALL AND SEN'S SLOPE
# --------------------------------------------
# Statistical analysis of trends over time
# Normalization function for time series analysis
getNormalized <- function(vec) {
  # Z-score normalization for trend detection: (x - mean) / sd, with NA
  # values ignored when computing the mean/sd but preserved in the output.
  # The input is returned untouched (with a warning) when it is
  # non-numeric, entirely NA, or has zero/undefined spread.
  if (!is.numeric(vec) || all(is.na(vec))) {
    warning("Input vector is non-numeric or all NA; returning original vector")
    return(vec)
  }
  centre <- mean(vec, na.rm = TRUE)
  spread <- sd(vec, na.rm = TRUE)
  if (is.na(spread) || spread == 0) {
    warning("Standard deviation is 0 or NA; returning original vector")
    return(vec)
  }
  (vec - centre) / spread
}
# Prepare the monthly data for trend analysis
monthly_DS_incidence1 <- monthly_DS_incide
# Initialize results as a list for flexibility
STL_result_DF_norm_list <- list()
# Define indicators for trend analysis: column name + human-readable label
indicators <- list(
list(col = "mal_cases", type = "Crude Incidence"),
list(col = "incidence_adj_presumed_cases", type = "Adjusted Presumed Cases"),
list(col = "incidence_adj_presumed_cases_RR", type = "Adjusted Presumed Cases RR"),
list(col = "incidence_adj_presumed_cases_RR_TSR", type = "Adjusted Presumed Cases RR TSR")
)
# Loop over districts: for each district and indicator, normalize the
# series, run STL decomposition, then the seasonal Mann-Kendall test and
# seasonal Sen's slope.
for (DS in sort(unique(monthly_DS_incidence1$adm2))) {
cases_dist <- monthly_DS_incidence1 %>%
filter(adm2 == DS) %>%
arrange(Date)
if (nrow(cases_dist) == 0) {
warning(paste("No data for district:", DS, "- skipping"))
next
}
for (ind in indicators) {
if (!ind$col %in% names(cases_dist)) {
warning(paste("Column", ind$col, "not found in district", DS, "- skipping"))
next
}
ind_values <- cases_dist[[ind$col]]
if (!is.numeric(ind_values) || all(is.na(ind_values)) || length(ind_values) == 0) {
warning(paste("Invalid data for", ind$col, "in district", DS, "- skipping"))
next
}
ind_norm <- getNormalized(ind_values)
valid_idx <- !is.na(ind_values) # Logical index of non-NA values
valid_ind_norm <- ind_norm[valid_idx]
valid_dates <- cases_dist$Date[valid_idx]
if (length(valid_ind_norm) < 2) {
warning(paste("Fewer than 2 valid observations for", ind$col, "in district", DS, "- skipping"))
next
}
# Create time series
# NOTE(review): dropping NA months makes the ts() contiguous, so a gap
# shifts the month labels of all later observations — confirm acceptable.
start_year <- as.numeric(format(valid_dates[1], "%Y"))
start_month <- as.numeric(format(valid_dates[1], "%m"))
ind_ts <- ts(valid_ind_norm, start = c(start_year, start_month), deltat = 1/12)
# Perform STL decomposition
ind_stl <- stlplus(ind_ts, s.window = "periodic")
# Ensure STL output matches valid dates
ind_stl_ts <- as.data.frame(ind_stl$data[, 1:4])
if (nrow(ind_stl_ts) != length(valid_dates)) {
warning(paste("STL output length mismatch for", ind$col, "in district", DS))
next
}
# Perform Mann-Kendall test (seasonal variant; NA p-value on failure)
mk_result <- tryCatch({
smk.test(ind_ts)
}, error = function(e) {
warning(paste("Mann-Kendall test failed for", ind$col, "in district", DS, ":", e$message))
list(p.value = NA)
})
# Perform Sen's slope (seasonal variant; NA on failure)
sens_result <- tryCatch({
sea.sens.slope(ind_ts)
}, error = function(e) {
warning(paste("Sen's slope failed for", ind$col, "in district", DS, ":", e$message))
NA
})
# Prepare results: STL components plus per-series test statistics
ind_stl_ts$type <- ind$type
ind_stl_ts$dates <- valid_dates
ind_stl_ts$MK_p <- ifelse(!is.null(mk_result$p.value), mk_result$p.value, NA)
ind_stl_ts$sens_slope <- sens_result
ind_stl_ts$District <- DS
# Append to list
STL_result_DF_norm_list[[paste(DS, ind$col, sep = "_")]] <- ind_stl_ts
}
}
# Combine results
STL_result_DF_norm <- rbindlist(STL_result_DF_norm_list, fill = TRUE)
# Extract unique slope results (one row per district x adjustment method)
STL_result_DF_slopes <- unique(STL_result_DF_norm[, c("type", "MK_p", "sens_slope", "District")])
# Categorize slopes based on significance.
# FIX: the original blanked slopes only via which(MK_p > .05); rows whose
# Mann-Kendall test failed (MK_p == NA) kept their slope value and were
# mis-labelled as significant trends. Missing p-values are now treated as
# non-significant. A slope of exactly 0 is also labelled 'non significatif'
# (the original gave it an empty string).
STL_result_DF_slope <- STL_result_DF_slopes %>%
  dplyr::mutate(
    plotting_sens_slope = ifelse(is.na(MK_p) | MK_p > .05, NA, sens_slope),
    slope = dplyr::case_when(
      plotting_sens_slope < 0 ~ 'Diminution',
      plotting_sens_slope > 0 ~ 'augmentation',
      TRUE ~ 'non significatif' # NA p-value, non-significant, or zero slope
    ),
    adm2 = District # Key used later to join onto the shapefile
  )
# SPATIAL ANALYSIS: SHAPEFILE INTEGRATION
# ---------------------------------------
# Load and prepare district shapefiles for mapping.
# FIX: read the shapefile once — the original called st_read() twice on the
# identical file, duplicating an expensive read and the hard-coded path.
# NOTE(review): the path is an absolute, machine-specific location — replace
# with a project-relative path (e.g. here::here()) before sharing.
HDsh <- st_read("C:/Users/exh1349/NU-malaria-team Dropbox/projects/hbhi_burkina/project_notes/publication/retrospective analysis 2023/Shapefiles/BFA_Districts_Sanitaires_2022.shp")
nlleshapefile_sf = HDsh
# Standardize district names between data and shapefile
# [District name recoding...]
# Merge trend results with spatial data
# NOTE(review): full_join keeps districts present in only one side, producing
# rows with missing geometry or missing trend values if names mismatch.
nvlle_HD_slope = STL_result_DF_slope %>%
full_join(nlleshapefile_sf, ., by = c('adm2'))
# CREATE TREND MAPS
# -----------------
# Visualize spatial patterns of malaria trends
# Sen's slope map with significance coloring
sens_slope = nvlle_HD_slope %>%
tm_shape() +
tm_polygons("slope",
# Colors are assigned to the slope categories in their level order —
# verify the red/blue/gray mapping matches the legend text.
palette = c("#D73027", "#4575B4", "#999999"), # Red for decrease, blue for increase, gray for non-significant
title = "Trend Direction\n(Red=Decrease, Blue=Increase, Gray=Non-significant)") +
tm_facets("type", ncol = 2) + # Separate maps for each adjustment method
tm_layout(legend.outside = TRUE,
legend.title.size = 0.6,
legend.text.size = 0.6,
frame = FALSE)
# Rate variation maps (2020-2024 comparison)
# Define a function to create the tmap plot.
#
# Builds a choropleth of one rate-variation column, writes it to a JPEG,
# and returns the tmap object (so it can also be printed interactively).
#
# Args:
#   data      sf object containing the rate columns
#   rate_col  name of the column to map (string)
#   file_name output JPEG path
#   breaks    numeric vector of class breaks for the legend
#   title     legend / main title
create_rate_map <- function(data, rate_col, file_name, breaks, title = "Variation de taux") {
  tm <- tm_shape(data) +
    tm_polygons(
      col = rate_col,
      title = title,
      breaks = breaks,
      palette = "-RdYlBu", # Red-Yellow-Blue diverging palette
      legend.show = TRUE
    ) +
    tm_layout(
      main.title = title,
      main.title.size = 1,
      main.title.position = "center",
      legend.outside = TRUE,
      legend.title.size = 0.6,
      legend.text.size = 0.6,
      frame = FALSE
    )
  # Save to JPEG.
  # FIX: on.exit() guarantees the graphics device is closed even if
  # print(tm) errors; the original left a dangling device on failure.
  jpeg(file_name, width = 1600, height = 1200, res = 300)
  on.exit(dev.off(), add = TRUE)
  print(tm)
  return(tm)
}
# Define parameters for rate variation maps: one configuration per
# adjustment method, all sharing the same class breaks.
shared_breaks <- c(-100, 0, 50, 100, 250)
rate_configs <- unname(Map(
  function(col, file, title) {
    list(col = col, file = file, breaks = shared_breaks, title = title)
  },
  c("rate", "rateN1", "rateN2", "rateN3"),
  c("N_rate_crude.jpg", "N1_rate.jpg", "N2_rate.jpg", "N3_rate.jpg"),
  c("Taux brut de variation", "Taux de variation N1",
    "Taux de variation N2", "Taux de variation N3")
))
# Apply the function to all configurations.
# FIX: the original passed `data = HD`, which is never defined in this
# script (the shapefile is loaded as HDsh / nlleshapefile_sf) and which
# would not carry the rate-variation columns anyway. Join the district-level
# rate results onto the shapefile first so "rate", "rateN1", ... exist.
# NOTE(review): assumes the shapefile's district name column is `adm2` and
# matches `Augment_dimuni$districts` — confirm the join keys.
rate_map_data <- nlleshapefile_sf %>%
  dplyr::left_join(Augment_dimuni, by = c("adm2" = "districts"))
lapply(rate_configs, function(config) {
  create_rate_map(
    data = rate_map_data,
    rate_col = config$col,
    file_name = config$file,
    breaks = config$breaks,
    title = config$title
  )
})
# OUTPUT AND EXPORT
# -----------------
# Save visualizations for reporting
# Save Sen's slope map
jpeg('sen_slope_map.jpg', width = 1600, height = 1200, res = 300)
print(sens_slope)
dev.off()
# Save incidence trend plots in a 2x2 grid
# NOTE(review): grid.arrange() comes from gridExtra, which is not listed in
# the pacman::p_load() call of the original script — ensure it is loaded.
jpeg('incidence_trends.jpg', width = 1600, height = 1200, res = 300)
grid.arrange(p1, p2, p3, p4, ncol = 2)
dev.off()
# SUMMARY OUTPUT
# --------------
# Print summary statistics
cat("Trend Analysis Summary (2020-2024)\n")
cat("=================================\n\n")
# Count districts by trend direction for each adjustment method.
# Percentages here are computed within each adjustment method (the data are
# filtered to one `type` before n / sum(n)).
for (adjustment_type in unique(STL_result_DF_slope$type)) {
cat("Adjustment:", adjustment_type, "\n")
# trend_summary is overwritten on each iteration; it is only printed here.
trend_summary <- STL_result_DF_slope %>%
filter(type == adjustment_type) %>%
count(slope) %>%
mutate(percentage = n / sum(n) * 100)
print(trend_summary)
cat("\n")
}
# INTERPRETATION GUIDELINES
# -------------------------
# The analysis provides three key outputs:
# 1. Incidence maps for each year (2020-2024) - Shows spatial distribution of malaria burden
# 2. Rate of change maps (2020 vs 2024) - Shows percentage change in incidence over time
# 3. Sen's slope maps with Mann-Kendall significance - Shows trend direction and statistical significance
#
# Interpretation:
# - Red colors: Significant decreasing trends (p < 0.05)
# - Blue colors: Significant increasing trends (p < 0.05)
# - Gray colors: Non-significant trends (p >= 0.05)
# - Larger absolute values indicate stronger trends
#
# These results can inform subnational targeting of malaria interventions by identifying:
# - Districts with persistently high incidence (require intensive interventions)
# - Districts with increasing trends (require preventive measures)
# - Districts with decreasing trends (success stories to learn from)