# ============================================================================
# PART A: DATA PROCESSING
# ============================================================================
# Step 1: Install and load packages
if (! base:: requireNamespace ("pacman" , quietly = TRUE )) {
utils:: install.packages ("pacman" )
}
pacman:: p_load (
readxl, writexl, dplyr, tidyr, stringr, here, kableExtra,
sf, RColorBrewer, ggplot2
)
# Step 2: Read and combine district files
files <- base:: list.files (
path = here:: here ("english/data_r/intervention_data" ),
pattern = " \\ .xlsx$" ,
full.names = TRUE
)
epi_data <- base:: lapply (files, readxl:: read_excel) |>
dplyr:: bind_rows ()
base:: names (epi_data) <- base:: trimws (base:: names (epi_data))
# Step 3: Process date column
month_map <- base:: c (
"January" = "01" , "February" = "02" , "March" = "03" ,
"April" = "04" , "May" = "05" , "June" = "06" ,
"July" = "07" , "August" = "08" , "September" = "09" ,
"October" = "10" , "November" = "11" , "December" = "12"
)
epi_data <- epi_data |>
tidyr:: separate (
periodname,
into = c ("month_name" , "year" ),
sep = " " ,
remove = FALSE
) |>
dplyr:: mutate (
month = month_map[month_name],
year = base:: as.numeric (year),
date = base:: paste (year, month, sep = "-" )
)
# Step 4: Rename columns
epi_data <- epi_data |>
dplyr:: rename (
adm0 = orgunitlevel1,
adm1 = orgunitlevel2,
adm2 = orgunitlevel3,
adm3 = orgunitlevel4,
hf = organisationunitname
)
# Step 5: Calculate intervention totals
# Penta
epi_data$ penta1 <- rowSums (epi_data[, c ('Pentavalent 1st dose In_Facility_X, 0-11m_X' , 'Pentavalent 1st dose Outreach_X, 0-11m_X' )], na.rm = TRUE )
epi_data$ penta3 <- rowSums (epi_data[, c ('Pentavalent 3rd dose In_Facility_X, 0-11m_X' , 'Pentavalent 3rd dose Outreach_X, 0-11m_X' )], na.rm = TRUE )
# LLINs given during penta3 and anc
epi_data$ llins_given_during_penta3 <- rowSums (epi_data[, c ('LLITN given at Pentavalent 3rd dose In_Facility_X, 0-11m_X' , 'LLITN given at Pentavalent 3rd dose Outreach_X, 0-11m_X' )], na.rm = TRUE )
epi_data$ llins_given_during_anc <- rowSums (epi_data[, c ('Antenatal client given LLITN In_Facility_X' , 'Antenatal client given LLITN Outreach_X' )], na.rm = TRUE )
# Measles
epi_data$ measles_infants <- rowSums (epi_data[, c ('Measles 1st dose In_Facility_X, 0-11m_X' , 'Measles 1st dose Outreach_X, 0-11m_X' )], na.rm = TRUE )
epi_data$ measles_child <- rowSums (epi_data[, c ('Measles 1st dose In_Facility_X, 12-59m_X' , 'Measles 1st dose Outreach_X, 12-59m_X' )], na.rm = TRUE )
# IPTi
epi_data$ ipti1 <- rowSums (epi_data[, c ('IPTi 1st dose given In_Facility_X, 0-11m_X' , 'IPTi 1st dose given Outreach_X, 0-11m_X' )], na.rm = TRUE )
epi_data$ ipti2 <- rowSums (epi_data[, c ('IPTi 2nd dose given In_Facility_X, 0-11m_X' , 'IPTi 2nd dose given Outreach_X, 0-11m_X' )], na.rm = TRUE )
epi_data$ ipti3 <- rowSums (epi_data[, c ('IPTi 3rd dose given In_Facility_X, 0-11m_X' , 'IPTi 3rd dose given Outreach_X, 0-11m_X' )], na.rm = TRUE )
# ANC
epi_data$ anc1 <- rowSums (epi_data[, c ('Antenatal client 1st visit In_Facility_X' , 'Antenatal client 1st visit Outreach_X' )], na.rm = TRUE )
epi_data$ anc4 <- rowSums (epi_data[, c ('Antenatal client 4th visit In_Facility_X' , 'Antenatal client 4th visit Outreach_X' )], na.rm = TRUE )
epi_data$ anc8 <- rowSums (epi_data[, c ('Antenatal client 8th visit In_Facility_X' , 'Antenatal client 8th visit Outreach_X' )], na.rm = TRUE )
# IPT
epi_data$ ipt1 <- rowSums (epi_data[, c ('Antenatal client IPT 1st dose In_Facility_X' , 'Antenatal client IPT 1st dose Outreach_X' )], na.rm = TRUE )
epi_data$ ipt2 <- rowSums (epi_data[, c ('Antenatal client IPT 2nd dose In_Facility_X' , 'Antenatal client IPT 2nd dose Outreach_X' )], na.rm = TRUE )
epi_data$ ipt3 <- rowSums (epi_data[, c ('Antenatal client IPT 3rd dose In_Facility_X' , 'Antenatal client IPT 3rd dose Outreach_X' )], na.rm = TRUE )
# Vitamin A
epi_data$ vitamin_infants <- rowSums (epi_data[, c ('Vitamin A supplement 6-11 months In_Facility_X' , 'Vitamin A supplement 6-11 months Outreach_X' )], na.rm = TRUE )
epi_data$ vitamin_child <- rowSums (epi_data[, c ('Vitamin A supplement 12-59 months In_Facility_X' , 'Vitamin A supplement 12-59 months Outreach_X' )], na.rm = TRUE )
# Step 6: Import population data
pop_data <- readxl:: read_excel (here:: here ("english/data_r/pop/sle_pop_data.xlsx" ))
# Step 7: Aggregate by year
intervention_cols <- base:: c (
"penta1" , "penta3" , "ipti1" , "ipti2" , "ipti3" ,
"ipt1" , "ipt2" , "ipt3" , "vitamin_infants" , "vitamin_child" ,
"measles_infants" , "measles_child" , "anc1" , "anc4" , "anc8" ,
"llins_given_during_anc" , "llins_given_during_penta3"
)
yearly_data <- base:: list ()
for (yr in 2015 : 2023 ) {
yearly_data[[base:: as.character (yr)]] <- epi_data |>
dplyr:: filter (year == yr) |>
dplyr:: group_by (adm1, adm2, adm3) |>
dplyr:: summarise (
dplyr:: across (dplyr:: all_of (intervention_cols), ~ base:: sum (.x, na.rm = TRUE )),
.groups = "drop"
) |>
dplyr:: rename_with (~ base:: paste0 (.x, "_" , yr), dplyr:: all_of (intervention_cols))
}
epi_merged <- purrr:: reduce (yearly_data, dplyr:: full_join, by = base:: c ("adm1" , "adm2" , "adm3" ))
# Step 8: Calculate coverage rates
for (yr in 2015 : 2023 ) {
pop_col <- base:: paste0 ("pop" , yr)
if (! pop_col %in% base:: names (pop_data)) next
# Penta1 coverage
epi_merged[[base:: paste0 ("penta1_coverage_" , yr)]] <- base:: round (
(epi_merged[[base:: paste0 ("penta1_" , yr)]] / (pop_data[[pop_col]] * 0.037 )) * 100 , 2
)
# Penta3 coverage
epi_merged[[base:: paste0 ("penta3_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("penta1_" , yr)]]) | epi_merged[[base:: paste0 ("penta1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("penta3_" , yr)]] / epi_merged[[base:: paste0 ("penta1_" , yr)]]) * 100 , 2 )
)
# IPTi1 coverage
epi_merged[[base:: paste0 ("ipti1_coverage_" , yr)]] <- base:: round (
(epi_merged[[base:: paste0 ("ipti1_" , yr)]] / (pop_data[[pop_col]] * 0.037 )) * 100 , 2
)
# IPTi2 coverage
epi_merged[[base:: paste0 ("ipti2_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipti1_" , yr)]]) | epi_merged[[base:: paste0 ("ipti1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("ipti2_" , yr)]] / epi_merged[[base:: paste0 ("ipti1_" , yr)]]) * 100 , 2 )
)
# IPTi3 coverage
epi_merged[[base:: paste0 ("ipti3_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipti1_" , yr)]]) | epi_merged[[base:: paste0 ("ipti1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("ipti3_" , yr)]] / epi_merged[[base:: paste0 ("ipti1_" , yr)]]) * 100 , 2 )
)
# IPT1 coverage
epi_merged[[base:: paste0 ("ipt1_coverage_" , yr)]] <- base:: round (
(epi_merged[[base:: paste0 ("ipt1_" , yr)]] / (pop_data[[pop_col]] * 0.037 )) * 100 , 2
)
# IPT2 coverage
epi_merged[[base:: paste0 ("ipt2_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipt1_" , yr)]]) | epi_merged[[base:: paste0 ("ipt1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("ipt2_" , yr)]] / epi_merged[[base:: paste0 ("ipt1_" , yr)]]) * 100 , 2 )
)
# IPT3 coverage
epi_merged[[base:: paste0 ("ipt3_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipt1_" , yr)]]) | epi_merged[[base:: paste0 ("ipt1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("ipt3_" , yr)]] / epi_merged[[base:: paste0 ("ipt1_" , yr)]]) * 100 , 2 )
)
# Vitamin A infants coverage
epi_merged[[base:: paste0 ("vitamin_infants_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("vitamin_infants_" , yr)]]) | base:: is.na (pop_data[[pop_col]]),
0 ,
base:: round ((epi_merged[[base:: paste0 ("vitamin_infants_" , yr)]] / (pop_data[[pop_col]] * 0.02 )) * 100 , 2 )
)
# Vitamin A child coverage
epi_merged[[base:: paste0 ("vitamin_child_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("vitamin_child_" , yr)]]) | base:: is.na (pop_data[[pop_col]]),
0 ,
base:: round ((epi_merged[[base:: paste0 ("vitamin_child_" , yr)]] / (pop_data[[pop_col]] * 0.137 )) * 100 , 2 )
)
# Measles infants coverage
epi_merged[[base:: paste0 ("measles_infants_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("measles_infants_" , yr)]]) | base:: is.na (pop_data[[pop_col]]),
0 ,
base:: round ((epi_merged[[base:: paste0 ("measles_infants_" , yr)]] / (pop_data[[pop_col]] * 0.037 )) * 100 , 2 )
)
# Measles child coverage
epi_merged[[base:: paste0 ("measles_child_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("measles_child_" , yr)]]) | base:: is.na (pop_data[[pop_col]]),
0 ,
base:: round ((epi_merged[[base:: paste0 ("measles_child_" , yr)]] / (pop_data[[pop_col]] * 0.137 )) * 100 , 2 )
)
# ANC1 coverage
epi_merged[[base:: paste0 ("anc1_coverage_" , yr)]] <- base:: round (
(epi_merged[[base:: paste0 ("anc1_" , yr)]] / (pop_data[[pop_col]] * 0.044 )) * 100 , 2
)
# ANC4 coverage
epi_merged[[base:: paste0 ("anc4_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("anc1_" , yr)]]) | epi_merged[[base:: paste0 ("anc1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("anc4_" , yr)]] / epi_merged[[base:: paste0 ("anc1_" , yr)]]) * 100 , 2 )
)
# ANC8 coverage
epi_merged[[base:: paste0 ("anc8_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("anc1_" , yr)]]) | epi_merged[[base:: paste0 ("anc1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("anc8_" , yr)]] / epi_merged[[base:: paste0 ("anc1_" , yr)]]) * 100 , 2 )
)
# LLINs during ANC coverage
epi_merged[[base:: paste0 ("llins_anc_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("anc1_" , yr)]]) | epi_merged[[base:: paste0 ("anc1_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("llins_given_during_anc_" , yr)]] / epi_merged[[base:: paste0 ("anc1_" , yr)]]) * 100 , 2 )
)
# LLINs during Penta3 coverage
epi_merged[[base:: paste0 ("llins_penta3_coverage_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("penta3_" , yr)]]) | epi_merged[[base:: paste0 ("penta3_" , yr)]] == 0 ,
0 ,
base:: round ((epi_merged[[base:: paste0 ("llins_given_during_penta3_" , yr)]] / epi_merged[[base:: paste0 ("penta3_" , yr)]]) * 100 , 2 )
)
# Penta dropout rate
epi_merged[[base:: paste0 ("penta_dropout_rate_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("penta1_" , yr)]]) | epi_merged[[base:: paste0 ("penta1_" , yr)]] == 0 ,
0 ,
base:: round (((epi_merged[[base:: paste0 ("penta1_" , yr)]] - epi_merged[[base:: paste0 ("penta3_" , yr)]]) / epi_merged[[base:: paste0 ("penta1_" , yr)]]) * 100 , 2 )
)
# IPTi dropout rate
epi_merged[[base:: paste0 ("ipti_dropout_rate_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipti1_" , yr)]]) | epi_merged[[base:: paste0 ("ipti1_" , yr)]] == 0 ,
0 ,
base:: round (((epi_merged[[base:: paste0 ("ipti1_" , yr)]] - epi_merged[[base:: paste0 ("ipti3_" , yr)]]) / epi_merged[[base:: paste0 ("ipti1_" , yr)]]) * 100 , 2 )
)
# IPT dropout rate
epi_merged[[base:: paste0 ("ipt_dropout_rate_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("ipt1_" , yr)]]) | epi_merged[[base:: paste0 ("ipt1_" , yr)]] == 0 ,
0 ,
base:: round (((epi_merged[[base:: paste0 ("ipt1_" , yr)]] - epi_merged[[base:: paste0 ("ipt3_" , yr)]]) / epi_merged[[base:: paste0 ("ipt1_" , yr)]]) * 100 , 2 )
)
# ANC dropout rate
epi_merged[[base:: paste0 ("anc_dropout_rate_" , yr)]] <- base:: ifelse (
base:: is.na (epi_merged[[base:: paste0 ("anc1_" , yr)]]) | epi_merged[[base:: paste0 ("anc1_" , yr)]] == 0 ,
0 ,
base:: round (((epi_merged[[base:: paste0 ("anc1_" , yr)]] - epi_merged[[base:: paste0 ("anc8_" , yr)]]) / epi_merged[[base:: paste0 ("anc1_" , yr)]]) * 100 , 2 )
)
}
# Step 9: Display the data and save
writexl:: write_xlsx (epi_merged, here:: here ("past_intervention_coverage_processed_data.xlsx" ))
epi_merged |>
head () |>
knitr:: kable ("html" , caption = "Preview of epi_merged dataset" ) |>
kableExtra:: kable_styling (bootstrap_options = c ("striped" , "hover" , "condensed" , "responsive" ))
# ============================================================================
# PART B: SPATIAL VISUALIZATION
# ============================================================================
# Step 10: Load intervention data
epi_merged <- readxl:: read_excel (here:: here ("past_intervention_coverage_processed_data.xlsx" ))
# Step 11: Load shapefiles
shapefile_chiefdom <- sf:: st_read (here:: here ("english/data_r/shapefiles/Chiefdom2021.shp" ))
shapefile_district <- sf:: st_read (here:: here ("english/data_r/shapefiles/District 2021.shp" ))
# Step 12: Load merge key
key_file <- readxl:: read_excel (here:: here ("english/data_r/shapefiles/Key_shapefile.xlsx" ))
# Step 13: Merge datasets
epi_merged <- base:: merge (epi_merged, key_file, by = "adm3" , all.x = TRUE )
data <- base:: merge (shapefile_chiefdom, epi_merged, by.x = base:: c ("FIRST_DNAM" , "FIRST_CHIE" ), by.y = base:: c ("FIRST_DNAM" , "FIRST_CHIE" ), all.x = TRUE )
# Step 14: Define visualization parameters
colors <- RColorBrewer:: brewer.pal (5 , "Blues" )
columns_to_plot <- base:: c (
"penta1_coverage" , "penta3_coverage" , "penta_dropout_rate" ,
"vitamin_infants_coverage" , "vitamin_child_coverage" ,
"measles_infants_coverage" , "measles_child_coverage" ,
"ipti1_coverage" , "ipti2_coverage" , "ipti3_coverage" ,
"ipti_dropout_rate" , "ipt1_coverage" , "ipt2_coverage" ,
"ipt3_coverage" , "ipt_dropout_rate" , "anc1_coverage" ,
"anc4_coverage" , "anc8_coverage" , "llins_penta3_coverage" ,
"llins_anc_coverage"
)
titles <- base:: c (
"Penta1 coverage" , "Penta3 coverage" , "Penta dropout rate" ,
"Vitamin A 6-11 months coverage" , "Vitamin A 12-59 months coverage" ,
"Measles 0-11 months coverage" , "Measles 12-59 months coverage" ,
"IPTi1 coverage" , "IPTi2 coverage" , "IPTi3 coverage" ,
"IPTi dropout rate" , "IPT1 coverage" , "IPT2 coverage" ,
"IPT3 coverage" , "IPT dropout rate" , "ANC 1st visit coverage" ,
"ANC 4th visit coverage" , "ANC 8th visit coverage" ,
"LLINs given during 3rd dose of pentavalent" ,
"LLINs given during ANC visit"
)
# Step 15: Generate choropleth maps
year <- 2023
for (col_index in base:: seq_along (columns_to_plot)) {
coverage_prefix <- columns_to_plot[col_index]
plot_title <- titles[col_index]
coverage_col <- base:: paste0 (coverage_prefix, "_" , year)
if (! (coverage_col %in% base:: names (data))) {
base:: print (base:: paste ("Skipping" , coverage_col, "- column not found" ))
next
}
data[[coverage_col]] <- base:: as.numeric (base:: as.character (data[[coverage_col]]))
data[[coverage_col]][base:: is.na (data[[coverage_col]])] <- 0
data$ coverage_category <- base:: cut (
data[[coverage_col]],
breaks = base:: c (- Inf , 25 , 50 , 75 , 100 , Inf ),
labels = base:: c ("<25%" , "25-50%" , "50-75%" , "75-100%" , ">100%" ),
include.lowest = TRUE
)
counts <- data |>
dplyr:: group_by (coverage_category) |>
dplyr:: summarise (count = dplyr:: n ()) |>
dplyr:: ungroup ()
non_zero_counts <- counts[counts$ count > 0 , ]
labels_with_counts <- base:: paste (non_zero_counts$ coverage_category, "(" , non_zero_counts$ count, ")" , sep = "" )
data$ coverage_category <- base:: factor (
data$ coverage_category,
levels = base:: c ("<25%" , "25-50%" , "50-75%" , "75-100%" , ">100%" )
)
plot <- ggplot2:: ggplot (data) +
ggplot2:: geom_sf (ggplot2:: aes (fill = coverage_category), color = "gray" , linewidth = 0.5 ) +
ggplot2:: geom_sf (data = shapefile_district, fill = NA , color = "black" , linewidth = 1.0 ) +
ggplot2:: scale_fill_manual (
values = colors[1 : base:: length (non_zero_counts$ coverage_category)],
breaks = non_zero_counts$ coverage_category,
labels = labels_with_counts,
name = "Coverage (%)"
) +
ggplot2:: labs (title = base:: paste (plot_title, year)) +
ggplot2:: theme_minimal () +
ggplot2:: theme (
plot.title = ggplot2:: element_text (size = 14 , face = "bold" , hjust = 0.5 ),
legend.title = ggplot2:: element_text (size = 12 ),
legend.text = ggplot2:: element_text (size = 10 ),
legend.position = "right" ,
panel.grid = ggplot2:: element_blank (),
axis.text = ggplot2:: element_blank (),
axis.ticks = ggplot2:: element_blank ()
)
base:: print (plot)
}