# Install pacman only if it's not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# install or load relevant packages
pacman::p_load(
readxl, # import and read Excel files
ggplot2, # plotting
rio, # for importing and exporting files
gridExtra, # plot arrangements
here, # shows path to file
stringr, # clean up names,
xts, # return first or last element of a vector
tidyverse, # contains functions for data manipulations
sf, # spatial features for use in mapping
scales # calculates "pretty" breaks
)Incidence adjustment 1: incomplete testing
Overview
First adjustment: To adjust for incomplete testing rate, a first correction is applied to the number of presumed cases using the test positivity rate (TPR = confirmed / tested) and added to the number of confirmed malaria cases by either RDT or microscopy. This adjustment allows estimating the number of additional cases not tested that were likely to be true malaria cases (N1), assuming that the TPR among the presumed is similar to the TPR among the cases tested. the formula for estimating the first incidence adjustment is given by; N1 = a + (b*(a/c))
Where
- N1 is the corrected number of cases for testing rates
- a is the confirmed malaria cases reported from the public sector;
- b is the presumed cases (not tested but treated as malaria cases);
- c is the tested cases;
- TBD
Step-by-Step Instructions
To skip the step-by-step explanation, jump to the full code at the end of this page.
Step 1: Load packages and datasets
Step 1.1: Load required packages
The first step is to install and load the libraries required for this section.
Step 1.2: Load incidence data file
We bring in the monthly incidence data we saved in the crude incidence page as well as the facility level cleaned routine data we will be using to calculate test positivity rate.
Step 2: Prepare and examine test positivity rates (TPR)
We then calculate monthly testing rates and test positivity rates. We will conduct the calculation at the facility level and later join the datasets to the incidence data we have been working with.
Step 2.1: Calculate testing rate and test positivity rate
## Calculating Adjusted Incidence 1
routine_data <- clean_malaria_routine_data_final |>
# calculate testing rate
dplyr::mutate(test_rate = case_when(
susp == 0 & test == 0 ~ NA_real_, # if both susp and test are 0, set to NA
susp == 0 ~ NA_real_, # if susp is 0 but test is not, set to NA
TRUE ~ test / susp # perform normal cacluation if otherwise
),
# Perform a similar algorithm for tpr
# calculate test positivity rate
tpr = case_when(
test == 0 & conf == 0 ~ NA_real_, # if both test and conf are 0, set to NA
test == 0 ~ NA_real_, # if test is 0 but conf is not, set to NA
TRUE ~ conf/test # perform normal cacluation if otherwise
)
)
summary(routine_data)Step 2.2: Visualize testing rate
After calculating for tr and tpr you want to visualize the range of values. We plot values of testing rate for each health facility over time to check for outliers
## Plot a Box Plot of Testing Rate
ggplot(routine_data, aes(x = factor(hf), y = test_rate)) +
geom_boxplot() +
labs(title = "Distribution of Testing Rate by health facility",
x = "Health facility",
y = "Testing Rate") +
theme_minimal()
# if the values are within expected ranges, you can use the density plot below which provide insights into the distribution
ggplot(routine_data, aes(x = test_rate)) +
geom_density(fill = "green", alpha = 0.5) +
labs(title = "Density Plot of Testing Rate",
x = "Test Positivity Rate",
y = "Density") +
theme_minimal()Step 2.3: Visualize test positivity rate
We plot values of test positivity rate for each health facility over time to check for outliers
## Plot a Boxplot of Testing Rate
ggplot(routine_data, aes(x = factor(hf), y = tpr)) +
geom_boxplot() +
labs(title = "Distribution of Test Positivity Rate by Health facility",
x = "Health facility",
y = "Test Positivity Rate") +
theme_minimal()
# if the values are within expected ranges, you can use the density plot below which provide insights into the distribution
ggplot(routine_data, aes(x = tpr)) +
geom_density(fill = "green", alpha = 0.5) +
labs(title = "Density Plot of Test Positivity Rate",
x = "Test Positivity Rate",
y = "Density") +
theme_minimal()Step 2.4: Generate clean database with valid TPR for each row
(row = admin unit or facility depending on what you chose in Step 1)
If you identify testing rates and tpr above 1, efforts should be made find reasons why testing rates and tpr are >1 for a particular month and the necessary corrections effected. As a conservative measure, we may equate testing rates >1 or tpr >1 to 1.
Step 3: Estimate additional cases if presumed cases had been tested
Step 3.1: Apply test positivity rate to the presumed cases
To be able to determine the number of confirmed cases out the presumed cases, we assume that the tpr for those not tested are the same as those tested. Here we apply the calculated tpr to the pres cases to derive additional confirmed cases
Step 3.2: Aggregate calculated cases at operational unit level by month
To be able to proceed with our analysis, we aggregate the TPR dataset at admin3 level (since Sierra Leone is making decisions at the adm3 level) and join to the incidence data we have been using.
## Calculating Adjusted Incidence 1
agg_data <- routine_data |>
group_by(adm1, adm2, adm3, year, month) |>
# select appropriate statistic to summarize variables of interest over. Use sum for counts and mean for proportions
dplyr::summarise(
across(c(allout:maldth, conf_tpr), sum, na.rm = TRUE),
across(c(test_rate, tpr), mean, na.rm = TRUE)
) |>
dplyr::mutate(year = as.numeric(year),
month = as.numeric(month))
head(agg_data)Step 3.3: Join dataset with additional confirmed cases to incidence dataset
Once we have calculated our additional confirmed cases, we join to the incidence data we have been using. Since the incidence data already has the case count variables (alout, susp, test, etc) we only select only the calculated variables to join to the incidence dataset
Step 4: Calculate adjusted cases and adjusted incidence (N1)
Step 4.1 Calculate monthly adjusted incidence cases and incidence
We compute monthly adjusted cases1 by adding the additional confirmed cases calculated to the crude cases
Step 4.2: Calculate annual adjusted incidence proportions (N1)
In the section above, we calculated monthly adjusted incidence estimates for each adm3 level. This might not be helpful enough operationally, unless we want to conduct further analysis to answer other question like the effect of low testing rates on case management.
For SNT analysis, it is helpful to aggregate the data at operational level annually. Note: for count variables, you summarize over sum and use mean for proportions.
Step 4.3: Standardize the calculated incidence proportions
We have calculated the annual adjusted1 incidence proportions. Here we standardize by multiplying by 100 which becomes more useful to compare between years and admin levels
Step 5: Map testing rates and tpr
We want to map and compare testing rates and tpr over time. We reformat the data into long and then join with adm3 shapefile
Step 5.1: Join incidence dataset with shapefile
Step 5.2: Map testing rates by year
Step 5.3: Map test positivity rates by year
Step 6: Save Files
Now we save our incidence data as a csv file
# Save monthly routine health facility data
rio::export(
routine_data,
file =
here("english/library/stratification", 'epidemiological/data_r', 'routine_data_hf.csv')
)
## Save monthly incidence data set
rio::export(
inc_data,
file =
here("english/library/stratification/epidemiological/data_r/monthly_inc_data.csv"
))
# Save annual incidence data set
rio::export(
adj1_inc_ann,
file = here("english/library/stratification/epidemiological/data_r/annual_inc_data.csv"
)
)