# Install pacman only if it's not already installed
if (!requireNamespace("pacman", quietly = TRUE)) {
install.packages("pacman")
}
# install or load relevant packages
pacman::p_load(
readxl, # import and read Excel files
ggplot2, # plotting
rio, # for importing and exporting files
gridExtra, # plot arrangements
here, # shows path to file
stringr, # clean up names,
xts, # return first or last element of a vector
tidyverse, # contains functions for data manipulations
sf, # spatial features for use in mapping
scales # calculates "pretty" breaks
)Incidence adjustment 2: incomplete reporting
Overview
Second adjustment: A second adjustment is made to account for the varying reporting rates (RRs) per area-time by inflating the number of corrected confirmed cases by the fraction of the expected records not received (N2). Through this step, it is assumed that the non-reported data follows a similar distribution to the data reported. Reporting rates can be calculated per health facility type to avoid an over- or underestimate of the effect of missing data observed in smaller or larger health facilities, respectively. An alternative approach to this adjustment is the imputation of data for the months of missing values per health facility. This can be computationally intensive and requires a relatively complete database to appropriately inform imputations, but it would provide a complete database for which a reporting rate adjustment would not be necessary. The equation for second incidence adjustment is given by: N2= N1/d
Where
- N2 are the corrected number of cases for testing and reporting rates;
- d are the reporting rates (records received / records expected), which can be weighted per the type of HF that did not report in a given point in time
- TBD
Step-by-Step Instructions
To skip the step-by-step explanation, jump to the full code at the end of this page.
Step 1: Load required packages and files
Step 1.1: Load packages
The first step is to install and load the libraries required for this section.
Step 1.2: Import data files
We bring in all the data files we will use in this section. They include 1. Monthly routine data at facility level that we worked on in adjusted1 2. The monthly incidence data we saved in the adjusted1 incidence page
Step 2: Calculate reporting rate
Here we calculate the monthly reporting rate using, adjusted incidence (N1), as our indicator of interest. This is to account for months in a facility where there were stock out of RDTs and so all cases were presumed, hence no cases were tested. We use the monthly facility level data for this calculation and then summarize afterwards at operation admin level.
The code performs the following steps:
Create a variable which gives a value of 1 if the facility reported for N1 and 0 otherwise
Create a variable which takes the first reporting date into consideration and gives a value of 1 if the facility is expected to report for the N1 and 0 otherwise
Count the number of non-NA reports for the indicator of interest (observed reports), aggregated by month and by the admin unit level of analysis
Count the number of expected reports by month and admin unit level; here we use find the first time the facility reported in the timeseries to determine it’s reporting status for the rest of the time series
Merge the two datasets (observed reports and expected reports, i.e. reporting rate numerator and denominator)
Compute the reporting rate
mon_rep_vars <- routine_data |>
# generate first date of reporting
dplyr::mutate(
date_obj = as.Date(date_obj),
# create a reporting variable condition on whether suspected or presumed was reported
rep_var = if_else(!is.na(susp) | !is.na(pres), 1L, 0L)
) |>
group_by(hf_uid) |>
mutate(
first_month_reported_date = suppressWarnings(min(date_obj[rep_var == 1], na.rm = TRUE)),
first_month_reported_date = if_else(
is.infinite(first_month_reported_date),
as.Date(NA),
first_month_reported_date
),
rep_expected = if_else(
!is.na(first_month_reported_date) & date_obj >= first_month_reported_date,
1L,
0L
)
) |>
ungroup()
# Now we define a variable for observed reporting for N1, summarize at adm3 and calculate reporting rate
mon_rep_rate <- mon_rep_vars |>
# create a variable if N1 was reported
dplyr::mutate(
N1_rep = if_else(!is.na(conf_tpr), 1L, 0L)
) |>
# aggregate at adm3 level
dplyr::group_by(adm0, adm1, adm2, adm3, year, month) |>
# calculate observed and expected reports
dplyr::summarise(
exp_rep = sum(rep_expected, na.rm = TRUE),
obs_rep = sum(N1_rep, na.rm = TRUE
)) |>
# calculate reporting rate
dplyr::mutate(reprate = obs_rep/exp_rep
)
# check of reporting rate is greater than 1
mon_rep_rate |>
dplyr::filter(reprate > 1)
# view data
knitr::kable(head(mon_rep_rate, 15))To adapt the code:
Step 3: Join incidence data with Reporting rate data
We start with joining the file created under reporting rate to the incidence file we have been working with. Reporting rates are usually summarized at nearest operational admin level above health facilities by month-year. Here we use adm3 for illustration but countries can adapt to their setting.
Note: it is highly recommended that first and second adjusted incidence cases are calculated by month.
Step 3.2: Map reporting rate
Step 4: Calculate adjusted incidence (N2)
step 4.1: Calculate monthly adjusted incidence (N2) cases
This involves adjusting for reporting rates
Next we calculate for adjusted cases by accounting for reporting rates at adm3 level. We account for reporting rates by multiplying the adjusted 1 cases by the proportion of non-reporting i.e 1-reporting rate. The result is the additional number of cases should all facilities have reported
Step 4.2: Calculate annual adjusted incidence 2
For the purposes of SNT annual incidence estimates are more useful to compare between years and admin levels
## Aggregate the dataset by year
adj2_inc_ann <- inc_data |>
dplyr::group_by(adm0, adm1, adm2, adm3, year) |>
dplyr::summarise(
across(c(susp:conf_tpr, adjcases1, adjinc1, adjcases2, adjinc2), sum, na.rm=TRUE),
across(c(pop, test_rate, tpr, reprate), mean, na.rm = TRUE)
) |>
ungroup()
head(adj2_inc_ann)
# calculate annual crude incidence
adj2_inc_ann <- adj2_inc_ann |>
dplyr::mutate(
ann_crude = crudeinc * 1000,
ann_adjinc1 = adjinc1 * 1000,
ann_adjinc2 = adjinc2 * 1000)
# visualize the first observations of the data set
head(adj2_inc_ann, 10)Step 5: Save Updated Files
Now we save our incidence data as a csv file
# Save routine data
rio::export(
routine_data,
file =
here("english/library/stratification", 'epidemiological/data_r', 'routine_data_hf.csv')
)
## Save Monthly Incidence Data Set
rio::export(
inc_data,
file =
here("english/library/stratification", 'epidemiological/data_r', "monthly_inc_data.csv"
))
# Save annual incidence data set
rio::export(
adj2_inc_ann,
file =
here("english/library/stratification", 'epidemiological/data_r', "annual_inc_data.csv"
))Summary
TBD