596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
def _five_day_bin(timestep):
    """Return the closing day (5, 10, ..., 365) of the 5-day bin of each timestep.

    The timestep is first wrapped onto a 1-based day of a 365-day year and then
    rounded up to the next multiple of five.
    """
    day_of_year = (timestep - 1) % 365 + 1
    return (np.ceil(day_of_year / 5) * 5).astype('int64')


def FiveDayAgebinAnalyzer(jdir, wdir, sweep_variables, age_groups_aggregates=None, cc=None):
    """
    Aggregate daily simulation output into 5-day, age-group-level results and save them as CSV.

    Reads `EIR_daily.csv` and `daily.csv` from `wdir` and `scenarios_wseeds.csv` from `jdir`,
    bins every timestep into 5-day intervals within a 365-day year, aggregates counts per
    age group, derives prevalence and annualized clinical/severe incidence, and merges in
    the scenario table and the 5-day EIR/mosquito summaries.

    Args:
        jdir (str): Directory containing the scenario file `scenarios_wseeds.csv`.
        wdir (str): Working directory containing `daily.csv` and `EIR_daily.csv`; outputs are
            written here as well.
        sweep_variables (list of str): Column names used as grouping keys. The EIR frame is
            reduced to the `index` identifier column and only `index` is read from `daily.csv`,
            and the final EIR merge is keyed on `index`, so the keys must be resolvable from
            those columns (in practice `['index']`).
        age_groups_aggregates (list of lists, optional): Age ranges `[[min_age, max_age], ...]`
            in years. A row belongs to a range when `min_age < agebin <= max_age`. Defaults to
            a standard set of bins when empty or None.
        cc (bool, optional): If truthy, additionally write `mmmpy_ccstep_daily.csv` with a
            reduced column set (climate-change step output according to the original notes).

    Raises:
        FileNotFoundError: Raised by pandas if one of the input CSV files is missing.
        ValueError: Raised by pandas if `daily.csv` lacks one of the expected columns, or by
            this function if none of the requested age groups matches any data.

    Saves:
        mmmpy_5day.csv
        mmmpy_ccstep_daily.csv (if `cc` is truthy).

    Returns:
        None
    """
    print("Running FiveDayAgebinAnalyzer...", flush=True)
    if not age_groups_aggregates:
        age_groups_aggregates = [[0, 0.5], [0.5, 1], [1, 2], [2, 5], [5, 10], [10, 15], [15, 20], [20, 100], [0, 5],
                                 [0, 100]]

    sweep_variables = list(sweep_variables)
    time_keys = sweep_variables + ['day', 'year']
    count_cols = ['n_prev', 'n_inc', 'n_inc_clinical', 'n_inc_severe', 'n']
    bins_per_year = 73  # 365 days / 5-day bins
    days_per_bin = 365 / bins_per_year

    # --- EIR / mosquito data -------------------------------------------------
    eir_df = pd.read_csv(os.path.join(wdir, 'EIR_daily.csv'))
    # Mean across runs per timestep and year
    eir_df = eir_df.groupby(sweep_variables + ['year', 'timestep'])[
        ['simulatedEIR', 'output_target', 'n_infectious_mos', 'n_total_mos_pop']].agg('mean').reset_index()
    # `output_target` is dropped here; the final output takes it from the scenario table
    eir_df = eir_df[['index', 'timestep', 'year', 'simulatedEIR', 'n_infectious_mos', 'n_total_mos_pop']].copy()
    eir_df['day'] = _five_day_bin(eir_df['timestep'])
    # EIR is summed over the bin, mosquito counts are averaged, and the last
    # timestep of the bin is kept as its time stamp
    eir_df = eir_df.groupby(time_keys).agg(
        {'simulatedEIR': 'sum', 'n_infectious_mos': 'mean',
         'n_total_mos_pop': 'mean', 'timestep': 'max'}).reset_index()

    # --- Host data -------------------------------------------------------------
    channels_to_keep = ['index', 'timestep', 'year', 'ageGroup', 'age_upper',
                        'n_inc', 'n_inc_clinical', 'n_inc_severe', 'n', 'nPopulation', 'prevalence_2to10', 'prev']
    df = pd.read_csv(os.path.join(wdir, 'daily.csv'), usecols=channels_to_keep)
    df['agebin'] = (df['age_upper'] / 365).round(1)  # upper age bound converted from days to years
    df['n_prev'] = df['prev'] * df['n']  # number of positive hosts
    df['day'] = _five_day_bin(df['timestep'])
    df = df.groupby(sweep_variables + ['agebin', 'day', 'year']).agg(
        {'n_prev': 'sum',
         'n_inc': 'sum',
         'n_inc_clinical': 'sum',
         'n_inc_severe': 'sum',
         'n': 'sum',
         'prevalence_2to10': 'mean'}).reset_index()

    df_pfpr2to10 = df.groupby(time_keys)[['prevalence_2to10']].agg('mean').reset_index()

    df = df[sweep_variables + ['agebin', 'day', 'year'] + count_cols].copy()
    # Host counts were summed over the days of a bin; convert them back to a per-day average
    df['n'] = df['n'] / days_per_bin
    df['n_prev'] = df['n_prev'] / days_per_bin

    # --- Aggregate per requested age group -----------------------------------
    age_frames = []
    for ages in age_groups_aggregates:
        age_min, age_max = ages[0], ages[1]
        adf = df[(df['agebin'] > age_min) & (df['agebin'] <= age_max)]
        if adf.empty:
            continue
        adf = adf.groupby(time_keys)[count_cols].agg('sum').reset_index()
        adf['prevalence'] = adf['n_prev'] / adf['n']
        # 5-day event counts per person, annualized (73 bins per year)
        adf['clinical_incidence'] = adf['n_inc_clinical'] / (adf['n'] / bins_per_year)
        adf['severe_incidence'] = adf['n_inc_severe'] / (adf['n'] / bins_per_year)
        adf['ageGroup'] = f'{str(age_min)}-{str(age_max)}'
        age_frames.append(adf)

    if not age_frames:
        raise ValueError('No data found in daily.csv for any of the requested age groups: '
                         f'{age_groups_aggregates}')
    cdf = pd.concat(age_frames)

    # --- Attach PfPR 2-10, scenario information and EIR ------------------------
    cdf = pd.merge(left=cdf, right=df_pfpr2to10, on=time_keys)
    scen_df = pd.read_csv(os.path.join(jdir, 'scenarios_wseeds.csv'))
    scen_df = scen_df.drop(['entomology_mode'], axis=1, errors='ignore')  # remove EMOD specific columns
    cdf = cdf.merge(scen_df, on=sweep_variables, how='inner')
    cdf = cdf.merge(eir_df, on=['index', 'day', 'year'], how='inner')

    # Severe incidence recalculation: scaled by (1 - 0.5 * cm_severe)
    # NOTE(review): presumably accounts for case management of severe cases - confirm
    cdf['severe_incidence'] = cdf['severe_incidence'] * (0.5 * cdf['cm_severe'] + (1 - cdf['cm_severe']))

    # Rename columns for alignment with OpenMalaria results
    cdf['mortality'] = ''
    cdf = cdf.rename({'n': 'nHost'}, axis=1)
    cdf = cdf.drop(columns=['n_prev', 'n_inc', 'n_inc_clinical', 'n_inc_severe'])

    print(f'\nSaving outputs to: {wdir}')
    cdf.to_csv(os.path.join(wdir, 'mmmpy_5day.csv'), index=False)

    if cc:
        # 'timestep' is listed once only; listing it twice wrote a duplicated column
        cc_columns = ['scen_id', 'seed', 'timestep', 'cm_clinical', 'seasonality', 'output_target', 'cc_change',
                      'cc_title', 'ageGroup', 'simulatedEIR', 'prevalence_2to10', 'prevalence',
                      'clinical_incidence', 'severe_incidence', 'n_total_mos_pop', 'n_infectious_mos']
        cdf[cc_columns].to_csv(os.path.join(wdir, 'mmmpy_ccstep_daily.csv'), index=False)
|