80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def get_output_df(wdir, modelname, yr=False, mth=False, daily=False, custom_name=None,
                  save_combined=False):
    """
    Load and combine data from the model output files.

    If a combined file named after the selected time resolution already exists
    directly in ``wdir`` it is read as-is. Otherwise the file of the same name
    is read from each ``wdir/<model>/`` sub-directory, tagged with a
    ``modelname`` column and concatenated into a single DataFrame.

    Args:
        wdir (str): Working directory where the data files are located.
        modelname (str or iterable of str): Name(s) of the models for which
            result CSVs should be loaded (case sensitive). A single string is
            treated as one model name.
        yr (bool, optional): Set to True if the data files have yearly data.
            Defaults to False.
        mth (bool, optional): Set to True if the data files have monthly data.
            Defaults to False.
        daily (bool, optional): Set to True if the data files have daily
            timestep data. Defaults to False. The flags are applied in the
            order yr, mth, daily, so the last one that is True wins.
        custom_name (str, optional): Custom file name (without ``.csv``) to use
            instead of the default based on the time period. Overrides
            yr/mth/daily for the file name. Defaults to None.
        save_combined (bool, optional): Set to True to write the combined
            DataFrame to ``wdir/<fname>``. Ignored when ``daily`` is True.
            Defaults to False.

    Returns:
        tuple: A tuple containing:
            - df (DataFrame): Combined data for the models listed in
              modelname; an empty DataFrame if no file could be loaded.
            - wdir (str): The working directory, returned unchanged.

    Raises:
        ValueError: If modelname is neither a string nor an iterable.
    """
    # A bare string must be treated as ONE model name; iterating over it would
    # yield single characters and no output file would ever be found.
    if isinstance(modelname, str):
        models = [modelname]
    else:
        try:
            models = list(modelname)
        except TypeError as err:
            raise ValueError(
                f"modelname must be a str or an iterable of str, got {modelname!r}"
            ) from err

    cols_to_keep = None  # default read all
    fname = 'mmmpy_timeavrg.csv'
    # Sequential (not elif) on purpose: a later flag overrides an earlier one.
    if yr:
        fname = 'mmmpy_yr.csv'
    if mth:
        fname = 'mmmpy_mth.csv'
    if daily:
        # A reduced column selection for daily output was previously sketched
        # here but is disabled; all columns are read.
        fname = 'mmmpy_daily.csv'
    if custom_name:
        fname = f'{custom_name}.csv'

    combined_path = os.path.join(wdir, fname)
    model_paths = [(model, os.path.join(wdir, model, fname)) for model in models]

    if os.path.isfile(combined_path):
        # A previously combined file takes precedence over per-model files.
        df = pd.read_csv(combined_path, low_memory=False)
    else:
        # Nothing to load at all: return quietly without per-model messages.
        if not any(os.path.isfile(path) for _, path in model_paths):
            return pd.DataFrame(), wdir

        frames = []
        for model, model_path in model_paths:
            if not os.path.isfile(model_path):
                print(f"File not found for {model}: {model_path}")
                continue
            try:
                model_df = pd.read_csv(model_path, usecols=cols_to_keep)
                model_df['modelname'] = model
                if model == 'EMOD':
                    # EMOD seeds are shifted by one (presumably to align seed
                    # numbering with the other models -- confirm).
                    model_df['seed'] = model_df['seed'] + 1
            except Exception as e:
                # Best effort: one unreadable model file must not abort the rest.
                print(f"Error reading {model_path}: {e}")
                continue
            frames.append(model_df)

        if not frames:
            return pd.DataFrame(), wdir
        df = pd.concat(frames, ignore_index=True)

    if 'ageGroup' in df.columns:
        # Missing values can never be categories, so exclude them up front;
        # otherwise reorder_categories() rejects the list.
        observed = list(df['ageGroup'].dropna().unique())
        try:
            age_grps = sorted(observed, key=custom_sort_key)
        except Exception:
            # Fall back to order of appearance if the labels cannot be sorted.
            age_grps = observed
        df['ageGroup'] = df['ageGroup'].astype('category')
        df['ageGroup'] = df['ageGroup'].cat.reorder_categories(age_grps)

    if 'simulatedEIR' in df.columns:
        # We don't want to include simulations where EIR was 0 or less, because
        # no outcome measures are produced for them and that breaks downstream
        # processing. Daily output is exempt from this filter.
        if not daily and (df['simulatedEIR'] == 0).any():
            print('Warning: some eirs had simulated EIRS of 0, and were removed')
            df = df[df['simulatedEIR'] > 0]
        # NOTE(review): the original indentation was lost; rows with a missing
        # simulatedEIR are dropped unconditionally here -- confirm this matches
        # the intended behaviour for daily output.
        df = df[df['simulatedEIR'].notnull()]

    if not daily and save_combined:
        df.to_csv(combined_path, index=False)
    return df, wdir
|