Coverage for aixcalibuha/sensitivity_analysis/sensitivity_analyzer.py: 94%

407 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-01-27 10:48 +0000

1"""Package containing modules for sensitivity analysis. 

2The module contains the relevant base-classes.""" 

3import abc 

4import copy 

5import os 

6import pathlib 

7import multiprocessing as mp 

8from typing import List 

9from collections import Counter 

10import numpy as np 

11import pandas as pd 

12from ebcpy.utils import setup_logger 

13from ebcpy.utils.reproduction import CopyFile 

14from ebcpy.simulationapi import SimulationAPI 

15from aixcalibuha import CalibrationClass, data_types 

16from aixcalibuha import utils 

17from aixcalibuha.sensitivity_analysis.plotting import plot_single, plot_time_dependent 

18 

19 

def _load_single_file(_filepath, parquet_engine='pyarrow'):
    """
    Load one stored simulation result as ``data_types.TimeSeriesData``.

    :param _filepath:
        Path of the result file. ``None`` is passed through unchanged.
    :param str parquet_engine:
        Forwarded as ``engine`` to ``TimeSeriesData``. Default is 'pyarrow'.
    :return:
        The loaded ``TimeSeriesData`` or ``None`` if no path was given.
    """
    if _filepath is not None:
        return data_types.TimeSeriesData(
            _filepath,
            default_tag='sim',
            key='simulation',
            engine=parquet_engine,
        )
    return None

26 

27 

def _load_files(_filepaths, parquet_engine='pyarrow'):
    """
    Load several stored simulation results, keeping the input order.

    :param _filepaths:
        Iterable of result file paths; each entry is handed to ``_load_single_file``.
    :param str parquet_engine:
        Forwarded to ``_load_single_file``. Default is 'pyarrow'.
    :return:
        List with one loaded entry per given path.
    :rtype: list
    """
    return [
        _load_single_file(single_path, parquet_engine=parquet_engine)
        for single_path in _filepaths
    ]

34 

35 

def _restruct_verbose(list_output_verbose):
    """
    Turn a list of per-simulation verbose dicts into one dict of arrays.

    Every entry of ``list_output_verbose`` maps a key to an indexable value whose
    element ``[1]`` is collected. The keys of the first entry define the keys of
    the returned dict; an unknown key in a later entry raises a ``KeyError``.

    :param list list_output_verbose:
        List of dicts, one per simulation.
    :return:
        Dict mapping each key to a flat ``np.ndarray`` holding the collected
        values in list order. An empty input list returns an empty dict.
    :rtype: dict
    """
    if not list_output_verbose:
        return {}

    # Gather plain Python lists first: appending to a numpy array inside the loop
    # would copy the whole array on every step.
    collected = {key: [] for key in list_output_verbose[0]}
    for single_verbose in list_output_verbose:
        for key, val in single_verbose.items():
            collected[key].append(val[1])

    # Start from an empty float array and flatten every value so that dtype and
    # shape match what a step-by-step ``np.append`` would produce.
    return {
        key: np.concatenate(
            [np.array([])] + [np.ravel(np.array([value])) for value in values]
        )
        for key, values in collected.items()
    }

45 

46 

def _concat_all_sims(sim_results_list):
    """
    Combine all simulation results into a single DataFrame.

    Each result is converted with ``to_df()`` and the frames are placed side by
    side. The position of a result in the list becomes a column level, which is
    then moved behind the original column level(s) before the columns are sorted.

    :param list sim_results_list:
        Objects providing a ``to_df()`` method.
    :return:
        The concatenated, column-sorted DataFrame.
    :rtype: pandas.DataFrame
    """
    frames = [single_result.to_df() for single_result in sim_results_list]
    side_by_side = pd.concat(frames, keys=range(len(frames)), axis='columns')
    reordered = side_by_side.swaplevel(axis=1)
    return reordered.sort_index(axis=1)

54 

55 

def _restruct_time_dependent(sen_time_dependent_list, time_index):
    """
    Restructure the per-time-step sensitivity results into one indexed result.

    :param list sen_time_dependent_list:
        One entry per time step. Either DataFrames, or tuples of two DataFrames
        (the second one is treated as the second order result).
    :param time_index:
        Time values used as keys when stacking the entries.
    :return:
        A DataFrame with the index levels Goal, Analysis variable and time. For
        tuple entries a tuple of two DataFrames, where the second one has the
        index levels Goal, Analysis variable, Interaction and time.
    """

    def _restruct_single(single_list, second_order=False):
        # Stack all time steps on the rows; the time becomes the outermost level.
        stacked = pd.concat(single_list, keys=time_index, axis=0)
        stacked = stacked.droplevel('Class', axis='index')
        # Move the time level from the front to the third position.
        stacked = stacked.swaplevel(0, 1)
        stacked = stacked.swaplevel(1, 2).sort_index(axis=0)
        if not second_order:
            stacked.index.set_names(['Goal', 'Analysis variable', 'time'],
                                    inplace=True)
            return stacked
        # Second order results carry one more level; time goes to the very end.
        stacked = stacked.swaplevel(2, 3).sort_index(axis=0)
        stacked.index.set_names(
            ['Goal', 'Analysis variable', 'Interaction', 'time'], inplace=True)
        return stacked

    if not isinstance(sen_time_dependent_list[0], tuple):
        return _restruct_single(sen_time_dependent_list)
    first_order_list, second_order_list = zip(*sen_time_dependent_list)
    return _restruct_single(first_order_list), _restruct_single(second_order_list, True)

78 

79 

def _divide_chunks(long_list, chunk_length):
    """
    Yield consecutive slices of ``long_list`` with at most ``chunk_length`` items.

    :param long_list:
        Sliceable sequence to split.
    :param int chunk_length:
        Maximum number of items per chunk. The last chunk may be shorter.
    :return:
        Generator over the chunks in their original order.
    """
    total_length = len(long_list)
    for chunk_start in range(0, total_length, chunk_length):
        chunk_stop = chunk_start + chunk_length
        yield long_list[chunk_start:chunk_stop]

84 

85 

86class SenAnalyzer(abc.ABC): 

87 """ 

88 Class to perform a Sensitivity Analysis. 

89 

90 :param SimulationAPI sim_api: 

91 Simulation-API used to simulate the samples 

92 :param int num_samples: 

93 The parameter `N` to the sampler methods of sobol and morris. NOTE: This is not the 

94 number of samples produced, but relates to the total number of samples produced in 

95 a manner dependent on the sampler method used. See the documentation of the specific  

96 method in the SALib for more information. 

97 :keyword str,os.path.normpath cd: 

98 The path for the current working directory. 

99 Logger and results will be stored here. 

100 :keyword boolean fail_on_error: 

101 Default is True. If True, the calibration will stop with an error if 

102 the simulation fails. See also: ``ret_val_on_error`` 

103 :keyword float,np.NAN ret_val_on_error: 

104 Default is np.NAN. If ``fail_on_error`` is false, you can specify here 

105 which value to return in the case of a failed simulation. Possible 

106 options are np.NaN, np.inf or some other high numbers. Be aware that this 

107 may influence the solver. 

108 :keyword boolean save_files: 

109 Default False. If true, all simulation files for each iteration will be saved! 

110 :keyword str suffix_files: 

111 Default 'csv'. Specifies the data format to store the simulation files in. 

112 Options are 'csv', 'hdf', 'parquet'. 

113 :keyword str parquet_engine: 

114 The engine to use for the data format parquet. 

115 Supported options can be extracted 

116 from the ebcpy.TimeSeriesData.save() function. 

117 Default is 'pyarrow'. 

118 :keyword str,os.path.normpath savepath_sim: 

119 Default is cd. Own directory for the time series data sets of all simulations 

120 during the sensitivity analysis. The own dir can be necessary for large data sets, 

121 because they can crash IDE during indexing when they are in the project folder. 

122 

123 """ 

124 

def __init__(self,
             sim_api: SimulationAPI,
             num_samples: int,
             **kwargs):
    """
    Instantiate class parameters.

    :param SimulationAPI sim_api:
        Simulation-API stored as ``self.sim_api``.
    :param int num_samples:
        Stored as ``self.num_samples``.
    :keyword boolean fail_on_error:
        Default is True.
    :keyword boolean save_files:
        Default is False.
    :keyword str suffix_files:
        Default is 'csv'.
    :keyword str parquet_engine:
        Default is 'pyarrow'.
    :keyword float ret_val_on_error:
        Default is ``np.nan``.
    :keyword str,pathlib.Path cd:
        Default is the current working directory. Strings are converted to
        ``pathlib.Path``. The logger is set up with this directory.
    :keyword str,pathlib.Path savepath_sim:
        Default is the given ``cd``. Strings are converted to ``pathlib.Path``.
    """
    # Setup the instance attributes
    self.sim_api = sim_api
    self.num_samples = num_samples

    # Update kwargs
    self.fail_on_error = kwargs.pop("fail_on_error", True)
    self.save_files = kwargs.pop("save_files", False)
    self.suffix_files = kwargs.pop('suffix_files', 'csv')
    self.parquet_engine = kwargs.pop('parquet_engine', 'pyarrow')
    # Use np.nan: the upper-case alias np.NAN no longer exists in NumPy 2.0.
    self.ret_val_on_error = kwargs.pop("ret_val_on_error", np.nan)
    self.cd = kwargs.pop("cd", os.getcwd())
    self.savepath_sim = kwargs.pop('savepath_sim', self.cd)

    # Both paths are used with ``joinpath`` later on, so convert strings.
    if isinstance(self.cd, str):
        self.cd = pathlib.Path(self.cd)
    if isinstance(self.savepath_sim, str):
        self.savepath_sim = pathlib.Path(self.savepath_sim)

    # Setup the logger
    self.logger = setup_logger(cd=self.cd, name=self.__class__.__name__)

    # Setup default values
    self.problem: dict = None
    self.reproduction_files = []

154 

@property
@abc.abstractmethod
def analysis_variables(self) -> List[str]:
    """
    Names of the variables that can be selected for analysis.

    Abstract property, has to be defined by subclasses.

    :return:
        A list of strings
    :rtype: List[str]
    """
    message = (f'{self.__class__.__name__}.analysis_variables '
               f'property is not defined yet')
    raise NotImplementedError(message)

168 

@abc.abstractmethod
def analysis_function(self, x, y):
    """
    Analyze the simulation results. Abstract, has to be defined by subclasses.

    :param np.array x:
        the `X` parameter of the method (The NumPy matrix containing the model inputs)
    :param np.array y:
        The NumPy array containing the model outputs
    """
    message = (f'{self.__class__.__name__}.analysis_function '
               f'function is not defined yet')
    raise NotImplementedError(message)

181 

@abc.abstractmethod
def create_sampler_demand(self) -> dict:
    """
    Return the sampler parameters. Abstract, has to be defined by subclasses.

    :return:
        dict: A dict with the sampler demand
    :raises NotImplementedError:
        Always, if this base implementation is called.
    """
    # Name this method in the message (it previously pointed to analysis_function).
    raise NotImplementedError(f'{self.__class__.__name__}.create_sampler_demand '
                              f'function is not defined yet')

192 

@abc.abstractmethod
def generate_samples(self):
    """
    Run the sampler and return the samples. Abstract, has to be defined by subclasses.

    :return:
        The list of samples generated as a NumPy array with one row per sample
        and each row containing one value for each variable name in `problem['names']`.
    :rtype: np.ndarray
    """
    message = (f'{self.__class__.__name__}.generate_samples '
               f'function is not defined yet')
    raise NotImplementedError(message)

205 

def simulate_samples(self, cal_class, **kwargs):
    """
    Creates the samples for the calibration class and simulates them.

    Besides running the simulations this method configures ``self.sim_api``
    (output interval, start/stop time, result names), sets ``self.problem`` and
    writes the samples to ``samples_<cal_class.name>.csv`` in ``self.cd``.
    If ``self.save_files`` is set, the samples are additionally written to
    ``self.savepath_sim`` and the returned file paths are added to
    ``self.reproduction_files``.

    :param cal_class:
        One class for calibration. Goals and tuner_paras have to be set
    :keyword scale:
        Default is False. If True the bounds of the tuner-parameters
        will be scaled between 0 and 1.

    :return:
        Returns two lists. First a list with the simulation results for each sample.
        If save_files the list contains the filepaths to the results
        Second a list of the samples.
    :rtype: list
    """
    scale = kwargs.pop('scale', False)
    # Set the output interval according to the given Goals
    mean_freq = cal_class.goals.get_meas_frequency()
    self.logger.info("Setting output_interval of simulation according "
                     "to measurement target data frequency: %s", mean_freq)
    self.sim_api.sim_setup.output_interval = mean_freq
    initial_names = cal_class.tuner_paras.get_names()
    self.sim_api.set_sim_setup({"start_time": cal_class.start_time,
                                "stop_time": cal_class.stop_time})
    self.sim_api.result_names = cal_class.goals.get_sim_var_names()

    # generate_samples() takes no arguments, so the problem has to be set first
    self.problem = self.create_problem(cal_class.tuner_paras, scale=scale)
    samples = self.generate_samples()

    # create df of samples with the result_file_names as the index
    result_file_names = [f"simulation_{idx}" for idx in range(len(samples))]
    samples_df = pd.DataFrame(samples, columns=initial_names, index=result_file_names)
    samples_df.to_csv(self.cd.joinpath(f'samples_{cal_class.name}.csv'))

    # Simulate the current values
    # Only the values passed to the simulation are descaled; the stored and
    # returned samples stay as generated.
    parameters = []
    for initial_values in samples:
        if scale:
            initial_values = cal_class.tuner_paras.descale(initial_values)
        parameters.append(dict(zip(initial_names, initial_values)))

    self.logger.info('Starting %s parameter variations on %s cores',
                     len(samples), self.sim_api.n_cpu)
    if self.save_files:
        # Results are written to disk and only their paths are returned
        sim_dir = self.savepath_sim.joinpath(f'simulations_{cal_class.name}')
        os.makedirs(sim_dir, exist_ok=True)
        samples_df.to_csv(self.savepath_sim.joinpath(f'samples_{cal_class.name}.csv'))
        self.logger.info(f'Saving simulation files in: {sim_dir}')
        _filepaths = self.sim_api.simulate(
            parameters=parameters,
            return_option="savepath",
            savepath=sim_dir,
            result_file_name=result_file_names,
            result_file_suffix=self.suffix_files,
            parquet_engine=self.parquet_engine,
            fail_on_error=self.fail_on_error,
            inputs=cal_class.inputs,
            **cal_class.input_kwargs
        )
        self.reproduction_files.extend(_filepaths)
        results = _filepaths
    else:
        # Results are returned directly by the simulation API
        results = self.sim_api.simulate(
            parameters=parameters,
            inputs=cal_class.inputs,
            fail_on_error=self.fail_on_error,
            **cal_class.input_kwargs
        )
    self.logger.info('Finished %s simulations', len(samples))
    return results, samples

277 

def _check_index(self, tsd: "data_types.TimeSeriesData", sim_num=None):
    """
    Align the index of a simulation result with the output interval.

    If the mean frequency of ``tsd`` differs from the output interval of the
    simulation setup, the index is cleaned and spaced equally. If the standard
    deviation of the frequency is still greater than zero afterwards, the index
    is rounded to the number of decimal places of the output interval.

    :param TimeSeriesData tsd:
        Simulation result to check. It is modified in place.
    :param sim_num:
        Name of the simulation used in the log messages.
        Default is the file name of ``tsd.filepath``.
    :return:
        The checked ``tsd``.
    """
    import decimal  # local import: only needed to derive the rounding accuracy

    output_interval = self.sim_api.sim_setup.output_interval
    freq = tsd.frequency
    if sim_num is None:
        sim_num = tsd.filepath.name
    if freq[0] != output_interval:
        self.logger.info(
            f'The mean value of the frequency from {sim_num} does not match output '
            'interval index will be cleaned and spaced equally')
        tsd.to_datetime_index()
        tsd.clean_and_space_equally(f'{str(output_interval * 1000)}ms')
        tsd.to_float_index()
        freq = tsd.frequency
    if freq[1] > 0.0:
        self.logger.info(f'The standard deviation of the frequency from {sim_num} is to high '
                         f'and will be rounded to the accuracy of the output interval')
        # Number of decimal places of the output interval. Searching the string
        # for '.' fails for values without a decimal point (e.g. 1 or 1e-05):
        # it yields -1 and the index would be rounded to tens.
        exponent = decimal.Decimal(str(output_interval)).as_tuple().exponent
        n_decimals = max(-exponent, 0) if isinstance(exponent, int) else 0
        tsd.index = np.round(tsd.index.astype("float64"), n_decimals)
    return tsd

296 

def _single_eval_statistical_measure(self, kwargs_eval):
    """
    Evaluate the statistical measure of one simulation result.

    :param dict kwargs_eval:
        Dict with the keys 'cal_class', 'result' and optionally 'sim_num'.
        The entries are removed from the dict.
    :return:
        Tuple of the total result and the verbose result per goal. If the
        result is None, ``self.ret_val_on_error`` is returned as total result
        and as value of every goal.
    """
    cal_class = kwargs_eval.pop('cal_class')
    result = kwargs_eval.pop('result')
    num_sim = kwargs_eval.pop('sim_num', None)

    if result is not None:
        result = self._check_index(result, num_sim)
        cal_class.goals.set_sim_target_data(result)
        cal_class.goals.set_relevant_time_intervals(cal_class.relevant_intervals)
        # Evaluate the current objective
        total_res, verbose_calculation = cal_class.goals.eval_difference(verbose=True)
        return total_res, verbose_calculation

    # No result available: use the error value for every goal
    goals_and_weights = zip(cal_class.goals.get_goals_list(), cal_class.goals.weightings)
    verbose_error = {
        goal: (weight, self.ret_val_on_error) for goal, weight in goals_and_weights
    }
    return self.ret_val_on_error, verbose_error

313 

def eval_statistical_measure(self, cal_class, results, verbose=True):
    """
    Evaluate the statistical measure of all results one after another.

    :param cal_class:
        Calibration class passed on to the evaluation of every result.
    :param results:
        Iterable of simulation results. The i-th result is evaluated under
        the name ``simulation_i``.
    :param bool verbose:
        Default True. If True, the restructured verbose results are returned too.
    :return:
        Array of the total results, and for ``verbose`` additionally the
        restructured verbose results.
    """
    self.logger.info('Starting evaluation of statistical measure')
    totals = []
    verbose_parts = []
    for sim_idx, single_result in enumerate(results):
        eval_kwargs = {
            'cal_class': cal_class,
            'result': single_result,
            'sim_num': f'simulation_{sim_idx}',
        }
        total_res, verbose_calculation = self._single_eval_statistical_measure(eval_kwargs)
        totals.append(total_res)
        verbose_parts.append(verbose_calculation)
    if not verbose:
        return np.asarray(totals)
    # restructure output_verbose
    output_verbose = _restruct_verbose(verbose_parts)
    return np.asarray(totals), output_verbose

330 

def _single_load_eval_file(self, kwargs_load_eval):
    """
    Load one simulation file and evaluate its statistical measure.

    Takes a single dict as argument; it is used as the worker function of the
    process pool in ``_mp_load_eval``.

    :param dict kwargs_load_eval:
        Dict with the key 'filepath' plus the entries needed by
        ``_single_eval_statistical_measure``. The dict is modified.
    :return:
        Tuple of the total result and the verbose result.
    """
    file_to_load = kwargs_load_eval.pop('filepath')
    kwargs_load_eval['result'] = _load_single_file(file_to_load, self.parquet_engine)
    total_res, verbose_calculation = self._single_eval_statistical_measure(kwargs_load_eval)
    return total_res, verbose_calculation

338 

def _mp_load_eval(self, _filepaths, cal_class, n_cpu):
    """
    Load saved simulation files and evaluate their statistical measure
    on multiple processes.

    :param _filepaths:
        Iterable of paths of the saved simulation files.
    :param cal_class:
        Calibration class used for the evaluation of every file.
    :param int n_cpu:
        Number of processes of the pool.
    :return:
        Array of the total results and the restructured verbose results,
        both in the order of the given file paths.
    """
    self.logger.info(f'Load files and evaluate statistical measure on {n_cpu} processes.')
    jobs = [{'filepath': single_path, 'cal_class': cal_class} for single_path in _filepaths]
    totals = []
    verbose_parts = []
    with mp.Pool(processes=n_cpu) as pool:
        # imap yields the results in the order of the jobs
        for total, verbose in pool.imap(self._single_load_eval_file, jobs):
            totals.append(total)
            verbose_parts.append(verbose)
    output_array = np.asarray(totals)
    output_verbose = _restruct_verbose(verbose_parts)
    return output_array, output_verbose

356 

def _load_eval(self, _filepaths, cal_class, n_cpu):
    """
    Load saved simulation files and evaluate their statistical measure.

    For ``n_cpu == 1`` all files are loaded first and then evaluated in this
    process. For every other value the work is handed to ``_mp_load_eval``.

    :return:
        Array of the total results and the verbose results.
    """
    if n_cpu != 1:
        output_array, output_verbose = self._mp_load_eval(_filepaths, cal_class, n_cpu)
        return output_array, output_verbose
    loaded_results = _load_files(_filepaths, self.parquet_engine)
    output_array, output_verbose = self.eval_statistical_measure(
        cal_class=cal_class,
        results=loaded_results
    )
    return output_array, output_verbose

371 

def run(self, calibration_classes, merge_multiple_classes=True, **kwargs):
    """
    Execute the sensitivity analysis for each class and
    return the result.

    :param CalibrationClass,list calibration_classes:
        Either one or multiple classes for calibration with same tuner-parameters.
    :param bool merge_multiple_classes:
        Default True. If False, the given list of calibration-classes
        is handled as-is. This means if you pass two CalibrationClass objects
        with the same name (e.g. "device on"), the calibration process will run
        for both these classes stand-alone.
        This will automatically yield an intersection of tuner-parameters, however may
        have advantages in some cases.
    :keyword bool verbose:
        Default False. If True, in addition to the combined Goals of the Classes
        (saved under index Goal: all), the sensitivity measures of the individual
        Goals will also be calculated and returned.
    :keyword scale:
        Default is False. If True the bounds of the tuner-parameters
        will be scaled between 0 and 1.
    :keyword bool use_first_sim:
        Default False. If True, the simulations of the first calibration class will be used for
        all other calibration classes with their relevant time intervals.
        The simulations must be stored on a hard-drive, so it must be used with
        either save_files or load_sim_files.
    :keyword int n_cpu:
        Default is 1. The number of processes to use for the evaluation of the statistical
        measure. For n_cpu > 1 only one simulation file is loaded at once in a process and
        dumped directly after the evaluation of the statistical measure,
        so that only minimal memory is used.
        Use this option for large analyses.
        Only implemented for save_files=True or load_sim_files=True.
    :keyword bool load_sim_files:
        Default False. If True, no new simulations are done and old simulations are loaded.
        The simulations and corresponding samples will be loaded from self.savepath_sim like
        they were saved from self.save_files. Currently, the name of the sim folder must be
        "simulations_CAL_CLASS_NAME" and for the samples "samples_CAL_CLASS_NAME".
        The usage of the same simulations for different
        calibration classes is not supported yet.
    :keyword bool save_results:
        Default True. If True, all results are saved as a csv in cd.
        (samples, statistical measures and analysis variables).
    :keyword bool plot_result:
        Default True. If True, the results will be plotted.
    :return:
        Returns a tuple ``(result, calibration_classes)``.
        ``result`` is a pandas.DataFrame. The DataFrame has a Multiindex with the
        levels Class, Goal and Analysis variable. The Goal name of combined goals is 'all'.
        The variables are the tuner-parameters.
        For the Sobol Method and calc_second_order returns a tuple of DataFrames (df_1, df_2)
        where df_2 contains the second order analysis variables and has an extra index level
        Interaction, which also contains the variables.
        ``calibration_classes`` are the validated (and, if requested, merged)
        calibration classes used for the analysis.
    :rtype: tuple
    :raises ValueError:
        If n_cpu is greater than the number of cpus of the machine, or if
        use_first_sim is set and a class is not inside the time interval of the
        first class.
    :raises AttributeError:
        If use_first_sim is set but the simulations are neither saved nor loaded.
    """
    verbose = kwargs.pop('verbose', False)
    scale = kwargs.pop('scale', False)
    use_first_sim = kwargs.pop('use_first_sim', False)
    n_cpu = kwargs.pop('n_cpu', 1)
    save_results = kwargs.pop('save_results', True)
    plot_result = kwargs.pop('plot_result', True)
    load_sim_files = kwargs.pop('load_sim_files', False)
    # Check correct input
    calibration_classes = utils.validate_cal_class_input(calibration_classes)
    # Merge the classes for avoiding possible intersection of tuner-parameters
    if merge_multiple_classes:
        calibration_classes = data_types.merge_calibration_classes(calibration_classes)

    # Check n_cpu
    if n_cpu > mp.cpu_count():
        raise ValueError(f"Given n_cpu '{n_cpu}' is greater "
                         "than the available number of "
                         f"cpus on your machine '{mp.cpu_count()}'")

    # Check if the usage of the simulations from the first calibration class for all is possible
    if use_first_sim:
        if not self.save_files and not load_sim_files:
            raise AttributeError('To use the simulations of the first calibration class '
                                 'for all classes the simulation files must be saved. '
                                 'Either set save_files=True or load already exiting files '
                                 'with load_sim_files=True.')
        start_time = 0
        stop_time = 0
        for idx, cal_class in enumerate(calibration_classes):
            if idx == 0:
                # The first class defines the interval all others have to fit in
                start_time = cal_class.start_time
                stop_time = cal_class.stop_time
                continue
            if start_time > cal_class.start_time or stop_time < cal_class.stop_time:
                raise ValueError(f'To use the simulations of the first calibration class '
                                 f'for all classes the start and stop times of the other '
                                 f'classes must be in the interval [{start_time}, {stop_time}] '
                                 f'of the first calibration class.')

    all_results = []
    for idx, cal_class in enumerate(calibration_classes):

        self.logger.info('Start sensitivity analysis of class: %s, '
                         'Time-Interval: %s-%s s', cal_class.name,
                         cal_class.start_time, cal_class.stop_time)

        # Generate list with metrics of every parameter variation
        results_goals = {}
        if load_sim_files:
            self.problem = self.create_problem(cal_class.tuner_paras, scale=scale)
            # With use_first_sim the files of the first class are used for every class
            if use_first_sim:
                class_name = calibration_classes[0].name
            else:
                class_name = cal_class.name
            sim_dir = self.savepath_sim.joinpath(f'simulations_{class_name}')
            samples_path = self.savepath_sim.joinpath(f'samples_{class_name}.csv')
            self.logger.info(f'Loading samples from {samples_path}')
            samples = pd.read_csv(samples_path,
                                  header=0,
                                  index_col=0)
            samples = samples.to_numpy()
            # One result file is expected per sample row
            result_file_names = [f"simulation_{idx}.{self.suffix_files}" for idx in
                                 range(len(samples))]
            _filepaths = [sim_dir.joinpath(result_file_name) for result_file_name in
                          result_file_names]
            self.logger.info(f'Loading simulation files from {sim_dir}')
            output_array, output_verbose = self._load_eval(_filepaths, cal_class, n_cpu)
        else:
            results, samples = self.simulate_samples(
                cal_class=cal_class,
                scale=scale
            )
            if self.save_files:
                # results are file paths here
                output_array, output_verbose = self._load_eval(results, cal_class, n_cpu)
            else:
                output_array, output_verbose = self.eval_statistical_measure(
                    cal_class=cal_class,
                    results=results
                )
            # After simulating once, all following classes load these files
            if use_first_sim:
                load_sim_files = True

        # combine output_array and output_verbose
        # set key for output_array depending on one or multiple goals
        stat_mea = {'all': output_array}
        if len(output_verbose) == 1:
            stat_mea = output_verbose
        if len(output_verbose) > 1 and verbose:
            stat_mea.update(output_verbose)

        # save statistical measure and corresponding samples for each cal_class in cd
        if save_results:
            result_file_names = [f"simulation_{idx}" for idx in range(len(output_array))]
            stat_mea_df = pd.DataFrame(stat_mea, index=result_file_names)
            savepath_stat_mea = self.cd.joinpath(
                f'{cal_class.goals.statistical_measure}_{cal_class.name}.csv')
            stat_mea_df.to_csv(savepath_stat_mea)
            self.reproduction_files.append(savepath_stat_mea)
            samples_df = pd.DataFrame(samples, columns=cal_class.tuner_paras.get_names(),
                                      index=result_file_names)
            savepath_samples = self.cd.joinpath(f'samples_{cal_class.name}.csv')
            samples_df.to_csv(savepath_samples)
            self.reproduction_files.append(savepath_samples)

        self.logger.info('Starting calculation of analysis variables')
        # One analysis per entry of stat_mea ('all' and/or the single goals)
        for key, val in stat_mea.items():
            result_goal = self.analysis_function(
                x=samples,
                y=val
            )
            results_goals[key] = result_goal
        all_results.append(results_goals)
        self.logger.info('Finished sensitivity analysis of class: %s, '
                         'Time-Interval: %s-%s s', cal_class.name,
                         cal_class.start_time, cal_class.stop_time)
    result = self._conv_local_results(results=all_results,
                                      local_classes=calibration_classes)
    if save_results:
        self._save(result)
    if plot_result:
        self.plot(result)
    return result, calibration_classes

548 

def _save(self, result: pd.DataFrame, time_dependent: bool = False):
    """
    Save the result DataFrame of run and run_time_dependent as csv in ``self.cd``.
    Needs to be overwritten for Sobol results.

    The file is named after the class, with the ending ``_results_time.csv`` for
    time dependent results and ``_results.csv`` otherwise. The path of the
    written file is added to ``self.reproduction_files``.

    :param pd.DataFrame result:
        The result to save.
    :param bool time_dependent:
        Default False. Selects the file name for time dependent results.
    """
    file_name = (
        f'{self.__class__.__name__}_results_time.csv'
        if time_dependent
        else f'{self.__class__.__name__}_results.csv'
    )
    savepath_result = self.cd.joinpath(file_name)
    result.to_csv(savepath_result)
    self.reproduction_files.append(savepath_result)

560 

@staticmethod
def create_problem(tuner_paras, scale=False) -> dict:
    """
    Create the problem dict for the given tuner-parameters.

    :param tuner_paras:
        Object providing ``get_names()`` and ``get_bounds()``.
    :param bool scale:
        Default False. If True, the bounds are replaced by zeros and ones.
    :return:
        Dict with the keys 'num_vars', 'names' and 'bounds'. The bounds are the
        transposed array of the (possibly replaced) bounds.
    :rtype: dict
    """
    num_vars = len(tuner_paras.get_names())
    raw_bounds = np.array(tuner_paras.get_bounds())
    if scale:
        lower = np.zeros_like(raw_bounds[0])
        upper = np.ones_like(raw_bounds[1])
        bounds = [lower, upper]
    else:
        bounds = raw_bounds
    return {
        'num_vars': num_vars,
        'names': tuner_paras.get_names(),
        'bounds': np.transpose(bounds),
    }

572 

@staticmethod
def select_by_threshold(calibration_classes, result, analysis_variable, threshold):
    """
    Automatically select sensitive tuner parameters based on a given threshold
    of a given analysis variable from a sensitivity result.
    Uses the first goal found in the second index level of the result.

    The tuner parameters of the given calibration classes are replaced by a
    reduced copy, the classes themselves are modified in place.

    :param list calibration_classes:
        List of aixcalibuha.data_types.CalibrationClass objects that you want to
        automatically select sensitive tuner-parameters.
    :param pd.DataFrame result:
        Result object of sensitivity analysis run
    :param str analysis_variable:
        Analysis variable to use for the selection
    :param float threshold:
        Minimal required value of given key
    :return: list calibration_classes
    :raises ValueError:
        If no tuner parameter of a class reaches the threshold.
    """
    for cal_class in calibration_classes:
        first_goal = result.index.get_level_values(1)[0]
        class_result = result.loc[cal_class.name, first_goal, analysis_variable]
        reduced_tuner_paras = copy.deepcopy(cal_class.tuner_paras)
        below_threshold = class_result < threshold
        names_to_remove = class_result[below_threshold].index.values
        reduced_tuner_paras.remove_names(names_to_remove)
        if reduced_tuner_paras.get_names():
            cal_class.tuner_paras = reduced_tuner_paras
            continue
        raise ValueError(
            'Automatic selection removed all tuner parameter '
            f'from class {cal_class.name} after '
            'SensitivityAnalysis was done. Please adjust the '
            'threshold in json or manually chose tuner '
            'parameters for the calibration.')
    return calibration_classes

607 

@staticmethod
def select_by_threshold_verbose(calibration_class: CalibrationClass,
                                result: pd.DataFrame,
                                analysis_variable: str,
                                threshold: float,
                                calc_names_for_selection: List[str] = None):
    """
    Select tuner-parameters of single calibration class with verbose sensitivity results.
    A tuner-parameter is kept if its sensitivity is equal or greater than the
    threshold in at least one row of the considered sensitivity result, i.e. in
    just one target value of one calibration class. This can be more robust
    because a small sensitivity in one target value and state of the system can
    mean that the parameter can also be calibrated in a global calibration class
    which calibrates multiple states and target values at the same time and has
    there not directly the same sensitivity as in the isolated view of a
    calibration class for only one state.

    The tuner parameters of the given calibration class are replaced by a
    reduced copy, the class itself is modified in place.

    :param CalibrationClass calibration_class:
        The calibration class from which the tuner parameters will be selected.
    :param pd.DataFrame result:
        Sensitivity results to use for the selection. Can include multiple classes.
    :param str analysis_variable:
        The analysis variable to use for the selection.
    :param float threshold:
        Minimal required value of given analysis variable.
    :param List[str] calc_names_for_selection:
        Specifies which calibration classes in the sensitivity results will be used for
        the selection. Default are all classes.
    :return:
        The given calibration class with the selected tuner-parameters.
    :raises NameError:
        If the tuner-parameters do not match the columns of the result or if a
        name in ``calc_names_for_selection`` is not part of the result.
    :raises ValueError:
        If all tuner-parameters would be removed.
    """
    tuner_names = calibration_class.tuner_paras.get_names()
    if Counter(tuner_names) != Counter(list(result.columns)):
        raise NameError("The tuner-parameter of the calibration class do not "
                        "match the tuner-parameters in the sensitivity result."
                        "They have to match.")

    result = result.loc[:, :, analysis_variable]
    calc_names_results = result.index.get_level_values("Class").unique()
    if calc_names_for_selection:
        # Every requested class has to be part of the result
        for cal_class in calc_names_for_selection:
            if cal_class not in calc_names_results:
                raise NameError(f"The calibration class name {cal_class} "
                                f"does not match any class name "
                                f"in the given sensitivity result.")
        result = result.loc[calc_names_for_selection, :, :]

    # One boolean per tuner-parameter: threshold reached in any row
    selected_tuners = (result >= threshold).any()

    remove_tuners = [tuner for tuner, selected in selected_tuners.items() if not selected]
    reduced_tuner_paras = copy.deepcopy(calibration_class.tuner_paras)
    reduced_tuner_paras.remove_names(remove_tuners)
    if not reduced_tuner_paras.get_names():
        raise ValueError("Threshold to small. All tuner-parameters would be removed.")
    calibration_class.tuner_paras = reduced_tuner_paras
    return calibration_class

663 

def run_time_dependent(self, cal_class: CalibrationClass, **kwargs):
    """
    Calculate the time dependent sensitivity for all the single goals in the calibration class.

    :param CalibrationClass cal_class:
        Calibration class with tuner-parameters to calculate sensitivity for.
        Can include dummy target data.
    :keyword scale:
        Default is False. If True the bounds of the tuner-parameters
        will be scaled between 0 and 1.
    :keyword bool load_sim_files:
        Default False. If True, no new simulations are done and old simulations are loaded.
        The simulations and corresponding samples will be loaded from self.savepath_sim like
        they were saved from self.save_files. Currently, the name of the sim folder must be
        "simulations_CAL_CLASS_NAME" and for the samples "samples_CAL_CLASS_NAME".
    :keyword bool save_results:
        Default True. If True, all results are saved as a csv in cd.
        (samples and analysis variables).
    :keyword int n_steps:
        Default is all time steps. If the problem is large, the evaluation of all time steps
        at once can cause a memory error. Then n_steps defines how many time_steps
        are evaluated at once in chunks. This increases the needed time exponentially and
        the simulation files must be saved.
        It is only passed on when the simulation files are loaded from disk
        (load_sim_files or save_files).
    :keyword bool plot_result:
        Default True. If True, the results will be plotted.
    :return:
        Returns a pandas.DataFrame, or whatever the restructuring of the
        time dependent results returns (a tuple of DataFrames if the analysis
        yields tuples).
    :rtype: pandas.DataFrame
    """
    scale = kwargs.pop('scale', False)
    save_results = kwargs.pop('save_results', True)
    plot_result = kwargs.pop('plot_result', True)
    load_sim_files = kwargs.pop('load_sim_files', False)
    n_steps = kwargs.pop('n_steps', 'all')

    self.logger.info("Start time dependent sensitivity analysis.")
    if load_sim_files:
        # Use existing simulations: rebuild the file paths from the stored samples
        self.problem = self.create_problem(cal_class.tuner_paras, scale=scale)
        sim_dir = self.savepath_sim.joinpath(f'simulations_{cal_class.name}')
        samples_path = self.savepath_sim.joinpath(f'samples_{cal_class.name}.csv')
        samples = pd.read_csv(samples_path,
                              header=0,
                              index_col=0)
        samples = samples.to_numpy()
        # One result file is expected per sample row
        result_file_names = [f"simulation_{idx}.{self.suffix_files}" for idx in
                             range(len(samples))]
        _filepaths = [sim_dir.joinpath(result_file_name) for result_file_name in
                      result_file_names]

        sen_time_dependent_list, time_index = self._load_analyze_tsteps(_filepaths=_filepaths,
                                                                         samples=samples,
                                                                         n_steps=n_steps,
                                                                         cal_class=cal_class)
        sen_time_dependent_df = _restruct_time_dependent(sen_time_dependent_list, time_index)
    else:
        results, samples = self.simulate_samples(
            cal_class=cal_class,
            scale=scale
        )
        if self.save_files:
            # results are file paths here
            sen_time_dependent_list, time_index = self._load_analyze_tsteps(_filepaths=results,
                                                                             samples=samples,
                                                                             n_steps=n_steps,
                                                                             cal_class=cal_class)
            sen_time_dependent_df = _restruct_time_dependent(sen_time_dependent_list,
                                                             time_index)
        else:
            # All results are in memory: analyze every time step of the first
            # result's index on the combined DataFrame of all simulations
            variables = results[0].get_variable_names()
            time_index = results[0].index.to_numpy()
            total_result = _concat_all_sims(results)
            sen_time_dependent_list = []
            for time_step in time_index:
                result_df_tstep = self._analyze_tstep_df(time_step=time_step,
                                                         tsteps_sim_results=total_result,
                                                         variables=variables,
                                                         samples=samples,
                                                         cal_class=cal_class)
                sen_time_dependent_list.append(result_df_tstep)
            sen_time_dependent_df = _restruct_time_dependent(sen_time_dependent_list,
                                                             time_index)
    self.logger.info("Finished time dependent sensitivity analysys.")
    if save_results:
        self._save(sen_time_dependent_df, time_dependent=True)
    if plot_result:
        # For a non-DataFrame result only its first element is plotted
        if isinstance(sen_time_dependent_df, pd.DataFrame):
            plot_time_dependent(sen_time_dependent_df)
        else:
            plot_time_dependent(sen_time_dependent_df[0])
    return sen_time_dependent_df

753 

754 def _analyze_tstep_df(self, time_step, tsteps_sim_results, variables, samples, cal_class): 

755 """Analyze the sensitivity at a single time step.""" 

756 result_dict_tstep = {} 

757 for var in variables: 

758 result_tstep_var = tsteps_sim_results[var].loc[time_step].to_numpy() 

759 if np.all(result_tstep_var == result_tstep_var[0]): 

760 sen_tstep_var = None 

761 else: 

762 sen_tstep_var = self.analysis_function( 

763 x=samples, 

764 y=result_tstep_var 

765 ) 

766 result_dict_tstep[var] = sen_tstep_var 

767 result_df_tstep = self._conv_local_results(results=[result_dict_tstep], 

768 local_classes=[cal_class]) 

769 return result_df_tstep 

770 

def _load_tsteps_df(self, tsteps, _filepaths):
    """
    Load every simulation file, cut out the range of time steps spanned
    by ``tsteps`` and concatenate the pieces into one object.

    :param tsteps:
        Sequence of time steps; only its first and last entry are used
        as the bounds of the label-based slice.
    :param _filepaths:
        Paths of the simulation result files to load.
    :return:
        The result of ``_concat_all_sims`` for the sliced simulations.
    """
    self.logger.info(
        f"Loading time steps from {tsteps[0]} to {tsteps[-1]} of the simulation files.")
    # Label-based window; equivalent to .loc[tsteps[0]:tsteps[-1]].
    window = slice(tsteps[0], tsteps[-1])
    sliced_sims = [_load_single_file(file_path).loc[window]
                   for file_path in _filepaths]
    return _concat_all_sims(sliced_sims)

783 

def _load_analyze_tsteps(self, _filepaths, samples, n_steps, cal_class):
    """
    Load the simulation files chunk by chunk and analyze each time step.

    The first file is loaded once up front to obtain the time index and
    the variable names.

    :param _filepaths:
        Paths of the simulation result files.
    :param samples:
        Samples passed on to ``self._analyze_tstep_df``.
    :param n_steps:
        Either the string ``'all'`` (a single chunk holding every time
        step) or an int between 1 and the number of time steps, giving
        the chunk size passed to ``_divide_chunks``.
    :param cal_class:
        Calibration class passed on to ``self._analyze_tstep_df``.
    :raises ValueError:
        If ``n_steps`` is neither ``'all'`` nor an int in the valid range.
    :return:
        Tuple of the list with one analysis result per time step and
        the time index as numpy array.
    """
    reference_sim = _load_single_file(_filepaths[0])
    time_index = reference_sim.index.to_numpy()
    variables = reference_sim.get_variable_names()

    if n_steps == 'all':
        chunks = [time_index]
    elif isinstance(n_steps, int) and 0 < n_steps <= len(time_index):
        chunks = _divide_chunks(time_index, n_steps)
    else:
        raise ValueError(
            f"n_steps can only be between 1 and {len(time_index)} or the string all.")

    sen_time_dependent_list = []
    for chunk in chunks:
        chunk_results = self._load_tsteps_df(tsteps=chunk, _filepaths=_filepaths)
        self.logger.info("Analyzing these time steps.")
        sen_time_dependent_list.extend(
            self._analyze_tstep_df(time_step=tstep,
                                   tsteps_sim_results=chunk_results,
                                   variables=variables,
                                   samples=samples,
                                   cal_class=cal_class)
            for tstep in chunk
        )
    return sen_time_dependent_list, time_index

811 

812 def _conv_global_result(self, result: dict, cal_class: CalibrationClass, 

813 analysis_variable: str): 

814 glo_res_dict = self._get_res_dict(result=result, cal_class=cal_class, 

815 analysis_variable=analysis_variable) 

816 return pd.DataFrame(glo_res_dict, index=['global']) 

817 

818 def _conv_local_results(self, results: list, local_classes: list): 

819 """ 

820 Convert the result dictionaries form SALib of each class and goal into one DataFrame. 

821 Overwritten for Sobol. 

822 """ 

823 _conv_results = [] 

824 tuples = [] 

825 for class_results, local_class in zip(results, local_classes): 

826 for goal, goal_results in class_results.items(): 

827 for analysis_var in self.analysis_variables: 

828 _conv_results.append(self._get_res_dict(result=goal_results, 

829 cal_class=local_class, 

830 analysis_variable=analysis_var)) 

831 tuples.append((local_class.name, goal, analysis_var)) 

832 index = pd.MultiIndex.from_tuples(tuples=tuples, 

833 names=['Class', 'Goal', 'Analysis variable']) 

834 df = pd.DataFrame(_conv_results, index=index) 

835 return df 

836 

837 @abc.abstractmethod 

838 def _get_res_dict(self, result: dict, cal_class: CalibrationClass, analysis_variable: str): 

839 """ 

840 Convert the result object to a dict with the key 

841 being the variable name and the value being the result 

842 associated to analysis_variable. 

843 """ 

844 raise NotImplementedError 

845 

def plot(self, result):
    """
    Plot the results of the sensitivity analysis method from run().

    :param pd.DataFrame result:
        Dataframe of the results like from the run() function.
    :return:
        Whatever ``plot_single`` returns; documented for this method as
        a tuple of matplotlib objects (fig, ax).
    """
    # Hand the plotting result back to the caller; previously it was
    # discarded, so the documented return value was never delivered.
    return plot_single(result=result)

855 

856 @staticmethod 

857 def load_from_csv(path): 

858 """ 

859 Load sensitivity results which were saved with the run() or run_time_dependent() function. 

860 

861 For second order results use the load_second_order_from_csv() function of the SobolAnalyzer. 

862 """ 

863 result = pd.read_csv(path, index_col=[0, 1, 2]) 

864 return result 

865 

def save_for_reproduction(self,
                          title: str,
                          path: pathlib.Path = None,
                          files: list = None,
                          exclude_sim_files: bool = False,
                          remove_saved_files: bool = False,
                          **kwargs):
    """
    Save the settings of the SenAnalyzer and SimApi in order to
    reproduce the simulations and sensitivity analysis method.
    All saved results will be also saved in the reproduction
    archive. The simulations can be excluded from saving.

    :param str title:
        Title of the study
    :param pathlib.Path path:
        Where to store the .zip file. If not given, self.cd is used.
    :param list files:
        List of files to save along the standard ones.
        Examples would be plots, tables etc.
        The passed list itself is not modified.
    :param bool exclude_sim_files:
        Default False. If True, the simulation files will not be saved in
        the reproduction archive.
    :param bool remove_saved_files:
        Default False. If True, the result and simulation files will be moved
        instead of just copied.
    :param dict kwargs:
        All keyword arguments except title, files, and path of the function
        `save_reproduction_archive`. Most importantly, `log_message` may be
        specified to avoid input during execution.
    :return:
        The return value of ``self.sim_api.save_for_reproduction``.
    """
    # Work on a copy so a list handed in by the caller is not extended
    # in place with the analyzer's own reproduction files.
    files = [] if files is None else list(files)

    for file_path in self.reproduction_files:
        # Any path containing 'simulation' is treated as a simulation file.
        if exclude_sim_files and 'simulation' in str(file_path):
            continue
        # Name inside the archive: "SenAnalyzer" followed by the part of the
        # path after the last occurrence of the working directory's name.
        filename = "SenAnalyzer" + str(file_path).rsplit(self.cd.name, maxsplit=1)[-1]
        files.append(CopyFile(
            sourcepath=file_path,
            filename=filename,
            remove=remove_saved_files
        ))

    return self.sim_api.save_for_reproduction(
        title=title,
        path=path,
        files=files,
        **kwargs
    )