Coverage for ebcpy/utils/reproduction.py: 76%
147 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-26 09:12 +0000
1"""
2This module contains scripts to extract information
3out of simulation / programming based research and
4enable a reproduction of the results at a later stage.
5"""
6import json
7import pathlib
8import sys
9import platform
10import os
11import logging
12from typing import List, Tuple, Union
13import zipfile
14from datetime import datetime
15from dataclasses import dataclass
16from importlib.metadata import distributions
# Module-level logger named after this module; used to report files that
# could not be added to or removed after creating the reproduction archive.
logger = logging.getLogger(__name__)
@dataclass
class ReproductionFile:
    """
    Data-class for a file whose content is held in memory and
    which will be written to the zip.

    Arguments:
        filename: str
            Name of the file in the zip. Can be a relative path.
        content: Union[str, bytes]
            Content of the file, either as text or as raw bytes.
    """
    filename: str
    content: Union[str, bytes]
@dataclass
class CopyFile:
    """
    Data-class for information on a file
    which will be copied to the zip

    :param str filename:
        Name of the file in the zip. Can be a relative path.
    :param pathlib.Path,str sourcepath:
        Path on the current machine where the file to copy
        is located. Both path objects and plain strings are accepted.
    :param bool remove:
        Default is False. If True, the file will be moved
        instead of just copied.
    """
    filename: str
    sourcepath: Union[pathlib.Path, str]
    # Defaults to False to match `creat_copy_files_from_dir`, so that
    # forgetting the flag can never delete a source file.
    remove: bool = False
def save_reproduction_archive(
        title: str = None,
        path: Union[pathlib.Path, str] = None,
        log_message: str = None,
        files: List[Union[ReproductionFile, CopyFile]] = None,
        file: Union[pathlib.Path, str] = None,
        search_on_pypi: bool = False
):
    """
    Function to save a reproduction archive which contains
    files to reproduce any simulation/software based study.

    :param str title:
        Title of the study. If not given, the name of ``file``
        without the trailing '.py' is used.
    :param pathlib.Path path:
        Where to store the .zip file. If not given, os.getcwd() is used.
    :param str log_message:
        Specific message for this run of the study. If given,
        you are not asked at the end of your script to give the
        log_message.
    :param list files:
        List of files to save along the standard ones.
        Examples would be plots, tables etc.
        Entries may be ReproductionFile, CopyFile or plain string
        paths; the latter are stored under 'Results/' in the zip.
        The given list itself is not modified.
    :param pathlib.Path file:
        The file which is used to run.
        Default is __file__ of __main__ module
    :param bool search_on_pypi:
        If True, all python packages which are
        not a git-repo are checked for availability on pypi
        Default is False. Does not work if no internet connection
        is available.

    :return pathlib.Path:
        Path of the created .zip file.

    :raises TypeError:
        If an entry of ``files`` is neither a string,
        a ReproductionFile nor a CopyFile.
    """
    _py_requirements_name = "python/requirements.txt"
    if path is None:
        path = os.getcwd()
    if file is None:
        file = sys.modules['__main__'].__file__
    # Convert to a Path first so that string inputs work everywhere below.
    file_running = pathlib.Path(file).absolute()
    if title is None:
        # Only strip a trailing '.py'; a '.py' inside the name is kept.
        title = file_running.name
        if title.endswith(".py"):
            title = title[:-len(".py")]
    path = pathlib.Path(path)
    os.makedirs(path, exist_ok=True)
    # Work on a copy: the list of the caller must not grow with each call.
    files = [] if files is None else list(files)
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Start with the file currently running:
    files.append(ReproductionFile(
        filename=file_running.name,
        content=file_running.read_text()
    ))
    # Check if it's a git-repo:
    for _dir_path in [file_running] + list(file_running.parents):
        repo_info = get_git_information(
            path=_dir_path,
            zip_folder_path="study_repository"
        )
        if repo_info is not None:  # That means it's a repo
            files.extend(repo_info.pop("difference_files", []))
            files.append(ReproductionFile(
                filename="study_repository/repo_info.txt",
                content=json.dumps(repo_info, indent=2)
            ))
            break
    # Get log
    if log_message is None:
        log_message = input("Please enter the specifications / log for this study: ")
        if not log_message:
            log_message = "The user was to lazy to pass any useful information on " \
                          "what made this research study different to others."

    with open(path.joinpath(f"Study_Log_{title}.txt"), "a+") as f:
        f.write(f"{current_time}: {log_message}\n")

    # General info
    files.append(ReproductionFile(
        filename="Information_to_reproduce.txt",
        content=_get_general_information(
            title=title,
            log_message=log_message,
            current_time=current_time
        )
    ))

    # Python-Reproduction:
    py_requirements_content, diff_files, pip_version = _get_python_package_information(
        search_on_pypi=search_on_pypi
    )
    files.append(ReproductionFile(
        filename=_py_requirements_name,
        content=py_requirements_content,
    ))
    files.extend(diff_files)

    py_repro = _get_python_reproduction(
        title=title,
        pip_version=pip_version
    )
    files.append(ReproductionFile(
        filename="python/Reproduce_python_environment.txt",
        content=py_repro,
    ))

    zip_file_name = path.joinpath(
        f"{current_time}_{title}.zip"
    )
    with zipfile.ZipFile(zip_file_name, "w", zipfile.ZIP_DEFLATED) as zip_file:
        # Save all result files:
        for entry in files:
            if isinstance(entry, str):
                if os.path.exists(entry):
                    zip_file.write(entry, f"Results/{pathlib.Path(entry).name}")
                else:
                    logger.error("Given file '%s' is a string but "
                                 "not an existing file. Skipping...", entry)
            elif isinstance(entry, ReproductionFile):
                if isinstance(entry.content, str):
                    # Use surrogateescape to make sure all characters survive
                    zip_file.writestr(
                        entry.filename,
                        entry.content.encode("utf-8", errors="surrogateescape")
                    )
                else:
                    zip_file.writestr(entry.filename, entry.content)
            elif isinstance(entry, CopyFile):
                zip_file.write(entry.sourcepath, entry.filename)
                if entry.remove:
                    # Removing is best effort; a locked file must not
                    # abort the creation of the archive.
                    try:
                        os.remove(entry.sourcepath)
                    except PermissionError:
                        logger.error("Could not remove %s", entry.sourcepath)
            else:
                raise TypeError(
                    f"Given file '{entry}' has no "
                    f"valid type. Type is '{type(entry)}'")
    return zip_file_name
def get_git_information(
        path: pathlib.Path,
        name: str = None,
        zip_folder_path: str = None
):
    """
    Function to get the git information for a given path.

    :param pathlib.Path path:
        Path to possible git repo
    :param str name:
        Name of the repo.
        If not given, the name in the URL will be used. If the
        repository has no remote, the name of its directory is used.
    :param str zip_folder_path:
        If given, the PATH of the difference_files for the .zip
        will be zip_folder_path plus WARNING_GIT_DIFFERENCE...

    Returns:
        If the path is not a git repository, this function returns None.
        Else, a dictionary with the keys 'url', 'commit' and 'difference_files'.
        'url' is an empty string if the repository has no remote.

    :raises ImportError:
        If GitPython is not installed.
    """
    try:
        from git import Repo, InvalidGitRepositoryError, RemoteReference
    except ImportError as err:
        # Chain the original error so the real cause stays visible.
        raise ImportError(
            "Could not save data for reproduction, install GitPython using "
            "`pip install GitPython`: " + str(err)
        ) from err
    try:
        repo = Repo(path)
    except InvalidGitRepositoryError:
        return None
    commit = repo.head.commit
    commit_hex = commit.hexsha
    diff_last_cmt = repo.git.diff(commit)
    diff_remote_main = ""
    remote_main_cmt = ""
    for ref in repo.references:
        if isinstance(ref, RemoteReference) and ref.name in ['origin/master', 'origin/main']:
            diff_remote_main = repo.git.diff(ref.commit)
            remote_main_cmt = ref.commit.hexsha
            break
    # A purely local repository has no remote; do not fail with an IndexError.
    url = next(repo.remotes[0].urls) if repo.remotes else ""
    data = {
        "url": url,
        "commit": commit_hex,
        "difference_files": []
    }

    if name is None:
        if url:
            # Get last part of url
            name = url.split("/")[-1].replace(".git", "")
        else:
            name = pathlib.Path(repo.working_dir).name
    if zip_folder_path is None:
        zip_folder_path = ""
    else:
        zip_folder_path += "/"
    # Check new files
    if diff_last_cmt:
        data["difference_files"].append(ReproductionFile(
            filename=f"{zip_folder_path}WARNING_GIT_DIFFERENCE_{name}_to_local_head.txt",
            content=diff_last_cmt,
        ))
    # Check if pushed to remote
    if not repo.git.branch("-r", contains=commit_hex):
        data["difference_files"].append(ReproductionFile(
            filename=f"{zip_folder_path}WARNING_GIT_DIFFERENCE_{name}_to_remote_main.txt",
            content=diff_remote_main,
        ))
        # Only refer to the remote main commit if one was found. Otherwise
        # keep the local commit so the recorded commit is never empty.
        if remote_main_cmt:
            data["commit"] = remote_main_cmt
    return data
def creat_copy_files_from_dir(foldername: str,
                              sourcepath: pathlib.Path,
                              remove: bool = False):
    """
    Creates a list with CopyFiles for each file in a directory
    which will be saved in the zip under the foldername
    with all subdirectories.

    :param str foldername:
        Name of the folder in the zip. Can be a relative path.
    :param pathlib.Path sourcepath:
        Path on the current machine where the directory to copy
        is located. A plain string is accepted as well.
    :param bool remove:
        Default is False. If True, the files in the directory
        will be moved instead of just copied.

    :return list:
        Returns a list with CopyFiles for each file in the directory source path.
    """
    files = []
    for dirpath, _dirnames, filenames in os.walk(sourcepath):
        # Use the real relative path instead of splitting the string at the
        # folder name, which breaks if the name occurs again in a sub-path.
        relative_dir = os.path.relpath(dirpath, sourcepath)
        if relative_dir == os.curdir:
            sub_parts = []
        else:
            sub_parts = relative_dir.split(os.sep)
        for file in filenames:
            files.append(CopyFile(
                sourcepath=os.path.join(dirpath, file),
                # Names inside the zip always use forward slashes.
                filename="/".join([foldername, *sub_parts, file]),
                remove=remove
            ))
    return files
def _get_general_information(title: str, log_message: str, current_time: str):
    """
    Function to save the general information of the study.
    Time, machine information, and an intro on how to reproduce
    the study is given.

    :param str title:
        Title of the study
    :param str log_message:
        Reason / log of the user for this run of the study
    :param str current_time:
        Time stamp of the study

    :return str:
        The text with the instructions followed by the
        system information, one 'key: value' pair per line.
    """

    # Insert title and log message in one step. Mixing an f-string with
    # %-formatting fails as soon as the title contains a '%'.
    info_header = f"""This folder contains information necessary to reproduce the python based research study named '{title}'.
Reason the user performed this study:
"{log_message}"

To reproduce, make sure you have installed the following programs:
- Anaconda
- Dymola (If a folder named Dymola exists in this zip)

Run the lines in the file 'python/Reproduce_python_environment.txt' in a shell with the PATH variable pointing to anaconda (or in Anaconda Prompt).
After execution, make sure to check for any differences in git-based python code.
These files are included in this folder and are named e.g. "WARNING_GIT_DIFFERENCE_some_package".
If this happens, make sure to change the files in the git-based python packages after installation.
For future use, be sure to commit and push your changes before running any research study.
"""
    try:
        author = os.getlogin()
    except OSError:
        # os.getlogin needs a controlling terminal, which is missing
        # e.g. in CI runs, services or containers.
        import getpass
        try:
            author = getpass.getuser()
        except (ImportError, KeyError, OSError):
            author = "unknown"
    _data = {
        "Time": current_time,
        "Author": author,
        "Machine": platform.machine(),
        "Version": platform.version(),
        "Platform": platform.platform(),
        "System": platform.system(),
        "Processor": platform.processor(),
    }
    _content_lines = [
        info_header + "\n",
        "General system information of performed study:",
    ] + [f"{k}: {v}" for k, v in _data.items()]
    return "\n".join(_content_lines)
def get_installed_packages() -> List[dict]:
    """
    Collect name, version and location of all installed Python packages.

    :return list:
        One dictionary per installed distribution with the
        keys 'name', 'version' and 'location'.
    """
    return [
        {
            "name": dist.metadata['Name'],
            "version": dist.version,
            "location": os.path.normpath(str(dist.locate_file(''))),
        }
        for dist in distributions()
    ]
def _get_python_package_information(search_on_pypi: bool):
    """
    Function to get the content of python packages installed
    as a requirement.txt format content.

    :param bool search_on_pypi:
        If True, all packages which are not a git-repo
        are checked for availability on pypi.

    :return tuple:
        The content of the requirements.txt as a string, the list of
        difference files of all git-based packages, and the pip version
        as '==x.y.z' (empty string if pip is not installed).

    :raises ModuleNotFoundError:
        If search_on_pypi is True and a package is neither
        a git-repo nor found on pypi.
    """
    installed_packages = get_installed_packages()
    diff_paths = []
    requirement_txt_content = []
    pip_version = ""
    for package in installed_packages:
        repo_info = get_git_information(
            path=package["location"],
            name=package["name"],
            zip_folder_path="python"
        )
        if repo_info is not None:
            url = repo_info["url"]
            # Remote urls often already end with '.git';
            # avoid writing '.git.git' into the requirement.
            if url.endswith(".git"):
                url = url[:-len(".git")]
            requirement_txt_content.append(
                f"git+{url}.git@{repo_info['commit']}#egg={package['name']}"
            )
            diff_paths.extend(repo_info["difference_files"])
            continue
        if package["name"] == "pip":
            # exclude pip in requirements and give info to _get_python_reproduction
            pip_version = f'=={package["version"]}'
            continue
        requirement_txt_content.append(
            f'{package["name"]}=={package["version"]}'
        )
        if search_on_pypi:
            from pypisearch.search import Search
            res = Search(package["name"]).result
            if not res:
                # Exceptions do not format logging-style arguments,
                # so the message is built explicitly.
                raise ModuleNotFoundError(
                    f"Package '{package['name']}' is neither a git "
                    "repo nor a package on pypi. "
                    "Won't be able to reproduce it!"
                )
    return "\n".join(requirement_txt_content), diff_paths, pip_version
def _get_python_reproduction(title: str, pip_version: str):
    """
    Build the shell commands which recreate the python
    environment used for the study.

    :param str title:
        Title of the study, used for the name of the conda environment
    :param str pip_version:
        Version specifier appended to 'pip', e.g. '==24.0'

    :return str:
        The commands, one per line.
    """
    version = sys.version_info
    py_version = f"{version.major}.{version.minor}.{version.micro}"
    env_name = f"py_{title}"
    commands = (
        f"conda create -n {env_name} python={py_version} -y",
        f"conda activate {env_name}",
        f"python -m pip install pip{pip_version}",
        "pip install -r requirements.txt",
    )
    return "\n".join(commands)
if __name__ == '__main__':
    # Example call when this module is executed directly.
    _example_settings = {
        "title": "my_study",
        "path": r"D:\00_temp\reproduction",
    }
    save_reproduction_archive(**_example_settings)