Coverage for ebcpy/utils/reproduction.py: 76%
145 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-04 08:02 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-04 08:02 +0000
1"""
2This module contains scripts to extract information
3out of simulation / programming based research and
4enable a reproduction of the results at a later stage.
5"""
6import json
7import pathlib
8import sys
9import platform
10import os
11import logging
12from typing import List, Tuple, Union
13import zipfile
14from datetime import datetime
15from dataclasses import dataclass
16from importlib.metadata import distributions
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@dataclass
class ReproductionFile:
    """
    Container describing a text file that is written into the zip archive.

    Arguments:
        filename: str
            Name the file gets inside the zip. May be a relative path.
        content: str
            Text that is stored in that file.
    """
    filename: str
    content: str
@dataclass
class CopyFile:
    """
    Data-class for information on a file
    which will be copied to the zip

    :param str filename:
        Name of the file in the zip. Can be a relative path.
    :param pathlib.Path sourcepath:
        Path on the current machine where the file to copy
        is located. Plain strings are accepted as well.
    :param bool remove:
        Default is False. If True, the file will be moved
        instead of just copied.
    """
    filename: str
    sourcepath: Union[pathlib.Path, str]
    # Defaults to the non-destructive behaviour (copy, keep the source)
    remove: bool = False
def save_reproduction_archive(
        title: str = None,
        path: Union[pathlib.Path, str] = None,
        log_message: str = None,
        files: List[Union[ReproductionFile, CopyFile]] = None,
        file: Union[pathlib.Path, str] = None,
        search_on_pypi: bool = False
):
    """
    Function to save a reproduction archive which contains
    files to reproduce any simulation/software based study.

    :param str title:
        Title of the study. If not given, the name of ``file``
        without ".py" is used.
    :param pathlib.Path path:
        Where to store the .zip file. If not given, os.getcwd() is used.
    :param str log_message:
        Specific message for this run of the study. If given,
        you are not asked at the end of your script to give the
        log_message.
    :param list files:
        List of files to save along the standard ones.
        Examples would be plots, tables etc.
        Besides ReproductionFile and CopyFile, plain strings pointing
        to existing files are accepted; they are stored under "Results/".
        The given list itself is not modified.
    :param pathlib.Path file:
        The file which is used to run.
        Default is __file__ of __main__ module
    :param bool search_on_pypi:
        If True, all python packages which are
        not a git-repo are checked for availability on pypi
        Default is False. Does not work if no internet connection
        is available.

    :return pathlib.Path:
        Path of the created .zip file.

    :raises TypeError:
        If an entry of ``files`` is neither a str, a ReproductionFile
        nor a CopyFile.
    """
    _py_requirements_name = "python/requirements.txt"
    if path is None:
        path = os.getcwd()
    if file is None:
        file = sys.modules['__main__'].__file__
    # Convert before first use so that `file` may also be given as a string
    file_running = pathlib.Path(file).absolute()
    if title is None:
        title = file_running.name.replace(".py", "")
    path = pathlib.Path(path)
    os.makedirs(path, exist_ok=True)
    # Work on a copy: the caller's list must not grow with internal entries
    files = [] if files is None else list(files)
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Start with the file currently running:
    files.append(ReproductionFile(
        filename=file_running.name,
        content=file_running.read_text()
    ))
    # Check if it's a git-repo, walking upwards until a repo is found:
    for _dir_path in [file_running] + list(file_running.parents):
        repo_info = get_git_information(
            path=_dir_path,
            zip_folder_path="study_repository"
        )
        if repo_info is not None:  # That means it's a repo
            files.extend(repo_info.pop("difference_files", []))
            files.append(ReproductionFile(
                filename="study_repository/repo_info.txt",
                content=json.dumps(repo_info, indent=2)
            ))
            break
    # Get log
    if log_message is None:
        log_message = input("Please enter the specifications / log for this study: ")
        if not log_message:
            log_message = "The user was to lazy to pass any useful information on " \
                          "what made this research study different to others."

    # Append to the study log next to the archives
    with open(path.joinpath(f"Study_Log_{title}.txt"), "a+") as f:
        f.write(f"{current_time}: {log_message}\n")

    # General info
    files.append(ReproductionFile(
        filename="Information_to_reproduce.txt",
        content=_get_general_information(
            title=title,
            log_message=log_message,
            current_time=current_time
        )
    ))

    # Python-Reproduction:
    py_requirements_content, diff_files, pip_version = _get_python_package_information(
        search_on_pypi=search_on_pypi
    )
    files.append(ReproductionFile(
        filename=_py_requirements_name,
        content=py_requirements_content,
    ))
    files.extend(diff_files)

    py_repro = _get_python_reproduction(
        title=title,
        pip_version=pip_version
    )
    files.append(ReproductionFile(
        filename="python/Reproduce_python_environment.txt",
        content=py_repro,
    ))

    zip_file_name = path.joinpath(
        f"{current_time}_{title}.zip"
    )
    with zipfile.ZipFile(zip_file_name, "w", zipfile.ZIP_DEFLATED) as zip_file:
        # Save all result files:
        for entry in files:
            if isinstance(entry, str):
                if os.path.exists(entry):
                    zip_file.write(entry, f"Results/{pathlib.Path(entry).name}")
                else:
                    logger.error("Given file '%s' is a string but "
                                 "not an existing file. Skipping...", entry)
            elif isinstance(entry, ReproductionFile):
                zip_file.writestr(entry.filename, entry.content)
            elif isinstance(entry, CopyFile):
                zip_file.write(entry.sourcepath, entry.filename)
                if entry.remove:
                    # Best effort: a locked file must not abort the archive
                    try:
                        os.remove(entry.sourcepath)
                    except PermissionError:
                        logger.error("Could not remove %s", entry.sourcepath)
            else:
                raise TypeError(
                    f"Given file '{entry}' has no "
                    f"valid type. Type is '{type(entry)}'")
    return zip_file_name
def get_git_information(
        path: pathlib.Path,
        name: str = None,
        zip_folder_path: str = None
):
    """
    Function to get the git information for a given path.

    :param pathlib.Path path:
        Path to possible git repo
    :param str name:
        Name of the repo.
        If not given, the name in the URL will be used.
    :param str zip_folder_path:
        If given, the PATH of the difference_files for the .zip
        will be zip_folder_path plus WARNING_GIT_DIFFERENCE...

    Returns:
        If the path is not a git repository, or the repository has
        no remote configured (so no URL exists to reproduce it from),
        this function returns None.
        Else, a dictionary with the keys 'url', 'commit' and 'difference_files'.

    :raises ImportError:
        If GitPython is not installed.
    """
    try:
        from git import Repo, InvalidGitRepositoryError, RemoteReference
    except ImportError as err:
        raise ImportError(
            "Could not save data for reproduction, install GitPython using "
            "`pip install GitPython`: " + str(err)
        ) from err
    try:
        repo = Repo(path)
    except InvalidGitRepositoryError:
        return None
    if not repo.remotes:
        # Without a remote there is no URL; indexing remotes[0] would raise
        logger.warning(
            "Git repository at '%s' has no remote, "
            "it can't be referenced for reproduction.", path
        )
        return None
    commit = repo.head.commit
    commit_hex = commit.hexsha
    diff_last_cmt = repo.git.diff(commit)
    diff_remote_main = ""
    remote_main_cmt = ""
    for ref in repo.references:
        if isinstance(ref, RemoteReference) and ref.name in ['origin/master', 'origin/main']:
            diff_remote_main = repo.git.diff(ref.commit)
            remote_main_cmt = ref.commit.hexsha
            break
    data = {
        "url": next(repo.remotes[0].urls),
        "commit": commit_hex,
        "difference_files": []
    }

    if name is None:
        # Get last part of url
        name = data["url"].split("/")[-1].replace(".git", "")
    if zip_folder_path is None:
        zip_folder_path = ""
    else:
        zip_folder_path += "/"
    # Check new files
    if diff_last_cmt:
        data["difference_files"].append(ReproductionFile(
            filename=f"{zip_folder_path}WARNING_GIT_DIFFERENCE_{name}_to_local_head.txt",
            content=diff_last_cmt,
        ))
    # Check if pushed to remote
    if not repo.git.branch("-r", contains=commit_hex):
        if remote_main_cmt:
            # Reference the remote main commit and store the diff towards it
            data["difference_files"].append(ReproductionFile(
                filename=f"{zip_folder_path}WARNING_GIT_DIFFERENCE_{name}_to_remote_main.txt",
                content=diff_remote_main,
            ))
            data["commit"] = remote_main_cmt
        else:
            # No origin/main or origin/master found: keep the local commit
            # instead of overwriting it with an empty string
            logger.warning(
                "Commit %s of '%s' is not pushed and no remote main branch "
                "was found. Keeping the local commit as reference.",
                commit_hex, name
            )
    return data
def creat_copy_files_from_dir(foldername: str,
                              sourcepath: pathlib.Path,
                              remove: bool = False):
    """
    Creates a list with CopyFiles for each file in a directory
    which will be saved in the zip under the foldername
    with all subdirectories.

    :param str foldername:
        Name of the folder in the zip. Can be a relative path.
    :param pathlib.Path sourcepath:
        Path on the current machine where the directory to copy
        is located. A plain string is accepted as well.
    :param bool remove:
        Default is False. If True, the files in the directory
        will be moved instead of just copied.

    :return list:
        Returns a list with CopyFiles for each file in the directory source path.
    """
    sourcepath = pathlib.Path(sourcepath)
    files = []
    for dirpath, _dirnames, filenames in os.walk(sourcepath):
        # Use the real relative path: splitting the string on the folder
        # name breaks if a sub-directory contains that name again.
        relative_parts = pathlib.Path(dirpath).relative_to(sourcepath).parts
        # Archive names always use forward slashes, independent of the OS
        zip_dir = "/".join((foldername,) + relative_parts)
        for file in filenames:
            files.append(CopyFile(
                sourcepath=os.path.join(dirpath, file),
                filename=f"{zip_dir}/{file}",
                remove=remove
            ))
    return files
def _get_general_information(title: str, log_message: str, current_time: str):
    """
    Function to save the general information of the study.
    Time, machine information, and an intro on how to reproduce
    the study is given.

    :param str title:
        Title of the study
    :param str log_message:
        Reason / log message the user gave for this study
    :param str current_time:
        Time stamp of the study as string

    :return str:
        Text content for the general information file.
    """
    import getpass

    # One single f-string: mixing an f-string with %-formatting
    # raises if the title contains a '%' character.
    info_header = f"""This folder contains information necessary to reproduce the python based research study named '{title}'.
Reason the user performed this study:
"{log_message}"

To reproduce, make sure you have installed the following programs:
- Anaconda
- Dymola (If a folder named Dymola exists in this zip)

Run the lines in the file 'python/Reproduce_python_environment.txt' in a shell with the PATH variable pointing to anaconda (or in Anaconda Prompt).
After execution, make sure to check for any differences in git-based python code.
These files are included in this folder and are named e.g. "WARNING_GIT_DIFFERENCE_some_package".
If this happens, make sure to change the files in the git-based python packages after installation.
For future use, be sure to commit and push your changes before running any research study.
"""
    # os.getlogin() needs a controlling terminal and raises OSError
    # otherwise (e.g. CI, cron, containers). Fall back step by step.
    try:
        author = os.getlogin()
    except OSError:
        try:
            author = getpass.getuser()
        except (OSError, KeyError, ImportError):
            author = "unknown"
    _data = {
        "Time": current_time,
        "Author": author,
        "Machine": platform.machine(),
        "Version": platform.version(),
        "Platform": platform.platform(),
        "System": platform.system(),
        "Processor": platform.processor(),
    }
    _content_lines = [
        info_header + "\n",
        "General system information of performed study:",
    ] + [f"{k}: {v}" for k, v in _data.items()]
    return "\n".join(_content_lines)
def get_installed_packages() -> List[dict]:
    """
    Collect name, version and location of all installed Python packages.

    :return List[dict]:
        One dictionary per installed distribution with the keys
        'name', 'version' and 'location'.
    """
    return [
        {
            "name": dist.metadata['Name'],
            "version": dist.version,
            "location": os.path.normpath(str(dist.locate_file(''))),
        }
        for dist in distributions()
    ]
def _get_python_package_information(search_on_pypi: bool):
    """
    Function to get the content of python packages installed
    as a requirement.txt format content.

    :param bool search_on_pypi:
        If True, every package which is not a git-repo
        is searched on pypi.

    :return tuple:
        The requirements.txt content as string, the list of
        difference files of git based packages and the pip
        version specifier (e.g. '==23.0', empty if pip was not found).

    :raises ModuleNotFoundError:
        If search_on_pypi is True and a package is neither
        a git repo nor available on pypi.
    """
    installed_packages = get_installed_packages()
    diff_paths = []
    requirement_txt_content = []
    pip_version = ""
    for package in installed_packages:
        repo_info = get_git_information(
            path=package["location"],
            name=package["name"],
            zip_folder_path="python"
        )
        if repo_info is not None:
            cmt_sha = repo_info["commit"]
            url = repo_info["url"]
            # Remote URLs may already end with ".git", avoid ".git.git"
            if url.endswith(".git"):
                url = url[:-len(".git")]
            requirement_txt_content.append(
                f"git+{url}.git@{cmt_sha}#egg={package['name']}"
            )
            diff_paths.extend(repo_info["difference_files"])
            continue
        # Not a git repo, hence a regularly installed package:
        if package["name"] == "pip":  # exclude pip in requirements and give info to _get_python_reproduction
            pip_version = f'=={package["version"]}'
        else:
            requirement_txt_content.append(
                f'{package["name"]}=={package["version"]}'
            )
        if search_on_pypi:
            from pypisearch.search import Search
            res = Search(package["name"]).result
            if not res:
                # Exceptions do not interpolate logging-style arguments,
                # so the message has to be formatted explicitly.
                raise ModuleNotFoundError(
                    f"Package '{package['name']}' is neither a git "
                    "repo nor a package on pypi. "
                    "Won't be able to reproduce it!"
                )
    return "\n".join(requirement_txt_content), diff_paths, pip_version
def _get_python_reproduction(title: str, pip_version: str):
    """
    Build the shell commands which re-create the python
    environment of the study.

    :param str title:
        Title of the study, used for the name of the conda environment.
    :param str pip_version:
        Version specifier appended to 'pip', e.g. '==23.0'. May be empty.

    :return str:
        The commands, one per line.
    """
    version = sys.version_info
    py_version = f"{version.major}.{version.minor}.{version.micro}"
    env_name = f"py_{title}"
    commands = (
        f"conda create -n {env_name} python={py_version} -y",
        f"conda activate {env_name}",
        f"python -m pip install pip{pip_version}",
        "pip install -r requirements.txt",
    )
    return "\n".join(commands)
if __name__ == '__main__':
    # Example call when this module is executed as a script
    _example_arguments = dict(
        title="my_study",
        path=r"D:\00_temp\reproduction",
    )
    save_reproduction_archive(**_example_arguments)