From 51791eff642ae470d84c90bd42957cf0c914cee7 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:23:31 +0200
Subject: [PATCH 01/16] mpcontribs-api\mpcontribs\api\attachments\document.py line-too-long

Shortened a line that was readable but too long.
---
 mpcontribs-api/mpcontribs/api/attachments/document.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mpcontribs-api/mpcontribs/api/attachments/document.py b/mpcontribs-api/mpcontribs/api/attachments/document.py
index c5584a11a..0b7b72b3a 100644
--- a/mpcontribs-api/mpcontribs/api/attachments/document.py
+++ b/mpcontribs-api/mpcontribs/api/attachments/document.py
@@ -44,7 +44,8 @@ def post_init(cls, sender, document, **kwargs):
         if "content" in requested_fields:
             if not document.md5:
                 # document.reload("md5") # TODO AttributeError: _changed_fields
-                raise ValueError("Please also request md5 field to retrieve attachment content!")
+                raise ValueError(
+                    "Please also request md5 field to retrieve attachment content!")

             retr = s3_client.get_object(Bucket=BUCKET, Key=document.md5)
             document.content = b64encode(retr["Body"].read()).decode("utf-8")

From d14dda850a6fe7d850e702469ed2e58a48259858 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:30:47 +0200
Subject: [PATCH 02/16] mpcontribs-api\gunicorn.conf.py line-too-long

The access_log_format line was 135 characters long, which made it hard to read.
I reduced it to 107 characters, still above the recommended 100, but kept the
main part of the format string on one line.
---
 mpcontribs-api/gunicorn.conf.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mpcontribs-api/gunicorn.conf.py b/mpcontribs-api/gunicorn.conf.py
index fc24a8c25..a28f66479 100644
--- a/mpcontribs-api/gunicorn.conf.py
+++ b/mpcontribs-api/gunicorn.conf.py
@@ -6,9 +6,11 @@
 statsd_host = "{}:8125".format(os.getenv("DD_AGENT_HOST"))
 accesslog = "-"
 errorlog = "-"
-access_log_format = '{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'.format(
-    os.getenv("SUPERVISOR_GROUP_NAME"), os.getenv("SUPERVISOR_PROCESS_NAME")
-)
+access_log_format = (
+'{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'
+    .format(os.getenv("SUPERVISOR_GROUP_NAME"),
+            os.getenv("SUPERVISOR_PROCESS_NAME"))
+    )
 max_requests = os.getenv("MAX_REQUESTS")
 max_requests_jitter = os.getenv("MAX_REQUESTS_JITTER")
 proc_name = os.getenv("SUPERVISOR_PROCESS_NAME")

From 188f43fd85bb095fc3612fdd7243f1cbc2915787 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:36:56 +0200
Subject: [PATCH 03/16] mpcontribs-portal\gunicorn.conf.py line-too-long

The access_log_format line was 135 characters long, which made it hard to read.
I reduced it to 107 characters, still above the recommended 100, but kept the
main part of the format string on one line.
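As a reference, and assuming pylint is the linter in use locally (an assumption about tooling, not part of the change itself), the warning addressed by these three commits can be reproduced with pylint's line-length check and the 100-character limit mentioned above:

    pylint --disable=all --enable=line-too-long --max-line-length=100 \
        mpcontribs-api/gunicorn.conf.py mpcontribs-portal/gunicorn.conf.py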
--- mpcontribs-portal/gunicorn.conf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mpcontribs-portal/gunicorn.conf.py b/mpcontribs-portal/gunicorn.conf.py index 612305305..e690025a4 100644 --- a/mpcontribs-portal/gunicorn.conf.py +++ b/mpcontribs-portal/gunicorn.conf.py @@ -6,9 +6,11 @@ statsd_host = "{}:8125".format(os.getenv("DD_AGENT_HOST")) accesslog = "-" errorlog = "-" -access_log_format = '{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'.format( - os.getenv("SUPERVISOR_GROUP_NAME"), os.getenv("SUPERVISOR_PROCESS_NAME") -) +access_log_format = ( +'{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s' +.format(os.getenv("SUPERVISOR_GROUP_NAME") + , os.getenv("SUPERVISOR_PROCESS_NAME")) + ) max_requests = os.getenv("MAX_REQUESTS") max_requests_jitter = os.getenv("MAX_REQUESTS_JITTER") proc_name = os.getenv("SUPERVISOR_PROCESS_NAME") From 6b351fab07a5f28cd1e0f688023b8e6d3dc546c5 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 13:43:42 +0200 Subject: [PATCH 04/16] mpcontribs-portal\mpcontribs\users\qmcdb\main\views.py line-too-long Readable line in commented out code --- mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py b/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py index 4e4c20ac4..aeed2f4ea 100644 --- a/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py +++ b/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py @@ -44,7 +44,8 @@ def dash_if_empty(inp): for symbol in elements ], ) - # query = reduce(lambda x, y: x | y, [Q(formula__icontains=symbol) for symbol in elements]) + # query = reduce( + # lambda x, y: x | y, [Q(formula__icontains=symbol) for symbol in elements]) # if spacegroup != None: # query &= Q(primitive_structure__spacegroup=spacegroup) From cf79706c83115f8edb08500c45ad2fe175893977 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 14:40:49 +0200 Subject: [PATCH 05/16] mpcontribs-api\mpcontribs\api\projects\document.py too-many-nested-blocks The post_save class method had 6 nesting levels while pylint recommends not to have more than 5. I extracted the small and targeted update_columns_by_flat method, with 3 nesting levels. 
--- mpcontribs-api/mpcontribs/api/projects/document.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/projects/document.py b/mpcontribs-api/mpcontribs/api/projects/document.py index 868574723..4328f0c90 100644 --- a/mpcontribs-api/mpcontribs/api/projects/document.py +++ b/mpcontribs-api/mpcontribs/api/projects/document.py @@ -263,11 +263,7 @@ def post_save(cls, sender, document, **kwargs): remap(merged, visit=visit, enter=enter), reducer="dot" ) - for k, v in flat.items(): - if k.startswith("data."): - columns[k] = Column(path=k) - if v is not None: - columns[k].unit = v + cls.update_columns_by_flat(columns, flat) # start pipeline for stats: match project pipeline = [{"$match": {"project": document.id}}] @@ -366,6 +362,14 @@ def post_save(cls, sender, document, **kwargs): stats = Stats(**stats_kwargs) document.update(stats=stats, columns=columns.values()) + @classmethod + def update_columns_by_flat(cls, columns, flat): + for k, v in flat.items(): + if k.startswith("data."): + columns[k] = Column(path=k) + if v is not None: + columns[k].unit = v + @classmethod def post_delete(cls, sender, document, **kwargs): admin_email = current_app.config["MAIL_DEFAULT_SENDER"] From f6dbc351d86f4360caf38832ff063a24f1396f3c Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 14:49:08 +0200 Subject: [PATCH 06/16] mpcontribs-portal\mpcontribs\users\als_beamline\scripts\__main__.py wildcard-import Wildcard imports make it harder to understand what is imported from where. Removing it is also a defensive programming act, lowering the probability of collisions due to future new imports or objects. --- .../mpcontribs/users/als_beamline/scripts/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py b/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py index bcd0122bd..240d262bb 100644 --- a/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py +++ b/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py @@ -1,6 +1,6 @@ import argparse, os from mpcontribs.io.archieml.mpfile import MPFile -from pre_submission import * +from pre_submission import run parser = argparse.ArgumentParser( description="""generate MPFile from directory of related XAS measurements""" From 784a6c4c6b8708e636abf4a7e59e43db47d1397e Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:17:30 +0200 Subject: [PATCH 07/16] mpcontribs-io\mpcontribs\io\archie\mpfile.py too-many-branches The methods get_string and from_string of the class MPFile had 14 branches each. Pylint recommends not to have more than 12 to reduce complexity, ease understanding and testing. I extracted from each method a small local method to reduce the number of branches and increase structuring. --- mpcontribs-io/mpcontribs/io/archie/mpfile.py | 44 ++++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/mpcontribs-io/mpcontribs/io/archie/mpfile.py b/mpcontribs-io/mpcontribs/io/archie/mpfile.py index 1eff4d13c..ceead57d2 100644 --- a/mpcontribs-io/mpcontribs/io/archie/mpfile.py +++ b/mpcontribs-io/mpcontribs/io/archie/mpfile.py @@ -30,12 +30,7 @@ def from_string(data): for key in list(rdct.keys()): is_general, root_key = normalize_root_level(key) - if is_general: - # make part of shared (meta-)data, i.e. 
nest under `general` at - # the beginning of the MPFile - if mp_level01_titles[0] not in rdct: - rdct[mp_level01_titles[0]] = RecursiveDict() - rdct.move_to_end(mp_level01_titles[0], last=False) + MPFile.handle_rdct(rdct, is_general) # normalize identifier key (pop & insert) # using rec_update since we're looping over all entries @@ -99,24 +94,22 @@ def from_string(data): return MPFile.from_dict(rdct) + @staticmethod + def handle_rdct(rdct, is_general): + if is_general: + # make part of shared (meta-)data, i.e. nest under `general` at + # the beginning of the MPFile + if mp_level01_titles[0] not in rdct: + rdct[mp_level01_titles[0]] = RecursiveDict() + rdct.move_to_end(mp_level01_titles[0], last=False) + def get_string(self, df_head_only=False): from pymatgen.core import Structure lines, scope = [], [] for key, value in self.document.iterate(): if isinstance(value, Table): - lines[-1] = lines[-1].replace("{", "[+").replace("}", "]") - header = any([isinstance(col, str) for col in value]) - if isinstance(value.index, MultiIndex): - value.reset_index(inplace=True) - if df_head_only: - value = value.head() - csv_string = value.to_csv( - index=False, header=header, float_format="%g", encoding="utf-8" - )[:-1] - lines += csv_string.split("\n") - if df_head_only: - lines.append("...") + value = self._handle_table(df_head_only, lines, value) elif isinstance(value, Structure): from pymatgen.io.cif import CifWriter @@ -162,5 +155,20 @@ def get_string(self, df_head_only=False): ) return "\n".join(lines) + "\n" + def _handle_table(self, df_head_only, lines, value): + lines[-1] = lines[-1].replace("{", "[+").replace("}", "]") + header = any([isinstance(col, str) for col in value]) + if isinstance(value.index, MultiIndex): + value.reset_index(inplace=True) + if df_head_only: + value = value.head() + csv_string = value.to_csv( + index=False, header=header, float_format="%g", encoding="utf-8" + )[:-1] + lines += csv_string.split("\n") + if df_head_only: + lines.append("...") + return value + MPFileCore.register(MPFile) From 6e5ad889617e9b51291b922bcad15b025bd093ec Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:42:29 +0200 Subject: [PATCH 08/16] Update pre_submission.py Method run had 82 statements while pylint suggest not to have more than 50. I extracted some small local methods, adding structure and making the method shorter. Please note that the end of the method was not reachable (and still so) due to a break, which might be a bug. I added a TODO there to mark that. 
--- .../users/redox_thermo_csp/pre_submission.py | 106 ++++++++++-------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py b/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py index 15cfaf4de..9e42f099d 100644 --- a/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py +++ b/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py @@ -125,27 +125,8 @@ def run(mpfile, **kwargs): print("could not import solar_perovskite, clone github repo") sys.exit(1) - input_files = mpfile.hdata.general["input_files"] - input_dir = os.path.dirname(solar_perovskite.__file__) - input_file = os.path.join(input_dir, input_files["exp"]) - exp_table = read_csv(open(input_file, "r").read().replace(";", ",")) - print("exp data loaded.") - with open(os.path.join(input_dir, input_files["theo"]), "r") as f: - theo_data = json.loads(f.read()).pop("collection") - print("theo data loaded.") - with open(input_files["energy"], "r") as f: - data = json.load(f).pop("collection") - print("energy data loaded.") - l = [ - dict(sdoc, parameters=doc["_id"]) - for doc in data - for sdoc in doc["energy_analysis"] - ] - frame = pd.DataFrame(l) - parameters = frame["parameters"] - frame.drop(labels=["parameters"], axis=1, inplace=True) - frame.insert(0, "parameters", parameters) - print("energy dataframe:", frame.shape) + exp_table, data = _load_data(mpfile, solar_perovskite) + frame = _build_frame(data) mpfile_singles = [m for m in mpfile.split()] for mpfile_single in mpfile_singles: @@ -159,17 +140,7 @@ def run(mpfile, **kwargs): print(identifier) print("add hdata ...") - d = RecursiveDict() - d["data"] = RecursiveDict() - compstr = hdata["pars"]["theo_compstr"] - row = exp_table.loc[exp_table["theo_compstr"] == compstr] - if not row.empty: - sample_number = int(row.iloc[0]["sample_number"]) - d["pars"] = get_fit_pars(sample_number) - d["data"]["availability"] = "Exp+Theo" - else: - d["pars"] = RecursiveDict() - d["data"]["availability"] = "Theo" + d, compstr, row, sample_number = populate_data_structures(exp_table, hdata) # print('dh_min, dh_max ...') # _, dh_min, dh_max, _ = redenth_act(compstr) # d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4) @@ -215,23 +186,15 @@ def run(mpfile, **kwargs): # + "_" + str(float(red_pr)) + "_" + data_sources + \ # "_" + str(float(enth_steps)) - print("add energy analysis ...") - group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1])) - group.drop(labels="compstr", axis=1, inplace=True) - for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False): - subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1, inplace=True) - for unstable, subsubgroup in subgroup.groupby("unstable", sort=False): - subsubgroup.drop(labels="unstable", axis=1, inplace=True) - name = "energy-analysis_{}_{}".format( - "unstable" if unstable else "stable", "-".join(prodstr) - ) - print(name) - mpfile_single.add_data_table(identifier, subsubgroup, name) + _add_energy_analysis(frame, mpfile_single, identifier, compstr) print(mpfile_single) mpfile.concat(mpfile_single) break + # TODO - the code bellow is not reachable due to the break above. + # If it is intended, the code can be removed and make the method shorter, + # Otherwise, it is a bug that should be fixed. 
if not row.empty: print("add ΔH ...") exp_thermo = GetExpThermo(sample_number, plotting=False) @@ -280,3 +243,58 @@ def run(mpfile, **kwargs): inplace=True, ) mpfile_single.add_data_table(identifier, table, name="raw") + +def populate_data_structures(exp_table, hdata): + d = RecursiveDict() + d["data"] = RecursiveDict() + compstr = hdata["pars"]["theo_compstr"] + row = exp_table.loc[exp_table["theo_compstr"] == compstr] + if not row.empty: + sample_number = int(row.iloc[0]["sample_number"]) + d["pars"] = get_fit_pars(sample_number) + d["data"]["availability"] = "Exp+Theo" + else: + d["pars"] = RecursiveDict() + d["data"]["availability"] = "Theo" + return d,compstr,row,sample_number + +def _add_energy_analysis(frame, mpfile_single, identifier, compstr): + print("add energy analysis ...") + group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1])) + group.drop(labels="compstr", axis=1, inplace=True) + for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False): + subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1, inplace=True) + for unstable, subsubgroup in subgroup.groupby("unstable", sort=False): + subsubgroup.drop(labels="unstable", axis=1, inplace=True) + name = "energy-analysis_{}_{}".format( + "unstable" if unstable else "stable", "-".join(prodstr) + ) + print(name) + mpfile_single.add_data_table(identifier, subsubgroup, name) + +def _build_frame(data): + l = [ + dict(sdoc, parameters=doc["_id"]) + for doc in data + for sdoc in doc["energy_analysis"] + ] + frame = pd.DataFrame(l) + parameters = frame["parameters"] + frame.drop(labels=["parameters"], axis=1, inplace=True) + frame.insert(0, "parameters", parameters) + print("energy dataframe:", frame.shape) + return frame + +def _load_data(mpfile, solar_perovskite): + input_files = mpfile.hdata.general["input_files"] + input_dir = os.path.dirname(solar_perovskite.__file__) + input_file = os.path.join(input_dir, input_files["exp"]) + exp_table = read_csv(open(input_file, "r").read().replace(";", ",")) + print("exp data loaded.") + with open(os.path.join(input_dir, input_files["theo"]), "r") as f: + theo_data = json.loads(f.read()).pop("collection") + print("theo data loaded.") + with open(input_files["energy"], "r") as f: + data = json.load(f).pop("collection") + print("energy data loaded.") + return exp_table,data From b56e230a24be52bf342e7076e54aed6de84f88c1 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:57:29 +0200 Subject: [PATCH 09/16] mpcontribs-io\mpcontribs\io\core\mpfile.py too-many-branches The method add_structure of class MPFileCore had 17 branches while pylint recommends to have no more than 12. I extracted a small method _build_structure that builds the structure from the source while using many branches to handle the source possible types. 
--- mpcontribs-io/mpcontribs/io/core/mpfile.py | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/mpcontribs-io/mpcontribs/io/core/mpfile.py b/mpcontribs-io/mpcontribs/io/core/mpfile.py index 990597f78..1f67ea80d 100644 --- a/mpcontribs-io/mpcontribs/io/core/mpfile.py +++ b/mpcontribs-io/mpcontribs/io/core/mpfile.py @@ -217,18 +217,7 @@ def add_structure(self, source, name=None, identifier=None, fmt=None): from pymatgen.core import Structure from pymatgen.ext.matproj import MPRester - if isinstance(source, Structure): - structure = source - elif isinstance(source, dict): - structure = Structure.from_dict(source) - elif os.path.exists(source): - structure = Structure.from_file(source, sort=True) - elif isinstance(source, six.string_types): - if fmt is None: - raise ValueError("Need fmt to get structure from string!") - structure = Structure.from_str(source, fmt, sort=True) - else: - raise ValueError(source, "not supported!") + structure = self._build_structure(source, fmt) if name is not None: if not isinstance(name, six.string_types): @@ -276,6 +265,21 @@ def add_structure(self, source, name=None, identifier=None, fmt=None): ) return identifier + def _build_structure(self, source, fmt): + if isinstance(source, Structure): + structure = source + elif isinstance(source, dict): + structure = Structure.from_dict(source) + elif os.path.exists(source): + structure = Structure.from_file(source, sort=True) + elif isinstance(source, six.string_types): + if fmt is None: + raise ValueError("Need fmt to get structure from string!") + structure = Structure.from_str(source, fmt, sort=True) + else: + raise ValueError(source, "not supported!") + return structure + def __repr__(self): return self.get_string(df_head_only=True) From bed45b5fa2921eab9423558fcddbef7b8a734d1a Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 16:15:41 +0200 Subject: [PATCH 10/16] mpcontribs-portal\mpcontribs\users\dilute_solute_diffusion\pre_submission.py too-many-statements Function run had 117 statements while pylint recommends not to have more than 50. The function was actually structured since it had different logics for different crystal structures. I extracted small unrelated functions for these logics. --- .../dilute_solute_diffusion/pre_submission.py | 295 ++++++++++-------- 1 file changed, 160 insertions(+), 135 deletions(-) diff --git a/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py b/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py index 045f5bd71..fcfca365e 100644 --- a/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py +++ b/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py @@ -22,30 +22,12 @@ def run(mpfile, hosts=None, download=False): if download or not os.path.exists(fpath): - figshare_id = 1546772 - url = "https://api.figshare.com/v2/articles/{}".format(figshare_id) - print("get figshare article {}".format(figshare_id)) - r = requests.get(url) - figshare = json.loads(r.content) - print("version =", figshare["version"]) # TODO set manually in "other"? 
- - print("read excel from figshare into DataFrame") - df_dct = None - for d in figshare["files"]: - if "xlsx" in d["name"]: - # Dict of DataFrames is returned, with keys representing sheets - df_dct = read_excel(d["download_url"], sheet_name=None) - break + df_dct = download_data(fpath) + if df_dct is None: print("no excel sheet found on figshare") return - print("save excel to disk") - writer = ExcelWriter(fpath) - for sheet, df in df_dct.items(): - df.to_excel(writer, sheet) - writer.save() - else: df_dct = read_excel(fpath, sheet_name=None) @@ -63,33 +45,14 @@ def run(mpfile, hosts=None, download=False): elif isinstance(hosts, list) and not host in hosts: continue - print("get mp-id for {}".format(host)) - mpid = None - for doc in mpr.query( - criteria={"pretty_formula": host}, properties={"task_id": 1} - ): - if "decomposes_to" not in doc["sbxd"][0]: - mpid = doc["task_id"] - break + mpid = get_mpid(mpr, host) if mpid is None: print("mp-id for {} not found".format(host)) continue print("add host info for {}".format(mpid)) hdata = host_info[host].to_dict(into=RecursiveDict) - for k in list(hdata.keys()): - v = hdata.pop(k) - ks = k.split() - if ks[0] not in hdata: - hdata[ks[0]] = RecursiveDict() - unit = ks[-1][1:-1] if ks[-1].startswith("[") else "" - subkey = "_".join(ks[1:-1] if unit else ks[1:]).split(",")[0] - if subkey == "lattice_constant": - unit = "Å" - try: - hdata[ks[0]][subkey] = clean_value(v, unit.replace("angstrom", "Å")) - except ValueError: - hdata[ks[0]][subkey] = v + _add_host_info(hdata) hdata["formula"] = host df = df_dct["{}-X".format(host)] rows = list(isnull(df).any(1).nonzero()[0]) @@ -100,100 +63,25 @@ def run(mpfile, hosts=None, download=False): df.drop(rows, inplace=True) mpfile.add_hierarchical_data(nest_dict(hdata, ["data"]), identifier=mpid) - print("add table for D₀/Q data for {}".format(mpid)) - df.set_index(df["Solute element number"], inplace=True) - df.drop("Solute element number", axis=1, inplace=True) - df.columns = df.iloc[0] - df.index.name = "index" - df.drop("Solute element name", inplace=True) - df = df.T.reset_index() - if str(host) == "Fe": - df_D0_Q = df[ - [ - "Solute element name", - "Solute D0, paramagnetic [cm^2/s]", - "Solute Q, paramagnetic [eV]", - ] - ] - elif hdata["Host"]["crystal_structure"] == "HCP": - df_D0_Q = df[ - [ - "Solute element name", - "Solute D0 basal [cm^2/s]", - "Solute Q basal [eV]", - ] - ] - else: - df_D0_Q = df[["Solute element name", "Solute D0 [cm^2/s]", "Solute Q [eV]"]] - df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"] - anums = [z[el] for el in df_D0_Q["Solute"]] - df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index)) - df_D0_Q.sort_values("Z", inplace=True) - df_D0_Q.reset_index(drop=True, inplace=True) - mpfile.add_data_table(mpid, df_D0_Q, "D₀_Q") + df = _add_table(mpfile, host, mpid, hdata, df) if hdata["Host"]["crystal_structure"] == "BCC": - print("add table for hop activation barriers for {} (BCC)".format(mpid)) - columns_E = ( - ["Hop activation barrier, E_{} [eV]".format(i) for i in range(2, 5)] - + ["Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)] - + ["Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)] - + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(5, 7)] - ) - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ( - ["Solute"] - + ["E{} [eV]".format(i) for i in ["₂", "₃", "₄"]] - + ["E`{} [eV]".format(i) for i in ["₃", "₄"]] - + ["E``{} [eV]".format(i) for i in ["₃", "₄"]] - + ["E{} [eV]".format(i) for i in ["₅", 
"₆"]] - ) - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - - print("add table for hop attempt frequencies for {} (BCC)".format(mpid)) - columns_v = ( - ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(2, 5)] - + ["Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)] - + ["Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)] - + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(5, 7)] - ) - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ( - ["Solute"] - + ["v{} [THz]".format(i) for i in ["₂", "₃", "₄"]] - + ["v`{} [THz]".format(i) for i in ["₃", "₄"]] - + ["v``{} [THz]".format(i) for i in ["₃", "₄"]] - + ["v{} [THz]".format(i) for i in ["₅", "₆"]] - ) - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + _handle_BBC(mpfile, mpid, df) elif hdata["Host"]["crystal_structure"] == "FCC": - print("add table for hop activation barriers for {} (FCC)".format(mpid)) - columns_E = [ - "Hop activation barrier, E_{} [eV]".format(i) for i in range(5) - ] - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ["Solute"] + [ - "E{} [eV]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] - ] - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - - print("add table for hop attempt frequencies for {} (FCC)".format(mpid)) - columns_v = [ - "Hop attempt frequency, v_{} [THz]".format(i) for i in range(5) - ] - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ["Solute"] + [ - "v{} [THz]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] - ] - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + _handle_FCC(mpfile, mpid, df) elif hdata["Host"]["crystal_structure"] == "HCP": - print("add table for hop activation barriers for {} (HCP)".format(mpid)) - columns_E = [ + _handle_HCP(mpfile, mpid, df) + + print("DONE") + +def _handle_HCP(mpfile, mpid, df): + print("add table for hop activation barriers for {} (HCP)".format(mpid)) + columns_E = [ "Hop activation barrier, E_X [eV]", "Hop activation barrier, E'_X [eV]", "Hop activation barrier, E_a [eV]", @@ -203,8 +91,8 @@ def run(mpfile, hosts=None, download=False): "Hop activation barrier, E_c [eV]", "Hop activation barrier, E'_c [eV]", ] - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ["Solute"] + [ + df_E = df[["Solute element name"] + columns_E] + df_E.columns = ["Solute"] + [ "Eₓ [eV]", "E`ₓ [eV]", "Eₐ [eV]", @@ -214,17 +102,154 @@ def run(mpfile, hosts=None, download=False): "Eꪱ [eV]", "E`ꪱ [eV]", ] - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - print("add table for hop attempt frequencies for {} (HCP)".format(mpid)) - columns_v = ["Hop attempt frequency, v_a [THz]"] + [ + print("add table for hop attempt frequencies for {} (HCP)".format(mpid)) + columns_v = ["Hop attempt frequency, v_a [THz]"] + [ "Hop attempt frequency, v_X [THz]" ] - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ["Solute"] + ["vₐ [THz]"] + ["vₓ [THz]"] - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ["Solute"] + ["vₐ [THz]"] + ["vₓ [THz]"] + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") - print("DONE") +def _handle_FCC(mpfile, mpid, df): + print("add table for hop activation barriers for {} (FCC)".format(mpid)) + columns_E = [ + "Hop activation barrier, E_{} [eV]".format(i) for i in range(5) + ] + df_E = df[["Solute element name"] + columns_E] + 
df_E.columns = ["Solute"] + [ + "E{} [eV]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] + ] + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + + print("add table for hop attempt frequencies for {} (FCC)".format(mpid)) + columns_v = [ + "Hop attempt frequency, v_{} [THz]".format(i) for i in range(5) + ] + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ["Solute"] + [ + "v{} [THz]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] + ] + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + +def _handle_BBC(mpfile, mpid, df): + print("add table for hop activation barriers for {} (BCC)".format(mpid)) + columns_E = ( + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(2, 5)] + + ["Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)] + + ["Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)] + + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(5, 7)] + ) + df_E = df[["Solute element name"] + columns_E] + df_E.columns = ( + ["Solute"] + + ["E{} [eV]".format(i) for i in ["₂", "₃", "₄"]] + + ["E`{} [eV]".format(i) for i in ["₃", "₄"]] + + ["E``{} [eV]".format(i) for i in ["₃", "₄"]] + + ["E{} [eV]".format(i) for i in ["₅", "₆"]] + ) + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + + print("add table for hop attempt frequencies for {} (BCC)".format(mpid)) + columns_v = ( + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(2, 5)] + + ["Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)] + + ["Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)] + + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(5, 7)] + ) + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ( + ["Solute"] + + ["v{} [THz]".format(i) for i in ["₂", "₃", "₄"]] + + ["v`{} [THz]".format(i) for i in ["₃", "₄"]] + + ["v``{} [THz]".format(i) for i in ["₃", "₄"]] + + ["v{} [THz]".format(i) for i in ["₅", "₆"]] + ) + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + +def _add_table(mpfile, host, mpid, hdata, df): + print("add table for D₀/Q data for {}".format(mpid)) + df.set_index(df["Solute element number"], inplace=True) + df.drop("Solute element number", axis=1, inplace=True) + df.columns = df.iloc[0] + df.index.name = "index" + df.drop("Solute element name", inplace=True) + df = df.T.reset_index() + if str(host) == "Fe": + df_D0_Q = df[ + [ + "Solute element name", + "Solute D0, paramagnetic [cm^2/s]", + "Solute Q, paramagnetic [eV]", + ] + ] + elif hdata["Host"]["crystal_structure"] == "HCP": + df_D0_Q = df[ + [ + "Solute element name", + "Solute D0 basal [cm^2/s]", + "Solute Q basal [eV]", + ] + ] + else: + df_D0_Q = df[["Solute element name", "Solute D0 [cm^2/s]", "Solute Q [eV]"]] + df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"] + anums = [z[el] for el in df_D0_Q["Solute"]] + df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index)) + df_D0_Q.sort_values("Z", inplace=True) + df_D0_Q.reset_index(drop=True, inplace=True) + mpfile.add_data_table(mpid, df_D0_Q, "D₀_Q") + return df + +def _add_host_info(hdata): + for k in list(hdata.keys()): + v = hdata.pop(k) + ks = k.split() + if ks[0] not in hdata: + hdata[ks[0]] = RecursiveDict() + unit = ks[-1][1:-1] if ks[-1].startswith("[") else "" + subkey = "_".join(ks[1:-1] if unit else ks[1:]).split(",")[0] + if subkey == "lattice_constant": + unit = "Å" + try: + hdata[ks[0]][subkey] = clean_value(v, unit.replace("angstrom", "Å")) + except ValueError: + hdata[ks[0]][subkey] = v + +def get_mpid(mpr, 
host): + print("get mp-id for {}".format(host)) + mpid = None + for doc in mpr.query( + criteria={"pretty_formula": host}, properties={"task_id": 1} + ): + if "decomposes_to" not in doc["sbxd"][0]: + mpid = doc["task_id"] + break + return mpid + +def download_data(fpath): + figshare_id = 1546772 + url = "https://api.figshare.com/v2/articles/{}".format(figshare_id) + print("get figshare article {}".format(figshare_id)) + r = requests.get(url) + figshare = json.loads(r.content) + print("version =", figshare["version"]) # TODO set manually in "other"? + + print("read excel from figshare into DataFrame") + df_dct = None + for d in figshare["files"]: + if "xlsx" in d["name"]: + # Dict of DataFrames is returned, with keys representing sheets + df_dct = read_excel(d["download_url"], sheet_name=None) + break + if df_dct is not None: + print("save excel to disk") + writer = ExcelWriter(fpath) + for sheet, df in df_dct.items(): + df.to_excel(writer, sheet) + writer.save() + return df_dct mpfile = MPFile() From c1ceee325b84e87f1e3efbc88be796e706b28d95 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 17:16:19 +0200 Subject: [PATCH 11/16] mpcontribs-api\mpcontribs\api\notebooks\views.py too-many-statements The make function had 74 statement while pylint recommends to have at most 50. I extracted small methods handling parts of the function. --- .../mpcontribs/api/notebooks/views.py | 146 +++++++++++------- 1 file changed, 87 insertions(+), 59 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/notebooks/views.py b/mpcontribs-api/mpcontribs/api/notebooks/views.py index e3bf88af8..19dfdc016 100644 --- a/mpcontribs-api/mpcontribs/api/notebooks/views.py +++ b/mpcontribs-api/mpcontribs/api/notebooks/views.py @@ -141,14 +141,7 @@ def make(projects=None, cids=None, force=False): start = time.perf_counter() remaining_time = rq.default_timeout - 5 mask = ["id", "needs_build", "notebook"] - query = Q() - - if projects: - query &= Q(project__in=projects) - if cids: - query &= Q(id__in=cids) - if not force: - query &= Q(needs_build=True) | Q(needs_build__exists=False) + query = _build_query(projects, cids, force) job = get_current_job() ret = {"input": {"projects": projects, "cids": cids, "force": force}} @@ -181,14 +174,7 @@ def make(projects=None, cids=None, force=False): not getattr(document, "needs_build", True): continue - if document.notebook: - try: - nb = Notebooks.objects.get(id=document.notebook.id) - nb.delete() - document.update(unset__notebook="") - logger.debug(f"Notebook {document.notebook.id} deleted.") - except DoesNotExist: - pass + _handle_document_notebook(document) cid = str(document.id) logger.debug(f"prep notebook for {cid} ...") @@ -212,35 +198,11 @@ def make(projects=None, cids=None, force=False): ])), ] - if document.tables: - cells.append(nbf.new_markdown_cell("## Tables")) - for table in document.tables: - cells.append( - nbf.new_code_cell("\n".join([ - f't = client.get_table("{table.id}")', - 't.display()' - ])) - ) + _handle_document_tables(document, cells) - if document.structures: - cells.append(nbf.new_markdown_cell("## Structures")) - for structure in document.structures: - cells.append( - nbf.new_code_cell("\n".join([ - f's = client.get_structure("{structure.id}")', - 's.display()' - ])) - ) + _handle_document_structures(document, cells) - if document.attachments: - cells.append(nbf.new_markdown_cell("## Attachments")) - for attachment in document.attachments: - cells.append( - nbf.new_code_cell("\n".join([ - f'a = client.get_attachment("{attachment.id}")', - 
'a.info()' - ])) - ) + _handle_document_attachmentss(document, cells) try: outputs = execute_cells(cid, cells) @@ -265,23 +227,10 @@ def make(projects=None, cids=None, force=False): for idx, output in outputs.items(): cells[idx]["outputs"] = output - doc = nbf.new_notebook() - doc["cells"] = [ - nbf.new_code_cell("from mpcontribs.client import Client"), - nbf.new_code_cell(f'client = Client()'), - ] - doc["cells"] += cells[1:] # skip localhost Client - - try: - nb = Notebooks(**doc).save() - document.update(notebook=nb, needs_build=False) - except Exception as e: - if job: - restart_kernels() + doc = _set_doc(cells) - ret["result"] = { - "status": "ERROR", "cid": cid, "count": count, "total": total, "exc": str(e) - } + ret = _update_doc_notebook(doc, document, job, cid, count, total) + if ret: return ret count += 1 @@ -291,3 +240,82 @@ def make(projects=None, cids=None, force=False): ret["result"] = {"status": "COMPLETED", "count": count, "total": total} return ret + +def _update_doc_notebook(doc, document, job, cid, count, total): + ret = None + try: + nb = Notebooks(**doc).save() + document.update(notebook=nb, needs_build=False) + except Exception as e: + if job: + restart_kernels() + + ret["result"] = { + "status": "ERROR", "cid": cid, "count": count, "total": total, "exc": str(e) + } + return ret + + +def _set_doc(cells): + doc = nbf.new_notebook() + doc["cells"] = [ + nbf.new_code_cell("from mpcontribs.client import Client"), + nbf.new_code_cell(f'client = Client()'), + ] + doc["cells"] += cells[1:] # skip localhost Client + + return doc + +def _handle_document_notebook(document): + if document.notebook: + try: + nb = Notebooks.objects.get(id=document.notebook.id) + nb.delete() + document.update(unset__notebook="") + logger.debug(f"Notebook {document.notebook.id} deleted.") + except DoesNotExist: + pass + +def _handle_document_attachmentss(document, cells): + if document.attachments: + cells.append(nbf.new_markdown_cell("## Attachments")) + for attachment in document.attachments: + cells.append( + nbf.new_code_cell("\n".join([ + f'a = client.get_attachment("{attachment.id}")', + 'a.info()' + ])) + ) + +def _handle_document_structures(document, cells): + if document.structures: + cells.append(nbf.new_markdown_cell("## Structures")) + for structure in document.structures: + cells.append( + nbf.new_code_cell("\n".join([ + f's = client.get_structure("{structure.id}")', + 's.display()' + ])) + ) + +def _handle_document_tables(document, cells): + if document.tables: + cells.append(nbf.new_markdown_cell("## Tables")) + for table in document.tables: + cells.append( + nbf.new_code_cell("\n".join([ + f't = client.get_table("{table.id}")', + 't.display()' + ])) + ) + +def _build_query(projects, cids, force): + query = Q() + + if projects: + query &= Q(project__in=projects) + if cids: + query &= Q(id__in=cids) + if not force: + query &= Q(needs_build=True) | Q(needs_build__exists=False) + return query From 5c5b7505ac3729b776a8198c3a5cfa22e08e5814 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 17:41:28 +0200 Subject: [PATCH 12/16] mpcontribs-api\mpcontribs\api\contributions\document.py broad-exception-caught Catching Exception instead of specific exceptions might hide unexpected ones. Function format_cell caught Exception. 
isnan does not raise exceptions:
https://docs.python.org/3/library/math.html#math.isnan

str might raise UnicodeEncodeError:
https://stackoverflow.com/questions/33556465/is-there-any-object-can-make-str-function-throw-error-or-exception-in-python
---
 mpcontribs-api/mpcontribs/api/contributions/document.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpcontribs-api/mpcontribs/api/contributions/document.py b/mpcontribs-api/mpcontribs/api/contributions/document.py
index 55c9e204b..d46206196 100644
--- a/mpcontribs-api/mpcontribs/api/contributions/document.py
+++ b/mpcontribs-api/mpcontribs/api/contributions/document.py
@@ -74,7 +74,7 @@ def format_cell(cell):
     q = truncate_digits(q)
     try:
         return str(q.nominal_value) if isnan(q.std_dev) else str(q)
-    except Exception:
+    except UnicodeEncodeError:
         return cell

From ae585188468ab18f03950987b892c694e338d624 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Sat, 16 Nov 2024 15:55:34 +0200
Subject: [PATCH 13/16] mpcontribs-io\mpcontribs\io\core\components\tdata.py broad-exception-caught

Method to_backgrid_dict of class Table caught Exception. Catching Exception
instead of specific exceptions might hide unexpected ones. I changed it to
catch AttributeError, as suggested by the StackOverflow post in the comment
above.
---
 mpcontribs-io/mpcontribs/io/core/components/tdata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpcontribs-io/mpcontribs/io/core/components/tdata.py b/mpcontribs-io/mpcontribs/io/core/components/tdata.py
index 17293619a..c0dfc5d20 100644
--- a/mpcontribs-io/mpcontribs/io/core/components/tdata.py
+++ b/mpcontribs-io/mpcontribs/io/core/components/tdata.py
@@ -112,7 +112,7 @@ def to_backgrid_dict(self):
                 if not all([result.scheme, result.netloc, result.path]):
                     break
                 is_url_column = True
-            except Exception:
+            except AttributeError:
                 break

         cell_type = "uri" if is_url_column else "string"

From 3789a49cbb4a2e127b8be7a8c8e30bac74f1ecfa Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Sat, 16 Nov 2024 17:38:28 +0200
Subject: [PATCH 14/16] mpcontribs-api\mpcontribs\api\core.py too-many-branches get_specs

Function get_specs had 19 branches while pylint suggests not to have more than
12. I extracted methods for populating the data structures and for handling
the different method names.
--- mpcontribs-api/mpcontribs/api/core.py | 278 ++++++++++++++------------ 1 file changed, 153 insertions(+), 125 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 6524d099f..647475fb6 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -120,140 +120,26 @@ def get_specs(klass, method, collection): "description": description, } - field_pagination_params = [] - for field, limits in klass.resource.fields_to_paginate.items(): - field_pagination_params.append( - { - "name": f"{field}_page", - "in": "query", - "default": 1, - "type": "integer", - "description": f"page to retrieve for {field} field", - } - ) - field_pagination_params.append( - { - "name": f"{field}_per_page", - "in": "query", - "default": limits[0], - "maximum": limits[1], - "type": "integer", - "description": f"number of items to retrieve per page for {field} field", - } - ) + field_pagination_params = _populate_field_pagination_params(klass) - filter_params = [] - if hasattr(klass.resource, "filters"): - for k, v in klass.resource.filters.items(): - filter_params += get_filter_params(k, v) + filter_params = _populate_filter_params(klass) - order_params = [] - if klass.resource.allowed_ordering: - allowed_ordering = [ - o.pattern if isinstance(o, Pattern) else o - for o in klass.resource.allowed_ordering - ] - order_params = [ - { - "name": "_sort", - "in": "query", - "type": "string", - "description": f"sort {collection} via {allowed_ordering}. Prepend +/- for asc/desc.", - } - ] + order_params = _populate_order_params(klass, collection) spec = None if method_name == "Fetch": - params = [ - { - "name": "pk", - "in": "path", - "type": "string", - "required": True, - "description": f"{collection[:-1]} (primary key)", - } - ] - if fields_param is not None: - params.append(fields_param) - params += field_pagination_params - spec = { - "summary": f"Retrieve a {collection[:-1]}.", - "operationId": f"get{doc_name}By{id_field}", - "parameters": params, - "responses": { - 200: { - "description": f"single {collection} entry", - "schema": {"$ref": f"#/definitions/{klass.schema_name}"}, - }, - "default": default_response, - }, - } + _handle_fetch_method(klass, collection, default_response + , id_field, doc_name, fields_param, field_pagination_params) elif method_name == "BulkFetch": - params = [fields_param] if fields_param is not None else [] - params += field_pagination_params - params += order_params - params += filter_params - schema_props = { - "data": { - "type": "array", - "items": {"$ref": f"#/definitions/{klass.schema_name}"}, - } - } - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and retrieve {collection}.", - "operationId": f"query{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"list of {collection}", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + _handle_bulk_fetch_method(klass, collection, method_name, default_response + , doc_name, fields_param, field_pagination_params + , filter_params, order_params) elif method_name == "Download": - params = [ - { - "name": "short_mime", - "in": "path", - "type": "string", - "required": True, - "description": "MIME Download Type: gz", - "default": "gz", - }, - { - "name": 
"format", - "in": "query", - "type": "string", - "required": True, - "description": f"download {collection} in different formats: {klass.resource.download_formats}", - }, - ] - params += [fields_param] if fields_param is not None else [] - params += order_params - params += filter_params - if klass.resource.paginate: - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and download {collection}.", - "operationId": f"download{doc_name}s", - "parameters": params, - "produces": ["application/gzip"], - "responses": { - 200: { - "description": f"{collection} download", - "schema": {"type": "file"}, - }, - "default": default_response, - }, - } + _handle_downlaod_method(klass, collection, method_name + , default_response, doc_name, fields_param + , filter_params, order_params) elif method_name == "Create": spec = { @@ -407,6 +293,148 @@ def get_specs(klass, method, collection): return spec +def _handle_downlaod_method(klass, collection, method_name, default_response, doc_name, fields_param, filter_params, order_params): + params = [ + { + "name": "short_mime", + "in": "path", + "type": "string", + "required": True, + "description": "MIME Download Type: gz", + "default": "gz", + }, + { + "name": "format", + "in": "query", + "type": "string", + "required": True, + "description": f"download {collection} in different formats: {klass.resource.download_formats}", + }, + ] + params += [fields_param] if fields_param is not None else [] + params += order_params + params += filter_params + if klass.resource.paginate: + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and download {collection}.", + "operationId": f"download{doc_name}s", + "parameters": params, + "produces": ["application/gzip"], + "responses": { + 200: { + "description": f"{collection} download", + "schema": {"type": "file"}, + }, + "default": default_response, + }, + } + +def _handle_bulk_fetch_method(klass, collection, method_name, default_response, doc_name, fields_param, field_pagination_params, filter_params, order_params): + params = [fields_param] if fields_param is not None else [] + params += field_pagination_params + params += order_params + params += filter_params + schema_props = { + "data": { + "type": "array", + "items": {"$ref": f"#/definitions/{klass.schema_name}"}, + } + } + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and retrieve {collection}.", + "operationId": f"query{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"list of {collection}", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + +def _handle_fetch_method(klass, collection, default_response, id_field, doc_name, fields_param, field_pagination_params): + params = [ + { + "name": "pk", + "in": "path", + "type": "string", + "required": True, + "description": f"{collection[:-1]} (primary key)", + } + ] + if fields_param is not None: + params.append(fields_param) + params += field_pagination_params + spec = { + "summary": f"Retrieve a {collection[:-1]}.", + "operationId": f"get{doc_name}By{id_field}", + "parameters": params, + "responses": { + 200: { + "description": f"single {collection} entry", + "schema": {"$ref": f"#/definitions/{klass.schema_name}"}, + }, + "default": 
default_response, + }, + } + +def _populate_order_params(klass, collection): + order_params = [] + if klass.resource.allowed_ordering: + allowed_ordering = [ + o.pattern if isinstance(o, Pattern) else o + for o in klass.resource.allowed_ordering + ] + order_params = [ + { + "name": "_sort", + "in": "query", + "type": "string", + "description": f"sort {collection} via {allowed_ordering}. Prepend +/- for asc/desc.", + } + ] + + return order_params + +def _populate_filter_params(klass): + filter_params = [] + if hasattr(klass.resource, "filters"): + for k, v in klass.resource.filters.items(): + filter_params += get_filter_params(k, v) + return filter_params + +def _populate_field_pagination_params(klass): + field_pagination_params = [] + for field, limits in klass.resource.fields_to_paginate.items(): + field_pagination_params.append( + { + "name": f"{field}_page", + "in": "query", + "default": 1, + "type": "integer", + "description": f"page to retrieve for {field} field", + } + ) + field_pagination_params.append( + { + "name": f"{field}_per_page", + "in": "query", + "default": limits[0], + "maximum": limits[1], + "type": "integer", + "description": f"number of items to retrieve per page for {field} field", + } + ) + + return field_pagination_params + class SwaggerView(OriginalSwaggerView, ResourceView): """A class-based view defining additional methods""" From 6eadee33ad618ba96c750c1bdd7e1978ca060cc7 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 16 Nov 2024 17:39:36 +0200 Subject: [PATCH 15/16] More of prior commit --- mpcontribs-api/mpcontribs/api/core.py | 98 ++++++++++++++------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 647475fb6..8ada428a1 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -223,54 +223,10 @@ def get_specs(klass, method, collection): }, } elif method_name == "BulkUpdate": - params = filter_params - params.append( - { - "name": f"{collection}", - "in": "body", - "description": f"The object to use for {collection} bulk update", - "schema": {"type": "object"}, - } - ) - schema_props = {"count": {"type": "integer"}} - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and update {collection}.", - "operationId": f"update{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"Number of {collection} updated", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + _handle_bulk_update_method(klass, collection, method_name, default_response, doc_name, filter_params) elif method_name == "BulkDelete": - params = filter_params - schema_props = {"count": {"type": "integer"}} - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and delete {collection}.", - "operationId": f"delete{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"Number of {collection} deleted", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + 
_handle_bulk_delete_method(klass, collection, method_name, default_response, doc_name, filter_params) elif method_name == "Delete": spec = { @@ -293,6 +249,56 @@ def get_specs(klass, method, collection): return spec +def _handle_bulk_delete_method(klass, collection, method_name, default_response, doc_name, filter_params): + params = filter_params + schema_props = {"count": {"type": "integer"}} + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and delete {collection}.", + "operationId": f"delete{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"Number of {collection} deleted", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + +def _handle_bulk_update_method(klass, collection, method_name, default_response, doc_name, filter_params): + params = filter_params + params.append( + { + "name": f"{collection}", + "in": "body", + "description": f"The object to use for {collection} bulk update", + "schema": {"type": "object"}, + } + ) + schema_props = {"count": {"type": "integer"}} + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and update {collection}.", + "operationId": f"update{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"Number of {collection} updated", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + def _handle_downlaod_method(klass, collection, method_name, default_response, doc_name, fields_param, filter_params, order_params): params = [ { From 76d7052aae43e62fddcd609b3284c57e95eb33ce Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sun, 17 Nov 2024 09:28:47 +0200 Subject: [PATCH 16/16] mpcontribs-api\mpcontribs\api\core.py too-many-branches has_read_permission Method has_read_permission of class SwaggerView had 20 branches while pylint recommends to have no more than 12. I extracted method to reduce that, making the code more structured. This fixed also the too-many-return-statements and too-many-statements alerts in the same method. 
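The thresholds cited across this series (5 nested blocks, 12 branches, 50 statements) are pylint's default design-checker limits, so a local check needs no extra configuration. A minimal sketch, assuming pylint is installed:

    pylint --disable=all \
        --enable=too-many-branches,too-many-statements,too-many-nested-blocks,too-many-return-statements \
        mpcontribs-api/mpcontribs/api/core.py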
--- mpcontribs-api/mpcontribs/api/core.py | 101 +++++++++++++++----------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 8ada428a1..7d78a3e19 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -574,17 +574,11 @@ def has_read_permission(self, request, qs): approved_public_filter = Q(is_public=True, is_approved=True) if request.path.startswith("/projects/"): - # external or internal requests can both read full project info - # anonymous requests can only read public approved projects - if is_anonymous: - return qs.filter(approved_public_filter) - - # authenticated requests can read approved public or accessible non-public projects - qfilter = approved_public_filter | Q(owner=username) - if groups: - qfilter |= Q(name__in=list(groups)) - - return qs.filter(qfilter) + return self._handle_projects(is_anonymous + , qs + , approved_public_filter + , username + , groups) else: # contributions are set private/public independent from projects # anonymous requests: @@ -596,32 +590,11 @@ def has_read_permission(self, request, qs): component = request.path.split("/")[1] if component == "contributions": - q = qs._query - if is_anonymous and is_external: - qs = qs.exclude("data") - - if q and "project" in q and isinstance(q["project"], str): - projects = self.get_projects() - try: - project = projects.get(name=q["project"]) - except DoesNotExist: - return qs.none() - - if project.owner == username or project.name in groups: - return qs - elif project.is_public and project.is_approved: - return qs.filter(is_public=True) - else: - return qs.none() - else: - names = None - if q and "project" in q and "$in" in q["project"]: - names = q.pop("project").pop("$in") - - qfilter = self.get_projects_filter( - username, groups, filter_names=names - ) - return qs.filter(qfilter) + return self._handle_contributions(qs + , is_anonymous + , is_external + , username + , groups) else: # get component Object IDs for queryset pk = request.view_args.get("pk") @@ -630,10 +603,7 @@ def has_read_permission(self, request, qs): resource = get_resource(component) qfilter = lambda qs: qs.clone() - if pk: - ids = [resource.get_object(pk, qfilter=qfilter).id] - else: - ids = [o.id for o in resource.get_objects(qfilter=qfilter)[0]] + ids = self._set_ids(qfilter, pk, resource) if not ids: return qs.none() @@ -664,6 +634,55 @@ def has_read_permission(self, request, qs): return qs + def _set_ids(self, qfilter, pk, resource): + if pk: + ids = [resource.get_object(pk, qfilter=qfilter).id] + else: + ids = [o.id for o in resource.get_objects(qfilter=qfilter)[0]] + return ids + + + def _handle_projects(self, is_anonymous, qs, approved_public_filter, username, groups): + # external or internal requests can both read full project info + # anonymous requests can only read public approved projects + if is_anonymous: + return qs.filter(approved_public_filter) + + # authenticated requests can read approved public or accessible non-public projects + qfilter = approved_public_filter | Q(owner=username) + if groups: + qfilter |= Q(name__in=list(groups)) + + return qs.filter(qfilter) + + def _handle_contributions(self, qs, is_anonymous, is_external, username, groups): + q = qs._query + if is_anonymous and is_external: + qs = qs.exclude("data") + + if q and "project" in q and isinstance(q["project"], str): + projects = self.get_projects() + try: + project = projects.get(name=q["project"]) + except 
DoesNotExist: + return qs.none() + + if project.owner == username or project.name in groups: + return qs + elif project.is_public and project.is_approved: + return qs.filter(is_public=True) + else: + return qs.none() + else: + names = None + if q and "project" in q and "$in" in q["project"]: + names = q.pop("project").pop("$in") + + qfilter = self.get_projects_filter( + username, groups, filter_names=names + ) + return qs.filter(qfilter) + def has_add_permission(self, request, obj): return self.is_admin_or_project_user(request, obj)