From 51791eff642ae470d84c90bd42957cf0c914cee7 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:23:31 +0200
Subject: [PATCH 01/16] mpcontribs-api\mpcontribs\api\attachments\document.py line-too-long

Shortened a line that was readable but too long.
---
 mpcontribs-api/mpcontribs/api/attachments/document.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mpcontribs-api/mpcontribs/api/attachments/document.py b/mpcontribs-api/mpcontribs/api/attachments/document.py
index c5584a11a..0b7b72b3a 100644
--- a/mpcontribs-api/mpcontribs/api/attachments/document.py
+++ b/mpcontribs-api/mpcontribs/api/attachments/document.py
@@ -44,7 +44,8 @@ def post_init(cls, sender, document, **kwargs):
         if "content" in requested_fields:
             if not document.md5:
                 # document.reload("md5") # TODO AttributeError: _changed_fields
-                raise ValueError("Please also request md5 field to retrieve attachment content!")
+                raise ValueError(
+                    "Please also request md5 field to retrieve attachment content!")

             retr = s3_client.get_object(Bucket=BUCKET, Key=document.md5)
             document.content = b64encode(retr["Body"].read()).decode("utf-8")

From d14dda850a6fe7d850e702469ed2e58a48259858 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:30:47 +0200
Subject: [PATCH 02/16] mpcontribs-api\gunicorn.conf.py line-too-long

The access_log_format line was 135 characters long, which made it hard to read.
I reduced it to 107 characters, still above the recommended 100, but kept the
main part of the format string on one line.
---
 mpcontribs-api/gunicorn.conf.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mpcontribs-api/gunicorn.conf.py b/mpcontribs-api/gunicorn.conf.py
index fc24a8c25..a28f66479 100644
--- a/mpcontribs-api/gunicorn.conf.py
+++ b/mpcontribs-api/gunicorn.conf.py
@@ -6,9 +6,11 @@
 statsd_host = "{}:8125".format(os.getenv("DD_AGENT_HOST"))
 accesslog = "-"
 errorlog = "-"
-access_log_format = '{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'.format(
-    os.getenv("SUPERVISOR_GROUP_NAME"), os.getenv("SUPERVISOR_PROCESS_NAME")
-)
+access_log_format = (
+'{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'
+    .format(os.getenv("SUPERVISOR_GROUP_NAME"),
+            os.getenv("SUPERVISOR_PROCESS_NAME"))
+    )
 max_requests = os.getenv("MAX_REQUESTS")
 max_requests_jitter = os.getenv("MAX_REQUESTS_JITTER")
 proc_name = os.getenv("SUPERVISOR_PROCESS_NAME")

From 188f43fd85bb095fc3612fdd7243f1cbc2915787 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Fri, 15 Nov 2024 13:36:56 +0200
Subject: [PATCH 03/16] mpcontribs-portal\gunicorn.conf.py line-too-long

The access_log_format line was 135 characters long, which made it hard to read.
I reduced it to 107 characters, still above the recommended 100, but kept the
main part of the format string on one line.
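As a reference, and assuming pylint is the linter in use locally (an assumption about tooling, not part of the change itself), the warning addressed by these three commits can be reproduced with pylint's line-length check and the 100-character limit mentioned above:

    pylint --disable=all --enable=line-too-long --max-line-length=100 \
        mpcontribs-api/gunicorn.conf.py mpcontribs-portal/gunicorn.conf.py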
--- mpcontribs-portal/gunicorn.conf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mpcontribs-portal/gunicorn.conf.py b/mpcontribs-portal/gunicorn.conf.py index 612305305..e690025a4 100644 --- a/mpcontribs-portal/gunicorn.conf.py +++ b/mpcontribs-portal/gunicorn.conf.py @@ -6,9 +6,11 @@ statsd_host = "{}:8125".format(os.getenv("DD_AGENT_HOST")) accesslog = "-" errorlog = "-" -access_log_format = '{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s'.format( - os.getenv("SUPERVISOR_GROUP_NAME"), os.getenv("SUPERVISOR_PROCESS_NAME") -) +access_log_format = ( +'{}/{}: %(h)s %(t)s %(m)s %(U)s?%(q)s %(H)s %(s)s %(b)s "%(f)s" "%(a)s" %(D)s %(p)s %({{x-consumer-id}}i)s' +.format(os.getenv("SUPERVISOR_GROUP_NAME") + , os.getenv("SUPERVISOR_PROCESS_NAME")) + ) max_requests = os.getenv("MAX_REQUESTS") max_requests_jitter = os.getenv("MAX_REQUESTS_JITTER") proc_name = os.getenv("SUPERVISOR_PROCESS_NAME") From 6b351fab07a5f28cd1e0f688023b8e6d3dc546c5 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 13:43:42 +0200 Subject: [PATCH 04/16] mpcontribs-portal\mpcontribs\users\qmcdb\main\views.py line-too-long Readable line in commented out code --- mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py b/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py index 4e4c20ac4..aeed2f4ea 100644 --- a/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py +++ b/mpcontribs-portal/mpcontribs/users/qmcdb/main/views.py @@ -44,7 +44,8 @@ def dash_if_empty(inp): for symbol in elements ], ) - # query = reduce(lambda x, y: x | y, [Q(formula__icontains=symbol) for symbol in elements]) + # query = reduce( + # lambda x, y: x | y, [Q(formula__icontains=symbol) for symbol in elements]) # if spacegroup != None: # query &= Q(primitive_structure__spacegroup=spacegroup) From cf79706c83115f8edb08500c45ad2fe175893977 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 14:40:49 +0200 Subject: [PATCH 05/16] mpcontribs-api\mpcontribs\api\projects\document.py too-many-nested-blocks The post_save class method had 6 nesting levels while pylint recommends not to have more than 5. I extracted the small and targeted update_columns_by_flat method, with 3 nesting levels. 
--- mpcontribs-api/mpcontribs/api/projects/document.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/projects/document.py b/mpcontribs-api/mpcontribs/api/projects/document.py index 868574723..4328f0c90 100644 --- a/mpcontribs-api/mpcontribs/api/projects/document.py +++ b/mpcontribs-api/mpcontribs/api/projects/document.py @@ -263,11 +263,7 @@ def post_save(cls, sender, document, **kwargs): remap(merged, visit=visit, enter=enter), reducer="dot" ) - for k, v in flat.items(): - if k.startswith("data."): - columns[k] = Column(path=k) - if v is not None: - columns[k].unit = v + cls.update_columns_by_flat(columns, flat) # start pipeline for stats: match project pipeline = [{"$match": {"project": document.id}}] @@ -366,6 +362,14 @@ def post_save(cls, sender, document, **kwargs): stats = Stats(**stats_kwargs) document.update(stats=stats, columns=columns.values()) + @classmethod + def update_columns_by_flat(cls, columns, flat): + for k, v in flat.items(): + if k.startswith("data."): + columns[k] = Column(path=k) + if v is not None: + columns[k].unit = v + @classmethod def post_delete(cls, sender, document, **kwargs): admin_email = current_app.config["MAIL_DEFAULT_SENDER"] From f6dbc351d86f4360caf38832ff063a24f1396f3c Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 14:49:08 +0200 Subject: [PATCH 06/16] mpcontribs-portal\mpcontribs\users\als_beamline\scripts\__main__.py wildcard-import Wildcard imports make it harder to understand what is imported from where. Removing it is also a defensive programming act, lowering the probability of collisions due to future new imports or objects. --- .../mpcontribs/users/als_beamline/scripts/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py b/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py index bcd0122bd..240d262bb 100644 --- a/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py +++ b/mpcontribs-portal/mpcontribs/users/als_beamline/scripts/__main__.py @@ -1,6 +1,6 @@ import argparse, os from mpcontribs.io.archieml.mpfile import MPFile -from pre_submission import * +from pre_submission import run parser = argparse.ArgumentParser( description="""generate MPFile from directory of related XAS measurements""" From 784a6c4c6b8708e636abf4a7e59e43db47d1397e Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:17:30 +0200 Subject: [PATCH 07/16] mpcontribs-io\mpcontribs\io\archie\mpfile.py too-many-branches The methods get_string and from_string of the class MPFile had 14 branches each. Pylint recommends not to have more than 12 to reduce complexity, ease understanding and testing. I extracted from each method a small local method to reduce the number of branches and increase structuring. --- mpcontribs-io/mpcontribs/io/archie/mpfile.py | 44 ++++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/mpcontribs-io/mpcontribs/io/archie/mpfile.py b/mpcontribs-io/mpcontribs/io/archie/mpfile.py index 1eff4d13c..ceead57d2 100644 --- a/mpcontribs-io/mpcontribs/io/archie/mpfile.py +++ b/mpcontribs-io/mpcontribs/io/archie/mpfile.py @@ -30,12 +30,7 @@ def from_string(data): for key in list(rdct.keys()): is_general, root_key = normalize_root_level(key) - if is_general: - # make part of shared (meta-)data, i.e. 
nest under `general` at - # the beginning of the MPFile - if mp_level01_titles[0] not in rdct: - rdct[mp_level01_titles[0]] = RecursiveDict() - rdct.move_to_end(mp_level01_titles[0], last=False) + MPFile.handle_rdct(rdct, is_general) # normalize identifier key (pop & insert) # using rec_update since we're looping over all entries @@ -99,24 +94,22 @@ def from_string(data): return MPFile.from_dict(rdct) + @staticmethod + def handle_rdct(rdct, is_general): + if is_general: + # make part of shared (meta-)data, i.e. nest under `general` at + # the beginning of the MPFile + if mp_level01_titles[0] not in rdct: + rdct[mp_level01_titles[0]] = RecursiveDict() + rdct.move_to_end(mp_level01_titles[0], last=False) + def get_string(self, df_head_only=False): from pymatgen.core import Structure lines, scope = [], [] for key, value in self.document.iterate(): if isinstance(value, Table): - lines[-1] = lines[-1].replace("{", "[+").replace("}", "]") - header = any([isinstance(col, str) for col in value]) - if isinstance(value.index, MultiIndex): - value.reset_index(inplace=True) - if df_head_only: - value = value.head() - csv_string = value.to_csv( - index=False, header=header, float_format="%g", encoding="utf-8" - )[:-1] - lines += csv_string.split("\n") - if df_head_only: - lines.append("...") + value = self._handle_table(df_head_only, lines, value) elif isinstance(value, Structure): from pymatgen.io.cif import CifWriter @@ -162,5 +155,20 @@ def get_string(self, df_head_only=False): ) return "\n".join(lines) + "\n" + def _handle_table(self, df_head_only, lines, value): + lines[-1] = lines[-1].replace("{", "[+").replace("}", "]") + header = any([isinstance(col, str) for col in value]) + if isinstance(value.index, MultiIndex): + value.reset_index(inplace=True) + if df_head_only: + value = value.head() + csv_string = value.to_csv( + index=False, header=header, float_format="%g", encoding="utf-8" + )[:-1] + lines += csv_string.split("\n") + if df_head_only: + lines.append("...") + return value + MPFileCore.register(MPFile) From 6e5ad889617e9b51291b922bcad15b025bd093ec Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:42:29 +0200 Subject: [PATCH 08/16] Update pre_submission.py Method run had 82 statements while pylint suggest not to have more than 50. I extracted some small local methods, adding structure and making the method shorter. Please note that the end of the method was not reachable (and still so) due to a break, which might be a bug. I added a TODO there to mark that. 
--- .../users/redox_thermo_csp/pre_submission.py | 106 ++++++++++-------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py b/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py index 15cfaf4de..9e42f099d 100644 --- a/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py +++ b/mpcontribs-portal/mpcontribs/users/redox_thermo_csp/pre_submission.py @@ -125,27 +125,8 @@ def run(mpfile, **kwargs): print("could not import solar_perovskite, clone github repo") sys.exit(1) - input_files = mpfile.hdata.general["input_files"] - input_dir = os.path.dirname(solar_perovskite.__file__) - input_file = os.path.join(input_dir, input_files["exp"]) - exp_table = read_csv(open(input_file, "r").read().replace(";", ",")) - print("exp data loaded.") - with open(os.path.join(input_dir, input_files["theo"]), "r") as f: - theo_data = json.loads(f.read()).pop("collection") - print("theo data loaded.") - with open(input_files["energy"], "r") as f: - data = json.load(f).pop("collection") - print("energy data loaded.") - l = [ - dict(sdoc, parameters=doc["_id"]) - for doc in data - for sdoc in doc["energy_analysis"] - ] - frame = pd.DataFrame(l) - parameters = frame["parameters"] - frame.drop(labels=["parameters"], axis=1, inplace=True) - frame.insert(0, "parameters", parameters) - print("energy dataframe:", frame.shape) + exp_table, data = _load_data(mpfile, solar_perovskite) + frame = _build_frame(data) mpfile_singles = [m for m in mpfile.split()] for mpfile_single in mpfile_singles: @@ -159,17 +140,7 @@ def run(mpfile, **kwargs): print(identifier) print("add hdata ...") - d = RecursiveDict() - d["data"] = RecursiveDict() - compstr = hdata["pars"]["theo_compstr"] - row = exp_table.loc[exp_table["theo_compstr"] == compstr] - if not row.empty: - sample_number = int(row.iloc[0]["sample_number"]) - d["pars"] = get_fit_pars(sample_number) - d["data"]["availability"] = "Exp+Theo" - else: - d["pars"] = RecursiveDict() - d["data"]["availability"] = "Theo" + d, compstr, row, sample_number = populate_data_structures(exp_table, hdata) # print('dh_min, dh_max ...') # _, dh_min, dh_max, _ = redenth_act(compstr) # d['pars']['dh_min'] = clean_value(dh_min, max_dgts=4) @@ -215,23 +186,15 @@ def run(mpfile, **kwargs): # + "_" + str(float(red_pr)) + "_" + data_sources + \ # "_" + str(float(enth_steps)) - print("add energy analysis ...") - group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1])) - group.drop(labels="compstr", axis=1, inplace=True) - for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False): - subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1, inplace=True) - for unstable, subsubgroup in subgroup.groupby("unstable", sort=False): - subsubgroup.drop(labels="unstable", axis=1, inplace=True) - name = "energy-analysis_{}_{}".format( - "unstable" if unstable else "stable", "-".join(prodstr) - ) - print(name) - mpfile_single.add_data_table(identifier, subsubgroup, name) + _add_energy_analysis(frame, mpfile_single, identifier, compstr) print(mpfile_single) mpfile.concat(mpfile_single) break + # TODO - the code bellow is not reachable due to the break above. + # If it is intended, the code can be removed and make the method shorter, + # Otherwise, it is a bug that should be fixed. 
if not row.empty: print("add ΔH ...") exp_thermo = GetExpThermo(sample_number, plotting=False) @@ -280,3 +243,58 @@ def run(mpfile, **kwargs): inplace=True, ) mpfile_single.add_data_table(identifier, table, name="raw") + +def populate_data_structures(exp_table, hdata): + d = RecursiveDict() + d["data"] = RecursiveDict() + compstr = hdata["pars"]["theo_compstr"] + row = exp_table.loc[exp_table["theo_compstr"] == compstr] + if not row.empty: + sample_number = int(row.iloc[0]["sample_number"]) + d["pars"] = get_fit_pars(sample_number) + d["data"]["availability"] = "Exp+Theo" + else: + d["pars"] = RecursiveDict() + d["data"]["availability"] = "Theo" + return d,compstr,row,sample_number + +def _add_energy_analysis(frame, mpfile_single, identifier, compstr): + print("add energy analysis ...") + group = frame.query('compstr.str.contains("{}")'.format(compstr[:-1])) + group.drop(labels="compstr", axis=1, inplace=True) + for prodstr, subgroup in group.groupby(["prodstr", "prodstr_alt"], sort=False): + subgroup.drop(labels=["prodstr", "prodstr_alt"], axis=1, inplace=True) + for unstable, subsubgroup in subgroup.groupby("unstable", sort=False): + subsubgroup.drop(labels="unstable", axis=1, inplace=True) + name = "energy-analysis_{}_{}".format( + "unstable" if unstable else "stable", "-".join(prodstr) + ) + print(name) + mpfile_single.add_data_table(identifier, subsubgroup, name) + +def _build_frame(data): + l = [ + dict(sdoc, parameters=doc["_id"]) + for doc in data + for sdoc in doc["energy_analysis"] + ] + frame = pd.DataFrame(l) + parameters = frame["parameters"] + frame.drop(labels=["parameters"], axis=1, inplace=True) + frame.insert(0, "parameters", parameters) + print("energy dataframe:", frame.shape) + return frame + +def _load_data(mpfile, solar_perovskite): + input_files = mpfile.hdata.general["input_files"] + input_dir = os.path.dirname(solar_perovskite.__file__) + input_file = os.path.join(input_dir, input_files["exp"]) + exp_table = read_csv(open(input_file, "r").read().replace(";", ",")) + print("exp data loaded.") + with open(os.path.join(input_dir, input_files["theo"]), "r") as f: + theo_data = json.loads(f.read()).pop("collection") + print("theo data loaded.") + with open(input_files["energy"], "r") as f: + data = json.load(f).pop("collection") + print("energy data loaded.") + return exp_table,data From b56e230a24be52bf342e7076e54aed6de84f88c1 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 15:57:29 +0200 Subject: [PATCH 09/16] mpcontribs-io\mpcontribs\io\core\mpfile.py too-many-branches The method add_structure of class MPFileCore had 17 branches while pylint recommends to have no more than 12. I extracted a small method _build_structure that builds the structure from the source while using many branches to handle the source possible types. 
--- mpcontribs-io/mpcontribs/io/core/mpfile.py | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/mpcontribs-io/mpcontribs/io/core/mpfile.py b/mpcontribs-io/mpcontribs/io/core/mpfile.py index 990597f78..1f67ea80d 100644 --- a/mpcontribs-io/mpcontribs/io/core/mpfile.py +++ b/mpcontribs-io/mpcontribs/io/core/mpfile.py @@ -217,18 +217,7 @@ def add_structure(self, source, name=None, identifier=None, fmt=None): from pymatgen.core import Structure from pymatgen.ext.matproj import MPRester - if isinstance(source, Structure): - structure = source - elif isinstance(source, dict): - structure = Structure.from_dict(source) - elif os.path.exists(source): - structure = Structure.from_file(source, sort=True) - elif isinstance(source, six.string_types): - if fmt is None: - raise ValueError("Need fmt to get structure from string!") - structure = Structure.from_str(source, fmt, sort=True) - else: - raise ValueError(source, "not supported!") + structure = self._build_structure(source, fmt) if name is not None: if not isinstance(name, six.string_types): @@ -276,6 +265,21 @@ def add_structure(self, source, name=None, identifier=None, fmt=None): ) return identifier + def _build_structure(self, source, fmt): + if isinstance(source, Structure): + structure = source + elif isinstance(source, dict): + structure = Structure.from_dict(source) + elif os.path.exists(source): + structure = Structure.from_file(source, sort=True) + elif isinstance(source, six.string_types): + if fmt is None: + raise ValueError("Need fmt to get structure from string!") + structure = Structure.from_str(source, fmt, sort=True) + else: + raise ValueError(source, "not supported!") + return structure + def __repr__(self): return self.get_string(df_head_only=True) From bed45b5fa2921eab9423558fcddbef7b8a734d1a Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 16:15:41 +0200 Subject: [PATCH 10/16] mpcontribs-portal\mpcontribs\users\dilute_solute_diffusion\pre_submission.py too-many-statements Function run had 117 statements while pylint recommends not to have more than 50. The function was actually structured since it had different logics for different crystal structures. I extracted small unrelated functions for these logics. --- .../dilute_solute_diffusion/pre_submission.py | 295 ++++++++++-------- 1 file changed, 160 insertions(+), 135 deletions(-) diff --git a/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py b/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py index 045f5bd71..fcfca365e 100644 --- a/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py +++ b/mpcontribs-portal/mpcontribs/users/dilute_solute_diffusion/pre_submission.py @@ -22,30 +22,12 @@ def run(mpfile, hosts=None, download=False): if download or not os.path.exists(fpath): - figshare_id = 1546772 - url = "https://api.figshare.com/v2/articles/{}".format(figshare_id) - print("get figshare article {}".format(figshare_id)) - r = requests.get(url) - figshare = json.loads(r.content) - print("version =", figshare["version"]) # TODO set manually in "other"? 
- - print("read excel from figshare into DataFrame") - df_dct = None - for d in figshare["files"]: - if "xlsx" in d["name"]: - # Dict of DataFrames is returned, with keys representing sheets - df_dct = read_excel(d["download_url"], sheet_name=None) - break + df_dct = download_data(fpath) + if df_dct is None: print("no excel sheet found on figshare") return - print("save excel to disk") - writer = ExcelWriter(fpath) - for sheet, df in df_dct.items(): - df.to_excel(writer, sheet) - writer.save() - else: df_dct = read_excel(fpath, sheet_name=None) @@ -63,33 +45,14 @@ def run(mpfile, hosts=None, download=False): elif isinstance(hosts, list) and not host in hosts: continue - print("get mp-id for {}".format(host)) - mpid = None - for doc in mpr.query( - criteria={"pretty_formula": host}, properties={"task_id": 1} - ): - if "decomposes_to" not in doc["sbxd"][0]: - mpid = doc["task_id"] - break + mpid = get_mpid(mpr, host) if mpid is None: print("mp-id for {} not found".format(host)) continue print("add host info for {}".format(mpid)) hdata = host_info[host].to_dict(into=RecursiveDict) - for k in list(hdata.keys()): - v = hdata.pop(k) - ks = k.split() - if ks[0] not in hdata: - hdata[ks[0]] = RecursiveDict() - unit = ks[-1][1:-1] if ks[-1].startswith("[") else "" - subkey = "_".join(ks[1:-1] if unit else ks[1:]).split(",")[0] - if subkey == "lattice_constant": - unit = "Å" - try: - hdata[ks[0]][subkey] = clean_value(v, unit.replace("angstrom", "Å")) - except ValueError: - hdata[ks[0]][subkey] = v + _add_host_info(hdata) hdata["formula"] = host df = df_dct["{}-X".format(host)] rows = list(isnull(df).any(1).nonzero()[0]) @@ -100,100 +63,25 @@ def run(mpfile, hosts=None, download=False): df.drop(rows, inplace=True) mpfile.add_hierarchical_data(nest_dict(hdata, ["data"]), identifier=mpid) - print("add table for D₀/Q data for {}".format(mpid)) - df.set_index(df["Solute element number"], inplace=True) - df.drop("Solute element number", axis=1, inplace=True) - df.columns = df.iloc[0] - df.index.name = "index" - df.drop("Solute element name", inplace=True) - df = df.T.reset_index() - if str(host) == "Fe": - df_D0_Q = df[ - [ - "Solute element name", - "Solute D0, paramagnetic [cm^2/s]", - "Solute Q, paramagnetic [eV]", - ] - ] - elif hdata["Host"]["crystal_structure"] == "HCP": - df_D0_Q = df[ - [ - "Solute element name", - "Solute D0 basal [cm^2/s]", - "Solute Q basal [eV]", - ] - ] - else: - df_D0_Q = df[["Solute element name", "Solute D0 [cm^2/s]", "Solute Q [eV]"]] - df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"] - anums = [z[el] for el in df_D0_Q["Solute"]] - df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index)) - df_D0_Q.sort_values("Z", inplace=True) - df_D0_Q.reset_index(drop=True, inplace=True) - mpfile.add_data_table(mpid, df_D0_Q, "D₀_Q") + df = _add_table(mpfile, host, mpid, hdata, df) if hdata["Host"]["crystal_structure"] == "BCC": - print("add table for hop activation barriers for {} (BCC)".format(mpid)) - columns_E = ( - ["Hop activation barrier, E_{} [eV]".format(i) for i in range(2, 5)] - + ["Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)] - + ["Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)] - + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(5, 7)] - ) - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ( - ["Solute"] - + ["E{} [eV]".format(i) for i in ["₂", "₃", "₄"]] - + ["E`{} [eV]".format(i) for i in ["₃", "₄"]] - + ["E``{} [eV]".format(i) for i in ["₃", "₄"]] - + ["E{} [eV]".format(i) for i in ["₅", 
"₆"]] - ) - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - - print("add table for hop attempt frequencies for {} (BCC)".format(mpid)) - columns_v = ( - ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(2, 5)] - + ["Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)] - + ["Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)] - + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(5, 7)] - ) - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ( - ["Solute"] - + ["v{} [THz]".format(i) for i in ["₂", "₃", "₄"]] - + ["v`{} [THz]".format(i) for i in ["₃", "₄"]] - + ["v``{} [THz]".format(i) for i in ["₃", "₄"]] - + ["v{} [THz]".format(i) for i in ["₅", "₆"]] - ) - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + _handle_BBC(mpfile, mpid, df) elif hdata["Host"]["crystal_structure"] == "FCC": - print("add table for hop activation barriers for {} (FCC)".format(mpid)) - columns_E = [ - "Hop activation barrier, E_{} [eV]".format(i) for i in range(5) - ] - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ["Solute"] + [ - "E{} [eV]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] - ] - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - - print("add table for hop attempt frequencies for {} (FCC)".format(mpid)) - columns_v = [ - "Hop attempt frequency, v_{} [THz]".format(i) for i in range(5) - ] - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ["Solute"] + [ - "v{} [THz]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] - ] - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + _handle_FCC(mpfile, mpid, df) elif hdata["Host"]["crystal_structure"] == "HCP": - print("add table for hop activation barriers for {} (HCP)".format(mpid)) - columns_E = [ + _handle_HCP(mpfile, mpid, df) + + print("DONE") + +def _handle_HCP(mpfile, mpid, df): + print("add table for hop activation barriers for {} (HCP)".format(mpid)) + columns_E = [ "Hop activation barrier, E_X [eV]", "Hop activation barrier, E'_X [eV]", "Hop activation barrier, E_a [eV]", @@ -203,8 +91,8 @@ def run(mpfile, hosts=None, download=False): "Hop activation barrier, E_c [eV]", "Hop activation barrier, E'_c [eV]", ] - df_E = df[["Solute element name"] + columns_E] - df_E.columns = ["Solute"] + [ + df_E = df[["Solute element name"] + columns_E] + df_E.columns = ["Solute"] + [ "Eₓ [eV]", "E`ₓ [eV]", "Eₐ [eV]", @@ -214,17 +102,154 @@ def run(mpfile, hosts=None, download=False): "Eꪱ [eV]", "E`ꪱ [eV]", ] - mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") - print("add table for hop attempt frequencies for {} (HCP)".format(mpid)) - columns_v = ["Hop attempt frequency, v_a [THz]"] + [ + print("add table for hop attempt frequencies for {} (HCP)".format(mpid)) + columns_v = ["Hop attempt frequency, v_a [THz]"] + [ "Hop attempt frequency, v_X [THz]" ] - df_v = df[["Solute element name"] + columns_v] - df_v.columns = ["Solute"] + ["vₐ [THz]"] + ["vₓ [THz]"] - mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ["Solute"] + ["vₐ [THz]"] + ["vₓ [THz]"] + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") - print("DONE") +def _handle_FCC(mpfile, mpid, df): + print("add table for hop activation barriers for {} (FCC)".format(mpid)) + columns_E = [ + "Hop activation barrier, E_{} [eV]".format(i) for i in range(5) + ] + df_E = df[["Solute element name"] + columns_E] + 
df_E.columns = ["Solute"] + [ + "E{} [eV]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] + ] + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + + print("add table for hop attempt frequencies for {} (FCC)".format(mpid)) + columns_v = [ + "Hop attempt frequency, v_{} [THz]".format(i) for i in range(5) + ] + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ["Solute"] + [ + "v{} [THz]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"] + ] + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + +def _handle_BBC(mpfile, mpid, df): + print("add table for hop activation barriers for {} (BCC)".format(mpid)) + columns_E = ( + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(2, 5)] + + ["Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)] + + ["Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)] + + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(5, 7)] + ) + df_E = df[["Solute element name"] + columns_E] + df_E.columns = ( + ["Solute"] + + ["E{} [eV]".format(i) for i in ["₂", "₃", "₄"]] + + ["E`{} [eV]".format(i) for i in ["₃", "₄"]] + + ["E``{} [eV]".format(i) for i in ["₃", "₄"]] + + ["E{} [eV]".format(i) for i in ["₅", "₆"]] + ) + mpfile.add_data_table(mpid, df_E, "hop_activation_barriers") + + print("add table for hop attempt frequencies for {} (BCC)".format(mpid)) + columns_v = ( + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(2, 5)] + + ["Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)] + + ["Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)] + + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(5, 7)] + ) + df_v = df[["Solute element name"] + columns_v] + df_v.columns = ( + ["Solute"] + + ["v{} [THz]".format(i) for i in ["₂", "₃", "₄"]] + + ["v`{} [THz]".format(i) for i in ["₃", "₄"]] + + ["v``{} [THz]".format(i) for i in ["₃", "₄"]] + + ["v{} [THz]".format(i) for i in ["₅", "₆"]] + ) + mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies") + +def _add_table(mpfile, host, mpid, hdata, df): + print("add table for D₀/Q data for {}".format(mpid)) + df.set_index(df["Solute element number"], inplace=True) + df.drop("Solute element number", axis=1, inplace=True) + df.columns = df.iloc[0] + df.index.name = "index" + df.drop("Solute element name", inplace=True) + df = df.T.reset_index() + if str(host) == "Fe": + df_D0_Q = df[ + [ + "Solute element name", + "Solute D0, paramagnetic [cm^2/s]", + "Solute Q, paramagnetic [eV]", + ] + ] + elif hdata["Host"]["crystal_structure"] == "HCP": + df_D0_Q = df[ + [ + "Solute element name", + "Solute D0 basal [cm^2/s]", + "Solute Q basal [eV]", + ] + ] + else: + df_D0_Q = df[["Solute element name", "Solute D0 [cm^2/s]", "Solute Q [eV]"]] + df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"] + anums = [z[el] for el in df_D0_Q["Solute"]] + df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index)) + df_D0_Q.sort_values("Z", inplace=True) + df_D0_Q.reset_index(drop=True, inplace=True) + mpfile.add_data_table(mpid, df_D0_Q, "D₀_Q") + return df + +def _add_host_info(hdata): + for k in list(hdata.keys()): + v = hdata.pop(k) + ks = k.split() + if ks[0] not in hdata: + hdata[ks[0]] = RecursiveDict() + unit = ks[-1][1:-1] if ks[-1].startswith("[") else "" + subkey = "_".join(ks[1:-1] if unit else ks[1:]).split(",")[0] + if subkey == "lattice_constant": + unit = "Å" + try: + hdata[ks[0]][subkey] = clean_value(v, unit.replace("angstrom", "Å")) + except ValueError: + hdata[ks[0]][subkey] = v + +def get_mpid(mpr, 
host): + print("get mp-id for {}".format(host)) + mpid = None + for doc in mpr.query( + criteria={"pretty_formula": host}, properties={"task_id": 1} + ): + if "decomposes_to" not in doc["sbxd"][0]: + mpid = doc["task_id"] + break + return mpid + +def download_data(fpath): + figshare_id = 1546772 + url = "https://api.figshare.com/v2/articles/{}".format(figshare_id) + print("get figshare article {}".format(figshare_id)) + r = requests.get(url) + figshare = json.loads(r.content) + print("version =", figshare["version"]) # TODO set manually in "other"? + + print("read excel from figshare into DataFrame") + df_dct = None + for d in figshare["files"]: + if "xlsx" in d["name"]: + # Dict of DataFrames is returned, with keys representing sheets + df_dct = read_excel(d["download_url"], sheet_name=None) + break + if df_dct is not None: + print("save excel to disk") + writer = ExcelWriter(fpath) + for sheet, df in df_dct.items(): + df.to_excel(writer, sheet) + writer.save() + return df_dct mpfile = MPFile() From c1ceee325b84e87f1e3efbc88be796e706b28d95 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 17:16:19 +0200 Subject: [PATCH 11/16] mpcontribs-api\mpcontribs\api\notebooks\views.py too-many-statements The make function had 74 statement while pylint recommends to have at most 50. I extracted small methods handling parts of the function. --- .../mpcontribs/api/notebooks/views.py | 146 +++++++++++------- 1 file changed, 87 insertions(+), 59 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/notebooks/views.py b/mpcontribs-api/mpcontribs/api/notebooks/views.py index e3bf88af8..19dfdc016 100644 --- a/mpcontribs-api/mpcontribs/api/notebooks/views.py +++ b/mpcontribs-api/mpcontribs/api/notebooks/views.py @@ -141,14 +141,7 @@ def make(projects=None, cids=None, force=False): start = time.perf_counter() remaining_time = rq.default_timeout - 5 mask = ["id", "needs_build", "notebook"] - query = Q() - - if projects: - query &= Q(project__in=projects) - if cids: - query &= Q(id__in=cids) - if not force: - query &= Q(needs_build=True) | Q(needs_build__exists=False) + query = _build_query(projects, cids, force) job = get_current_job() ret = {"input": {"projects": projects, "cids": cids, "force": force}} @@ -181,14 +174,7 @@ def make(projects=None, cids=None, force=False): not getattr(document, "needs_build", True): continue - if document.notebook: - try: - nb = Notebooks.objects.get(id=document.notebook.id) - nb.delete() - document.update(unset__notebook="") - logger.debug(f"Notebook {document.notebook.id} deleted.") - except DoesNotExist: - pass + _handle_document_notebook(document) cid = str(document.id) logger.debug(f"prep notebook for {cid} ...") @@ -212,35 +198,11 @@ def make(projects=None, cids=None, force=False): ])), ] - if document.tables: - cells.append(nbf.new_markdown_cell("## Tables")) - for table in document.tables: - cells.append( - nbf.new_code_cell("\n".join([ - f't = client.get_table("{table.id}")', - 't.display()' - ])) - ) + _handle_document_tables(document, cells) - if document.structures: - cells.append(nbf.new_markdown_cell("## Structures")) - for structure in document.structures: - cells.append( - nbf.new_code_cell("\n".join([ - f's = client.get_structure("{structure.id}")', - 's.display()' - ])) - ) + _handle_document_structures(document, cells) - if document.attachments: - cells.append(nbf.new_markdown_cell("## Attachments")) - for attachment in document.attachments: - cells.append( - nbf.new_code_cell("\n".join([ - f'a = client.get_attachment("{attachment.id}")', - 
'a.info()' - ])) - ) + _handle_document_attachmentss(document, cells) try: outputs = execute_cells(cid, cells) @@ -265,23 +227,10 @@ def make(projects=None, cids=None, force=False): for idx, output in outputs.items(): cells[idx]["outputs"] = output - doc = nbf.new_notebook() - doc["cells"] = [ - nbf.new_code_cell("from mpcontribs.client import Client"), - nbf.new_code_cell(f'client = Client()'), - ] - doc["cells"] += cells[1:] # skip localhost Client - - try: - nb = Notebooks(**doc).save() - document.update(notebook=nb, needs_build=False) - except Exception as e: - if job: - restart_kernels() + doc = _set_doc(cells) - ret["result"] = { - "status": "ERROR", "cid": cid, "count": count, "total": total, "exc": str(e) - } + ret = _update_doc_notebook(doc, document, job, cid, count, total) + if ret: return ret count += 1 @@ -291,3 +240,82 @@ def make(projects=None, cids=None, force=False): ret["result"] = {"status": "COMPLETED", "count": count, "total": total} return ret + +def _update_doc_notebook(doc, document, job, cid, count, total): + ret = None + try: + nb = Notebooks(**doc).save() + document.update(notebook=nb, needs_build=False) + except Exception as e: + if job: + restart_kernels() + + ret["result"] = { + "status": "ERROR", "cid": cid, "count": count, "total": total, "exc": str(e) + } + return ret + + +def _set_doc(cells): + doc = nbf.new_notebook() + doc["cells"] = [ + nbf.new_code_cell("from mpcontribs.client import Client"), + nbf.new_code_cell(f'client = Client()'), + ] + doc["cells"] += cells[1:] # skip localhost Client + + return doc + +def _handle_document_notebook(document): + if document.notebook: + try: + nb = Notebooks.objects.get(id=document.notebook.id) + nb.delete() + document.update(unset__notebook="") + logger.debug(f"Notebook {document.notebook.id} deleted.") + except DoesNotExist: + pass + +def _handle_document_attachmentss(document, cells): + if document.attachments: + cells.append(nbf.new_markdown_cell("## Attachments")) + for attachment in document.attachments: + cells.append( + nbf.new_code_cell("\n".join([ + f'a = client.get_attachment("{attachment.id}")', + 'a.info()' + ])) + ) + +def _handle_document_structures(document, cells): + if document.structures: + cells.append(nbf.new_markdown_cell("## Structures")) + for structure in document.structures: + cells.append( + nbf.new_code_cell("\n".join([ + f's = client.get_structure("{structure.id}")', + 's.display()' + ])) + ) + +def _handle_document_tables(document, cells): + if document.tables: + cells.append(nbf.new_markdown_cell("## Tables")) + for table in document.tables: + cells.append( + nbf.new_code_cell("\n".join([ + f't = client.get_table("{table.id}")', + 't.display()' + ])) + ) + +def _build_query(projects, cids, force): + query = Q() + + if projects: + query &= Q(project__in=projects) + if cids: + query &= Q(id__in=cids) + if not force: + query &= Q(needs_build=True) | Q(needs_build__exists=False) + return query From 5c5b7505ac3729b776a8198c3a5cfa22e08e5814 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Fri, 15 Nov 2024 17:41:28 +0200 Subject: [PATCH 12/16] mpcontribs-api\mpcontribs\api\contributions\document.py broad-exception-caught Catching Exception instead of specific exceptions might hide unexpected ones. Function format_cell caught Exception. 
isnan does not raise exceptions:
https://docs.python.org/3/library/math.html#math.isnan

str might raise UnicodeEncodeError:
https://stackoverflow.com/questions/33556465/is-there-any-object-can-make-str-function-throw-error-or-exception-in-python
---
 mpcontribs-api/mpcontribs/api/contributions/document.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpcontribs-api/mpcontribs/api/contributions/document.py b/mpcontribs-api/mpcontribs/api/contributions/document.py
index 55c9e204b..d46206196 100644
--- a/mpcontribs-api/mpcontribs/api/contributions/document.py
+++ b/mpcontribs-api/mpcontribs/api/contributions/document.py
@@ -74,7 +74,7 @@ def format_cell(cell):
     q = truncate_digits(q)
     try:
         return str(q.nominal_value) if isnan(q.std_dev) else str(q)
-    except Exception:
+    except UnicodeEncodeError:
         return cell

From ae585188468ab18f03950987b892c694e338d624 Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Sat, 16 Nov 2024 15:55:34 +0200
Subject: [PATCH 13/16] mpcontribs-io\mpcontribs\io\core\components\tdata.py broad-exception-caught

Method to_backgrid_dict of class Table caught Exception. Catching Exception
instead of specific exceptions might hide unexpected ones. I changed it to
catch AttributeError, as suggested by the StackOverflow post in the comment
above.
---
 mpcontribs-io/mpcontribs/io/core/components/tdata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mpcontribs-io/mpcontribs/io/core/components/tdata.py b/mpcontribs-io/mpcontribs/io/core/components/tdata.py
index 17293619a..c0dfc5d20 100644
--- a/mpcontribs-io/mpcontribs/io/core/components/tdata.py
+++ b/mpcontribs-io/mpcontribs/io/core/components/tdata.py
@@ -112,7 +112,7 @@ def to_backgrid_dict(self):
                 if not all([result.scheme, result.netloc, result.path]):
                     break
                 is_url_column = True
-            except Exception:
+            except AttributeError:
                 break

         cell_type = "uri" if is_url_column else "string"

From 3789a49cbb4a2e127b8be7a8c8e30bac74f1ecfa Mon Sep 17 00:00:00 2001
From: evidencebp
Date: Sat, 16 Nov 2024 17:38:28 +0200
Subject: [PATCH 14/16] mpcontribs-api\mpcontribs\api\core.py too-many-branches get_specs

Function get_specs had 19 branches while pylint suggests not to have more than
12. I extracted methods for populating the data structures and for handling
the different method names.
--- mpcontribs-api/mpcontribs/api/core.py | 278 ++++++++++++++------------ 1 file changed, 153 insertions(+), 125 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 6524d099f..647475fb6 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -120,140 +120,26 @@ def get_specs(klass, method, collection): "description": description, } - field_pagination_params = [] - for field, limits in klass.resource.fields_to_paginate.items(): - field_pagination_params.append( - { - "name": f"{field}_page", - "in": "query", - "default": 1, - "type": "integer", - "description": f"page to retrieve for {field} field", - } - ) - field_pagination_params.append( - { - "name": f"{field}_per_page", - "in": "query", - "default": limits[0], - "maximum": limits[1], - "type": "integer", - "description": f"number of items to retrieve per page for {field} field", - } - ) + field_pagination_params = _populate_field_pagination_params(klass) - filter_params = [] - if hasattr(klass.resource, "filters"): - for k, v in klass.resource.filters.items(): - filter_params += get_filter_params(k, v) + filter_params = _populate_filter_params(klass) - order_params = [] - if klass.resource.allowed_ordering: - allowed_ordering = [ - o.pattern if isinstance(o, Pattern) else o - for o in klass.resource.allowed_ordering - ] - order_params = [ - { - "name": "_sort", - "in": "query", - "type": "string", - "description": f"sort {collection} via {allowed_ordering}. Prepend +/- for asc/desc.", - } - ] + order_params = _populate_order_params(klass, collection) spec = None if method_name == "Fetch": - params = [ - { - "name": "pk", - "in": "path", - "type": "string", - "required": True, - "description": f"{collection[:-1]} (primary key)", - } - ] - if fields_param is not None: - params.append(fields_param) - params += field_pagination_params - spec = { - "summary": f"Retrieve a {collection[:-1]}.", - "operationId": f"get{doc_name}By{id_field}", - "parameters": params, - "responses": { - 200: { - "description": f"single {collection} entry", - "schema": {"$ref": f"#/definitions/{klass.schema_name}"}, - }, - "default": default_response, - }, - } + _handle_fetch_method(klass, collection, default_response + , id_field, doc_name, fields_param, field_pagination_params) elif method_name == "BulkFetch": - params = [fields_param] if fields_param is not None else [] - params += field_pagination_params - params += order_params - params += filter_params - schema_props = { - "data": { - "type": "array", - "items": {"$ref": f"#/definitions/{klass.schema_name}"}, - } - } - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and retrieve {collection}.", - "operationId": f"query{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"list of {collection}", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + _handle_bulk_fetch_method(klass, collection, method_name, default_response + , doc_name, fields_param, field_pagination_params + , filter_params, order_params) elif method_name == "Download": - params = [ - { - "name": "short_mime", - "in": "path", - "type": "string", - "required": True, - "description": "MIME Download Type: gz", - "default": "gz", - }, - { - "name": 
"format", - "in": "query", - "type": "string", - "required": True, - "description": f"download {collection} in different formats: {klass.resource.download_formats}", - }, - ] - params += [fields_param] if fields_param is not None else [] - params += order_params - params += filter_params - if klass.resource.paginate: - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and download {collection}.", - "operationId": f"download{doc_name}s", - "parameters": params, - "produces": ["application/gzip"], - "responses": { - 200: { - "description": f"{collection} download", - "schema": {"type": "file"}, - }, - "default": default_response, - }, - } + _handle_downlaod_method(klass, collection, method_name + , default_response, doc_name, fields_param + , filter_params, order_params) elif method_name == "Create": spec = { @@ -407,6 +293,148 @@ def get_specs(klass, method, collection): return spec +def _handle_downlaod_method(klass, collection, method_name, default_response, doc_name, fields_param, filter_params, order_params): + params = [ + { + "name": "short_mime", + "in": "path", + "type": "string", + "required": True, + "description": "MIME Download Type: gz", + "default": "gz", + }, + { + "name": "format", + "in": "query", + "type": "string", + "required": True, + "description": f"download {collection} in different formats: {klass.resource.download_formats}", + }, + ] + params += [fields_param] if fields_param is not None else [] + params += order_params + params += filter_params + if klass.resource.paginate: + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and download {collection}.", + "operationId": f"download{doc_name}s", + "parameters": params, + "produces": ["application/gzip"], + "responses": { + 200: { + "description": f"{collection} download", + "schema": {"type": "file"}, + }, + "default": default_response, + }, + } + +def _handle_bulk_fetch_method(klass, collection, method_name, default_response, doc_name, fields_param, field_pagination_params, filter_params, order_params): + params = [fields_param] if fields_param is not None else [] + params += field_pagination_params + params += order_params + params += filter_params + schema_props = { + "data": { + "type": "array", + "items": {"$ref": f"#/definitions/{klass.schema_name}"}, + } + } + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and retrieve {collection}.", + "operationId": f"query{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"list of {collection}", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + +def _handle_fetch_method(klass, collection, default_response, id_field, doc_name, fields_param, field_pagination_params): + params = [ + { + "name": "pk", + "in": "path", + "type": "string", + "required": True, + "description": f"{collection[:-1]} (primary key)", + } + ] + if fields_param is not None: + params.append(fields_param) + params += field_pagination_params + spec = { + "summary": f"Retrieve a {collection[:-1]}.", + "operationId": f"get{doc_name}By{id_field}", + "parameters": params, + "responses": { + 200: { + "description": f"single {collection} entry", + "schema": {"$ref": f"#/definitions/{klass.schema_name}"}, + }, + "default": 
default_response, + }, + } + +def _populate_order_params(klass, collection): + order_params = [] + if klass.resource.allowed_ordering: + allowed_ordering = [ + o.pattern if isinstance(o, Pattern) else o + for o in klass.resource.allowed_ordering + ] + order_params = [ + { + "name": "_sort", + "in": "query", + "type": "string", + "description": f"sort {collection} via {allowed_ordering}. Prepend +/- for asc/desc.", + } + ] + + return order_params + +def _populate_filter_params(klass): + filter_params = [] + if hasattr(klass.resource, "filters"): + for k, v in klass.resource.filters.items(): + filter_params += get_filter_params(k, v) + return filter_params + +def _populate_field_pagination_params(klass): + field_pagination_params = [] + for field, limits in klass.resource.fields_to_paginate.items(): + field_pagination_params.append( + { + "name": f"{field}_page", + "in": "query", + "default": 1, + "type": "integer", + "description": f"page to retrieve for {field} field", + } + ) + field_pagination_params.append( + { + "name": f"{field}_per_page", + "in": "query", + "default": limits[0], + "maximum": limits[1], + "type": "integer", + "description": f"number of items to retrieve per page for {field} field", + } + ) + + return field_pagination_params + class SwaggerView(OriginalSwaggerView, ResourceView): """A class-based view defining additional methods""" From 6eadee33ad618ba96c750c1bdd7e1978ca060cc7 Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sat, 16 Nov 2024 17:39:36 +0200 Subject: [PATCH 15/16] More of prior commit --- mpcontribs-api/mpcontribs/api/core.py | 98 ++++++++++++++------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 647475fb6..8ada428a1 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -223,54 +223,10 @@ def get_specs(klass, method, collection): }, } elif method_name == "BulkUpdate": - params = filter_params - params.append( - { - "name": f"{collection}", - "in": "body", - "description": f"The object to use for {collection} bulk update", - "schema": {"type": "object"}, - } - ) - schema_props = {"count": {"type": "integer"}} - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and update {collection}.", - "operationId": f"update{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"Number of {collection} updated", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + _handle_bulk_update_method(klass, collection, method_name, default_response, doc_name, filter_params) elif method_name == "BulkDelete": - params = filter_params - schema_props = {"count": {"type": "integer"}} - if klass.resource.paginate: - schema_props["has_more"] = {"type": "boolean"} - schema_props["total_count"] = {"type": "integer"} - schema_props["total_pages"] = {"type": "integer"} - params += get_limit_params(klass.resource, method_name) - spec = { - "summary": f"Filter and delete {collection}.", - "operationId": f"delete{doc_name}s", - "parameters": params, - "responses": { - 200: { - "description": f"Number of {collection} deleted", - "schema": {"type": "object", "properties": schema_props}, - }, - "default": default_response, - }, - } + 
_handle_bulk_delete_method(klass, collection, method_name, default_response, doc_name, filter_params) elif method_name == "Delete": spec = { @@ -293,6 +249,56 @@ def get_specs(klass, method, collection): return spec +def _handle_bulk_delete_method(klass, collection, method_name, default_response, doc_name, filter_params): + params = filter_params + schema_props = {"count": {"type": "integer"}} + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and delete {collection}.", + "operationId": f"delete{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"Number of {collection} deleted", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + +def _handle_bulk_update_method(klass, collection, method_name, default_response, doc_name, filter_params): + params = filter_params + params.append( + { + "name": f"{collection}", + "in": "body", + "description": f"The object to use for {collection} bulk update", + "schema": {"type": "object"}, + } + ) + schema_props = {"count": {"type": "integer"}} + if klass.resource.paginate: + schema_props["has_more"] = {"type": "boolean"} + schema_props["total_count"] = {"type": "integer"} + schema_props["total_pages"] = {"type": "integer"} + params += get_limit_params(klass.resource, method_name) + spec = { + "summary": f"Filter and update {collection}.", + "operationId": f"update{doc_name}s", + "parameters": params, + "responses": { + 200: { + "description": f"Number of {collection} updated", + "schema": {"type": "object", "properties": schema_props}, + }, + "default": default_response, + }, + } + def _handle_downlaod_method(klass, collection, method_name, default_response, doc_name, fields_param, filter_params, order_params): params = [ { From 76d7052aae43e62fddcd609b3284c57e95eb33ce Mon Sep 17 00:00:00 2001 From: evidencebp Date: Sun, 17 Nov 2024 09:28:47 +0200 Subject: [PATCH 16/16] mpcontribs-api\mpcontribs\api\core.py too-many-branches has_read_permission Method has_read_permission of class SwaggerView had 20 branches while pylint recommends to have no more than 12. I extracted method to reduce that, making the code more structured. This fixed also the too-many-return-statements and too-many-statements alerts in the same method. 
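The thresholds cited across this series (5 nested blocks, 12 branches, 50 statements) are pylint's default design-checker limits, so a local check needs no extra configuration. A minimal sketch, assuming pylint is installed:

    pylint --disable=all \
        --enable=too-many-branches,too-many-statements,too-many-nested-blocks,too-many-return-statements \
        mpcontribs-api/mpcontribs/api/core.py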
--- mpcontribs-api/mpcontribs/api/core.py | 101 +++++++++++++++----------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/mpcontribs-api/mpcontribs/api/core.py b/mpcontribs-api/mpcontribs/api/core.py index 8ada428a1..7d78a3e19 100644 --- a/mpcontribs-api/mpcontribs/api/core.py +++ b/mpcontribs-api/mpcontribs/api/core.py @@ -574,17 +574,11 @@ def has_read_permission(self, request, qs): approved_public_filter = Q(is_public=True, is_approved=True) if request.path.startswith("/projects/"): - # external or internal requests can both read full project info - # anonymous requests can only read public approved projects - if is_anonymous: - return qs.filter(approved_public_filter) - - # authenticated requests can read approved public or accessible non-public projects - qfilter = approved_public_filter | Q(owner=username) - if groups: - qfilter |= Q(name__in=list(groups)) - - return qs.filter(qfilter) + return self._handle_projects(is_anonymous + , qs + , approved_public_filter + , username + , groups) else: # contributions are set private/public independent from projects # anonymous requests: @@ -596,32 +590,11 @@ def has_read_permission(self, request, qs): component = request.path.split("/")[1] if component == "contributions": - q = qs._query - if is_anonymous and is_external: - qs = qs.exclude("data") - - if q and "project" in q and isinstance(q["project"], str): - projects = self.get_projects() - try: - project = projects.get(name=q["project"]) - except DoesNotExist: - return qs.none() - - if project.owner == username or project.name in groups: - return qs - elif project.is_public and project.is_approved: - return qs.filter(is_public=True) - else: - return qs.none() - else: - names = None - if q and "project" in q and "$in" in q["project"]: - names = q.pop("project").pop("$in") - - qfilter = self.get_projects_filter( - username, groups, filter_names=names - ) - return qs.filter(qfilter) + return self._handle_contributions(qs + , is_anonymous + , is_external + , username + , groups) else: # get component Object IDs for queryset pk = request.view_args.get("pk") @@ -630,10 +603,7 @@ def has_read_permission(self, request, qs): resource = get_resource(component) qfilter = lambda qs: qs.clone() - if pk: - ids = [resource.get_object(pk, qfilter=qfilter).id] - else: - ids = [o.id for o in resource.get_objects(qfilter=qfilter)[0]] + ids = self._set_ids(qfilter, pk, resource) if not ids: return qs.none() @@ -664,6 +634,55 @@ def has_read_permission(self, request, qs): return qs + def _set_ids(self, qfilter, pk, resource): + if pk: + ids = [resource.get_object(pk, qfilter=qfilter).id] + else: + ids = [o.id for o in resource.get_objects(qfilter=qfilter)[0]] + return ids + + + def _handle_projects(self, is_anonymous, qs, approved_public_filter, username, groups): + # external or internal requests can both read full project info + # anonymous requests can only read public approved projects + if is_anonymous: + return qs.filter(approved_public_filter) + + # authenticated requests can read approved public or accessible non-public projects + qfilter = approved_public_filter | Q(owner=username) + if groups: + qfilter |= Q(name__in=list(groups)) + + return qs.filter(qfilter) + + def _handle_contributions(self, qs, is_anonymous, is_external, username, groups): + q = qs._query + if is_anonymous and is_external: + qs = qs.exclude("data") + + if q and "project" in q and isinstance(q["project"], str): + projects = self.get_projects() + try: + project = projects.get(name=q["project"]) + except 
DoesNotExist: + return qs.none() + + if project.owner == username or project.name in groups: + return qs + elif project.is_public and project.is_approved: + return qs.filter(is_public=True) + else: + return qs.none() + else: + names = None + if q and "project" in q and "$in" in q["project"]: + names = q.pop("project").pop("$in") + + qfilter = self.get_projects_filter( + username, groups, filter_names=names + ) + return qs.filter(qfilter) + def has_add_permission(self, request, obj): return self.is_admin_or_project_user(request, obj)