Skip to content

Commit

Permalink
Use raw strings to avoid 'invalid escape sequence' warnings in regex
Browse files Browse the repository at this point in the history
  • Loading branch information
joshua-hampton committed Jan 10, 2024
1 parent 4fbcf80 commit 0aba95d
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 18 deletions.
4 changes: 2 additions & 2 deletions checksit/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

# date format regex
# could be yyyy, yyyymm, yyyymmdd, yyyymmdd-HH, yyyymmdd-HHMM, yyyymmdd-HHMMSS
date_regex = re.compile("^\d{4}$|^\d{6}$|^\d{8}$|^\d{8}-\d{2}$|^\d{8}-\d{4}$|^\d{8}-\d{6}$")
date_regex = re.compile(r"^\d{4}$|^\d{6}$|^\d{8}$|^\d{8}-\d{2}$|^\d{8}-\d{4}$|^\d{8}-\d{6}$")

def _get_bounds_var_ids(dct):
return [var_id for var_id in dct["variables"] if (
Expand Down Expand Up @@ -313,7 +313,7 @@ def check_file_name(file_name, vocab_checks=None, **kwargs):

# check version number format
version_component = file_name_parts[-1].split(".nc")[0]
if not re.match("^v\d.\d$", version_component):
if not re.match(r"^v\d.\d$", version_component):
errors.append(f"[file name]: Invalid file name format - incorrect file version number {version_component}")

# check number of options - max length of split file name
Expand Down
22 changes: 11 additions & 11 deletions checksit/make_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ def make_amof_specs(version_number):
elif compliance.lower() == "valid url _or_ n/a":
rule = "regex-rule:valid-url-or-na"
elif "match: " in compliance.lower():
if 'YYYY-MM-DDThh:mm:ss\.\d+ _or_ N/A' in compliance:
if r'YYYY-MM-DDThh:mm:ss\.\d+ _or_ N/A' in compliance:
rule = "regex-rule:datetime-or-na"
elif 'vN.M' in compliance:
rule = "regex-rule:match:vN.M"
elif 'YYYY-MM-DDThh:mm:ss\.\d+' in compliance:
elif r'YYYY-MM-DDThh:mm:ss\.\d+' in compliance:
rule = "regex-rule:datetime"
elif '<number> m' in compliance:
rule = "regex:^-?\d+\.?\d* m$"
rule = r"regex:^-?\d+\.?\d* m$"
else:
rule = f"regex-rule:EDIT:{compliance}"
elif compliance.lower() in ["number","integer","int","float","string","str"]:
Expand Down Expand Up @@ -88,9 +88,9 @@ def make_amof_specs(version_number):
rule = f"rule-func:match-one-of:{options}"
else:
rule = f"UNKNOWN compliance: {compliance}"
rule = rule.replace('(','\(')
rule = rule.replace(')','\)')
rule = [ rule.replace(' ','\s') if "regex:" in rule else rule ][0]
rule = rule.replace('(',r'\(')
rule = rule.replace(')',r'\)')
rule = [ rule.replace(' ',r'\s') if "regex:" in rule else rule ][0]
attr_rules[attr] = rule


Expand Down Expand Up @@ -205,11 +205,11 @@ def make_amof_specs(version_number):
elif compliance.lower() == "valid url _or_ n/a":
rule = "regex-rule:valid-url-or-na"
elif "match: " in compliance.lower():
if 'YYYY-MM-DDThh:mm:ss\.\d+ _or_ N/A' in compliance:
if r'YYYY-MM-DDThh:mm:ss\.\d+ _or_ N/A' in compliance:
rule = "regex-rule:datetime-or-na"
elif 'vN.M' in compliance:
rule = "regex-rule:match:vN.M"
elif 'YYYY-MM-DDThh:mm:ss\.\d+' in compliance:
elif r'YYYY-MM-DDThh:mm:ss\.\d+' in compliance:
rule = "regex-rule:datetime"
else:
rule = f"regex-rule:EDIT:{compliance}"
Expand All @@ -232,9 +232,9 @@ def make_amof_specs(version_number):
rule = f"rule-func:match-one-of:{options}"
else:
rule = f"UNKNOWN compliance: {compliance}"
rule = rule.replace('(','\(')
rule = rule.replace(')','\)')
rule = [ rule.replace(' ','\s') if "regex:" in rule else rule ][0]
rule = rule.replace('(',r'\(')
rule = rule.replace(')',r'\)')
rule = [ rule.replace(' ',r'\s') if "regex:" in rule else rule ][0]
attr_rules[attr] = rule
prod_attrs_exist = True
else:
Expand Down
2 changes: 1 addition & 1 deletion checksit/readers/cdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _get_sections(self, lines, split_patterns, start_at):
if split_patterns:
splitter = split_patterns.popleft()
else:
line_no_comments = re.split(";\s+//.*$", line)[0].strip().rstrip(";").strip()
line_no_comments = re.split(r";\s+//.*$", line)[0].strip().rstrip(";").strip()
if not line_no_comments.startswith("//"):
current.append(line_no_comments)

Expand Down
2 changes: 1 addition & 1 deletion checksit/rules/rule_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def string_of_length(value, context, extras=None, label=""):
Matches string of length...
"""
spec = extras[0]
min_length = int(re.match("^(\d+)\+?", spec).groups()[0])
min_length = int(re.match(r"^(\d+)\+?", spec).groups()[0])

errors = []

Expand Down
4 changes: 2 additions & 2 deletions checksit/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def __init__(self):
"valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
"valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+))|" + _NOT_APPLICABLE_RULES,
"match:vN.M": r"v\d\.\d",
"datetime": "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
"datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
"datetime-or-na":
"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?)|" + _NOT_APPLICABLE_RULES,
r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?)|" + _NOT_APPLICABLE_RULES,
"number": r"-?\d+(\.\d+)?"
}

Expand Down
2 changes: 1 addition & 1 deletion checksit/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def summarise(log_files=None, log_directory=None, show_files=False,

for filename in log_files:
df = pd.read_csv(filename, sep=sep, index_col=None, header=None, names=headers)
df = df.replace({"^\s*|\s*$":""}, regex=True)
df = df.replace({r"^\s*|\s*$":""}, regex=True)
df["logfile"] = os.path.basename(filename)
count += len(df)
li.append(df)
Expand Down

0 comments on commit 0aba95d

Please sign in to comment.