From 9f61bdbc73e5801794b87e2a1b3c575a0f7d0760 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 11 Jan 2024 21:30:09 +0000 Subject: [PATCH 01/20] allow choice to be non-integer --- reproschema/redcap2reproschema.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 16b80c7..6535334 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -53,15 +53,22 @@ def parse_field_type_and_value(data, input_type_map): return input_type, value_type - def process_choices(choices_str): choices = [] for choice in choices_str.split("|"): parts = choice.split(", ") - choice_obj = {"schema:value": int(parts[0]), "schema:name": parts[1]} + # Try to convert the first part to an integer, if it fails, keep it as a string + try: + value = int(parts[0]) + except ValueError: + value = parts[0] + + choice_obj = {"schema:value": value, "schema:name": parts[1]} + if len(parts) == 3: # TODO: handle image url choice_obj["schema:image"] = f"{parts[2]}.png" + choices.append(choice_obj) return choices From 87073fcf5720ed5f2c06a98e828a086ed927cc32 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 11 Jan 2024 23:36:30 +0000 Subject: [PATCH 02/20] add yesno fieldtype conversion --- reproschema/redcap2reproschema.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 6535334..909b554 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -33,7 +33,6 @@ def process_visibility(data): } return visibility_obj - def parse_field_type_and_value(data, input_type_map): field_type = data.get("Field Type", "") @@ -53,10 +52,17 @@ def parse_field_type_and_value(data, input_type_map): return input_type, value_type -def process_choices(choices_str): +def process_choices(field_type, choices_str): + if field_type not in ['radio', 'dropdown']: # Handle only radio and dropdown types + return None + choices = [] for choice in choices_str.split("|"): parts = choice.split(", ") + if len(parts) < 2: + print(f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field") + continue + # Try to convert the first part to an integer, if it fails, keep it as a string try: value = int(parts[0]) @@ -64,15 +70,12 @@ def process_choices(choices_str): value = parts[0] choice_obj = {"schema:value": value, "schema:name": parts[1]} - if len(parts) == 3: - # TODO: handle image url + # Handle image url choice_obj["schema:image"] = f"{parts[2]}.png" - choices.append(choice_obj) return choices - def write_to_file(abs_folder_path, form_name, field_name, rowData): file_path = os.path.join( f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}" @@ -131,6 +134,15 @@ def process_row( if value_type: rowData["responseOptions"] = {"valueType": value_type} + if field_type == "yesno": + rowData["responseOptions"] = { + "valueType": "xsd:boolean", + "choices": [ + {"schema:value": 1, "schema:name": "Yes"}, + {"schema:value": 0, "schema:name": "No"} + ] + } + for key, value in field.items(): if schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) From eee664ac0442849050863b53f1af18dc7051ba68 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 11 Jan 2024 23:39:40 +0000 Subject: [PATCH 03/20] fix missing argument in process_choice --- reproschema/redcap2reproschema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 909b554..a28e261 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -158,10 +158,12 @@ def process_row( rowData.setdefault("responseOptions", {}).update({schema_map[key]: value}) elif schema_map.get(key) == "choices" and value: + # Pass both field_type and value to process_choices rowData.setdefault("responseOptions", {}).update( - {"choices": process_choices(value)} + {"choices": process_choices(field_type, value)} ) + elif schema_map.get(key) == "scoringLogic" and value: condition = normalize_condition(value) rowData.setdefault("ui", {}).update({"hidden": True}) From 2491c475009fd475640f947ab9275490c71aaf38 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:09:02 +0000 Subject: [PATCH 04/20] fix language parse --- reproschema/redcap2reproschema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index a28e261..a9b2426 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -100,10 +100,10 @@ def parse_html(input_string, default_language="en"): text = element.get_text(strip=True) if text: result[lang] = text - if not result: + if not result: # If no text was extracted result[default_language] = soup.get_text(strip=True) else: - result[default_language] = input_string + result[default_language] = soup.get_text(strip=True) # Use the entire text as default language text return result From d6685a2c10ea5a96bc1d4829c42ba675a69491ba Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:28:19 +0000 Subject: [PATCH 05/20] add print --- reproschema/redcap2reproschema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index a9b2426..6beb731 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -181,6 +181,7 @@ def process_row( ) elif key in ["question", "schema:description", "preamble"] and value: + print(key, value) rowData.update({schema_map[key]: parse_html(value)}) elif key == "Identifier?" and value: From b95822f7caf3e748afd648425283ff7745eec3c9 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:30:38 +0000 Subject: [PATCH 06/20] add new test print --- reproschema/redcap2reproschema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 6beb731..f6f06a2 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -144,6 +144,7 @@ def process_row( } for key, value in field.items(): + print(f"Key: {key}, Value: {value}") if schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) @@ -181,7 +182,6 @@ def process_row( ) elif key in ["question", "schema:description", "preamble"] and value: - print(key, value) rowData.update({schema_map[key]: parse_html(value)}) elif key == "Identifier?" and value: From 34b7b99e1235e4741be13b11d6d2c42a4a652ee4 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:34:47 +0000 Subject: [PATCH 07/20] another print test --- reproschema/redcap2reproschema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index f6f06a2..1f5329c 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -144,7 +144,7 @@ def process_row( } for key, value in field.items(): - print(f"Key: {key}, Value: {value}") + print(f"Schema Map Key: {schema_map.get(key)}, Original Key: {key}, Value: {value}") if schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) From 5b53e0726e179cda9a5481353981fd3f96b2ec5f Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:38:59 +0000 Subject: [PATCH 08/20] fix schema_map.get(key) error --- reproschema/redcap2reproschema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 1f5329c..349a067 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -144,7 +144,6 @@ def process_row( } for key, value in field.items(): - print(f"Schema Map Key: {schema_map.get(key)}, Original Key: {key}, Value: {value}") if schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) @@ -181,7 +180,7 @@ def process_row( {"variableName": field["Variable / Field Name"], "isVis": condition} ) - elif key in ["question", "schema:description", "preamble"] and value: + elif schema_map.get(key) in ["question", "schema:description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) elif key == "Identifier?" and value: From ffed151960f8f962187145c517951edcbc447035 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Fri, 12 Jan 2024 00:42:08 +0000 Subject: [PATCH 09/20] fix presentation order in item --- reproschema/redcap2reproschema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 349a067..0885aac 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -144,7 +144,10 @@ def process_row( } for key, value in field.items(): - if schema_map.get(key) == "allow" and value: + if schema_map.get(key) in ["question", "schema:description", "preamble"] and value: + rowData.update({schema_map[key]: parse_html(value)}) + + elif schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) elif key in ui_list and value: @@ -180,9 +183,6 @@ def process_row( {"variableName": field["Variable / Field Name"], "isVis": condition} ) - elif schema_map.get(key) in ["question", "schema:description", "preamble"] and value: - rowData.update({schema_map[key]: parse_html(value)}) - elif key == "Identifier?" and value: identifier_val = value.lower() == "y" rowData.update( From c8d915fdb8f5b43fac41e07499c1620d3d0db7f7 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Thu, 18 Jan 2024 17:09:00 +0000 Subject: [PATCH 10/20] add id to items --- reproschema/redcap2reproschema.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 0885aac..7ed5183 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -6,6 +6,7 @@ import yaml from bs4 import BeautifulSoup +matrix_group_count = {} def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") @@ -119,9 +120,19 @@ def process_row( response_list, additional_notes_list, ): + matrix_group_name = field.get("Matrix Group Name") + if matrix_group_name: + matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1 + item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}" + else: + item_id = field["Variable / Field Name"] + rowData = { "@context": schema_context_url, "@type": "reproschema:Field", + "@id": item_id, + "prefLabel": item_id, + "description": f"{item_id} of {form_name}" } field_type = field.get("Field Type", "") From 585b516336969c47ce33d08d6efd6bb63c7d2088 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:38:52 -0500 Subject: [PATCH 11/20] add test --- reproschema/redcap2reproschema.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 7ed5183..64aa1e8 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -120,12 +120,14 @@ def process_row( response_list, additional_notes_list, ): - matrix_group_name = field.get("Matrix Group Name") + global matrix_group_count + + matrix_group_name = field.get("Matrix Group Name", "") if matrix_group_name: matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1 item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}" else: - item_id = field["Variable / Field Name"] + item_id = field.get("Variable / Field Name", "") rowData = { "@context": schema_context_url, @@ -155,6 +157,7 @@ def process_row( } for key, value in field.items(): + print(key, value, filed) if schema_map.get(key) in ["question", "schema:description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) From c86d80d56e748a5f7f301df3cfa39c9a3e3bc3d6 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:40:44 -0500 Subject: [PATCH 12/20] add test --- reproschema/redcap2reproschema.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 64aa1e8..a3fc5e4 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -359,6 +359,7 @@ def process_csv( languages = parse_language_iso_codes(row["Field Label"]) for field in datas[form_name]: + print(f"process_csv-field: {field}") field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( @@ -518,6 +519,7 @@ def main(): parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() + print("Start processing") # Call the main conversion function redcap2reproschema(args.csv_file, args.yaml_file) From 6ec5d715a593d1168fc94a6654215b93c0c7cca1 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:46:01 -0500 Subject: [PATCH 13/20] remove BOM --- reproschema/redcap2reproschema.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index a3fc5e4..10c4467 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -8,6 +8,9 @@ matrix_group_count = {} +def clean_header(header): + return {k.lstrip('\ufeff'): v for k, v in header.items()} + def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") @@ -345,6 +348,7 @@ def process_csv( with open(csv_file, mode="r", encoding="utf-8") as csvfile: reader = csv.DictReader(csvfile) for row in reader: + row = clean_header(row) form_name = row["Form Name"] if form_name not in datas: datas[form_name] = [] @@ -359,7 +363,6 @@ def process_csv( languages = parse_language_iso_codes(row["Field Label"]) for field in datas[form_name]: - print(f"process_csv-field: {field}") field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( From 683f601c7f34d83857b2b6d7ca056707104bf53b Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:48:54 -0500 Subject: [PATCH 14/20] add prints to test --- reproschema/redcap2reproschema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 10c4467..e3e0463 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -363,6 +363,7 @@ def process_csv( languages = parse_language_iso_codes(row["Field Label"]) for field in datas[form_name]: + print(f"process_csv-field: {field}, datas: {datas}") field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( From 9a35ee9aa20e527738eb7e65a99e4a65cf587a2f Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:52:07 -0500 Subject: [PATCH 15/20] add prints to test --- reproschema/redcap2reproschema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index e3e0463..b7f50c0 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -124,8 +124,9 @@ def process_row( additional_notes_list, ): global matrix_group_count - + print(f"process_row-field-before: {field}") matrix_group_name = field.get("Matrix Group Name", "") + print(f"process_row-field-after: {field}") if matrix_group_name: matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1 item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}" From 7e931457c6cd6ac4f8bca35f665b6dfab6f12c74 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 12:57:19 -0500 Subject: [PATCH 16/20] fix typo --- reproschema/redcap2reproschema.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index b7f50c0..4ae98d6 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -37,8 +37,9 @@ def process_visibility(data): } return visibility_obj -def parse_field_type_and_value(data, input_type_map): - field_type = data.get("Field Type", "") +def parse_field_type_and_value(field, input_type_map): + print(f"parse_field_type_and_value-field: {field}") + field_type = field.get("Field Type", "") input_type = input_type_map.get(field_type, field_type) @@ -50,7 +51,7 @@ def parse_field_type_and_value(data, input_type_map): "email": "email", "phone": "phone", } - validation_type = data.get("Text Validation Type OR Show Slider Number", "") + validation_type = field.get("Text Validation Type OR Show Slider Number", "") value_type = value_type_map.get(validation_type, "xsd:string") @@ -124,9 +125,7 @@ def process_row( additional_notes_list, ): global matrix_group_count - print(f"process_row-field-before: {field}") matrix_group_name = field.get("Matrix Group Name", "") - print(f"process_row-field-after: {field}") if matrix_group_name: matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1 item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}" @@ -161,7 +160,6 @@ def process_row( } for key, value in field.items(): - print(key, value, filed) if schema_map.get(key) in ["question", "schema:description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) From 003e850006f4e22f06886d9eda62953ed8974fbd Mon Sep 17 00:00:00 2001 From: yibeichan Date: Thu, 18 Jan 2024 19:17:43 -0500 Subject: [PATCH 17/20] improve responseOptions --- reproschema/redcap2reproschema.py | 40 ++++++++++++++++++------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 4ae98d6..c551b50 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -38,22 +38,31 @@ def process_visibility(data): return visibility_obj def parse_field_type_and_value(field, input_type_map): - print(f"parse_field_type_and_value-field: {field}") field_type = field.get("Field Type", "") - input_type = input_type_map.get(field_type, field_type) + # Initialize the default value type as string + value_type = "xsd:string" + + # Map certain field types directly to xsd types value_type_map = { - "number": "xsd:int", + "text": "xsd:string", "date_": "xsd:date", - "datetime_": "datetime", - "time_": "xsd:date", - "email": "email", - "phone": "phone", - } - validation_type = field.get("Text Validation Type OR Show Slider Number", "") - - value_type = value_type_map.get(validation_type, "xsd:string") + "datetime_": "xsd:dateTime", + "time_": "xsd:time", + "email": "xsd:string", + "phone": "xsd:string", + } # todo: input_type="signature" + + # Get the validation type from the field, if available + validation_type = field.get("Text Validation Type OR Show Slider Number", "").strip() + + if validation_type: + # Map the validation type to an XSD type if it's in the map + value_type = value_type_map.get(validation_type, "xsd:string") + elif field_type in ["radio", "dropdown"]: + # If there's no validation type, but the field type is radio or dropdown, use xsd:integer + value_type = "xsd:integer" return input_type, value_type @@ -74,7 +83,7 @@ def process_choices(field_type, choices_str): except ValueError: value = parts[0] - choice_obj = {"schema:value": value, "schema:name": parts[1]} + choice_obj = {"name": parts[1], "value": value} if len(parts) == 3: # Handle image url choice_obj["schema:image"] = f"{parts[2]}.png" @@ -154,8 +163,8 @@ def process_row( rowData["responseOptions"] = { "valueType": "xsd:boolean", "choices": [ - {"schema:value": 1, "schema:name": "Yes"}, - {"schema:value": 0, "schema:name": "No"} + {"name": "Yes", "value": 1}, + {"name": "No", "value": 0} ] } @@ -362,7 +371,6 @@ def process_csv( languages = parse_language_iso_codes(row["Field Label"]) for field in datas[form_name]: - print(f"process_csv-field: {field}, datas: {datas}") field_name = field["Variable / Field Name"] order[form_name].append(f"items/{field_name}") process_row( @@ -522,9 +530,7 @@ def main(): parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.") parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.") args = parser.parse_args() - print("Start processing") - # Call the main conversion function redcap2reproschema(args.csv_file, args.yaml_file) From 37853b23d69c09397c91c6b3941ab967cd24df6a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 00:26:21 +0000 Subject: [PATCH 18/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 42 ++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index c551b50..47473cb 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -8,8 +8,10 @@ matrix_group_count = {} + def clean_header(header): - return {k.lstrip('\ufeff'): v for k, v in header.items()} + return {k.lstrip("\ufeff"): v for k, v in header.items()} + def normalize_condition(condition_str): re_parentheses = re.compile(r"\(([0-9]*)\)") @@ -37,6 +39,7 @@ def process_visibility(data): } return visibility_obj + def parse_field_type_and_value(field, input_type_map): field_type = field.get("Field Type", "") input_type = input_type_map.get(field_type, field_type) @@ -52,10 +55,12 @@ def parse_field_type_and_value(field, input_type_map): "time_": "xsd:time", "email": "xsd:string", "phone": "xsd:string", - } # todo: input_type="signature" + } # todo: input_type="signature" # Get the validation type from the field, if available - validation_type = field.get("Text Validation Type OR Show Slider Number", "").strip() + validation_type = field.get( + "Text Validation Type OR Show Slider Number", "" + ).strip() if validation_type: # Map the validation type to an XSD type if it's in the map @@ -66,15 +71,18 @@ def parse_field_type_and_value(field, input_type_map): return input_type, value_type + def process_choices(field_type, choices_str): - if field_type not in ['radio', 'dropdown']: # Handle only radio and dropdown types + if field_type not in ["radio", "dropdown"]: # Handle only radio and dropdown types return None choices = [] for choice in choices_str.split("|"): parts = choice.split(", ") if len(parts) < 2: - print(f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field") + print( + f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field" + ) continue # Try to convert the first part to an integer, if it fails, keep it as a string @@ -90,6 +98,7 @@ def process_choices(field_type, choices_str): choices.append(choice_obj) return choices + def write_to_file(abs_folder_path, form_name, field_name, rowData): file_path = os.path.join( f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}" @@ -117,7 +126,9 @@ def parse_html(input_string, default_language="en"): if not result: # If no text was extracted result[default_language] = soup.get_text(strip=True) else: - result[default_language] = soup.get_text(strip=True) # Use the entire text as default language text + result[default_language] = soup.get_text( + strip=True + ) # Use the entire text as default language text return result @@ -136,7 +147,9 @@ def process_row( global matrix_group_count matrix_group_name = field.get("Matrix Group Name", "") if matrix_group_name: - matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1 + matrix_group_count[matrix_group_name] = ( + matrix_group_count.get(matrix_group_name, 0) + 1 + ) item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}" else: item_id = field.get("Variable / Field Name", "") @@ -146,7 +159,7 @@ def process_row( "@type": "reproschema:Field", "@id": item_id, "prefLabel": item_id, - "description": f"{item_id} of {form_name}" + "description": f"{item_id} of {form_name}", } field_type = field.get("Field Type", "") @@ -162,16 +175,16 @@ def process_row( if field_type == "yesno": rowData["responseOptions"] = { "valueType": "xsd:boolean", - "choices": [ - {"name": "Yes", "value": 1}, - {"name": "No", "value": 0} - ] + "choices": [{"name": "Yes", "value": 1}, {"name": "No", "value": 0}], } for key, value in field.items(): - if schema_map.get(key) in ["question", "schema:description", "preamble"] and value: + if ( + schema_map.get(key) in ["question", "schema:description", "preamble"] + and value + ): rowData.update({schema_map[key]: parse_html(value)}) - + elif schema_map.get(key) == "allow" and value: rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")}) @@ -191,7 +204,6 @@ def process_row( {"choices": process_choices(field_type, value)} ) - elif schema_map.get(key) == "scoringLogic" and value: condition = normalize_condition(value) rowData.setdefault("ui", {}).update({"hidden": True}) From 1533f114af2dab5db12fabcb791a04b2463f80b5 Mon Sep 17 00:00:00 2001 From: yibeichan Date: Fri, 19 Jan 2024 11:55:14 -0500 Subject: [PATCH 19/20] get unique order --- reproschema/redcap2reproschema.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 47473cb..a2c5596 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -248,6 +248,10 @@ def create_form_schema( matrix_list, scores_list, ): + + # Use a set to track unique items and preserve order + unique_order = list(dict.fromkeys(order.get(form_name, []))) + # Construct the JSON-LD structure json_ld = { "@context": schema_context_url, @@ -258,7 +262,7 @@ def create_form_schema( "schemaVersion": "1.0.0-rc4", "version": "0.0.1", "ui": { - "order": order.get(form_name, []), + "order": unique_order, "addProperties": bl_list, "shuffle": False, }, From 891d5809b78703b6f38595d129d291e13ac1950f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:55:29 +0000 Subject: [PATCH 20/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index a2c5596..f267a2f 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -248,7 +248,6 @@ def create_form_schema( matrix_list, scores_list, ): - # Use a set to track unique items and preserve order unique_order = list(dict.fromkeys(order.get(form_name, [])))