From 82851ebf0bcc4a5fbfaa0c8bc41c69e1ca9028bc Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 9 Jan 2024 19:32:38 +0000 Subject: [PATCH 1/6] fix matching with redcap --- reproschema/reproschema2redcap.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 05350fc..03b4c64 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -30,25 +30,33 @@ def find_Ftype_and_colH(item_json, row_data): col_h = "" # Check the input type and update the field type and column header accordingly - if f_type == "integer": + if f_type in ["text", "textarea", "email"]: + f_type = "text" + elif f_type == "integer": + f_type = "text" + col_h = "integer" + elif f_type == "number" or f_type == "float": f_type = "text" col_h = "number" - elif f_type == "select": - f_type = "dropdown" elif f_type == "date": f_type = "text" - col_h = "ddate_mdy" + col_h = "date_mdy" + elif f_type in ["radio", "checkbox", "dropdown", "file"]: + # No change needed, these are valid REDCap field types + pass + else: + # Fallback for unsupported types + f_type = "text" # Update the row_data dictionary with the field type - row_data["field_type"] = f_type + row_data["field_type"] = f_type.lower() # Update the row_data dictionary with the column header if available if col_h: - row_data["val_type_OR_slider"] = col_h + row_data["val_type_OR_slider"] = col_h.lower() return row_data - def process_item(item_json, activity_name): """ Process an item in JSON format and extract relevant information into a dictionary. From 74199d720953aa6da6174d79985c478ce20b2935 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 9 Jan 2024 22:25:43 +0000 Subject: [PATCH 2/6] make better field mappings --- reproschema/reproschema2redcap.py | 106 ++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 29 deletions(-) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 03b4c64..060b7ce 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -2,7 +2,7 @@ import json import csv from pathlib import Path - +import requests def read_json_file(file_path): try: @@ -12,7 +12,28 @@ def read_json_file(file_path): print(f"Error reading file {file_path}: {e}") return None +def fetch_choices_from_url(url): + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + + # Check if the data is a list or a dictionary and process accordingly + if isinstance(data, list): + # Assuming each item in the list is a dictionary with one key-value pair + choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item] + elif isinstance(data, dict): + # Extracting the values from the dictionary + choices = list(data.values()) + else: + # If data is neither a list nor a dictionary, return an empty string + return "" + return " | ".join(str(choice) for choice in choices) + except Exception as e: + print(f"Error fetching choices from {url}: {e}") + return "" + def find_Ftype_and_colH(item_json, row_data): """ Find the field type and column header based on the given item_json. @@ -44,6 +65,9 @@ def find_Ftype_and_colH(item_json, row_data): elif f_type in ["radio", "checkbox", "dropdown", "file"]: # No change needed, these are valid REDCap field types pass + elif f_type == "select": + multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False) + f_type = "checkbox" if multiple_choice else "radio" else: # Fallback for unsupported types f_type = "text" @@ -68,43 +92,43 @@ def process_item(item_json, activity_name): Returns: dict: A dictionary containing the extracted information. """ - row_data = {} + row_data = { + "val_min": "", + "val_max": "", + "choices": "", + "required": "", + "field_notes": "", + "var_name": "", + "activity": activity_name.lower(), + "field_label": "", + } # Extract min and max values from response options, if available response_options = item_json.get("responseOptions", {}) row_data["val_min"] = response_options.get("schema:minValue", "") row_data["val_max"] = response_options.get("schema:maxValue", "") + # 'choices' processing is now handled in 'find_Ftype_and_colH' if it's a URL choices = response_options.get("choices") - if choices: + if choices and not isinstance(choices, str): if isinstance(choices, list): - # Extract choice values and names, and join them with a '|' item_choices = [ f"{ch.get('schema:value', ch.get('value', ''))}, {ch.get('schema:name', ch.get('name', ''))}" for ch in choices ] row_data["choices"] = " | ".join(item_choices) - elif isinstance(choices, str): - row_data["choices"] = choices - else: - row_data["choices"] = "" row_data["required"] = response_options.get("requiredValue", "") - row_data["field_notes"] = item_json.get("skos:altLabel", "") - row_data["var_name"] = item_json.get("@id", "") - row_data["activity"] = activity_name question = item_json.get("question") if isinstance(question, dict): row_data["field_label"] = question.get("en", "") elif isinstance(question, str): row_data["field_label"] = question - else: - row_data["field_label"] = "" - # Call helper function to find Ftype and colH values and update row_data + # Call helper function to find field type and validation type (if any) and update row_data row_data = find_Ftype_and_colH(item_json, row_data) return row_data @@ -148,28 +172,52 @@ def get_csv_data(dir_path): def write_to_csv(csv_data, output_csv_filename): - # Define the headers for the CSV file as per the JavaScript file + # REDCap-specific headers headers = [ - "var_name", - "activity", - "section", - "field_type", - "field_label", - "choices", - "field_notes", - "val_type_OR_slider", - "val_min", - "val_max", - "identifier", - "visibility", - "required", + "Variable / Field Name", + "Form Name", + "Section Header", + "Field Type", + "Field Label", + "Choices, Calculations, OR Slider Labels", + "Field Note", + "Text Validation Type OR Show Slider Number", + "Text Validation Min", + "Text Validation Max", + "Identifier?", + "Branching Logic (Show field only if...)", + "Required Field?", + "Custom Alignment", + "Question Number (surveys only)", + "Matrix Group Name", + "Matrix Ranking?", + "Field Annotation" ] # Writing to the CSV file with open(output_csv_filename, "w", newline="", encoding="utf-8") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=headers) - writer.writeheader() + + # Map the data from your format to REDCap format + redcap_data = [] for row in csv_data: + redcap_row = { + "Variable / Field Name": row["var_name"], + "Form Name": row["activity"], + "Section Header": "", # Update this if your data includes section headers + "Field Type": row["field_type"], + "Field Label": row["field_label"], + "Choices, Calculations, OR Slider Labels": row["choices"], + "Field Note": row["field_notes"], + "Text Validation Type OR Show Slider Number": row.get("val_type_OR_slider", ""), + "Text Validation Min": row["val_min"], + "Text Validation Max": row["val_max"], + # Add other fields as necessary based on your data + } + redcap_data.append(redcap_row) + + writer.writeheader() + for row in redcap_data: writer.writerow(row) print("The CSV file was written successfully") From ce5af6256a24c68fd74531a1f819d3f3c893ae6c Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 9 Jan 2024 23:42:40 +0000 Subject: [PATCH 3/6] fix inputype start with select --- reproschema/reproschema2redcap.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 060b7ce..db58d80 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -35,47 +35,37 @@ def fetch_choices_from_url(url): return "" def find_Ftype_and_colH(item_json, row_data): - """ - Find the field type and column header based on the given item_json. - - Args: - item_json (dict): The JSON object containing the item information. - row_data (dict): The row data dictionary. - - Returns: - dict: The updated row data dictionary with field type and column header. - - """ # Extract the input type from the item_json f_type = item_json.get("ui", {}).get("inputType", "") col_h = "" - # Check the input type and update the field type and column header accordingly if f_type in ["text", "textarea", "email"]: f_type = "text" elif f_type == "integer": f_type = "text" col_h = "integer" - elif f_type == "number" or f_type == "float": + elif f_type in ["number", "float"]: f_type = "text" col_h = "number" elif f_type == "date": f_type = "text" col_h = "date_mdy" - elif f_type in ["radio", "checkbox", "dropdown", "file"]: - # No change needed, these are valid REDCap field types - pass elif f_type == "select": multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False) - f_type = "checkbox" if multiple_choice else "radio" + f_type = "checkbox" if multiple_choice else "dropdown" + elif f_type.startswith("select"): + # Adjusting for selectCountry, selectLanguage, selectState types + f_type = "radio" + choices_url = item_json.get("responseOptions", {}).get("choices", "") + if choices_url and isinstance(choices_url, str): + choices_data = fetch_choices_from_url(choices_url) + if choices_data: + row_data["choices"] = choices_data else: - # Fallback for unsupported types f_type = "text" - # Update the row_data dictionary with the field type row_data["field_type"] = f_type.lower() - # Update the row_data dictionary with the column header if available if col_h: row_data["val_type_OR_slider"] = col_h.lower() From 259a2e1f624c6867d7d9c627cc784540b2cc23b2 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Tue, 9 Jan 2024 23:49:03 +0000 Subject: [PATCH 4/6] reformat choices --- reproschema/reproschema2redcap.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index db58d80..9b2289e 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -18,18 +18,16 @@ def fetch_choices_from_url(url): response.raise_for_status() data = response.json() - # Check if the data is a list or a dictionary and process accordingly if isinstance(data, list): - # Assuming each item in the list is a dictionary with one key-value pair choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item] elif isinstance(data, dict): - # Extracting the values from the dictionary choices = list(data.values()) else: - # If data is neither a list nor a dictionary, return an empty string return "" - return " | ".join(str(choice) for choice in choices) + # Format choices as 'code, description' + formatted_choices = [f"{idx}, {choice}" for idx, choice in enumerate(choices)] + return " | ".join(formatted_choices) except Exception as e: print(f"Error fetching choices from {url}: {e}") return "" From bd841467ae83b7b993159d258cd3e25186a904df Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Wed, 10 Jan 2024 03:22:59 +0000 Subject: [PATCH 5/6] account for media as input --- reproschema/reproschema2redcap.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 9b2289e..8e9d038 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -59,6 +59,8 @@ def find_Ftype_and_colH(item_json, row_data): choices_data = fetch_choices_from_url(choices_url) if choices_data: row_data["choices"] = choices_data + elif f_type.startswith(("audio", "video", "image", "document")): + f_type = "file" else: f_type = "text" From 01e6c1c8ae3e0147fed47cba7c94a82adae275e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:08:41 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/reproschema2redcap.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 8e9d038..3d03cf3 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -4,6 +4,7 @@ from pathlib import Path import requests + def read_json_file(file_path): try: with open(file_path, "r", encoding="utf-8") as file: @@ -12,6 +13,7 @@ def read_json_file(file_path): print(f"Error reading file {file_path}: {e}") return None + def fetch_choices_from_url(url): try: response = requests.get(url) @@ -19,7 +21,11 @@ def fetch_choices_from_url(url): data = response.json() if isinstance(data, list): - choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item] + choices = [ + list(item.values())[0] + for item in data + if isinstance(item, dict) and item + ] elif isinstance(data, dict): choices = list(data.values()) else: @@ -31,7 +37,8 @@ def fetch_choices_from_url(url): except Exception as e: print(f"Error fetching choices from {url}: {e}") return "" - + + def find_Ftype_and_colH(item_json, row_data): # Extract the input type from the item_json f_type = item_json.get("ui", {}).get("inputType", "") @@ -49,7 +56,9 @@ def find_Ftype_and_colH(item_json, row_data): f_type = "text" col_h = "date_mdy" elif f_type == "select": - multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False) + multiple_choice = item_json.get("responseOptions", {}).get( + "multipleChoice", False + ) f_type = "checkbox" if multiple_choice else "dropdown" elif f_type.startswith("select"): # Adjusting for selectCountry, selectLanguage, selectState types @@ -71,6 +80,7 @@ def find_Ftype_and_colH(item_json, row_data): return row_data + def process_item(item_json, activity_name): """ Process an item in JSON format and extract relevant information into a dictionary. @@ -181,7 +191,7 @@ def write_to_csv(csv_data, output_csv_filename): "Question Number (surveys only)", "Matrix Group Name", "Matrix Ranking?", - "Field Annotation" + "Field Annotation", ] # Writing to the CSV file @@ -199,7 +209,9 @@ def write_to_csv(csv_data, output_csv_filename): "Field Label": row["field_label"], "Choices, Calculations, OR Slider Labels": row["choices"], "Field Note": row["field_notes"], - "Text Validation Type OR Show Slider Number": row.get("val_type_OR_slider", ""), + "Text Validation Type OR Show Slider Number": row.get( + "val_type_OR_slider", "" + ), "Text Validation Min": row["val_min"], "Text Validation Max": row["val_max"], # Add other fields as necessary based on your data