From 82851ebf0bcc4a5fbfaa0c8bc41c69e1ca9028bc Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Tue, 9 Jan 2024 19:32:38 +0000
Subject: [PATCH 1/6] fix field type matching with REDCap

---
 reproschema/reproschema2redcap.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 05350fc..03b4c64 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -30,25 +30,33 @@ def find_Ftype_and_colH(item_json, row_data):
     col_h = ""
 
     # Check the input type and update the field type and column header accordingly
-    if f_type == "integer":
+    if f_type in ["text", "textarea", "email"]:
+        f_type = "text"
+    elif f_type == "integer":
+        f_type = "text"
+        col_h = "integer"
+    elif f_type == "number" or f_type == "float":
         f_type = "text"
         col_h = "number"
-    elif f_type == "select":
-        f_type = "dropdown"
     elif f_type == "date":
         f_type = "text"
-        col_h = "ddate_mdy"
+        col_h = "date_mdy"
+    elif f_type in ["radio", "checkbox", "dropdown", "file"]:
+        # No change needed, these are valid REDCap field types
+        pass
+    else:
+        # Fallback for unsupported types
+        f_type = "text"
 
     # Update the row_data dictionary with the field type
-    row_data["field_type"] = f_type
+    row_data["field_type"] = f_type.lower()
 
     # Update the row_data dictionary with the column header if available
     if col_h:
-        row_data["val_type_OR_slider"] = col_h
+        row_data["val_type_OR_slider"] = col_h.lower()
 
     return row_data
 
-
 def process_item(item_json, activity_name):
     """
     Process an item in JSON format and extract relevant information into a dictionary.

From 74199d720953aa6da6174d79985c478ce20b2935 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Tue, 9 Jan 2024 22:25:43 +0000
Subject: [PATCH 2/6] make better field mappings

---
 reproschema/reproschema2redcap.py | 106 ++++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 29 deletions(-)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 03b4c64..060b7ce 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -2,7 +2,7 @@
 import json
 import csv
 from pathlib import Path
-
+import requests
 
 def read_json_file(file_path):
     try:
@@ -12,7 +12,28 @@ def read_json_file(file_path):
         print(f"Error reading file {file_path}: {e}")
         return None
 
+def fetch_choices_from_url(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        data = response.json()
+
+        # Check if the data is a list or a dictionary and process accordingly
+        if isinstance(data, list):
+            # Assuming each item in the list is a dictionary with one key-value pair
+            choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item]
+        elif isinstance(data, dict):
+            # Extracting the values from the dictionary
+            choices = list(data.values())
+        else:
+            # If data is neither a list nor a dictionary, return an empty string
+            return ""
 
+        return " | ".join(str(choice) for choice in choices)
+    except Exception as e:
+        print(f"Error fetching choices from {url}: {e}")
+        return ""
+    
 def find_Ftype_and_colH(item_json, row_data):
     """
     Find the field type and column header based on the given item_json.
@@ -44,6 +65,9 @@ def find_Ftype_and_colH(item_json, row_data):
     elif f_type in ["radio", "checkbox", "dropdown", "file"]:
         # No change needed, these are valid REDCap field types
         pass
+    elif f_type == "select":
+        multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False)
+        f_type = "checkbox" if multiple_choice else "radio"
     else:
         # Fallback for unsupported types
         f_type = "text"
@@ -68,43 +92,43 @@ def process_item(item_json, activity_name):
     Returns:
         dict: A dictionary containing the extracted information.
     """
-    row_data = {}
+    row_data = {
+        "val_min": "",
+        "val_max": "",
+        "choices": "",
+        "required": "",
+        "field_notes": "",
+        "var_name": "",
+        "activity": activity_name.lower(),
+        "field_label": "",
+    }
 
     # Extract min and max values from response options, if available
     response_options = item_json.get("responseOptions", {})
     row_data["val_min"] = response_options.get("schema:minValue", "")
     row_data["val_max"] = response_options.get("schema:maxValue", "")
 
+    # 'choices' processing is now handled in 'find_Ftype_and_colH' if it's a URL
     choices = response_options.get("choices")
-    if choices:
+    if choices and not isinstance(choices, str):
         if isinstance(choices, list):
-            # Extract choice values and names, and join them with a '|'
             item_choices = [
                 f"{ch.get('schema:value', ch.get('value', ''))}, {ch.get('schema:name', ch.get('name', ''))}"
                 for ch in choices
             ]
             row_data["choices"] = " | ".join(item_choices)
-        elif isinstance(choices, str):
-            row_data["choices"] = choices
-        else:
-            row_data["choices"] = ""
 
     row_data["required"] = response_options.get("requiredValue", "")
-
     row_data["field_notes"] = item_json.get("skos:altLabel", "")
-
     row_data["var_name"] = item_json.get("@id", "")
-    row_data["activity"] = activity_name
 
     question = item_json.get("question")
     if isinstance(question, dict):
         row_data["field_label"] = question.get("en", "")
     elif isinstance(question, str):
         row_data["field_label"] = question
-    else:
-        row_data["field_label"] = ""
 
-    # Call helper function to find Ftype and colH values and update row_data
+    # Call helper function to find field type and validation type (if any) and update row_data
     row_data = find_Ftype_and_colH(item_json, row_data)
 
     return row_data
@@ -148,28 +172,52 @@ def get_csv_data(dir_path):
 
 
 def write_to_csv(csv_data, output_csv_filename):
-    # Define the headers for the CSV file as per the JavaScript file
+    # REDCap-specific headers
     headers = [
-        "var_name",
-        "activity",
-        "section",
-        "field_type",
-        "field_label",
-        "choices",
-        "field_notes",
-        "val_type_OR_slider",
-        "val_min",
-        "val_max",
-        "identifier",
-        "visibility",
-        "required",
+        "Variable / Field Name",
+        "Form Name",
+        "Section Header",
+        "Field Type",
+        "Field Label",
+        "Choices, Calculations, OR Slider Labels",
+        "Field Note",
+        "Text Validation Type OR Show Slider Number",
+        "Text Validation Min",
+        "Text Validation Max",
+        "Identifier?",
+        "Branching Logic (Show field only if...)",
+        "Required Field?",
+        "Custom Alignment",
+        "Question Number (surveys only)",
+        "Matrix Group Name",
+        "Matrix Ranking?",
+        "Field Annotation"
     ]
 
     # Writing to the CSV file
     with open(output_csv_filename, "w", newline="", encoding="utf-8") as csvfile:
         writer = csv.DictWriter(csvfile, fieldnames=headers)
-        writer.writeheader()
+
+        # Map the data from your format to REDCap format
+        redcap_data = []
         for row in csv_data:
+            redcap_row = {
+                "Variable / Field Name": row["var_name"],
+                "Form Name": row["activity"],
+                "Section Header": "",  # Update this if your data includes section headers
+                "Field Type": row["field_type"],
+                "Field Label": row["field_label"],
+                "Choices, Calculations, OR Slider Labels": row["choices"],
+                "Field Note": row["field_notes"],
+                "Text Validation Type OR Show Slider Number": row.get("val_type_OR_slider", ""),
+                "Text Validation Min": row["val_min"],
+                "Text Validation Max": row["val_max"],
+                # Add other fields as necessary based on your data
+            }
+            redcap_data.append(redcap_row)
+
+        writer.writeheader()
+        for row in redcap_data:
             writer.writerow(row)
 
     print("The CSV file was written successfully")

From ce5af6256a24c68fd74531a1f819d3f3c893ae6c Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Tue, 9 Jan 2024 23:42:40 +0000
Subject: [PATCH 3/6] fix inputType values starting with select

---
 reproschema/reproschema2redcap.py | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 060b7ce..db58d80 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -35,47 +35,37 @@ def fetch_choices_from_url(url):
         return ""
     
 def find_Ftype_and_colH(item_json, row_data):
-    """
-    Find the field type and column header based on the given item_json.
-
-    Args:
-        item_json (dict): The JSON object containing the item information.
-        row_data (dict): The row data dictionary.
-
-    Returns:
-        dict: The updated row data dictionary with field type and column header.
-
-    """
     # Extract the input type from the item_json
     f_type = item_json.get("ui", {}).get("inputType", "")
     col_h = ""
 
-    # Check the input type and update the field type and column header accordingly
     if f_type in ["text", "textarea", "email"]:
         f_type = "text"
     elif f_type == "integer":
         f_type = "text"
         col_h = "integer"
-    elif f_type == "number" or f_type == "float":
+    elif f_type in ["number", "float"]:
         f_type = "text"
         col_h = "number"
     elif f_type == "date":
         f_type = "text"
         col_h = "date_mdy"
-    elif f_type in ["radio", "checkbox", "dropdown", "file"]:
-        # No change needed, these are valid REDCap field types
-        pass
     elif f_type == "select":
         multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False)
-        f_type = "checkbox" if multiple_choice else "radio"
+        f_type = "checkbox" if multiple_choice else "dropdown"
+    elif f_type.startswith("select"):
+        # Adjusting for selectCountry, selectLanguage, selectState types
+        f_type = "radio"
+        choices_url = item_json.get("responseOptions", {}).get("choices", "")
+        if choices_url and isinstance(choices_url, str):
+            choices_data = fetch_choices_from_url(choices_url)
+            if choices_data:
+                row_data["choices"] = choices_data
     else:
-        # Fallback for unsupported types
         f_type = "text"
 
-    # Update the row_data dictionary with the field type
     row_data["field_type"] = f_type.lower()
 
-    # Update the row_data dictionary with the column header if available
     if col_h:
         row_data["val_type_OR_slider"] = col_h.lower()
 

From 259a2e1f624c6867d7d9c627cc784540b2cc23b2 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Tue, 9 Jan 2024 23:49:03 +0000
Subject: [PATCH 4/6] reformat choices

---
 reproschema/reproschema2redcap.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index db58d80..9b2289e 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -18,18 +18,16 @@ def fetch_choices_from_url(url):
         response.raise_for_status()
         data = response.json()
 
-        # Check if the data is a list or a dictionary and process accordingly
         if isinstance(data, list):
-            # Assuming each item in the list is a dictionary with one key-value pair
             choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item]
         elif isinstance(data, dict):
-            # Extracting the values from the dictionary
             choices = list(data.values())
         else:
-            # If data is neither a list nor a dictionary, return an empty string
             return ""
 
-        return " | ".join(str(choice) for choice in choices)
+        # Format choices as 'code, description'
+        formatted_choices = [f"{idx}, {choice}" for idx, choice in enumerate(choices)]
+        return " | ".join(formatted_choices)
     except Exception as e:
         print(f"Error fetching choices from {url}: {e}")
         return ""

From bd841467ae83b7b993159d258cd3e25186a904df Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Wed, 10 Jan 2024 03:22:59 +0000
Subject: [PATCH 5/6] account for media as input

---
 reproschema/reproschema2redcap.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 9b2289e..8e9d038 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -59,6 +59,8 @@ def find_Ftype_and_colH(item_json, row_data):
             choices_data = fetch_choices_from_url(choices_url)
             if choices_data:
                 row_data["choices"] = choices_data
+    elif f_type.startswith(("audio", "video", "image", "document")):
+        f_type = "file"
     else:
         f_type = "text"
 

From 01e6c1c8ae3e0147fed47cba7c94a82adae275e4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 10 Jan 2024 17:08:41 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 reproschema/reproschema2redcap.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py
index 8e9d038..3d03cf3 100644
--- a/reproschema/reproschema2redcap.py
+++ b/reproschema/reproschema2redcap.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 import requests
 
+
 def read_json_file(file_path):
     try:
         with open(file_path, "r", encoding="utf-8") as file:
@@ -12,6 +13,7 @@ def read_json_file(file_path):
         print(f"Error reading file {file_path}: {e}")
         return None
 
+
 def fetch_choices_from_url(url):
     try:
         response = requests.get(url)
@@ -19,7 +21,11 @@ def fetch_choices_from_url(url):
         data = response.json()
 
         if isinstance(data, list):
-            choices = [list(item.values())[0] for item in data if isinstance(item, dict) and item]
+            choices = [
+                list(item.values())[0]
+                for item in data
+                if isinstance(item, dict) and item
+            ]
         elif isinstance(data, dict):
             choices = list(data.values())
         else:
@@ -31,7 +37,8 @@ def fetch_choices_from_url(url):
     except Exception as e:
         print(f"Error fetching choices from {url}: {e}")
         return ""
-    
+
+
 def find_Ftype_and_colH(item_json, row_data):
     # Extract the input type from the item_json
     f_type = item_json.get("ui", {}).get("inputType", "")
@@ -49,7 +56,9 @@ def find_Ftype_and_colH(item_json, row_data):
         f_type = "text"
         col_h = "date_mdy"
     elif f_type == "select":
-        multiple_choice = item_json.get("responseOptions", {}).get("multipleChoice", False)
+        multiple_choice = item_json.get("responseOptions", {}).get(
+            "multipleChoice", False
+        )
         f_type = "checkbox" if multiple_choice else "dropdown"
     elif f_type.startswith("select"):
         # Adjusting for selectCountry, selectLanguage, selectState types
@@ -71,6 +80,7 @@ def find_Ftype_and_colH(item_json, row_data):
 
     return row_data
 
+
 def process_item(item_json, activity_name):
     """
     Process an item in JSON format and extract relevant information into a dictionary.
@@ -181,7 +191,7 @@ def write_to_csv(csv_data, output_csv_filename):
         "Question Number (surveys only)",
         "Matrix Group Name",
         "Matrix Ranking?",
-        "Field Annotation"
+        "Field Annotation",
     ]
 
     # Writing to the CSV file
@@ -199,7 +209,9 @@ def write_to_csv(csv_data, output_csv_filename):
                 "Field Label": row["field_label"],
                 "Choices, Calculations, OR Slider Labels": row["choices"],
                 "Field Note": row["field_notes"],
-                "Text Validation Type OR Show Slider Number": row.get("val_type_OR_slider", ""),
+                "Text Validation Type OR Show Slider Number": row.get(
+                    "val_type_OR_slider", ""
+                ),
                 "Text Validation Min": row["val_min"],
                 "Text Validation Max": row["val_max"],
                 # Add other fields as necessary based on your data