From 9f61bdbc73e5801794b87e2a1b3c575a0f7d0760 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Thu, 11 Jan 2024 21:30:09 +0000
Subject: [PATCH 01/20] allow choice to be non-integer

---
 reproschema/redcap2reproschema.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 16b80c7..6535334 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -53,15 +53,22 @@ def parse_field_type_and_value(data, input_type_map):
 
     return input_type, value_type
 
-
 def process_choices(choices_str):
     choices = []
     for choice in choices_str.split("|"):
         parts = choice.split(", ")
-        choice_obj = {"schema:value": int(parts[0]), "schema:name": parts[1]}
+        # Try to convert the first part to an integer, if it fails, keep it as a string
+        try:
+            value = int(parts[0])
+        except ValueError:
+            value = parts[0]
+
+        choice_obj = {"schema:value": value, "schema:name": parts[1]}
+
         if len(parts) == 3:
             # TODO: handle image url
             choice_obj["schema:image"] = f"{parts[2]}.png"
+
         choices.append(choice_obj)
     return choices
 

From 87073fcf5720ed5f2c06a98e828a086ed927cc32 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Thu, 11 Jan 2024 23:36:30 +0000
Subject: [PATCH 02/20] add yesno fieldtype conversion

---
 reproschema/redcap2reproschema.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 6535334..909b554 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -33,7 +33,6 @@ def process_visibility(data):
     }
     return visibility_obj
 
-
 def parse_field_type_and_value(data, input_type_map):
     field_type = data.get("Field Type", "")
 
@@ -53,10 +52,17 @@ def parse_field_type_and_value(data, input_type_map):
 
     return input_type, value_type
 
-def process_choices(choices_str):
+def process_choices(field_type, choices_str):
+    if field_type not in ['radio', 'dropdown']:  # Handle only radio and dropdown types
+        return None
+
     choices = []
     for choice in choices_str.split("|"):
         parts = choice.split(", ")
+        if len(parts) < 2:
+            print(f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field")
+            continue
+
         # Try to convert the first part to an integer, if it fails, keep it as a string
         try:
             value = int(parts[0])
@@ -64,15 +70,12 @@ def process_choices(choices_str):
             value = parts[0]
 
         choice_obj = {"schema:value": value, "schema:name": parts[1]}
-
         if len(parts) == 3:
-            # TODO: handle image url
+            # Handle image url
             choice_obj["schema:image"] = f"{parts[2]}.png"
-
         choices.append(choice_obj)
     return choices
 
-
 def write_to_file(abs_folder_path, form_name, field_name, rowData):
     file_path = os.path.join(
         f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}"
@@ -131,6 +134,15 @@ def process_row(
     if value_type:
         rowData["responseOptions"] = {"valueType": value_type}
 
+    if field_type == "yesno":
+        rowData["responseOptions"] = {
+            "valueType": "xsd:boolean",
+            "choices": [
+                {"schema:value": 1, "schema:name": "Yes"},
+                {"schema:value": 0, "schema:name": "No"}
+            ]
+        }
+
     for key, value in field.items():
         if schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})

From eee664ac0442849050863b53f1af18dc7051ba68 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Thu, 11 Jan 2024 23:39:40 +0000
Subject: [PATCH 03/20] fix missing field_type argument in process_choices call

---
 reproschema/redcap2reproschema.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 909b554..a28e261 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -158,10 +158,12 @@ def process_row(
             rowData.setdefault("responseOptions", {}).update({schema_map[key]: value})
 
         elif schema_map.get(key) == "choices" and value:
+            # Pass both field_type and value to process_choices
             rowData.setdefault("responseOptions", {}).update(
-                {"choices": process_choices(value)}
+                {"choices": process_choices(field_type, value)}
             )
 
+
         elif schema_map.get(key) == "scoringLogic" and value:
             condition = normalize_condition(value)
             rowData.setdefault("ui", {}).update({"hidden": True})

From 2491c475009fd475640f947ab9275490c71aaf38 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:09:02 +0000
Subject: [PATCH 04/20] fix language parse

---
 reproschema/redcap2reproschema.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index a28e261..a9b2426 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -100,10 +100,10 @@ def parse_html(input_string, default_language="en"):
             text = element.get_text(strip=True)
             if text:
                 result[lang] = text
-        if not result:
+        if not result:  # If no text was extracted
             result[default_language] = soup.get_text(strip=True)
     else:
-        result[default_language] = input_string
+        result[default_language] = soup.get_text(strip=True)  # Use the entire text as default language text
 
     return result
 

From d6685a2c10ea5a96bc1d4829c42ba675a69491ba Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:28:19 +0000
Subject: [PATCH 05/20] add print

---
 reproschema/redcap2reproschema.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index a9b2426..6beb731 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -181,6 +181,7 @@ def process_row(
             )
 
         elif key in ["question", "schema:description", "preamble"] and value:
+            print(key, value)
             rowData.update({schema_map[key]: parse_html(value)})
 
         elif key == "Identifier?" and value:

From b95822f7caf3e748afd648425283ff7745eec3c9 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:30:38 +0000
Subject: [PATCH 06/20] add new test print

---
 reproschema/redcap2reproschema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 6beb731..f6f06a2 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -144,6 +144,7 @@ def process_row(
         }
 
     for key, value in field.items():
+        print(f"Key: {key}, Value: {value}")
         if schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})
 
@@ -181,7 +182,6 @@ def process_row(
             )
 
         elif key in ["question", "schema:description", "preamble"] and value:
-            print(key, value)
             rowData.update({schema_map[key]: parse_html(value)})
 
         elif key == "Identifier?" and value:

From 34b7b99e1235e4741be13b11d6d2c42a4a652ee4 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:34:47 +0000
Subject: [PATCH 07/20] another print test

---
 reproschema/redcap2reproschema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index f6f06a2..1f5329c 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -144,7 +144,7 @@ def process_row(
         }
 
     for key, value in field.items():
-        print(f"Key: {key}, Value: {value}")
+        print(f"Schema Map Key: {schema_map.get(key)}, Original Key: {key}, Value: {value}")
         if schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})
 

From 5b53e0726e179cda9a5481353981fd3f96b2ec5f Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:38:59 +0000
Subject: [PATCH 08/20] fix schema_map.get(key) error

---
 reproschema/redcap2reproschema.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 1f5329c..349a067 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -144,7 +144,6 @@ def process_row(
         }
 
     for key, value in field.items():
-        print(f"Schema Map Key: {schema_map.get(key)}, Original Key: {key}, Value: {value}")
         if schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})
 
@@ -181,7 +180,7 @@ def process_row(
                 {"variableName": field["Variable / Field Name"], "isVis": condition}
             )
 
-        elif key in ["question", "schema:description", "preamble"] and value:
+        elif schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
             rowData.update({schema_map[key]: parse_html(value)})
 
         elif key == "Identifier?" and value:

From ffed151960f8f962187145c517951edcbc447035 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Fri, 12 Jan 2024 00:42:08 +0000
Subject: [PATCH 09/20] fix presentation order in item

---
 reproschema/redcap2reproschema.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 349a067..0885aac 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -144,7 +144,10 @@ def process_row(
         }
 
     for key, value in field.items():
-        if schema_map.get(key) == "allow" and value:
+        if schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
+            rowData.update({schema_map[key]: parse_html(value)})
+        
+        elif schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})
 
         elif key in ui_list and value:
@@ -180,9 +183,6 @@ def process_row(
                 {"variableName": field["Variable / Field Name"], "isVis": condition}
             )
 
-        elif schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
-            rowData.update({schema_map[key]: parse_html(value)})
-
         elif key == "Identifier?" and value:
             identifier_val = value.lower() == "y"
             rowData.update(

From c8d915fdb8f5b43fac41e07499c1620d3d0db7f7 Mon Sep 17 00:00:00 2001
From: Yibei Chen <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 17:09:00 +0000
Subject: [PATCH 10/20] add id to items

---
 reproschema/redcap2reproschema.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 0885aac..7ed5183 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -6,6 +6,7 @@
 import yaml
 from bs4 import BeautifulSoup
 
+matrix_group_count = {}
 
 def normalize_condition(condition_str):
     re_parentheses = re.compile(r"\(([0-9]*)\)")
@@ -119,9 +120,19 @@ def process_row(
     response_list,
     additional_notes_list,
 ):
+    matrix_group_name = field.get("Matrix Group Name")
+    if matrix_group_name:
+        matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1
+        item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}"
+    else:
+        item_id = field["Variable / Field Name"]
+
     rowData = {
         "@context": schema_context_url,
         "@type": "reproschema:Field",
+        "@id": item_id,
+        "prefLabel": item_id,
+        "description": f"{item_id} of {form_name}"
     }
 
     field_type = field.get("Field Type", "")

From 585b516336969c47ce33d08d6efd6bb63c7d2088 Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:38:52 -0500
Subject: [PATCH 11/20] use dict.get for field lookups and add debug print

---
 reproschema/redcap2reproschema.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 7ed5183..64aa1e8 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -120,12 +120,14 @@ def process_row(
     response_list,
     additional_notes_list,
 ):
-    matrix_group_name = field.get("Matrix Group Name")
+    global matrix_group_count
+    
+    matrix_group_name = field.get("Matrix Group Name", "")
     if matrix_group_name:
         matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1
         item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}"
     else:
-        item_id = field["Variable / Field Name"]
+        item_id = field.get("Variable / Field Name", "")
 
     rowData = {
         "@context": schema_context_url,
@@ -155,6 +157,7 @@ def process_row(
         }
 
     for key, value in field.items():
+        print(key, value, filed)
         if schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
             rowData.update({schema_map[key]: parse_html(value)})
         

From c86d80d56e748a5f7f301df3cfa39c9a3e3bc3d6 Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:40:44 -0500
Subject: [PATCH 12/20] add debug prints to process_csv and main

---
 reproschema/redcap2reproschema.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 64aa1e8..a3fc5e4 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -359,6 +359,7 @@ def process_csv(
                 languages = parse_language_iso_codes(row["Field Label"])
 
             for field in datas[form_name]:
+                print(f"process_csv-field: {field}")
                 field_name = field["Variable / Field Name"]
                 order[form_name].append(f"items/{field_name}")
                 process_row(
@@ -518,6 +519,7 @@ def main():
     parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.")
     parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.")
     args = parser.parse_args()
+    print("Start processing")
 
     # Call the main conversion function
     redcap2reproschema(args.csv_file, args.yaml_file)

From 6ec5d715a593d1168fc94a6654215b93c0c7cca1 Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:46:01 -0500
Subject: [PATCH 13/20] remove BOM

---
 reproschema/redcap2reproschema.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index a3fc5e4..10c4467 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -8,6 +8,9 @@
 
 matrix_group_count = {}
 
+def clean_header(header):
+    return {k.lstrip('\ufeff'): v for k, v in header.items()}
+
 def normalize_condition(condition_str):
     re_parentheses = re.compile(r"\(([0-9]*)\)")
     re_non_gt_lt_equal = re.compile(r"([^>|<])=")
@@ -345,6 +348,7 @@ def process_csv(
     with open(csv_file, mode="r", encoding="utf-8") as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
+            row = clean_header(row)
             form_name = row["Form Name"]
             if form_name not in datas:
                 datas[form_name] = []
@@ -359,7 +363,6 @@ def process_csv(
                 languages = parse_language_iso_codes(row["Field Label"])
 
             for field in datas[form_name]:
-                print(f"process_csv-field: {field}")
                 field_name = field["Variable / Field Name"]
                 order[form_name].append(f"items/{field_name}")
                 process_row(

From 683f601c7f34d83857b2b6d7ca056707104bf53b Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:48:54 -0500
Subject: [PATCH 14/20] add prints to test

---
 reproschema/redcap2reproschema.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 10c4467..e3e0463 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -363,6 +363,7 @@ def process_csv(
                 languages = parse_language_iso_codes(row["Field Label"])
 
             for field in datas[form_name]:
+                print(f"process_csv-field: {field}, datas: {datas}")
                 field_name = field["Variable / Field Name"]
                 order[form_name].append(f"items/{field_name}")
                 process_row(

From 9a35ee9aa20e527738eb7e65a99e4a65cf587a2f Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:52:07 -0500
Subject: [PATCH 15/20] add prints to test

---
 reproschema/redcap2reproschema.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index e3e0463..b7f50c0 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -124,8 +124,9 @@ def process_row(
     additional_notes_list,
 ):
     global matrix_group_count
-    
+    print(f"process_row-field-before: {field}")
     matrix_group_name = field.get("Matrix Group Name", "")
+    print(f"process_row-field-after: {field}")
     if matrix_group_name:
         matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1
         item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}"

From 7e931457c6cd6ac4f8bca35f665b6dfab6f12c74 Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 12:57:19 -0500
Subject: [PATCH 16/20] fix typo

---
 reproschema/redcap2reproschema.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index b7f50c0..4ae98d6 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -37,8 +37,9 @@ def process_visibility(data):
     }
     return visibility_obj
 
-def parse_field_type_and_value(data, input_type_map):
-    field_type = data.get("Field Type", "")
+def parse_field_type_and_value(field, input_type_map):
+    print(f"parse_field_type_and_value-field: {field}")
+    field_type = field.get("Field Type", "")
 
     input_type = input_type_map.get(field_type, field_type)
 
@@ -50,7 +51,7 @@ def parse_field_type_and_value(data, input_type_map):
         "email": "email",
         "phone": "phone",
     }
-    validation_type = data.get("Text Validation Type OR Show Slider Number", "")
+    validation_type = field.get("Text Validation Type OR Show Slider Number", "")
 
     value_type = value_type_map.get(validation_type, "xsd:string")
 
@@ -124,9 +125,7 @@ def process_row(
     additional_notes_list,
 ):
     global matrix_group_count
-    print(f"process_row-field-before: {field}")
     matrix_group_name = field.get("Matrix Group Name", "")
-    print(f"process_row-field-after: {field}")
     if matrix_group_name:
         matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1
         item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}"
@@ -161,7 +160,6 @@ def process_row(
         }
 
     for key, value in field.items():
-        print(key, value, filed)
         if schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
             rowData.update({schema_map[key]: parse_html(value)})
         

From 003e850006f4e22f06886d9eda62953ed8974fbd Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Thu, 18 Jan 2024 19:17:43 -0500
Subject: [PATCH 17/20] improve responseOptions

---
 reproschema/redcap2reproschema.py | 40 ++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 4ae98d6..c551b50 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -38,22 +38,31 @@ def process_visibility(data):
     return visibility_obj
 
 def parse_field_type_and_value(field, input_type_map):
-    print(f"parse_field_type_and_value-field: {field}")
     field_type = field.get("Field Type", "")
-
     input_type = input_type_map.get(field_type, field_type)
 
+    # Initialize the default value type as string
+    value_type = "xsd:string"
+
+    # Map certain field types directly to xsd types
     value_type_map = {
-        "number": "xsd:int",
+        "text": "xsd:string",
         "date_": "xsd:date",
-        "datetime_": "datetime",
-        "time_": "xsd:date",
-        "email": "email",
-        "phone": "phone",
-    }
-    validation_type = field.get("Text Validation Type OR Show Slider Number", "")
-
-    value_type = value_type_map.get(validation_type, "xsd:string")
+        "datetime_": "xsd:dateTime",
+        "time_": "xsd:time",
+        "email": "xsd:string",
+        "phone": "xsd:string",
+    } # todo: input_type="signature"
+
+    # Get the validation type from the field, if available
+    validation_type = field.get("Text Validation Type OR Show Slider Number", "").strip()
+
+    if validation_type:
+        # Map the validation type to an XSD type if it's in the map
+        value_type = value_type_map.get(validation_type, "xsd:string")
+    elif field_type in ["radio", "dropdown"]:
+        # If there's no validation type, but the field type is radio or dropdown, use xsd:integer
+        value_type = "xsd:integer"
 
     return input_type, value_type
 
@@ -74,7 +83,7 @@ def process_choices(field_type, choices_str):
         except ValueError:
             value = parts[0]
 
-        choice_obj = {"schema:value": value, "schema:name": parts[1]}
+        choice_obj = {"name": parts[1], "value": value}
         if len(parts) == 3:
             # Handle image url
             choice_obj["schema:image"] = f"{parts[2]}.png"
@@ -154,8 +163,8 @@ def process_row(
         rowData["responseOptions"] = {
             "valueType": "xsd:boolean",
             "choices": [
-                {"schema:value": 1, "schema:name": "Yes"},
-                {"schema:value": 0, "schema:name": "No"}
+                {"name": "Yes", "value": 1},
+                {"name": "No", "value": 0}
             ]
         }
 
@@ -362,7 +371,6 @@ def process_csv(
                 languages = parse_language_iso_codes(row["Field Label"])
 
             for field in datas[form_name]:
-                print(f"process_csv-field: {field}, datas: {datas}")
                 field_name = field["Variable / Field Name"]
                 order[form_name].append(f"items/{field_name}")
                 process_row(
@@ -522,9 +530,7 @@ def main():
     parser.add_argument("csv_file", help="Path to the REDCap data dictionary CSV file.")
     parser.add_argument("yaml_file", help="Path to the Reproschema protocol YAML file.")
     args = parser.parse_args()
-    print("Start processing")
 
-    # Call the main conversion function
     redcap2reproschema(args.csv_file, args.yaml_file)
 
 

From 37853b23d69c09397c91c6b3941ab967cd24df6a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Jan 2024 00:26:21 +0000
Subject: [PATCH 18/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 reproschema/redcap2reproschema.py | 42 ++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index c551b50..47473cb 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -8,8 +8,10 @@
 
 matrix_group_count = {}
 
+
 def clean_header(header):
-    return {k.lstrip('\ufeff'): v for k, v in header.items()}
+    return {k.lstrip("\ufeff"): v for k, v in header.items()}
+
 
 def normalize_condition(condition_str):
     re_parentheses = re.compile(r"\(([0-9]*)\)")
@@ -37,6 +39,7 @@ def process_visibility(data):
     }
     return visibility_obj
 
+
 def parse_field_type_and_value(field, input_type_map):
     field_type = field.get("Field Type", "")
     input_type = input_type_map.get(field_type, field_type)
@@ -52,10 +55,12 @@ def parse_field_type_and_value(field, input_type_map):
         "time_": "xsd:time",
         "email": "xsd:string",
         "phone": "xsd:string",
-    } # todo: input_type="signature"
+    }  # todo: input_type="signature"
 
     # Get the validation type from the field, if available
-    validation_type = field.get("Text Validation Type OR Show Slider Number", "").strip()
+    validation_type = field.get(
+        "Text Validation Type OR Show Slider Number", ""
+    ).strip()
 
     if validation_type:
         # Map the validation type to an XSD type if it's in the map
@@ -66,15 +71,18 @@ def parse_field_type_and_value(field, input_type_map):
 
     return input_type, value_type
 
+
 def process_choices(field_type, choices_str):
-    if field_type not in ['radio', 'dropdown']:  # Handle only radio and dropdown types
+    if field_type not in ["radio", "dropdown"]:  # Handle only radio and dropdown types
         return None
 
     choices = []
     for choice in choices_str.split("|"):
         parts = choice.split(", ")
         if len(parts) < 2:
-            print(f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field")
+            print(
+                f"Warning: Skipping invalid choice format '{choice}' in a {field_type} field"
+            )
             continue
 
         # Try to convert the first part to an integer, if it fails, keep it as a string
@@ -90,6 +98,7 @@ def process_choices(field_type, choices_str):
         choices.append(choice_obj)
     return choices
 
+
 def write_to_file(abs_folder_path, form_name, field_name, rowData):
     file_path = os.path.join(
         f"{abs_folder_path}", "activities", form_name, "items", f"{field_name}"
@@ -117,7 +126,9 @@ def parse_html(input_string, default_language="en"):
         if not result:  # If no text was extracted
             result[default_language] = soup.get_text(strip=True)
     else:
-        result[default_language] = soup.get_text(strip=True)  # Use the entire text as default language text
+        result[default_language] = soup.get_text(
+            strip=True
+        )  # Use the entire text as default language text
 
     return result
 
@@ -136,7 +147,9 @@ def process_row(
     global matrix_group_count
     matrix_group_name = field.get("Matrix Group Name", "")
     if matrix_group_name:
-        matrix_group_count[matrix_group_name] = matrix_group_count.get(matrix_group_name, 0) + 1
+        matrix_group_count[matrix_group_name] = (
+            matrix_group_count.get(matrix_group_name, 0) + 1
+        )
         item_id = f"{matrix_group_name}_{matrix_group_count[matrix_group_name]}"
     else:
         item_id = field.get("Variable / Field Name", "")
@@ -146,7 +159,7 @@ def process_row(
         "@type": "reproschema:Field",
         "@id": item_id,
         "prefLabel": item_id,
-        "description": f"{item_id} of {form_name}"
+        "description": f"{item_id} of {form_name}",
     }
 
     field_type = field.get("Field Type", "")
@@ -162,16 +175,16 @@ def process_row(
     if field_type == "yesno":
         rowData["responseOptions"] = {
             "valueType": "xsd:boolean",
-            "choices": [
-                {"name": "Yes", "value": 1},
-                {"name": "No", "value": 0}
-            ]
+            "choices": [{"name": "Yes", "value": 1}, {"name": "No", "value": 0}],
         }
 
     for key, value in field.items():
-        if schema_map.get(key) in ["question", "schema:description", "preamble"] and value:
+        if (
+            schema_map.get(key) in ["question", "schema:description", "preamble"]
+            and value
+        ):
             rowData.update({schema_map[key]: parse_html(value)})
-        
+
         elif schema_map.get(key) == "allow" and value:
             rowData.setdefault("ui", {}).update({schema_map[key]: value.split(", ")})
 
@@ -191,7 +204,6 @@ def process_row(
                 {"choices": process_choices(field_type, value)}
             )
 
-
         elif schema_map.get(key) == "scoringLogic" and value:
             condition = normalize_condition(value)
             rowData.setdefault("ui", {}).update({"hidden": True})

From 1533f114af2dab5db12fabcb791a04b2463f80b5 Mon Sep 17 00:00:00 2001
From: yibeichan <yibeichan@gmail.com>
Date: Fri, 19 Jan 2024 11:55:14 -0500
Subject: [PATCH 19/20] get unique order

---
 reproschema/redcap2reproschema.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 47473cb..a2c5596 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -248,6 +248,10 @@ def create_form_schema(
     matrix_list,
     scores_list,
 ):
+
+    # Use a set to track unique items and preserve order
+    unique_order = list(dict.fromkeys(order.get(form_name, [])))
+
     # Construct the JSON-LD structure
     json_ld = {
         "@context": schema_context_url,
@@ -258,7 +262,7 @@ def create_form_schema(
         "schemaVersion": "1.0.0-rc4",
         "version": "0.0.1",
         "ui": {
-            "order": order.get(form_name, []),
+            "order": unique_order,
             "addProperties": bl_list,
             "shuffle": False,
         },

From 891d5809b78703b6f38595d129d291e13ac1950f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Jan 2024 16:55:29 +0000
Subject: [PATCH 20/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 reproschema/redcap2reproschema.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index a2c5596..f267a2f 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -248,7 +248,6 @@ def create_form_schema(
     matrix_list,
     scores_list,
 ):
-
     # Use a set to track unique items and preserve order
     unique_order = list(dict.fromkeys(order.get(form_name, [])))