From a49aea6e7a0b73fa8669385bfa3dfbc9bb2858fc Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 17:22:21 +0300 Subject: [PATCH 01/14] piping bdgraph --- workflow/rules/module_strings.py | 12 ++ .../bdgraph_pip/Dockerfile | 3 + .../bdgraph_pip/docs.rst | 0 .../bdgraph_pip/info.json | 22 +++ .../bdgraph_pip/rule.smk | 20 +++ .../bdgraph_pip/schema.json | 164 ++++++++++++++++++ .../bdgraph_pip/script.R | 164 ++++++++++++++++++ 7 files changed, 385 insertions(+) create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index 69892ea4..f8251f6c 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -8,6 +8,8 @@ # of the id of some algorithm. Thus the id has to be exptracted into a path string first. def idtopath(mylist, json_string): + if mylist is None: + return "None" if isinstance(mylist, list): return [json_string[startalg][0] for startalg in mylist] else: @@ -35,6 +37,16 @@ def idtopath(mylist, json_string): for val in order_mcmc_list } ) +if "bdgraph_pip" in pattern_strings: + bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] + # The path to the startspace algorithm is extended here + for items in bdgraph_pip_list: + items["startalg"] = idtopath(items["startalg"], json_string) + + json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"], **val,) + for val in bdgraph_pip_list} ) + + if "bidag_partition_mcmc" in pattern_strings: bidag_partition_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_partition_mcmc"] diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile b/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile new file mode 100644 index 00000000..627e29e0 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile @@ -0,0 +1,3 @@ +FROM r-base + +RUN R -e "install.packages(\"BDgraph\", repos=\"https://cran.rstudio.com\")" --no-save diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst b/workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json new file mode 100644 index 00000000..9b750edb --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json @@ -0,0 +1,22 @@ +{ + "title": "BDgraph", + "version": "2.64", + "package": { + "title": "BDgraph", + "url": "https://cran.r-project.org/web/packages/BDgraph/index.html" + }, + "docs_url": "https://cran.r-project.org/web/packages/BDgraph/BDgraph.pdf", + "papers": [ + { + "title": "BDgraph: An R Package for Bayesian Structure Learning in Graphical Models", + "url": "https://www.jstatsoft.org/article/view/v089i03" + } + ], + "outputs": [ + "graphtraj" + ], + "graph_types": [ + "UG" + ], + "language": "R" +} \ No newline at end of file diff --git 
a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk new file mode 100644 index 00000000..e60c4427 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -0,0 +1,20 @@ + +def fix_none_startalg(wildcards): + if wildcards["startalg"] == "None": + return [] + else: + return "{output_dir}/adjmat_estimate/{data}/algorithm=/{startalg}/seed={replicate}/adjmat.csv", + +rule: + name: module_name + input: + data=alg_input_data(), + startgraph_file=fix_none_startalg, + output: + seqgraph=alg_output_seqgraph_path(module_name), + time=alg_output_time_path(module_name), + ntests=touch(alg_output_ntests_path(module_name)) + container: + docker_image("bdgraph") + script: + "script.R" diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json new file mode 100644 index 00000000..f039885d --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json @@ -0,0 +1,164 @@ +{ + "description": "BDgraph MCMC objects", + "title": "bdgraph", + "type": "array", + "items": { + "title": "bdgraph item", + "description": "bdgraph instance", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier" + }, + "method": { + "anyOf": [ + { + "type": "string", + "enum": [ + "ggm", + "gcgm" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "ggm", + "gcgm" + ] + } + } + ] + }, + "algo": { + "anyOf": [ + { + "type": "string", + "enum": [ + "rjmcmc", + "bdmcmc", + "bd-dmh", + "rj-dmh" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "rjmcmc", + "bdmcmc", + "bd-dmh", + "rj-dmh" + ] + } + } + ] + }, + "iter": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "gprior": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "dfprior": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "gstart": { + "type": "string", + "enum": [ + "full", + "empty" + ] + }, + "thresh": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "threshold": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "mcmc_estimator": { + "anyOf": [ + { + "type": "string", + "enum": [ + "map", + "threshold" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "map", + "threshold" + ] + } + } + ] + }, + "burnin_frac": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "mcmc_seed": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "timeout": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull" + }, + "startalg": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "additionalProperties": true, + "required": [ + "mcmc_estimator", + "burnin_frac", + "threshold", + "method", + "algo", + "startalg", + "iter", + "gprior", + "gstart", + "dfprior", + "thresh", + "mcmc_seed", + "timeout" + ], + "examples": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": "bdmcmc", + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "startalg": null, + "mcmc_seed": 1, + "thresh": 0.5, + "mcmc_estimator": "threshold", + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 
0.3 + } + ] + } +} diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R new file mode 100644 index 00000000..e543410e --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -0,0 +1,164 @@ +source("workflow/scripts/utils/add_timeout.R") +library(BDgraph) +library(R.utils) + +adjmatToEdgeString <- function(adjmat, labels) { + edgeinds <- which(adjmat == 1, arr.ind = TRUE) + df <- data.frame(edgeinds) + edges <- "[" + firstrow <- TRUE + for (i in rownames(df)) { + edge <- paste(labels[df[i, "row"]], labels[df[i, "col"]], sep = "-") + if (firstrow) { + sep <- "" + firstrow <- FALSE + } else { + sep <- ";" + } + edges <- paste(edges, edge, sep = sep) + } + edges <- paste(edges, sep = "", "]") + return(edges) +} + +dummyEdges <- function(labels) { + n <- length(labels) + added <- "[" + for (i in 2:n) { + edge <- paste(labels[1], labels[i], sep = "-") + if (i == 2) { + sep <- "" + } else { + sep <- ";" + } + added <- paste(added, edge, sep = sep) + } + added <- paste(added, sep = "", "]") + return(added) +} + +strvec_to_adjmat <- function(str, p) { + vec <- strsplit(str, "")[[1]] + + adjmat <- matrix(0, nrow = p, ncol = p) + k <- 1 + for (i in seq(2, p)) { + for (j in seq(1, i - 1)) { + adjmat[i, j] <- as.integer(vec[k]) + k <- k + 1 + } + } + + return(adjmat) +} + + +filename <- file.path(snakemake@output[["seqgraph"]]) +filename_data <- snakemake@input[["data"]] +seed <- as.integer(snakemake@wildcards[["mcmc_seed"]]) +data <- read.csv(filename_data, check.names = FALSE) + + +wrapper <- function() { + start <- proc.time()[1] + set.seed(seed) # BUG: Seeds doesn't seem to work in BDgraph. + p <- dim(data)[2] + + bdgraph.obj <- bdgraph(data, + n = NULL, + method = snakemake@wildcards[["method"]], + algorithm = snakemake@wildcards[["algo"]], + iter = as.integer(snakemake@wildcards[["iter"]]), + burnin = 0, + not.cont = NULL, + g.prior = as.numeric(snakemake@wildcards[["gprior"]]), + df.prior = as.numeric(snakemake@wildcards[["dfprior"]]), + g.start = snakemake@wildcards[["gstart"]], + jump = NULL, + save = TRUE, + cores = NULL, + threshold = as.numeric(snakemake@wildcards[["thresh"]]) + ) + + totaltime <- proc.time()[1] - start + + adjmat_traj <- list() + # all_graph contain indices from sample_graphs + j <- 1 + + for (i in bdgraph.obj$all_graphs) { + strvec <- bdgraph.obj$sample_graphs[[i]] + + # graphs are stores as upper triangular matrices, stacked as strings + # like 00100110 of length (p choose 2). + adjmat <- strvec_to_adjmat(strvec, p) + adjmat_traj[[j]] <- adjmat + j <- j + 1 + } + + # This returns a string which is a list of flattened adjacency matrices. + labels <- colnames(data) + added <- dummyEdges(labels) + + start_edges <- adjmatToEdgeString( + adjmat_traj[[1]], + labels + ) + minweight = min(bdgraph.obj$all_weights) + + if (snakemake@wildcards[["algo"]] %in% c("bdmcmc", "bd-dmh")) { + # Translate weights into indices by dividing by the smallest weight. This + # is not perfect though, so one could instead divide by the smallest weight diff. + # However, which gives the highest resolution depends on the chain. + # We also save the actual times in the time column. + + indices <- c(0, cumsum(bdgraph.obj$all_weights)) / minweight + indices <- round(indices) + + } else { + indices <- c(0, cumsum(bdgraph.obj$all_weights)) # no difference since the weight are all 1s. 
+ } + + res <- data.frame( + "index" = c(-2, -1, indices[1]), + "time"= c(-2, -1, indices[1]), # adding the raw times as well. Can be used in e.g. heatmaps + "score" = c(0, 0, bdgraph.obj$graph_weights[[bdgraph.obj$all_graphs[[1]]]]), + "added" = c(added, "[]", start_edges), + "removed" = c("[]", added, "[]") + ) + + m <- length(adjmat_traj) + + prevmat <- adjmat_traj[[1]] + + cumsum_weights = c(0,cumsum(bdgraph.obj$all_weights)) + + for (i in seq(2, m)) { + if (all(adjmat_traj[[i]] == prevmat)) { + next + } + + removed_edge_mat <- prevmat - (prevmat & adjmat_traj[[i]]) * 1 + added_edge_mat <- adjmat_traj[[i]] - (prevmat & adjmat_traj[[i]]) * 1 + + added_edges <- adjmatToEdgeString(added_edge_mat, labels) + removed_edges <- adjmatToEdgeString(removed_edge_mat, labels) + + df <- data.frame( + "index" = indices[i], + "time"= cumsum_weights[i], # adding the raw times as well. Can be used in e.g. heamaps + "score" = bdgraph.obj$graph_weights[[bdgraph.obj$all_graphs[[i]]]], + "added" = added_edges, + "removed" = removed_edges + ) + + res <- rbind(res, df) + prevmat <- adjmat_traj[[i]] + } + + write.csv(x = res, file = filename, row.names = FALSE, quote = FALSE) + write(totaltime, file = snakemake@output[["time"]]) + +} + +add_timeout(wrapper) From 57c5fb6a22f308ddaf1b5ce9c86cf11421fd796e Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 17:24:23 +0300 Subject: [PATCH 02/14] config file --- config/bdgraph_pip_random-5.json | 279 +++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 config/bdgraph_pip_random-5.json diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json new file mode 100644 index 00000000..f6cd913b --- /dev/null +++ b/config/bdgraph_pip_random-5.json @@ -0,0 +1,279 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 1 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "bdgraph_pip_random5/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "bdgraph", + "gg99", + "bdgraph_pip" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [ + "bdgraph", + "gg99", + "bdgraph_pip" + ], + "mcmc_traj_plots": [ + { + "id": "gg99", + "burn_in": 100000, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + }, + { + "id": "bdgraph", + "burn_in": 500, + "thinning": 1, + "functional": [ + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [ + { + "id": "gg99", + "burn_in": 0.5, + "active": true + }, + { + "id": "bdgraph", + "burn_in": 0.5, + "active": true + }, + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": true + } + ], + "mcmc_autocorr_plots": [ + { + "id": "gg99", + "burn_in": 150000, + "thinning": 500, + "lags": 50, + "functional": [ + "score", + "size" + ], + "active": false + } + ] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "sample_sizes": [ + 100, + 200 + ] + } + ] + }, + "graph": { + "bdgraph_graphsim": [ + { + "id": "lattice", + "p": 50, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-1", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-2", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-5", + "p": 50, + "graph": "random", + 
"class": null, + "size": null, + "prob": 0.5 + }, + { + "id": "cluster", + "p": 50, + "graph": "cluster", + "class": null, + "size": 20, + "prob": 0.2 + }, + { + "id": "circle", + "p": 50, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "hub", + "p": 50, + "graph": "hub", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "scale-free", + "p": 50, + "graph": "scale-free", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 3, + "threshold_conv": 0.0000001 + } + ] + }, + "structure_learning_algorithms": { + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 30000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + "gg99_singlepair": [ + { + "id": "gg99", + "n_samples": [ + 2000000 + ], + "datatype": "continuous", + "randomits": 100, + "prior": "ep", + "ascore": null, + "bscore": null, + "clq": 2, + "sep": 4, + "penalty": [ + 0.0 + ], + "mcmc_seed": [ + 1 + ], + "timeout": null, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + + "bdgraph_pip": [ + { + "id": "bdgraph_pip", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "startalg": "gg99", + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ] + } + } +} From 267fc385da55701095deea591f1d3bc330d8365c Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 21:32:56 +0300 Subject: [PATCH 03/14] fixing typo --- .../rules/structure_learning_algorithms/bdgraph_pip/schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json index f039885d..1ab2ecdb 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json @@ -118,6 +118,7 @@ "type": "null" } ] + } }, "additionalProperties": true, "required": [ From e1d33ff7d16518252194c2dd93d41ce83808191a Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Fri, 16 Dec 2022 11:17:10 +0100 Subject: [PATCH 04/14] Comments of problem with mcmc input alg. Not working yet. 
--- config/bdgraph_pip_random-5.json | 58 ++++++++++++------- workflow/rules/module_patterns.py | 6 +- workflow/rules/module_strings.py | 27 ++++++--- .../bdgraph_pip/rule.smk | 2 +- .../bdgraph_pip/script.R | 2 +- 5 files changed, 63 insertions(+), 32 deletions(-) diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json index f6cd913b..90453a68 100644 --- a/config/bdgraph_pip_random-5.json +++ b/config/bdgraph_pip_random-5.json @@ -1,7 +1,7 @@ { "benchmark_setup": { "data": [ - { + { "data_id": "example1", "graph_id": "random-5", "parameters_id": "gwi", @@ -21,18 +21,16 @@ "path": true, "text": false, "ids": [ - "bdgraph", - "gg99", - "bdgraph_pip" + "gg99", + "bdgraph_pip" ] }, "graph_true_plots": true, "graph_true_stats": false, "ggally_ggpairs": false, "graph_plots": [ - "bdgraph", - "gg99", - "bdgraph_pip" + "gg99", + "bdgraph_pip" ], "mcmc_traj_plots": [ { @@ -52,28 +50,28 @@ "functional": [ "size" ], - "active": false + "active": false } ], "mcmc_heatmaps": [ { "id": "gg99", "burn_in": 0.5, - "active": true + "active": false }, { "id": "bdgraph", "burn_in": 0.5, - "active": true + "active": false }, - { - "id": "bdgraph_pip", - "burn_in": 0.5, - "active": true + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": false } ], "mcmc_autocorr_plots": [ - { + { "id": "gg99", "burn_in": 150000, "thinning": 500, @@ -178,6 +176,25 @@ ] }, "structure_learning_algorithms": { + "bnlearn_tabu": [ + { + "id": "tabu-bge", + "score": "bge", + "iss": 1, + "issmu": [ + 0.0001, + 0.001, + 0.01, + 0.05 + ], + "issw": null, + "l": 5, + "k": 1, + "prior": "uniform", + "beta": 1, + "timeout": null + } + ], "bdgraph": [ { "id": "bdgraph", @@ -212,7 +229,7 @@ { "id": "gg99", "n_samples": [ - 2000000 + 20000 ], "datatype": "continuous", "randomits": 100, @@ -242,8 +259,7 @@ "burnin_frac": 0.3 } ], - - "bdgraph_pip": [ + "bdgraph_pip": [ { "id": "bdgraph_pip", "method": "ggm", @@ -252,8 +268,8 @@ ], "iter": 3000, "gprior": 0.5, - "dfprior": 3, - "startalg": "gg99", + "dfprior": 3, + "startalg": "tabu-bge", "gstart": "empty", "timeout": null, "mcmc_seed": [ @@ -276,4 +292,4 @@ ] } } -} +} \ No newline at end of file diff --git a/workflow/rules/module_patterns.py b/workflow/rules/module_patterns.py index 39934cc0..6ec5827b 100644 --- a/workflow/rules/module_patterns.py +++ b/workflow/rules/module_patterns.py @@ -13,10 +13,12 @@ def dict_to_path(d): c = d[0].copy() # take the first element in the list. BUG c.pop("id") # remove id from the string as only the parameters should identify the computation. - if "burnin" in c: - c.pop("burnin") + if "burnin_frac" in c: + c.pop("burnin_frac") if "threshold" in c: c.pop("threshold") + if "mcmc_estimator" in c: + c.pop("mcmc_estimator") if "active" in c: c.pop("active") sep = "/" diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index f8251f6c..f838df79 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -7,11 +7,18 @@ # Order MCMC is special in the sense that it can define a startspace by means # of the id of some algorithm. Thus the id has to be exptracted into a path string first. 
-def idtopath(mylist, json_string): +def idtopath(mylist, json_string, mcmc_est=False): + print("mylist") + print(mylist) + + print("json_string") + print(json_string) if mylist is None: return "None" if isinstance(mylist, list): + #if mcmc_est: return [json_string[startalg][0] for startalg in mylist] + else: return json_string[str(mylist)] @@ -20,14 +27,17 @@ def idtopath(mylist, json_string): # Generate strings from the config file. for alg in config["resources"]["structure_learning_algorithms"]: # Some algorihtm takes input graphs. These are treated separately. - has_input_algs = ["bidag_order_mcmc", "bidag_partition_mcmc"] - if alg not in has_input_algs: + has_input_algs = ["bidag_order_mcmc", "bidag_partition_mcmc" , "bdgraph_pip"] + if (alg not in has_input_algs) and (alg not in mcmc_modules): # not the mcmc_modules yet json_string.update({val["id"]: expand(pattern_strings[alg], **val) for val in config["resources"]["structure_learning_algorithms"][alg]}) # These are special and have to be the last one since they take input strings as start space. # The start space path has to be gnerated first. + +# Maybe this can be done in the end. +# Need to check if the startspace_algoruthm is an mcmc algorithm. If so idtopath should give also append pattern_strings["mcmc_est"] if "bidag_order_mcmc" in pattern_strings: order_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_order_mcmc"] for items in order_mcmc_list: @@ -40,12 +50,15 @@ def idtopath(mylist, json_string): if "bdgraph_pip" in pattern_strings: bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] # The path to the startspace algorithm is extended here - for items in bdgraph_pip_list: - items["startalg"] = idtopath(items["startalg"], json_string) + for items in bdgraph_pip_list: + #print("hej") + #print(items) + items["startalg"] = idtopath(items["startalg"], json_string, mcmc_est=True) # Need to att the estmimator to the startalg as well - json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"], **val,) + json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"] +"/"+pattern_strings["mcmc_est"] , **val,) for val in bdgraph_pip_list} ) +#print(json_string["bdgraph_pip"]) if "bidag_partition_mcmc" in pattern_strings: @@ -54,7 +67,7 @@ def idtopath(mylist, json_string): for items in bidag_partition_mcmc_list: items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) - json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"], **val,) + json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in bidag_partition_mcmc_list } ) diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk index e60c4427..8a944b7d 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -15,6 +15,6 @@ rule: time=alg_output_time_path(module_name), ntests=touch(alg_output_ntests_path(module_name)) container: - docker_image("bdgraph") + "docker://onceltuca/bdgraph:2.64" script: "script.R" diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R index e543410e..d3689701 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R +++ 
b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -1,6 +1,6 @@ source("workflow/scripts/utils/add_timeout.R") library(BDgraph) -library(R.utils) + adjmatToEdgeString <- function(adjmat, labels) { edgeinds <- which(adjmat == 1, arr.ind = TRUE) From a5c9e4828e76fe9b18aac67e57a9dcf9998fa8d2 Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Wed, 21 Dec 2022 17:05:11 +0100 Subject: [PATCH 05/14] Supporting single MCMC startalg in bdgraph_pip. Not supporting list of startalg yet.. --- config/bdgraph_pip_random-5.json | 554 +++++++++--------- workflow/rules/module_strings.py | 77 ++- .../bdgraph_pip/rule.smk | 1 + .../bdgraph_pip/script.R | 1 + 4 files changed, 322 insertions(+), 311 deletions(-) diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json index 90453a68..54d998e3 100644 --- a/config/bdgraph_pip_random-5.json +++ b/config/bdgraph_pip_random-5.json @@ -1,295 +1,279 @@ { - "benchmark_setup": { - "data": [ - { - "data_id": "example1", - "graph_id": "random-5", - "parameters_id": "gwi", - "seed_range": [ - 1, - 1 - ] - } - ], - "evaluation": { - "benchmarks": { - "filename_prefix": "bdgraph_pip_random5/", - "show_seed": false, - "errorbar": true, - "errorbarh": false, - "scatter": true, - "path": true, - "text": false, - "ids": [ - "gg99", - "bdgraph_pip" - ] - }, - "graph_true_plots": true, - "graph_true_stats": false, - "ggally_ggpairs": false, - "graph_plots": [ - "gg99", - "bdgraph_pip" - ], - "mcmc_traj_plots": [ - { - "id": "gg99", - "burn_in": 100000, - "thinning": 100, - "functional": [ - "score", - "size" - ], - "active": false - }, - { - "id": "bdgraph", - "burn_in": 500, - "thinning": 1, - "functional": [ - "size" - ], - "active": false - } - ], - "mcmc_heatmaps": [ - { - "id": "gg99", - "burn_in": 0.5, - "active": false - }, - { - "id": "bdgraph", - "burn_in": 0.5, - "active": false - }, - { - "id": "bdgraph_pip", - "burn_in": 0.5, - "active": false - } - ], - "mcmc_autocorr_plots": [ - { - "id": "gg99", - "burn_in": 150000, - "thinning": 500, - "lags": 50, - "functional": [ - "score", - "size" - ], - "active": false - } - ] - } - }, - "resources": { - "data": { - "iid": [ - { - "id": "example1", - "standardized": false, - "sample_sizes": [ - 100, - 200 - ] + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 1 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "bdgraph_pip_random5/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "bdgraph_pip" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [ + "bdgraph_pip" + ], + "mcmc_traj_plots": [ + { + "id": "gg99", + "burn_in": 100000, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + }, + { + "id": "bdgraph", + "burn_in": 500, + "thinning": 1, + "functional": [ + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [ + { + "id": "gg99", + "burn_in": 0.5, + "active": false + }, + { + "id": "bdgraph", + "burn_in": 0.5, + "active": false + }, + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": false + } + ], + "mcmc_autocorr_plots": [ + { + "id": "gg99", + "burn_in": 150000, + "thinning": 500, + "lags": 50, + "functional": [ + "score", + "size" + ], + "active": false + } + ] } - ] }, - "graph": { - "bdgraph_graphsim": [ - { - "id": "lattice", - "p": 50, - "graph": "lattice", - "class": null, - "size": null, 
- "prob": 0.2 - }, - { - "id": "random-1", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.1 - }, - { - "id": "random-2", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.2 + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "sample_sizes": [ + 200 + ] + } + ] }, - { - "id": "random-5", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.5 + "graph": { + "bdgraph_graphsim": [ + { + "id": "lattice", + "p": 50, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-1", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-2", + "p": 10, + "graph": "random", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-5", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.5 + }, + { + "id": "cluster", + "p": 50, + "graph": "cluster", + "class": null, + "size": 20, + "prob": 0.2 + }, + { + "id": "circle", + "p": 50, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "hub", + "p": 50, + "graph": "hub", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "scale-free", + "p": 50, + "graph": "scale-free", + "class": null, + "size": null, + "prob": 0.2 + } + ] }, - { - "id": "cluster", - "p": 50, - "graph": "cluster", - "class": null, - "size": 20, - "prob": 0.2 + "parameters": { + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 3, + "threshold_conv": 0.0000001 + } + ] }, - { - "id": "circle", - "p": 50, - "graph": "circle", - "class": null, - "size": null, - "prob": 0.2 - }, - { - "id": "hub", - "p": 50, - "graph": "hub", - "class": null, - "size": null, - "prob": 0.2 - }, - { - "id": "scale-free", - "p": 50, - "graph": "scale-free", - "class": null, - "size": null, - "prob": 0.2 - } - ] - }, - "parameters": { - "bdgraph_rgwish": [ - { - "id": "gwi", - "b": 3, - "threshold_conv": 0.0000001 - } - ] - }, - "structure_learning_algorithms": { - "bnlearn_tabu": [ - { - "id": "tabu-bge", - "score": "bge", - "iss": 1, - "issmu": [ - 0.0001, - 0.001, - 0.01, - 0.05 + "structure_learning_algorithms": { + "bnlearn_tabu": [ + { + "id": "tabu-bge", + "score": "bge", + "iss": 1, + "issmu": [ + 0.001 + ], + "issw": null, + "l": 5, + "k": 1, + "prior": "uniform", + "beta": 1, + "timeout": null + } ], - "issw": null, - "l": 5, - "k": 1, - "prior": "uniform", - "beta": 1, - "timeout": null - } - ], - "bdgraph": [ - { - "id": "bdgraph", - "method": "ggm", - "algo": [ - "bdmcmc" - ], - "iter": 30000, - "gprior": 0.5, - "dfprior": 3, - "gstart": "empty", - "timeout": null, - "mcmc_seed": [ - 1 - ], - "thresh": 0.5, - "mcmc_estimator": [ - "threshold" - ], - "threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 - } - ], - "gg99_singlepair": [ - { - "id": "gg99", - "n_samples": [ - 20000 - ], - "datatype": "continuous", - "randomits": 100, - "prior": "ep", - "ascore": null, - "bscore": null, - "clq": 2, - "sep": 4, - "penalty": [ - 0.0 - ], - "mcmc_seed": [ - 1 - ], - "timeout": null, - "mcmc_estimator": [ - "threshold" - ], - "threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 - } - ], - "bdgraph_pip": [ - { - "id": "bdgraph_pip", - "method": "ggm", - "algo": [ - "bdmcmc" - ], - "iter": 3000, - "gprior": 0.5, - "dfprior": 3, - "startalg": "tabu-bge", - "gstart": "empty", - "timeout": null, - "mcmc_seed": [ - 1 - ], - "thresh": 0.5, - "mcmc_estimator": [ - "threshold" - ], - 
"threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 30000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + "gg99_singlepair": [ + { + "id": "gg99", + "n_samples": [ + 20000 + ], + "datatype": "continuous", + "randomits": 100, + "prior": "ep", + "ascore": null, + "bscore": null, + "clq": 2, + "sep": 4, + "penalty": [ + 0.0 + ], + "mcmc_seed": [ + 1 + ], + "timeout": null, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.5 + ], + "burnin_frac": 0.3 + } + ], + "bdgraph_pip": [ + { + "id": "bdgraph_pip", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "startalg": "gg99", + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.5 + ], + "burnin_frac": 0.3 + } + ] } - ] } - } } \ No newline at end of file diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index f838df79..eefd5d08 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -1,27 +1,47 @@ -# This file pricudes a dict where the paths for all algorithms are generated based on the -# algorithm pattern strings and the values in the config file. -# -# MCMC methods are special since the the estimation parameters should not mix with the -# parameters that define the run. -# -# Order MCMC is special in the sense that it can define a startspace by means -# of the id of some algorithm. Thus the id has to be exptracted into a path string first. - -def idtopath(mylist, json_string, mcmc_est=False): - print("mylist") - print(mylist) + + + +def id_to_alg(id): + for key, alg in config["resources"]["structure_learning_algorithms"].items(): + for obj in alg: + if obj["id"] == id: + return key + + return None + + + +""" + This file procudes a dict where the paths for all algorithms are generated based on the + algorithm pattern strings and the values in the config file. + MCMC methods are special since the the estimation parameters should not mix with the + parameters that define the run. + Order MCMC is special in the sense that it can define a startspace by means + of the id of some algorithm. Thus the id has to be exptracted into a path string first. +""" +def idtopath(idlist): + + + # mylist can either be None, an id, or a list of ids. + # The id may correspond to an MCMC alg, then the estimator parameters should be added too. + + alg = id_to_alg(idlist) + vals = config["resources"]["structure_learning_algorithms"][alg][0] - print("json_string") - print(json_string) - if mylist is None: + if idlist is None: return "None" - if isinstance(mylist, list): - #if mcmc_est: - return [json_string[startalg][0] for startalg in mylist] - + if isinstance(idlist, list): # TODO: not spported yet + if id_to_alg(idlist[0]) in mcmc_modules: + return + else: + return [json_string[startalg][0] for startalg in idlist] else: - return json_string[str(mylist)] + if alg in mcmc_modules: + return expand(pattern_strings[alg]+"/"+pattern_strings["mcmc_est"], **vals) + else: + return expand(pattern_strings[alg], **vals) + json_string = {} # Generate strings from the config file. 
@@ -41,38 +61,39 @@ def idtopath(mylist, json_string, mcmc_est=False): if "bidag_order_mcmc" in pattern_strings: order_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_order_mcmc"] for items in order_mcmc_list: - items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) + items["startspace_algorithm"] = idtopath(items["startspace_algorithm"]) json_string.update({val["id"]: expand(pattern_strings["bidag_order_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in order_mcmc_list } ) +# BUG: The the algs used as input need to bedefied before. PErhaps we can determined the pathstring here as well.. if "bdgraph_pip" in pattern_strings: bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] # The path to the startspace algorithm is extended here for items in bdgraph_pip_list: #print("hej") #print(items) - items["startalg"] = idtopath(items["startalg"], json_string, mcmc_est=True) # Need to att the estmimator to the startalg as well + + items["startalg"] = idtopath(items["startalg"]) # Need to att the estmimator to the startalg as well json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"] +"/"+pattern_strings["mcmc_est"] , **val,) for val in bdgraph_pip_list} ) -#print(json_string["bdgraph_pip"]) - + if "bidag_partition_mcmc" in pattern_strings: bidag_partition_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_partition_mcmc"] # The path to the startspace algorithm is extended here for items in bidag_partition_mcmc_list: - items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) + items["startspace_algorithm"] = idtopath(items["startspace_algorithm"]) json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in bidag_partition_mcmc_list } ) -# parallelDG has MCMC estimator +# All MCMC algs for alg in mcmc_modules: if alg in pattern_strings: json_string.update({val["id"]: expand(pattern_strings[alg]+"/"+pattern_strings["mcmc_est"], **val) @@ -86,6 +107,10 @@ def idtopath(mylist, json_string, mcmc_est=False): json_string_mcmc_noest.update({val["id"]: expand(pattern_strings[alg], **val,) for val in config["resources"]["structure_learning_algorithms"][alg]}) + + + + # Evaluation strings def gen_evaluation_string_from_conf(method, alg_id): # This essentially converts a dict in (from an evaluation method conf) to a path string following a pattern diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk index 8a944b7d..80c4f73b 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -3,6 +3,7 @@ def fix_none_startalg(wildcards): if wildcards["startalg"] == "None": return [] else: + print("hoho") return "{output_dir}/adjmat_estimate/{data}/algorithm=/{startalg}/seed={replicate}/adjmat.csv", rule: diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R index d3689701..2ef32af5 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -63,6 +63,7 @@ wrapper <- function() { start <- proc.time()[1] set.seed(seed) # BUG: Seeds doesn't seem to work in BDgraph. 
p <- dim(data)[2] + bdgraph.obj <- bdgraph(data, n = NULL, From a1028db8161d3fb6dae2f17cea49d81de5e693e3 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 27 Dec 2022 14:28:39 +0300 Subject: [PATCH 06/14] adding ROC curves --- .../rules/evaluation/benchmarks/plot_ROC.R | 115 ++++++++++++++++-- .../rules/evaluation/benchmarks/rules.smk | 1 + .../evaluation/benchmarks/run_summarise.R | 15 ++- 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 86e13562..4368ac6f 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -21,6 +21,7 @@ dir.create(snakemake@output[["graph_type"]]) dir.create(snakemake@output[["SHD_cpdag_joint"]]) dir.create(snakemake@output[["f1_skel_joint"]]) dir.create(snakemake@output[["ntests_joint"]]) +dir.create(snakemake@output[["roc_FPR_TPR"]]) if (file.info(snakemake@input[["csv"]])$size == 0) { # Do nothing. Everything was timed out so the directories will be empty. @@ -222,6 +223,8 @@ unique_data <- unique(joint_bench$data) theme(plot.title = element_text(hjust = 0.5)) ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) + + gg <- ggplot() + { if (errorbar) { geom_errorbar( @@ -277,8 +280,8 @@ unique_data <- unique(joint_bench$data) geom_point( data = joint_bench, alpha = 0.15, show.legend = FALSE, aes( - x = FPR_skel, - y = TP_skel / true_n_edges_skel, + x = FPR_pattern, + y = TPR_pattern, col = id_numlev ), shape = 20, @@ -290,8 +293,8 @@ unique_data <- unique(joint_bench$data) geom_text( data = joint_bench, alpha = 0.25, show.legend = FALSE, aes( - x = FPR_skel, - y = TP_skel / true_n_edges_skel, + x = FPR_pattern, + y = TPR_pattern, label = replicate, col = id_numlev, shape = id_numlev ), check_overlap = FALSE @@ -335,15 +338,113 @@ unique_data <- unique(joint_bench$data) ylim(ylim[1], ylim[2]) } } + - xlab("FP/P") + - ylab("TP/P") + - ggtitle("Median FP/P vs. TP/P (undirected skeleton)") + + xlab("FPR") + + ylab("TPR") + + ggtitle("Median FPR vs. 
TPR (undirected skeleton)") + labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) + + gg <- ggplot() + { + if (path) { + geom_path( + data = toplot, alpha = 0.8, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + col = id_numlev + ) + ) + } + } + { + if (!param_annot) { + geom_point( + data = toplot, alpha = 0.5, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + col = id_numlev, + shape = id_numlev + ), + size = 1 + ) + } + } + { + if (scatter && !show_seed) { + geom_point( + data = joint_bench, alpha = 0.15, show.legend = FALSE, + aes( + x = FPR_pattern, + y = TPR_pattern, + col = id_numlev + ), + shape = 20, + size = 1 + ) + } + } + { + if (scatter && show_seed) { + geom_text( + data = joint_bench, alpha = 0.25, show.legend = FALSE, + aes( + x = FPR_pattern, + y = TPR_pattern, + label = replicate, col = id_numlev, shape = id_numlev + ), + check_overlap = FALSE + ) + } + } + { + if (path && !param_annot) { + geom_label( + data = toplot %>% + group_by(id_numlev) %>% + replace_na(list("curve_vals" = 0)) %>% + filter(curve_vals == max(curve_vals)), + alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, + aes( + x = FPR_skel_median, y = TPR_skel_median, + col = id_numlev, label = id_num + ) + ) + } + } + { + if (param_annot) { + geom_label( + data = toplot, alpha = 0.5, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + label = curve_vals, col = id_numlev, shape = id_numlev + ), + check_overlap = FALSE + ) + } + } + + guides(shape = FALSE) + + facet_wrap(. ~ adjmat + parameters + data, nrow = 2) + + { + if (!is.null(xlim)) { + xlim(xlim[1], xlim[2]) + } + } + { + if (!is.null(ylim)) { + ylim(ylim[1], ylim[2]) + } + } + + xlab("FPR") + + ylab("TPR") + + ggtitle("FPR vs. 
TPR (undirected skeleton)") + + labs(col = "id") + + theme_bw() + + theme(plot.title = element_text(hjust = 0.5)) + ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) + + + gg <- ggplot() + { if (errorbar) { diff --git a/workflow/rules/evaluation/benchmarks/rules.smk b/workflow/rules/evaluation/benchmarks/rules.smk index 447ec9f4..023c41c4 100644 --- a/workflow/rules/evaluation/benchmarks/rules.smk +++ b/workflow/rules/evaluation/benchmarks/rules.smk @@ -37,6 +37,7 @@ rule benchmarks: FPRp_FNR_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FPRp_FNR_skel"), fnr_fprp_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FNR_FPR_skel"), roc_FPRp_TPR_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FPR_TPR_skel"), + roc_FPR_TPR=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "roc_FPR_TPR"), elapsed_time_joint=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "elapsed_time_joint"), graph_type=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "graph_type"), SHD_cpdag_joint=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "SHD_cpdag_joint"), diff --git a/workflow/rules/evaluation/benchmarks/run_summarise.R b/workflow/rules/evaluation/benchmarks/run_summarise.R index c8292b6a..7ff2a33b 100644 --- a/workflow/rules/evaluation/benchmarks/run_summarise.R +++ b/workflow/rules/evaluation/benchmarks/run_summarise.R @@ -18,6 +18,7 @@ compareEGs <- function(estEG, trueEG) { estSkel <- EGskel(estEG) # estimated skeleton trueSkel <- EGskel(trueEG) # true skeleton P <- sum(trueSkel) / 2 # number of positives + N <- sum(1-trueSkel) /2 # number of negative diffSkel <- estSkel - trueSkel extra_edges <- which(diffSkel > 0) # edges in estimated but not true EG FP <- length(extra_edges) / 2 # count to FPs @@ -42,17 +43,20 @@ compareEGs <- function(estEG, trueEG) { if (FP >= 0) { TPR <- 0 FPR_P <- 1 + FPR <- FP / N } else { TPR <- 1 FPR_P <- 0 + FPR <- 0 } } else { # true graph is non-empty - TPR <- TP / P - FPR_P <- FP / P + TPR <- TP / P + FPR_P <- FP / P + FPR <- FP / N } - compEGs <- c(TP, FP, SHD, TPR, FPR_P) - names(compEGs) <- c("TP", "FP", "SHD", "TPR", "FPR_P") + compEGs <- c(TP, FP, SHD, TPR, FPR_P, FPR) + names(compEGs) <- c("TP", "FP", "SHD", "TPR", "FPR_P", "FPR") return(compEGs) } @@ -142,7 +146,8 @@ benchmarks <- function(true_adjmat, estimated_adjmat) { graph_type <- "dag" } df <- data.frame( - TPR_pattern = compres["TPR"], # should be for all times + TPR_pattern = compres["TPR"], # should be for all times + FPR_pattern = compres["FPR"], FPRn_pattern = compres["FPR_P"], SHD_pattern = compres["SHD"], SHD_cpdag = SHD_cpdag, From 2808db9bd70258a451a45d12595db5f42ab098bb Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 27 Dec 2022 16:25:32 +0300 Subject: [PATCH 07/14] chaning burnin to prob, for traj plots and autocorrelation --- .../plot_autocorr_from_traj.py | 15 +++++++++------ .../evaluation/mcmc_autocorr_plots/schema.json | 8 ++++---- .../rules/evaluation/mcmc_traj_plots/schema.json | 8 ++++---- workflow/scripts/evaluation/write_graph_traj.py | 15 ++++++++------- 4 files changed, 25 
insertions(+), 21 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py b/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py index 48e86ebb..9962f0a5 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py @@ -113,11 +113,13 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df2.fillna(method="ffill") + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) + if snakemake.wildcards["thinning"] != "None": - dfplot = df2["size"][int(snakemake.wildcards["burn_in"]):].iloc[::int( - snakemake.wildcards["thinning"])] + thinning = int(snakemake.wildcards["thinning"]) + dfplot = df2["size"][burnin:].iloc[::thinning] else: - dfplot = df2["size"][int(snakemake.wildcards["burn_in"]):] + dfplot = df2["size"][burnin:] # pd.plotting.autocorrelation_plot(dfplot.iloc[::int(snakemake.wildcards["thinning"])]) elif snakemake.wildcards["functional"] == "score": @@ -134,11 +136,12 @@ def edges_str_to_list(str, edgesymb="-"): df2.at[0,'score'] = int(0) df2 = df2.fillna(method="ffill") - + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - dfplot = df2[int(snakemake.wildcards["burn_in"]):][::int(snakemake.wildcards["thinning"])] + thinning = int(snakemake.wildcards["thinning"]) + dfplot = df2[burnin:].iloc[::thinning] else: - dfplot = df2[int(snakemake.wildcards["burn_in"]):] + dfplot = df2[burnin:] if snakemake.wildcards["lags"] != "None": sm.graphics.tsa.plot_acf(dfplot, lags=int(snakemake.wildcards["lags"])) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json b/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json index 23fa7d4e..c91fb150 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json @@ -9,7 +9,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "lags": 50, "functional": [ @@ -26,7 +26,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "lags": 50, "functional": [ @@ -41,7 +41,7 @@ "type": "string" }, "burn_in": { - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" }, "thinning": { "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegintnull" @@ -84,4 +84,4 @@ ] } } -} \ No newline at end of file +} diff --git a/workflow/rules/evaluation/mcmc_traj_plots/schema.json b/workflow/rules/evaluation/mcmc_traj_plots/schema.json index bf5e52e6..0efbf96a 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/schema.json +++ b/workflow/rules/evaluation/mcmc_traj_plots/schema.json @@ -9,7 +9,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "functional": [ "score", @@ -23,7 +23,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "functional": [ "score", @@ -40,7 +40,7 @@ "type": "string" }, "burn_in": { - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" }, "thinning": { "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegintnull" @@ -79,4 +79,4 @@ "additionalItems": false } } -} \ No newline at end of file +} diff --git 
a/workflow/scripts/evaluation/write_graph_traj.py b/workflow/scripts/evaluation/write_graph_traj.py index 2944b431..cc672335 100644 --- a/workflow/scripts/evaluation/write_graph_traj.py +++ b/workflow/scripts/evaluation/write_graph_traj.py @@ -53,12 +53,13 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df2.reindex(newindex).reset_index().reindex( columns=df2.columns).fillna(method="ffill") - + + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):][::int( + df_noburnin = df2[burnin:][::int( snakemake.wildcards["thinning"])] else: - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):] + df_noburnin = df2[burnin:] # Score trajectory elif snakemake.wildcards["functional"] == "score": @@ -70,12 +71,12 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df[["index", "score"]][2:].set_index("index") df2 = df2.reindex(newindex).reset_index().reindex( columns=df2.columns).fillna(method="ffill") - + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):][::int( + df_noburnin = df2[burnin:][::int( snakemake.wildcards["thinning"])] else: - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):] + df_noburnin = df2[burnin:] df_noburnin.rename(columns={snakemake.wildcards["functional"]:"plotvalue"}, inplace=True) @@ -115,4 +116,4 @@ def edges_str_to_list(str, edgesymb="-"): df_noburnin["alg_string"] = snakemake.params["alg_string"] df_noburnin["eval_string"] = snakemake.params["eval_string"] - df_noburnin.to_csv(snakemake.output["traj"]) \ No newline at end of file + df_noburnin.to_csv(snakemake.output["traj"]) From 1600fa93deadbd414bc1e4ed3d97ec0376e19d23 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Wed, 28 Dec 2022 15:07:56 +0300 Subject: [PATCH 08/14] issue with thinning parameter when null --- .../evaluation/mcmc_autocorr_plots/write_autocorr_traj.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py b/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py index 16628a74..aae19505 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py @@ -12,7 +12,11 @@ varying_param = traj["param"].unique()[0] varying_param_val = traj["param_val"].unique()[0] -df = pd.DataFrame({"plotvalue":acs}, index=np.arange(1, int(snakemake.wildcards["lags"])+1) * int(snakemake.wildcards["thinning"])) +thinning = int(1) +if snakemake.wildcards["thinning"] != "None": + thinning = int(snakemake.wildcards["thinning"]) + +df = pd.DataFrame({"plotvalue":acs}, index=np.arange(1, int(snakemake.wildcards["lags"])+1) * thinning) df["param"] = varying_param From aaad5a4ccf6773d8b9f3deeb8e1871f00de19710 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 14:06:47 +0300 Subject: [PATCH 09/14] fixing roc curves --- .../evaluation/benchmarks/combine_ROC_data.R | 12 ++-- .../rules/evaluation/benchmarks/plot_ROC.R | 56 ++++++++++++++----- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R index 77fe67c2..27f2bc3c 100644 --- a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R +++ b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R @@ -92,10 +92,14 @@ for 
(algorithm in active_algorithms) { TPR_pattern_median = median(TPR_pattern), TPR_pattern_q1 = quantile(TPR_pattern, probs = c(0.05)), TPR_pattern_q3 = quantile(TPR_pattern, probs = c(0.95)), - FPR_pattern_mean = mean(FPRn_pattern), - FPR_pattern_median = median(FPRn_pattern), - FPR_pattern_q1 = quantile(FPRn_pattern, probs = c(0.05)), - FPR_pattern_q3 = quantile(FPRn_pattern, probs = c(0.95)), + FPR_pattern_mean = mean(FPR_pattern), + FPR_pattern_median = median(FPR_pattern), + FPR_pattern_q1 = quantile(FPR_pattern, probs = c(0.05)), + FPR_pattern_q3 = quantile(FPR_pattern, probs = c(0.95)), + FPRn_pattern_mean = mean(FPRn_pattern), + FPRn_pattern_median = median(FPRn_pattern), + FPRn_pattern_q1 = quantile(FPRn_pattern, probs = c(0.05)), + FPRn_pattern_q3 = quantile(FPRn_pattern, probs = c(0.95)), TPR_skel_mean = mean(TP_skel / true_n_edges_skel), TPR_skel_median = median(TP_skel / true_n_edges_skel), TPR_skel_q1 = quantile(TP_skel / true_n_edges_skel, probs = c(0.05)), diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 4368ac6f..6e226013 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -105,7 +105,7 @@ unique_data <- unique(joint_bench$data) geom_errorbar( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, ymin = TPR_pattern_q1, ymax = TPR_pattern_q3, col = id_numlev @@ -120,8 +120,8 @@ unique_data <- unique(joint_bench$data) data = toplot, alpha = 0.5, aes( y = TPR_pattern_median, - xmin = FPR_pattern_q1, - xmax = FPR_pattern_q3, + xmin = FPRn_pattern_q1, + xmax = FPRn_pattern_q3, col = id_numlev ), height = 0.01 @@ -132,7 +132,7 @@ unique_data <- unique(joint_bench$data) geom_path( data = toplot, alpha = 0.8, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev ) @@ -144,7 +144,7 @@ unique_data <- unique(joint_bench$data) geom_point( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev, shape = id_numlev @@ -182,11 +182,11 @@ unique_data <- unique(joint_bench$data) data = toplot %>% group_by(id, adjmat, parameters, data) %>% replace_na(list("curve_vals" = 0)) %>% - mutate(SHDP_pattern_median = 1 - TPR_pattern_median + FPR_pattern_median) %>% + mutate(SHDP_pattern_median = 1 - TPR_pattern_median + FPRn_pattern_median) %>% filter(SHDP_pattern_median == min(SHDP_pattern_median)), alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, aes( - x = FPR_pattern_median, y = TPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev, label = id_num ) ) @@ -196,7 +196,7 @@ unique_data <- unique(joint_bench$data) geom_label( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, label = curve_vals, col = id_numlev, shape = id_numlev ), @@ -348,13 +348,41 @@ unique_data <- unique(joint_bench$data) - gg <- ggplot() + { + gg <- ggplot() + { + { + if (errorbar) { + geom_errorbar( + data = toplot, alpha = 0.5, + aes( + x = FPR_pattern_mean, + ymin = TPR_pattern_q1, + ymax = TPR_pattern_q3, + col = id_numlev + ), + width = 0.01 + ) + } + } + { + + if (errorbarh) { + geom_errorbarh( + data = toplot, alpha = 0.5, + aes( + y = TPR_pattern_median, + xmin = FPR_pattern_q1, + xmax = FPR_pattern_q3, + col = id_numlev + ), + height = 0.01 + ) + } + if (path) { geom_path( data = toplot, alpha = 0.8, aes( - x = FPR_skel_median, - y = 
TPR_skel_median, + x = FPR_pattern_mean, + y = TPR_pattern_mean, col = id_numlev ) ) @@ -364,8 +392,8 @@ unique_data <- unique(joint_bench$data) geom_point( data = toplot, alpha = 0.5, aes( - x = FPR_skel_median, - y = TPR_skel_median, + x = FPR_pattern_median, + y = TPR_pattern_median, col = id_numlev, shape = id_numlev ), @@ -637,7 +665,7 @@ unique_data <- unique(joint_bench$data) # alpha=0.8, position = "dodge", alpha=1, # aes(segment.linetype="solid", segment.alpha=0.3, ## segment.color="black", - # x =FPR_pattern_median, y = TPR_pattern_median,col = id, label=id_num)) + # x =FPRn_pattern_median, y = TPR_pattern_median,col = id, label=id_num)) } } + { if (param_annot) { From 35502f46820976422738f7e01ddcf2a74c8c5cc0 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 14:55:10 +0300 Subject: [PATCH 10/14] small typo --- workflow/rules/evaluation/benchmarks/run_summarise.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflow/rules/evaluation/benchmarks/run_summarise.R b/workflow/rules/evaluation/benchmarks/run_summarise.R index 7ff2a33b..44149138 100644 --- a/workflow/rules/evaluation/benchmarks/run_summarise.R +++ b/workflow/rules/evaluation/benchmarks/run_summarise.R @@ -171,7 +171,8 @@ if (file.info(argv$adjmat_est)$size > 0) { df <- benchmarks(true_adjmat, estimated_adjmat) } else { df <- data.frame( - TPR_pattern = "None", # should be for all times + TPR_pattern = "None", # should be for all times + FPR_pattern = "None", FPRn_pattern = "None", SHD_pattern = "None", SHD_cpdag = "None", From 73b6b28f77cf461aee8e6fb04a11e53d1cda5460 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 15:49:59 +0300 Subject: [PATCH 11/14] fixing ROC plot to 0,1 --- .../rules/evaluation/benchmarks/plot_ROC.R | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 6e226013..4a46cd8e 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -42,7 +42,7 @@ if (file.info(snakemake@input[["csv"]])$size == 0) { revlevlist <- c() revlevnumlist <- c() - lev <- levels(toplot$id) + lev <- levels(factor(toplot$id)) numlev <- c() for (i in seq_along(lev)) { @@ -63,7 +63,7 @@ if (file.info(snakemake@input[["csv"]])$size == 0) { # Add extra columns with id number like: 3 and id and level like: 3. pcalg_pc. 
toplot <- toplot %>% mutate(id_num = lev_to_num(id)) %>% - mutate(id_numlev = lev_to_levnum(id)) + mutate(id_numlev = lev_to_levnum(id)) toplot$id_numlev <- factor(toplot$id_numlev, levels = numlev) joint_bench <- joint_bench %>% mutate(id_num = lev_to_num(id)) %>% @@ -221,8 +221,8 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) + ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + { @@ -344,14 +344,12 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) - + ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + { - { - if (errorbar) { - geom_errorbar( + if (errorbar) { + geom_errorbar( data = toplot, alpha = 0.5, aes( x = FPR_pattern_mean, @@ -363,8 +361,7 @@ unique_data <- unique(joint_bench$data) ) } } + { - - if (errorbarh) { + if (errorbarh) { geom_errorbarh( data = toplot, alpha = 0.5, aes( @@ -376,8 +373,8 @@ unique_data <- unique(joint_bench$data) height = 0.01 ) } - - if (path) { + } + { + if (path) { geom_path( data = toplot, alpha = 0.8, aes( @@ -414,7 +411,7 @@ unique_data <- unique(joint_bench$data) ) } } + { - if (scatter && show_seed) { + if (scatter && show_seed) { geom_text( data = joint_bench, alpha = 0.25, show.legend = FALSE, aes( @@ -434,7 +431,8 @@ unique_data <- unique(joint_bench$data) filter(curve_vals == max(curve_vals)), alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, aes( - x = FPR_skel_median, y = TPR_skel_median, + x = FPR_pattern_mean, + y = TPR_pattern_mean, col = id_numlev, label = id_num ) ) @@ -444,9 +442,9 @@ unique_data <- unique(joint_bench$data) geom_label( data = toplot, alpha = 0.5, aes( - x = FPR_skel_median, - y = TPR_skel_median, - label = curve_vals, col = id_numlev, shape = id_numlev + x = FPR_pattern_mean, + y = TPR_pattern_mean, + label = curve_vals, col = id_numlev, shape = id_numlev ), check_overlap = FALSE ) @@ -469,8 +467,7 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) - + ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + From d39243cd0dce0fb006fbfc150f2179b266f2b14c Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 16:16:13 +0300 Subject: [PATCH 12/14] fixing ROC ylim and xlim --- workflow/rules/evaluation/benchmarks/plot_ROC.R | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 4a46cd8e..13a3036e 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -224,7 +224,6 @@ unique_data <- unique(joint_bench$data) ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) - gg <- ggplot() + { if (errorbar) { geom_errorbar( @@ -451,16 +450,9 @@ unique_data <- unique(joint_bench$data) } } + guides(shape = FALSE) + - facet_wrap(. 
~ adjmat + parameters + data, nrow = 2) + - { - if (!is.null(xlim)) { - xlim(xlim[1], xlim[2]) - } - } + { - if (!is.null(ylim)) { - ylim(ylim[1], ylim[2]) - } - } + + facet_wrap(. ~ adjmat + parameters + data, nrow = 2) + + xlim(0, 1) + + ylim(0, 1) + xlab("FPR") + ylab("TPR") + ggtitle("FPR vs. TPR (undirected skeleton)") + From c301fa61ae25b66b6096ec0a6e15a09ab700f59e Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 3 Jan 2023 12:31:18 +0300 Subject: [PATCH 13/14] changing docs for burnin in autocorre, trajplot, heatmap --- workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst | 2 +- workflow/rules/evaluation/mcmc_heatmaps/docs.rst | 2 +- workflow/rules/evaluation/mcmc_heatmaps/schema.json | 4 ++-- workflow/rules/evaluation/mcmc_traj_plots/docs.rst | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst index 892201ef..5f351e82 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst @@ -6,7 +6,7 @@ This module plots the auto-correlation of a functional of the graphs in a MCMC t +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ -| ``burn_in`` | use samples starting from this value. Use 0 if no burn-in. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``thinning`` | use only each ``thinning`` sample of the chain. (It is usually recommended to use this if the number of samples if large). | +----------------+----------------------------------------------------------------------------------------------------------------------------+ diff --git a/workflow/rules/evaluation/mcmc_heatmaps/docs.rst b/workflow/rules/evaluation/mcmc_heatmaps/docs.rst index 0b01966a..f27bca30 100644 --- a/workflow/rules/evaluation/mcmc_heatmaps/docs.rst +++ b/workflow/rules/evaluation/mcmc_heatmaps/docs.rst @@ -9,7 +9,7 @@ This module has a list of objects, where each object has +-------------+-------------------------+ | ``id`` | the algorithm object id | +-------------+-------------------------+ -| ``burn_in`` | the burn-in period. | +| ``burn_in`` | percent to burn of the number of samples. | +-------------+-------------------------+ The estimated probabilities are plotted in heatmaps using seaborn which are saved in *results/mcmc_heatmaps/* and copied to *results/output/mcmc_heatmaps/* for easy reference. 
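
For orientation, here is a minimal R sketch of what a fractional ``burn_in`` means for a sampled graph trajectory, independent of the plotting backend (the actual heatmaps are drawn with seaborn). This is an illustration only, not code from the repository: the helper names are hypothetical, and it assumes ``burn_in`` is a value in [0, 1] (as the ``flexprob`` reference in the schema suggests) and that the trajectory is a list of adjacency matrices.

.. code-block:: r

    # Hypothetical helpers (not repository code): drop the first burn_in
    # fraction of a graph trajectory, then average the remaining adjacency
    # matrices into edge probabilities for a heatmap.
    apply_burnin <- function(traj, burn_in) {
      stopifnot(burn_in >= 0, burn_in < 1)
      start <- floor(burn_in * length(traj)) + 1   # first sample kept after burn-in
      traj[start:length(traj)]
    }

    edge_probs <- function(traj, burn_in = 0.2) {
      kept <- apply_burnin(traj, burn_in)
      Reduce(`+`, kept) / length(kept)             # element-wise mean adjacency matrix
    }
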
diff --git a/workflow/rules/evaluation/mcmc_heatmaps/schema.json b/workflow/rules/evaluation/mcmc_heatmaps/schema.json index 8b318826..92bd4593 100644 --- a/workflow/rules/evaluation/mcmc_heatmaps/schema.json +++ b/workflow/rules/evaluation/mcmc_heatmaps/schema.json @@ -21,7 +21,7 @@ "type": "boolean" }, "burn_in": { - "title": "Burn-in start index.", + "title": "Burn-in percentage.", "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" } }, @@ -31,4 +31,4 @@ ] }, "additionalItems": false -} \ No newline at end of file +} diff --git a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst index 061a30fd..bbf8fe7d 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst @@ -7,7 +7,7 @@ The ``mcmc_traj_plots`` module has a list of objects, where each object has +----------------+-------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+-------------------------------------------------------------------+ -| ``burn_in`` | use samples starting from this value. Use 0 if no burn-in. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+-------------------------------------------------------------------+ | ``functional`` | the currently supported functionals are *size* and graph *score*. | +----------------+-------------------------------------------------------------------+ From 71371d8436bcb65a1e8f86b0d70a3d86ce2e27c9 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 3 Jan 2023 12:59:15 +0300 Subject: [PATCH 14/14] fixing docs --- .../evaluation/mcmc_autocorr_plots/docs.rst | 10 ++++++--- .../rules/evaluation/mcmc_traj_plots/docs.rst | 22 +++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst index 5f351e82..e66459fe 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst @@ -1,12 +1,10 @@ -This module plots the auto-correlation of a functional of the graphs in a MCMC trajectory. - +----------------+----------------------------------------------------------------------------------------------------------------------------+ | Field | Description | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ -| ``burn_in`` | percent to burn of the number of samples. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``thinning`` | use only each ``thinning`` sample of the chain. (It is usually recommended to use this if the number of samples if large). 
| +----------------+----------------------------------------------------------------------------------------------------------------------------+ @@ -14,6 +12,12 @@ This module plots the auto-correlation of a functional of the graphs in a MCMC t +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``lags`` | The maximum number of lags after ``thinning``. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ +| ``active`` | should be set to *false* to deactivate. | ++----------------+----------------------------------------------------------------------------------------------------------------------------+ + + +This module plots the auto-correlation of a functional of the graphs in a MCMC trajectory. + .. figure:: _static/omcmcscoreautocorr.png :alt: Score trajectory of order MCMC diff --git a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst index bbf8fe7d..4c689050 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst @@ -2,15 +2,19 @@ This module plots the values in the trajectory of a given functional. The ``mcmc_traj_plots`` module has a list of objects, where each object has -+----------------+-------------------------------------------------------------------+ -| Field | Description | -+----------------+-------------------------------------------------------------------+ -| ``id`` | algorithm module object id. | -+----------------+-------------------------------------------------------------------+ -| ``burn_in`` | percent to burn of the number of samples. | -+----------------+-------------------------------------------------------------------+ -| ``functional`` | the currently supported functionals are *size* and graph *score*. | -+----------------+-------------------------------------------------------------------+ ++----------------+-------------------------------------------------------------------------------+ +| Field | Description | ++----------------+-------------------------------------------------------------------------------+ +| ``id`` | algorithm module object id. | ++----------------+-------------------------------------------------------------------------------+ +| ``burn_in`` | percent to burn of the number of samples. | ++----------------+-------------------------------------------------------------------------------+ +| ``functional`` | the currently supported functionals are *size* and graph *score*. | ++----------------+-------------------------------------------------------------------------------+ +|``thinning`` | tells the module to only considering every graph at a given interval length. | ++----------------+-------------------------------------------------------------------------------+ +|``active`` | should be set to *false* to deactivate. | ++----------------+-------------------------------------------------------------------------------+ Since the trajectories tend to be very long, the user may choose to thin out the trajectory by only considering every graph at a given interval length specified by the ``thinning`` field.
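
As a concrete illustration of how ``burn_in`` and ``thinning`` interact, here is a minimal R sketch (not code from the repository) that discards the burn-in fraction of a functional trajectory and then keeps every ``thinning``-th sample. The function name and the simulated scores are hypothetical; it assumes ``burn_in`` is a fraction in [0, 1] and ``thinning`` is a positive integer interval.

.. code-block:: r

    # Hypothetical illustration (not repository code): burn-in as a fraction
    # of the trajectory length, followed by thinning at a fixed interval.
    thin_trajectory <- function(values, burn_in = 0.2, thinning = 10) {
      n <- length(values)
      kept <- values[seq(floor(burn_in * n) + 1, n)]  # drop the burn-in fraction
      kept[seq(1, length(kept), by = thinning)]       # keep every thinning-th sample
    }

    # Example with a simulated score trajectory:
    scores <- cumsum(rnorm(10000))
    plot(thin_trajectory(scores, burn_in = 0.2, thinning = 100),
         type = "l", xlab = "thinned sample", ylab = "score")
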