Skip to content

Commit

Permalink
Merge pull request #139 from RobLBaker/main
Browse files Browse the repository at this point in the history
make test_missing_data more robust to file-order mismatches
  • Loading branch information
RobLBaker authored Jun 24, 2024
2 parents 6c294ec + 4566f92 commit c4ff19f
Show file tree
Hide file tree
Showing 141 changed files with 305 additions and 412 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Description: Allows the user (and reviewer) to check a data package and test whe
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
VignetteBuilder: knitr
Suggests:
here,
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# DPchecker 0.3.4

2024-06-24
* Fixed a bug in `test_missing_data()`: when the order of files listed in metadata did not match the order of files returned by `list.files()`, the function evaluated the wrong file and produced spurious, unhelpful errors.
2024-02-05
* Fix bug in `test_date_range()` that was adding UTC to temporalCoverage
* `test_missing_data()` now also handles the missing data codes "blank" and "empty".
Expand Down
6 changes: 5 additions & 1 deletion R/optional_eml_elements.R
Original file line number Diff line number Diff line change
Expand Up @@ -376,12 +376,16 @@ test_orcid_match <- function(metadata = load_metadata(directory)){
}
}

#if there are any orcids, record orcids bad orcids:
#if there are any orcids, record orcids & bad orcids:
if(!is.null(existing_orcid)){
bad_orcid <- NULL
wrong_person <- NULL
for(i in seq_along(surName)){
orcid_url <- existing_orcid[i]
is_it_na <- stringr::str_sub(orcid_url, start = -2)
if(is_it_na == "NA") {
next
}
#api request to ORCID:

tryCatch({test_req <- httr::GET(orcid_url)},
Expand Down
35 changes: 21 additions & 14 deletions R/tabular_data_congruence.R
Original file line number Diff line number Diff line change
Expand Up @@ -627,8 +627,7 @@ test_missing_data <- function(directory = here::here(),
#detail_level <- match.arg(arg_choices)

# get dataTable and all children elements
data_tbl <- EML::eml_get(metadata, "dataTable")
data_tbl$`@context` <- NULL
data_tbl <- metadata[["dataset"]][["dataTable"]]
# If there's only one csv, data_tbl ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently
if ("attributeList" %in% names(data_tbl)) {
data_tbl <- list(data_tbl)
Expand All @@ -650,36 +649,44 @@ test_missing_data <- function(directory = here::here(),
for (j in seq_len(ncol(dat))) {
#look for NAs; if NAs found, look for correct missing data codes
if (sum(is.na(dat[,j])) > 0) {
missing <- data_tbl[[i]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]]
if(is.null(missing) || sum(missing != missing_types) < 1) {
#file level error message output:
if (detail_level == "files") {
error_log <- append(error_log,
for(k in 1:length(seq_along(data_tbl))){
if(data_tbl[[k]][["physical"]][["objectName"]] != data_files[i]){
next
} else {
missing <- data_tbl[[k]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]]
if(is.null(missing) || sum(missing != missing_types) < 1) {
#file level error message output:
if (detail_level == "files") {
error_log <- append(error_log,
paste0(" ",
"---> {.file ",
data_files[i],
"} contains missing data without a corresponding missing data code in metadata." ))
break
}
#column level error message output:
if (detail_level == "columns") {
error_log <- append(error_log,
break
}
#column level error message output:
if (detail_level == "columns") {
error_log <- append(error_log,
paste0(" ",
"---> {.file ",
data_files[i],
"} {.field ",
names(dat)[j],
"} contains missing data without a corresponding missing data code in metadata."))
}
}
}
}
}
}
}
}
if(is.null(error_log)){
cli::cli_inform(c("v" = "Missing data listed as NA is accounted for in metadata"))
cli::cli_inform(c(
"v" = "Missing data listed as NA is accounted for in metadata"))
}
else{
# really only need to say it once per file/column combo
error_log <- unique(error_log)
msg <- error_log
names(msg) <- rep(" ", length(msg))
err <- paste0("Undocumented missing data detected. Please document all missing data in metadata:\n")
Expand Down
10 changes: 4 additions & 6 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c4ff19f

Please sign in to comment.