From e1efba59225cf0e628d891e153523014535b61a7 Mon Sep 17 00:00:00 2001 From: Jonathan Callahan Date: Wed, 7 Jun 2023 17:09:03 -0700 Subject: [PATCH] feat: handle AIRSIS ESAM_MULTI2022 data format --- DESCRIPTION | 4 +- NAMESPACE | 1 + NEWS.md | 4 + R/airsis_ESAM_MULTI2022QualityControl.R | 303 +++++++++++++++++++++ R/airsis_createDataDataframe.R | 6 +- R/airsis_createMonitorObject.R | 4 +- R/airsis_identifyMonitorType.R | 12 + R/airsis_parseData.R | 50 ++++ R/airsis_qualityControl.R | 2 + local_jon/Jacob_Wolf_Idaho_DEQ.R | 37 +++ man/airsis_ESAM_MULTI2022QualityControl.Rd | 60 ++++ 11 files changed, 478 insertions(+), 5 deletions(-) create mode 100644 R/airsis_ESAM_MULTI2022QualityControl.R create mode 100644 local_jon/Jacob_Wolf_Idaho_DEQ.R create mode 100644 man/airsis_ESAM_MULTI2022QualityControl.Rd diff --git a/DESCRIPTION b/DESCRIPTION index dd834197..117d96f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Type: Package Package: PWFSLSmoke -Version: 1.2.118 +Version: 1.2.119 Title: Utilities for Working with Air Quality Monitoring Data Authors@R: c( person("Jonathan", "Callahan", email="jonathan.s.callahan@gmail.com", role=c("aut","cre")), @@ -75,4 +75,4 @@ URL: https://github.com/MazamaScience/PWFSLSmoke BugReports: https://github.com/MazamaScience/PWFSLSmoke/issues Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 7c987ddd..06b20a50 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(airsis_EBAM_MULTI2QualityControl) export(airsis_EBAM_MULTI2_BQualityControl) export(airsis_EBAM_PLUS_MULTIQualityControl) export(airsis_ESAMQualityControl) +export(airsis_ESAM_MULTI2022QualityControl) export(airsis_ESAM_MULTIQualityControl) export(airsis_availableUnits) export(airsis_createDataDataframe) diff --git a/NEWS.md b/NEWS.md index 7d241e5e..0cfd0c0e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# PWFSLSmoke 1.2.119 + +* Updated `airsis_identifyMonitorType()` to handle a new data 
format. + # PWFSLSmoke 1.2.118 * Updated `wrcc_identifyMonitorType()` to handle a new data format. diff --git a/R/airsis_ESAM_MULTI2022QualityControl.R b/R/airsis_ESAM_MULTI2022QualityControl.R new file mode 100644 index 00000000..be766983 --- /dev/null +++ b/R/airsis_ESAM_MULTI2022QualityControl.R @@ -0,0 +1,303 @@ +#' @keywords AIRSIS +#' @export +#' @import MazamaCoreUtils +#' +#' @title Apply Quality Control to raw AIRSIS E-Sampler dataframe +#' +#' @param tbl single site tibble created by \code{airsis_downloadData()} +#' @param valid_Longitude range of valid Longitude values +#' @param valid_Latitude range of valid Latitude values +#' @param remove_Lon_zero flag to remove rows where Longitude == 0 +#' @param remove_Lat_zero flag to remove rows where Latitude == 0 +#' @param valid_Flow range of valid Flow values +#' @param valid_AT range of valid AT values +#' @param valid_RHi range of valid RHi values +#' @param valid_Conc range of valid ConcHr values +#' @param flagAndKeep flag, rather than remove, bad data during the QC process +#' @description Perform various QC measures on AIRSIS E-Sampler data. +#' +#' The following columns of data are tested against valid ranges: +#' \itemize{ +#' \item{\code{Flow}} +#' \item{\code{AT}} +#' \item{\code{RHi}} +#' \item{\code{ConcHr}} +#' } +#' +#' A \code{POSIXct datetime} column (UTC) is also added based on \code{TimeStamp}. +#' +#' @return Cleaned up tibble of AIRSIS monitor data. 
+#' @seealso \code{\link{airsis_qualityControl}} + +airsis_ESAM_MULTI2022QualityControl <- function( + tbl, + valid_Longitude = c(-180, 180), + valid_Latitude = c(-90, 90), + remove_Lon_zero = TRUE, + remove_Lat_zero = TRUE, + valid_Flow = c(1.999, 2.001), # anything other than 2 is bad + valid_AT = c(-Inf, 150), + valid_RHi = c(-Inf, 55), + valid_Conc = c(-Inf, 5), + flagAndKeep = FALSE +) { + + logger.debug(" ----- airsis_ESAM_MULTI2022QualityControl() ----- ") + + # > print(names(tbl), width = 90) + # [1] "MasterTable_ID" "Alias" "Latitude" + # [4] "Longitude" "COncRT" "ConcHr" + # [7] "Flow" "AT" "BP.PA." + # [10] "RHx" "RHi" "W.S" + # [13] "W.D" "BV" "Alarm" + # [16] "Oceaneering.Unit.Voltage" "FT" "TimeStamp" + # [19] "PDate" "monitorName" "monitorType" + # [22] "monitorSubtype" "Date.Time.GMT" + + ### NOTE: ESAM (non-MULTI) looks like this: + ### > names(tbl) + ### [1] "MasterTable_ID" "Alias" "Latitude" "Longitude" + ### [5] "Conc.mg.m3." "Flow.l.m." "AT.C." "BP.PA." + ### [9] "RHx..." "RHi..." "WS.M.S." "WD.Deg." + ### [13] "BV.V." "Alarm" "Start.Date.Time..GMT." 
"Serial.Number" + ### [17] "System.Volts" "Data.1" "Data.2" "TimeStamp" + ### [21] "PDate" "monitorName" "monitorType" + + monitorName <- tbl$monitorName[1] + + # ----- Missing Values ------------------------------------------------------ + + # Handle various missing value flags + + # ----- Setup for flagAndKeep argument utility ------------------------------ + + if ( flagAndKeep ) { + # verb for logging messages + verb <- "Flagging" + + tbl$rowID <- as.integer(rownames(tbl)) + + # duplicate tbl and add columns for flags + tblFlagged <- tbl + tblFlagged$QCFlag_anyBad <- FALSE + tblFlagged$QCFlag_reasonCode <- NA + tblFlagged$QCFlag_badLon <- FALSE + tblFlagged$QCFlag_badLat <- FALSE + tblFlagged$QCFlag_badType <- FALSE # no type info for ESAMs + tblFlagged$QCFlag_badFlow <- FALSE + tblFlagged$QCFlag_badAT <- FALSE + tblFlagged$QCFlag_badRHi <- FALSE + tblFlagged$QCFlag_badConcHr <- FALSE + tblFlagged$QCFlag_badDateAndTime <- FALSE + tblFlagged$QCFlag_duplicateHr <- FALSE + } else { + # verb for logging messages + verb <- "Discarding" + } + + # ----- Location ------------------------------------------------------------ + + # Latitude and longitude must be in range + if ( remove_Lon_zero ) { + goodLonMask <- !is.na(tbl$Longitude) & (tbl$Longitude >= valid_Longitude[1]) & (tbl$Longitude <= valid_Longitude[2]) & (tbl$Longitude != 0) + } else { + goodLonMask <- !is.na(tbl$Longitude) & (tbl$Longitude >= valid_Longitude[1]) & (tbl$Longitude <= valid_Longitude[2]) + } + + if ( remove_Lat_zero ) { + goodLatMask <- !is.na(tbl$Latitude) & (tbl$Latitude >= valid_Latitude[1]) & (tbl$Latitude <= valid_Latitude[2]) & (tbl$Latitude != 0) + } else { + goodLatMask <- !is.na(tbl$Latitude) & (tbl$Latitude >= valid_Latitude[1]) & (tbl$Latitude <= valid_Latitude[2]) + } + + badRows <- !(goodLonMask & goodLatMask) + badRowCount <- sum(badRows) + if ( badRowCount > 0 ) { + logger.trace(paste(verb,"%s rows with invalid location information"), badRowCount) + badLocations <- 
paste('(',tbl$Longitude[badRows],',',tbl$Latitude[badRows],')',sep = '') + logger.trace("Bad locations: %s", paste0(badLocations, collapse = ", ")) + if ( flagAndKeep ) { + # apply flags + tblFlagged$QCFlag_badLon[tbl$rowID[!goodLonMask]] <- TRUE + tblFlagged$QCFlag_badLat[tbl$rowID[!goodLatMask]] <- TRUE + tblFlagged$QCFlag_anyBad <- tblFlagged$QCFlag_anyBad | tblFlagged$QCFlag_badLon | tblFlagged$QCFlag_badLat + # apply reason codes + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodLonMask]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodLonMask]],"badLon") + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodLatMask]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodLatMask]],"badLat") + } + } + + tbl <- tbl[goodLonMask & goodLatMask,] + + # Sanity check -- row count + if ( nrow(tbl) < 1 && !flagAndKeep ) { + err_msg <- paste0("No valid PM2.5 data for ", monitorName) + logger.warn(err_msg) # This is more of a warning than some error in the data. + stop(err_msg, call. = FALSE) + } + + # ----- Time ---------------------------------------------------------------- + + # NOTE: Assume all times downloaded from are UTC + + # NOTE: It appears the ESAM "TimeStamp" data drifts throughout the day, with >60 minutes between timestamps during most + # NOTE: hours in the day, and then a daily re-synch. For now we are assuming this is a communication issue rather than + # NOTE: an issue in the actual sampling period. For example, we are assuming that a record that is received at 4:44pm is + # NOTE: actually a record for 4:00pm (which is really representative of data during the 3:00 hour -- see NOTE below). + + # Add a POSIXct datetime + tbl$datetime <- lubridate::floor_date(lubridate::mdy_hms(tbl$TimeStamp), unit = "hour") - lubridate::dhours(1) + if ( flagAndKeep ) { + # TODO: Unable to get datetime moved from tbl to tblFlagged without timezone and/or display getting messed up. 
+ # For now just duplicating the calculation, then assigning row values to NA after the fact for rows that were + # removed from tbl prior to calculating datetime above. Clean up later if possible. + tblFlagged$datetime <- lubridate::floor_date(lubridate::mdy_hms(tblFlagged$TimeStamp), unit = "hour") - lubridate::dhours(1) + tblFlagged$datetime[ which(!(tblFlagged$rowID %in% tbl$rowID)) ] <- NA + } + + # NOTE: The time above truncates the timestamp to the top of an hour, and then subtracts one hour, + # NOTE: since the measurement that comes in at a few minutes past the hour is actually representative + # NOTE: of the data over the previous hour (e.g. reading received at 12:04 is actually the average of + # NOTE: the data during Hour 11). This allows for a simpler understanding of the averages, since an + # NOTE: hour's average will be representative of the data within that hour (this is similar to + # NOTE: how an average over a year, such as 2016, is referred to as 2016's value, not 2017's, even + # NOTE: though the average wasn't available until the beginning of 2017). + + # Leland Tarnay QC ----------------------------------------------------------- + + # NOTE: Override ConcHr high value with 5000 as per conversation with Mike Broughton + + ###tmp.2013_NIFC_GOES65_wrcc$concQA <- with(tmp.2013_NIFC_GOES65_wrcc, + ### ifelse(Flow < 2 "FlowLow", + ### ifelse(Flow > 2, "FlowHigh", + ### ifelse(AT > 150, "HighTemp", + ### ifelse(RHi > 55,"HighRHi", + ### ifelse(ConcHr < 0, "Negative", + ### ifelse(ConcHr > 984, "HighConc", 'OK'))))))) + ####create a ConcHr numerical column, with NA values that aren't verbose about errors.. 
+ ### +###tmp.2013_NIFC_GOES65_wrcc$ConcHr <- with(tmp.2013_NIFC_GOES65_wrcc, + ### ifelse(concQA == 'Negative', 0, + ### ifelse(concQA == 'OK', ConcHr, NA))) + + goodFlow <- !is.na(tbl$Flow) & tbl$Flow >= valid_Flow[1] & tbl$Flow <= valid_Flow[2] + goodAT <- !is.na(tbl$AT) & tbl$AT >= valid_AT[1] & tbl$AT <= valid_AT[2] + goodRHi <- !is.na(tbl$RHi) & tbl$RHi >= valid_RHi[1] & tbl$RHi <= valid_RHi[2] + goodConcHr <- !is.na(tbl$ConcHr) & tbl$ConcHr >= valid_Conc[1] & tbl$ConcHr <= valid_Conc[2] + gooddatetime <- !is.na(tbl$datetime) & tbl$datetime < lubridate::now(tzone = "UTC") # saw a future date once + + logger.trace("Flow has %s missing or out of range values", sum(!goodFlow)) + if (sum(!goodFlow) > 0) logger.trace("Bad Flow values: %s", paste0(sort(unique(tbl$Flow[!goodFlow]),na.last=TRUE), collapse = ", ")) + logger.trace("AT has %s missing or out of range values", sum(!goodAT)) + if (sum(!goodAT) > 0) logger.trace("Bad AT values: %s", paste0(sort(unique(tbl$AT[!goodAT]),na.last=TRUE), collapse = ", ")) + logger.trace("RHi has %s missing or out of range values", sum(!goodRHi)) + if (sum(!goodRHi) > 0) logger.trace("Bad RHi values: %s", paste0(sort(unique(tbl$RHi[!goodRHi]),na.last=TRUE), collapse = ", ")) + logger.trace("Conc has %s missing or out of range values", sum(!goodConcHr)) + if (sum(!goodConcHr) > 0) logger.trace("Bad Conc values: %s", paste0(sort(unique(tbl$ConcHr[!goodConcHr]),na.last=TRUE), collapse = ", ")) + logger.trace("datetime has %s missing or out of range values", sum(!gooddatetime)) + if (sum(!gooddatetime) > 0) logger.trace("Bad datetime values: %s", paste0(sort(unique(tbl$datetime[!gooddatetime]),na.last=TRUE), collapse = ", ")) + + goodMask <- goodFlow & goodAT & goodRHi & goodConcHr & gooddatetime + badQCCount <- sum(!goodMask) + + if ( badQCCount > 0 ) { + logger.trace(paste(verb,"%s rows because of QC logic"), badQCCount) + if ( flagAndKeep ) { + # apply flags + tblFlagged$QCFlag_badFlow[tbl$rowID[!goodFlow]] <- TRUE + 
tblFlagged$QCFlag_badAT[tbl$rowID[!goodAT]] <- TRUE + tblFlagged$QCFlag_badRHi[tbl$rowID[!goodRHi]] <- TRUE + tblFlagged$QCFlag_badConcHr[tbl$rowID[!goodConcHr]] <- TRUE + tblFlagged$QCFlag_badDateAndTime[tbl$rowID[!gooddatetime]] <- TRUE + tblFlagged$QCFlag_anyBad <- (tblFlagged$QCFlag_anyBad | tblFlagged$QCFlag_badFlow | tblFlagged$QCFlag_badAT | + tblFlagged$QCFlag_badRHi | tblFlagged$QCFlag_badConcHr | tblFlagged$QCFlag_badDateAndTime) + # apply reason codes + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodFlow]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodFlow]],"badFlow") + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodAT]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodAT]],"badAT") + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodRHi]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodRHi]],"badRHi") + tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodConcHr]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!goodConcHr]],"badConcHr") + tblFlagged$QCFlag_reasonCode[tbl$rowID[!gooddatetime]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[!gooddatetime]],"badDateAndTime") + } + } + + tbl <- tbl[goodMask,] + + # Sanity check -- row count + if (nrow(tbl) < 1 && !flagAndKeep) { + err_msg <- paste0("No valid PM2.5 data for ", monitorName) + logger.warn(err_msg) # This is more of a warning than some error in the data. + stop(err_msg, call.=FALSE) + } + + # ----- Duplicate Hours ----------------------------------------------------- + + # For hours with multiple records, discard all but the one with the latest processing date/time + # NOTE: Current setup for this section assumes that the last entry will be the latest one. 
May + # NOTE: want to build in functionality to ensure that the latest is picked if more than one exists + # NOTE: (for example, if the data is not in order by timestamp for whatever reason) + + dupHrMask <- duplicated(tbl$datetime,fromLast = TRUE) + dupHrCount <- sum(dupHrMask) + uniqueHrMask <- !dupHrMask + + if ( dupHrCount > 0 ) { + logger.trace(paste(verb,"%s duplicate time entries"), dupHrCount) + logger.trace("Duplicate Hours (may be >1 per timestamp): %s", paste0(sort(unique(tbl$TimeStamp[dupHrMask])), collapse = ", ")) + if ( flagAndKeep ) { + # apply flags + tblFlagged$QCFlag_duplicateHr[tbl$rowID[dupHrMask]] <- TRUE + tblFlagged$QCFlag_anyBad <- tblFlagged$QCFlag_anyBad | tblFlagged$QCFlag_duplicateHr + # apply reason code + tblFlagged$QCFlag_reasonCode[tbl$rowID[dupHrMask]] <- paste(tblFlagged$QCFlag_reasonCode[tbl$rowID[dupHrMask]],"duplicateHr") + } + } + + tbl <- tbl[uniqueHrMask,] + + # Sanity check -- row count + if (nrow(tbl) < 1 && !flagAndKeep) { + err_msg <- paste0("No valid PM2.5 data for ", monitorName) + logger.warn(err_msg) # This is more of a warning than some error in the data. 
+ stop(err_msg, call.=FALSE) + } + + # ----- More QC ------------------------------------------------------------- + + # NOTE: Additional QC would go here + + if ( flagAndKeep ) { + logger.trace("Retaining %d rows of measurements; %d bad rows flagged", nrow(tbl), sum(tblFlagged$QCFlag_anyBad)) + } else { + logger.trace("Retaining %d rows of validated measurements", nrow(tbl)) + } + + # ----- Final cleanup ------------------------------------------------------- + + if ( flagAndKeep ) { + tblFlagged$QCFlag_reasonCode <- stringr::str_sub(tblFlagged$QCFlag_reasonCode, 3) + tblFlagged$QCFlag_reasonCode <- stringr::str_trim(tblFlagged$QCFlag_reasonCode) + tbl <- tblFlagged + tbl$rowID <- NULL + } + + return(tbl) + +} + +# ===== DEBUGGING ============================================================== + +if ( FALSE ) { + + # tbl <- + + valid_Longitude = c(-180, 180) + valid_Latitude = c(-90, 90) + remove_Lon_zero = TRUE + remove_Lat_zero = TRUE + valid_Flow = c(1.999, 2.001) # anything other than 2 is bad + valid_AT = c(-Inf, 150) + valid_RHi = c(-Inf, 55) + valid_Conc = c(-Inf, 5) + flagAndKeep = FALSE + +} diff --git a/R/airsis_createDataDataframe.R b/R/airsis_createDataDataframe.R index ab02c66d..88964324 100644 --- a/R/airsis_createDataDataframe.R +++ b/R/airsis_createDataDataframe.R @@ -73,7 +73,11 @@ airsis_createDataDataframe <- function( if ( monitorType == 'EBAM' ) { pm25Var <- 'ConcHr' } else if ( monitorType == 'ESAM' ) { - pm25Var <- 'Conc.mg.m3.' + if ( monitorSubtype == 'MULTI2022' ) { + pm25Var <- 'ConcHr' + } else { + pm25Var <- 'Conc.mg.m3.' + } } else if ( monitorType == 'BAM1020' ) { pm25Var <- 'Conc..\u00B5g.m3.' 
} else { diff --git a/R/airsis_createMonitorObject.R b/R/airsis_createMonitorObject.R index 2bc0d9c4..667ef023 100644 --- a/R/airsis_createMonitorObject.R +++ b/R/airsis_createMonitorObject.R @@ -191,12 +191,12 @@ if ( FALSE ) { logger.setLevel(TRACE) - # 2021 "EBAM MULTI2_B" format -- usfs.1085 + # 2022 "ESAM MULTI2022" format -- usfs.1072 startdate = strftime(lubridate::now(tzone = "UTC"), "%Y0101", tz = "UTC") enddate = strftime(lubridate::now(tzone = "UTC"), "%Y%m%d", tz = "UTC") provider = 'USFS' - unitID = 1085 + unitID = 1072 clusterDiameter = 1000 zeroMinimum = TRUE baseUrl = "http://xxxx.airsis.com/vision/common/CSVExport.aspx?" diff --git a/R/airsis_identifyMonitorType.R b/R/airsis_identifyMonitorType.R index 2e09787b..d9dd773b 100644 --- a/R/airsis_identifyMonitorType.R +++ b/R/airsis_identifyMonitorType.R @@ -113,6 +113,12 @@ airsis_identifyMonitorType <- function(df) { olderEbam_2_names <- make.names(olderEbam_2_rawNames) olderEbam_2_types <- 'ccddcc' + # provider = "USFS"; unitID = "1072"; year = 2022 + esamMulti2022_header <- "MasterTable_ID,Alias,Latitude,Longitude,ConcRT,ConcHR,Flow,AT,BP(PA),RHx,RHi,W/S,W/D,BV,Alarm,Oceaneering Unit Voltage,FT,TimeStamp,PDate" + esamMulti2022_rawNames <- unlist(stringr::str_split(esamMulti2022_header, ',')) + esamMulti2022_names <- make.names(esamMulti2022_rawNames) + esamMulti2022_types <- 'ccddddddddddddcddcc' + # Need to duplicate these to handle the addition of the 'UnitID' column before 'Alias' # ARB2_ebamMulti (Starting in August, 2018) @@ -235,6 +241,12 @@ airsis_identifyMonitorType <- function(df) { rawNames <- bam1020_rawNames columnNames <- bam1020_names columnTypes <- bam1020_types + } else if ( dplyr::setequal(esamMulti2022_names, colNames) ) { + monitorType <- "ESAM" + monitorSubtype <- "MULTI2022" + rawNames <- esamMulti2022_rawNames + columnNames <- esamMulti2022_names + columnTypes <- esamMulti2022_types } monitorTypeList <- list(monitorType = monitorType, diff --git a/R/airsis_parseData.R 
b/R/airsis_parseData.R index 41b4b2f8..09354a16 100644 --- a/R/airsis_parseData.R +++ b/R/airsis_parseData.R @@ -107,6 +107,12 @@ airsis_parseData <- function(fileString) { } + if ( monitorSubtype == "MULTI2022" ) { + + # TODO: Handle any errors + + } + } else if ( monitorType == "OTHER_1" ) { logger.error("Older EBAM 1 file parsing is not supported") @@ -241,6 +247,48 @@ airsis_parseData <- function(fileString) { tbl$Date.Time.GMT <- strftime(assignedHour, "%m/%d/%Y %H:%M:%S", tz = 'UTC') } + } else if ( monitorSubtype == "MULTI2022" ) { + + # HACK + # usfs.1072 in June, 2023 does not return a "Date.Time.GMT" column + # We add one here by flooring the "TimeStamp" column. + + logger.trace("Adding Date.Time.GMT column to ESAM_MULTI2022 data.") + if ( !"Date.Time.GMT" %in% names(tbl) && "TimeStamp" %in% names(tbl) ) { + # Remove rows where TimeStamp is NA + badMask <- is.na(tbl$TimeStamp) | tbl$TimeStamp == "NA" + tbl <- tbl[!badMask,] + datetime <- lubridate::mdy_hms(tbl$TimeStamp, tz = "UTC") + assignedHour <- lubridate::floor_date(datetime, unit = "hour") + tbl$Date.Time.GMT <- strftime(assignedHour, "%m/%d/%Y %H:%M:%S", tz = 'UTC') + } + + # > names(tbl) %>% print(width = 90) + # [1] "MasterTable_ID" "Alias" "Latitude" + # [4] "Longitude" "ConcRT" "ConcHR" + # [7] "Flow" "AT" "BP.PA." + # [10] "RHx" "RHi" "W.S" + # [13] "W.D" "BV" "Alarm" + # [16] "Oceaneering.Unit.Voltage" "FT" "TimeStamp" + # [19] "PDate" "monitorName" "monitorType" + # [22] "monitorSubtype" "Date.Time.GMT" + + # Need to rename some columns to those expected by airsis_createDataDataframe() + # + # [1] "MasterTable_ID" "UnitID" "Alias" "Latitude" + # [5] "Longitude" "Date.Time.GMT" "Start.Date.Time..GMT." 
"COncRT" + # [9] "ConcHr" "Flow" "W.S" "W.D" + # [13] "AT" "RHx" "RHi" "BV" + # [17] "FT" "Alarm" "Type" "Serial.Number" + # [21] "Version" "Sys..Volts" "TimeStamp" "PDate" + # [25] "monitorName" "monitorType" "datetime" "medoidLon" + # [29] "medoidLat" "deploymentID" + + tbl <- tbl %>% + dplyr::rename( + COncRT = ConcRT, + ConcHr = ConcHR + ) } else { @@ -289,6 +337,8 @@ airsis_parseData <- function(fileString) { } else if (monitorType == "ESAM") { if ( monitorSubtype == "MULTI" ) { voltLabel <- "Oceaneering.Unit.Voltage" + } else if ( monitorSubtype == "MULTI2022" ) { + voltLabel <- "Oceaneering.Unit.Voltage" } else { voltLabel <- "System.Volts" } diff --git a/R/airsis_qualityControl.R b/R/airsis_qualityControl.R index e126fe39..d50d1b37 100644 --- a/R/airsis_qualityControl.R +++ b/R/airsis_qualityControl.R @@ -101,6 +101,8 @@ airsis_qualityControl <- function( if ( monitorSubtype == "MULTI" ) { tbl <- airsis_ESAM_MULTIQualityControl(tbl, ...) + } else if ( monitorSubtype == "MULTI2022" ) { + tbl <- airsis_ESAM_MULTI2022QualityControl(tbl, ...) } else { tbl <- airsis_ESAMQualityControl(tbl, ...) } diff --git a/local_jon/Jacob_Wolf_Idaho_DEQ.R b/local_jon/Jacob_Wolf_Idaho_DEQ.R new file mode 100644 index 00000000..b895bf6a --- /dev/null +++ b/local_jon/Jacob_Wolf_Idaho_DEQ.R @@ -0,0 +1,37 @@ +# Jacob_Wolf_Idaho_DEQ.R +# +# Why do the Mv4 daily plot and Data Report disagree about the daily average +# color in Salmon, Idaho on July 19'th? 
+ library(PWFSLSmoke) + Salmon <- + airnow_loadLatest() %>% + monitor_subset(monitorIDs = "160590004_01") + + monitor_dailyBarplot(Salmon) # July 19 is RED + + AirMonitorPlots::monitor_ggDailyHourlyBarplot(Salmon) # July 19 is RED + + Salmon %>% + monitor_dailyStatistic(mean) %>% + monitor_extractData() + + # datetime 160590004_01 + # 1 2022-07-12 4.125000 + # 2 2022-07-13 4.695652 + # 3 2022-07-14 6.541667 + # 4 2022-07-15 7.956522 + # 5 2022-07-16 6.250000 + # 6 2022-07-17 5.458333 + # 7 2022-07-18 7.708333 + # 8 2022-07-19 55.833333 + # 9 2022-07-20 44.625000 + + # > PWFSLSmoke::AQI$breaks_24 + # [1] -Inf 12.0 35.5 55.5 150.5 250.5 Inf + # > PWFSLSmoke::AQI$colors + # [1] "#00E400" "#FFFF00" "#FF7E00" "#FF0000" "#8F3F97" "#7E0023" + # + # Should be RED between 55.5 and 150.5 + diff --git a/man/airsis_ESAM_MULTI2022QualityControl.Rd b/man/airsis_ESAM_MULTI2022QualityControl.Rd new file mode 100644 index 00000000..fa4157df --- /dev/null +++ b/man/airsis_ESAM_MULTI2022QualityControl.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/airsis_ESAM_MULTI2022QualityControl.R +\name{airsis_ESAM_MULTI2022QualityControl} +\alias{airsis_ESAM_MULTI2022QualityControl} +\title{Apply Quality Control to raw AIRSIS E-Sampler dataframe} +\usage{ +airsis_ESAM_MULTI2022QualityControl( + tbl, + valid_Longitude = c(-180, 180), + valid_Latitude = c(-90, 90), + remove_Lon_zero = TRUE, + remove_Lat_zero = TRUE, + valid_Flow = c(1.999, 2.001), + valid_AT = c(-Inf, 150), + valid_RHi = c(-Inf, 55), + valid_Conc = c(-Inf, 5), + flagAndKeep = FALSE +) +} +\arguments{ +\item{tbl}{single site tibble created by \code{airsis_downloadData()}} + +\item{valid_Longitude}{range of valid Longitude values} + +\item{valid_Latitude}{range of valid Latitude values} + +\item{remove_Lon_zero}{flag to remove rows where Longitude == 0} + +\item{remove_Lat_zero}{flag to remove rows where Latitude == 0} + +\item{valid_Flow}{range of valid Flow values} + +\item{valid_AT}{range of 
valid AT values} + +\item{valid_RHi}{range of valid RHi values} + +\item{valid_Conc}{range of valid ConcHr values} + +\item{flagAndKeep}{flag, rather than remove, bad data during the QC process} +} +\value{ +Cleaned up tibble of AIRSIS monitor data. +} +\description{ +Perform various QC measures on AIRSIS E-Sampler data. + +The following columns of data are tested against valid ranges: +\itemize{ +\item{\code{Flow}} +\item{\code{AT}} +\item{\code{RHi}} +\item{\code{ConcHr}} +} + +A \code{POSIXct datetime} column (UTC) is also added based on \code{TimeStamp}. +} +\seealso{ +\code{\link{airsis_qualityControl}} +} +\keyword{AIRSIS}