Skip to content

Commit

Permalink
apacheGH-37994: [R] Create wrapper functions for the CSV*Options clas…
Browse files Browse the repository at this point in the history
…ses (apache#37995)

### Rationale for this change

It's hard to find the documentation for the R6 objects behind the CSV options classes (reading, writing, parsing, and conversion).

### What changes are included in this PR?

Create wrapper functions for these classes, which are easier to document.

### Are these changes tested?

Yes, I've switched some existing tests over to use the wrappers.

### Are there any user-facing changes?

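Yes. This adds four new exported functions (`csv_read_options()`, `csv_parse_options()`, `csv_convert_options()`, and `csv_write_options()`) and a new `parse_options` argument to `open_delim_dataset()`, `open_csv_dataset()`, and `open_tsv_dataset()`.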
* Closes: apache#37994

Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
  • Loading branch information
thisisnic authored Oct 9, 2023
1 parent 76f987e commit 293819c
Show file tree
Hide file tree
Showing 15 changed files with 475 additions and 122 deletions.
4 changes: 4 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ export(contains)
export(copy_files)
export(cpu_count)
export(create_package_with_all_dependencies)
export(csv_convert_options)
export(csv_parse_options)
export(csv_read_options)
export(csv_write_options)
export(dataset_factory)
export(date32)
export(date64)
Expand Down
264 changes: 189 additions & 75 deletions r/R/csv.R

Large diffs are not rendered by default.

46 changes: 23 additions & 23 deletions r/R/dataset-format.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,19 +217,19 @@ check_csv_file_format_args <- function(args, partitioning = NULL) {
if (is.null(args$parse_options)) {
options$parse_options <- do.call(csv_file_format_parse_opts, args)
} else if (is.list(args$parse_options)) {
options$parse_options <- do.call(CsvParseOptions$create, args$parse_options)
options$parse_options <- do.call(csv_parse_options, args$parse_options)
}

if (is.null(args$convert_options)) {
options$convert_options <- do.call(csv_file_format_convert_opts, args)
} else if (is.list(args$convert_options)) {
options$convert_options <- do.call(CsvConvertOptions$create, args$convert_options)
options$convert_options <- do.call(csv_convert_options, args$convert_options)
}

if (is.null(args$read_options)) {
options$read_options <- do.call(csv_file_format_read_opts, c(args, list(partitioning = partitioning)))
} else if (is.list(args$read_options)) {
options$read_options <- do.call(CsvReadOptions$create, args$read_options)
options$read_options <- do.call(csv_read_options, args$read_options)
}

options
Expand All @@ -239,16 +239,16 @@ check_unsupported_args <- function(args) {
opt_names <- get_opt_names(args)

# Filter out arguments meant for CsvConvertOptions/CsvReadOptions
supported_convert_opts <- c(names(formals(CsvConvertOptions$create)), "na", "quoted_na")
supported_convert_opts <- c(names(formals(csv_convert_options)), "na", "quoted_na")

supported_read_opts <- c(
names(formals(CsvReadOptions$create)),
names(formals(csv_read_options)),
names(formals(readr_to_csv_read_options))
)

# We only currently support all of the readr options for parseoptions
supported_parse_opts <- c(
names(formals(CsvParseOptions$create)),
names(formals(csv_parse_options)),
names(formals(readr_to_csv_parse_options))
)

Expand Down Expand Up @@ -303,9 +303,9 @@ check_unrecognised_args <- function(opts) {
opt_names <- get_opt_names(opts)

arrow_opts <- c(
names(formals(CsvParseOptions$create)),
names(formals(CsvReadOptions$create)),
names(formals(CsvConvertOptions$create)),
names(formals(csv_parse_options)),
names(formals(csv_read_options)),
names(formals(csv_convert_options)),
"schema"
)

Expand Down Expand Up @@ -395,9 +395,9 @@ check_schema <- function(schema, partitioning, column_names) {
csv_file_format_parse_opts <- function(...) {
opts <- list(...)
# Filter out arguments meant for CsvConvertOptions/CsvReadOptions
convert_opts <- c(names(formals(CsvConvertOptions$create)), "na", "quoted_na", "convert_options")
convert_opts <- c(names(formals(csv_convert_options)), "na", "quoted_na", "convert_options")
read_opts <- c(
names(formals(CsvReadOptions$create)),
names(formals(csv_read_options)),
names(formals(readr_to_csv_read_options)),
"read_options"
)
Expand All @@ -407,7 +407,7 @@ csv_file_format_parse_opts <- function(...) {
opts[["parse_options"]] <- NULL
opt_names <- get_opt_names(opts)

arrow_opts <- c(names(formals(CsvParseOptions$create)))
arrow_opts <- c(names(formals(csv_parse_options)))
readr_opts <- c(names(formals(readr_to_csv_parse_options)))

is_arrow_opt <- !is.na(pmatch(opt_names, arrow_opts))
Expand All @@ -427,17 +427,17 @@ csv_file_format_parse_opts <- function(...) {
}
do.call(readr_to_csv_parse_options, opts) # all options have readr-style names
} else {
do.call(CsvParseOptions$create, opts) # all options have Arrow C++ names
do.call(csv_parse_options, opts) # all options have Arrow C++ names
}
}

csv_file_format_convert_opts <- function(...) {
opts <- list(...)
# Filter out arguments meant for CsvParseOptions/CsvReadOptions
arrow_opts <- c(names(formals(CsvParseOptions$create)), "parse_options")
arrow_opts <- c(names(formals(csv_parse_options)), "parse_options")
readr_opts <- names(formals(readr_to_csv_parse_options))
read_opts <- c(
names(formals(CsvReadOptions$create)),
names(formals(csv_read_options)),
names(formals(readr_to_csv_read_options)),
"read_options"
)
Expand All @@ -458,23 +458,23 @@ csv_file_format_convert_opts <- function(...) {
opts[["quoted_na"]] <- NULL
}

do.call(CsvConvertOptions$create, opts)
do.call(csv_convert_options, opts)
}

csv_file_format_read_opts <- function(schema = NULL, partitioning = NULL, ...) {

opts <- list(...)
# Filter out arguments meant for CsvParseOptions/CsvConvertOptions
arrow_opts <- c(names(formals(CsvParseOptions$create)), "parse_options")
arrow_opts <- c(names(formals(csv_parse_options)), "parse_options")
readr_opts <- names(formals(readr_to_csv_parse_options))
convert_opts <- c(names(formals(CsvConvertOptions$create)), "na", "quoted_na", "convert_options")
convert_opts <- c(names(formals(csv_convert_options)), "na", "quoted_na", "convert_options")
opts[arrow_opts] <- NULL
opts[readr_opts] <- NULL
opts[convert_opts] <- NULL
opts[["read_options"]] <- NULL

opt_names <- names(opts)
arrow_opts <- c(names(formals(CsvReadOptions$create)))
arrow_opts <- c(names(formals(csv_read_options)))
readr_opts <- c(names(formals(readr_to_csv_read_options)))

is_arrow_opt <- !is.na(match(opt_names, arrow_opts))
Expand Down Expand Up @@ -505,7 +505,7 @@ csv_file_format_read_opts <- function(schema = NULL, partitioning = NULL, ...) {
}
do.call(readr_to_csv_read_options, opts) # all options have readr-style names
} else {
do.call(CsvReadOptions$create, opts) # all options have Arrow C++ names
do.call(csv_read_options, opts) # all options have Arrow C++ names
}
}

Expand Down Expand Up @@ -648,7 +648,7 @@ FileWriteOptions <- R6Class("FileWriteOptions",
)
} else if (format %in% c("csv", "tsv", "txt", "text")) {
supported_args <- c(
names(formals(CsvWriteOptions$create)),
names(formals(csv_write_options)),
names(formals(readr_to_csv_write_options))
)
}
Expand Down Expand Up @@ -703,7 +703,7 @@ FileWriteOptions <- R6Class("FileWriteOptions",
)
}
} else if (self$type %in% c("csv", "tsv", "txt", "text")) {
arrow_opts <- names(formals(CsvWriteOptions$create))
arrow_opts <- names(formals(csv_write_options))
readr_opts <- names(formals(readr_to_csv_write_options))
readr_only_opts <- setdiff(readr_opts, arrow_opts)
arrow_only_opts <- setdiff(arrow_opts, readr_opts)
Expand All @@ -727,7 +727,7 @@ FileWriteOptions <- R6Class("FileWriteOptions",
} else {
dataset___CsvFileWriteOptions__update(
self,
do.call(CsvWriteOptions$create, args[is_arrow_opt])
do.call(csv_write_options, args[is_arrow_opt])
)
}
}
Expand Down
12 changes: 8 additions & 4 deletions r/R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ open_delim_dataset <- function(sources,
convert_options = NULL,
read_options = NULL,
timestamp_parsers = NULL,
quoted_na = TRUE) {
quoted_na = TRUE,
parse_options = NULL) {
open_dataset(
sources = sources,
schema = schema,
Expand All @@ -297,7 +298,8 @@ open_delim_dataset <- function(sources,
convert_options = convert_options,
read_options = read_options,
timestamp_parsers = timestamp_parsers,
quoted_na = quoted_na
quoted_na = quoted_na,
parse_options = parse_options
)
}

Expand All @@ -320,7 +322,8 @@ open_csv_dataset <- function(sources,
convert_options = NULL,
read_options = NULL,
timestamp_parsers = NULL,
quoted_na = TRUE) {
quoted_na = TRUE,
parse_options = NULL) {
mc <- match.call()
mc$delim <- ","
mc[[1]] <- get("open_delim_dataset", envir = asNamespace("arrow"))
Expand All @@ -346,7 +349,8 @@ open_tsv_dataset <- function(sources,
convert_options = NULL,
read_options = NULL,
timestamp_parsers = NULL,
quoted_na = TRUE) {
quoted_na = TRUE,
parse_options = NULL) {
mc <- match.call()
mc$delim <- "\t"
mc[[1]] <- get("open_delim_dataset", envir = asNamespace("arrow"))
Expand Down
5 changes: 5 additions & 0 deletions r/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ reference:
- open_delim_dataset
- open_csv_dataset
- open_tsv_dataset
- csv_read_options
- csv_parse_options
- csv_convert_options


- title: Write datasets
desc: >
Expand All @@ -155,6 +159,7 @@ reference:
- write_delim_dataset
- write_csv_dataset
- write_tsv_dataset
- csv_write_options

- title: Read files
desc: >
Expand Down
5 changes: 5 additions & 0 deletions r/man/CsvReadOptions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 69 additions & 0 deletions r/man/csv_convert_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions r/man/csv_parse_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 293819c

Please sign in to comment.