diff --git a/flint/cache_file_state.rds b/flint/cache_file_state.rds new file mode 100644 index 00000000..5d0a95b8 Binary files /dev/null and b/flint/cache_file_state.rds differ diff --git a/flint/config.yml b/flint/config.yml new file mode 100644 index 00000000..1340d586 --- /dev/null +++ b/flint/config.yml @@ -0,0 +1,49 @@ +keep: + - any_duplicated + - any_is_na + - class_equals + - condition_message + - double_assignment + - duplicate_argument + - empty_assignment + - equal_assignment + - equals_na + - expect_comparison + - expect_identical + - expect_length + - expect_named + - expect_not + - expect_null + - expect_true_false + - expect_type + - for_loop_index + - function_return + - implicit_assignment + - is_numeric + - length_levels + - length_test + - lengths + - library_call + - literal_coercion + - matrix_apply + - missing_argument + - nested_ifelse + - numeric_leading_zero + - outer_negation + - package_hooks + - paste + - redundant_equals + - redundant_ifelse + - rep_len + - right_assignment + - sample_int + - semicolon + - seq + - sort + - T_and_F_symbol + - todo_comment + - undesirable_function + - undesirable_operator + - unnecessary_nesting + - unreachable_code + - which_grepl diff --git a/flint/rules/builtin/T_and_F_symbol.yml b/flint/rules/builtin/T_and_F_symbol.yml new file mode 100644 index 00000000..ebe54ba1 --- /dev/null +++ b/flint/rules/builtin/T_and_F_symbol.yml @@ -0,0 +1,97 @@ +id: true_false_symbol +language: r +severity: warning +rule: + pattern: T + kind: identifier + not: + any: + - precedes: + any: + - pattern: <- + - pattern: = + - regex: ^~$ + - follows: + any: + - pattern: $ + - regex: ^~$ + - inside: + any: + - kind: parameter + - kind: call + - kind: binary_operator + follows: + regex: ^~$ + stopBy: end + stopBy: + kind: + argument +fix: TRUE +message: Use TRUE instead of the symbol T. + +--- + +id: true_false_symbol-2 +language: r +severity: warning +rule: + pattern: F + kind: identifier + not: + any: + - precedes: + any: + - pattern: <- + - pattern: = + - regex: ^~$ + - follows: + any: + - pattern: $ + - regex: ^~$ + - inside: + any: + - kind: parameter + - kind: call + - kind: binary_operator + follows: + regex: ^~$ + stopBy: end + stopBy: + kind: + argument +fix: FALSE +message: Use FALSE instead of the symbol F. + +--- + +id: true_false_symbol-3 +language: r +severity: warning +rule: + pattern: T + kind: identifier + precedes: + any: + - pattern: <- + - pattern: = + not: + inside: + kind: argument +message: Don't use T as a variable name, as it can break code relying on T being TRUE. + +--- + +id: true_false_symbol-4 +language: r +severity: warning +rule: + pattern: F + kind: identifier + precedes: + any: + - pattern: <- + - pattern: = + not: + inside: + kind: argument +message: Don't use F as a variable name, as it can break code relying on F being FALSE. diff --git a/flint/rules/builtin/absolute_path.yml b/flint/rules/builtin/absolute_path.yml new file mode 100644 index 00000000..2c5bb3d1 --- /dev/null +++ b/flint/rules/builtin/absolute_path.yml @@ -0,0 +1,13 @@ +# id: absolute_path-1 +# language: r +# severity: warning +# rule: +# kind: string_content +# any: +# - regex: '^~[[:alpha:]]' +# - regex: '^~/[[:alpha:]]' +# - regex: '^[[:alpha:]]:' +# - regex: '^(/|~)$' +# - regex: '^/[[:alpha:]]' +# - regex: '^\\' +# message: Do not use absolute paths. diff --git a/flint/rules/builtin/any_duplicated.yml b/flint/rules/builtin/any_duplicated.yml new file mode 100644 index 00000000..514b028f --- /dev/null +++ b/flint/rules/builtin/any_duplicated.yml @@ -0,0 +1,91 @@ +id: any_duplicated-1 +language: r +severity: warning +rule: + pattern: any($$$ duplicated($MYVAR) $$$) +fix: anyDuplicated(~~MYVAR~~) > 0 +message: anyDuplicated(x, ...) > 0 is better than any(duplicated(x), ...). + +--- + +id: any_duplicated-2 +language: r +severity: warning +rule: + any: + - pattern: length(unique($MYVAR)) == length($MYVAR) + - pattern: length($MYVAR) == length(unique($MYVAR)) +fix: anyDuplicated(~~MYVAR~~) == 0L +message: anyDuplicated(x) == 0L is better than length(unique(x)) == length(x). + +--- + +id: any_duplicated-3 +language: r +severity: warning +rule: + pattern: length(unique($MYVAR)) != length($MYVAR) +fix: anyDuplicated(~~MYVAR~~) != 0L +message: | + Use anyDuplicated(x) != 0L (or > or <) instead of length(unique(x)) != length(x) + (or > or <). + +--- + +id: any_duplicated-4 +language: r +severity: warning +rule: + any: + - pattern: nrow($DATA) != length(unique($DATA$µCOL)) + - pattern: length(unique($DATA$µCOL)) != nrow($DATA) +fix: anyDuplicated(~~DATA~~$~~COL~~) != 0L +message: | + anyDuplicated(DF$col) != 0L is better than length(unique(DF$col)) != nrow(DF) + +--- + +# id: any_duplicated-5 +# language: r +# severity: warning +# rule: +# any: +# - pattern: +# context: nrow($DATA) != length(unique($DATA[["µCOL"]])) +# strictness: ast +# - pattern: +# context: length(unique($DATA[["µCOL"]])) != nrow($DATA) +# strictness: ast +# fix: anyDuplicated(~~DATA~~[["~~COL~~"]]) != 0L +# message: | +# anyDuplicated(DF[["col"]]) != 0L is better than length(unique(DF[["col"]])) != nrow(DF) +# +# --- + +id: any_duplicated-6 +language: r +severity: warning +rule: + any: + - pattern: nrow($DATA) == length(unique($DATA$µCOL)) + - pattern: length(unique($DATA$µCOL)) == nrow($DATA) +fix: anyDuplicated(~~DATA~~$~~COL~~) == 0L +message: | + anyDuplicated(DF$col) == 0L is better than length(unique(DF$col)) == nrow(DF) + +# --- +# +# id: any_duplicated-7 +# language: r +# severity: warning +# rule: +# any: +# - pattern: +# context: nrow($DATA) == length(unique($DATA[["µCOL"]])) +# strictness: ast +# - pattern: +# context: length(unique($DATA[["µCOL"]])) == nrow($DATA) +# strictness: ast +# fix: anyDuplicated(~~DATA~~[["~~COL~~"]]) == 0L +# message: | +# anyDuplicated(DF[["col"]]) == 0L is better than length(unique(DF[["col"]])) == nrow(DF) diff --git a/flint/rules/builtin/any_is_na.yml b/flint/rules/builtin/any_is_na.yml new file mode 100644 index 00000000..7b05a75b --- /dev/null +++ b/flint/rules/builtin/any_is_na.yml @@ -0,0 +1,7 @@ +id: any_na-1 +language: r +severity: warning +rule: + pattern: any($$$ is.na($MYVAR) $$$) +fix: anyNA(~~MYVAR~~) +message: anyNA(x) is better than any(is.na(x)). diff --git a/flint/rules/builtin/class_equals.yml b/flint/rules/builtin/class_equals.yml new file mode 100644 index 00000000..2b7eb4c7 --- /dev/null +++ b/flint/rules/builtin/class_equals.yml @@ -0,0 +1,42 @@ +id: class_equals-1 +language: r +severity: warning +rule: + any: + - pattern: class($VAR) == $CLASSNAME + - pattern: $CLASSNAME == class($VAR) + not: + inside: + kind: argument +fix: inherits(~~VAR~~, ~~CLASSNAME~~) +message: Instead of comparing class(x) with ==, use inherits(x, 'class-name') or is. or is(x, 'class') + +--- + +id: class_equals-2 +language: r +severity: warning +rule: + any: + - pattern: class($VAR) != $CLASSNAME + - pattern: $CLASSNAME != class($VAR) + not: + inside: + kind: argument +fix: "!inherits(~~VAR~~, ~~CLASSNAME~~)" +message: "Instead of comparing class(x) with !=, use !inherits(x, 'class-name') or is. or is(x, 'class')" + +--- + +id: class_equals-3 +language: r +severity: warning +rule: + any: + - pattern: $CLASSNAME %in% class($VAR) + - pattern: class($VAR) %in% $CLASSNAME +constraints: + CLASSNAME: + kind: string +fix: inherits(~~VAR~~, ~~CLASSNAME~~) +message: Instead of comparing class(x) with %in%, use inherits(x, 'class-name') or is. or is(x, 'class') diff --git a/flint/rules/builtin/condition_message.yml b/flint/rules/builtin/condition_message.yml new file mode 100644 index 00000000..eb324666 --- /dev/null +++ b/flint/rules/builtin/condition_message.yml @@ -0,0 +1,23 @@ +id: condition_message-1 +language: r +severity: warning +rule: + pattern: $FUN($$$ paste0($$$MSG) $$$) + kind: call + not: + any: + - has: + kind: extract_operator + - has: + stopBy: end + kind: argument + has: + field: name + regex: "^collapse|recycle0$" + stopBy: end +constraints: + FUN: + regex: "^(packageStartupMessage|stop|warning)$" +fix: ~~FUN~~(~~MSG~~) +message: | + ~~FUN~~(paste0(...)) can be rewritten as ~~FUN~~(...). diff --git a/flint/rules/builtin/double_assignment.yml b/flint/rules/builtin/double_assignment.yml new file mode 100644 index 00000000..60a04e23 --- /dev/null +++ b/flint/rules/builtin/double_assignment.yml @@ -0,0 +1,23 @@ +id: right_double_assignment +language: r +severity: hint +rule: + pattern: $RHS ->> $LHS + has: + field: rhs + kind: identifier +message: ->> can have hard-to-predict behavior; prefer assigning to a + specific environment instead (with assign() or <-). + +--- + +id: left_double_assignment +language: r +severity: hint +rule: + pattern: $LHS <<- $RHS + has: + field: lhs + kind: identifier +message: <<- can have hard-to-predict behavior; prefer assigning to a + specific environment instead (with assign() or <-). diff --git a/flint/rules/builtin/duplicate_argument.yml b/flint/rules/builtin/duplicate_argument.yml new file mode 100644 index 00000000..415b9ff8 --- /dev/null +++ b/flint/rules/builtin/duplicate_argument.yml @@ -0,0 +1,46 @@ +id: duplicate_argument-1 +language: r +severity: warning +rule: + # Look for a function argument... + kind: argument + any: + - has: + kind: identifier + field: name + pattern: $OBJ + - has: + kind: string_content + pattern: $OBJ + stopBy: end + + # ... that follows other argument(s) with the same name... + follows: + kind: argument + stopBy: end + has: + stopBy: end + kind: identifier + field: name + pattern: $OBJ + + # ... inside a function call (or a subset environment for data.table)... + inside: + kind: arguments + follows: + any: + - kind: identifier + pattern: $FUN + - kind: string + inside: + any: + - kind: call + - kind: subset + +# ... that is not a function listed below. +constraints: + FUN: + not: + regex: ^(mutate|transmute)$ + +message: Avoid duplicate arguments in function calls. diff --git a/flint/rules/builtin/empty_assignment.yml b/flint/rules/builtin/empty_assignment.yml new file mode 100644 index 00000000..ccc995fa --- /dev/null +++ b/flint/rules/builtin/empty_assignment.yml @@ -0,0 +1,15 @@ +id: empty_assignment-1 +language: r +severity: warning +rule: + any: + - pattern: $OBJ <- {} + - pattern: $OBJ <- {$CONTENT} + - pattern: $OBJ = {} + - pattern: $OBJ = {$CONTENT} +constraints: + CONTENT: + regex: ^\s+$ +message: | + Assign NULL explicitly or, whenever possible, allocate the empty object with + the right type and size. diff --git a/flint/rules/builtin/equal_assignment.yml b/flint/rules/builtin/equal_assignment.yml new file mode 100644 index 00000000..77074fe1 --- /dev/null +++ b/flint/rules/builtin/equal_assignment.yml @@ -0,0 +1,10 @@ +id: equal_assignment +language: r +severity: hint +rule: + pattern: $LHS = $RHS + has: + field: lhs + kind: identifier +fix: ~~LHS~~ <- ~~RHS~~ +message: Use <-, not =, for assignment. diff --git a/flint/rules/builtin/equals_na.yml b/flint/rules/builtin/equals_na.yml new file mode 100644 index 00000000..64e74543 --- /dev/null +++ b/flint/rules/builtin/equals_na.yml @@ -0,0 +1,37 @@ +id: equals_na +language: r +severity: warning +rule: + any: + - pattern: $MYVAR == NA + - pattern: $MYVAR == NA_integer_ + - pattern: $MYVAR == NA_real_ + - pattern: $MYVAR == NA_complex_ + - pattern: $MYVAR == NA_character_ + - pattern: NA == $MYVAR + - pattern: NA_integer_ == $MYVAR + - pattern: NA_real_ == $MYVAR + - pattern: NA_complex_ == $MYVAR + - pattern: NA_character_ == $MYVAR +fix: is.na(~~MYVAR~~) +message: Use is.na for comparisons to NA (not == or !=). + +--- + +id: equals_na-2 +language: r +severity: warning +rule: + any: + - pattern: $MYVAR != NA + - pattern: $MYVAR != NA_integer_ + - pattern: $MYVAR != NA_real_ + - pattern: $MYVAR != NA_complex_ + - pattern: $MYVAR != NA_character_ + - pattern: NA != $MYVAR + - pattern: NA_integer_ != $MYVAR + - pattern: NA_real_ != $MYVAR + - pattern: NA_complex_ != $MYVAR + - pattern: NA_character_ != $MYVAR +fix: is.na(~~MYVAR~~) +message: Use is.na for comparisons to NA (not == or !=). diff --git a/flint/rules/builtin/expect_comparison.yml b/flint/rules/builtin/expect_comparison.yml new file mode 100644 index 00000000..6af9bb13 --- /dev/null +++ b/flint/rules/builtin/expect_comparison.yml @@ -0,0 +1,37 @@ +id: expect_comparison-1 +language: r +severity: warning +rule: + pattern: expect_true($X > $Y) +fix: expect_gt(~~X~~, ~~Y~~) +message: expect_gt(x, y) is better than expect_true(x > y). + +--- + +id: expect_comparison-2 +language: r +severity: warning +rule: + pattern: expect_true($X >= $Y) +fix: expect_gte(~~X~~, ~~Y~~) +message: expect_gte(x, y) is better than expect_true(x >= y). + +--- + +id: expect_comparison-3 +language: r +severity: warning +rule: + pattern: expect_true($X < $Y) +fix: expect_lt(~~X~~, ~~Y~~) +message: expect_lt(x, y) is better than expect_true(x < y). + +--- + +id: expect_comparison-4 +language: r +severity: warning +rule: + pattern: expect_true($X <= $Y) +fix: expect_lte(~~X~~, ~~Y~~) +message: expect_lte(x, y) is better than expect_true(x <= y). diff --git a/flint/rules/builtin/expect_identical.yml b/flint/rules/builtin/expect_identical.yml new file mode 100644 index 00000000..3af5a855 --- /dev/null +++ b/flint/rules/builtin/expect_identical.yml @@ -0,0 +1,42 @@ +id: expect_identical-1 +language: r +severity: warning +rule: + pattern: expect_true(identical($VAL1, $VAL2)) +fix: expect_identical(~~VAL1~~, ~~VAL2~~) +message: Use expect_identical(x, y) instead of expect_true(identical(x, y)). + +--- + +id: expect_identical-2 +language: r +severity: warning +rule: + pattern: expect_equal($VAL1, $VAL2) +fix: expect_identical(~~VAL1~~, ~~VAL2~~) +constraints: + VAL1: + all: + - not: + has: + stopBy: end + kind: float + regex: \. + - not: + regex: ^typeof + - not: + pattern: NULL + VAL2: + all: + - not: + has: + stopBy: end + kind: float + regex: \. + - not: + regex: ^typeof + - not: + pattern: NULL +message: | + Use expect_identical(x, y) by default; resort to expect_equal() only when + needed, e.g. when setting ignore_attr= or tolerance=. diff --git a/flint/rules/builtin/expect_length.yml b/flint/rules/builtin/expect_length.yml new file mode 100644 index 00000000..cd5f2db0 --- /dev/null +++ b/flint/rules/builtin/expect_length.yml @@ -0,0 +1,12 @@ +id: expect_length-1 +language: r +severity: warning +rule: + any: + - pattern: $FUN(length($OBJ), $VALUE) + - pattern: $FUN($VALUE, length($OBJ)) +constraints: + FUN: + regex: ^(expect_identical|expect_equal)$ +fix: expect_length(~~OBJ~~, ~~VALUE~~) +message: expect_length(x, n) is better than ~~FUN~~(length(x), n). diff --git a/flint/rules/builtin/expect_named.yml b/flint/rules/builtin/expect_named.yml new file mode 100644 index 00000000..bf88f366 --- /dev/null +++ b/flint/rules/builtin/expect_named.yml @@ -0,0 +1,79 @@ +id: expect_named-1 +language: r +severity: warning +rule: + any: + - pattern: + context: expect_identical(names($OBJ), $VALUES) + strictness: ast + - pattern: + context: expect_identical($VALUES, names($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: colnames|rownames|dimnames|NULL + has: + kind: null +fix: expect_named(~~OBJ~~, ~~VALUES~~) +message: expect_named(x, n) is better than expect_identical(names(x), n). + +--- + +id: expect_named-2 +language: r +severity: warning +rule: + any: + - pattern: + context: expect_equal(names($OBJ), $VALUES) + strictness: ast + - pattern: + context: expect_equal($VALUES, names($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: colnames|rownames|dimnames|NULL +fix: expect_named(~~OBJ~~, ~~VALUES~~) +message: expect_named(x, n) is better than expect_equal(names(x), n). + +--- + +id: expect_named-3 +language: r +severity: warning +rule: + any: + - pattern: + context: testthat::expect_identical(names($OBJ), $VALUES) + strictness: ast + - pattern: + context: testthat::expect_identical($VALUES, names($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: colnames|rownames|dimnames|NULL +fix: testthat::expect_named(~~OBJ~~, ~~VALUES~~) +message: expect_named(x, n) is better than expect_identical(names(x), n). + +--- + +id: expect_named-4 +language: r +severity: warning +rule: + any: + - pattern: + context: testthat::expect_equal(names($OBJ), $VALUES) + strictness: ast + - pattern: + context: testthat::expect_equal($VALUES, names($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: colnames|rownames|dimnames|NULL +fix: testthat::expect_named(~~OBJ~~, ~~VALUES~~) +message: expect_named(x, n) is better than expect_equal(names(x), n). diff --git a/flint/rules/builtin/expect_not.yml b/flint/rules/builtin/expect_not.yml new file mode 100644 index 00000000..3a23e9f2 --- /dev/null +++ b/flint/rules/builtin/expect_not.yml @@ -0,0 +1,23 @@ +id: expect_not-1 +language: r +severity: warning +rule: + all: + - pattern: expect_true(!$COND) + - not: + regex: '^expect_true\(!!' +fix: expect_false(~~COND~~) +message: expect_false(x) is better than expect_true(!x), and vice versa. + +--- + +id: expect_not-2 +language: r +severity: warning +rule: + all: + - pattern: expect_false(!$COND) + - not: + regex: '^expect_false\(!!' +fix: expect_true(~~COND~~) +message: expect_false(x) is better than expect_true(!x), and vice versa. diff --git a/flint/rules/builtin/expect_null.yml b/flint/rules/builtin/expect_null.yml new file mode 100644 index 00000000..7888fdb0 --- /dev/null +++ b/flint/rules/builtin/expect_null.yml @@ -0,0 +1,22 @@ +id: expect_null-1 +language: r +severity: warning +rule: + any: + - pattern: $FUN(NULL, $VALUES) + - pattern: $FUN($VALUES, NULL) +constraints: + FUN: + regex: ^(expect_identical|expect_equal)$ +fix: expect_null(~~VALUES~~) +message: expect_null(x) is better than ~~FUN~~(x, NULL). + +--- + +id: expect_null-2 +language: r +severity: warning +rule: + pattern: expect_true(is.null($VALUES)) +fix: expect_null(~~VALUES~~) +message: expect_null(x) is better than expect_true(is.null(x)). diff --git a/flint/rules/builtin/expect_true_false.yml b/flint/rules/builtin/expect_true_false.yml new file mode 100644 index 00000000..784843d8 --- /dev/null +++ b/flint/rules/builtin/expect_true_false.yml @@ -0,0 +1,28 @@ +id: expect_true_false-1 +language: r +severity: warning +rule: + any: + - pattern: $FUN(TRUE, $VALUES) + - pattern: $FUN($VALUES, TRUE) +constraints: + FUN: + regex: ^(expect_identical|expect_equal)$ +fix: expect_true(~~VALUES~~) +message: expect_true(x) is better than ~~FUN~~(x, TRUE). + +--- + +id: expect_true_false-2 +language: r +severity: warning +rule: + any: + - pattern: $FUN(FALSE, $VALUES) + - pattern: $FUN($VALUES, FALSE) +constraints: + FUN: + regex: ^(expect_identical|expect_equal)$ +fix: expect_false(~~VALUES~~) +message: expect_false(x) is better than ~~FUN~~(x, FALSE). + diff --git a/flint/rules/builtin/expect_type.yml b/flint/rules/builtin/expect_type.yml new file mode 100644 index 00000000..1ab20eca --- /dev/null +++ b/flint/rules/builtin/expect_type.yml @@ -0,0 +1,51 @@ +id: expect_type-1 +language: r +severity: warning +rule: + any: + - pattern: + context: expect_identical(typeof($OBJ), $VALUES) + strictness: ast + - pattern: + context: expect_identical($VALUES, typeof($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: typeof +fix: expect_type(~~OBJ~~, ~~VALUES~~) +message: expect_type(x, t) is better than expect_identical(typeof(x), t). + +--- + +id: expect_type-2 +language: r +severity: warning +rule: + any: + - pattern: + context: expect_equal(typeof($OBJ), $VALUES) + strictness: ast + - pattern: + context: expect_equal($VALUES, typeof($OBJ)) + strictness: ast +constraints: + VALUES: + not: + regex: typeof +fix: expect_type(~~OBJ~~, ~~VALUES~~) +message: expect_type(x, t) is better than expect_equal(typeof(x), t). + +--- + +id: expect_type-3 +language: r +severity: warning +rule: + pattern: expect_true($FUN($OBJ)) +constraints: + FUN: + regex: ^is\. + not: + regex: data\.frame$ +message: expect_type(x, t) is better than expect_true(is.(x)). diff --git a/flint/rules/builtin/for_loop_index.yml b/flint/rules/builtin/for_loop_index.yml new file mode 100644 index 00000000..e5b28b43 --- /dev/null +++ b/flint/rules/builtin/for_loop_index.yml @@ -0,0 +1,27 @@ +id: for_loop_index-1 +language: r +severity: warning +rule: + pattern: for ($IDX in $IDX) +message: Don't re-use any sequence symbols as the index symbol in a for loop. + +--- + +id: for_loop_index-2 +language: r +severity: warning +rule: + pattern: for ($IDX in $SEQ) +constraints: + SEQ: + kind: call + has: + kind: arguments + has: + kind: argument + stopBy: end + has: + kind: identifier + field: value + pattern: $IDX +message: Don't re-use any sequence symbols as the index symbol in a for loop. diff --git a/flint/rules/builtin/function_return.yml b/flint/rules/builtin/function_return.yml new file mode 100644 index 00000000..31ba46be --- /dev/null +++ b/flint/rules/builtin/function_return.yml @@ -0,0 +1,11 @@ +id: function_return-1 +language: r +severity: warning +rule: + any: + - pattern: return($OBJ <- $VAL) + - pattern: return($OBJ <<- $VAL) + - pattern: return($VAL -> $OBJ) + - pattern: return($VAL ->> $OBJ) +message: | + Move the assignment outside of the return() clause, or skip assignment altogether. diff --git a/flint/rules/builtin/implicit_assignment.yml b/flint/rules/builtin/implicit_assignment.yml new file mode 100644 index 00000000..133a40ef --- /dev/null +++ b/flint/rules/builtin/implicit_assignment.yml @@ -0,0 +1,69 @@ +id: implicit_assignment-1 +language: r +severity: warning +rule: + any: + - pattern: $RECEIVER <- $VALUE + - pattern: $RECEIVER <<- $VALUE + - pattern: $VALUE -> $RECEIVER + - pattern: $VALUE ->> $RECEIVER + inside: + any: + - kind: if_statement + - kind: while_statement + field: condition + stopBy: end + strictness: cst +message: | + Avoid implicit assignments in function calls. For example, instead of + `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. + +--- + +id: implicit_assignment-2 +language: r +severity: warning +rule: + any: + - pattern: $RECEIVER <- $VALUE + - pattern: $RECEIVER <<- $VALUE + - pattern: $VALUE -> $RECEIVER + - pattern: $VALUE ->> $RECEIVER + inside: + kind: for_statement + field: sequence + stopBy: end + strictness: cst +message: | + Avoid implicit assignments in function calls. For example, instead of + `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. + +# --- +# +# id: implicit_assignment-3 +# language: r +# severity: warning +# rule: +# any: +# - pattern: $RECEIVER <- $VALUE +# - pattern: $RECEIVER <<- $VALUE +# - pattern: $VALUE -> $RECEIVER +# - pattern: $VALUE ->> $RECEIVER +# inside: +# kind: argument +# field: value +# strictness: cst +# stopBy: end +# not: +# inside: +# kind: call +# field: function +# has: +# kind: identifier +# regex: ^(lapply)$ +# stopBy: end +# strictness: cst +# message: | +# Avoid implicit assignments in function calls. For example, instead of +# `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. + diff --git a/flint/rules/builtin/is_numeric.yml b/flint/rules/builtin/is_numeric.yml new file mode 100644 index 00000000..0a76b083 --- /dev/null +++ b/flint/rules/builtin/is_numeric.yml @@ -0,0 +1,25 @@ +id: is_numeric-1 +language: r +severity: warning +rule: + any: + - pattern: is.numeric($VAR) || is.integer($VAR) + - pattern: is.integer($VAR) || is.numeric($VAR) +message: is.numeric(x) || is.integer(x) can be simplified to is.numeric(x). Use + is.double(x) to test for objects stored as 64-bit floating point. + +--- + +id: is_numeric-2 +language: r +severity: warning +rule: + any: + - pattern: + context: class($VAR) %in% c("numeric", "integer") + strictness: ast + - pattern: + context: class($VAR) %in% c("integer", "numeric") + strictness: ast +message: class(x) %in% c("numeric", "integer") can be simplified to is.numeric(x). Use + is.double(x) to test for objects stored as 64-bit floating point. diff --git a/flint/rules/builtin/length_levels.yml b/flint/rules/builtin/length_levels.yml new file mode 100644 index 00000000..8e029780 --- /dev/null +++ b/flint/rules/builtin/length_levels.yml @@ -0,0 +1,7 @@ +id: length_levels-1 +language: r +severity: warning +rule: + pattern: length(levels($VAR)) +fix: nlevels(~~VAR~~) +message: nlevels(x) is better than length(levels(x)). df diff --git a/flint/rules/builtin/length_test.yml b/flint/rules/builtin/length_test.yml new file mode 100644 index 00000000..f9ba9ed7 --- /dev/null +++ b/flint/rules/builtin/length_test.yml @@ -0,0 +1,59 @@ +# Strangely, having something like pattern: length($VAR $OP $VAR2) doesn't work + +id: length_test-1 +language: r +severity: warning +rule: + pattern: length($VAR == $VAR2) +fix: length(~~VAR~~) == ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. + +--- + +id: length_test-2 +language: r +severity: warning +rule: + pattern: length($VAR != $VAR2) +fix: length(~~VAR~~) != ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. + +--- + +id: length_test-3 +language: r +severity: warning +rule: + pattern: length($VAR > $VAR2) +fix: length(~~VAR~~) > ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. + +--- + +id: length_test-4 +language: r +severity: warning +rule: + pattern: length($VAR >= $VAR2) +fix: length(~~VAR~~) >= ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. + +--- + +id: length_test-5 +language: r +severity: warning +rule: + pattern: length($VAR < $VAR2) +fix: length(~~VAR~~) < ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. + +--- + +id: length_test-6 +language: r +severity: warning +rule: + pattern: length($VAR <= $VAR2) +fix: length(~~VAR~~) <= ~~VAR2~~ +message: Checking the length of a logical vector is likely a mistake. diff --git a/flint/rules/builtin/lengths.yml b/flint/rules/builtin/lengths.yml new file mode 100644 index 00000000..a4164402 --- /dev/null +++ b/flint/rules/builtin/lengths.yml @@ -0,0 +1,59 @@ +id: sapply_lengths-1 +language: r +severity: warning +rule: + any: + - pattern: sapply($MYVAR, length) + - pattern: sapply(FUN = length, $MYVAR) + - pattern: sapply($MYVAR, FUN = length) + - pattern: vapply($MYVAR, length $$$) + + - pattern: map_dbl($MYVAR, length) + - pattern: map_dbl($MYVAR, .f = length) + - pattern: map_dbl(.f = length, $MYVAR) + - pattern: map_int($MYVAR, length) + - pattern: map_int($MYVAR, .f = length) + - pattern: map_int(.f = length, $MYVAR) + + - pattern: purrr::map_dbl($MYVAR, length) + - pattern: purrr::map_dbl($MYVAR, .f = length) + - pattern: purrr::map_dbl(.f = length, $MYVAR) + - pattern: purrr::map_int($MYVAR, length) + - pattern: purrr::map_int($MYVAR, .f = length) + - pattern: purrr::map_int(.f = length, $MYVAR) +fix: lengths(~~MYVAR~~) +message: Use lengths() to find the length of each element in a list. + +--- + +id: sapply_lengths-2 +language: r +severity: warning +rule: + any: + - pattern: $MYVAR |> sapply(length) + - pattern: $MYVAR |> sapply(FUN = length) + - pattern: $MYVAR |> vapply(length $$$) + - pattern: $MYVAR |> map_int(length) + - pattern: $MYVAR |> map_int(length $$$) + - pattern: $MYVAR |> purrr::map_int(length) + - pattern: $MYVAR |> purrr::map_int(length $$$) +fix: ~~MYVAR~~ |> lengths() +message: Use lengths() to find the length of each element in a list. + +--- + +id: sapply_lengths-3 +language: r +severity: warning +rule: + any: + - pattern: $MYVAR %>% sapply(length) + - pattern: $MYVAR %>% sapply(FUN = length) + - pattern: $MYVAR %>% vapply(length $$$) + - pattern: $MYVAR %>% map_int(length) + - pattern: $MYVAR %>% map_int(length $$$) + - pattern: $MYVAR %>% purrr::map_int(length) + - pattern: $MYVAR %>% purrr::map_int(length $$$) +fix: ~~MYVAR~~ %>% lengths() +message: Use lengths() to find the length of each element in a list. diff --git a/flint/rules/builtin/library_call.yml b/flint/rules/builtin/library_call.yml new file mode 100644 index 00000000..ef14d540 --- /dev/null +++ b/flint/rules/builtin/library_call.yml @@ -0,0 +1,26 @@ +id: library_call +language: r +severity: warning +rule: + kind: call + has: + regex: ^library|require$ + kind: identifier + follows: + not: + any: + - kind: call + has: + regex: ^library|require$ + kind: identifier + - kind: comment + not: + inside: + any: + - kind: function_definition + - kind: call + has: + pattern: suppressPackageStartupMessages + kind: identifier +message: Move all library/require calls to the top of the script. + diff --git a/flint/rules/builtin/literal_coercion.yml b/flint/rules/builtin/literal_coercion.yml new file mode 100644 index 00000000..e61fb24a --- /dev/null +++ b/flint/rules/builtin/literal_coercion.yml @@ -0,0 +1,89 @@ +id: literal_coercion-1 +language: r +severity: warning +rule: + pattern: $FUN($VALUE) +constraints: + VALUE: + kind: argument + has: + kind: float + not: + regex: 'e' + FUN: + regex: ^(int|as\.integer)$ +fix: ~~VALUE~~L +message: | + Use ~~VALUE~~L instead of ~~FUN~~(~~VALUE~~), i.e., use literals directly + where possible, instead of coercion. + +--- + +id: literal_coercion-2 +language: r +severity: warning +rule: + pattern: as.character(NA) +fix: NA_character_ +message: | + Use NA_character_ instead of as.character(NA), i.e., use literals directly + where possible, instead of coercion. + +--- + +id: literal_coercion-3 +language: r +severity: warning +rule: + pattern: as.logical($VAR) +constraints: + VAR: + kind: argument + has: + any: + - regex: ^1L$ + - regex: ^1$ + - regex: 'true' +fix: TRUE +message: Use TRUE instead of as.logical(~~VAR~~). + +--- + +id: literal_coercion-4 +language: r +severity: warning +rule: + pattern: $FUN($VAR) +constraints: + VAR: + kind: argument + has: + kind: float + FUN: + regex: ^(as\.numeric|as\.double)$ +fix: ~~VAR~~ +message: Use ~~VAR~~ instead of ~~FUN~~(~~VAR~~). + +--- + +id: literal_coercion-5 +language: r +severity: warning +rule: + pattern: as.integer(NA) +fix: NA_integer_ +message: Use NA_integer_ instead of as.integer(NA). + +--- + +id: literal_coercion-6 +language: r +severity: warning +rule: + pattern: $FUN(NA) +constraints: + FUN: + regex: ^(as\.numeric|as\.double)$ +fix: NA_real_ +message: Use NA_real_ instead of ~~FUN~~(NA). + diff --git a/flint/rules/builtin/matrix_apply.yml b/flint/rules/builtin/matrix_apply.yml new file mode 100644 index 00000000..bbc68505 --- /dev/null +++ b/flint/rules/builtin/matrix_apply.yml @@ -0,0 +1,110 @@ +id: matrix_apply-1 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 2, sum) + - pattern: apply($INPUT, MARGIN = 2, sum) + - pattern: apply($INPUT, 2, FUN = sum) + - pattern: apply($INPUT, MARGIN = 2, FUN = sum) +fix: colSums(~~INPUT~~) +message: Use colSums(x) rather than apply(x, 1, sum) + +--- + +id: matrix_apply-2 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 2, sum, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 2, sum, na.rm = $NARM) + - pattern: apply($INPUT, 2, FUN = sum, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 2, FUN = sum, na.rm = $NARM) +fix: colSums(~~INPUT~~, na.rm = ~~NARM~~) +message: Use colSums(x, na.rm = ~~NARM~~) rather than apply(x, 2, sum, na.rm = ~~NARM~~). + +--- + +id: matrix_apply-3 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 1, sum) + - pattern: apply($INPUT, MARGIN = 1, sum) + - pattern: apply($INPUT, 1, FUN = sum) + - pattern: apply($INPUT, MARGIN = 1, FUN = sum) +fix: rowSums(~~INPUT~~) +message: Use rowSums(x) rather than apply(x, 1, sum) + +--- + +id: matrix_apply-4 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 1, sum, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 1, sum, na.rm = $NARM) + - pattern: apply($INPUT, 1, FUN = sum, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 1, FUN = sum, na.rm = $NARM) +fix: rowSums(~~INPUT~~, na.rm = ~~NARM~~) +message: Use rowSums(x, na.rm = ~~NARM~~) rather than apply(x, 1, sum, na.rm = ~~NARM~~). + +--- + +id: matrix_apply-5 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 1, mean) + - pattern: apply($INPUT, MARGIN = 1, mean) + - pattern: apply($INPUT, 1, FUN = mean) + - pattern: apply($INPUT, MARGIN = 1, FUN = mean) +fix: rowMeans(~~INPUT~~) +message: Use rowMeans(x) rather than apply(x, 1, mean). + +--- + +id: matrix_apply-6 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 1, mean, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 1, mean, na.rm = $NARM) + - pattern: apply($INPUT, 1, FUN = mean, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 1, FUN = mean, na.rm = $NARM) +fix: rowMeans(~~INPUT~~, na.rm = ~~NARM~~) +message: Use rowMeans(x, na.rm = ~~NARM~~) rather than apply(x, 1, mean, na.rm = ~~NARM~~). + +--- + +id: matrix_apply-7 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 2, mean) + - pattern: apply($INPUT, MARGIN = 2, mean) + - pattern: apply($INPUT, 2, FUN = mean) + - pattern: apply($INPUT, MARGIN = 2, FUN = mean) +fix: colMeans(~~INPUT~~) +message: Use colMeans(x) rather than apply(x, 2, mean). + +--- + +id: matrix_apply-8 +language: r +severity: warning +rule: + any: + - pattern: apply($INPUT, 2, mean, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 2, mean, na.rm = $NARM) + - pattern: apply($INPUT, 2, FUN = mean, na.rm = $NARM) + - pattern: apply($INPUT, MARGIN = 2, FUN = mean, na.rm = $NARM) +fix: colMeans(~~INPUT~~, na.rm = ~~NARM~~) +message: Use colMeans(x, na.rm = ~~NARM~~) rather than apply(x, 2, mean, na.rm = ~~NARM~~). + diff --git a/flint/rules/builtin/missing_argument.yml b/flint/rules/builtin/missing_argument.yml new file mode 100644 index 00000000..9f47d17a --- /dev/null +++ b/flint/rules/builtin/missing_argument.yml @@ -0,0 +1,44 @@ +id: missing_argument-1 +language: r +severity: warning +rule: + kind: arguments + has: + kind: comma + any: + - precedes: + stopBy: neighbor + any: + - regex: '^\)$' + - kind: comma + - follows: + any: + - regex: '^\($' + - kind: argument + regex: '=$' + follows: + kind: identifier + not: + regex: '^(quote|switch|alist)$' + inside: + kind: call +message: Missing argument in function call. + +--- + +id: missing_argument-2 +language: r +severity: warning +rule: + kind: arguments + regex: '=(\s+|)\)$' + follows: + any: + - kind: identifier + - kind: extract_operator + - kind: namespace_operator + not: + regex: '^(quote|switch|alist)$' + inside: + kind: call +message: Missing argument in function call. diff --git a/flint/rules/builtin/nested_ifelse.yml b/flint/rules/builtin/nested_ifelse.yml new file mode 100644 index 00000000..64dcb08e --- /dev/null +++ b/flint/rules/builtin/nested_ifelse.yml @@ -0,0 +1,29 @@ +id: nested_ifelse-1 +language: r +severity: warning +rule: + pattern: $FUN($COND, $TRUE, $FALSE) +constraints: + FALSE: + regex: ^(ifelse|if_else|fifelse) + FUN: + regex: ^(ifelse|if_else|fifelse) +message: | + Don't use nested ~~FUN~~() calls; instead, try (1) data.table::fcase; + (2) dplyr::case_when; or (3) using a lookup table. + +--- + +id: nested_ifelse-2 +language: r +severity: warning +rule: + pattern: $FUN($COND, $TRUE, $FALSE) +constraints: + TRUE: + regex: ^(ifelse|if_else|fifelse) + FUN: + regex: ^(ifelse|if_else|fifelse) +message: | + Don't use nested ~~FUN~~() calls; instead, try (1) data.table::fcase; + (2) dplyr::case_when; or (3) using a lookup table. diff --git a/flint/rules/builtin/numeric_leading_zero.yml b/flint/rules/builtin/numeric_leading_zero.yml new file mode 100644 index 00000000..1d6762dc --- /dev/null +++ b/flint/rules/builtin/numeric_leading_zero.yml @@ -0,0 +1,11 @@ +id: numeric_leading_zero-1 +language: r +severity: warning +rule: + pattern: $VALUE + any: + - kind: float + - kind: identifier + regex: ^\.[0-9] +fix: 0~~VALUE~~ +message: Include the leading zero for fractional numeric constants. diff --git a/flint/rules/builtin/outer_negation.yml b/flint/rules/builtin/outer_negation.yml new file mode 100644 index 00000000..ca377de3 --- /dev/null +++ b/flint/rules/builtin/outer_negation.yml @@ -0,0 +1,29 @@ +id: outer_negation-1 +language: r +severity: warning +rule: + pattern: all(!$VAR) +constraints: + VAR: + not: + regex: '^!' +fix: '!any(~~VAR~~)' +message: | + !any(x) is better than all(!x). The former applies negation only once after + aggregation instead of many times for each element of x. + +--- + +id: outer_negation-2 +language: r +severity: warning +rule: + pattern: any(! $VAR) +constraints: + VAR: + not: + regex: '^!' +fix: '!all(~~VAR~~)' +message: | + !all(x) is better than any(!x). The former applies negation only once after + aggregation instead of many times for each element of x. diff --git a/flint/rules/builtin/package_hooks.yml b/flint/rules/builtin/package_hooks.yml new file mode 100644 index 00000000..f4dfa756 --- /dev/null +++ b/flint/rules/builtin/package_hooks.yml @@ -0,0 +1,127 @@ +id: package_hooks-1 +language: r +severity: warning +rule: + pattern: packageStartupMessage($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: .onLoad +message: Put packageStartupMessage() calls in .onAttach(), not .onLoad(). + +--- + +id: package_hooks-2 +language: r +severity: warning +rule: + pattern: library.dynam($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: .onAttach +message: Put library.dynam() calls in .onLoad(), not .onAttach(). + +--- + +id: package_hooks-3 +language: r +severity: warning +rule: + pattern: $FN($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: .onLoad +constraints: + FN: + regex: '^(cat|installed.packages|message|packageStartupMessage|print|writeLines)$' +message: Don't use ~~FN~~() in .onLoad(). + +--- + +id: package_hooks-4 +language: r +severity: warning +rule: + pattern: $FN($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: .onAttach +constraints: + FN: + # library.dynam already has its own linter + regex: '^(cat|installed.packages|message|print|writeLines)$' +message: Don't use ~~FN~~() in .onAttach(). + +--- + +id: package_hooks-5 +language: r +severity: warning +rule: + pattern: $FN($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: $LOAD +constraints: + LOAD: + regex: '^(\.onAttach|\.onLoad)$' + FN: + regex: '^(require|library)$' +message: Don't alter the search() path in ~~LOAD~~() by calling ~~FN~~(). + +--- + +id: package_hooks-6 +language: r +severity: warning +rule: + pattern: installed.packages($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: $LOAD +constraints: + LOAD: + regex: '^(\.onAttach|\.onLoad)$' +message: Don't slow down package load by running installed.packages() in ~~LOAD~~(). + +--- + +id: package_hooks-7 +language: r +severity: warning +rule: + pattern: library.dynam.unload($$$) + inside: + stopBy: end + kind: binary_operator + has: + stopBy: end + field: lhs + pattern: $LOAD +constraints: + LOAD: + regex: '^(\.onDetach|\.Last\.lib)$' +message: Use library.dynam.unload() calls in .onUnload(), not ~~LOAD~~(). diff --git a/flint/rules/builtin/paste.yml b/flint/rules/builtin/paste.yml new file mode 100644 index 00000000..800b676b --- /dev/null +++ b/flint/rules/builtin/paste.yml @@ -0,0 +1,75 @@ +id: paste-1 +language: r +severity: warning +rule: + pattern: + context: paste($$$CONTENT sep = "" $$$CONTENT2) + strictness: ast +# fix: paste0($$$CONTENT) +message: paste0(...) is better than paste(..., sep = ""). + +--- + +id: paste-2 +language: r +severity: warning +rule: + any: + - pattern: + context: paste($CONTENT, collapse = ", ") + strictness: ast + - pattern: + context: paste(collapse = ", ", $CONTENT) + strictness: ast +# fix: paste0($$$CONTENT) +message: toString(.) is more expressive than paste(., collapse = ", "). + +--- + +id: paste-3 +language: r +severity: warning +rule: + pattern: + context: paste0($$$CONTENT sep = $USELESS $$$CONTENT2) + strictness: ast +# fix: paste0($$$CONTENT) +message: | + sep= is not a formal argument to paste0(); did you mean to use paste(), or + collapse=? + +--- + +id: paste-4 +language: r +severity: warning +rule: + any: + - pattern: + context: paste0($CONTENT, collapse = $FOO) + strictness: ast + - pattern: + context: paste0(collapse = $FOO, $CONTENT) + strictness: ast + not: + has: + regex: sep + kind: argument +# fix: paste0($$$CONTENT) +message: | + Use paste(), not paste0(), to collapse a character vector when sep= is not used. + +# --- +# +# id: paste-5 +# language: r +# severity: warning +# rule: +# pattern: +# context: paste0(rep($VAR, $TIMES), collapse = "") +# strictness: ast +# constraints: +# VAR: +# kind: string +# fix: strrep(~~VAR~~, ~~TIMES~~) +# message: strrep(x, times) is better than paste0(rep(x, times), collapse = ""). diff --git a/flint/rules/builtin/redundant_equals.yml b/flint/rules/builtin/redundant_equals.yml new file mode 100644 index 00000000..79a6abcf --- /dev/null +++ b/flint/rules/builtin/redundant_equals.yml @@ -0,0 +1,29 @@ +id: redundant_equals-1 +language: r +severity: warning +rule: + any: + - pattern: $VAR == TRUE + - pattern: TRUE == $VAR + - pattern: $VAR == FALSE + - pattern: FALSE == $VAR +message: | + Using == on a logical vector is redundant. Well-named logical vectors can be + used directly in filtering. For data.table's `i` argument, wrap the column + name in (), like `DT[(is_treatment)]`. + +--- + +id: redundant_equals-2 +language: r +severity: warning +rule: + any: + - pattern: $VAR != TRUE + - pattern: TRUE != $VAR + - pattern: $VAR != FALSE + - pattern: FALSE != $VAR +message: | + Using != on a logical vector is redundant. Well-named logical vectors can be + used directly in filtering. For data.table's `i` argument, wrap the column + name in (), like `DT[(is_treatment)]`. diff --git a/flint/rules/builtin/redundant_ifelse.yml b/flint/rules/builtin/redundant_ifelse.yml new file mode 100644 index 00000000..8f252e6e --- /dev/null +++ b/flint/rules/builtin/redundant_ifelse.yml @@ -0,0 +1,67 @@ +id: redundant_ifelse-1 +language: r +severity: warning +rule: + pattern: $FUN($COND, $VAL1, $VAL2) +constraints: + VAL1: + regex: ^TRUE$ + VAL2: + regex: ^FALSE$ + FUN: + regex: ^(ifelse|fifelse|if_else)$ +fix: ~~COND~~ +message: | + Use ~~COND~~ directly instead of calling ~~FUN~~(~~COND~~, TRUE, FALSE). + +--- + +id: redundant_ifelse-2 +language: r +severity: warning +rule: + pattern: $FUN($COND, $VAL1, $VAL2) +constraints: + VAL1: + regex: ^FALSE$ + VAL2: + regex: ^TRUE$ + FUN: + regex: ^(ifelse|fifelse|if_else)$ +fix: '!(~~COND~~)' +message: | + Use !(~~COND~~) directly instead of calling ~~FUN~~(~~COND~~, FALSE, TRUE). + +--- + +id: redundant_ifelse-3 +language: r +severity: warning +rule: + pattern: $FUN($COND, $VAL1, $VAL2) +constraints: + VAL1: + regex: ^(1|1L)$ + VAL2: + regex: ^(0|0L)$ + FUN: + regex: ^(ifelse|fifelse|if_else)$ +fix: as.integer(~~COND~~) +message: Prefer as.integer(~~COND~~) to ~~FUN~~(~~COND~~, ~~VAL1~~, ~~VAL2~~). + +--- + +id: redundant_ifelse-4 +language: r +severity: warning +rule: + pattern: $FUN($COND, $VAL1, $VAL2) +constraints: + VAL1: + regex: ^(0|0L)$ + VAL2: + regex: ^(1|1L)$ + FUN: + regex: ^(ifelse|fifelse|if_else)$ +fix: as.integer(!(~~COND~~)) +message: Prefer as.integer(!(~~COND~~)) to ~~FUN~~(~~COND~~, ~~VAL1~~, ~~VAL2~~). diff --git a/flint/rules/builtin/rep_len.yml b/flint/rules/builtin/rep_len.yml new file mode 100644 index 00000000..d4d78e5d --- /dev/null +++ b/flint/rules/builtin/rep_len.yml @@ -0,0 +1,7 @@ +id: rep_len-1 +language: r +severity: warning +rule: + pattern: rep($OBJ, length.out = $LEN) +fix: rep_len(~~OBJ~~, ~~LEN~~) +message: Use rep_len(x, n) instead of rep(x, length.out = n). diff --git a/flint/rules/builtin/right_assignment.yml b/flint/rules/builtin/right_assignment.yml new file mode 100644 index 00000000..76b736e9 --- /dev/null +++ b/flint/rules/builtin/right_assignment.yml @@ -0,0 +1,10 @@ +id: right_assignment +language: r +severity: hint +rule: + pattern: $RHS -> $LHS + has: + field: rhs + kind: identifier +fix: ~~LHS~~<- ~~RHS~~ +message: Use <-, not ->, for assignment. diff --git a/flint/rules/builtin/sample_int.yml b/flint/rules/builtin/sample_int.yml new file mode 100644 index 00000000..091825c2 --- /dev/null +++ b/flint/rules/builtin/sample_int.yml @@ -0,0 +1,43 @@ +id: sample_int-1 +language: r +severity: warning +rule: + any: + - pattern: sample(1:$N, $$$OTHER) + - pattern: sample(1L:$N, $$$OTHER) +fix: sample.int(~~N~~, ~~OTHER~~) +message: sample.int(n, m, ...) is preferable to sample(1:n, m, ...). + +--- + +id: sample_int-2 +language: r +severity: warning +rule: + pattern: sample(seq($N), $$$OTHER) +fix: sample.int(~~N~~, ~~OTHER~~) +message: sample.int(n, m, ...) is preferable to sample(seq(n), m, ...). + +--- + +id: sample_int-3 +language: r +severity: warning +rule: + pattern: sample(seq_len($N), $$$OTHER) +fix: sample.int(~~N~~, ~~OTHER~~) +message: sample.int(n, m, ...) is preferable to sample(seq_len(n), m, ...). + +--- + +# Strangely this panicks if I rename FIRST to N +id: sample_int-4 +language: r +severity: warning +rule: + pattern: sample($FIRST, $$$OTHER) +constraints: + FIRST: + regex: ^\d+(L|)$ +fix: sample.int(~~N~~, ~~OTHER~~) +message: sample.int(n, m, ...) is preferable to sample(n, m, ...). diff --git a/flint/rules/builtin/semicolon.yml b/flint/rules/builtin/semicolon.yml new file mode 100644 index 00000000..fb5dd75f --- /dev/null +++ b/flint/rules/builtin/semicolon.yml @@ -0,0 +1,10 @@ +id: semicolon-1 +language: r +severity: warning +rule: + regex: ;\s+$ + not: + inside: + kind: string + stopBy: end +message: Trailing semicolons are not needed. diff --git a/flint/rules/builtin/seq.yml b/flint/rules/builtin/seq.yml new file mode 100644 index 00000000..c199c5d8 --- /dev/null +++ b/flint/rules/builtin/seq.yml @@ -0,0 +1,121 @@ +id: seq-1 +language: r +severity: warning +rule: + pattern: seq(length($VAR)) +fix: seq_along(~~VAR~~) +message: | + seq(length(...)) is likely to be wrong in the empty edge case. Use seq_along(...) instead. + +--- + +id: seq-2 +language: r +severity: warning +rule: + any: + - pattern: 1:nrow($VAR) + - pattern: 1L:nrow($VAR) + regex: ^1 +fix: seq_len(nrow(~~VAR~~)) +message: | + 1:nrow(...) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead. + +--- + +id: seq-3 +language: r +severity: warning +rule: + any: + - pattern: 1:n() + - pattern: 1L:n() + regex: ^1 +fix: seq_len(n()) +message: | + 1:n() is likely to be wrong in the empty edge case. Use seq_len(n()) instead. + +--- + +id: seq-4 +language: r +severity: warning +rule: + pattern: seq(nrow($VAR)) +fix: seq_len(nrow(~~VAR~~)) +message: | + seq(nrow(...)) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead. + +--- + +id: seq-5 +language: r +severity: warning +rule: + any: + - pattern: 1:length($VAR) + - pattern: 1L:length($VAR) + regex: ^1 +fix: seq_along(~~VAR~~) +message: | + 1:length(...) is likely to be wrong in the empty edge case. Use seq_along(...) instead. + +--- + +id: seq-6 +language: r +severity: warning +rule: + any: + - pattern: 1:ncol($VAR) + - pattern: 1L:ncol($VAR) + regex: ^1 +fix: seq_len(ncol(~~VAR~~)) +message: | + 1:ncol(...) is likely to be wrong in the empty edge case. Use seq_len(ncol(...)) instead. + +--- + +id: seq-7 +language: r +severity: warning +rule: + any: + - pattern: 1:NCOL($VAR) + - pattern: 1L:NCOL($VAR) + regex: ^1 +fix: seq_len(NCOL(~~VAR~~)) +message: | + 1:NCOL(...) is likely to be wrong in the empty edge case. Use seq_len(NCOL(...)) instead. + +--- + +id: seq-8 +language: r +severity: warning +rule: + any: + - pattern: 1:NROW($VAR) + - pattern: 1L:NROW($VAR) + regex: ^1 +fix: seq_len(NROW(~~VAR~~)) +message: | + 1:NROW(...) is likely to be wrong in the empty edge case. Use seq_len(NROW(...)) instead. + + +--- + +id: seq-9 +language: r +severity: warning +rule: + pattern: seq(1, $VAL) + not: + pattern: seq(1, 0) +constraints: + VAL: + regex: ^\d+(|L)$ +fix: seq_len(~~VAL~~) +message: seq_len(~~VAL~~) is more efficient than seq(1, ~~VAL~~). + + diff --git a/flint/rules/builtin/sort.yml b/flint/rules/builtin/sort.yml new file mode 100644 index 00000000..930f5c6d --- /dev/null +++ b/flint/rules/builtin/sort.yml @@ -0,0 +1,85 @@ +id: sort-1 +language: r +severity: warning +rule: + pattern: $OBJ[order($OBJ)] +fix: sort(~~OBJ~~, na.last = TRUE) +message: sort(~~OBJ~~, na.last = TRUE) is better than ~~OBJ~~[order(~~OBJ~~)]. + +--- + +id: sort-2 +language: r +severity: warning +rule: + any: + - pattern: $OBJ[order($OBJ, decreasing = $DECREASING)] + - pattern: $OBJ[order(decreasing = $DECREASING, $OBJ)] +constraints: + DECREASING: + regex: ^(TRUE|FALSE)$ +fix: sort(~~OBJ~~, decreasing = ~~DECREASING~~, na.last = TRUE) +message: | + sort(~~OBJ~~, decreasing = ~~DECREASING~~, na.last = TRUE) is better than + ~~OBJ~~[order(~~OBJ~~, decreasing = ~~DECREASING~~)]. + +--- + +id: sort-3 +language: r +severity: warning +rule: + any: + - pattern: $OBJ[order($OBJ, na.last = $NALAST)] + - pattern: $OBJ[order(na.last = $NALAST, $OBJ)] +constraints: + NALAST: + regex: ^(TRUE|FALSE)$ +fix: sort(~~OBJ~~, na.last = ~~NALAST~~, na.last = TRUE) +message: | + sort(~~OBJ~~, na.last = ~~NALAST~~, na.last = TRUE) is better than + ~~OBJ~~[order(~~OBJ~~, na.last = ~~NALAST~~)]. + +--- + +id: sort-4 +language: r +severity: warning +rule: + any: + - pattern: $OBJ[order($OBJ, decreasing = TRUE, na.last = FALSE)] + - pattern: $OBJ[order($OBJ, na.last = FALSE, decreasing = TRUE)] + - pattern: $OBJ[order(decreasing = TRUE, $OBJ, na.last = FALSE)] + - pattern: $OBJ[order(decreasing = TRUE, na.last = FALSE, $OBJ)] + - pattern: $OBJ[order(na.last = FALSE, decreasing = TRUE, $OBJ)] + - pattern: $OBJ[order(na.last = FALSE, $OBJ, decreasing = TRUE)] +fix: sort(~~OBJ~~, decreasing = TRUE, na.last = FALSE) +message: | + sort(~~OBJ~~, decreasing = TRUE, na.last = FALSE) is better than + ~~OBJ~~[order(~~OBJ~~, na.last = FALSE, decreasing = TRUE)]. + +--- + +id: sort-5 +language: r +severity: warning +rule: + any: + - pattern: sort($OBJ) == $OBJ + - pattern: $OBJ == sort($OBJ) +fix: !is.unsorted(~~OBJ~~) +message: | + Use !is.unsorted(~~OBJ~~) to test the sortedness of a vector. + +--- + +id: sort-6 +language: r +severity: warning +rule: + any: + - pattern: sort($OBJ) != $OBJ + - pattern: $OBJ != sort($OBJ) +fix: is.unsorted(~~OBJ~~) +message: | + Use is.unsorted(~~OBJ~~) to test the unsortedness of a vector. diff --git a/flint/rules/builtin/todo_comment.yml b/flint/rules/builtin/todo_comment.yml new file mode 100644 index 00000000..83d86edf --- /dev/null +++ b/flint/rules/builtin/todo_comment.yml @@ -0,0 +1,7 @@ +id: todo_comment-1 +language: r +severity: warning +rule: + kind: comment + regex: '(?i)#(|\s+)\b(todo|fixme)\b' +message: Remove TODO comments. diff --git a/flint/rules/builtin/undesirable_function.yml b/flint/rules/builtin/undesirable_function.yml new file mode 100644 index 00000000..c9b17562 --- /dev/null +++ b/flint/rules/builtin/undesirable_function.yml @@ -0,0 +1,13 @@ +id: undesirable_function-1 +language: r +severity: warning +rule: + pattern: $FUN + kind: identifier + not: + inside: + kind: argument +constraints: + FUN: + regex: ^(\.libPaths|attach|browser|debug|debugcall|debugonce|detach|par|setwd|structure|Sys\.setenv|Sys\.setlocale|trace|undebug|untrace)$ +message: Function "~~FUN~~()" is undesirable. diff --git a/flint/rules/builtin/undesirable_operator.yml b/flint/rules/builtin/undesirable_operator.yml new file mode 100644 index 00000000..7d635137 --- /dev/null +++ b/flint/rules/builtin/undesirable_operator.yml @@ -0,0 +1,29 @@ +id: undesirable_operator-1 +language: r +severity: warning +rule: + any: + - pattern: $X <<- $Y + - pattern: $X ->> $Y +message: | + Avoid undesirable operators `<<-` and `->>`. They assign outside the current + environment in a way that can be hard to reason about. Prefer fully-encapsulated + functions wherever possible, or, if necessary, assign to a specific environment + with assign(). Recall that you can create an environment at the desired scope + with new.env(). + +--- + +id: undesirable_operator-2 +language: r +severity: warning +rule: + kind: namespace_operator + has: + pattern: ':::' +message: | + Operator `:::` is undesirable. It accesses non-exported functions inside + packages. Code relying on these is likely to break in future versions of the + package because the functions are not part of the public interface and may be + changed or removed by the maintainers without notice. Use public functions + via :: instead. diff --git a/flint/rules/builtin/unnecessary_nesting.yml b/flint/rules/builtin/unnecessary_nesting.yml new file mode 100644 index 00000000..b56b4670 --- /dev/null +++ b/flint/rules/builtin/unnecessary_nesting.yml @@ -0,0 +1,36 @@ +id: unnecessary_nesting-1 +language: r +severity: warning +rule: + kind: if_statement + any: + - has: + kind: 'braced_expression' + field: consequence + has: + kind: if_statement + stopBy: neighbor + not: + has: + kind: 'braced_expression' + field: alternative + stopBy: end + not: + any: + - has: + nthChild: 2 + - precedes: + regex: "^else$" + - has: + kind: if_statement + field: consequence + stopBy: neighbor + # Can be in if(), but not else if() + not: + inside: + field: alternative + kind: if_statement +message: | + Don't use nested `if` statements, where a single `if` with the combined + conditional expression will do. For example, instead of `if (x) { if (y) { ... }}`, + use `if (x && y) { ... }`. diff --git a/flint/rules/builtin/unreachable_code.yml b/flint/rules/builtin/unreachable_code.yml new file mode 100644 index 00000000..56eb4974 --- /dev/null +++ b/flint/rules/builtin/unreachable_code.yml @@ -0,0 +1,64 @@ +id: unreachable_code-1 +language: r +severity: warning +rule: + regex: '[^}]+' + not: + regex: 'else' + follows: + any: + - pattern: return($$$A) + - pattern: stop($$$A) + not: + precedes: + regex: 'else' + stopBy: end +message: Code and comments coming after a return() or stop() should be removed. + +--- + +id: unreachable_code-2 +language: r +severity: warning +rule: + regex: '[^}]+' + not: + regex: 'else' + follows: + any: + - pattern: next + - pattern: break + stopBy: end +message: Remove code and comments coming after `next` or `break` + +--- + +id: unreachable_code-3 +language: r +severity: warning +rule: + inside: + any: + - kind: if_statement + pattern: if (FALSE) + - kind: while_statement + pattern: while (FALSE) + stopBy: end +message: Remove code inside a conditional loop with a deterministically false condition. + +--- + +id: unreachable_code-4 +language: r +severity: warning +rule: + inside: + any: + - kind: if_statement + pattern: if (TRUE) + - kind: while_statement + pattern: while (TRUE) + stopBy: end +message: | + One branch has a a deterministically true condition. The other branches can + be removed. diff --git a/flint/rules/builtin/which_grepl.yml b/flint/rules/builtin/which_grepl.yml new file mode 100644 index 00000000..81e30f21 --- /dev/null +++ b/flint/rules/builtin/which_grepl.yml @@ -0,0 +1,7 @@ +id: which_grepl-1 +language: r +severity: warning +rule: + pattern: which(grepl($$$ARGS)) +fix: grep(~~ARGS~~) +message: grep(pattern, x) is better than which(grepl(pattern, x)).