From f63a90ae8bf6bef66cf5393733e152972d0bceab Mon Sep 17 00:00:00 2001 From: Saket Patel Date: Sun, 13 Oct 2024 01:05:27 +0530 Subject: [PATCH] feat(real): lex scientific notation --- compiler+runtime/src/cpp/jank/read/lex.cpp | 47 ++++++++++++++-- compiler+runtime/test/cpp/jank/read/lex.cpp | 60 +++++++++++++++++++++ 2 files changed, 103 insertions(+), 4 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp index d922a7fa..c92b4884 100644 --- a/compiler+runtime/src/cpp/jank/read/lex.cpp +++ b/compiler+runtime/src/cpp/jank/read/lex.cpp @@ -311,7 +311,7 @@ namespace jank::read } native_persistent_string_view const data{ file.data() + token_start + 1, - ++pos - token_start - 1}; + ++pos - token_start - 1 }; if(data.size() == 1 || data == "newline" || data == "backspace" || data == "space" || data == "formfeed" || data == "return" || data == "tab") @@ -376,6 +376,9 @@ namespace jank::read } native_bool contains_leading_digit{ file[token_start] != '-' }; native_bool contains_dot{}; + native_bool is_scientific{}; + native_bool found_exponent_sign{}; + native_bool expecting_exponent{}; while(true) { auto const oc(peek()); @@ -387,30 +390,66 @@ namespace jank::read auto const c(oc.unwrap()); if(c == '.') { - if(contains_dot || !contains_leading_digit) + if(contains_dot || is_scientific || !contains_leading_digit) { ++pos; return err(error{ token_start, pos, "invalid number" }); } contains_dot = true; } + else if(c == 'e' || c == 'E') + { + if(is_scientific || !contains_leading_digit) + { + ++pos; + return err(error{ token_start, pos, "invalid number" }); + } + is_scientific = true; + expecting_exponent = true; + } + else if(c == '+' || c == '-') + { + if(found_exponent_sign || !is_scientific || !expecting_exponent) + { + ++pos; + return err(error{ token_start, pos, "invalid number" }); + } + found_exponent_sign = true; + } else if(std::isdigit(c) == 0) { + if(expecting_exponent) + { + ++pos; + return err( + error{ token_start, pos, "unexpected end of real, expecting exponent" }); + } break; } + else if(expecting_exponent) + { + expecting_exponent = false; + } contains_leading_digit = true; ++pos; } + if(expecting_exponent) + { + ++pos; + return err(error{ token_start, pos, "unexpected end of real, expecting exponent" }); + } + /* Tokens beginning with - are ambiguous; it's only a negative number if it has numbers - * to follow. */ + * to follow. + * TODO: handle numbers starting with `+` */ if(file[token_start] != '-' || (pos - token_start) >= 1) { require_space = true; ++pos; - if(contains_dot) + if(contains_dot || is_scientific) { return ok(token{ token_start, pos - token_start, diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index d0544f27..c20c417c 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -580,6 +580,66 @@ namespace jank::read::lex })); } } + + SUBCASE("Scientific notation") + { + SUBCASE("Valid") + { + processor p{ "1e3 -1e2 2.E-3 22.3e-8 -12E+18" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 3, token_kind::real, 1000.0l }, + token{ 4, 4, token_kind::real, -100.0l }, + token{ 9, 5, token_kind::real, 0.002l }, + token{ 15, 7, token_kind::real, 2.23e-07l }, + token{ 23, 7, token_kind::real, -1.2e+19l }, + })); + } + + SUBCASE("Missing exponent") + { + processor p{ "1e 23E-1 12e- -0.2e" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{ 0, 2, "unexpected end of real, expecting exponent" }, + token{ 3, 5, token_kind::real, 2.3l }, + error{ 9, 13, "unexpected end of real, expecting exponent" }, + error{ 14, 19, "unexpected end of real, expecting exponent" }, + })); + } + + SUBCASE("Signs after exponent found") + { + processor p{ "12.3 -1e3- 2.3E+" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 4, token_kind::real, 12.3l }, + error{ 5, 9, "invalid number" }, + error{ 9, "expected whitespace before next token" }, + token{ 9, token_kind::symbol, "-"sv }, + error{ 11, 16, "unexpected end of real, expecting exponent" }, + })); + } + + SUBCASE("Extra dots") + { + processor p{ "1e3. 12.3 -1e4.3" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{ 0, 3, "invalid number" }, + error{ 3, "unexpected character: ." }, + token{ 5, 4, token_kind::real, 12.3l }, + error{ 10, 14, "invalid number" }, + error{ 14, "unexpected character: ." }, + error{ 15, "expected whitespace before next token" }, + token{ 15, token_kind::integer, 3ll }, + })); + } + } } TEST_CASE("Character")