Skip to content

Commit

Permalink
feat(real): lex scientific notation
Browse files Browse the repository at this point in the history
  • Loading branch information
Samy-33 committed Oct 12, 2024
1 parent f76e142 commit f63a90a
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 4 deletions.
47 changes: 43 additions & 4 deletions compiler+runtime/src/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ namespace jank::read
}

native_persistent_string_view const data{ file.data() + token_start + 1,
++pos - token_start - 1};
++pos - token_start - 1 };

if(data.size() == 1 || data == "newline" || data == "backspace" || data == "space"
|| data == "formfeed" || data == "return" || data == "tab")
Expand Down Expand Up @@ -376,6 +376,9 @@ namespace jank::read
}
native_bool contains_leading_digit{ file[token_start] != '-' };
native_bool contains_dot{};
native_bool is_scientific{};
native_bool found_exponent_sign{};
native_bool expecting_exponent{};
while(true)
{
auto const oc(peek());
Expand All @@ -387,30 +390,66 @@ namespace jank::read
auto const c(oc.unwrap());
if(c == '.')
{
if(contains_dot || !contains_leading_digit)
if(contains_dot || is_scientific || !contains_leading_digit)
{
++pos;
return err(error{ token_start, pos, "invalid number" });
}
contains_dot = true;
}
else if(c == 'e' || c == 'E')
{
if(is_scientific || !contains_leading_digit)
{
++pos;
return err(error{ token_start, pos, "invalid number" });
}
is_scientific = true;
expecting_exponent = true;
}
else if(c == '+' || c == '-')
{
if(found_exponent_sign || !is_scientific || !expecting_exponent)
{
++pos;
return err(error{ token_start, pos, "invalid number" });
}
found_exponent_sign = true;
}
else if(std::isdigit(c) == 0)
{
if(expecting_exponent)
{
++pos;
return err(
error{ token_start, pos, "unexpected end of real, expecting exponent" });
}
break;
}
else if(expecting_exponent)
{
expecting_exponent = false;
}

contains_leading_digit = true;

++pos;
}

if(expecting_exponent)
{
++pos;
return err(error{ token_start, pos, "unexpected end of real, expecting exponent" });
}

/* Tokens beginning with - are ambiguous; it's only a negative number if it has numbers
* to follow. */
* to follow.
* TODO: handle numbers starting with `+` */
if(file[token_start] != '-' || (pos - token_start) >= 1)
{
require_space = true;
++pos;
if(contains_dot)
if(contains_dot || is_scientific)
{
return ok(token{ token_start,
pos - token_start,
Expand Down
60 changes: 60 additions & 0 deletions compiler+runtime/test/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,66 @@ namespace jank::read::lex
}));
}
}

/* Exercises lexing of reals written in scientific notation (mantissa, `e`/`E`, optional
 * sign, exponent digits). In the expectations below, the first number of each `token` or
 * `error` is the offset into the input string where that lexeme starts. For the
 * four-argument `token`s the second number is the lexeme's length; for the three-argument
 * `error`s it is the offset at which lexing of that lexeme stopped. */
SUBCASE("Scientific notation")
{
/* Well-formed inputs: upper and lower case exponent markers, a mantissa ending in a bare
 * dot (`2.`), negative mantissas, and explicitly signed exponents. */
SUBCASE("Valid")
{
processor p{ "1e3 -1e2 2.E-3 22.3e-8 -12E+18" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 3, token_kind::real, 1000.0l },
token{ 4, 4, token_kind::real, -100.0l },
token{ 9, 5, token_kind::real, 0.002l },
token{ 15, 7, token_kind::real, 2.23e-07l },
token{ 23, 7, token_kind::real, -1.2e+19l },
}));
}

/* An exponent marker (optionally followed by a sign) must be followed by at least one
 * digit. `1e`, `12e-` and `-0.2e` each end before any exponent digit and are reported as
 * errors; `23E-1` is included as a well-formed control between them. The last error ends
 * at offset 19, which is the length of the input, i.e. the number was cut off by end of
 * input rather than by whitespace. */
SUBCASE("Missing exponent")
{
processor p{ "1e 23E-1 12e- -0.2e" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
error{ 0, 2, "unexpected end of real, expecting exponent" },
token{ 3, 5, token_kind::real, 2.3l },
error{ 9, 13, "unexpected end of real, expecting exponent" },
error{ 14, 19, "unexpected end of real, expecting exponent" },
}));
}

/* A sign is only accepted directly after the exponent marker. In `-1e3-` the trailing `-`
 * comes after the exponent digits, so the number is rejected as invalid and lexing resumes
 * at that `-` (offset 9), which yields a missing-whitespace error followed by a `-` symbol.
 * `2.3E+` has a sign but no exponent digits, so it is reported as a missing exponent. */
SUBCASE("Signs after exponent found")
{
processor p{ "12.3 -1e3- 2.3E+" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 4, token_kind::real, 12.3l },
error{ 5, 9, "invalid number" },
error{ 9, "expected whitespace before next token" },
token{ 9, token_kind::symbol, "-"sv },
error{ 11, 16, "unexpected end of real, expecting exponent" },
}));
}

/* A dot is not allowed once the exponent has started. The number is rejected at the dot,
 * the dot itself is then reported as an unexpected character, and whatever follows it is
 * lexed as a fresh token (the trailing `3` of `-1e4.3` becomes an integer, preceded by a
 * missing-whitespace error). `12.3` is included as a well-formed control. */
SUBCASE("Extra dots")
{
processor p{ "1e3. 12.3 -1e4.3" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
error{ 0, 3, "invalid number" },
error{ 3, "unexpected character: ." },
token{ 5, 4, token_kind::real, 12.3l },
error{ 10, 14, "invalid number" },
error{ 14, "unexpected character: ." },
error{ 15, "expected whitespace before next token" },
token{ 15, token_kind::integer, 3ll },
}));
}
}
}

TEST_CASE("Character")
Expand Down

0 comments on commit f63a90a

Please sign in to comment.