diff --git a/compiler+runtime/bin/build-clang b/compiler+runtime/bin/build-clang index 8224956e..0d9a3d31 100755 --- a/compiler+runtime/bin/build-clang +++ b/compiler+runtime/bin/build-clang @@ -54,7 +54,7 @@ function build() cd "${srcdir}/llvm-build" cmake -DCMAKE_BUILD_TYPE=Release \ - -DLLVM_ENABLE_RUNTIMES=all \ + -DLLVM_ENABLE_RUNTIMES=all \ -DCMAKE_CXX_STANDARD=20 \ -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra" \ -DLLVM_TARGETS_TO_BUILD="host" \ diff --git a/compiler+runtime/include/cpp/jank/read/lex.hpp b/compiler+runtime/include/cpp/jank/read/lex.hpp index d4678bab..61b243b8 100644 --- a/compiler+runtime/include/cpp/jank/read/lex.hpp +++ b/compiler+runtime/include/cpp/jank/read/lex.hpp @@ -42,6 +42,8 @@ namespace jank::read::lex integer, /* Has double data. */ real, + /* Has two integer data. */ + ratio, /* Has string data. */ string, /* Has string data. */ @@ -49,6 +51,14 @@ namespace jank::read::lex eof, }; + struct ratio + { + native_integer numerator{}; + native_integer denominator{}; + native_bool operator==(ratio const &rhs) const; + native_bool operator!=(ratio const &rhs) const; + }; + struct token { token() = default; @@ -65,6 +75,7 @@ namespace jank::read::lex token(size_t const p, size_t const s, token_kind const k, native_persistent_string_view const); token(size_t const p, size_t const s, token_kind const k, char const * const); token(size_t const p, size_t const s, token_kind const k, native_bool const); + token(size_t const p, size_t const s, token_kind const k, ratio const); native_bool operator==(token const &rhs) const; native_bool operator!=(token const &rhs) const; @@ -81,12 +92,18 @@ namespace jank::read::lex size_t pos{ ignore_pos }; size_t size{ 1 }; token_kind kind{ token_kind::eof }; - boost::variant + boost::variant data; }; std::ostream &operator<<(std::ostream &os, token const &t); std::ostream &operator<<(std::ostream &os, token::no_data const &t); + std::ostream &operator<<(std::ostream &os, ratio const &t); } namespace jank::read @@ -142,6 +159,8 @@ namespace jank::read::lex size_t pos{}; /* Whether or not the previous token requires a space after it. */ native_bool require_space{}; + /* True when seeing a '/' following a number. */ + native_bool found_slash_after_number{}; native_persistent_string_view file; }; } diff --git a/compiler+runtime/include/cpp/jank/read/parse.hpp b/compiler+runtime/include/cpp/jank/read/parse.hpp index 0c526575..7e1db7aa 100644 --- a/compiler+runtime/include/cpp/jank/read/parse.hpp +++ b/compiler+runtime/include/cpp/jank/read/parse.hpp @@ -107,6 +107,7 @@ namespace jank::read::parse object_result parse_boolean(); object_result parse_keyword(); object_result parse_integer(); + object_result parse_ratio(); object_result parse_real(); object_result parse_string(); object_result parse_escaped_string(); diff --git a/compiler+runtime/include/cpp/jank/runtime/behavior/comparable.hpp b/compiler+runtime/include/cpp/jank/runtime/behavior/comparable.hpp index 7254ecda..659796dc 100644 --- a/compiler+runtime/include/cpp/jank/runtime/behavior/comparable.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/behavior/comparable.hpp @@ -4,7 +4,7 @@ namespace jank::runtime::behavior { template concept comparable = requires(T * const t) { - /* Returns how this object compares to the specified object. Comparison, unlike equality, + /* Returns how this object compares to the specified object. Comparison, unlike equality, * can only be done for objects of the same type. If there's a type mismatch, this function * is expected to throw. There are three cases to handle: * @@ -14,6 +14,6 @@ namespace jank::runtime::behavior * * For sequences, all values need to be considered for comparison. */ - { t->compare(std::declval()) } -> std::convertible_to; + { t->compare(std::declval()) } -> std::convertible_to; }; } diff --git a/compiler+runtime/src/cpp/jank/analyze/processor.cpp b/compiler+runtime/src/cpp/jank/analyze/processor.cpp index 339b6233..c8972694 100644 --- a/compiler+runtime/src/cpp/jank/analyze/processor.cpp +++ b/compiler+runtime/src/cpp/jank/analyze/processor.cpp @@ -837,8 +837,7 @@ namespace jank::analyze } auto const condition(o->data.rest().first().unwrap()); - auto condition_expr( - analyze(condition, current_frame, expression_type::nested, fn_ctx, false)); + auto condition_expr(analyze(condition, current_frame, expression_type::nested, fn_ctx, false)); if(condition_expr.is_err()) { return condition_expr.expect_err_move(); @@ -1402,8 +1401,7 @@ namespace jank::analyze return found_special->second(o, current_frame, expr_type, fn_ctx, needs_box); } - auto sym_result( - analyze_symbol(sym, current_frame, expression_type::nested, fn_ctx, true)); + auto sym_result(analyze_symbol(sym, current_frame, expression_type::nested, fn_ctx, true)); if(sym_result.is_err()) { return sym_result; @@ -1466,8 +1464,7 @@ namespace jank::analyze } else { - auto callable_expr( - analyze(first, current_frame, expression_type::nested, fn_ctx, needs_box)); + auto callable_expr(analyze(first, current_frame, expression_type::nested, fn_ctx, needs_box)); if(callable_expr.is_err()) { return callable_expr; diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp index 0d328922..557a820c 100644 --- a/compiler+runtime/src/cpp/jank/read/lex.cpp +++ b/compiler+runtime/src/cpp/jank/read/lex.cpp @@ -154,6 +154,24 @@ namespace jank::read { } + token::token(size_t const p, size_t const s, token_kind const k, ratio const d) + : pos{ p } + , size{ s } + , kind{ k } + , data{ d } + { + } + + native_bool ratio::operator==(ratio const &rhs) const + { + return numerator == rhs.numerator && denominator == rhs.denominator; + } + + native_bool ratio::operator!=(ratio const &rhs) const + { + return !(*this == rhs); + } + native_bool token::no_data::operator==(no_data const &) const { return true; @@ -186,6 +204,11 @@ namespace jank::read return os << ""; } + std::ostream &operator<<(std::ostream &os, ratio const &r) + { + return os << r.numerator << "/" << r.denominator; + } + processor::processor(native_persistent_string_view const &f) : file{ f } { @@ -366,6 +389,9 @@ namespace jank::read } native_bool contains_leading_digit{ file[token_start] != '-' }; native_bool contains_dot{}; + native_bool is_scientific{}; + native_bool found_exponent_sign{}; + native_bool expecting_exponent{}; while(true) { auto const oc(peek()); @@ -377,30 +403,93 @@ namespace jank::read auto const c(oc.unwrap()); if(c == '.') { - if(contains_dot || !contains_leading_digit) + if(contains_dot || is_scientific || !contains_leading_digit) { ++pos; return err(error{ token_start, pos, "invalid number" }); } contains_dot = true; } + else if(c == 'e' || c == 'E') + { + if(is_scientific || !contains_leading_digit) + { + ++pos; + return err(error{ token_start, pos, "invalid number" }); + } + is_scientific = true; + expecting_exponent = true; + } + else if(c == '+' || c == '-') + { + if(found_exponent_sign || !is_scientific || !expecting_exponent) + { + ++pos; + return err(error{ token_start, pos, "invalid number" }); + } + found_exponent_sign = true; + } + else if(c == '/') + { + require_space = false; + ++pos; + if(found_exponent_sign || is_scientific || expecting_exponent || contains_dot + || found_slash_after_number) + { + return err(error{ token_start, pos, "invalid ratio" }); + } + found_slash_after_number = true; + /* skip the '/' char and look for the denominator number. */ + ++pos; + auto const denominator(next()); + if(denominator.is_ok() && denominator.expect_ok().kind == token_kind::integer) + { + auto const &denominator_token(denominator.expect_ok()); + found_slash_after_number = false; + return ok( + token(token_start, + pos - token_start, + token_kind::ratio, + { .numerator = std::strtoll(file.data() + token_start, nullptr, 10), + .denominator = boost::get(denominator_token.data) })); + } + return err( + error{ token_start, pos, "invalid ratio: expecting an integer denominator" }); + } else if(std::isdigit(c) == 0) { + if(expecting_exponent) + { + ++pos; + return err( + error{ token_start, pos, "unexpected end of real, expecting exponent" }); + } break; } + else if(expecting_exponent) + { + expecting_exponent = false; + } contains_leading_digit = true; ++pos; } + if(expecting_exponent) + { + ++pos; + return err(error{ token_start, pos, "unexpected end of real, expecting exponent" }); + } + /* Tokens beginning with - are ambiguous; it's only a negative number if it has numbers - * to follow. */ + * to follow. + * TODO: handle numbers starting with `+` */ if(file[token_start] != '-' || (pos - token_start) >= 1) { require_space = true; ++pos; - if(contains_dot) + if(contains_dot || is_scientific) { return ok(token{ token_start, pos - token_start, diff --git a/compiler+runtime/src/cpp/jank/read/parse.cpp b/compiler+runtime/src/cpp/jank/read/parse.cpp index 98ebe086..396bd234 100644 --- a/compiler+runtime/src/cpp/jank/read/parse.cpp +++ b/compiler+runtime/src/cpp/jank/read/parse.cpp @@ -182,6 +182,8 @@ namespace jank::read::parse return parse_integer(); case lex::token_kind::real: return parse_real(); + case lex::token_kind::ratio: + return parse_ratio(); case lex::token_kind::string: return parse_string(); case lex::token_kind::escaped_string: @@ -1166,6 +1168,21 @@ namespace jank::read::parse token }; } + processor::object_result processor::parse_ratio() + { + auto const token(token_current->expect_ok()); + ++token_current; + auto const &ratio_data(boost::get(token.data)); + if(ratio_data.denominator == 0) + { + return err(error{ token.pos, "Divide by zero" }); + } + return object_source_info{ make_box(static_cast(ratio_data.numerator) + / ratio_data.denominator), + token, + token }; + } + processor::object_result processor::parse_real() { auto const token(token_current->expect_ok()); diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index ba861c6e..31ef2a4a 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -374,7 +374,76 @@ namespace jank::read::lex })); } } - + TEST_CASE("Ratio") + { + SUBCASE("Success - x/x") + { + processor p{ "4/5" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + { 0, 3, token_kind::ratio, { .numerator = 4, .denominator = 5 } } + })); + } + SUBCASE("Success - -x/x") + { + processor p{ "-4/5" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + { 0, 4, token_kind::ratio, { .numerator = -4, .denominator = 5 } } + })); + } + SUBCASE("Success - -x/-x") + { + processor p{ "-4/-5" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_tokens({ + { 0, 5, token_kind::ratio, { .numerator = -4, .denominator = -5 } } + })); + } + SUBCASE("Failures - x//x") + { + processor p{ "4//5" }; + native_vector> tokens(p.begin(), p.end()); + CHECK( + tokens + == make_results({ { error(0, 4, "invalid ratio: expecting an integer denominator") } })); + } + SUBCASE("Failures - x/x/x") + { + processor p{ "4/5/4" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ { error(0, 3, "invalid ratio: expecting an integer denominator") }, + { error(3, 3, "invalid symbol") } })); + } + SUBCASE("Failures - x/x/x/x") + { + processor p{ "4/5/4/5/6/7/7" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ { error(0, 3, "invalid ratio: expecting an integer denominator") }, + { error(3, 3, "invalid symbol") } })); + } + SUBCASE("Failures - x.x/x") + { + processor p{ "4.4/5" }; + native_vector> tokens(p.begin(), p.end()); + CHECK( + tokens + == make_results({ { error(0, 3, "invalid ratio") }, { error(3, 3, "invalid symbol") } })); + } + SUBCASE("Failures - x/x.x") + { + processor p{ "4/5.9" }; + native_vector> tokens(p.begin(), p.end()); + CHECK( + tokens + == make_results({ { error(0, 5, "invalid ratio: expecting an integer denominator") } })); + } + } TEST_CASE("Integer") { SUBCASE("Positive single-char") @@ -580,6 +649,87 @@ namespace jank::read::lex })); } } + + SUBCASE("Scientific notation") + { + SUBCASE("Valid") + { + processor p{ "1e3 -1e2 2.E-3 22.3e-8 -12E+18\\a" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 3, token_kind::real, 1000.0l }, + token{ 4, 4, token_kind::real, -100.0l }, + token{ 9, 5, token_kind::real, 0.002l }, + token{ 15, 7, token_kind::real, 2.23e-07l }, + token{ 23, 7, token_kind::real, -1.2e+19l }, + token{ 30, 2, token_kind::character, "\\a"sv }, + })); + } + + SUBCASE("Missing exponent") + { + processor p{ "1e 23E-1 12e- -0.2e" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{ 0, 2, "unexpected end of real, expecting exponent" }, + token{ 3, 5, token_kind::real, 2.3l }, + error{ 9, 13, "unexpected end of real, expecting exponent" }, + error{ 14, 19, "unexpected end of real, expecting exponent" }, + })); + } + + SUBCASE("Signs after exponent found") + { + processor p{ "12.3 -1e3- 2.3E+" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + token{ 0, 4, token_kind::real, 12.3l }, + error{ 5, 9, "invalid number" }, + error{ 9, "expected whitespace before next token" }, + token{ 9, token_kind::symbol, "-"sv }, + error{ 11, 16, "unexpected end of real, expecting exponent" }, + })); + } + + SUBCASE("Extra dots") + { + processor p{ "1e3. 12.3 -1e4.3" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{ 0, 3, "invalid number" }, + error{ 3, "unexpected character: ." }, + token{ 5, 4, token_kind::real, 12.3l }, + error{ 10, 14, "invalid number" }, + error{ 14, "unexpected character: ." }, + error{ 15, "expected whitespace before next token" }, + token{ 15, token_kind::integer, 3ll }, + })); + } + + SUBCASE("Extra characters in exponent") + { + processor p{ "2.ee4 -1e4E3 1.eFoo 3E5fOo" }; + native_vector> tokens(p.begin(), p.end()); + CHECK(tokens + == make_results({ + error{ 0, 3, "invalid number" }, + token{ 3, 2, token_kind::symbol, "e4"sv }, + error{ 6, 10, "invalid number" }, + error{ 10, "expected whitespace before next token" }, + token{ 10, 2, token_kind::symbol, "E3"sv }, + error{ 13, 16, "unexpected end of real, expecting exponent" }, + error{ 16, "expected whitespace before next token" }, + token{ 16, 3, token_kind::symbol, "Foo"sv }, + token{ 20, 3, token_kind::real, 300000.0l }, + error{ 23, "expected whitespace before next token" }, + token{ 23, 3, token_kind::symbol, "fOo"sv }, + })); + } + } } TEST_CASE("Character") diff --git a/compiler+runtime/test/cpp/jank/read/parse.cpp b/compiler+runtime/test/cpp/jank/read/parse.cpp index a472d36d..be378bf9 100644 --- a/compiler+runtime/test/cpp/jank/read/parse.cpp +++ b/compiler+runtime/test/cpp/jank/read/parse.cpp @@ -63,6 +63,32 @@ namespace jank::read::parse CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); } + TEST_CASE("Ratio") + { + SUBCASE("Single Ratio") + { + lex::processor lp{ "4/5" }; + processor p{ lp.begin(), lp.end() }; + auto const r(p.next()); + CHECK(equal(r.expect_ok().unwrap().ptr, make_box(0.8))); + CHECK(r.expect_ok().unwrap().start + == lex::token{ + 0, + 3, + lex::token_kind::ratio, + { .numerator = 4, .denominator = 5 } + }); + CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); + } + SUBCASE("Division by zero") + { + lex::processor lp{ "1/0" }; + processor p{ lp.begin(), lp.end() }; + auto const r(p.next()); + CHECK(r.is_err()); + } + } + TEST_CASE("Comments") { lex::processor lp{ ";meow \n1234 ; bar\n;\n\n" };