diff --git a/compiler+runtime/include/cpp/jank/read/parse.hpp b/compiler+runtime/include/cpp/jank/read/parse.hpp index 0a2809e84..0c526575a 100644 --- a/compiler+runtime/include/cpp/jank/read/parse.hpp +++ b/compiler+runtime/include/cpp/jank/read/parse.hpp @@ -12,6 +12,40 @@ namespace jank::runtime /* TODO: Rename file to processor. */ namespace jank::read::parse { + static option get_char_from_literal(native_persistent_string const &sv) + { + if(sv.size() == 2) + { + return sv[1]; + } + else if(sv == R"(\newline)") + { + return '\n'; + } + else if(sv == R"(\space)") + { + return ' '; + } + else if(sv == R"(\tab)") + { + return '\t'; + } + else if(sv == R"(\backspace)") + { + return '\b'; + } + else if(sv == R"(\formfeed)") + { + return '\f'; + } + else if(sv == R"(\return)") + { + return '\r'; + } + + return none; + } + struct processor { struct object_source_info diff --git a/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp b/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp index 618933a0f..bf91444c2 100644 --- a/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/obj/character.hpp @@ -18,8 +18,8 @@ namespace jank::runtime /* behavior::object_like */ native_bool equal(object const &) const; native_persistent_string to_string() const; - native_persistent_string to_code_string() const; void to_string(fmt::memory_buffer &buff) const; + native_persistent_string to_code_string() const; native_hash to_hash() const; object base{ object_type::character }; diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp index d922a7fa3..eb61f2670 100644 --- a/compiler+runtime/src/cpp/jank/read/lex.cpp +++ b/compiler+runtime/src/cpp/jank/read/lex.cpp @@ -310,20 +310,10 @@ namespace jank::read pos++; } - native_persistent_string_view const data{ file.data() + token_start + 1, - ++pos - token_start - 1}; - - if(data.size() == 1 || data == "newline" || data == "backspace" || data == "space" - || data == "formfeed" || data == "return" || data == "tab") - { - return ok(token{ token_start, pos - token_start, token_kind::character, data }); - } + native_persistent_string_view const data{ file.data() + token_start, + ++pos - token_start }; - return err(error{ token_start, - pos - token_start, - fmt::format("Invalid character literal `\\{}` \nNote: Jank " - "doesn't support unicode characters yet!", - data) }); + return ok(token{ token_start, pos - token_start, token_kind::character, data }); } case ';': { diff --git a/compiler+runtime/src/cpp/jank/read/parse.cpp b/compiler+runtime/src/cpp/jank/read/parse.cpp index 42ff48365..98ebe086a 100644 --- a/compiler+runtime/src/cpp/jank/read/parse.cpp +++ b/compiler+runtime/src/cpp/jank/read/parse.cpp @@ -344,7 +344,14 @@ namespace jank::read::parse ++token_current; auto const sv(boost::get(token.data)); - return object_source_info{ make_box(sv), token, token }; + auto const character(get_char_from_literal(sv)); + + if(character.is_none()) + { + return err(error{ token.pos, fmt::format("invalid character literal `{}`", sv) }); + } + + return object_source_info{ make_box(character.unwrap()), token, token }; } processor::object_result processor::parse_meta_hint() diff --git a/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp b/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp index 015e8f0b4..92f0cae5f 100644 --- a/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/obj/character.cpp @@ -3,58 +3,24 @@ namespace jank::runtime { - static option get_char_from_literal(native_persistent_string const &sv) - { - if(sv.size() == 1) - { - return sv[0]; - } - else if(sv == "newline") - { - return '\n'; - } - else if(sv == "space") - { - return ' '; - } - else if(sv == "tab") - { - return '\t'; - } - else if(sv == "backspace") - { - return '\b'; - } - else if(sv == "formfeed") - { - return '\f'; - } - else if(sv == "return") - { - return '\r'; - } - - return none; - } - static native_persistent_string get_literal_from_char(char const ch) { switch(ch) { case '\n': - return "newline"; + return R"(\newline)"; case ' ': - return "space"; + return R"(\space)"; case '\t': - return "tab"; + return R"(\tab)"; case '\b': - return "backspace"; + return R"(\backspace)"; case '\f': - return "formfeed"; + return R"(\formfeed)"; case '\r': - return "return"; + return R"(\return)"; default: - return fmt::format("{}", ch); + return fmt::format(R"(\{})", ch); } } @@ -64,7 +30,7 @@ namespace jank::runtime } obj::character::static_object(char const ch) - : data{ get_literal_from_char(ch) } + : data{ 1, ch } { } @@ -81,23 +47,21 @@ namespace jank::runtime void obj::character::to_string(fmt::memory_buffer &buff) const { - /* TODO: This is actually to_representation, since the string version of \a is just a. */ - fmt::format_to(std::back_inserter(buff), "{}", get_char_from_literal(data).unwrap()); + fmt::format_to(std::back_inserter(buff), "{}", data); } native_persistent_string obj::character::to_string() const { - auto const char_repr{get_char_from_literal(data).unwrap()}; - return native_persistent_string{1, char_repr}; + return data; } native_persistent_string obj::character::to_code_string() const { - return fmt::format("\\{}", data); + return get_literal_from_char(data[0]); } native_hash obj::character::to_hash() const { - return hash::visit(get_char_from_literal(data).unwrap()); + return hash::visit(data[0]); } } diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index d0544f276..94c008546 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -606,7 +606,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 2, token_kind::character, "a"sv } + { 0, 2, token_kind::character, "\\a"sv } })); } @@ -616,7 +616,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 2, token_kind::character, "1"sv } + { 0, 2, token_kind::character, "\\1"sv } })); } @@ -625,10 +625,9 @@ namespace jank::read::lex processor p{ R"(\11)" }; native_vector> tokens(p.begin(), p.end()); CHECK(tokens - == make_results({ { error(0, - 3, - "Invalid character literal `\\11` \nNote: Jank " - "doesn't support unicode characters yet!"sv) } })); + == make_tokens({ + { 0, 3, token_kind::character, "\\11"sv } + })); } SUBCASE("Invalid symbol after a valid char") @@ -637,7 +636,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_results({ - token{ 0, 2, token_kind::character, "1"sv }, + token{ 0, 2, token_kind::character, "\\1"sv }, error{ 2, "invalid keyword: expected non-whitespace character after :" } })); } @@ -648,10 +647,10 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 2, token_kind::character, "1"sv }, - { 3, 8, token_kind::character, "newline"sv }, - { 11, 2, token_kind::character, "'"sv }, - { 14, 2, token_kind::character, "\\"sv } + { 0, 2, token_kind::character, "\\1"sv }, + { 3, 8, token_kind::character, "\\newline"sv }, + { 11, 2, token_kind::character, "\\'"sv }, + { 14, 2, token_kind::character, "\\\\"sv } })); } @@ -661,7 +660,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_results({ - token{ 0, 2, token_kind::character, "a"sv }, + token{ 0, 2, token_kind::character, "\\a"sv }, token{ 2, token_kind::syntax_quote }, token{ 3, 3, token_kind::keyword, "kw"sv } })); diff --git a/compiler+runtime/test/cpp/jank/read/parse.cpp b/compiler+runtime/test/cpp/jank/read/parse.cpp index ce44d38f6..fff59b893 100644 --- a/compiler+runtime/test/cpp/jank/read/parse.cpp +++ b/compiler+runtime/test/cpp/jank/read/parse.cpp @@ -92,10 +92,11 @@ namespace jank::read::parse processor p{ lp.begin(), lp.end() }; size_t offset{}; - for(native_persistent_string const &ch : { "a", "1", "`", ":", "#" }) + for(native_persistent_string const ch : { "\\a", "\\1", "\\`", "\\:", "\\#" }) { auto const r(p.next()); - CHECK(equal(r.expect_ok().unwrap().ptr, make_box(ch))); + CHECK(equal(r.expect_ok().unwrap().ptr, + make_box(get_char_from_literal(ch).unwrap()))); CHECK(r.expect_ok().unwrap().start == lex::token{ offset, 2, lex::token_kind::character, ch }); @@ -113,12 +114,13 @@ namespace jank::read::parse size_t offset{}; for(native_persistent_string const &ch : - { "newline", "backspace", "return", "formfeed", "tab", "space" }) + { "\\newline", "\\backspace", "\\return", "\\formfeed", "\\tab", "\\space" }) { auto const r(p.next()); - CHECK(equal(r.expect_ok().unwrap().ptr, make_box(ch))); + CHECK(equal(r.expect_ok().unwrap().ptr, + make_box(get_char_from_literal(ch).unwrap()))); - auto const len(ch.size() + 1); + auto const len(ch.size()); CHECK(r.expect_ok().unwrap().start == lex::token{ offset, len, lex::token_kind::character, ch }); CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); @@ -134,12 +136,13 @@ namespace jank::read::parse processor p{ lp.begin(), lp.end() }; size_t offset{}; - for(native_persistent_string const &ch : { "newline", "a", "tab", "`", "space" }) + for(native_persistent_string const &ch : { "\\newline", "\\a", "\\tab", "\\`", "\\space" }) { auto const r(p.next()); - CHECK(equal(r.expect_ok().unwrap().ptr, make_box(ch))); + CHECK(equal(r.expect_ok().unwrap().ptr, + make_box(get_char_from_literal(ch).unwrap()))); - auto const len(ch.size() + 1); + auto const len(ch.size()); CHECK(r.expect_ok().unwrap().start == lex::token{ offset, len, lex::token_kind::character, ch }); CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); @@ -147,6 +150,23 @@ namespace jank::read::parse offset += len; } } + + SUBCASE("Invalid character literal") + { + lex::processor lp{ R"(\ne\apple\backspace)" }; + processor p{ lp.begin(), lp.end() }; + + // First two lex tokens are invalid characters i.e. \ne and \apple + for(int _ignored = 1; _ignored <= 2; _ignored++) + { + auto const r(p.next()); + CHECK(r.is_err()); + } + + auto const r(p.next()); + CHECK(r.expect_ok().unwrap().start + == lex::token{ 9, 10, lex::token_kind::character, "\\backspace" }); + } } TEST_CASE("String")