Skip to content

Commit

Permalink
optimise: stores char data as bytes instead of their code representation
Browse files Browse the repository at this point in the history
  • Loading branch information
Samy-33 committed Oct 12, 2024
1 parent f76e142 commit 47b3350
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 83 deletions.
34 changes: 34 additions & 0 deletions compiler+runtime/include/cpp/jank/read/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,40 @@ namespace jank::runtime
/* TODO: Rename file to processor. */
namespace jank::read::parse
{
/* Converts the source text of a character literal, including its leading backslash,
 * into the single byte it denotes.
 *
 * Accepted spellings are a backslash followed by exactly one character (e.g. `\a`)
 * and the named literals `\newline`, `\space`, `\tab`, `\backspace`, `\formfeed`
 * and `\return`. Any other input yields `none`. */
static option<char> get_char_from_literal(native_persistent_string const &sv)
{
  /* A two-byte literal only counts when it really starts with a backslash; without
   * this check arbitrary two-byte input such as `ab` would be read as the character
   * `b`, while every named spelling below does require the backslash. */
  if(sv.size() == 2 && sv[0] == '\\')
  {
    return sv[1];
  }
  if(sv == R"(\newline)")
  {
    return '\n';
  }
  if(sv == R"(\space)")
  {
    return ' ';
  }
  if(sv == R"(\tab)")
  {
    return '\t';
  }
  if(sv == R"(\backspace)")
  {
    return '\b';
  }
  if(sv == R"(\formfeed)")
  {
    return '\f';
  }
  if(sv == R"(\return)")
  {
    return '\r';
  }

  /* Not a spelling we recognise. */
  return none;
}

struct processor
{
struct object_source_info
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ namespace jank::runtime
/* behavior::object_like */
native_bool equal(object const &) const;
native_persistent_string to_string() const;
native_persistent_string to_code_string() const;
void to_string(fmt::memory_buffer &buff) const;
native_persistent_string to_code_string() const;
native_hash to_hash() const;

object base{ object_type::character };
Expand Down
16 changes: 3 additions & 13 deletions compiler+runtime/src/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,20 +310,10 @@ namespace jank::read
pos++;
}

native_persistent_string_view const data{ file.data() + token_start + 1,
++pos - token_start - 1};

if(data.size() == 1 || data == "newline" || data == "backspace" || data == "space"
|| data == "formfeed" || data == "return" || data == "tab")
{
return ok(token{ token_start, pos - token_start, token_kind::character, data });
}
native_persistent_string_view const data{ file.data() + token_start,
++pos - token_start };

return err(error{ token_start,
pos - token_start,
fmt::format("Invalid character literal `\\{}` \nNote: Jank "
"doesn't support unicode characters yet!",
data) });
return ok(token{ token_start, pos - token_start, token_kind::character, data });
}
case ';':
{
Expand Down
9 changes: 8 additions & 1 deletion compiler+runtime/src/cpp/jank/read/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,14 @@ namespace jank::read::parse
++token_current;
auto const sv(boost::get<native_persistent_string_view>(token.data));

return object_source_info{ make_box<obj::character>(sv), token, token };
auto const character(get_char_from_literal(sv));

if(character.is_none())
{
return err(error{ token.pos, fmt::format("invalid character literal `{}`", sv) });
}

return object_source_info{ make_box<obj::character>(character.unwrap()), token, token };
}

processor::object_result processor::parse_meta_hint()
Expand Down
60 changes: 12 additions & 48 deletions compiler+runtime/src/cpp/jank/runtime/obj/character.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,24 @@

namespace jank::runtime
{
/* Resolves the name of a character literal, given without its leading backslash,
 * to the byte it stands for. Returns `none` when the name is not recognised. */
static option<char> get_char_from_literal(native_persistent_string const &sv)
{
  /* Named characters. Every name is longer than one byte, so none of these can
   * collide with the single-character case handled afterwards. */
  if(sv == "newline") { return '\n'; }
  if(sv == "space") { return ' '; }
  if(sv == "tab") { return '\t'; }
  if(sv == "backspace") { return '\b'; }
  if(sv == "formfeed") { return '\f'; }
  if(sv == "return") { return '\r'; }

  /* A lone character stands for itself. */
  if(sv.size() == 1) { return sv[0]; }

  return none;
}

/* Maps a character to the spelling used for it in source code: a named spelling for
 * newline, space, tab, backspace, form feed and carriage return, and a fmt-formatted
 * spelling of the character itself for everything else.
 *
 * NOTE(review): every branch below holds two consecutive `return` statements, so only
 * the first one (the spelling without a leading backslash) can execute; the second,
 * backslash-prefixed one is unreachable as written. This looks like both sides of a
 * diff hunk. Confirm which spelling is intended and drop the other. */
static native_persistent_string get_literal_from_char(char const ch)
{
switch(ch)
{
case '\n':
return "newline";
return R"(\newline)";
case ' ':
return "space";
return R"(\space)";
case '\t':
return "tab";
return R"(\tab)";
case '\b':
return "backspace";
return R"(\backspace)";
case '\f':
return "formfeed";
return R"(\formfeed)";
case '\r':
return "return";
return R"(\return)";
default:
return fmt::format("{}", ch);
return fmt::format(R"(\{})", ch);
}
}

Expand All @@ -64,7 +30,7 @@ namespace jank::runtime
}

obj::character::static_object(char const ch)
: data{ get_literal_from_char(ch) }
: data{ 1, ch }
{
}

Expand All @@ -81,23 +47,21 @@ namespace jank::runtime

/* Appends this character's string form to `buff` through fmt.
 *
 * NOTE(review): two `format_to` calls are present, so as written the buffer receives
 * both the result of get_char_from_literal(data) and then `data` itself. This looks
 * like both sides of a diff hunk. Confirm that only one append is intended. */
void obj::character::to_string(fmt::memory_buffer &buff) const
{
/* TODO: This is actually to_representation, since the string version of \a is just a. */
fmt::format_to(std::back_inserter(buff), "{}", get_char_from_literal(data).unwrap());
fmt::format_to(std::back_inserter(buff), "{}", data);
}

/* Returns this character as a string.
 *
 * NOTE(review): as written the result is a one-character string built from
 * get_char_from_literal(data); the trailing `return data;` is unreachable. This looks
 * like both sides of a diff hunk. Confirm which return is intended. */
native_persistent_string obj::character::to_string() const
{
auto const char_repr{get_char_from_literal(data).unwrap()};
return native_persistent_string{1, char_repr};
return data;
}

/* Returns the source-code spelling of this character.
 *
 * NOTE(review): as written the result is `data` prefixed with a backslash; the
 * trailing return that calls get_literal_from_char(data[0]) is unreachable. This looks
 * like both sides of a diff hunk. Confirm which return is intended. */
native_persistent_string obj::character::to_code_string() const
{
return fmt::format("\\{}", data);
return get_literal_from_char(data[0]);
}

/* Returns the hash of this character.
 *
 * NOTE(review): as written the hash is taken over the result of
 * get_char_from_literal(data); the trailing return that hashes data[0] is unreachable.
 * This looks like both sides of a diff hunk. Confirm which return is intended. */
native_hash obj::character::to_hash() const
{
return hash::visit(get_char_from_literal(data).unwrap());
return hash::visit(data[0]);
}
}
23 changes: 11 additions & 12 deletions compiler+runtime/test/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "a"sv }
{ 0, 2, token_kind::character, "\\a"sv }
}));
}

Expand All @@ -616,7 +616,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "1"sv }
{ 0, 2, token_kind::character, "\\1"sv }
}));
}

Expand All @@ -625,10 +625,9 @@ namespace jank::read::lex
processor p{ R"(\11)" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({ { error(0,
3,
"Invalid character literal `\\11` \nNote: Jank "
"doesn't support unicode characters yet!"sv) } }));
== make_tokens({
{ 0, 3, token_kind::character, "\\11"sv }
}));
}

SUBCASE("Invalid symbol after a valid char")
Expand All @@ -637,7 +636,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 2, token_kind::character, "1"sv },
token{ 0, 2, token_kind::character, "\\1"sv },
error{ 2, "invalid keyword: expected non-whitespace character after :" }
}));
}
Expand All @@ -648,10 +647,10 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "1"sv },
{ 3, 8, token_kind::character, "newline"sv },
{ 11, 2, token_kind::character, "'"sv },
{ 14, 2, token_kind::character, "\\"sv }
{ 0, 2, token_kind::character, "\\1"sv },
{ 3, 8, token_kind::character, "\\newline"sv },
{ 11, 2, token_kind::character, "\\'"sv },
{ 14, 2, token_kind::character, "\\\\"sv }
}));
}

Expand All @@ -661,7 +660,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 2, token_kind::character, "a"sv },
token{ 0, 2, token_kind::character, "\\a"sv },
token{ 2, token_kind::syntax_quote },
token{ 3, 3, token_kind::keyword, "kw"sv }
}));
Expand Down
36 changes: 28 additions & 8 deletions compiler+runtime/test/cpp/jank/read/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,11 @@ namespace jank::read::parse
processor p{ lp.begin(), lp.end() };

size_t offset{};
for(native_persistent_string const &ch : { "a", "1", "`", ":", "#" })
for(native_persistent_string const ch : { "\\a", "\\1", "\\`", "\\:", "\\#" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, 2, lex::token_kind::character, ch });
Expand All @@ -113,12 +114,13 @@ namespace jank::read::parse

size_t offset{};
for(native_persistent_string const &ch :
{ "newline", "backspace", "return", "formfeed", "tab", "space" })
{ "\\newline", "\\backspace", "\\return", "\\formfeed", "\\tab", "\\space" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

auto const len(ch.size() + 1);
auto const len(ch.size());
CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, len, lex::token_kind::character, ch });
CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start);
Expand All @@ -134,19 +136,37 @@ namespace jank::read::parse
processor p{ lp.begin(), lp.end() };

size_t offset{};
for(native_persistent_string const &ch : { "newline", "a", "tab", "`", "space" })
for(native_persistent_string const &ch : { "\\newline", "\\a", "\\tab", "\\`", "\\space" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

auto const len(ch.size() + 1);
auto const len(ch.size());
CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, len, lex::token_kind::character, ch });
CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start);

offset += len;
}
}

SUBCASE("Invalid character literal")
{
  lex::processor lp{ R"(\ne\apple\backspace)" };
  processor p{ lp.begin(), lp.end() };

  /* The first two lexed tokens, \ne and \apple, are not valid characters, so the
   * parser has to report an error for each of them. */
  for(size_t rejected{}; rejected < 2; ++rejected)
  {
    auto const failure(p.next());
    CHECK(failure.is_err());
  }

  /* The third literal is valid; it begins at offset 9 (3 + 6 preceding bytes) and
   * is 10 bytes long. */
  auto const valid(p.next());
  CHECK(valid.expect_ok().unwrap().start
        == lex::token{ 9, 10, lex::token_kind::character, "\\backspace" });
}
}

TEST_CASE("String")
Expand Down

0 comments on commit 47b3350

Please sign in to comment.