Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimise: stores char data as bytes instead of their code representation #107

Merged
merged 1 commit into from
Oct 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions compiler+runtime/include/cpp/jank/read/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,40 @@ namespace jank::runtime
/* TODO: Rename file to processor. */
namespace jank::read::parse
{
/* Maps the source text of a character literal, backslash included, to the
 * char it stands for. Returns none when the text is neither a two-character
 * literal nor one of the named literals listed below. */
static option<char> get_char_from_literal(native_persistent_string const &sv)
{
  /* Two characters in total: the second one is the value. The first
   * character is not inspected here. */
  if(sv.size() == 2)
  {
    return sv[1];
  }

  /* Named literals, matched against their full spelling. */
  if(sv == R"(\newline)")
  {
    return '\n';
  }
  if(sv == R"(\space)")
  {
    return ' ';
  }
  if(sv == R"(\tab)")
  {
    return '\t';
  }
  if(sv == R"(\backspace)")
  {
    return '\b';
  }
  if(sv == R"(\formfeed)")
  {
    return '\f';
  }
  if(sv == R"(\return)")
  {
    return '\r';
  }

  /* Anything else is not a literal this function recognises. */
  return none;
}

struct processor
{
struct object_source_info
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ namespace jank::runtime
/* behavior::object_like */
native_bool equal(object const &) const;
native_persistent_string to_string() const;
native_persistent_string to_code_string() const;
void to_string(fmt::memory_buffer &buff) const;
native_persistent_string to_code_string() const;
native_hash to_hash() const;

object base{ object_type::character };
Expand Down
16 changes: 3 additions & 13 deletions compiler+runtime/src/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,20 +310,10 @@ namespace jank::read
pos++;
}

native_persistent_string_view const data{ file.data() + token_start + 1,
++pos - token_start - 1};

if(data.size() == 1 || data == "newline" || data == "backspace" || data == "space"
|| data == "formfeed" || data == "return" || data == "tab")
{
return ok(token{ token_start, pos - token_start, token_kind::character, data });
}
native_persistent_string_view const data{ file.data() + token_start,
++pos - token_start };

return err(error{ token_start,
pos - token_start,
fmt::format("Invalid character literal `\\{}` \nNote: Jank "
"doesn't support unicode characters yet!",
data) });
return ok(token{ token_start, pos - token_start, token_kind::character, data });
}
case ';':
{
Expand Down
9 changes: 8 additions & 1 deletion compiler+runtime/src/cpp/jank/read/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,14 @@ namespace jank::read::parse
++token_current;
auto const sv(boost::get<native_persistent_string_view>(token.data));

return object_source_info{ make_box<obj::character>(sv), token, token };
auto const character(get_char_from_literal(sv));

if(character.is_none())
{
return err(error{ token.pos, fmt::format("invalid character literal `{}`", sv) });
}

return object_source_info{ make_box<obj::character>(character.unwrap()), token, token };
}

processor::object_result processor::parse_meta_hint()
Expand Down
60 changes: 12 additions & 48 deletions compiler+runtime/src/cpp/jank/runtime/obj/character.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,24 @@

namespace jank::runtime
{
/* Maps the body of a character literal, spelled without a leading backslash,
 * to the char it stands for. Returns none when the text is neither a single
 * character nor one of the named literals listed below. */
static option<char> get_char_from_literal(native_persistent_string const &sv)
{
  /* A lone character stands for itself. */
  if(sv.size() == 1)
  {
    return sv[0];
  }

  /* Named literals, matched against their full spelling. */
  if(sv == "newline")
  {
    return '\n';
  }
  if(sv == "space")
  {
    return ' ';
  }
  if(sv == "tab")
  {
    return '\t';
  }
  if(sv == "backspace")
  {
    return '\b';
  }
  if(sv == "formfeed")
  {
    return '\f';
  }
  if(sv == "return")
  {
    return '\r';
  }

  /* Anything else is not a literal this function recognises. */
  return none;
}

static native_persistent_string get_literal_from_char(char const ch)
{
switch(ch)
{
case '\n':
return "newline";
return R"(\newline)";
case ' ':
return "space";
return R"(\space)";
case '\t':
return "tab";
return R"(\tab)";
case '\b':
return "backspace";
return R"(\backspace)";
case '\f':
return "formfeed";
return R"(\formfeed)";
case '\r':
return "return";
return R"(\return)";
default:
return fmt::format("{}", ch);
return fmt::format(R"(\{})", ch);
}
}

Expand All @@ -64,7 +30,7 @@ namespace jank::runtime
}

obj::character::static_object(char const ch)
: data{ get_literal_from_char(ch) }
: data{ 1, ch }
{
}

Expand All @@ -81,23 +47,21 @@ namespace jank::runtime

void obj::character::to_string(fmt::memory_buffer &buff) const
{
/* TODO: This is actually to_representation, since the string version of \a is just a. */
fmt::format_to(std::back_inserter(buff), "{}", get_char_from_literal(data).unwrap());
fmt::format_to(std::back_inserter(buff), "{}", data);
}

native_persistent_string obj::character::to_string() const
{
auto const char_repr{get_char_from_literal(data).unwrap()};
return native_persistent_string{1, char_repr};
return data;
}

native_persistent_string obj::character::to_code_string() const
{
return fmt::format("\\{}", data);
return get_literal_from_char(data[0]);
}

native_hash obj::character::to_hash() const
{
return hash::visit(get_char_from_literal(data).unwrap());
return data.to_hash();
}
}
23 changes: 11 additions & 12 deletions compiler+runtime/test/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "a"sv }
{ 0, 2, token_kind::character, "\\a"sv }
}));
}

Expand All @@ -616,7 +616,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "1"sv }
{ 0, 2, token_kind::character, "\\1"sv }
}));
}

Expand All @@ -625,10 +625,9 @@ namespace jank::read::lex
processor p{ R"(\11)" };
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({ { error(0,
3,
"Invalid character literal `\\11` \nNote: Jank "
"doesn't support unicode characters yet!"sv) } }));
== make_tokens({
{ 0, 3, token_kind::character, "\\11"sv }
}));
}

SUBCASE("Invalid symbol after a valid char")
Expand All @@ -637,7 +636,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 2, token_kind::character, "1"sv },
token{ 0, 2, token_kind::character, "\\1"sv },
error{ 2, "invalid keyword: expected non-whitespace character after :" }
}));
}
Expand All @@ -648,10 +647,10 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_tokens({
{ 0, 2, token_kind::character, "1"sv },
{ 3, 8, token_kind::character, "newline"sv },
{ 11, 2, token_kind::character, "'"sv },
{ 14, 2, token_kind::character, "\\"sv }
{ 0, 2, token_kind::character, "\\1"sv },
{ 3, 8, token_kind::character, "\\newline"sv },
{ 11, 2, token_kind::character, "\\'"sv },
{ 14, 2, token_kind::character, "\\\\"sv }
}));
}

Expand All @@ -661,7 +660,7 @@ namespace jank::read::lex
native_vector<result<token, error>> tokens(p.begin(), p.end());
CHECK(tokens
== make_results({
token{ 0, 2, token_kind::character, "a"sv },
token{ 0, 2, token_kind::character, "\\a"sv },
token{ 2, token_kind::syntax_quote },
token{ 3, 3, token_kind::keyword, "kw"sv }
}));
Expand Down
36 changes: 28 additions & 8 deletions compiler+runtime/test/cpp/jank/read/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,11 @@ namespace jank::read::parse
processor p{ lp.begin(), lp.end() };

size_t offset{};
for(native_persistent_string const &ch : { "a", "1", "`", ":", "#" })
for(native_persistent_string const ch : { "\\a", "\\1", "\\`", "\\:", "\\#" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, 2, lex::token_kind::character, ch });
Expand All @@ -113,12 +114,13 @@ namespace jank::read::parse

size_t offset{};
for(native_persistent_string const &ch :
{ "newline", "backspace", "return", "formfeed", "tab", "space" })
{ "\\newline", "\\backspace", "\\return", "\\formfeed", "\\tab", "\\space" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

auto const len(ch.size() + 1);
auto const len(ch.size());
CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, len, lex::token_kind::character, ch });
CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start);
Expand All @@ -134,19 +136,37 @@ namespace jank::read::parse
processor p{ lp.begin(), lp.end() };

size_t offset{};
for(native_persistent_string const &ch : { "newline", "a", "tab", "`", "space" })
for(native_persistent_string const &ch : { "\\newline", "\\a", "\\tab", "\\`", "\\space" })
{
auto const r(p.next());
CHECK(equal(r.expect_ok().unwrap().ptr, make_box<obj::character>(ch)));
CHECK(equal(r.expect_ok().unwrap().ptr,
make_box<obj::character>(get_char_from_literal(ch).unwrap())));

auto const len(ch.size() + 1);
auto const len(ch.size());
CHECK(r.expect_ok().unwrap().start
== lex::token{ offset, len, lex::token_kind::character, ch });
CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start);

offset += len;
}
}

SUBCASE("Invalid character literal")
{
lex::processor lp{ R"(\ne\apple\backspace)" };
processor p{ lp.begin(), lp.end() };

/* First two lex tokens are invalid characters i.e. \ne and \apple */
for(size_t i{}; i < 2; ++i)
{
auto const r(p.next());
CHECK(r.is_err());
}

auto const r(p.next());
CHECK(r.expect_ok().unwrap().start
== lex::token{ 9, 10, lex::token_kind::character, "\\backspace" });
}
}

TEST_CASE("String")
Expand Down
Loading