From cd6a3f2e4290d725bc9310aeb745fd5ddcb58005 Mon Sep 17 00:00:00 2001 From: Saket Patel Date: Wed, 25 Dec 2024 00:38:31 +0530 Subject: [PATCH 1/4] feat(module-caching): loads module based on origin --- .../cpp/jank/runtime/module/loader.hpp | 28 ++++++ .../src/cpp/jank/runtime/module/loader.cpp | 97 +++++++++++++++---- compiler+runtime/src/cpp/main.cpp | 2 +- 3 files changed, 105 insertions(+), 22 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp b/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp index 29851079..00e8b270 100644 --- a/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp @@ -27,6 +27,14 @@ namespace jank::runtime::module latest, }; + enum class module_type : uint8_t + { + o, + cpp, + jank, + cljc + }; + struct file_entry { object_ptr to_runtime_data() const; @@ -68,6 +76,24 @@ namespace jank::runtime::module option cljc; }; + struct find_result + { + /* + * All the sources for a module + */ + entry sources; + /* + * On the basis of origin, source that should be loaded. + */ + option to_load; + + find_result(entry const &sources, module_type const to_load) + : sources{ sources } + , to_load{ to_load } + { + } + }; + /* These separators match what the JVM does on each system. */ #ifdef _WIN32 static constexpr char module_separator{ ';' }; @@ -80,6 +106,8 @@ namespace jank::runtime::module native_bool is_loaded(native_persistent_string_view const &) const; void set_loaded(native_persistent_string_view const &); + string_result + find(native_persistent_string_view const &module, origin const ori); string_result load(native_persistent_string_view const &module, origin const ori); string_result diff --git a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp index 52dcdd9c..d7fab0d6 100644 --- a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp @@ -202,11 +202,11 @@ namespace jank::runtime::module if(registered) { - //fmt::println("register_entry {} {} {} {}", - // entry.archive_path, - // entry.path, - // module_path.string(), - // path_to_module(module_path)); + // fmt::println("register_entry {} {} {} {}", + // entry.archive_path.unwrap_or("None"), + // entry.path, + // module_path.string(), + // path_to_module(module_path)); } } @@ -327,7 +327,8 @@ namespace jank::runtime::module loaded.emplace(module); } - string_result loader::load(native_persistent_string_view const &module, origin const ori) + string_result + loader::find(native_persistent_string_view const &module, origin const ori) { static std::regex const underscore{ "_" }; native_transient_string patched_module{ module }; @@ -338,47 +339,101 @@ namespace jank::runtime::module return err(fmt::format("unable to find module: {}", module)); } - string_result res{ err(fmt::format("no sources for registered module: {}", module)) }; + auto const with_module_type{ [&](module_type const type) { + loader::find_result res{ entry->second, type }; + return res; + } }; //fmt::println("loading nested module {}", module); - /* When we're compiling, we always load from source. Otherwise, we - * load from source if we specifically chose to do so. */ - bool const compiling{ truthy(rt_ctx.compile_files_var->deref()) }; - if(compiling || ori == origin::source) + if(ori == origin::source) { if(entry->second.jank.is_some()) { - res = load_jank(entry->second.jank.unwrap()); + return with_module_type(module_type::jank); } else if(entry->second.cljc.is_some()) { - res = load_cljc(entry->second.cljc.unwrap()); + return with_module_type(module_type::cljc); } } else { - /* TODO: origin must be latest, so we need to find that. Even if there - * is a binary, we need to only choose it if it's as new, or newer, than - * the source. */ - if(entry->second.o.is_some()) + if(entry->second.o.is_some() + && (entry->second.jank.is_some() || entry->second.cljc.is_some())) { - res = load_o(module, entry->second.o.unwrap()); + auto const o_file_path{ native_transient_string{ entry->second.o.unwrap().path } }; + + native_transient_string source_path{}; + module_type module_type{}; + + if(entry->second.jank.is_some()) + { + source_path = entry->second.jank.unwrap().path; + module_type = module_type::jank; + } + else + { + source_path = entry->second.cljc.unwrap().path; + module_type = module_type::cljc; + } + + if(boost::filesystem::last_write_time(o_file_path) + >= boost::filesystem::last_write_time(source_path)) + { + return with_module_type(module_type::o); + } + else + { + return with_module_type(module_type); + } } else if(entry->second.cpp.is_some()) { - res = load_cpp(entry->second.cpp.unwrap()); + return with_module_type(module_type::cpp); } else if(entry->second.jank.is_some()) { - res = load_jank(entry->second.jank.unwrap()); + return with_module_type(module_type::jank); } else if(entry->second.cljc.is_some()) { - res = load_cljc(entry->second.cljc.unwrap()); + return with_module_type(module_type::cljc); } } + return err(fmt::format("no sources for registered module: {}", module)); + } + + string_result loader::load(native_persistent_string_view const &module, origin const ori) + { + auto const &found_module{ loader::find(module, ori) }; + if(found_module.is_err()) + { + return err(std::move(found_module.expect_err())); + } + + string_result res(err(fmt::format("Couldn't load module: {}", module))); + + auto const module_type_to_load{ found_module.expect_ok().to_load.unwrap() }; + auto const &module_sources{ found_module.expect_ok().sources }; + + switch(module_type_to_load) + { + case module_type::jank: + res = load_jank(module_sources.jank.unwrap()); + break; + case module_type::o: + res = load_o(module, module_sources.o.unwrap()); + break; + case module_type::cpp: + res = load_cpp(module_sources.cpp.unwrap()); + break; + case module_type::cljc: + res = load_cljc(module_sources.cljc.unwrap()); + break; + } + if(res.is_err()) { return res; diff --git a/compiler+runtime/src/cpp/main.cpp b/compiler+runtime/src/cpp/main.cpp index b28f0208..2f9ecf7c 100644 --- a/compiler+runtime/src/cpp/main.cpp +++ b/compiler+runtime/src/cpp/main.cpp @@ -101,7 +101,7 @@ namespace jank using namespace jank; using namespace jank::runtime; - //__rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); + __rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); __rt_ctx->compile_module(opts.target_ns).expect_ok(); } From f7ad7ae4421f84246f736c98ee34450471a0f5b3 Mon Sep 17 00:00:00 2001 From: Saket Patel Date: Wed, 25 Dec 2024 10:35:14 +0530 Subject: [PATCH 2/4] doesn't load a module if already loaded --- .../src/cpp/jank/runtime/module/loader.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp index d7fab0d6..46941f50 100644 --- a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp @@ -202,11 +202,11 @@ namespace jank::runtime::module if(registered) { - // fmt::println("register_entry {} {} {} {}", - // entry.archive_path.unwrap_or("None"), - // entry.path, - // module_path.string(), - // path_to_module(module_path)); + // fmt::println("register_entry {} {} {} {}", + // entry.archive_path.unwrap_or("None"), + // entry.path, + // module_path.string(), + // path_to_module(module_path)); } } @@ -407,6 +407,11 @@ namespace jank::runtime::module string_result loader::load(native_persistent_string_view const &module, origin const ori) { + if(loader::is_loaded(module)) + { + return ok(); + } + auto const &found_module{ loader::find(module, ori) }; if(found_module.is_err()) { @@ -439,7 +444,7 @@ namespace jank::runtime::module return res; } - loaded.emplace(module); + loader::set_loaded(module); return ok(); } From 11c03c4e515367fadbd39bf65d6f95d490964ced Mon Sep 17 00:00:00 2001 From: Saket Patel Date: Wed, 25 Dec 2024 21:05:01 +0530 Subject: [PATCH 3/4] feat(module-loading): Consider case where .o exists without source --- .../cpp/jank/runtime/module/loader.hpp | 19 ++--- .../src/cpp/jank/runtime/module/loader.cpp | 76 +++++++++++++------ compiler+runtime/src/cpp/main.cpp | 2 +- 3 files changed, 57 insertions(+), 40 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp b/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp index 00e8b270..53d4d60a 100644 --- a/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/module/loader.hpp @@ -38,6 +38,8 @@ namespace jank::runtime::module struct file_entry { object_ptr to_runtime_data() const; + native_bool exists() const; + std::time_t last_modified_at() const; /* If the file is within a JAR, this will be the path to the JAR. */ option archive_path; @@ -78,20 +80,10 @@ namespace jank::runtime::module struct find_result { - /* - * All the sources for a module - */ + /* All the sources for a module */ entry sources; - /* - * On the basis of origin, source that should be loaded. - */ + /* On the basis of origin, source that should be loaded. */ option to_load; - - find_result(entry const &sources, module_type const to_load) - : sources{ sources } - , to_load{ to_load } - { - } }; /* These separators match what the JVM does on each system. */ @@ -106,8 +98,7 @@ namespace jank::runtime::module native_bool is_loaded(native_persistent_string_view const &) const; void set_loaded(native_persistent_string_view const &); - string_result - find(native_persistent_string_view const &module, origin const ori); + string_result find(native_persistent_string_view const &module, origin const ori); string_result load(native_persistent_string_view const &module, origin const ori); string_result diff --git a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp index 46941f50..297e4b5b 100644 --- a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -139,7 +140,7 @@ namespace jank::runtime::module } auto const &zip_entry(zf.getEntry(std::string{ entry.path })); - fn(zip_entry.readAsText()); + fn(zip_entry); } static void register_entry(native_unordered_map &entries, @@ -317,6 +318,31 @@ namespace jank::runtime::module make_box(path)); } + native_bool file_entry::exists() const + { + auto const is_archive{ archive_path.is_some() }; + if(is_archive && !boost::filesystem::exists(native_transient_string{ archive_path.unwrap() })) + { + return false; + } + else + { + native_bool source_exists{}; + if(is_archive) + { + visit_jar_entry(*this, [&](auto const &zip_entry) { source_exists = zip_entry.isFile(); }); + } + + return source_exists || boost::filesystem::exists(native_transient_string{ path }); + } + } + + std::time_t file_entry::last_modified_at() const + { + auto const source_path{ archive_path.unwrap_or(path) }; + return boost::filesystem::last_write_time(native_transient_string{ source_path }); + } + native_bool loader::is_loaded(native_persistent_string_view const &module) const { return loaded.contains(module); @@ -339,66 +365,63 @@ namespace jank::runtime::module return err(fmt::format("unable to find module: {}", module)); } - auto const with_module_type{ [&](module_type const type) { - loader::find_result res{ entry->second, type }; - return res; - } }; - - //fmt::println("loading nested module {}", module); - if(ori == origin::source) { if(entry->second.jank.is_some()) { - return with_module_type(module_type::jank); + return find_result{ entry->second, module_type::jank }; } else if(entry->second.cljc.is_some()) { - return with_module_type(module_type::cljc); + return find_result{ entry->second, module_type::cljc }; } } else { - if(entry->second.o.is_some() + if(entry->second.o.is_some() && entry->second.o.unwrap().archive_path.is_none() && (entry->second.jank.is_some() || entry->second.cljc.is_some())) { auto const o_file_path{ native_transient_string{ entry->second.o.unwrap().path } }; - native_transient_string source_path{}; + std::time_t source_modified_time{}; module_type module_type{}; - if(entry->second.jank.is_some()) + if(entry->second.jank.is_some() && entry->second.jank.unwrap().exists()) { - source_path = entry->second.jank.unwrap().path; + source_modified_time = entry->second.jank.unwrap().last_modified_at(); module_type = module_type::jank; } - else + else if(entry->second.cljc.is_some() && entry->second.cljc.unwrap().exists()) { - source_path = entry->second.cljc.unwrap().path; + source_modified_time = entry->second.cljc.unwrap().last_modified_at(); module_type = module_type::cljc; } + else + { + return err( + fmt::format("Found a binary ({}), without a source", entry->second.o.unwrap().path)); + } - if(boost::filesystem::last_write_time(o_file_path) - >= boost::filesystem::last_write_time(source_path)) + if(boost::filesystem::last_write_time(o_file_path) >= source_modified_time) { - return with_module_type(module_type::o); + return find_result{ entry->second, module_type::o }; } else { - return with_module_type(module_type); + return find_result{ entry->second, module_type }; } } else if(entry->second.cpp.is_some()) { - return with_module_type(module_type::cpp); + return find_result{ entry->second, module_type::cpp }; } else if(entry->second.jank.is_some()) { - return with_module_type(module_type::jank); + return find_result{ entry->second, module_type::jank }; } else if(entry->second.cljc.is_some()) { - return with_module_type(module_type::cljc); + return find_result{ entry->second, module_type::cljc }; } } @@ -473,7 +496,9 @@ namespace jank::runtime::module { if(entry.archive_path.is_some()) { - visit_jar_entry(entry, [&](auto const &str) { rt_ctx.jit_prc.eval_string(str); }); + visit_jar_entry(entry, [&](auto const &zip_entry) { + rt_ctx.jit_prc.eval_string(zip_entry.readAsText()); + }); } else { @@ -493,7 +518,8 @@ namespace jank::runtime::module { if(entry.archive_path.is_some()) { - visit_jar_entry(entry, [&](auto const &str) { rt_ctx.eval_string(str); }); + visit_jar_entry(entry, + [&](auto const &zip_entry) { rt_ctx.eval_string(zip_entry.readAsText()); }); } else { diff --git a/compiler+runtime/src/cpp/main.cpp b/compiler+runtime/src/cpp/main.cpp index 2f9ecf7c..b28f0208 100644 --- a/compiler+runtime/src/cpp/main.cpp +++ b/compiler+runtime/src/cpp/main.cpp @@ -101,7 +101,7 @@ namespace jank using namespace jank; using namespace jank::runtime; - __rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); + //__rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); __rt_ctx->compile_module(opts.target_ns).expect_ok(); } From b5311bf5e9a9698212c5285ab57a97e5f62f84c2 Mon Sep 17 00:00:00 2001 From: Saket Patel Date: Thu, 26 Dec 2024 10:40:52 +0530 Subject: [PATCH 4/4] chore(loader): load clojure.core while compiling modules - Don't load clojure.core when compiling clojure.core itself. - Adds doc explaining why we are ignoring object files from archives. --- .../src/cpp/jank/runtime/module/loader.cpp | 12 +++++++++++- compiler+runtime/src/cpp/main.cpp | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp index 297e4b5b..bf004367 100644 --- a/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/module/loader.cpp @@ -378,6 +378,16 @@ namespace jank::runtime::module } else { + /* Ignoring object files from the archives here for security and portability + * reasons. + * + * Security: + * A dependency can include a binary version of a module that doesn't belong + * to it. + * + * Portability: + * Unlike class files, object files are tied to the OS, architecture, c++ stdlib etc, + * making it hard to share them. */ if(entry->second.o.is_some() && entry->second.o.unwrap().archive_path.is_none() && (entry->second.jank.is_some() || entry->second.cljc.is_some())) { @@ -438,7 +448,7 @@ namespace jank::runtime::module auto const &found_module{ loader::find(module, ori) }; if(found_module.is_err()) { - return err(std::move(found_module.expect_err())); + return err(found_module.expect_err()); } string_result res(err(fmt::format("Couldn't load module: {}", module))); diff --git a/compiler+runtime/src/cpp/main.cpp b/compiler+runtime/src/cpp/main.cpp index b28f0208..9151462f 100644 --- a/compiler+runtime/src/cpp/main.cpp +++ b/compiler+runtime/src/cpp/main.cpp @@ -101,7 +101,10 @@ namespace jank using namespace jank; using namespace jank::runtime; - //__rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); + if(opts.target_ns != "clojure.core") + { + __rt_ctx->load_module("/clojure.core", module::origin::latest).expect_ok(); + } __rt_ctx->compile_module(opts.target_ns).expect_ok(); }