From 1e87023d5498c53a01ce44c0c3fc53c5092c15c1 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:27:36 +0100 Subject: [PATCH 1/2] Fix datarace in logging code The datarace could occur when spdlog was is called from any thread other than the main thread. --- src/cooler_file_writer.cpp | 3 ++- src/hic_file_writer.cpp | 3 ++- src/hictkpy.cpp | 1 - src/include/hictkpy/logger.hpp | 10 +++------- src/logger.cpp | 22 +++++++++++++--------- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/cooler_file_writer.cpp b/src/cooler_file_writer.cpp index 1d0e542..9307168 100644 --- a/src/cooler_file_writer.cpp +++ b/src/cooler_file_writer.cpp @@ -213,7 +213,8 @@ void CoolerFileWriter::bind(nb::module_ &m) { "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " "chrom2, start2, end2, count]."); // NOLINTBEGIN(*-avoid-magic-numbers) - writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN", + writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, + nb::call_guard(), nb::arg("log_lvl") = "WARN", nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000, "Write interactions to file.", nb::rv_policy::move); // NOLINTEND(*-avoid-magic-numbers) diff --git a/src/hic_file_writer.cpp b/src/hic_file_writer.cpp index a5eadea..a651f7d 100644 --- a/src/hic_file_writer.cpp +++ b/src/hic_file_writer.cpp @@ -196,7 +196,8 @@ void HiCFileWriter::bind(nb::module_ &m) { "Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. " "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " "chrom2, start2, end2, count]."); - writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN", + writer.def("finalize", &hictkpy::HiCFileWriter::finalize, + nb::call_guard(), nb::arg("log_lvl") = "WARN", "Write interactions to file.", nb::rv_policy::move); } diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index afca421..ebb57b7 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -6,7 +6,6 @@ #include #include -#include #include "hictkpy/bin_table.hpp" #include "hictkpy/cooler_file_writer.hpp" diff --git a/src/include/hictkpy/logger.hpp b/src/include/hictkpy/logger.hpp index 619cc02..11143d7 100644 --- a/src/include/hictkpy/logger.hpp +++ b/src/include/hictkpy/logger.hpp @@ -9,24 +9,20 @@ #include #include -#include "hictkpy/nanobind.hpp" - namespace hictkpy { class Logger { - nanobind::object _py_logger{}; std::shared_ptr _logger{}; public: - explicit Logger(spdlog::level::level_enum level_ = spdlog::level::warn); - explicit Logger(std::string_view level_); + explicit Logger(spdlog::level::level_enum level = spdlog::level::warn); + explicit Logger(std::string_view level); [[nodiscard]] std::shared_ptr get_logger(); private: - [[nodiscard]] static nanobind::object init_py_logger(); [[nodiscard]] static std::shared_ptr init_cpp_logger( - spdlog::level::level_enum level_, nanobind::object py_logger); + spdlog::level::level_enum level); }; } // namespace hictkpy diff --git a/src/logger.cpp b/src/logger.cpp index 6829c53..e663074 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -7,6 +7,11 @@ #include #include +#include +#include +#include +#include + #include "hictkpy/common.hpp" #include "hictkpy/nanobind.hpp" @@ -38,12 +43,11 @@ namespace hictkpy { // NOLINTEND(*-avoid-magic-numbers) } -Logger::Logger(spdlog::level::level_enum level_) - : _py_logger(init_py_logger()), _logger(init_cpp_logger(level_, _py_logger)) {} +Logger::Logger(spdlog::level::level_enum level) : _logger(init_cpp_logger(level)) {} -Logger::Logger(std::string_view level_) : Logger(spdlog::level::from_str(std::string{level_})) {} +Logger::Logger(std::string_view level) : Logger(spdlog::level::from_str(std::string{level})) {} -nb::object Logger::init_py_logger() { +[[nodiscard]] static nb::object get_py_logger() { const auto logging = nb::module_::import_("logging"); return logging.attr("getLogger")("hictkpy"); } @@ -51,13 +55,13 @@ nb::object Logger::init_py_logger() { std::shared_ptr Logger::get_logger() { return _logger; } std::shared_ptr Logger::init_cpp_logger( - [[maybe_unused]] spdlog::level::level_enum level_, [[maybe_unused]] nb::object py_logger) { + [[maybe_unused]] spdlog::level::level_enum level_) { #ifndef _WIN32 auto sink = std::make_shared( - // NOLINTNEXTLINE(*-unnecessary-value-param) - [logger = py_logger](const spdlog::details::log_msg& msg) { - logger.attr("log")(to_py_lvl(msg.level), - std::string_view{msg.payload.data(), msg.payload.size()}); + [logger = get_py_logger()](const spdlog::details::log_msg& msg) mutable { + [[maybe_unused]] const nb::gil_scoped_acquire gil{}; + auto msg_py = nb::str(msg.payload.data(), msg.payload.size()); + logger.attr("log")(to_py_lvl(msg.level), msg_py); }); sink->set_pattern("%v"); From 679f4bbaa6e19b1b327f3bf868afc1b226bf67d3 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Tue, 26 Nov 2024 14:42:29 +0100 Subject: [PATCH 2/2] Fix deadlock in logging code --- src/cooler_file_writer.cpp | 8 ++++++-- src/hic_file_writer.cpp | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/cooler_file_writer.cpp b/src/cooler_file_writer.cpp index 9307168..2229975 100644 --- a/src/cooler_file_writer.cpp +++ b/src/cooler_file_writer.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -86,12 +87,13 @@ void CoolerFileWriter::add_pixels(const nb::object &df) { "caught attempt to add_pixels to a .cool file that has already been finalized!"); } - const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); - const auto cell_id = fmt::to_string(_w->cells().size()); auto attrs = hictk::cooler::Attributes::init(_w->resolution()); attrs.assembly = _w->attributes().assembly; + auto lck = std::make_optional(); + const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); + const auto dtype = df.attr("__getitem__")("count").attr("dtype"); const auto dtype_str = nb::cast(dtype.attr("__str__")()); const auto var = map_dtype_to_type(dtype_str); @@ -101,6 +103,7 @@ void CoolerFileWriter::add_pixels(const nb::object &df) { using N = hictk::remove_cvref_t; const auto pixels = coo_format ? coo_df_to_thin_pixels(df, true) : bg2_df_to_thin_pixels(_w->bins(), df, true); + lck.reset(); auto clr = _w->create_cell(cell_id, std::move(attrs), hictk::cooler::DEFAULT_HDF5_CACHE_SIZE * 4, 1); @@ -208,6 +211,7 @@ void CoolerFileWriter::bind(nb::module_ &m) { nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move); writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels, + nb::call_guard(), nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"), "Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. " "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " diff --git a/src/hic_file_writer.cpp b/src/hic_file_writer.cpp index a651f7d..edad8a9 100644 --- a/src/hic_file_writer.cpp +++ b/src/hic_file_writer.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -127,10 +128,12 @@ void HiCFileWriter::add_pixels(const nb::object &df) { "caught attempt to add_pixels to a .hic file that has already been finalized!"); } + auto lck = std::make_optional(); const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); const auto pixels = coo_format ? coo_df_to_thin_pixels(df, false) : bg2_df_to_thin_pixels(_w.bins(_w.resolutions().front()), df, false); + lck.reset(); SPDLOG_INFO(FMT_STRING("adding {} pixels to file \"{}\"..."), pixels.size(), _w.path()); _w.add_pixels(_w.resolutions().front(), pixels.begin(), pixels.end()); } @@ -192,6 +195,7 @@ void HiCFileWriter::bind(nb::module_ &m) { nb::sig("def bins(self, resolution: int) -> hictkpy.BinTable"), nb::rv_policy::move); writer.def("add_pixels", &hictkpy::HiCFileWriter::add_pixels, + nb::call_guard(), nb::sig("def add_pixels(self, pixels: pd.DataFrame) -> None"), nb::arg("pixels"), "Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. " "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "