diff --git a/.github/workflows/build-conan-deps.yml b/.github/workflows/build-conan-deps.yml index 755b299..a231126 100644 --- a/.github/workflows/build-conan-deps.yml +++ b/.github/workflows/build-conan-deps.yml @@ -107,6 +107,30 @@ jobs: path: ${{ env.CONAN_HOME }}/p lookup-only: true + - name: Configure Conan + if: steps.cache-conan.outputs.cache-hit != 'true' + run: | + cat << 'EOF' | tee script.sh > /dev/null + #!usr/bin/env bash + set -u + set -e + + conan_version="$1" + + PATH="/opt/python/cp312-cp312/bin:$PATH" + pip install "conan==$conan_version" + + conan remote update conancenter --url https://center2.conan.io + EOF + + chmod 755 script.sh + + docker run \ + -e "CONAN_HOME=$CONAN_HOME" \ + -v "$PWD/script.sh:/tmp/script.sh:ro" \ + -v "$CONAN_HOME:$CONAN_HOME" \ + "$IMAGE" /tmp/script.sh '${{ inputs.conan-version }}' + - name: Clean Conan cache (pre-build) if: steps.cache-conan.outputs.cache-hit != 'true' run: | @@ -269,7 +293,9 @@ jobs: - name: Configure Conan if: steps.cache-conan.outputs.cache-hit != 'true' - run: conan profile detect --force + run: | + conan profile detect --force + conan remote update conancenter --url https://center2.conan.io - name: Clean Conan cache (pre-build) if: steps.cache-conan.outputs.cache-hit != 'true' @@ -380,6 +406,8 @@ jobs: sed -i 's/compiler\.cppstd=.*/compiler.cppstd=${{ inputs.cppstd }}/' "$conan_profile" + conan remote update conancenter --url https://center2.conan.io + - name: Clean Conan cache (pre-build) if: steps.cache-conan.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/fuzzy-testing.yml b/.github/workflows/fuzzy-testing.yml index 47901a7..d321645 100644 --- a/.github/workflows/fuzzy-testing.yml +++ b/.github/workflows/fuzzy-testing.yml @@ -143,6 +143,10 @@ jobs: key: conan-${{ steps.cache-key.outputs.key }} path: ${{ env.CONAN_HOME }}/p + - name: Configure Conan + if: steps.cache-conan.outputs.cache-hit != 'true' + run: conan remote update conancenter --url https://center2.conan.io + - name: Clean Conan cache (pre-build) if: steps.cache-conan.outputs.cache-hit != 'true' run: | diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index e081639..776a38d 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -60,7 +60,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Add requirements - run: python -m pip install --upgrade wheel setuptools + run: python -m pip install --upgrade conan wheel setuptools - name: Generate cache key id: cache-key @@ -69,6 +69,9 @@ jobs: echo "conan-key=pip-${{ matrix.os }}-$hash" >> $GITHUB_OUTPUT + - name: Configure Conan + run: conan remote update conancenter --url https://center2.conan.io + - name: Restore Conan cache id: cache-conan uses: actions/cache/restore@v4 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 836a98e..8625761 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -228,7 +228,7 @@ jobs: fail-on-cache-miss: true - name: Build wheels - uses: pypa/cibuildwheel@v2.21 + uses: pypa/cibuildwheel@v2.22 with: only: ${{ matrix.wheel-config }} env: diff --git a/docs/api/cooler.rst b/docs/api/cooler.rst index f2dce02..05c2a7e 100644 --- a/docs/api/cooler.rst +++ b/docs/api/cooler.rst @@ -23,6 +23,7 @@ Cooler API .. automethod:: __init__ .. automethod:: add_pixels + .. automethod:: bins .. automethod:: chromosomes .. automethod:: finalize .. automethod:: path diff --git a/docs/api/generic.rst b/docs/api/generic.rst index 007d15f..dbc606c 100644 --- a/docs/api/generic.rst +++ b/docs/api/generic.rst @@ -19,7 +19,10 @@ Generic API .. automethod:: __init__ .. automethod:: __getitem__ + .. automethod:: attributes .. automethod:: chromosomes + .. automethod:: is_hic + .. automethod:: is_mcool .. automethod:: path .. automethod:: resolutions diff --git a/docs/api/hic.rst b/docs/api/hic.rst index 488c2a1..a22e277 100644 --- a/docs/api/hic.rst +++ b/docs/api/hic.rst @@ -12,6 +12,7 @@ Hi-C API .. automethod:: __init__ .. automethod:: add_pixels + .. automethod:: bins .. automethod:: chromosomes .. automethod:: finalize .. automethod:: path diff --git a/src/bin_table.cpp b/src/bin_table.cpp index ddb12a4..89bb78e 100644 --- a/src/bin_table.cpp +++ b/src/bin_table.cpp @@ -395,6 +395,8 @@ nb::object BinTable::to_df(std::optional range, std::vector starts(n); std::vector ends(n); + const auto chrom_id_offset = static_cast(_bins->chromosomes().at(0).is_all()); + std::visit( [&](const auto& bins) { const auto [first_bin, last_bin] = !range.has_value() @@ -403,7 +405,7 @@ nb::object BinTable::to_df(std::optional range, std::size_t i = 0; std::for_each(first_bin, last_bin, [&](const auto& bin) { bin_ids[i] = bin.id(); - chrom_ids[i] = static_cast(bin.chrom().id()); + chrom_ids[i] = static_cast(bin.chrom().id() - chrom_id_offset); starts[i] = bin.start(); ends[i] = bin.end(); ++i; @@ -411,8 +413,8 @@ nb::object BinTable::to_df(std::optional range, }, _bins->get()); - return make_bin_table_df(chrom_names(), std::move(chrom_ids), std::move(starts), std::move(ends), - std::move(bin_ids)); + return make_bin_table_df(chrom_names(false), std::move(chrom_ids), std::move(starts), + std::move(ends), std::move(bin_ids)); } std::shared_ptr BinTable::get() const noexcept { return _bins; } diff --git a/src/cooler_file_writer.cpp b/src/cooler_file_writer.cpp index f432f63..8d47091 100644 --- a/src/cooler_file_writer.cpp +++ b/src/cooler_file_writer.cpp @@ -12,8 +12,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -70,18 +73,27 @@ const hictk::Reference &CoolerFileWriter::chromosomes() const { return ref; } +std::shared_ptr CoolerFileWriter::bins_ptr() const noexcept { + if (!_w) { + return {}; + } + + return _w->bins_ptr(); +} + void CoolerFileWriter::add_pixels(const nb::object &df) { if (!_w.has_value()) { throw std::runtime_error( "caught attempt to add_pixels to a .cool file that has already been finalized!"); } - const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); - const auto cell_id = fmt::to_string(_w->cells().size()); auto attrs = hictk::cooler::Attributes::init(_w->resolution()); attrs.assembly = _w->attributes().assembly; + auto lck = std::make_optional(); + const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); + const auto dtype = df.attr("__getitem__")("count").attr("dtype"); const auto dtype_str = nb::cast(dtype.attr("__str__")()); const auto var = map_dtype_to_type(dtype_str); @@ -91,6 +103,7 @@ void CoolerFileWriter::add_pixels(const nb::object &df) { using N = remove_cvref_t; const auto pixels = coo_format ? coo_df_to_thin_pixels(df, true) : bg2_df_to_thin_pixels(_w->bins(), df, true); + lck.reset(); auto clr = _w->create_cell(cell_id, std::move(attrs), hictk::cooler::DEFAULT_HDF5_CACHE_SIZE * 4, 1); @@ -104,8 +117,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) { var); } -void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str, - std::size_t chunk_size, std::size_t update_freq) { +hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size, + std::size_t update_freq) { if (_finalized) { throw std::runtime_error( fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path)); @@ -142,6 +155,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str, _w.reset(); std::filesystem::remove(sclr_path); // NOLINT // NOLINTEND(*-unchecked-optional-access) + + return hictk::File{_path.string()}; } hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path, @@ -192,16 +207,20 @@ void CoolerFileWriter::bind(nb::module_ &m) { nb::arg("include_ALL") = false, "Get chromosomes sizes as a dictionary mapping names to sizes.", nb::rv_policy::take_ownership); + writer.def("bins", &get_bins_from_object, "Get table of bins.", + nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move); writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels, + nb::call_guard(), nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"), "Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. " "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " "chrom2, start2, end2, count]."); // NOLINTBEGIN(*-avoid-magic-numbers) - writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN", + writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, + nb::call_guard(), nb::arg("log_lvl") = "WARN", nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000, - "Write interactions to file."); + "Write interactions to file.", nb::rv_policy::move); // NOLINTEND(*-avoid-magic-numbers) } } // namespace hictkpy diff --git a/src/file.cpp b/src/file.cpp index e199955..29ba846 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -84,8 +84,9 @@ bool is_cooler(const std::filesystem::path &uri) { bool is_hic(const std::filesystem::path &uri) { return hictk::hic::utils::is_hic_file(uri); } -static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, +static hictkpy::PixelSelector fetch(const hictk::File &f, std::optional range1, + std::optional range2, + std::optional normalization, std::string_view count_type, bool join, std::string_view query_type) { if (count_type != "float" && count_type != "int") { @@ -96,15 +97,17 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range throw std::runtime_error("query_type should be either UCSC or BED"); } - if (normalization != "NONE") { + const hictk::balancing::Method normalization_method{normalization.value_or("NONE")}; + + if (normalization_method != hictk::balancing::Method::NONE()) { count_type = "float"; } - if (range1.empty()) { - assert(range2.empty()); + if (!range1.has_value() || range1->empty()) { + assert(!range2.has_value() || range2->empty()); return std::visit( [&](const auto &ff) { - auto sel = ff.fetch(hictk::balancing::Method{normalization}); + auto sel = ff.fetch(normalization_method); using SelT = decltype(sel); return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, join); @@ -112,20 +115,21 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range f.get()); } - if (range2.empty()) { + if (!range2.has_value() || range2->empty()) { range2 = range1; } const auto query_type_ = query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED; - const auto gi1 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range1}, query_type_); - const auto gi2 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range2}, query_type_); + const auto gi1 = + hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range1}, query_type_); + const auto gi2 = + hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range2}, query_type_); return std::visit( [&](const auto &ff) { - // Workaround bug fixed in https://github.com/paulsengroup/hictk/pull/158 - auto sel = ff.fetch(fmt::format(FMT_STRING("{}"), gi1), fmt::format(FMT_STRING("{}"), gi2), - hictk::balancing::Method(normalization)); + auto sel = ff.fetch(gi1.chrom().name(), gi1.start(), gi1.end(), gi2.chrom().name(), + gi2.start(), gi2.end(), normalization_method); using SelT = decltype(sel); return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, @@ -190,7 +194,7 @@ static nb::dict get_hic_attrs(const hictk::hic::File &hf) { py_attrs["bin_size"] = hf.resolution(); py_attrs["format"] = "HIC"; - py_attrs["format_version"] = hf.version(); + py_attrs["format-version"] = hf.version(); py_attrs["assembly"] = hf.assembly(); py_attrs["format-url"] = "https://github.com/aidenlab/hic-format"; py_attrs["nbins"] = hf.bins().size(); @@ -301,14 +305,15 @@ void declare_file_class(nb::module_ &m) { file.def("resolution", &hictk::File::resolution, "Get the bin size in bp."); file.def("nbins", &hictk::File::nbins, "Get the total number of bins."); - file.def("nchroms", &hictk::File::nchroms, "Get the total number of chromosomes."); + file.def("nchroms", &hictk::File::nchroms, nb::arg("include_ALL") = false, + "Get the total number of chromosomes."); file.def("attributes", &file::attributes, "Get file attributes as a dictionary.", nb::rv_policy::take_ownership); - file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "", - nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int", - nb::arg("join") = false, nb::arg("query_type") = "UCSC", + file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = nb::none(), + nb::arg("range2") = nb::none(), nb::arg("normalization") = nb::none(), + nb::arg("count_type") = "int", nb::arg("join") = false, nb::arg("query_type") = "UCSC", "Fetch interactions overlapping a region of interest.", nb::rv_policy::move); file.def("avail_normalizations", &file::avail_normalizations, diff --git a/src/hic_file_writer.cpp b/src/hic_file_writer.cpp index 8815e9a..edad8a9 100644 --- a/src/hic_file_writer.cpp +++ b/src/hic_file_writer.cpp @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -73,7 +75,7 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const hictkpy:: assembly, n_threads, chunk_size, tmpdir, compression_lvl, skip_all_vs_all_matrix) {} -void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { +hictk::File HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { if (_finalized) { throw std::runtime_error( fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _w.path())); @@ -93,28 +95,45 @@ void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) { } SPDLOG_INFO(FMT_STRING("successfully finalized \"{}\"!"), _w.path()); spdlog::default_logger()->set_level(previous_lvl); + + return hictk::File{std::string{_w.path()}, _w.resolutions().front()}; } std::filesystem::path HiCFileWriter::path() const noexcept { return std::filesystem::path{_w.path()}; } -const std::vector &HiCFileWriter::resolutions() const noexcept { - return _w.resolutions(); +auto HiCFileWriter::resolutions() const { + using WeightVector = nb::ndarray, nb::c_contig, std::uint32_t>; + + // NOLINTNEXTLINE + auto *resolutions_ptr = new std::vector(_w.resolutions()); + + auto capsule = nb::capsule(resolutions_ptr, [](void *vect_ptr) noexcept { + delete reinterpret_cast *>(vect_ptr); // NOLINT + }); + + return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule}; } const hictk::Reference &HiCFileWriter::chromosomes() const { return _w.chromosomes(); } +hictkpy::BinTable HiCFileWriter::bins(std::uint32_t resolution) const { + return hictkpy::BinTable{_w.bins(resolution)}; +} + void HiCFileWriter::add_pixels(const nb::object &df) { if (_finalized) { throw std::runtime_error( "caught attempt to add_pixels to a .hic file that has already been finalized!"); } + auto lck = std::make_optional(); const auto coo_format = nb::cast(df.attr("columns").attr("__contains__")("bin1_id")); const auto pixels = coo_format ? coo_df_to_thin_pixels(df, false) : bg2_df_to_thin_pixels(_w.bins(_w.resolutions().front()), df, false); + lck.reset(); SPDLOG_INFO(FMT_STRING("adding {} pixels to file \"{}\"..."), pixels.size(), _w.path()); _w.add_pixels(_w.resolutions().front(), pixels.begin(), pixels.end()); } @@ -167,19 +186,23 @@ void HiCFileWriter::bind(nb::module_ &m) { writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.", nb::rv_policy::move); writer.def("resolutions", &hictkpy::HiCFileWriter::resolutions, - "Get the list of resolutions in bp.", nb::rv_policy::move); + "Get the list of resolutions in bp.", nb::rv_policy::take_ownership); writer.def("chromosomes", &get_chromosomes_from_object, nb::arg("include_ALL") = false, "Get chromosomes sizes as a dictionary mapping names to sizes.", nb::rv_policy::take_ownership); + writer.def("bins", &hictkpy::HiCFileWriter::bins, "Get table of bins for the given resolution.", + nb::sig("def bins(self, resolution: int) -> hictkpy.BinTable"), nb::rv_policy::move); writer.def("add_pixels", &hictkpy::HiCFileWriter::add_pixels, + nb::call_guard(), nb::sig("def add_pixels(self, pixels: pd.DataFrame) -> None"), nb::arg("pixels"), "Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. " "either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, " "chrom2, start2, end2, count]."); - writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN", - "Write interactions to file."); + writer.def("finalize", &hictkpy::HiCFileWriter::finalize, + nb::call_guard(), nb::arg("log_lvl") = "WARN", + "Write interactions to file.", nb::rv_policy::move); } } // namespace hictkpy diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index afca421..ebb57b7 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -6,7 +6,6 @@ #include #include -#include #include "hictkpy/bin_table.hpp" #include "hictkpy/cooler_file_writer.hpp" diff --git a/src/include/hictkpy/cooler_file_writer.hpp b/src/include/hictkpy/cooler_file_writer.hpp index 0c5986d..ec90b4b 100644 --- a/src/include/hictkpy/cooler_file_writer.hpp +++ b/src/include/hictkpy/cooler_file_writer.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -39,10 +40,12 @@ class CoolerFileWriter { [[nodiscard]] std::uint32_t resolution() const noexcept; [[nodiscard]] const hictk::Reference& chromosomes() const; + [[nodiscard]] std::shared_ptr bins_ptr() const noexcept; void add_pixels(const nanobind::object& df); - void finalize(std::string_view log_lvl_str, std::size_t chunk_size, std::size_t update_frequency); + [[nodiscard]] hictk::File finalize(std::string_view log_lvl_str, std::size_t chunk_size, + std::size_t update_frequency); [[nodiscard]] std::string repr() const; static void bind(nanobind::module_& m); diff --git a/src/include/hictkpy/dynamic_1d_array.hpp b/src/include/hictkpy/dynamic_1d_array.hpp deleted file mode 100644 index e592e89..0000000 --- a/src/include/hictkpy/dynamic_1d_array.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include - -#include "hictkpy/nanobind.hpp" - -namespace hictkpy { - -template -struct Dynamic1DA { - private: - using BufferT = nanobind::ndarray, T>; - using VectorT = decltype(std::declval().view()); - nanobind::object _dtype{}; - nanobind::object _np_array{}; - BufferT _buff{}; - VectorT _vector{}; - - std::int64_t _size{}; - std::int64_t _capacity{}; - - public: - explicit Dynamic1DA(std::size_t size_ = 1000); - void push_back(T x); - void emplace_back(T &&x); - void resize(std::int64_t new_size); - void grow(); - void shrink_to_fit(); - [[nodiscard]] auto operator()() -> BufferT; - - private: - [[nodiscard]] static nanobind::object np(); -}; - -} // namespace hictkpy - -#include "./impl/dynamic_1d_array_impl.hpp" diff --git a/src/include/hictkpy/hic_file_writer.hpp b/src/include/hictkpy/hic_file_writer.hpp index 5eeb60a..1041b72 100644 --- a/src/include/hictkpy/hic_file_writer.hpp +++ b/src/include/hictkpy/hic_file_writer.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -40,13 +41,14 @@ class HiCFileWriter { bool skip_all_vs_all_matrix); [[nodiscard]] std::filesystem::path path() const noexcept; - [[nodiscard]] const std::vector& resolutions() const noexcept; + [[nodiscard]] auto resolutions() const; [[nodiscard]] const hictk::Reference& chromosomes() const; + [[nodiscard]] hictkpy::BinTable bins(std::uint32_t resolution) const; void add_pixels(const nanobind::object& df); - void finalize(std::string_view log_lvl_str); + [[nodiscard]] hictk::File finalize(std::string_view log_lvl_str); [[nodiscard]] std::string repr() const; diff --git a/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp b/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp deleted file mode 100644 index ea83905..0000000 --- a/src/include/hictkpy/impl/dynamic_1d_array_impl.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include -#include - -#include "hictkpy/common.hpp" -#include "hictkpy/nanobind.hpp" - -namespace hictkpy { - -template -inline Dynamic1DA::Dynamic1DA(std::size_t size_) - : _dtype(np().attr("dtype")(map_type_to_dtype())), - _np_array( - np().attr("empty")(static_cast(size_), nanobind::arg("dtype") = _dtype)), - _buff(nanobind::cast(_np_array)), - _vector(_buff.view()), - _capacity(static_cast(size_)) {} - -template -inline void Dynamic1DA::push_back(T x) { - if (_capacity == _size) { - grow(); - } - _vector(_size++) = x; -} - -template -inline void Dynamic1DA::emplace_back(T &&x) { - if (_capacity == _size) { - grow(); - } - _vector(_size++) = std::move(x); -} - -template -inline void Dynamic1DA::resize(std::int64_t new_size) { - if (_capacity == new_size) { - return; - } - auto new_array = np().attr("empty")(new_size, nanobind::arg("dtype") = _dtype); - auto new_buff = nanobind::cast(new_array); - auto new_vector = new_buff.view(); - - _capacity = new_size; - _size = std::min(_capacity, _size); - std::copy(_vector.data(), _vector.data() + static_cast(_size), new_vector.data()); - - std::swap(new_array, _np_array); - std::swap(new_buff, _buff); - std::swap(new_vector, _vector); -} - -template -inline void Dynamic1DA::grow() { - resize(static_cast(_buff.size() * 2)); -} - -template -inline void Dynamic1DA::shrink_to_fit() { - resize(_size); -} - -template -[[nodiscard]] auto Dynamic1DA::operator()() -> BufferT { - shrink_to_fit(); - return _buff; -} - -template -[[nodiscard]] nanobind::object Dynamic1DA::np() { - return nanobind::module_::import_("numpy"); -} - -} // namespace hictkpy diff --git a/src/include/hictkpy/logger.hpp b/src/include/hictkpy/logger.hpp index 619cc02..11143d7 100644 --- a/src/include/hictkpy/logger.hpp +++ b/src/include/hictkpy/logger.hpp @@ -9,24 +9,20 @@ #include #include -#include "hictkpy/nanobind.hpp" - namespace hictkpy { class Logger { - nanobind::object _py_logger{}; std::shared_ptr _logger{}; public: - explicit Logger(spdlog::level::level_enum level_ = spdlog::level::warn); - explicit Logger(std::string_view level_); + explicit Logger(spdlog::level::level_enum level = spdlog::level::warn); + explicit Logger(std::string_view level); [[nodiscard]] std::shared_ptr get_logger(); private: - [[nodiscard]] static nanobind::object init_py_logger(); [[nodiscard]] static std::shared_ptr init_cpp_logger( - spdlog::level::level_enum level_, nanobind::object py_logger); + spdlog::level::level_enum level); }; } // namespace hictkpy diff --git a/src/include/hictkpy/pixel_selector.hpp b/src/include/hictkpy/pixel_selector.hpp index a1c3c33..59cffdd 100644 --- a/src/include/hictkpy/pixel_selector.hpp +++ b/src/include/hictkpy/pixel_selector.hpp @@ -49,11 +49,10 @@ struct PixelSelector { [[nodiscard]] std::string repr() const; - using PixelCoordTuple = - std::tuple; + using GenomicCoordTuple = std::tuple; - [[nodiscard]] auto get_coord1() const -> PixelCoordTuple; - [[nodiscard]] auto get_coord2() const -> PixelCoordTuple; + [[nodiscard]] auto get_coord1() const -> GenomicCoordTuple; + [[nodiscard]] auto get_coord2() const -> GenomicCoordTuple; [[nodiscard]] nanobind::iterator make_iterable() const; [[nodiscard]] nanobind::object to_arrow(std::string_view span) const; diff --git a/src/logger.cpp b/src/logger.cpp index 6829c53..e663074 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -7,6 +7,11 @@ #include #include +#include +#include +#include +#include + #include "hictkpy/common.hpp" #include "hictkpy/nanobind.hpp" @@ -38,12 +43,11 @@ namespace hictkpy { // NOLINTEND(*-avoid-magic-numbers) } -Logger::Logger(spdlog::level::level_enum level_) - : _py_logger(init_py_logger()), _logger(init_cpp_logger(level_, _py_logger)) {} +Logger::Logger(spdlog::level::level_enum level) : _logger(init_cpp_logger(level)) {} -Logger::Logger(std::string_view level_) : Logger(spdlog::level::from_str(std::string{level_})) {} +Logger::Logger(std::string_view level) : Logger(spdlog::level::from_str(std::string{level})) {} -nb::object Logger::init_py_logger() { +[[nodiscard]] static nb::object get_py_logger() { const auto logging = nb::module_::import_("logging"); return logging.attr("getLogger")("hictkpy"); } @@ -51,13 +55,13 @@ nb::object Logger::init_py_logger() { std::shared_ptr Logger::get_logger() { return _logger; } std::shared_ptr Logger::init_cpp_logger( - [[maybe_unused]] spdlog::level::level_enum level_, [[maybe_unused]] nb::object py_logger) { + [[maybe_unused]] spdlog::level::level_enum level_) { #ifndef _WIN32 auto sink = std::make_shared( - // NOLINTNEXTLINE(*-unnecessary-value-param) - [logger = py_logger](const spdlog::details::log_msg& msg) { - logger.attr("log")(to_py_lvl(msg.level), - std::string_view{msg.payload.data(), msg.payload.size()}); + [logger = get_py_logger()](const spdlog::details::log_msg& msg) mutable { + [[maybe_unused]] const nb::gil_scoped_acquire gil{}; + auto msg_py = nb::str(msg.payload.data(), msg.payload.size()); + logger.attr("log")(to_py_lvl(msg.level), msg_py); }); sink->set_pattern("%v"); diff --git a/src/multires_file.cpp b/src/multires_file.cpp index ec17cab..f8b7121 100644 --- a/src/multires_file.cpp +++ b/src/multires_file.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "hictkpy/nanobind.hpp" #include "hictkpy/reference.hpp" @@ -27,6 +28,56 @@ static std::string repr(const hictk::MultiResFile& mrf) { static std::filesystem::path get_path(const hictk::MultiResFile& mrf) { return mrf.path(); } +[[nodiscard]] static auto get_resolutions(const hictk::MultiResFile& f) { + using WeightVector = nb::ndarray, nb::c_contig, std::uint32_t>; + + // NOLINTNEXTLINE + auto* resolutions_ptr = new std::vector(f.resolutions()); + + auto capsule = nb::capsule(resolutions_ptr, [](void* vect_ptr) noexcept { + delete reinterpret_cast*>(vect_ptr); // NOLINT + }); + + return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule}; +} + +static nb::dict get_attrs(const hictk::hic::File& hf) { + nb::dict py_attrs; + + py_attrs["format"] = "HIC"; + py_attrs["format-version"] = hf.version(); + py_attrs["assembly"] = hf.assembly(); + py_attrs["format-url"] = "https://github.com/aidenlab/hic-format"; + py_attrs["nchroms"] = hf.nchroms(); + + for (const auto& [k, v] : hf.attributes()) { + py_attrs[nb::cast(k)] = v; + } + + return py_attrs; +} + +static nb::dict get_attrs(const hictk::cooler::MultiResFile& mclr) { + nb::dict py_attrs; + + py_attrs["format"] = mclr.attributes().format; + py_attrs["format-version"] = mclr.attributes().format_version; + py_attrs["format-url"] = "https://github.com/open2c/cooler"; + py_attrs["assembly"] = + mclr.open(mclr.resolutions().front()).attributes().assembly.value_or("unknown"); + py_attrs["nchroms"] = mclr.chromosomes().size(); + + return py_attrs; +} + +static nb::dict attributes(const hictk::MultiResFile& f) { + auto attrs = f.is_hic() ? get_attrs(f.open(f.resolutions().front()).get()) + : get_attrs(hictk::cooler::MultiResFile{f.path()}); + attrs["resolutions"] = get_resolutions(f); + + return attrs; +} + bool is_mcool_file(const std::filesystem::path& path) { return bool(hictk::cooler::utils::is_multires_file(path.string())); } @@ -40,12 +91,17 @@ void declare_multires_file_class(nb::module_& m) { mres_file.def("__repr__", &multires_file::repr, nb::rv_policy::move); mres_file.def("path", &multires_file::get_path, "Get the file path.", nb::rv_policy::move); + mres_file.def("is_mcool", &hictk::MultiResFile::is_mcool, + "Test whether the file is in .mcool format."); + mres_file.def("is_hic", &hictk::MultiResFile::is_hic, "Test whether the file is in .hic format."); mres_file.def("chromosomes", &get_chromosomes_from_object, nb::arg("include_ALL") = false, "Get chromosomes sizes as a dictionary mapping names to sizes.", nb::rv_policy::take_ownership); - mres_file.def("resolutions", &hictk::MultiResFile::resolutions, - "Get the list of available resolutions.", nb::rv_policy::copy); + mres_file.def("resolutions", &get_resolutions, "Get the list of available resolutions.", + nb::rv_policy::take_ownership); + mres_file.def("attributes", &multires_file::attributes, "Get file attributes as a dictionary.", + nb::rv_policy::take_ownership); mres_file.def("__getitem__", &hictk::MultiResFile::open, "Open the Cooler or .hic file corresponding to the resolution given as input.", nb::rv_policy::move); diff --git a/src/pixel_selector.cpp b/src/pixel_selector.cpp index 1238876..ce4a7ef 100644 --- a/src/pixel_selector.cpp +++ b/src/pixel_selector.cpp @@ -71,7 +71,9 @@ std::string PixelSelector::repr() const { count_type_to_str(pixel_count)); } - return fmt::format(FMT_STRING("PixelSelector({}, {}; {}; {})"), coord1(), coord2(), + return fmt::format(FMT_STRING("PixelSelector({}:{}-{}; {}:{}-{}; {}; {})"), + coord1().bin1.chrom().name(), coord1().bin1.start(), coord1().bin2.end(), + coord2().bin1.chrom().name(), coord2().bin1.start(), coord2().bin2.end(), pixel_format == PixelFormat::COO ? "COO" : "BG2", count_type_to_str(pixel_count)); } @@ -110,16 +112,24 @@ const hictk::BinTable& PixelSelector::bins() const noexcept { return std::visit([](const auto& s) -> const hictk::BinTable& { return s->bins(); }, selector); } -auto PixelSelector::get_coord1() const -> PixelCoordTuple { - const auto c = coord1(); - return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(), - c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())}; +[[nodiscard]] static PixelSelector::GenomicCoordTuple coords_to_tuple( + const hictk::PixelCoordinates& coords, const hictk::BinTable& bins) { + if (!coords) { + return {"ALL", 0, static_cast(bins.size())}; + } + + assert(coords.bin1.chrom() == coords.bin2.chrom()); + + return {std::string{coords.bin1.chrom().name()}, static_cast(coords.bin1.start()), + static_cast(coords.bin2.end())}; +} + +auto PixelSelector::get_coord1() const -> GenomicCoordTuple { + return coords_to_tuple(coord1(), bins()); } -auto PixelSelector::get_coord2() const -> PixelCoordTuple { - const auto c = coord2(); - return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(), - c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())}; +auto PixelSelector::get_coord2() const -> GenomicCoordTuple { + return coords_to_tuple(coord2(), bins()); } template diff --git a/test/test_bin_table.py b/test/test_bin_table.py index 5e35ce3..3c7d3b9 100644 --- a/test/test_bin_table.py +++ b/test/test_bin_table.py @@ -61,17 +61,16 @@ def test_getters(self): bins.get_id("abc", 100) @pytest.mark.skipif( - not numpy_avail() or not pandas_avail() or not pyarrow_avail(), - reason="numpy, pandas, or pyarrow are not available", + not pandas_avail() or not pyarrow_avail(), + reason="pandas or pyarrow are not available", ) def test_vectorized_getters(self): - import numpy as np chroms = {"chr1": 1000, "chr2": 500} bins = hictkpy.BinTable(chroms, 100) - assert len(bins.get(np.array([1, 1]))) == 2 - assert len(bins.get_ids(np.array(["chr1", "chr1"]), np.array([1, 1]))) == 2 + assert len(bins.get([1, 1])) == 2 + assert len(bins.get_ids(["chr1", "chr1"], [1, 1])) == 2 @pytest.mark.skipif(not pandas_avail() or not pyarrow_avail(), reason="pandas is not available") def test_merge(self): diff --git a/test/test_file_accessors.py b/test/test_file_accessors.py index 923ea16..ee621ce 100644 --- a/test/test_file_accessors.py +++ b/test/test_file_accessors.py @@ -27,7 +27,7 @@ class TestClass: def test_attributes(self, file, resolution): f = hictkpy.File(file, resolution) assert f.resolution() == 100_000 - # assert f.nchroms() == 8 # TODO enable after merging https://github.com/paulsengroup/hictk/pull/294 + assert f.nchroms() == 8 assert f.nbins() == 1380 assert "chr2L" in f.chromosomes() diff --git a/test/test_file_creation_cool.py b/test/test_file_creation_cool.py index 5d2ebec..1685326 100644 --- a/test/test_file_creation_cool.py +++ b/test/test_file_creation_cool.py @@ -31,6 +31,21 @@ def setup_method(): logging.basicConfig(level="INFO", force=True) logging.getLogger().setLevel("INFO") + def test_accessors(self, file, resolution, tmpdir): + bins = hictkpy.File(file, resolution).bins() + + path = tmpdir / "test.cool" + w = hictkpy.cooler.FileWriter(path, bins) + + assert str(w).startswith("CoolFileWriter(") + assert w.path() == path + if resolution is None: + assert w.resolution() == 0 + else: + assert w.resolution() == resolution + assert w.chromosomes() == bins.chromosomes() + assert len(w.bins().to_df().compare(bins.to_df())) == 0 + def test_file_creation_thin_pixel(self, file, resolution, tmpdir): f = hictkpy.File(file, resolution) if f.bins().type() != "fixed": @@ -39,7 +54,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): df = f.fetch(join=False).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test1.cool" + path = tmpdir / "test.cool" w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution()) chunk_size = 1000 @@ -47,7 +62,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -56,7 +71,6 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation(self, file, resolution, tmpdir): @@ -67,7 +81,7 @@ def test_file_creation(self, file, resolution, tmpdir): df = f.fetch(join=True).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test2.cool" + path = tmpdir / "test.cool" w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution()) chunk_size = 1000 @@ -75,7 +89,7 @@ def test_file_creation(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -84,7 +98,6 @@ def test_file_creation(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_bin_table(self, file, resolution, tmpdir): @@ -93,7 +106,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): df = f.fetch(join=True).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test2.cool" + path = tmpdir / "test.cool" w = hictkpy.cooler.FileWriter(path, f.bins()) chunk_size = 1000 @@ -101,7 +114,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -110,7 +123,6 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_float_counts(self, file, resolution, tmpdir): @@ -122,7 +134,7 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir): df["count"] += 0.12345 expected_sum = df["count"].sum() - path = tmpdir / "test3.cool" + path = tmpdir / "test.cool" w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution()) chunk_size = 1000 @@ -130,7 +142,7 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize("info", 100_000, 100_000) + f = w.finalize("info", 100_000, 100_000) with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -139,5 +151,4 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert pytest.approx(f.fetch(count_type="float").sum()) == expected_sum diff --git a/test/test_file_creation_hic.py b/test/test_file_creation_hic.py index 194caca..347ef20 100644 --- a/test/test_file_creation_hic.py +++ b/test/test_file_creation_hic.py @@ -31,6 +31,20 @@ def setup_method(): logging.basicConfig(level="INFO", force=True) logging.getLogger().setLevel("INFO") + def test_accessors(self, file, resolution, tmpdir): + bins = hictkpy.File(file, resolution).bins() + if bins.type() != "fixed": + pytest.skip(f'BinTable of file "{file}" does not have fixed bins.') + + path = tmpdir / "test.hic" + w = hictkpy.hic.FileWriter(path, bins) + + assert str(w).startswith("HiCFileWriter(") + assert w.path() == path + assert w.resolutions() == [resolution] + assert w.chromosomes() == bins.chromosomes() + assert len(w.bins(resolution).to_df().compare(bins.to_df())) == 0 + def test_file_creation_thin_pixel(self, file, resolution, tmpdir): f = hictkpy.File(file, resolution) if f.bins().type() != "fixed": @@ -39,7 +53,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): df = f.fetch(join=False).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test1.hic" + path = tmpdir / "test.hic" w = hictkpy.hic.FileWriter(path, f.chromosomes(), f.resolution()) chunk_size = 1000 @@ -47,7 +61,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -56,7 +70,6 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation(self, file, resolution, tmpdir): @@ -67,7 +80,7 @@ def test_file_creation(self, file, resolution, tmpdir): df = f.fetch(join=True).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test2.hic" + path = tmpdir / "test.hic" w = hictkpy.hic.FileWriter(path, f.chromosomes(), f.resolution()) chunk_size = 1000 @@ -75,7 +88,7 @@ def test_file_creation(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -84,7 +97,6 @@ def test_file_creation(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum def test_file_creation_bin_table(self, file, resolution, tmpdir): @@ -93,7 +105,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): df = f.fetch(join=True).to_df() expected_sum = df["count"].sum() - path = tmpdir / "test2.hic" + path = tmpdir / "test.hic" if f.bins().type() != "fixed": with pytest.raises(Exception): hictkpy.hic.FileWriter(path, f.bins()) @@ -106,7 +118,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): end = start + chunk_size w.add_pixels(df[start:end]) - w.finalize() + f = w.finalize() with pytest.raises(Exception): w.add_pixels(df) with pytest.raises(Exception): @@ -115,5 +127,4 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir): del w gc.collect() - f = hictkpy.File(path, resolution) assert f.fetch().sum() == expected_sum diff --git a/test/test_file_validators.py b/test/test_file_validators.py index a1ad54e..aa6a091 100644 --- a/test/test_file_validators.py +++ b/test/test_file_validators.py @@ -4,8 +4,6 @@ import pathlib -import pytest - import hictkpy testdir = pathlib.Path(__file__).resolve().parent @@ -17,7 +15,7 @@ class TestClass: - def test_validators(self): + def test_valid_formats(self): assert hictkpy.is_cooler(cool_file) assert not hictkpy.is_cooler(hic_file) @@ -26,3 +24,31 @@ def test_validators(self): assert hictkpy.is_scool_file(scool_file) assert not hictkpy.is_scool_file(cool_file) + + assert hictkpy.is_hic(hic_file) + assert not hictkpy.is_hic(cool_file) + + def test_invalid_formats(self): + path = pathlib.Path(__file__).resolve() + + assert not hictkpy.is_cooler(path) + assert not hictkpy.is_mcool_file(path) + assert not hictkpy.is_scool_file(path) + assert not hictkpy.is_hic(path) + + def test_invalid_files(self): + non_existing_file = testdir / "foobar.123" + assert not non_existing_file.exists() + + assert not hictkpy.is_cooler(non_existing_file) + assert not hictkpy.is_mcool_file(non_existing_file) + assert not hictkpy.is_scool_file(non_existing_file) + assert not hictkpy.is_hic(non_existing_file) + + folder = testdir + assert folder.is_dir() + + assert not hictkpy.is_cooler(folder) + assert not hictkpy.is_mcool_file(folder) + assert not hictkpy.is_scool_file(folder) + assert not hictkpy.is_hic(folder) diff --git a/test/test_multires_file_accessors.py b/test/test_multires_file_accessors.py index 707fe7f..4e1e1c5 100644 --- a/test/test_multires_file_accessors.py +++ b/test/test_multires_file_accessors.py @@ -11,19 +11,39 @@ testdir = pathlib.Path(__file__).resolve().parent pytestmark = pytest.mark.parametrize( - "file", + "file,format", [ - testdir / "data" / "cooler_test_file.mcool", + (testdir / "data" / "cooler_test_file.mcool", "mcool"), + (testdir / "data" / "hic_test_file.hic", "hic"), ], ) class TestClass: - def test_attributes(self, file): + def test_accessors(self, file, format): f = hictkpy.MultiResFile(file) + assert str(f).startswith("MultiResFile(") + assert f.path() == file - assert f.resolutions() == [100_000, 1_000_000] + assert f.is_mcool() == (format == "mcool") + assert f.is_hic() == (format == "hic") assert len(f.chromosomes()) == 8 + if f.is_hic(): + resolutions = [100_000] + assert (f.resolutions() == resolutions).all() + assert f.attributes()["format"] == "HIC" + assert f.attributes()["format-version"] == 9 + assert (f.attributes()["resolutions"] == resolutions).all() + else: + resolutions = [100_000, 1_000_000] + assert (f.resolutions() == resolutions).all() + assert f.attributes()["format"] == "HDF5::MCOOL" + assert f.attributes()["format-version"] == 2 + assert (f.attributes()["resolutions"] == resolutions).all() + assert f[100_000].resolution() == 100_000 + + with pytest.raises(Exception): + f[1234] # noqa diff --git a/test/test_pixel_selector_accessors.py b/test/test_pixel_selector_accessors.py new file mode 100644 index 0000000..ada56bb --- /dev/null +++ b/test/test_pixel_selector_accessors.py @@ -0,0 +1,54 @@ +# Copyright (C) 2023 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +import pathlib + +import pytest + +import hictkpy + +from .helpers import numpy_avail + +testdir = pathlib.Path(__file__).resolve().parent + +pytestmark = pytest.mark.parametrize( + "file,resolution", + [ + (testdir / "data" / "cooler_test_file.mcool", 100_000), + (testdir / "data" / "hic_test_file.hic", 100_000), + ], +) + + +@pytest.mark.skipif(not numpy_avail(), reason="numpy is not available") +class TestClass: + def test_repr(self, file, resolution): + f = hictkpy.File(file, resolution) + + sel = f.fetch() + assert str(sel) == "PixelSelector(ALL; COO; int32)" + + sel = f.fetch(join=True) + assert str(sel) == "PixelSelector(ALL; BG2; int32)" + + sel = f.fetch(count_type="float") + assert str(sel) == "PixelSelector(ALL; COO; float64)" + + sel = f.fetch("chr2L:0-10,000,000", "chr2L:5,000,000-20,000,000") + assert str(sel) == "PixelSelector(chr2L:0-10000000; chr2L:5000000-20000000; COO; int32)" + + def test_coords(self, file, resolution): + f = hictkpy.File(file, resolution) + + sel = f.fetch() + assert sel.coord1() == ("ALL", 0, len(f.bins())) + assert sel.coord1() == sel.coord2() + + sel = f.fetch("chr2L:0-10,000,000") + assert sel.coord1() == ("chr2L", 0, 10_000_000) + assert sel.coord1() == sel.coord2() + + sel = f.fetch("chr2L:0-10,000,000", "chr2L:5,000,000-20,000,000") + assert sel.coord1() == ("chr2L", 0, 10_000_000) + assert sel.coord2() == ("chr2L", 5_000_000, 20_000_000) diff --git a/test/test_singlecell_file_accessors.py b/test/test_singlecell_file_accessors.py index f48f1c4..aa7a1d9 100644 --- a/test/test_singlecell_file_accessors.py +++ b/test/test_singlecell_file_accessors.py @@ -19,13 +19,19 @@ class TestClass: - def test_attributes(self, file): + def test_accessors(self, file): f = hictkpy.cooler.SingleCellFile(file) + assert str(f).startswith("SingleCellFile(") + assert f.path() == file assert f.resolution() == 100_000 assert len(f.chromosomes()) == 20 + assert len(f.bins()) == 26398 assert len(f.cells()) == 5 assert f.attributes()["format"] == "HDF5::SCOOL" assert f["GSM2687248_41669_ACAGTG-R1-DpnII.100000.cool"].resolution() == 100_000 + + with pytest.raises(Exception): + f["ABC"] # noqa