Skip to content

Commit

Permalink
Merge branch 'main' into feat/stats
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics authored Nov 28, 2024
2 parents e6b0c46 + 51f5f0b commit 7f7e230
Show file tree
Hide file tree
Showing 29 changed files with 390 additions and 229 deletions.
30 changes: 29 additions & 1 deletion .github/workflows/build-conan-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,30 @@ jobs:
path: ${{ env.CONAN_HOME }}/p
lookup-only: true

- name: Configure Conan
  # Writes a small helper script and runs it inside the "$IMAGE" container.
  # The script installs the requested Conan version with pip and points the
  # "conancenter" remote at https://center2.conan.io.
  # Skipped when the Conan cache lookup reported a hit.
  if: steps.cache-conan.outputs.cache-hit != 'true'
  run: |
    cat << 'EOF' | tee script.sh > /dev/null
    #!/usr/bin/env bash
    set -u
    set -e
    conan_version="$1"
    PATH="/opt/python/cp312-cp312/bin:$PATH"
    pip install "conan==$conan_version"
    conan remote update conancenter --url https://center2.conan.io
    EOF
    chmod 755 script.sh
    docker run \
      -e "CONAN_HOME=$CONAN_HOME" \
      -v "$PWD/script.sh:/tmp/script.sh:ro" \
      -v "$CONAN_HOME:$CONAN_HOME" \
      "$IMAGE" /tmp/script.sh '${{ inputs.conan-version }}'
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -269,7 +293,9 @@ jobs:

- name: Configure Conan
if: steps.cache-conan.outputs.cache-hit != 'true'
run: conan profile detect --force
run: |
conan profile detect --force
conan remote update conancenter --url https://center2.conan.io
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
Expand Down Expand Up @@ -380,6 +406,8 @@ jobs:
sed -i 's/compiler\.cppstd=.*/compiler.cppstd=${{ inputs.cppstd }}/' "$conan_profile"
conan remote update conancenter --url https://center2.conan.io
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/fuzzy-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ jobs:
key: conan-${{ steps.cache-key.outputs.key }}
path: ${{ env.CONAN_HOME }}/p

# Point the "conancenter" remote at https://center2.conan.io.
# Skipped when the Conan cache step reported a hit.
- name: Configure Conan
if: steps.cache-conan.outputs.cache-hit != 'true'
run: conan remote update conancenter --url https://center2.conan.io

- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Add requirements
run: python -m pip install --upgrade wheel setuptools
run: python -m pip install --upgrade conan wheel setuptools

- name: Generate cache key
id: cache-key
Expand All @@ -69,6 +69,9 @@ jobs:
echo "conan-key=pip-${{ matrix.os }}-$hash" >> $GITHUB_OUTPUT
# Point the "conancenter" remote at https://center2.conan.io.
# This step has no `if:` guard, so it runs regardless of cache state.
- name: Configure Conan
run: conan remote update conancenter --url https://center2.conan.io

- name: Restore Conan cache
id: cache-conan
uses: actions/cache/restore@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ jobs:
fail-on-cache-miss: true

- name: Build wheels
uses: pypa/cibuildwheel@v2.21
uses: pypa/cibuildwheel@v2.22
with:
only: ${{ matrix.wheel-config }}
env:
Expand Down
1 change: 1 addition & 0 deletions docs/api/cooler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Cooler API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
3 changes: 3 additions & 0 deletions docs/api/generic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Generic API

.. automethod:: __init__
.. automethod:: __getitem__
.. automethod:: attributes
.. automethod:: chromosomes
.. automethod:: is_hic
.. automethod:: is_mcool
.. automethod:: path
.. automethod:: resolutions

Expand Down
1 change: 1 addition & 0 deletions docs/api/hic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Hi-C API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
8 changes: 5 additions & 3 deletions src/bin_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,8 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::vector<std::uint32_t> starts(n);
std::vector<std::uint32_t> ends(n);

const auto chrom_id_offset = static_cast<std::uint32_t>(_bins->chromosomes().at(0).is_all());

std::visit(
[&](const auto& bins) {
const auto [first_bin, last_bin] = !range.has_value()
Expand All @@ -403,16 +405,16 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::size_t i = 0;
std::for_each(first_bin, last_bin, [&](const auto& bin) {
bin_ids[i] = bin.id();
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id());
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id() - chrom_id_offset);
starts[i] = bin.start();
ends[i] = bin.end();
++i;
});
},
_bins->get());

return make_bin_table_df(chrom_names(), std::move(chrom_ids), std::move(starts), std::move(ends),
std::move(bin_ids));
return make_bin_table_df(chrom_names(false), std::move(chrom_ids), std::move(starts),
std::move(ends), std::move(bin_ids));
}

// Accessor for the wrapped bin table: hands back a copy of the shared pointer
// stored in `_bins`, so the caller shares ownership of the same hictk::BinTable.
std::shared_ptr<const hictk::BinTable> BinTable::get() const noexcept {
  return _bins;
}
Expand Down
31 changes: 25 additions & 6 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <hictk/type_traits.hpp>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -70,18 +73,27 @@ const hictk::Reference &CoolerFileWriter::chromosomes() const {
return ref;
}

// Returns the bin table owned by the underlying writer `_w`.
// When `_w` holds no value (e.g. after finalize() has reset it), an empty
// shared pointer is returned instead.
std::shared_ptr<const hictk::BinTable> CoolerFileWriter::bins_ptr() const noexcept {
  if (_w.has_value()) {
    return _w->bins_ptr();
  }

  return {};
}

void CoolerFileWriter::add_pixels(const nb::object &df) {
if (!_w.has_value()) {
throw std::runtime_error(
"caught attempt to add_pixels to a .cool file that has already been finalized!");
}

const auto coo_format = nb::cast<bool>(df.attr("columns").attr("__contains__")("bin1_id"));

const auto cell_id = fmt::to_string(_w->cells().size());
auto attrs = hictk::cooler::Attributes::init(_w->resolution());
attrs.assembly = _w->attributes().assembly;

auto lck = std::make_optional<nb::gil_scoped_acquire>();
const auto coo_format = nb::cast<bool>(df.attr("columns").attr("__contains__")("bin1_id"));

const auto dtype = df.attr("__getitem__")("count").attr("dtype");
const auto dtype_str = nb::cast<std::string>(dtype.attr("__str__")());
const auto var = map_dtype_to_type(dtype_str);
Expand All @@ -91,6 +103,7 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {
using N = remove_cvref_t<decltype(n)>;
const auto pixels = coo_format ? coo_df_to_thin_pixels<N>(df, true)
: bg2_df_to_thin_pixels<N>(_w->bins(), df, true);
lck.reset();

auto clr = _w->create_cell<N>(cell_id, std::move(attrs),
hictk::cooler::DEFAULT_HDF5_CACHE_SIZE * 4, 1);
Expand All @@ -104,8 +117,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {
var);
}

void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
std::size_t chunk_size, std::size_t update_freq) {
hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_freq) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path));
Expand Down Expand Up @@ -142,6 +155,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
_w.reset();
std::filesystem::remove(sclr_path); // NOLINT
// NOLINTEND(*-unchecked-optional-access)

return hictk::File{_path.string()};
}

hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path,
Expand Down Expand Up @@ -192,16 +207,20 @@ void CoolerFileWriter::bind(nb::module_ &m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
writer.def("bins", &get_bins_from_object<hictkpy::CoolerFileWriter>, "Get table of bins.",
nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move);

writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels,
nb::call_guard<nb::gil_scoped_release>(),
nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"),
"Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. "
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN",
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize,
nb::call_guard<nb::gil_scoped_release>(), nb::arg("log_lvl") = "WARN",
nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000,
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
// NOLINTEND(*-avoid-magic-numbers)
}
} // namespace hictkpy
39 changes: 22 additions & 17 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ bool is_cooler(const std::filesystem::path &uri) {

// Thin wrapper: forwards `uri` to hictk::hic::utils::is_hic_file() and returns
// its result unchanged.
bool is_hic(const std::filesystem::path &uri) {
  return hictk::hic::utils::is_hic_file(uri);
}

static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1,
std::string_view range2, std::string_view normalization,
static hictkpy::PixelSelector fetch(const hictk::File &f, std::optional<std::string_view> range1,
std::optional<std::string_view> range2,
std::optional<std::string_view> normalization,
std::string_view count_type, bool join,
std::string_view query_type) {
if (count_type != "float" && count_type != "int") {
Expand All @@ -96,36 +97,39 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range
throw std::runtime_error("query_type should be either UCSC or BED");
}

if (normalization != "NONE") {
const hictk::balancing::Method normalization_method{normalization.value_or("NONE")};

if (normalization_method != hictk::balancing::Method::NONE()) {
count_type = "float";
}

if (range1.empty()) {
assert(range2.empty());
if (!range1.has_value() || range1->empty()) {
assert(!range2.has_value() || range2->empty());
return std::visit(
[&](const auto &ff) {
auto sel = ff.fetch(hictk::balancing::Method{normalization});
auto sel = ff.fetch(normalization_method);
using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
join);
},
f.get());
}

if (range2.empty()) {
if (!range2.has_value() || range2->empty()) {
range2 = range1;
}

const auto query_type_ =
query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED;
const auto gi1 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range1}, query_type_);
const auto gi2 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range2}, query_type_);
const auto gi1 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range1}, query_type_);
const auto gi2 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range2}, query_type_);

return std::visit(
[&](const auto &ff) {
// Workaround bug fixed in https://github.com/paulsengroup/hictk/pull/158
auto sel = ff.fetch(fmt::format(FMT_STRING("{}"), gi1), fmt::format(FMT_STRING("{}"), gi2),
hictk::balancing::Method(normalization));
auto sel = ff.fetch(gi1.chrom().name(), gi1.start(), gi1.end(), gi2.chrom().name(),
gi2.start(), gi2.end(), normalization_method);

using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
Expand Down Expand Up @@ -190,7 +194,7 @@ static nb::dict get_hic_attrs(const hictk::hic::File &hf) {

py_attrs["bin_size"] = hf.resolution();
py_attrs["format"] = "HIC";
py_attrs["format_version"] = hf.version();
py_attrs["format-version"] = hf.version();
py_attrs["assembly"] = hf.assembly();
py_attrs["format-url"] = "https://github.com/aidenlab/hic-format";
py_attrs["nbins"] = hf.bins().size();
Expand Down Expand Up @@ -301,14 +305,15 @@ void declare_file_class(nb::module_ &m) {

file.def("resolution", &hictk::File::resolution, "Get the bin size in bp.");
file.def("nbins", &hictk::File::nbins, "Get the total number of bins.");
file.def("nchroms", &hictk::File::nchroms, "Get the total number of chromosomes.");
file.def("nchroms", &hictk::File::nchroms, nb::arg("include_ALL") = false,
"Get the total number of chromosomes.");

file.def("attributes", &file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);

file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "",
nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int",
nb::arg("join") = false, nb::arg("query_type") = "UCSC",
file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = nb::none(),
nb::arg("range2") = nb::none(), nb::arg("normalization") = nb::none(),
nb::arg("count_type") = "int", nb::arg("join") = false, nb::arg("query_type") = "UCSC",
"Fetch interactions overlapping a region of interest.", nb::rv_policy::move);

file.def("avail_normalizations", &file::avail_normalizations,
Expand Down
Loading

0 comments on commit 7f7e230

Please sign in to comment.