diff --git a/include/rmcl/correction/CorrectionResults.hpp b/include/rmcl/correction/CorrectionResults.hpp index 83bcbff..840f90c 100644 --- a/include/rmcl/correction/CorrectionResults.hpp +++ b/include/rmcl/correction/CorrectionResults.hpp @@ -40,6 +40,66 @@ namespace rmcl { +template +struct CorrespondencesView +{ + rmagine::MemoryView dataset_points; + rmagine::MemoryView model_points; + rmagine::MemoryView corr_valid; +}; + + +template +struct Correspondences +{ + rmagine::Memory dataset_points; + rmagine::Memory model_points; + rmagine::Memory corr_valid; + + + + + inline CorrespondencesView slice( + unsigned int start, unsigned int end) const + { + return { + dataset_points.slice(start, end), + model_points.slice(start, end), + corr_valid.slice(start, end) + }; + } + + inline CorrespondencesView view() const + { + return { + dataset_points, + model_points, + corr_valid + }; + } + + // Enable this function? + // inline std::vector > split( + // unsigned int N) const + // { + // std::vector > ret; + + // unsigned int batch_size = dataset_points.size() / N; + + // for(unsigned int i=0; i struct CorrectionPreResults { diff --git a/include/rmcl/correction/PinholeCorrectorEmbree.hpp b/include/rmcl/correction/PinholeCorrectorEmbree.hpp index f855841..8edbacb 100644 --- a/include/rmcl/correction/PinholeCorrectorEmbree.hpp +++ b/include/rmcl/correction/PinholeCorrectorEmbree.hpp @@ -111,15 +111,6 @@ class PinholeCorrectorEmbree rmagine::MemoryView& Ncorr ); - // Next: reuse the already computed correspondences - // void computeCovs( - // const rmagine::MemoryView dataset_points, - // const rmagine::MemoryView model_points, - // const rmagine::MemoryView corr_valid, - // rmagine::MemoryView& Cs, - // rmagine::MemoryView& Ncorr - // ); - void computeCovs( const rmagine::MemoryView& Tbms, CorrectionPreResults& res @@ -172,24 +163,6 @@ class PinholeCorrectorEmbree // TODO: currently unused rmagine::SVDPtr m_svd; - -private: - // different implementations for testing - void computeCovsSequential( - const rmagine::MemoryView& Tbms, - rmagine::MemoryView& data_means, - rmagine::MemoryView& model_means, - rmagine::MemoryView& Cs, - rmagine::MemoryView& Ncorr - ); - - void computeCovsOuterInnerParallel( - const rmagine::MemoryView& Tbms, - rmagine::MemoryView& data_means, - rmagine::MemoryView& model_means, - rmagine::MemoryView& Cs, - rmagine::MemoryView& Ncorr - ); }; using PinholeCorrectorEmbreePtr = std::shared_ptr; diff --git a/include/rmcl/correction/SphereCorrectorEmbree.hpp b/include/rmcl/correction/SphereCorrectorEmbree.hpp index 6b32435..e244643 100644 --- a/include/rmcl/correction/SphereCorrectorEmbree.hpp +++ b/include/rmcl/correction/SphereCorrectorEmbree.hpp @@ -140,6 +140,16 @@ class SphereCorrectorEmbree rmagine::Memory& corr_valid ); + // TODO: add them to other classes + void findSPC( + const rmagine::MemoryView& Tbms, + Correspondences& corr + ); + + Correspondences findSPC( + const rmagine::MemoryView& Tbms); + + // TODO: add properly - rmagine inline CorrectionParams params() const { diff --git a/src/nodes/tests/cov_online.cpp b/src/nodes/tests/cov_online.cpp index 5687f4d..51a4593 100644 --- a/src/nodes/tests/cov_online.cpp +++ b/src/nodes/tests/cov_online.cpp @@ -41,9 +41,9 @@ void fill_ones(rm::MemoryView mask) int main(int argc, char** argv) { - rm::Mem dataset(128); - // fill_random(dataset); - fill_sequence(dataset); + rm::Mem dataset(10000); + fill_random(dataset); + // fill_sequence(dataset); rm::Mem model(dataset.size()); @@ -60,7 +60,6 @@ int main(int argc, char** argv) model[i] = T * dataset[i]; } - rm::StopWatchHR sw; double el; rm::Mem ds(1); @@ -113,7 +112,6 @@ int main(int argc, char** argv) rm::Mem Cs_(1); rm::Mem Ncorr_(1); - std::cout << "means_covs_batched" << std::endl; sw(); rmcl::means_covs_batched(dataset_, model_, mask_, ds_, ms_, Cs_, Ncorr_); @@ -148,12 +146,5 @@ int main(int argc, char** argv) std::cout << Ncorr[0] << std::endl; } - - - - - - - return 0; } \ No newline at end of file diff --git a/src/rmcl/correction/O1DnCorrectorEmbree.cpp b/src/rmcl/correction/O1DnCorrectorEmbree.cpp index d391b63..b7e60f3 100644 --- a/src/rmcl/correction/O1DnCorrectorEmbree.cpp +++ b/src/rmcl/correction/O1DnCorrectorEmbree.cpp @@ -128,18 +128,28 @@ CorrectionResults O1DnCorrectorEmbree::correct( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } @@ -150,9 +160,6 @@ CorrectionResults O1DnCorrectorEmbree::correct( if(Ncorr > 0) { - const float Ncorr_f = static_cast(Ncorr); - C /= Ncorr_f; - Matrix3x3 U, V, S; { // the only Eigen code left @@ -285,18 +292,28 @@ void O1DnCorrectorEmbree::computeCovs( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; - float N = static_cast(Ncorr_ + 1); - - // reduction - Ncorr_++; - Mmean += dM / N; - Dmean += dD / N; - C += dM.multT(dD); + const float N_1 = static_cast(Ncorr_); + const float N = static_cast(Ncorr_ + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; + + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; + + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr_ = Ncorr_ + 1; } } } @@ -304,20 +321,9 @@ void O1DnCorrectorEmbree::computeCovs( } Ncorr[pid] = Ncorr_; - - if(Ncorr_ > 0) - { - const float Ncorr_f = static_cast(Ncorr_); - C /= Ncorr_f; - - ds[pid] = Dmean; - ms[pid] = Mmean; - Cs[pid] = C; - } else { - ds[pid] = {0.0, 0.0, 0.0}; - ms[pid] = {0.0, 0.0, 0.0}; - Cs[pid].setZeros(); - } + ds[pid] = Dmean; + ms[pid] = Mmean; + Cs[pid] = C; } } @@ -376,6 +382,8 @@ void O1DnCorrectorEmbree::findSPC( if(range_real < m_model->range.min || range_real > m_model->range.max) { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; continue; } @@ -434,19 +442,22 @@ void O1DnCorrectorEmbree::findSPC( const float distance = (pmesh_s - preal_s).l2norm(); - if(distance < max_distance) - { - // convert back to base (sensor shared coordinate system) - const Vector preal_b = Tsb * preal_s; - const Vector pmesh_b = Tsb * pmesh_s; + // convert back to base (sensor shared coordinate system) + const Vector preal_b = Tsb * preal_s; + const Vector pmesh_b = Tsb * pmesh_s; - dataset_points[glob_id] = preal_b; - model_points[glob_id] = pmesh_b; + dataset_points[glob_id] = preal_b; + model_points[glob_id] = pmesh_b; + + if(distance < max_distance) + { corr_valid[glob_id] = 1; } else { corr_valid[glob_id] = 0; } } else { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; } } diff --git a/src/rmcl/correction/OnDnCorrectorEmbree.cpp b/src/rmcl/correction/OnDnCorrectorEmbree.cpp index c8b5829..5135c7c 100644 --- a/src/rmcl/correction/OnDnCorrectorEmbree.cpp +++ b/src/rmcl/correction/OnDnCorrectorEmbree.cpp @@ -129,18 +129,28 @@ CorrectionResults OnDnCorrectorEmbree::correct( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1 / N; + const float w2 = 1.0 / N; - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } @@ -151,9 +161,6 @@ CorrectionResults OnDnCorrectorEmbree::correct( if(Ncorr > 0) { - const float Ncorr_f = static_cast(Ncorr); - C /= Ncorr_f; - Matrix3x3 U, V, S; { // the only Eigen code left @@ -286,18 +293,28 @@ void OnDnCorrectorEmbree::computeCovs( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; - float N = static_cast(Ncorr_ + 1); - - // reduction - Ncorr_++; - Mmean += dM / N; - Dmean += dD / N; - C += dM.multT(dD); + const float N_1 = static_cast(Ncorr_); + const float N = static_cast(Ncorr_ + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; + + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; + + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr_ = Ncorr_ + 1; } } } @@ -377,6 +394,8 @@ void OnDnCorrectorEmbree::findSPC( if(range_real < m_model->range.min || range_real > m_model->range.max) { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; continue; } @@ -435,19 +454,22 @@ void OnDnCorrectorEmbree::findSPC( const float distance = (pmesh_s - preal_s).l2norm(); + // convert back to base (sensor shared coordinate system) + const Vector preal_b = Tsb * preal_s; + const Vector pmesh_b = Tsb * pmesh_s; + + dataset_points[glob_id] = preal_b; + model_points[glob_id] = pmesh_b; + if(distance < max_distance) { - // convert back to base (sensor shared coordinate system) - const Vector preal_b = Tsb * preal_s; - const Vector pmesh_b = Tsb * pmesh_s; - - dataset_points[glob_id] = preal_b; - model_points[glob_id] = pmesh_b; corr_valid[glob_id] = 1; } else { corr_valid[glob_id] = 0; } } else { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; } } diff --git a/src/rmcl/correction/PinholeCorrectorEmbree.cpp b/src/rmcl/correction/PinholeCorrectorEmbree.cpp index 5b15419..dffd147 100644 --- a/src/rmcl/correction/PinholeCorrectorEmbree.cpp +++ b/src/rmcl/correction/PinholeCorrectorEmbree.cpp @@ -139,18 +139,28 @@ CorrectionResults PinholeCorrectorEmbree::correct( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } @@ -161,8 +171,6 @@ CorrectionResults PinholeCorrectorEmbree::correct( if(Ncorr > 0) { - C /= static_cast(Ncorr); - Matrix3x3 U, V, S; { // the only Eigen code left @@ -301,19 +309,28 @@ void PinholeCorrectorEmbree::computeCovs( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - - // #pragma omp critical + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; - float N = static_cast(Ncorr_ + 1); - - // reduction - Ncorr_++; - Mmean += dM / N; - Dmean += dD / N; - C += dM.multT(dD); + const float N_1 = static_cast(Ncorr_); + const float N = static_cast(Ncorr_ + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; + + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; + + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr_ = Ncorr_ + 1; } } } @@ -393,6 +410,8 @@ void PinholeCorrectorEmbree::findSPC( if(range_real < m_model->range.min || range_real > m_model->range.max) { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; continue; } @@ -456,19 +475,22 @@ void PinholeCorrectorEmbree::findSPC( const float distance = (pmesh_s - preal_s).l2norm(); + // convert back to base (sensor shared coordinate system) + const Vector preal_b = Tsb * preal_s; + const Vector pmesh_b = Tsb * pmesh_s; + + dataset_points[glob_id] = preal_b; + model_points[glob_id] = pmesh_b; + if(distance < max_distance) { - // convert back to base (sensor shared coordinate system) - const Vector preal_b = Tsb * preal_s; - const Vector pmesh_b = Tsb * pmesh_s; - - dataset_points[glob_id] = preal_b; - model_points[glob_id] = pmesh_b; corr_valid[glob_id] = 1; } else { corr_valid[glob_id] = 0; } } else { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; } } @@ -501,269 +523,4 @@ void PinholeCorrectorEmbree::findSPC( findSPC(Tbms, dataset_points(0, Nrays), model_points(0, Nrays), corr_valid(0, Nrays)); } - -void PinholeCorrectorEmbree::computeCovsSequential( - const rmagine::MemoryView& Tbms, - rmagine::MemoryView& ds, - rmagine::MemoryView& ms, - rmagine::MemoryView& Cs, - rmagine::MemoryView& Ncorr) -{ - const float max_distance = m_params.max_distance; - - auto scene = m_map->scene->handle(); - - const rmagine::Transform Tsb = m_Tsb[0]; - - // #pragma omp parallel for default(shared) if(Tbms.size() > 4) - for(size_t pid=0; pid < Tbms.size(); pid++) - { - auto Tbms0 = Tbms(pid, pid+1); - const rmagine::Transform Tbm = Tbms[pid]; - const rmagine::Transform Tsm = Tbm * Tsb; - const rmagine::Transform Tms = ~Tsm; - - const unsigned int glob_shift = pid * m_model->size(); - - Memory dataset_points; - Memory model_points; - Memory corr_valid; - findSPC(Tbms0, dataset_points, model_points, corr_valid); - - Vector Dmean = {0.0, 0.0, 0.0}; - Vector Mmean = {0.0, 0.0, 0.0}; - unsigned int Ncorr_ = 0; - - for(size_t i=0; i 0) - { - Dmean += dataset_points[i]; - Mmean += model_points[i]; - Ncorr_++; - } - } - - Ncorr[pid] = Ncorr_; - - if(Ncorr_ > 0) - { - const float Ncorr_f = static_cast(Ncorr_); - - Dmean /= Ncorr_f; - Mmean /= Ncorr_f; - - Matrix3x3 C; - C.setZeros(); - - for(size_t i=0; i 0) - { - C += (model_points[i] - Mmean).multT(dataset_points[i] - Dmean); - } - } - - C /= Ncorr_f; - - // Bessel's correction for sample variance - // C /= (Ncorr_ - 1); - - ds[pid] = Dmean; - ms[pid] = Mmean; - Cs[pid] = C; - } else { - ds[pid] = {0.0, 0.0, 0.0}; - ms[pid] = {0.0, 0.0, 0.0}; - Cs[pid].setZeros(); - } - } -} - -void PinholeCorrectorEmbree::computeCovsOuterInnerParallel( - const rmagine::MemoryView& Tbms, - rmagine::MemoryView& ms, - rmagine::MemoryView& ds, - rmagine::MemoryView& Cs, - rmagine::MemoryView& Ncorr) -{ - // std::cout << "computeCovsOuterInnerParallel" << std::endl; - - const float max_distance = m_params.max_distance; - - auto scene = m_map->scene->handle(); - - const rmagine::Transform Tsb = m_Tsb[0]; - - // #pragma omp parallel for default(shared) if(Tbms.size() > 4) - for(size_t pid=0; pid < Tbms.size(); pid++) - { - const rmagine::Transform Tbm = Tbms[pid]; - - const rmagine::Transform Tsm = Tbm * Tsb; - const rmagine::Transform Tms = ~Tsm; - - const unsigned int glob_shift = pid * m_model->size(); - - Vector Dmean = {0.0, 0.0, 0.0}; - Vector Mmean = {0.0, 0.0, 0.0}; - unsigned int Ncorr_ = 0; - Matrix3x3 C; - C.setZeros(); - - // #pragma omp parallel for default(shared) reduction(+:Dmean,Mmean,Ncorr_,C) - for(unsigned int vid = 0; vid < m_model->getHeight(); vid++) - { - Vector Dmean_inner = {0.0, 0.0, 0.0}; - Vector Mmean_inner = {0.0, 0.0, 0.0}; - unsigned int Ncorr_inner = 0; - Matrix3x3 C_inner; - C_inner.setZeros(); - - for(unsigned int hid = 0; hid < m_model->getWidth(); hid++) - { - const unsigned int loc_id = m_model->getBufferId(vid, hid); - const unsigned int glob_id = glob_shift + loc_id; - - const float range_real = m_ranges[loc_id]; - if(range_real < m_model->range.min - || range_real > m_model->range.max) - { - continue; - } - - Vector ray_dir_s; - if(m_optical) - { - ray_dir_s = m_model->getDirectionOptical(vid, hid); - } else { - ray_dir_s = m_model->getDirection(vid, hid); - } - - const Vector ray_dir_m = Tsm.R * ray_dir_s; - - RTCRayHit rayhit; - rayhit.ray.org_x = Tsm.t.x; - rayhit.ray.org_y = Tsm.t.y; - rayhit.ray.org_z = Tsm.t.z; - rayhit.ray.dir_x = ray_dir_m.x; - rayhit.ray.dir_y = ray_dir_m.y; - rayhit.ray.dir_z = ray_dir_m.z; - rayhit.ray.tnear = 0; - rayhit.ray.tfar = INFINITY; - rayhit.ray.mask = 0; - rayhit.ray.flags = 0; - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(scene, &m_context, &rayhit); - - bool sim_valid = rayhit.hit.geomID != RTC_INVALID_GEOMETRY_ID; - if(sim_valid) - { - Vector nint_m; - nint_m.x = rayhit.hit.Ng_x; - nint_m.y = rayhit.hit.Ng_y; - nint_m.z = rayhit.hit.Ng_z; - nint_m.normalizeInplace(); - - // sensor space - // Do point to plane ICP here - Vector preal_s, pint_s, nint_s; - preal_s = ray_dir_s * range_real; - - // search point on surface that is more nearby - pint_s = ray_dir_s * rayhit.ray.tfar; - - // transform normal from global to local - nint_s = Tms.R * nint_m; - - // flip to base: check if this is really needed: no - // if(ray_dir_s.dot(nint_s) > 0.0) - // { - // nint_s = -nint_s; - // } - - // distance of real point to plane at simulated point - float signed_plane_dist = (pint_s - preal_s).dot(nint_s); - // project point to plane results in correspondence - const Vector pmesh_s = preal_s + nint_s * signed_plane_dist; - - const float distance = (pmesh_s - preal_s).l2norm(); - - if(distance < max_distance) - { - const Vector preal_b = Tsb * preal_s; - const Vector pmesh_b = Tsb * pmesh_s; - - // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - - Ncorr_inner++; - const float N = static_cast(Ncorr_inner); - - const Vector dD = preal_b - Dmean_inner; - const Vector dM = pmesh_b - Mmean_inner; - - // reduction - Dmean_inner += dD / N; - Mmean_inner += dM / N; - C_inner += dM.multT(dD); - } - } - } - - // reduce - // must be done sequentially - // #pragma omp critical - // { - // combining covariances of two sets - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - const double Nouter = static_cast(Ncorr_); - const double Ninner = static_cast(Ncorr_inner); - const double Ntot = static_cast(Ncorr_ + Ncorr_inner); - - - auto dD = Dmean_inner.cast() - Dmean.cast(); - auto dM = Mmean_inner.cast() - Mmean.cast(); - - Dmean = (Dmean * Nouter / Ntot + Dmean_inner * Ninner / Ntot).cast(); - Mmean = (Mmean * Nouter / Ntot + Mmean_inner * Ninner / Ntot).cast(); - - auto Cd = C.cast(); - auto C_innerd = C_inner.cast(); - auto dC = dM.multT(dD); - - // const Matrix3x3 dC = (Mmean_inner - Mmean).multT(Dmean_inner - Dmean); - - // const float factor = Ninner / Ntot * N; - Cd = Cd + C_innerd + dC * Ninner / Ntot * Nouter; - // C = C_inner + C; - - C = Cd.cast(); - - Ncorr_ += Ncorr_inner; - // } - } - - Ncorr[pid] = Ncorr_; - - if(Ncorr_ > 0) - { - C /= static_cast(Ncorr_); - // C *= 0.95; - // Bessel's correction for sample variance - // C /= (Ncorr_ - 1); - - ms[pid] = Mmean; - ds[pid] = Dmean; - Cs[pid] = C; - } else { - ms[pid] = {0.0, 0.0, 0.0}; - ds[pid] = {0.0, 0.0, 0.0}; - Cs[pid].setZeros(); - } - } -} - } // namespace rmcl \ No newline at end of file diff --git a/src/rmcl/correction/PinholeCorrectorOptix.cpp b/src/rmcl/correction/PinholeCorrectorOptix.cpp index c57d3a4..5411f9d 100644 --- a/src/rmcl/correction/PinholeCorrectorOptix.cpp +++ b/src/rmcl/correction/PinholeCorrectorOptix.cpp @@ -162,8 +162,8 @@ void PinholeCorrectorOptix::computeCovs( if(Tbms.size() < max_poses_rw) { // std::cout << "Raywise" << std::endl; - rm::StopWatchHR sw; - double el; + // rm::StopWatchHR sw; + // double el; // sw(); computeMeansCovsRW(Tbms, ds, ms, Cs, Ncorr); @@ -359,15 +359,6 @@ void PinholeCorrectorOptix::computeMeansCovsRW( means_dataset, means_model, // outputs Cs, Ncorr ); - - // old: two pass - // mean_batched(dataset_points, corr_valid, Ncorr, means_dataset); - // mean_batched(model_points, corr_valid, Ncorr, means_model); - // rm::sumBatched(corr_valid, Ncorr); - - // cov_batched(dataset_points, means_dataset, - // model_points, means_model, - // corr_valid, Ncorr, Cs); } void PinholeCorrectorOptix::computeMeansCovsSW( diff --git a/src/rmcl/correction/SphereCorrectorEmbree.cpp b/src/rmcl/correction/SphereCorrectorEmbree.cpp index cae36ea..3b5f542 100644 --- a/src/rmcl/correction/SphereCorrectorEmbree.cpp +++ b/src/rmcl/correction/SphereCorrectorEmbree.cpp @@ -129,18 +129,28 @@ CorrectionResults SphereCorrectorEmbree::correct( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } @@ -151,9 +161,6 @@ CorrectionResults SphereCorrectorEmbree::correct( if(Ncorr > 0) { - const float Ncorr_f = static_cast(Ncorr); - C /= Ncorr_f; - Matrix3x3 U, V, S; { // the only Eigen code left @@ -283,39 +290,38 @@ void SphereCorrectorEmbree::computeCovs( const Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - const Vector dD = preal_b - Dmean; - const Vector dM = pmesh_b - Mmean; - float N = static_cast(Ncorr_ + 1); - - // reduction - Ncorr_++; - Mmean += dM / N; - Dmean += dD / N; - C += dM.multT(dD); + const float N_1 = static_cast(Ncorr_); + const float N = static_cast(Ncorr_ + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; + + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; + + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr_ = Ncorr_ + 1; } } } } } + ds[pid] = Dmean; + ms[pid] = Mmean; + Cs[pid] = C; Ncorr[pid] = Ncorr_; - - if(Ncorr_ > 0) - { - const float Ncorr_f = static_cast(Ncorr_); - C /= Ncorr_f; - - ds[pid] = Dmean; - ms[pid] = Mmean; - Cs[pid] = C; - } else { - ds[pid] = {0.0f, 0.0f, 0.0f}; - ms[pid] = {0.0f, 0.0f, 0.0f}; - Cs[pid].setZeros(); - } } } @@ -373,6 +379,8 @@ void SphereCorrectorEmbree::findSPC( if(range_real < m_model->range.min || range_real > m_model->range.max) { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; continue; } @@ -429,19 +437,22 @@ void SphereCorrectorEmbree::findSPC( const float distance = (pmesh_s - preal_s).l2norm(); + // convert back to base (sensor shared coordinate system) + const Vector preal_b = Tsb * preal_s; + const Vector pmesh_b = Tsb * pmesh_s; + + dataset_points[glob_id] = preal_b; + model_points[glob_id] = pmesh_b; + if(distance < max_distance) { - // convert back to base (sensor shared coordinate system) - const Vector preal_b = Tsb * preal_s; - const Vector pmesh_b = Tsb * pmesh_s; - - dataset_points[glob_id] = preal_b; - model_points[glob_id] = pmesh_b; corr_valid[glob_id] = 1; } else { corr_valid[glob_id] = 0; } } else { + dataset_points[glob_id] = {0.0f, 0.0f, 0.0f}; + model_points[glob_id] = {0.0f, 0.0f, 0.0f}; corr_valid[glob_id] = 0; } } @@ -474,4 +485,20 @@ void SphereCorrectorEmbree::findSPC( findSPC(Tbms, dataset_points(0, Nrays), model_points(0, Nrays), corr_valid(0, Nrays)); } +void SphereCorrectorEmbree::findSPC( + const rmagine::MemoryView& Tbms, + Correspondences& corr) +{ + findSPC(Tbms, corr.dataset_points, corr.model_points, corr.corr_valid); +} + +Correspondences SphereCorrectorEmbree::findSPC( + const rmagine::MemoryView& Tbms) +{ + Correspondences ret; + findSPC(Tbms, ret); + return ret; +} + + } // namespace rmcl \ No newline at end of file diff --git a/src/rmcl/correction/SphereCorrectorOptix.cpp b/src/rmcl/correction/SphereCorrectorOptix.cpp index f031419..c907970 100644 --- a/src/rmcl/correction/SphereCorrectorOptix.cpp +++ b/src/rmcl/correction/SphereCorrectorOptix.cpp @@ -261,13 +261,6 @@ void SphereCorrectorOptix::computeMeansCovsRW( means_dataset, means_model, // outputs Cs, Ncorr ); - - // old: two-pass - // means_covs_batched( - // dataset_points, model_points, corr_valid, // input - // means_dataset, means_model, // outputs - // Cs, Ncorr - // ); } void SphereCorrectorOptix::computeMeansCovsSW( diff --git a/src/rmcl/correction/optix/O1DnCorrectProgramSW.cu b/src/rmcl/correction/optix/O1DnCorrectProgramSW.cu index 1fe1bd2..538aa16 100644 --- a/src/rmcl/correction/optix/O1DnCorrectProgramSW.cu +++ b/src/rmcl/correction/optix/O1DnCorrectProgramSW.cu @@ -108,30 +108,34 @@ extern "C" __global__ void __raygen__rg() const rm::Vector preal_b = Tsb * preal_s; const rm::Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const rm::Vector dD = preal_b - Dmean; - const rm::Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } } mem.Ncorr[pid] = Ncorr; - - if(Ncorr > 0) - { - C /= static_cast(Ncorr); - } - mem.C[pid] = C; mem.m1[pid] = Dmean; mem.m2[pid] = Mmean; diff --git a/src/rmcl/correction/optix/OnDnCorrectProgramSW.cu b/src/rmcl/correction/optix/OnDnCorrectProgramSW.cu index b483845..0a93380 100644 --- a/src/rmcl/correction/optix/OnDnCorrectProgramSW.cu +++ b/src/rmcl/correction/optix/OnDnCorrectProgramSW.cu @@ -93,11 +93,7 @@ extern "C" __global__ void __raygen__rg() const rm::Vector pint_s = ray_orig_s + ray_dir_s * range; rm::Vector nint_s = Rms * nint_m; - - // if(nint_s.dot(ray_dir_s) > 0.0 ) - // { - // nint_s *= -1.0; - // } + const float signed_plane_dist = (pint_s - preal_s).dot(nint_s); const rm::Vector pmesh_s = preal_s + nint_s * signed_plane_dist; @@ -109,32 +105,34 @@ extern "C" __global__ void __raygen__rg() const rm::Vector preal_b = Tsb * preal_s; const rm::Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - // -- The means shoud be exact (except of floating point issues) - // -- Are the covariances exact? I dont think so + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; + + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; + + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; - const rm::Vector dD = preal_b - Dmean; - const rm::Vector dM = pmesh_b - Mmean; + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } } mem.Ncorr[pid] = Ncorr; - - if(Ncorr > 0) - { - C /= static_cast(Ncorr); - } - mem.C[pid] = C; mem.m1[pid] = Dmean; mem.m2[pid] = Mmean; diff --git a/src/rmcl/correction/optix/PinholeCorrectProgramSW.cu b/src/rmcl/correction/optix/PinholeCorrectProgramSW.cu index 0084b0c..c75667e 100644 --- a/src/rmcl/correction/optix/PinholeCorrectProgramSW.cu +++ b/src/rmcl/correction/optix/PinholeCorrectProgramSW.cu @@ -106,31 +106,36 @@ extern "C" __global__ void __raygen__rg() { const rm::Vector preal_b = Tsb * preal_s; const rm::Vector pmesh_b = Tsb * pmesh_s; + // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const rm::Vector dD = preal_b - Dmean; - const rm::Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } } mem.Ncorr[pid] = Ncorr; - - if(Ncorr > 0) - { - C /= static_cast(Ncorr); - } - mem.C[pid] = C; mem.m1[pid] = Dmean; mem.m2[pid] = Mmean; diff --git a/src/rmcl/correction/optix/SphereCorrectProgramSW.cu b/src/rmcl/correction/optix/SphereCorrectProgramSW.cu index 48bb0b8..516ec9d 100644 --- a/src/rmcl/correction/optix/SphereCorrectProgramSW.cu +++ b/src/rmcl/correction/optix/SphereCorrectProgramSW.cu @@ -116,30 +116,34 @@ extern "C" __global__ void __raygen__rg() const rm::Vector preal_b = Tsb * preal_s; const rm::Vector pmesh_b = Tsb * pmesh_s; // Online update: Covariance and means - // - https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // - wrong: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + // use the following equations instead { - Ncorr++; - const float N = static_cast(Ncorr); + const float N_1 = static_cast(Ncorr); + const float N = static_cast(Ncorr + 1); + const float w1 = N_1/N; + const float w2 = 1.0/N; - const rm::Vector dD = preal_b - Dmean; - const rm::Vector dM = pmesh_b - Mmean; + const rm::Vector d_mean_old = Dmean; + const rm::Vector m_mean_old = Mmean; - // reduction - Dmean += dD / N; - Mmean += dM / N; - C += dM.multT(dD); + const rm::Vector d_mean_new = d_mean_old * w1 + preal_b * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + pmesh_b * w2; + + auto P1 = (pmesh_b - m_mean_new).multT(preal_b - d_mean_new); + auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + + // write + Dmean = d_mean_new; + Mmean = m_mean_new; + C = C * w1 + P1 * w2 + P2 * w1; + Ncorr = Ncorr + 1; } } } } mem.Ncorr[pid] = Ncorr; - - if(Ncorr > 0) - { - C /= static_cast(Ncorr); - } - mem.C[pid] = C; mem.m1[pid] = Dmean; mem.m2[pid] = Mmean; diff --git a/src/rmcl/math/math.cpp b/src/rmcl/math/math.cpp index 6251e8f..7b89705 100644 --- a/src/rmcl/math/math.cpp +++ b/src/rmcl/math/math.cpp @@ -3,6 +3,7 @@ #include using namespace rmagine; +namespace rm = rmagine; namespace rmcl { @@ -158,9 +159,16 @@ void weighted_average( const unsigned int Ncorr_ = Ncorr1[pid] + Ncorr2[pid]; const float Ncorrf = static_cast(Ncorr_); - ds[pid] = ds1[pid] * w1 + ds2[pid] * w2; - ms[pid] = ms1[pid] * w1 + ms2[pid] * w2; - Cs[pid] = Cs1[pid] * w1 + Cs2[pid] * w2; + const rm::Vector dsi = ds1[pid] * w1 + ds2[pid] * w2; + const rm::Vector msi = ms1[pid] * w1 + ms2[pid] * w2; + + // std::cout << "weighted_average 1 - NEW IMPL" << std::endl; + auto P1 = Cs1[pid] * w1 + Cs2[pid] * w2; + auto P2 = (ms1[pid] - msi).multT(ds1[pid] - dsi) * w1 + (ms2[pid] - msi).multT(ds2[pid] - dsi) * w2; + + ds[pid] = dsi; + ms[pid] = msi; + Cs[pid] = P1 + P2; Ncorr[pid] = Ncorr_; } } @@ -175,6 +183,8 @@ void weighted_average( rmagine::MemoryView& Cs, rmagine::MemoryView& Ncorr) { + // std::cout << "weighted_average 2 - NEW IMPL" << std::endl; + #pragma omp parallel for for(size_t pid=0; pid& Cs, rmagine::MemoryView& Ncorr) { + // std::cout << "weighted_average 3 - NEW IMPL" << std::endl; + #pragma omp parallel for if(ds.size() > 100) for(size_t pid=0; pid weights(model_means.size()); - // const float Ncorrf = static_cast(Ncorr_); - - // for(size_t i=0; i(Ncorrs[i][pid]) / Ncorrf; - // } - Vector ms_ = {0.0, 0.0, 0.0}; Vector ds_ = {0.0, 0.0, 0.0}; Matrix3x3 C_; C_.setZeros(); + unsigned int Ncorr_ = 0; + float w_ = 0.0; for(size_t i=0; i >& pre_results, CorrectionPreResults& pre_results_combined) { + // std::cout << "weighted_average 4 - NEW IMPL" << std::endl; + // source: to fuse std::vector > ds; std::vector > ms; @@ -305,6 +326,7 @@ void weighted_average( const std::vector& weights, CorrectionPreResults& pre_results_combined) { + // std::cout << "weighted_average 5 - NEW IMPL" << std::endl; // source: to fuse std::vector > ds; std::vector > ms; @@ -327,6 +349,8 @@ CorrectionPreResults weighted_average( const std::vector >& pre_results, const std::vector& weights) { + // std::cout << "weighted_average 6 - NEW IMPL" << std::endl; + CorrectionPreResults res; if(pre_results.size() > 0) { diff --git a/src/rmcl/math/math.cu b/src/rmcl/math/math.cu index 0669d6e..330b24e 100644 --- a/src/rmcl/math/math.cu +++ b/src/rmcl/math/math.cu @@ -174,12 +174,27 @@ __global__ void weighted_average_kernel( { const unsigned int Ncorr_ = Ncorr1[pid] + Ncorr2[pid]; const float Ncorrf = static_cast(Ncorr_); - float w1 = static_cast(Ncorr1[pid]) / Ncorrf; - float w2 = static_cast(Ncorr2[pid]) / Ncorrf; - - ds[pid] = ds1[pid] * w1 + ds2[pid] * w2; - ms[pid] = ms1[pid] * w1 + ms2[pid] * w2; - Cs[pid] = Cs1[pid] * w1 + Cs2[pid] * w2; + const float w1 = static_cast(Ncorr1[pid]) / Ncorrf; + const float w2 = static_cast(Ncorr2[pid]) / Ncorrf; + + const rm::Vector d1 = ds1[pid]; + const rm::Vector m1 = ms1[pid]; + const rm::Matrix3x3 C1 = Cs1[pid]; + const rm::Vector d2 = ds2[pid]; + const rm::Vector m2 = ms2[pid]; + const rm::Matrix3x3 C2 = Cs2[pid]; + + // means + const rm::Vector d = d1 * w1 + d2 * w2; + const rm::Vector m = m1 * w1 + m2 * w2; + + // covs + auto P1 = C1 * w1 + C2 * w2; + auto P2 = (m1 - m).multT(d1 - d) * w1 + (m2 - m).multT(d2 - d) * w2; + + ds[pid] = d; + ms[pid] = m; + Cs[pid] = P1 + P2; Ncorr[pid] = Ncorr_; } } @@ -221,9 +236,24 @@ __global__ void weighted_average_kernel( { const unsigned int Ncorr_ = Ncorr1[pid] + Ncorr2[pid]; - ds[pid] = ds1[pid] * w1 + ds2[pid] * w2; - ms[pid] = ms1[pid] * w1 + ms2[pid] * w2; - Cs[pid] = Cs1[pid] * w1 + Cs2[pid] * w2; + const rm::Vector d1 = ds1[pid]; + const rm::Vector m1 = ms1[pid]; + const rm::Matrix3x3 C1 = Cs1[pid]; + const rm::Vector d2 = ds2[pid]; + const rm::Vector m2 = ms2[pid]; + const rm::Matrix3x3 C2 = Cs2[pid]; + + // means + const rm::Vector d = d1 * w1 + d2 * w2; + const rm::Vector m = m1 * w1 + m2 * w2; + + // covs + auto P1 = C1 * w1 + C2 * w2; + auto P2 = (m1 - m).multT(d1 - d) * w1 + (m2 - m).multT(d2 - d) * w2; + + ds[pid] = d; + ms[pid] = m; + Cs[pid] = P1 + P2; Ncorr[pid] = Ncorr_; } } diff --git a/src/rmcl/math/math_batched.cpp b/src/rmcl/math/math_batched.cpp index c541168..530c88f 100644 --- a/src/rmcl/math/math_batched.cpp +++ b/src/rmcl/math/math_batched.cpp @@ -101,9 +101,8 @@ void means_covs_online_batched( { const float N_1 = static_cast(n_corr); const float N = static_cast(n_corr + 1); - - // update ncorr - n_corr++; + const float w1 = N_1/N; + const float w2 = 1.0/N; const rm::Vector Di = data_batch[j]; // read const rm::Vector Mi = model_batch[j]; // read @@ -111,27 +110,20 @@ void means_covs_online_batched( const rm::Vector m_mean_old = m_mean; // read // update means - // rm::Vector dD = Di - d_mean; - // rm::Vector dM = Mi - m_mean; // save old means for covariance - const rm::Vector d_mean_new = d_mean_old + (Di - d_mean) / N; - const rm::Vector m_mean_new = m_mean_old + (Mi - m_mean) / N; - - const float w1 = N_1/N; - const float w2 = 1.0/N; + const rm::Vector d_mean_new = d_mean_old * w1 + Di * w2; + const rm::Vector m_mean_new = m_mean_old * w1 + Mi * w2; auto P1 = (Mi - m_mean_new).multT(Di - d_mean_new); auto P2 = (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new); + // Write C = C * w1 + P1 * w2 + P2 * w1; - - // C += ((Mi - m_mean_new).multT(Di - d_mean_new) - C) * 1.0 / N - // + (m_mean_old - m_mean_new).multT(d_mean_old - d_mean_new) * N_1/N; - d_mean = d_mean_new; // write m_mean = m_mean_new; // write + n_corr = n_corr + 1; } } diff --git a/src/rmcl/math/math_batched.cu b/src/rmcl/math/math_batched.cu index 3fabb89..5693698 100644 --- a/src/rmcl/math/math_batched.cu +++ b/src/rmcl/math/math_batched.cu @@ -376,35 +376,25 @@ __global__ void means_covs_online_batched_kernel( const rm::Vector Dnew = D + (Di - D) / Nf; const rm::Vector Mnew = M + (Mi - M) / Nf; - // dD = Di - Dnew; - // dM = Mi - Mnew; - - - const float w1 = N_1f / Nf; - const float w2 = 1.0 / Nf; - - auto P1 = (Mi - Mnew).multT(Di - Dnew); - auto P2 = (M - Mnew).multT(D - Dnew); - - // printf("w1: %f\n", w1); - // printf("w2: %f\n", w2); - // printf("P1: %f\n", P1(0,0)); - // printf("P2: %f\n", P2(0,0)); - // printf("P3: %f\n", P3(0,0)); - // shared write // auto Cnew = P1 + P2 + P3; // sC[tid] = C * N_1f / Nf + (Mi - Mnew).multT(Di - Dnew) * 1.0 / Nf // + (M - Mnew).multT(D - Dnew) * N_1f / Nf; + const float w1 = N_1f / Nf; + const float w2 = 1.0 / Nf; + + auto P1 = (Mi - Mnew).multT(Di - Dnew); + auto P2 = (M - Mnew).multT(D - Dnew); + auto Cnew = C * w1 + P1 * w2 + P2 * w1; + + sC[tid] = Cnew; sD[tid] = Dnew; sM[tid] = Mnew; sN[tid] = N; - - printf("(%u,G) -> (%u,S) = %u, %f\n", globId + blockSize * i, tid, N, Cnew(0,0) ); } } } @@ -434,29 +424,16 @@ __global__ void means_covs_online_batched_kernel( const float w1 = static_cast(N) / static_cast(Ntot); const float w2 = static_cast(Nnext) / static_cast(Ntot); - - - // auto P1 = (Mi - Mnew).multT(Di - Dnew); - // auto P2 = (M - Mnew).multT(D - Dnew); - const rm::Vector Dtot = D * w1 + Dnext * w2; - - - - // printf("(%u,S) %u, (%f, %f, %f) -> (%u,S) %u, (%f, %f, %f) = %u, (%f, %f, %f)\n", - // tid + s, Nnext, Dnext.x, Dnext.y, Dnext.z, - // tid, N, D.x, D.y, D.z, - // Ntot, Dtot.x, Dtot.y, Dtot.z); + const rm::Vector Mtot = M * w1 + Mnext * w2; + auto P1 = C * w1 + Cnext * w2; + auto P2 = (M - Mtot).multT(D - Dtot) * w1 + (Mnext - Mtot).multT(Dnext - Dtot) * w2; + auto Cnew = P1 + P2; + // shared write sD[tid] = Dtot; - sM[tid] = M * w1 + Mnext * w2; - - - // TODO weighted covariance - auto Cnew = C * w1 + Cnext * w2; - - + sM[tid] = Mtot; sC[tid] = Cnew; // currently only a sum sN[tid] = Ntot; } @@ -574,7 +551,6 @@ __global__ void means_covs_online_approx_batched_kernel( const float w1 = static_cast(N) / static_cast(Ntot); const float w2 = static_cast(Nnext) / static_cast(Ntot); - const rm::Vector Dtot = D * w1 + Dnext * w2; // printf("(%u,S) %u, (%f, %f, %f) -> (%u,S) %u, (%f, %f, %f) = %u, (%f, %f, %f)\n", // tid + s, Nnext, Dnext.x, Dnext.y, Dnext.z,