Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nearest Neighbor Model #158

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions include/albatross/NearestNeighbor
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019 Swift Navigation Inc.
* Contact: Swift Navigation <[email protected]>
*
* This source is subject to the license found in the file 'LICENSE' which must
* be distributed together with this source. All other rights reserved.
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
* EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef ALBATROSS_NEAREST_NEIGHBOR_MODEL_H
#define ALBATROSS_NEAREST_NEIGHBOR_MODEL_H

#include "Core"

#include <albatross/src/models/nearest_neighbor.hpp>

#endif
20 changes: 20 additions & 0 deletions include/albatross/serialize/NearestNeighbor
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019 Swift Navigation Inc.
* Contact: Swift Navigation <[email protected]>
*
* This source is subject to the license found in the file 'LICENSE' which must
* be distributed together with this source. All other rights reserved.
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
* EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef ALBATROSS_SERIALIZE_NEAREST_NEIGHBOR_H
#define ALBATROSS_SERIALIZE_NEAREST_NEIGHBOR_H

#include "Core"

#include "../src/cereal/nearest_neighbor.hpp"

#endif
49 changes: 49 additions & 0 deletions include/albatross/src/cereal/nearest_neighbor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (C) 2019 Swift Navigation Inc.
* Contact: Swift Navigation <[email protected]>
*
* This source is subject to the license found in the file 'LICENSE' which must
* be distributed together with this source. All other rights reserved.
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
* EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef ALBATROSS_CEREAL_NEAREST_NEIGHBOR_HPP_
#define ALBATROSS_CEREAL_NEAREST_NEIGHBOR_HPP_

namespace albatross {

template <typename DistanceMetric> class NearestNeighborModel;

template <typename FeatureType> struct NearestNeighborFit;

} // namespace albatross

namespace cereal {

/*
 * Serializes a nearest neighbor fit.  The fit holds nothing but its
 * training data, so the features and targets are written as two named
 * values; the version argument is unused.
 */
template <typename Archive, typename FeatureType>
inline void
save(Archive &archive,
     const albatross::Fit<albatross::NearestNeighborFit<FeatureType>> &fit,
     const std::uint32_t) {
  const auto &dataset = fit.training_data;
  // Written in the same order in which load() reads them back.
  archive(cereal::make_nvp("training_features", dataset.features));
  archive(cereal::make_nvp("training_targets", dataset.targets));
}

/*
 * Restores a nearest neighbor fit from an archive.  Reads the
 * "training_features" and "training_targets" values (in that order)
 * and rebuilds the fit's training dataset from them; the version
 * argument is unused.
 */
template <typename Archive, typename FeatureType>
inline void
load(Archive &archive,
     albatross::Fit<albatross::NearestNeighborFit<FeatureType>> &fit,
     const std::uint32_t) {
  std::vector<FeatureType> features;
  archive(cereal::make_nvp("training_features", features));
  albatross::MarginalDistribution targets;
  archive(cereal::make_nvp("training_targets", targets));
  // We are inside namespace cereal here, so the dataset type has to be
  // qualified explicitly like the other albatross types in this file.
  fit.training_data =
      albatross::RegressionDataset<FeatureType>(features, targets);
}

} // namespace cereal

#endif /* ALBATROSS_CEREAL_NEAREST_NEIGHBOR_HPP_ */
16 changes: 13 additions & 3 deletions include/albatross/src/evaluation/cross_validation_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,25 @@ inline MarginalDistribution concatenate_marginal_predictions(
Eigen::VectorXd variance(n);
Eigen::Index number_filled = 0;
// Put all the predicted means back in order.
bool has_covariance = false;
for (const auto &pair : indexer) {
assert(preds.at(pair.first).size() == pair.second.size());
set_subset(preds.at(pair.first).mean, pair.second, &mean);
set_subset(preds.at(pair.first).covariance.diagonal(), pair.second,
&variance);
if (preds.at(pair.first).has_covariance()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to do this or just add 1e6 as variances where we currently have none?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this method we're taking a pair of MarginalDistribution and concatenating them, so if they both don't have a defined covariance then we want to preserve that in the concatenation. Somewhere on my list of to-dos is to remove the optional behavior for covariances in favor of a third distribution type, something like:

using MeanOnlyDistribution = Distribution<Empty>;
using MarginalDistribution = Distribution<DiagonalMatrixXd>;
using JointDistribution = Distribution<Eigen::MatrixXd>;

or something along those lines, but that's out of scope here.

has_covariance = true;
set_subset(preds.at(pair.first).covariance.diagonal(), pair.second,
&variance);
} else {
assert(!has_covariance);
}
number_filled += static_cast<Eigen::Index>(pair.second.size());
}
assert(number_filled == n);
return MarginalDistribution(mean, variance.asDiagonal());
if (has_covariance) {
return MarginalDistribution(mean, variance.asDiagonal());
} else {
return MarginalDistribution(mean);
}
}

template <typename PredictionMetricType, typename FeatureType,
Expand Down
115 changes: 115 additions & 0 deletions include/albatross/src/models/nearest_neighbor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (C) 2019 Swift Navigation Inc.
* Contact: Swift Navigation <[email protected]>
*
* This source is subject to the license found in the file 'LICENSE' which must
* be distributed together with this source. All other rights reserved.
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
* EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef ALBATROSS_SRC_MODELS_NEAREST_NEIGHBOR_MODEL_HPP_
#define ALBATROSS_SRC_MODELS_NEAREST_NEIGHBOR_MODEL_HPP_

namespace albatross {

template <typename DistanceMetric> class NearestNeighborModel;

template <typename FeatureType> struct NearestNeighborFit;

/*
 * The fit for a nearest neighbor model.  It consists solely of
 * the dataset the model was trained on.
 */
template <typename FeatureType> struct Fit<NearestNeighborFit<FeatureType>> {

  RegressionDataset<FeatureType> training_data;

  // An empty fit with value-initialized training data.
  Fit() : training_data() {}

  // Intentionally not explicit: callers build a fit directly from a dataset.
  Fit(const RegressionDataset<FeatureType> &dataset)
      : training_data(dataset) {}

  // Two fits are equal when their training datasets compare equal.
  bool operator==(const Fit<NearestNeighborFit<FeatureType>> &rhs) const {
    return this->training_data == rhs.training_data;
  }
};

template <typename DistanceMetric>
class NearestNeighborModel
: public ModelBase<NearestNeighborModel<DistanceMetric>> {

public:
// Builds a model with a value-initialized distance metric.
NearestNeighborModel() : distance_metric() {}

// Returns the name of this model.
std::string get_name() const { return "nearest_neighbor_model"; }

/*
 * Fitting performs no computation: the features and targets are simply
 * bundled into a dataset and stored in the returned fit.
 */
template <typename FeatureType>
Fit<NearestNeighborFit<FeatureType>>
_fit_impl(const std::vector<FeatureType> &features,
          const MarginalDistribution &targets) const {
  using FitType = Fit<NearestNeighborFit<FeatureType>>;
  const RegressionDataset<FeatureType> dataset(features, targets);
  return FitType(dataset);
}

/*
 * Builds a fit model directly from a joint prediction: the given
 * features are paired with the prediction's mean and only the diagonal
 * of its covariance, and that dataset becomes the training data of the
 * returned FitModel (which holds a copy of this model).
 */
template <typename FeatureType>
auto fit_from_prediction(const std::vector<FeatureType> &features,
const JointDistribution &prediction) const {
const NearestNeighborModel<DistanceMetric> m(*this);
// Keep only the diagonal of the covariance; off-diagonal terms are
// dropped when converting to a marginal distribution.
MarginalDistribution marginal_pred(
prediction.mean, prediction.covariance.diagonal().asDiagonal());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't prediction.covariance work here? Or are you looking to zero the non-diagonal elements?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah exactly, I need to zero the non-diagonal elements since the NearestNeighbor model can never actually predict off diagonals.

// The fit is nothing more than the dataset built from the prediction.
Fit<NearestNeighborFit<FeatureType>> fit = {
RegressionDataset<FeatureType>(features, marginal_pred)};
FitModel<NearestNeighborModel, Fit<NearestNeighborFit<FeatureType>>>
fit_model(m, fit);
return fit_model;
}

/*
 * Predicts each requested feature by copying the target of the closest
 * training feature (as measured by the distance metric).  The variance
 * is only included in the result when the training targets carry a
 * covariance; otherwise a mean-only distribution is returned.
 */
template <typename FeatureType>
MarginalDistribution
_predict_impl(const std::vector<FeatureType> &features,
              const Fit<NearestNeighborFit<FeatureType>> &fit,
              PredictTypeIdentity<MarginalDistribution> &&) const {
  const auto &training = fit.training_data;
  const Eigen::Index n = static_cast<Eigen::Index>(features.size());
  // Start from NAN so any entry left unfilled would be easy to spot.
  Eigen::VectorXd mean = Eigen::VectorXd::Constant(n, NAN);
  Eigen::VectorXd variance = Eigen::VectorXd::Constant(n, NAN);

  for (std::size_t i = 0; i < features.size(); ++i) {
    const auto nearest =
        index_with_min_distance(features[i], training.features);
    mean[i] = training.targets.mean[nearest];
    variance[i] = training.targets.get_diagonal(nearest);
  }

  if (!training.targets.has_covariance()) {
    return MarginalDistribution(mean);
  }
  return MarginalDistribution(mean, variance.asDiagonal());
}

private:
/*
 * Returns the index of the element of `features` with the smallest
 * distance (according to distance_metric) to `ref`.  `features` must
 * not be empty (checked with an assert).  When several elements share
 * the smallest distance the lowest index is returned.
 */
template <typename FeatureType>
std::size_t
index_with_min_distance(const FeatureType &ref,
const std::vector<FeatureType> &features) const {
assert(features.size() > 0);

// Seed the search with the first element so the loop can start at 1.
std::size_t min_index = 0;
double min_distance = distance_metric(ref, features[0]);

for (std::size_t i = 1; i < features.size(); ++i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we turn min_distance into an optional so the loop can start at 0? The only difference in the loop would be !min_distance && going at the start of the if.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that pattern better too ... but so far albatross doesn't used any optionals! So we'd have to add a third party lib for it which I've been avoiding (though perhaps the time has come).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative is to initialize the min_distance to DBL_MAX or some such, so it will always be replaced by the first distance.

const double dist = distance_metric(ref, features[i]);
// Strict comparison: an equally distant later element does not replace
// the current best.
if (dist < min_distance) {
min_index = i;
min_distance = dist;
}
}
return min_index;
}

DistanceMetric distance_metric;
};

} // namespace albatross

#endif // ALBATROSS_SRC_MODELS_NEAREST_NEIGHBOR_MODEL_HPP_
9 changes: 1 addition & 8 deletions include/albatross/src/models/null_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,13 @@ class NullModel : public ModelBase<NullModel> {

std::string get_name() const { return "null_model"; };

/*
* The Gaussian Process Regression model derives its parameters from
* the covariance functions.
*/
ParameterStore get_params() const override { return params_; }

void unchecked_set_param(const std::string &name,
const Parameter &param) override {
params_[name] = param;
}

// If the implementing class doesn't have a fit method for this
// FeatureType but the CovarianceFunction does.
template <typename FeatureType>
Fit<NullModel> _fit_impl(const std::vector<FeatureType> &features,
const MarginalDistribution &targets) const {
Expand Down Expand Up @@ -87,5 +81,4 @@ class NullModel : public ModelBase<NullModel> {

} // namespace albatross

#endif /* THIRD_PARTY_ALBATROSS_INCLUDE_ALBATROSS_SRC_MODELS_NULL_MODEL_HPP_ \
*/
#endif // ALBATROSS_SRC_MODELS_NULL_MODEL_HPP_
30 changes: 19 additions & 11 deletions tests/test_cross_validation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,22 @@ TYPED_TEST_P(RegressionModelTester, test_logo_predict_variants) {
auto dataset = this->test_case.get_dataset();
auto model = this->test_case.get_model();

// Here we assume that the test case is linear, then split
// it using a group function which will not preserve order
// and make sure that cross validation properly reassembles
// the predictions
LeaveOneGroupOut<typename decltype(dataset)::Feature> logo(group_by_interval);
const auto prediction = model.cross_validate().predict(dataset, logo);

EXPECT_TRUE(is_monotonic_increasing(prediction.mean()));

expect_predict_variants_consistent(prediction);
// The nearest neighbor approach is not capable of modelling linear
// trends and in turn fails this test.
if (!std::is_same<decltype(model),
NearestNeighborModel<EuclideanDistance>>::value) {
// Here we assume that the test case is linear, then split
// it using a group function which will not preserve order
// and make sure that cross validation properly reassembles
// the predictions
LeaveOneGroupOut<typename decltype(dataset)::Feature> logo(
group_by_interval);
const auto prediction = model.cross_validate().predict(dataset, logo);

EXPECT_TRUE(is_monotonic_increasing(prediction.mean()));

expect_predict_variants_consistent(prediction);
}
}

TYPED_TEST_P(RegressionModelTester, test_loo_predict_variants) {
Expand Down Expand Up @@ -110,7 +116,9 @@ TYPED_TEST_P(RegressionModelTester, test_score_variants) {
// Here we make sure the cross validated mean absolute error is reasonable.
// Note that because we are running leave one out cross validation, the
// RMSE for each fold is just the absolute value of the error.
if (!std::is_same<decltype(model), NullModel>::value) {
if (!std::is_same<decltype(model), NullModel>::value &&
!std::is_same<decltype(model),
NearestNeighborModel<EuclideanDistance>>::value) {
EXPECT_LE(cv_scores.mean(), 0.1);
}
}
Expand Down
15 changes: 14 additions & 1 deletion tests/test_models.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <albatross/GP>
#include <albatross/LeastSquares>
#include <albatross/NearestNeighbor>
#include <albatross/NullModel>
#include <albatross/Ransac>
#include <gtest/gtest.h>
Expand Down Expand Up @@ -181,6 +182,17 @@ class MakeNullModel {
}
};

/*
 * Test case pairing a default constructed nearest neighbor model
 * (using EuclideanDistance) with the toy linear dataset.
 */
class MakeNearestNeighborModel {
public:
  // The model under test.
  NearestNeighborModel<EuclideanDistance> get_model() const {
    return NearestNeighborModel<EuclideanDistance>{};
  }

  // The dataset the model is fit to and evaluated against.
  RegressionDataset<double> get_dataset() const {
    return make_toy_linear_data();
  }
};

template <typename ModelTestCase>
class RegressionModelTester : public ::testing::Test {
public:
Expand All @@ -189,7 +201,8 @@ class RegressionModelTester : public ::testing::Test {

typedef ::testing::Types<MakeLinearRegression, MakeGaussianProcess,
MakeAdaptedGaussianProcess, MakeRansacGaussianProcess,
MakeRansacAdaptedGaussianProcess, MakeNullModel>
MakeRansacAdaptedGaussianProcess, MakeNullModel,
MakeNearestNeighborModel>
ExampleModels;

TYPED_TEST_CASE_P(RegressionModelTester);
Expand Down
1 change: 1 addition & 0 deletions tests/test_serialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <albatross/serialize/Common>
#include <albatross/serialize/GP>
#include <albatross/serialize/LeastSquares>
#include <albatross/serialize/NearestNeighbor>
#include <albatross/serialize/Ransac>

#include <gtest/gtest.h>
Expand Down