Skip to content

Commit

Permalink
Fix missaligned accesses to values of mixed types in the typed columns
Browse files Browse the repository at this point in the history
  • Loading branch information
polyntsov committed Oct 13, 2023
1 parent 3b9eabf commit 79133e6
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 17 deletions.
43 changes: 32 additions & 11 deletions src/core/model/table/typed_column_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@
#include "column_layout_typed_relation_data.h"
#include "create_type.h"

namespace {

size_t GetNextAlignedOffset(size_t cur_offset, size_t align) {
// alignment should be power of 2
assert(align != 0 && (align & (align - 1)) == 0);
if (size_t remainder = cur_offset % align; remainder != 0) {
cur_offset += align - remainder;
}
return cur_offset;
}

} // namespace

namespace model {

TypedColumnDataFactory::TypeMap TypedColumnDataFactory::CreateTypeMap() const {
Expand Down Expand Up @@ -55,14 +68,16 @@ TypedColumnDataFactory::TypeIdToType TypedColumnDataFactory::MapTypeIdsToTypes(
return type_id_to_type;
}

size_t TypedColumnDataFactory::CalculateMixedBufSize(MixedType const* mixed,
TypeIdToType const& type_id_to_type,
TypeMap const& type_map) const noexcept {
size_t result = 0;
for (auto const& [type_id, indices] : type_map) {
result += (mixed->GetMixedValueSize(type_id_to_type.at(type_id).get())) * indices.size();
size_t TypedColumnDataFactory::CalculateMixedBufSize(
std::vector<TypeId> const& types_layout,
TypeIdToType const& type_id_to_type) const noexcept {
size_t buf_size = 0;
for (TypeId const type_id : types_layout) {
size_t align = MixedType::GetAlignment(type_id);
buf_size = GetNextAlignedOffset(buf_size, align);
buf_size += MixedType::GetMixedValueSize(type_id_to_type.at(type_id).get());
}
return result;
return buf_size;
}

TypedColumnData TypedColumnDataFactory::CreateMixedFromTypeMap(std::unique_ptr<Type const> type,
Expand All @@ -79,23 +94,29 @@ TypedColumnData TypedColumnDataFactory::CreateMixedFromTypeMap(std::unique_ptr<T
size_t const empties_num = empties.size();

TypeIdToType type_id_to_type = MapTypeIdsToTypes(type_map);
size_t const buf_size = CalculateMixedBufSize(mixed_type, type_id_to_type, type_map);
std::unique_ptr<std::byte[]> buf(new std::byte[buf_size]);
std::vector<TypeId> types_layout = GetTypesLayout(type_map);
size_t const buf_size = CalculateMixedBufSize(types_layout, type_id_to_type);
static_assert(kTypesMaxAlignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__,
"Overaligned types lead to a missaligned accesses to values in the current "
"implementation, which is UB");
std::unique_ptr<std::byte[]> buf(new std::byte[buf_size]);
type_map.clear(); /* type_map is no longer needed, so saving space */

std::byte* next = buf.get();
size_t buf_index = 0;
for (size_t i = 0; i != types_layout.size(); ++i) {
TypeId const type_id = types_layout[i];
Type const* concrete_type = type_id_to_type.at(type_id).get();
size_t const value_size = mixed_type->GetMixedValueSize(concrete_type);

buf_index = GetNextAlignedOffset(buf_index, mixed_type->GetAlignment(type_id));
std::byte* next = buf.get() + buf_index;

assert(next + value_size <= buf.get() + buf_size);

mixed_type->ValueFromStr(next, std::move(unparsed_[i]), concrete_type);

data.push_back(next);
next += value_size;
buf_index += value_size;
}

return TypedColumnData(column_, std::move(type), rows_num, nulls_num, empties_num,
Expand Down
4 changes: 2 additions & 2 deletions src/core/model/table/typed_column_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ class TypedColumnDataFactory {
{TypeId::kNull, std::regex(Null::kValue.data())},
{TypeId::kEmpty, std::regex(R"(^$)")}};

size_t CalculateMixedBufSize(MixedType const* mixed, TypeIdToType const& tid_to_map,
TypeMap const& type_map) const noexcept;
size_t CalculateMixedBufSize(std::vector<TypeId> const& types_layout,
TypeIdToType const& type_id_to_type) const noexcept;
std::vector<TypeId> GetTypesLayout(TypeMap const& tm) const;
TypeIdToType MapTypeIdsToTypes(TypeMap const& tm) const;
TypeMap CreateTypeMap() const;
Expand Down
15 changes: 15 additions & 0 deletions src/core/model/types/builtin.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <algorithm>

#include <boost/serialization/strong_typedef.hpp>
#include <enum.h>

Expand Down Expand Up @@ -89,6 +91,19 @@ template <> struct TypeConverter<Empty> {

enum class CompareResult { kLess = -1, kGreater = 1, kEqual = 0, kNotEqual = 2 };

namespace detail {

template <typename T>
struct TupleMaxAlign {};
template <typename... Ts>
struct TupleMaxAlign<std::tuple<Ts...>> {
static constexpr size_t value = std::max({alignof(Ts)...});
};

} // namespace detail

inline constexpr size_t kTypesMaxAlignment = detail::TupleMaxAlign<AllValueTypes>::value;

} // namespace model

/* Should be outside the namespace */
Expand Down
28 changes: 24 additions & 4 deletions src/core/model/types/mixed_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <cassert>
#include <memory>
#include <unordered_map>

#include "big_int_type.h"
#include "create_type.h"
Expand All @@ -24,9 +25,15 @@ class MixedType final : public Type {
private:
bool is_null_eq_null_;

public:
[[nodiscard]] static size_t GetTypeIdSizeWithPadding(TypeId type_id) noexcept {
size_t alignment = GetAlignment(type_id);
assert(kTypeIdSize <= alignment);
return alignment;
}

static constexpr size_t kTypeIdSize = sizeof(TypeId::_integral);

public:
explicit MixedType(bool is_null_eq_null) noexcept
: Type(TypeId::kMixed), is_null_eq_null_(is_null_eq_null) {}

Expand Down Expand Up @@ -100,6 +107,19 @@ class MixedType final : public Type {
return CreateType(type_id, is_null_eq_null_);
}

[[nodiscard]] static size_t GetAlignment(TypeId type_id) noexcept {
// Could possibly be implemented as a virtual method, however, it's much more incovinient
// since virtual method requires an object of type `Type` to be called which is not always
// available. Moreover, alignment of a type is used only when implementing MixedType
// or working with it, so GetAlignment as a general method for any type seems quite useless.
static std::unordered_map<TypeId, size_t> const type_to_alignment = {
{TypeId::kNull, alignof(Null)}, {TypeId::kEmpty, alignof(Empty)},
{TypeId::kInt, alignof(Int)}, {TypeId::kDouble, alignof(Double)},
{TypeId::kString, alignof(String)}, {TypeId::kBigInt, alignof(BigInt)}};
assert(type_to_alignment.find(type_id) != type_to_alignment.end());
return type_to_alignment.at(type_id);
}

static void ValueFromStr(std::byte* dest, std::string s, Type const* type) {
type->ValueFromStr(SetTypeId(dest, type->GetTypeId()), std::move(s));
}
Expand All @@ -118,11 +138,11 @@ class MixedType final : public Type {
}

[[nodiscard]] static std::byte* RetrieveValue(std::byte* value_with_type) noexcept {
return value_with_type + kTypeIdSize;
return value_with_type + GetTypeIdSizeWithPadding(RetrieveTypeId(value_with_type));
}

[[nodiscard]] static std::byte const* RetrieveValue(std::byte const* value_with_type) noexcept {
return value_with_type + kTypeIdSize;
return value_with_type + GetTypeIdSizeWithPadding(RetrieveTypeId(value_with_type));
}

static std::byte* SetTypeId(std::byte* dest, TypeId const type_id) noexcept {
Expand All @@ -131,7 +151,7 @@ class MixedType final : public Type {
}

[[nodiscard]] static size_t GetMixedValueSize(Type const* type) noexcept {
return kTypeIdSize + type->GetSize();
return GetTypeIdSizeWithPadding(type->GetTypeId()) + type->GetSize();
}
};

Expand Down

0 comments on commit 79133e6

Please sign in to comment.