Skip to content

Commit

Permalink
Server: Add deadlock checker
Browse files Browse the repository at this point in the history
Crashes the server when the main thread is idle for too long
  • Loading branch information
SirLynix committed Oct 20, 2024
1 parent dd19c0a commit 34108dc
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 0 deletions.
41 changes: 41 additions & 0 deletions include/CommonLib/HealthCheckerAppComponent.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (C) 2024 Jérôme "SirLynix" Leclercq ([email protected])
// This file is part of the "This Space Of Mine" project
// For conditions of distribution and use, see copyright notice in LICENSE

#pragma once

#ifndef TSOM_COMMONLIB_HEALTHCHECKERAPPCOMPONENT_HPP
#define TSOM_COMMONLIB_HEALTHCHECKERAPPCOMPONENT_HPP

#include <CommonLib/Export.hpp>
#include <Nazara/Core/ApplicationComponent.hpp>
#include <atomic>
#include <semaphore>
#include <thread>

namespace tsom
{
// Application component acting as a watchdog for the main loop.
// It owns a background thread which checks about once per second that Update() has been called
// since the previous check, and aborts the process once that has not happened for
// maxHangSeconds consecutive checks.
class TSOM_COMMONLIB_API HealthCheckerAppComponent : public Nz::ApplicationComponent
{
public:
// Starts the checker thread. maxHangSeconds is the number of consecutive one-second checks
// without any Update() call that are tolerated before the process is aborted.
HealthCheckerAppComponent(Nz::ApplicationBase& app, unsigned int maxHangSeconds);
// Neither copyable nor movable: the checker thread captures `this`
HealthCheckerAppComponent(const HealthCheckerAppComponent&) = delete;
HealthCheckerAppComponent(HealthCheckerAppComponent&&) = delete;
// Signals the checker thread to exit and joins it
~HealthCheckerAppComponent();

HealthCheckerAppComponent& operator=(const HealthCheckerAppComponent&) = delete;
HealthCheckerAppComponent& operator=(HealthCheckerAppComponent&&) = delete;

private:
// Increments the update counter; elapsedTime is not used
void Update(Nz::Time elapsedTime) override;

std::atomic_uint m_updateCounter; //< incremented by Update(), read by the checker thread
std::binary_semaphore m_exitChecker; //< released by the destructor to stop the checker thread
std::thread m_healthCheckThread; //< runs the periodic check, joined by the destructor
unsigned int m_maxHangSeconds; //< threshold of consecutive stalled checks before aborting
};
}

#include <CommonLib/HealthCheckerAppComponent.inl>

#endif // TSOM_COMMONLIB_HEALTHCHECKERAPPCOMPONENT_HPP
7 changes: 7 additions & 0 deletions include/CommonLib/HealthCheckerAppComponent.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Copyright (C) 2024 Jérôme "SirLynix" Leclercq ([email protected])
// This file is part of the "This Space Of Mine" project
// For conditions of distribution and use, see copyright notice in LICENSE

// No inline definitions are provided for HealthCheckerAppComponent in this file for now
namespace tsom
{
}
56 changes: 56 additions & 0 deletions src/CommonLib/HealthCheckerAppComponent.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (C) 2024 Jérôme "SirLynix" Leclercq ([email protected])
// This file is part of the "This Space Of Mine" project
// For conditions of distribution and use, see copyright notice in LICENSE

#include <CommonLib/HealthCheckerAppComponent.hpp>
#include <fmt/color.h>
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>

namespace tsom
{
// Builds the component and starts the checker thread.
//
// app:            application the component is attached to (forwarded to ApplicationComponent)
// maxHangSeconds: number of consecutive one-second checks without any Update() call after which
//                 the process is aborted. A value of 0 aborts on the very first stalled check.
//
// The checker thread wakes up about once per second and compares the update counter with the
// value it saw on the previous wake-up. If the counter moved, the stall count is reset;
// otherwise it is incremented, and reaching maxHangSeconds prints a message and calls std::abort().
HealthCheckerAppComponent::HealthCheckerAppComponent(Nz::ApplicationBase& app, unsigned int maxHangSeconds) :
ApplicationComponent(app),
m_updateCounter(0),
m_exitChecker(0),
m_maxHangSeconds(maxHangSeconds)
{
	// Every member read by the thread is initialized above, before the thread is started
	m_healthCheckThread = std::thread([this]
	{
		unsigned int lastUpdateCounter = 0;
		unsigned int hangCounter = 0;

		// The semaphore doubles as a one-second timer and as the exit signal:
		// acquiring it means the destructor released it and the thread must stop
		while (!m_exitChecker.try_acquire_for(std::chrono::seconds(1)))
		{
			unsigned int updateCount = m_updateCounter.load(std::memory_order_relaxed);
			if NAZARA_LIKELY(updateCount != lastUpdateCounter)
			{
				// Update() ran since the last check, the main loop is alive
				hangCounter = 0;
				lastUpdateCounter = updateCount;
			}
			else if (++hangCounter >= m_maxHangSeconds)
			{
				fmt::print(fg(fmt::color::red), "main loop has been unresponsive for {} seconds, exiting...\n", hangCounter);

				// std::abort() is not required to flush stdio buffers: flush explicitly so the
				// message is not lost when stdout is redirected to a file or a pipe
				std::fflush(stdout);
				std::abort();
			}
		}

		// Normal exit
	});
}

// Stops the checker thread and waits for it to finish.
HealthCheckerAppComponent::~HealthCheckerAppComponent()
{
// Order matters: the thread only leaves its loop once it acquires this semaphore,
// so it has to be released before joining
m_exitChecker.release();
m_healthCheckThread.join();
}

// Records that the main loop is still running by bumping the counter watched by the checker thread.
// The elapsed time is not needed, so the parameter is left unnamed to avoid an unused-parameter warning.
void HealthCheckerAppComponent::Update(Nz::Time /*elapsedTime*/)
{
	// Relaxed ordering is sufficient: the checker thread only tests whether the value changed
	// between two of its wake-ups and does not rely on it to publish any other data
	m_updateCounter.fetch_add(1, std::memory_order_relaxed);
}
}
1 change: 1 addition & 0 deletions src/Server/ServerConfigAppComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace tsom
RegisterStringOption("Api.Url");
RegisterStringOption("ConnectionToken.EncryptionKey", "");
RegisterIntegerOption("Server.Port", 1, 0xFFFF, 29536);
RegisterIntegerOption("Server.MaxStuckSeconds", 0, 60, 10);
RegisterBoolOption("Server.SleepWhenEmpty", true);
RegisterStringOption("Save.Directory", "saves/chunks");
RegisterIntegerOption("Save.Interval", 0, 60 * 60, 30);
Expand Down
4 changes: 4 additions & 0 deletions src/Server/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// This file is part of the "This Space Of Mine" project
// For conditions of distribution and use, see copyright notice in LICENSE

#include <CommonLib/HealthCheckerAppComponent.hpp>
#include <CommonLib/InternalConstants.hpp>
#include <Server/ServerConfigAppComponent.hpp>
#include <ServerLib/PlayerTokenAppComponent.hpp>
Expand Down Expand Up @@ -43,6 +44,9 @@ int ServerMain(int argc, char* argv[])

auto& config = configAppComponent.GetConfig();

if (Nz::UInt32 maxStuckTime = config.GetIntegerValue<Nz::UInt32>("Server.MaxStuckSeconds"))
app.AddComponent<tsom::HealthCheckerAppComponent>(maxStuckTime);

Nz::UInt16 serverPort = config.GetIntegerValue<Nz::UInt16>("Server.Port");
std::filesystem::path saveDirectory = Nz::Utf8Path(config.GetStringValue("Save.Directory"));

Expand Down

0 comments on commit 34108dc

Please sign in to comment.