From 943a20cbf3162e1478e2b8699c6e28dfe10a4157 Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 30 Oct 2024 15:25:50 -0700 Subject: [PATCH 01/10] Cleanup dead code --- src/database/Database.h | 71 ++--------------------------------------- 1 file changed, 3 insertions(+), 68 deletions(-) diff --git a/src/database/Database.h b/src/database/Database.h index 73540c2884..fb6f694c2f 100644 --- a/src/database/Database.h +++ b/src/database/Database.h @@ -209,75 +209,10 @@ doDatabaseTypeSpecificOperation(soci::session& session, template T -Database::doDatabaseTypeSpecificOperation(DatabaseTypeSpecificOperation& op) +Database::doDatabaseTypeSpecificOperation(DatabaseTypeSpecificOperation& op, + soci::session& session) { - return stellar::doDatabaseTypeSpecificOperation(mSession, op); -} - -// Select a set of records using a client-defined query string, then map -// each record into an element of a client-defined datatype by applying a -// client-defined function (the records are accumulated in the "out" -// vector). -template -void -selectMap(Database& db, std::string const& selectStr, - std::function makeT, std::vector& out) -{ - soci::rowset rs = (db.getSession().prepare << selectStr); - - std::transform(rs.begin(), rs.end(), std::back_inserter(out), makeT); -} - -// Map each element in the given vector of a client-defined datatype into a -// SQL update command by applying a client-defined function, then send those -// update strings to the database. -// -// The "postUpdate" function receives the number of records affected -// by the given update, as well as the element of the client-defined -// datatype which generated that update. -template -void updateMap(Database& db, std::vector const& in, - std::string const& updateStr, - std::function prepUpdate, - std::function postUpdate); -template -void -updateMap(Database& db, std::vector const& in, std::string const& updateStr, - std::function prepUpdate, - std::function postUpdate) -{ - auto st_update = db.getPreparedStatement(updateStr).statement(); - - for (auto& recT : in) - { - prepUpdate(st_update, recT); - st_update.define_and_bind(); - st_update.execute(true); - auto affected_rows = st_update.get_affected_rows(); - st_update.clean_up(false); - postUpdate(affected_rows, recT); - } -} - -// The composition of updateMap() following selectMap(). -// -// Returns the number of records selected by selectMap() (all of which were -// then passed through updateMap() before the selectUpdateMap() call -// returned). 
-template -size_t -selectUpdateMap(Database& db, std::string const& selectStr, - std::function makeT, - std::string const& updateStr, - std::function prepUpdate, - std::function postUpdate) -{ - std::vector vecT; - - selectMap(db, selectStr, makeT, vecT); - updateMap(db, vecT, updateStr, prepUpdate, postUpdate); - - return vecT.size(); + return stellar::doDatabaseTypeSpecificOperation(session, op); } template From be4a86ba81b711ed4d5bc7f39b2aecbe423f5b31 Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Fri, 13 Dec 2024 20:00:56 -0800 Subject: [PATCH 02/10] LedgerTxn plumbing: allow specifying session in database operations --- src/ledger/LedgerTxn.cpp | 41 +++++++++-- src/ledger/LedgerTxn.h | 6 ++ src/ledger/LedgerTxnImpl.h | 4 ++ src/ledger/LedgerTxnOfferSQL.cpp | 84 +++++++++++++---------- src/ledger/test/InMemoryLedgerTxn.cpp | 8 ++- src/ledger/test/InMemoryLedgerTxn.h | 1 + src/ledger/test/InMemoryLedgerTxnRoot.cpp | 6 ++ src/ledger/test/InMemoryLedgerTxnRoot.h | 1 + 8 files changed, 107 insertions(+), 44 deletions(-) diff --git a/src/ledger/LedgerTxn.cpp b/src/ledger/LedgerTxn.cpp index 500bea8f27..c2a98ab52c 100644 --- a/src/ledger/LedgerTxn.cpp +++ b/src/ledger/LedgerTxn.cpp @@ -2324,6 +2324,13 @@ LedgerTxn::Impl::hasSponsorshipEntry() const return false; } +SessionWrapper& +LedgerTxn::getSession() const +{ + throw std::runtime_error("LedgerTxn::getSession illegal call, can only be " + "called on LedgerTxnRoot"); +} + void LedgerTxn::prepareNewObjects(size_t s) { @@ -2477,6 +2484,22 @@ LedgerTxnRoot::Impl::~Impl() } } +SessionWrapper& +LedgerTxnRoot::Impl::getSession() const +{ + if (mSession) + { + return *mSession; + } + return mApp.getDatabase().getSession(); +} + +SessionWrapper& +LedgerTxnRoot::getSession() const +{ + return mImpl->getSession(); +} + #ifdef BUILD_TESTS void LedgerTxnRoot::Impl::resetForFuzzer() @@ -2508,8 +2531,13 @@ LedgerTxnRoot::Impl::addChild(AbstractLedgerTxn& child, TransactionMode mode) if (mode == TransactionMode::READ_WRITE_WITH_SQL_TXN) { - mTransaction = std::make_unique( - mApp.getDatabase().getSession()); + if (mApp.getConfig().parallelLedgerClose()) + { + mSession = std::make_unique( + "ledgerClose", mApp.getDatabase().getPool()); + } + mTransaction = + std::make_unique(getSession().session()); } else { @@ -2632,7 +2660,7 @@ LedgerTxnRoot::Impl::commitChild(EntryIterator iter, // committing; on postgres this doesn't matter but on SQLite the passive // WAL-auto-checkpointing-at-commit behaviour will starve if there are // still prepared statements open at commit time. 
- mApp.getDatabase().clearPreparedStatementCache(); + mApp.getDatabase().clearPreparedStatementCache(getSession()); ZoneNamedN(commitZone, "SOCI commit", true); mTransaction->commit(); } @@ -2653,6 +2681,7 @@ LedgerTxnRoot::Impl::commitChild(EntryIterator iter, // std::unique_ptr<...>::reset does not throw mTransaction.reset(); + mSession.reset(); // std::unique_ptr<...>::swap does not throw mHeader.swap(childHeader); @@ -2682,8 +2711,7 @@ LedgerTxnRoot::Impl::countOffers(LedgerRange const& ledgers) const uint64_t count = 0; int first = static_cast(ledgers.mFirst); int limit = static_cast(ledgers.limit()); - mApp.getDatabase().getSession() << query, into(count), use(first), - use(limit); + getSession().session() << query, into(count), use(first), use(limit); return count; } @@ -2702,7 +2730,7 @@ LedgerTxnRoot::Impl::deleteOffersModifiedOnOrAfterLedger(uint32_t ledger) const mBestOffers.clear(); std::string query = "DELETE FROM offers WHERE lastmodified >= :v1"; - mApp.getDatabase().getSession() << query, use(ledger); + getSession().session() << query, use(ledger); } void @@ -3359,6 +3387,7 @@ LedgerTxnRoot::Impl::rollbackChild() noexcept { mTransaction->rollback(); mTransaction.reset(); + mSession.reset(); } catch (std::exception& e) { diff --git a/src/ledger/LedgerTxn.h b/src/ledger/LedgerTxn.h index a89ac6bac8..e1ca8f9b22 100644 --- a/src/ledger/LedgerTxn.h +++ b/src/ledger/LedgerTxn.h @@ -16,6 +16,7 @@ #include #include #include +#include ///////////////////////////////////////////////////////////////////////////// // Overview @@ -276,6 +277,7 @@ struct InflationVotes; struct LedgerEntry; struct LedgerKey; struct LedgerRange; +class SessionWrapper; struct OfferDescriptor { @@ -496,6 +498,8 @@ class AbstractLedgerTxnParent // prepares to increase the capacity of pending changes by up to "s" changes virtual void prepareNewObjects(size_t s) = 0; + virtual SessionWrapper& getSession() const = 0; + #ifdef BUILD_TESTS virtual void resetForFuzzer() = 0; #endif // BUILD_TESTS @@ -785,6 +789,7 @@ class LedgerTxn : public AbstractLedgerTxn uint32_t prefetchSoroban(UnorderedSet const& keys, LedgerKeyMeter* lkMeter) override; void prepareNewObjects(size_t s) override; + SessionWrapper& getSession() const override; bool hasSponsorshipEntry() const override; @@ -879,5 +884,6 @@ class LedgerTxnRoot : public AbstractLedgerTxnParent OfferDescriptor const* worseThan, std::unordered_set& exclude) override; #endif + SessionWrapper& getSession() const override; }; } diff --git a/src/ledger/LedgerTxnImpl.h b/src/ledger/LedgerTxnImpl.h index 47997323fc..62afa831ef 100644 --- a/src/ledger/LedgerTxnImpl.h +++ b/src/ledger/LedgerTxnImpl.h @@ -612,6 +612,8 @@ class LedgerTxnRoot::Impl size_t const mMaxBestOffersBatchSize; Application& mApp; + std::unique_ptr mSession; + std::unique_ptr mHeader; mutable EntryCache mEntryCache; mutable BestOffers mBestOffers; @@ -709,6 +711,8 @@ class LedgerTxnRoot::Impl // countOffers has the strong exception safety guarantee. uint64_t countOffers(LedgerRange const& ledgers) const; + SessionWrapper& getSession() const; + // deleteOffersModifiedOnOrAfterLedger has no exception safety guarantees. 
void deleteOffersModifiedOnOrAfterLedger(uint32_t ledger) const; diff --git a/src/ledger/LedgerTxnOfferSQL.cpp b/src/ledger/LedgerTxnOfferSQL.cpp index 4cf1b23bc2..c2d345ac83 100644 --- a/src/ledger/LedgerTxnOfferSQL.cpp +++ b/src/ledger/LedgerTxnOfferSQL.cpp @@ -39,7 +39,7 @@ LedgerTxnRoot::Impl::loadOffer(LedgerKey const& key) const "ledgerext " "FROM offers " "WHERE sellerid= :id AND offerid= :offerid"; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement(sql, getSession()); auto& st = prep.statement(); st.exchange(soci::use(actIDStrKey)); st.exchange(soci::use(offerID)); @@ -61,7 +61,7 @@ LedgerTxnRoot::Impl::loadAllOffers() const std::string sql = "SELECT sellerid, offerid, sellingasset, buyingasset, " "amount, pricen, priced, flags, lastmodified, extension, " "ledgerext FROM offers"; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement(sql, getSession()); std::vector offers; { @@ -89,7 +89,7 @@ LedgerTxnRoot::Impl::loadBestOffers(std::deque& offers, buyingAsset = decoder::encode_b64(xdr::xdr_to_opaque(buying)); sellingAsset = decoder::encode_b64(xdr::xdr_to_opaque(selling)); - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement(sql, getSession()); auto& st = prep.statement(); st.exchange(soci::use(sellingAsset)); st.exchange(soci::use(buyingAsset)); @@ -145,7 +145,7 @@ LedgerTxnRoot::Impl::loadBestOffers(std::deque& offers, (double)worseThan.price.n / (double)worseThan.price.d; int64_t worseThanOfferID = worseThan.offerID + 1; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement(sql, getSession()); auto& st = prep.statement(); st.exchange(soci::use(sellingAsset)); st.exchange(soci::use(buyingAsset)); @@ -227,7 +227,7 @@ LedgerTxnRoot::Impl::loadOffersByAccountAndAsset(AccountID const& accountID, } std::string assetStr = decoder::encode_b64(xdr::xdr_to_opaque(asset)); - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement(sql, getSession()); auto& st = prep.statement(); st.exchange(soci::use(accountStr)); st.exchange(soci::use(assetStr)); @@ -327,6 +327,7 @@ LedgerTxnRoot::Impl::loadOffers(StatementContext& prep) const class BulkUpsertOffersOperation : public DatabaseTypeSpecificOperation { Database& mDB; + SessionWrapper& mSession; std::vector mSellerIDs; std::vector mOfferIDs; std::vector mSellingAssets; @@ -371,8 +372,9 @@ class BulkUpsertOffersOperation : public DatabaseTypeSpecificOperation public: BulkUpsertOffersOperation(Database& DB, - std::vector const& entries) - : mDB(DB) + std::vector const& entries, + SessionWrapper& session) + : mDB(DB), mSession(session) { mSellerIDs.reserve(entries.size()); mOfferIDs.reserve(entries.size()); @@ -394,8 +396,9 @@ class BulkUpsertOffersOperation : public DatabaseTypeSpecificOperation } BulkUpsertOffersOperation(Database& DB, - std::vector const& entries) - : mDB(DB) + std::vector const& entries, + SessionWrapper& session) + : mDB(DB), mSession(session) { mSellerIDs.reserve(entries.size()); mOfferIDs.reserve(entries.size()); @@ -441,7 +444,7 @@ class BulkUpsertOffersOperation : public DatabaseTypeSpecificOperation "lastmodified = excluded.lastmodified, " "extension = excluded.extension, " "ledgerext = excluded.ledgerext"; - auto prep = mDB.getPreparedStatement(sql); + auto prep = mDB.getPreparedStatement(sql, mSession); soci::statement& st = 
prep.statement(); st.exchange(soci::use(mSellerIDs)); st.exchange(soci::use(mOfferIDs)); @@ -529,7 +532,7 @@ class BulkUpsertOffersOperation : public DatabaseTypeSpecificOperation "lastmodified = excluded.lastmodified, " "extension = excluded.extension, " "ledgerext = excluded.ledgerext"; - auto prep = mDB.getPreparedStatement(sql); + auto prep = mDB.getPreparedStatement(sql, mSession); soci::statement& st = prep.statement(); st.exchange(soci::use(strSellerIDs)); st.exchange(soci::use(strOfferIDs)); @@ -560,12 +563,14 @@ class BulkDeleteOffersOperation : public DatabaseTypeSpecificOperation { Database& mDB; LedgerTxnConsistency mCons; + SessionWrapper& mSession; std::vector mOfferIDs; public: BulkDeleteOffersOperation(Database& DB, LedgerTxnConsistency cons, - std::vector const& entries) - : mDB(DB), mCons(cons) + std::vector const& entries, + SessionWrapper& session) + : mDB(DB), mCons(cons), mSession(session) { for (auto const& e : entries) { @@ -582,7 +587,7 @@ class BulkDeleteOffersOperation : public DatabaseTypeSpecificOperation doSociGenericOperation() { std::string sql = "DELETE FROM offers WHERE offerid = :id"; - auto prep = mDB.getPreparedStatement(sql); + auto prep = mDB.getPreparedStatement(sql, mSession); soci::statement& st = prep.statement(); st.exchange(soci::use(mOfferIDs)); st.define_and_bind(); @@ -615,7 +620,7 @@ class BulkDeleteOffersOperation : public DatabaseTypeSpecificOperation ") " "DELETE FROM offers WHERE " "offerid IN (SELECT * FROM r)"; - auto prep = mDB.getPreparedStatement(sql); + auto prep = mDB.getPreparedStatement(sql, mSession); soci::statement& st = prep.statement(); st.exchange(soci::use(strOfferIDs)); st.define_and_bind(); @@ -637,8 +642,8 @@ LedgerTxnRoot::Impl::bulkUpsertOffers(std::vector const& entries) { ZoneScoped; ZoneValue(static_cast(entries.size())); - BulkUpsertOffersOperation op(mApp.getDatabase(), entries); - mApp.getDatabase().doDatabaseTypeSpecificOperation(op); + BulkUpsertOffersOperation op(mApp.getDatabase(), entries, getSession()); + mApp.getDatabase().doDatabaseTypeSpecificOperation(op, getSession()); } void @@ -647,8 +652,9 @@ LedgerTxnRoot::Impl::bulkDeleteOffers(std::vector const& entries, { ZoneScoped; ZoneValue(static_cast(entries.size())); - BulkDeleteOffersOperation op(mApp.getDatabase(), cons, entries); - mApp.getDatabase().doDatabaseTypeSpecificOperation(op); + BulkDeleteOffersOperation op(mApp.getDatabase(), cons, entries, + getSession()); + mApp.getDatabase().doDatabaseTypeSpecificOperation(op, getSession()); } void @@ -658,10 +664,10 @@ LedgerTxnRoot::Impl::dropOffers() mEntryCache.clear(); mBestOffers.clear(); - mApp.getDatabase().getSession() << "DROP TABLE IF EXISTS offers;"; + getSession().session() << "DROP TABLE IF EXISTS offers;"; std::string coll = mApp.getDatabase().getSimpleCollationClause(); - mApp.getDatabase().getSession() + mApp.getDatabase().getRawSession() << "CREATE TABLE offers" << "(" << "sellerid VARCHAR(56) " << coll << "NOT NULL," @@ -679,20 +685,21 @@ LedgerTxnRoot::Impl::dropOffers() "ledgerext TEXT NOT NULL," "PRIMARY KEY (offerid)" ");"; - mApp.getDatabase().getSession() + mApp.getDatabase().getRawSession() << "CREATE INDEX bestofferindex ON offers " "(sellingasset,buyingasset,price,offerid);"; - mApp.getDatabase().getSession() << "CREATE INDEX offerbyseller ON offers " - "(sellerid);"; + mApp.getDatabase().getRawSession() + << "CREATE INDEX offerbyseller ON offers " + "(sellerid);"; if (!mApp.getDatabase().isSqlite()) { - mApp.getDatabase().getSession() << "ALTER TABLE offers " - << "ALTER 
COLUMN sellerid " - << "TYPE VARCHAR(56) COLLATE \"C\", " - << "ALTER COLUMN buyingasset " - << "TYPE TEXT COLLATE \"C\", " - << "ALTER COLUMN sellingasset " - << "TYPE TEXT COLLATE \"C\""; + mApp.getDatabase().getRawSession() << "ALTER TABLE offers " + << "ALTER COLUMN sellerid " + << "TYPE VARCHAR(56) COLLATE \"C\", " + << "ALTER COLUMN buyingasset " + << "TYPE TEXT COLLATE \"C\", " + << "ALTER COLUMN sellingasset " + << "TYPE TEXT COLLATE \"C\""; } } @@ -700,6 +707,7 @@ class BulkLoadOffersOperation : public DatabaseTypeSpecificOperation> { Database& mDb; + SessionWrapper& mSession; std::vector mOfferIDs; UnorderedSet mKeys; @@ -762,8 +770,9 @@ class BulkLoadOffersOperation } public: - BulkLoadOffersOperation(Database& db, UnorderedSet const& keys) - : mDb(db) + BulkLoadOffersOperation(Database& db, UnorderedSet const& keys, + SessionWrapper& session) + : mDb(db), mSession(session) { mOfferIDs.reserve(keys.size()); for (auto const& k : keys) @@ -785,7 +794,7 @@ class BulkLoadOffersOperation "ledgerext " "FROM offers WHERE offerid IN carray(?, ?, 'int64')"; - auto prep = mDb.getPreparedStatement(sql); + auto prep = mDb.getPreparedStatement(sql, mSession); auto be = prep.statement().get_backend(); if (be == nullptr) { @@ -815,7 +824,7 @@ class BulkLoadOffersOperation "amount, pricen, priced, flags, lastmodified, extension, " "ledgerext " "FROM offers WHERE offerid IN (SELECT * FROM r)"; - auto prep = mDb.getPreparedStatement(sql); + auto prep = mDb.getPreparedStatement(sql, mSession); auto& st = prep.statement(); st.exchange(soci::use(strOfferIDs)); return executeAndFetch(st); @@ -830,9 +839,10 @@ LedgerTxnRoot::Impl::bulkLoadOffers(UnorderedSet const& keys) const ZoneValue(static_cast(keys.size())); if (!keys.empty()) { - BulkLoadOffersOperation op(mApp.getDatabase(), keys); + BulkLoadOffersOperation op(mApp.getDatabase(), keys, getSession()); return populateLoadedEntries( - keys, mApp.getDatabase().doDatabaseTypeSpecificOperation(op)); + keys, mApp.getDatabase().doDatabaseTypeSpecificOperation( + op, getSession())); } else { diff --git a/src/ledger/test/InMemoryLedgerTxn.cpp b/src/ledger/test/InMemoryLedgerTxn.cpp index beeab8d270..1b2dd26dfe 100644 --- a/src/ledger/test/InMemoryLedgerTxn.cpp +++ b/src/ledger/test/InMemoryLedgerTxn.cpp @@ -51,6 +51,12 @@ InMemoryLedgerTxn::FilteredEntryIteratorImpl::entryPtr() const return mIter.entryPtr(); } +SessionWrapper& +InMemoryLedgerTxn::getSession() const +{ + return mDb.getSession(); +} + bool InMemoryLedgerTxn::FilteredEntryIteratorImpl::entryExists() const { @@ -91,7 +97,7 @@ InMemoryLedgerTxn::addChild(AbstractLedgerTxn& child, TransactionMode mode) LedgerTxn::addChild(child, mode); if (mode == TransactionMode::READ_WRITE_WITH_SQL_TXN) { - mTransaction = std::make_unique(mDb.getSession()); + mTransaction = std::make_unique(mDb.getRawSession()); } } diff --git a/src/ledger/test/InMemoryLedgerTxn.h b/src/ledger/test/InMemoryLedgerTxn.h index 7e2f3d9ee7..9b49e8a890 100644 --- a/src/ledger/test/InMemoryLedgerTxn.h +++ b/src/ledger/test/InMemoryLedgerTxn.h @@ -136,6 +136,7 @@ class InMemoryLedgerTxn : public LedgerTxn void dropOffers() override; uint64_t countOffers(LedgerRange const& ledgers) const override; void deleteOffersModifiedOnOrAfterLedger(uint32_t ledger) const override; + SessionWrapper& getSession() const override; #ifdef BEST_OFFER_DEBUGGING virtual bool bestOfferDebuggingEnabled() const override; diff --git a/src/ledger/test/InMemoryLedgerTxnRoot.cpp b/src/ledger/test/InMemoryLedgerTxnRoot.cpp index 4ff9ca435f..75f1bb06c6 100644 
--- a/src/ledger/test/InMemoryLedgerTxnRoot.cpp +++ b/src/ledger/test/InMemoryLedgerTxnRoot.cpp @@ -137,6 +137,12 @@ void InMemoryLedgerTxnRoot::prepareNewObjects(size_t) { } +SessionWrapper& +InMemoryLedgerTxnRoot::getSession() const +{ + throw std::runtime_error("InMemoryLedgerTxnRoot::getSession is not supported"); +} + #ifdef BUILD_TESTS void InMemoryLedgerTxnRoot::resetForFuzzer() diff --git a/src/ledger/test/InMemoryLedgerTxnRoot.h b/src/ledger/test/InMemoryLedgerTxnRoot.h index 9b925172aa..16606faa6b 100644 --- a/src/ledger/test/InMemoryLedgerTxnRoot.h +++ b/src/ledger/test/InMemoryLedgerTxnRoot.h @@ -75,6 +75,7 @@ class InMemoryLedgerTxnRoot : public AbstractLedgerTxnParent LedgerKeyMeter* lkMeter) override; void prepareNewObjects(size_t s) override; + SessionWrapper& getSession() const override; #ifdef BUILD_TESTS void resetForFuzzer() override; From c70a3852fe9e56c82c43036867eb52c3b5424bd6 Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 12:23:53 -0800 Subject: [PATCH 03/10] Virtual time thread-safety --- src/util/Timer.cpp | 10 ++++++++-- src/util/Timer.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/util/Timer.cpp b/src/util/Timer.cpp index dd68a7f13c..b254fb86a9 100644 --- a/src/util/Timer.cpp +++ b/src/util/Timer.cpp @@ -38,6 +38,7 @@ VirtualClock::now() const noexcept } else { + std::lock_guard lock(mVirtualNowMutex); return mVirtualNow; } } @@ -51,6 +52,7 @@ VirtualClock::system_now() const noexcept } else { + std::lock_guard lock(mVirtualNowMutex); auto offset = mVirtualNow.time_since_epoch(); return std::chrono::system_clock::time_point( std::chrono::duration_cast< @@ -283,6 +285,7 @@ void VirtualClock::setCurrentVirtualTime(time_point t) { releaseAssert(mMode == VIRTUAL_TIME); + std::lock_guard lock(mVirtualNowMutex); // Maintain monotonicity in VIRTUAL_TIME mode. releaseAssert(t >= mVirtualNow); mVirtualNow = t; @@ -547,9 +550,12 @@ VirtualClock::advanceToNext() auto nextEvent = next(); // jump forward in time, if needed - if (mVirtualNow < nextEvent) { - mVirtualNow = nextEvent; + std::lock_guard lock(mVirtualNowMutex); + if (mVirtualNow < nextEvent) + { + mVirtualNow = nextEvent; + } } return advanceToNow(); } diff --git a/src/util/Timer.h b/src/util/Timer.h index bd0130c540..0da56026fa 100644 --- a/src/util/Timer.h +++ b/src/util/Timer.h @@ -180,6 +180,7 @@ class VirtualClock // timer should be last to ensure it gets destroyed first RealSteadyTimer mRealTimer; + std::mutex mutable mVirtualNowMutex; public: // A VirtualClock is instantiated in either real or virtual mode. 
In real From c50b2c406b25184891d048ea376bde7fba9c5f2e Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 12:29:50 -0800 Subject: [PATCH 04/10] Split PersistentState into two tables to support concurrent writes --- src/herder/HerderImpl.cpp | 15 ++- src/main/PersistentState.cpp | 238 ++++++++++++++++++++++++++--------- src/main/PersistentState.h | 46 ++++--- 3 files changed, 217 insertions(+), 82 deletions(-) diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index 81cc5fd74f..a208f630bc 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -1988,7 +1988,7 @@ HerderImpl::restoreSCPState() // Load all known tx sets auto latestTxSets = mApp.getPersistentState().getTxSetsForAllSlots(); - for (auto const& txSet : latestTxSets) + for (auto const& [_, txSet] : latestTxSets) { try { @@ -2017,7 +2017,7 @@ HerderImpl::restoreSCPState() // load saved state from database auto latest64 = mApp.getPersistentState().getSCPStateAllSlots(); - for (auto const& state : latest64) + for (auto const& [_, state] : latest64) { try { @@ -2057,16 +2057,19 @@ void HerderImpl::persistUpgrades() { ZoneScoped; + releaseAssert(threadIsMain()); auto s = mUpgrades.getParameters().toJson(); - mApp.getPersistentState().setState(PersistentState::kLedgerUpgrades, s); + mApp.getPersistentState().setState(PersistentState::kLedgerUpgrades, s, + mApp.getDatabase().getSession()); } void HerderImpl::restoreUpgrades() { ZoneScoped; - std::string s = - mApp.getPersistentState().getState(PersistentState::kLedgerUpgrades); + releaseAssert(threadIsMain()); + std::string s = mApp.getPersistentState().getState( + PersistentState::kLedgerUpgrades, mApp.getDatabase().getSession()); if (!s.empty()) { Upgrades::UpgradeParameters p; @@ -2223,7 +2226,7 @@ HerderImpl::purgeOldPersistedTxSets() { auto hashesToDelete = mApp.getPersistentState().getTxSetHashesForAllSlots(); - for (auto const& state : + for (auto const& [_, state] : mApp.getPersistentState().getSCPStateAllSlots()) { try diff --git a/src/main/PersistentState.cpp b/src/main/PersistentState.cpp index ecb7c12eaa..c1e4aecc1e 100644 --- a/src/main/PersistentState.cpp +++ b/src/main/PersistentState.cpp @@ -19,10 +19,9 @@ namespace stellar using namespace std; std::string PersistentState::mapping[kLastEntry] = { - "lastclosedledger", "historyarchivestate", "lastscpdata", - "databaseschema", "networkpassphrase", "ledgerupgrades", - "rebuildledger", "lastscpdataxdr", "txset", - "dbbackend"}; + "lastclosedledger", "historyarchivestate", "databaseschema", + "networkpassphrase", "dbbackend", "rebuildledger", + "ledgerupgrades", "lastscpdataxdr", "txset"}; std::string PersistentState::kSQLCreateStatement = "CREATE TABLE IF NOT EXISTS storestate (" @@ -30,19 +29,33 @@ std::string PersistentState::kSQLCreateStatement = "state TEXT" "); "; +// Persist consensus slot data +std::string PersistentState::kSQLCreateSCPStatement = + "CREATE TABLE IF NOT EXISTS slotstate (" + "statename CHARACTER(70) PRIMARY KEY," + "state TEXT" + "); "; + +std::string PersistentState::kLCLTableName = "storestate"; +std::string PersistentState::kSlotTableName = "slotstate"; + PersistentState::PersistentState(Application& app) : mApp(app) { + releaseAssert(threadIsMain()); } void -PersistentState::deleteTxSets(std::unordered_set hashesToDelete) +PersistentState::deleteTxSets(std::unordered_set hashesToDelete, + std::string table) { - soci::transaction tx(mApp.getDatabase().getSession()); + releaseAssert(threadIsMain()); + soci::transaction 
tx(mApp.getDatabase().getRawSession()); for (auto const& hash : hashesToDelete) { auto name = getStoreStateNameForTxSet(hash); auto prep = mApp.getDatabase().getPreparedStatement( - "DELETE FROM storestate WHERE statename = :n;"); + fmt::format("DELETE FROM {} WHERE statename = :n;", table), + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::use(name)); @@ -53,12 +66,76 @@ PersistentState::deleteTxSets(std::unordered_set hashesToDelete) } void -PersistentState::dropAll(Database& db) +PersistentState::migrateToSlotStateTable() { - db.getSession() << "DROP TABLE IF EXISTS storestate;"; + // No soci::transaction needed, because the migration in Database.cpp wraps + // everything in one transaction anyway. + releaseAssert(threadIsMain()); + auto& db = mApp.getDatabase(); + + // First, create the new table + db.getRawSession() << PersistentState::kSQLCreateSCPStatement; + + // Migrate all the tx sets + auto txSets = getTxSetsForAllSlots(kLCLTableName); + std::unordered_set keysToDelete; + for (auto const& txSet : txSets) + { + CLOG_INFO(Herder, "Migrating tx set {} to slotstate", + hexAbbrev(txSet.first)); + updateDb(getStoreStateNameForTxSet(txSet.first), txSet.second, + db.getSession(), kSlotTableName); + keysToDelete.insert(txSet.first); + } + + // Cleanup tx sets from the previous table + deleteTxSets(keysToDelete, kLCLTableName); + + // Migrate all SCP slot data + auto scpStates = getSCPStateAllSlots(kLCLTableName); + for (auto const& [i, scpState] : scpStates) + { + CLOG_INFO(Herder, "Migrating SCP state for slot {} to slotstate", i); + setSCPStateForSlot(i, scpState); + auto prep = mApp.getDatabase().getPreparedStatement( + "DELETE FROM storestate WHERE statename = :n;", + mApp.getDatabase().getSession()); - soci::statement st = db.getSession().prepare << kSQLCreateStatement; + auto& st = prep.statement(); + st.exchange(soci::use(getStoreStateName(kLastSCPDataXDR, i))); + st.define_and_bind(); + st.execute(true); + } + + // Migrate upgrade data + auto upgrades = getFromDb(getStoreStateName(kLedgerUpgrades), + db.getSession(), kLCLTableName); + if (!upgrades.empty()) + { + updateDb(getStoreStateName(kLedgerUpgrades), upgrades, db.getSession(), + kSlotTableName); + auto prep = mApp.getDatabase().getPreparedStatement( + "DELETE FROM storestate WHERE statename = :n;", + mApp.getDatabase().getSession()); + + auto& st = prep.statement(); + st.exchange(soci::use(getStoreStateName(kLedgerUpgrades))); + st.define_and_bind(); + st.execute(true); + } +} + +void +PersistentState::dropAll(Database& db) +{ + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS storestate;"; + soci::statement st = db.getRawSession().prepare << kSQLCreateStatement; st.execute(true); + + db.getRawSession() << "DROP TABLE IF EXISTS slotstate;"; + soci::statement st2 = db.getRawSession().prepare << kSQLCreateSCPStatement; + st2.execute(true); } std::string @@ -69,8 +146,7 @@ PersistentState::getStoreStateName(PersistentState::Entry n, uint32 subscript) throw out_of_range("unknown entry"); } auto res = mapping[n]; - if (((n == kLastSCPData || n == kLastSCPDataXDR) && subscript > 0) || - n == kRebuildLedger) + if ((n == kLastSCPDataXDR && subscript > 0) || n == kRebuildLedger) { res += std::to_string(subscript); } @@ -88,36 +164,61 @@ PersistentState::getStoreStateNameForTxSet(Hash const& txSetHash) bool PersistentState::hasTxSet(Hash const& txSetHash) { - return entryExists(getStoreStateNameForTxSet(txSetHash)); + releaseAssert(threadIsMain()); + + int res = 0; + auto 
entry = getStoreStateNameForTxSet(txSetHash); + + auto& db = mApp.getDatabase(); + auto prep = db.getPreparedStatement( + "SELECT COUNT(*) FROM slotstate WHERE statename = :n;", + db.getSession()); + auto& st = prep.statement(); + st.exchange(soci::into(res)); + st.exchange(soci::use(entry)); + st.define_and_bind(); + st.execute(true); + + return res > 0; } std::string -PersistentState::getState(PersistentState::Entry entry) +PersistentState::getDBForEntry(PersistentState::Entry entry) +{ + releaseAssert(entry != kLastEntry); + return entry <= kRebuildLedger ? kLCLTableName : kSlotTableName; +} + +std::string +PersistentState::getState(PersistentState::Entry entry, SessionWrapper& session) { ZoneScoped; - return getFromDb(getStoreStateName(entry)); + return getFromDb(getStoreStateName(entry), session, getDBForEntry(entry)); } void PersistentState::setState(PersistentState::Entry entry, - std::string const& value) + std::string const& value, SessionWrapper& session) { ZoneScoped; - updateDb(getStoreStateName(entry), value); + updateDb(getStoreStateName(entry), value, session, getDBForEntry(entry)); } -std::vector -PersistentState::getSCPStateAllSlots() +std::unordered_map +PersistentState::getSCPStateAllSlots(std::string table) { ZoneScoped; + releaseAssert(threadIsMain()); + // Collect all slots persisted - std::vector states; + std::unordered_map states; for (uint32 i = 0; i <= mApp.getConfig().MAX_SLOTS_TO_REMEMBER; i++) { - auto val = getFromDb(getStoreStateName(kLastSCPDataXDR, i)); + auto val = getFromDb(getStoreStateName(kLastSCPDataXDR, i), + mApp.getDatabase().getSession(), table); if (!val.empty()) { - states.push_back(val); + states.emplace(i, val); } } @@ -128,9 +229,12 @@ void PersistentState::setSCPStateForSlot(uint64 slot, std::string const& value) { ZoneScoped; + releaseAssert(threadIsMain()); + auto slotIdx = static_cast( slot % (mApp.getConfig().MAX_SLOTS_TO_REMEMBER + 1)); - updateDb(getStoreStateName(kLastSCPDataXDR, slotIdx), value); + updateDb(getStoreStateName(kLastSCPDataXDR, slotIdx), value, + mApp.getDatabase().getSession(), kSlotTableName); } void @@ -138,12 +242,15 @@ PersistentState::setSCPStateV1ForSlot( uint64 slot, std::string const& value, std::unordered_map const& txSets) { - soci::transaction tx(mApp.getDatabase().getSession()); + releaseAssert(threadIsMain()); + + soci::transaction tx(mApp.getDatabase().getRawSession()); setSCPStateForSlot(slot, value); for (auto const& txSet : txSets) { - updateDb(getStoreStateNameForTxSet(txSet.first), txSet.second); + updateDb(getStoreStateNameForTxSet(txSet.first), txSet.second, + mApp.getDatabase().getSession(), kSlotTableName); } tx.commit(); } @@ -152,29 +259,41 @@ bool PersistentState::shouldRebuildForOfferTable() { ZoneScoped; - return !getFromDb(getStoreStateName(kRebuildLedger, OFFER)).empty(); + releaseAssert(threadIsMain()); + + return !getFromDb(getStoreStateName(kRebuildLedger, OFFER), + mApp.getDatabase().getSession(), kLCLTableName) + .empty(); } void PersistentState::clearRebuildForOfferTable() { ZoneScoped; - updateDb(getStoreStateName(kRebuildLedger, OFFER), ""); + releaseAssert(threadIsMain()); + + updateDb(getStoreStateName(kRebuildLedger, OFFER), "", + mApp.getDatabase().getSession(), kLCLTableName); } void PersistentState::setRebuildForOfferTable() { ZoneScoped; - updateDb(getStoreStateName(kRebuildLedger, OFFER), "1"); + releaseAssert(threadIsMain()); + updateDb(getStoreStateName(kRebuildLedger, OFFER), "1", + mApp.getDatabase().getSession(), kLCLTableName); } void 
-PersistentState::updateDb(std::string const& entry, std::string const& value) +PersistentState::updateDb(std::string const& entry, std::string const& value, + SessionWrapper& sess, std::string const& tableName) { ZoneScoped; auto prep = mApp.getDatabase().getPreparedStatement( - "UPDATE storestate SET state = :v WHERE statename = :n;"); + fmt::format("UPDATE {} SET state = :v WHERE statename = :n;", + tableName), + sess); auto& st = prep.statement(); st.exchange(soci::use(value)); @@ -185,11 +304,14 @@ PersistentState::updateDb(std::string const& entry, std::string const& value) st.execute(true); } - if (st.get_affected_rows() != 1 && getFromDb(entry).empty()) + if (st.get_affected_rows() != 1 && + getFromDb(entry, sess, tableName).empty()) { ZoneNamedN(insertStoreStateZone, "insert storestate", true); auto prep2 = mApp.getDatabase().getPreparedStatement( - "INSERT INTO storestate (statename, state) VALUES (:n, :v);"); + fmt::format("INSERT INTO {} (statename, state) VALUES (:n, :v);", + tableName), + sess); auto& st2 = prep2.statement(); st2.exchange(soci::use(entry)); st2.exchange(soci::use(value)); @@ -202,19 +324,23 @@ PersistentState::updateDb(std::string const& entry, std::string const& value) } } -std::vector -PersistentState::getTxSetsForAllSlots() +std::unordered_map +PersistentState::getTxSetsForAllSlots(std::string table) { ZoneScoped; - std::vector result; + releaseAssert(threadIsMain()); + + std::unordered_map result; + std::string key; std::string val; std::string pattern = mapping[kTxSet] + "%"; - std::string statementStr = - "SELECT state FROM storestate WHERE statename LIKE :n;"; + std::string statementStr = fmt::format( + "SELECT statename, state FROM {} WHERE statename LIKE :n;", table); auto& db = mApp.getDatabase(); - auto prep = db.getPreparedStatement(statementStr); + auto prep = db.getPreparedStatement(statementStr, db.getSession()); auto& st = prep.statement(); + st.exchange(soci::into(key)); st.exchange(soci::into(val)); st.exchange(soci::use(pattern)); st.define_and_bind(); @@ -223,9 +349,13 @@ PersistentState::getTxSetsForAllSlots() st.execute(true); } + Hash hash; + size_t len = binToHex(hash).size(); + while (st.got_data()) { - result.push_back(val); + result.emplace(hexToBin256(key.substr(mapping[kTxSet].size(), len)), + val); st.fetch(); } @@ -236,14 +366,16 @@ std::unordered_set PersistentState::getTxSetHashesForAllSlots() { ZoneScoped; + releaseAssert(threadIsMain()); + std::unordered_set result; std::string val; std::string pattern = mapping[kTxSet] + "%"; std::string statementStr = - "SELECT statename FROM storestate WHERE statename LIKE :n;"; + "SELECT statename FROM slotstate WHERE statename LIKE :n;"; auto& db = mApp.getDatabase(); - auto prep = db.getPreparedStatement(statementStr); + auto prep = db.getPreparedStatement(statementStr, db.getSession()); auto& st = prep.statement(); st.exchange(soci::into(val)); st.exchange(soci::use(pattern)); @@ -267,14 +399,16 @@ PersistentState::getTxSetHashesForAllSlots() } std::string -PersistentState::getFromDb(std::string const& entry) +PersistentState::getFromDb(std::string const& entry, SessionWrapper& sess, + std::string const& tableName) { ZoneScoped; std::string res; auto& db = mApp.getDatabase(); auto prep = db.getPreparedStatement( - "SELECT state FROM storestate WHERE statename = :n;"); + fmt::format("SELECT state FROM {} WHERE statename = :n;", tableName), + sess); auto& st = prep.statement(); st.exchange(soci::into(res)); st.exchange(soci::use(entry)); @@ -291,22 +425,4 @@ 
PersistentState::getFromDb(std::string const& entry) return res; } - -bool -PersistentState::entryExists(std::string const& entry) -{ - ZoneScoped; - int res = 0; - - auto& db = mApp.getDatabase(); - auto prep = db.getPreparedStatement( - "SELECT COUNT(*) FROM storestate WHERE statename = :n;"); - auto& st = prep.statement(); - st.exchange(soci::into(res)); - st.exchange(soci::use(entry)); - st.define_and_bind(); - st.execute(true); - - return res > 0; -} } diff --git a/src/main/PersistentState.h b/src/main/PersistentState.h index 06eab873aa..7f36872051 100644 --- a/src/main/PersistentState.h +++ b/src/main/PersistentState.h @@ -4,14 +4,18 @@ // under the Apache License, Version 2.0. See the COPYING file at the root // of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 -#include "herder/TxSetFrame.h" +#include "database/Database.h" #include "main/Application.h" #include "xdr/Stellar-internal.h" #include +#include namespace stellar { +// PersistentState ensures all state critical to node health, such as the +// LCL, SCP messages, and upgrades, is persisted to the database. The class +// maintains two separate tables to avoid conflicts during concurrent writes. class PersistentState { public: @@ -19,28 +23,32 @@ class PersistentState enum Entry { + // LCL-related entries kLastClosedLedger = 0, kHistoryArchiveState, - kLastSCPData, kDatabaseSchema, kNetworkPassphrase, - kLedgerUpgrades, + // https://github.com/stellar/stellar-core/issues/4582 + kDBBackend, kRebuildLedger, + // SCP-related entries + kLedgerUpgrades, kLastSCPDataXDR, kTxSet, - // https://github.com/stellar/stellar-core/issues/4582 - kDBBackend, kLastEntry, }; static void dropAll(Database& db); - std::string getState(Entry stateName); - void setState(Entry stateName, std::string const& value); + std::string getState(Entry stateName, SessionWrapper& session); + void setState(Entry stateName, std::string const& value, + SessionWrapper& session); // Special methods for SCP state (multiple slots) - std::vector getSCPStateAllSlots(); - std::vector getTxSetsForAllSlots(); + std::unordered_map + getSCPStateAllSlots(std::string table = kSlotTableName); + std::unordered_map + getTxSetsForAllSlots(std::string table = kSlotTableName); std::unordered_set getTxSetHashesForAllSlots(); void @@ -52,20 +60,28 @@ class PersistentState void setRebuildForOfferTable(); bool hasTxSet(Hash const& txSetHash); - void deleteTxSets(std::unordered_set hashesToDelete); + void deleteTxSets(std::unordered_set hashesToDelete, + std::string table = kSlotTableName); + void migrateToSlotStateTable(); private: static std::string kSQLCreateStatement; + static std::string kSQLCreateSCPStatement; static std::string mapping[kLastEntry]; + static std::string kLCLTableName; + static std::string kSlotTableName; Application& mApp; - std::string getStoreStateName(Entry n, uint32 subscript = 0); - std::string getStoreStateNameForTxSet(Hash const& txSetHash); + static std::string getStoreStateName(Entry n, uint32 subscript = 0); + static std::string getStoreStateNameForTxSet(Hash const& txSetHash); void setSCPStateForSlot(uint64 slot, std::string const& value); - void updateDb(std::string const& entry, std::string const& value); - std::string getFromDb(std::string const& entry); - bool entryExists(std::string const& entry); + void updateDb(std::string const& entry, std::string const& value, + SessionWrapper& session, std::string const& tableName); + + std::string getFromDb(std::string const& entry, SessionWrapper& session, + std::string const& 
tableName); + static std::string getDBForEntry(PersistentState::Entry entry); }; } From 4ce27e0cbc419ac342f7ab87ec1504f8646eff27 Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 12:39:44 -0800 Subject: [PATCH 05/10] HistoryManager cleanup: remove obsolete bucket publish cache, make certain methods static --- src/bucket/PublishQueueBuckets.cpp | 55 ------ src/bucket/PublishQueueBuckets.h | 36 ---- src/catchup/CatchupRange.cpp | 11 +- src/catchup/DownloadApplyTxsWork.cpp | 38 ++-- src/catchup/VerifyLedgerChainWork.cpp | 40 ++--- src/catchup/test/CatchupWorkTests.cpp | 10 +- src/herder/test/PendingEnvelopesTests.cpp | 4 +- src/history/CheckpointBuilder.cpp | 15 +- src/history/HistoryArchive.cpp | 3 +- src/history/HistoryArchive.h | 3 +- src/history/HistoryArchiveManager.cpp | 2 +- src/history/HistoryManager.h | 93 +++++----- src/history/HistoryManagerImpl.cpp | 135 +++++++-------- src/history/HistoryManagerImpl.h | 21 +-- src/history/StateSnapshot.cpp | 10 +- src/history/test/HistoryTests.cpp | 163 +++++++++++------- src/history/test/HistoryTestsUtils.cpp | 106 ++++++------ src/history/test/HistoryTestsUtils.h | 4 - src/historywork/BatchDownloadWork.cpp | 2 +- .../CheckSingleLedgerHeaderWork.cpp | 7 +- .../DownloadVerifyTxResultsWork.cpp | 2 +- src/historywork/FetchRecentQsetsWork.cpp | 2 +- src/historywork/Progress.cpp | 2 +- src/historywork/VerifyTxResultsWork.cpp | 5 +- .../WriteVerifiedCheckpointHashesWork.cpp | 17 +- src/ledger/CheckpointRange.cpp | 13 +- src/main/CommandLine.cpp | 7 +- src/main/test/ApplicationUtilsTests.cpp | 3 +- src/overlay/test/OverlayTests.cpp | 5 +- 29 files changed, 382 insertions(+), 432 deletions(-) delete mode 100644 src/bucket/PublishQueueBuckets.cpp delete mode 100644 src/bucket/PublishQueueBuckets.h diff --git a/src/bucket/PublishQueueBuckets.cpp b/src/bucket/PublishQueueBuckets.cpp deleted file mode 100644 index aad15119b9..0000000000 --- a/src/bucket/PublishQueueBuckets.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2017 Stellar Development Foundation and contributors. Licensed -// under the Apache License, Version 2.0. See the COPYING file at the root -// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 - -#include "bucket/PublishQueueBuckets.h" - -namespace stellar -{ - -void -PublishQueueBuckets::setBuckets(BucketCount const& buckets) -{ - mBucketUsage = buckets; -} - -void -PublishQueueBuckets::addBuckets(std::vector const& buckets) -{ - for (auto const& bucket : buckets) - { - addBucket(bucket); - } -} - -void -PublishQueueBuckets::addBucket(std::string const& bucket) -{ - mBucketUsage[bucket]++; -} - -void -PublishQueueBuckets::removeBuckets(std::vector const& buckets) -{ - for (auto const& bucket : buckets) - { - removeBucket(bucket); - } -} - -void -PublishQueueBuckets::removeBucket(std::string const& bucket) -{ - auto it = mBucketUsage.find(bucket); - if (it == std::end(mBucketUsage)) - { - return; - } - - it->second--; - if (it->second == 0) - { - mBucketUsage.erase(it); - } -} -} diff --git a/src/bucket/PublishQueueBuckets.h b/src/bucket/PublishQueueBuckets.h deleted file mode 100644 index c7c2090d41..0000000000 --- a/src/bucket/PublishQueueBuckets.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -// Copyright 2017 Stellar Development Foundation and contributors. Licensed -// under the Apache License, Version 2.0. 
See the COPYING file at the root -// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 - -#include -#include -#include - -namespace stellar -{ - -class PublishQueueBuckets -{ - public: - using BucketCount = std::map; - - void setBuckets(BucketCount const& buckets); - - void addBuckets(std::vector const& buckets); - void addBucket(std::string const& bucket); - - void removeBuckets(std::vector const& buckets); - void removeBucket(std::string const& bucket); - - BucketCount const& - map() const - { - return mBucketUsage; - } - - private: - BucketCount mBucketUsage; -}; -} diff --git a/src/catchup/CatchupRange.cpp b/src/catchup/CatchupRange.cpp index 047ea7b211..30f2556d00 100644 --- a/src/catchup/CatchupRange.cpp +++ b/src/catchup/CatchupRange.cpp @@ -69,15 +69,17 @@ calculateCatchupRange(uint32_t lcl, CatchupConfiguration const& cfg, // Case 3: special case of buckets only, no replay; only // possible when targeting the exact end of a checkpoint. - if (cfg.count() == 0 && - (hm.isLastLedgerInCheckpoint(cfg.toLedger()) || cfg.localBucketsOnly())) + if (cfg.count() == 0 && (HistoryManager::isLastLedgerInCheckpoint( + cfg.toLedger(), hm.getConfig()) || + cfg.localBucketsOnly())) { return CatchupRange(cfg.toLedger()); } uint32_t targetStart = cfg.toLedger() - cfg.count() + 1; uint32_t firstInCheckpoint = - hm.firstLedgerInCheckpointContaining(targetStart); + HistoryManager::firstLedgerInCheckpointContaining(targetStart, + hm.getConfig()); // Case 4: target is inside first checkpoint, just replay. if (firstInCheckpoint == init) @@ -87,7 +89,8 @@ calculateCatchupRange(uint32_t lcl, CatchupConfiguration const& cfg, // Case 5: apply buckets, then replay. uint32_t applyBucketsAt = - hm.lastLedgerBeforeCheckpointContaining(targetStart); + HistoryManager::lastLedgerBeforeCheckpointContaining(targetStart, + hm.getConfig()); LedgerRange replay(firstInCheckpoint, cfg.toLedger() - applyBucketsAt); return CatchupRange(applyBucketsAt, replay); } diff --git a/src/catchup/DownloadApplyTxsWork.cpp b/src/catchup/DownloadApplyTxsWork.cpp index 6ebde3ab43..ca13047e7c 100644 --- a/src/catchup/DownloadApplyTxsWork.cpp +++ b/src/catchup/DownloadApplyTxsWork.cpp @@ -28,8 +28,8 @@ DownloadApplyTxsWork::DownloadApplyTxsWork( , mRange(range) , mDownloadDir(downloadDir) , mLastApplied(lastApplied) - , mCheckpointToQueue( - app.getHistoryManager().checkpointContainingLedger(range.mFirst)) + , mCheckpointToQueue(HistoryManager::checkpointContainingLedger( + range.mFirst, app.getConfig())) , mWaitForPublish(waitForPublish) , mArchive(archive) { @@ -52,8 +52,8 @@ DownloadApplyTxsWork::yieldMoreWork() auto getAndUnzip = std::make_shared(mApp, ft, mArchive); - auto const& hm = mApp.getHistoryManager(); - auto low = hm.firstLedgerInCheckpointContaining(mCheckpointToQueue); + auto low = HistoryManager::firstLedgerInCheckpointContaining( + mCheckpointToQueue, mApp.getConfig()); auto high = std::min(mCheckpointToQueue, mRange.last()); TmpDir const& dir = mDownloadDir; @@ -162,8 +162,8 @@ DownloadApplyTxsWork::yieldMoreWork() bool res = true; if (waitForPublish) { - auto& hm = app.getHistoryManager(); - auto length = hm.publishQueueLength(); + auto length = + HistoryManager::publishQueueLength(app.getConfig()); if (length <= CatchupWork::PUBLISH_QUEUE_UNBLOCK_APPLICATION) { pqFellBehind = false; @@ -213,7 +213,8 @@ DownloadApplyTxsWork::yieldMoreWork() auto nextWork = std::make_shared( mApp, "download-apply-" + std::to_string(mCheckpointToQueue), seq, BasicWork::RETRY_NEVER, true /*stop at first failure*/); - 
mCheckpointToQueue += mApp.getHistoryManager().getCheckpointFrequency(); + mCheckpointToQueue += + HistoryManager::getCheckpointFrequency(mApp.getConfig()); mLastYieldedWork = nextWork; return nextWork; } @@ -221,8 +222,8 @@ DownloadApplyTxsWork::yieldMoreWork() void DownloadApplyTxsWork::resetIter() { - mCheckpointToQueue = - mApp.getHistoryManager().checkpointContainingLedger(mRange.mFirst); + mCheckpointToQueue = HistoryManager::checkpointContainingLedger( + mRange.mFirst, mApp.getConfig()); mLastYieldedWork.reset(); mLastApplied = mApp.getLedgerManager().getLastClosedLedgerHeader(); } @@ -234,8 +235,8 @@ DownloadApplyTxsWork::hasNext() const { return false; } - auto last = - mApp.getHistoryManager().checkpointContainingLedger(mRange.last()); + auto last = HistoryManager::checkpointContainingLedger(mRange.last(), + mApp.getConfig()); return mCheckpointToQueue <= last; } @@ -248,17 +249,22 @@ DownloadApplyTxsWork::onSuccess() std::string DownloadApplyTxsWork::getStatus() const { - auto& hm = mApp.getHistoryManager(); - auto first = hm.checkpointContainingLedger(mRange.mFirst); + auto first = HistoryManager::checkpointContainingLedger(mRange.mFirst, + mApp.getConfig()); auto last = (mRange.mCount == 0 ? first - : hm.checkpointContainingLedger(mRange.last())); + : HistoryManager::checkpointContainingLedger( + mRange.last(), mApp.getConfig())); auto checkpointsStarted = - (mCheckpointToQueue - first) / hm.getCheckpointFrequency(); + (mCheckpointToQueue - first) / + HistoryManager::getCheckpointFrequency(mApp.getConfig()); auto checkpointsApplied = checkpointsStarted - getNumWorksInBatch(); - auto totalCheckpoints = (last - first) / hm.getCheckpointFrequency() + 1; + auto totalCheckpoints = + (last - first) / + HistoryManager::getCheckpointFrequency(mApp.getConfig()) + + 1; return fmt::format( FMT_STRING("Download & apply checkpoints: num checkpoints left to " "apply:{:d} ({:d}% done)"), diff --git a/src/catchup/VerifyLedgerChainWork.cpp b/src/catchup/VerifyLedgerChainWork.cpp index 14340ef523..9c2113388d 100644 --- a/src/catchup/VerifyLedgerChainWork.cpp +++ b/src/catchup/VerifyLedgerChainWork.cpp @@ -116,8 +116,8 @@ VerifyLedgerChainWork::VerifyLedgerChainWork( , mRange(range) , mCurrCheckpoint(mRange.mCount == 0 ? 0 - : mApp.getHistoryManager().checkpointContainingLedger( - mRange.last())) + : HistoryManager::checkpointContainingLedger( + mRange.last(), app.getConfig())) , mLastClosed(lastClosedLedger) , mMaxPrevVerified(maxPrevVerified) , mFatalFailurePromise(std::move(fatalFailure)) @@ -152,8 +152,8 @@ VerifyLedgerChainWork::onReset() mVerifiedLedgers.clear(); mCurrCheckpoint = mRange.mCount == 0 ? 
0 - : mApp.getHistoryManager().checkpointContainingLedger( - mRange.last()); + : HistoryManager::checkpointContainingLedger( + mRange.last(), mApp.getConfig()); mChainDisagreesWithLocalState.reset(); mHasTrustedHash = false; } @@ -187,8 +187,6 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint() CLOG_DEBUG(History, "Verifying ledger headers from {} for checkpoint {}", ft.localPath_nogz(), mCurrCheckpoint); - auto const& hm = mApp.getHistoryManager(); - while (hdrIn) { try @@ -261,12 +259,13 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint() if (beginCheckpoint) { - if (!hm.isFirstLedgerInCheckpoint(curr.header.ledgerSeq)) + if (!HistoryManager::isFirstLedgerInCheckpoint( + curr.header.ledgerSeq, mApp.getConfig())) { - CLOG_ERROR( - History, "Checkpoint did not start with {} - got {}", - hm.firstLedgerInCheckpointContaining(curr.header.ledgerSeq), - curr.header.ledgerSeq); + CLOG_ERROR(History, "Checkpoint did not start with {} - got {}", + HistoryManager::firstLedgerInCheckpointContaining( + curr.header.ledgerSeq, mApp.getConfig()), + curr.header.ledgerSeq); return HistoryManager::VERIFY_STATUS_ERR_MISSING_ENTRIES; } @@ -405,8 +404,8 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint() return verifyTrustedHash; } - if (mCurrCheckpoint == - mApp.getHistoryManager().checkpointContainingLedger(mRange.mFirst)) + if (mCurrCheckpoint == HistoryManager::checkpointContainingLedger( + mRange.mFirst, mApp.getConfig())) { // Write outgoing trust-link to shared write-once variable. LedgerNumHashPair outgoing; @@ -464,8 +463,8 @@ VerifyLedgerChainWork::onRun() return BasicWork::State::WORK_SUCCESS; } - if (mCurrCheckpoint < - mApp.getHistoryManager().checkpointContainingLedger(mRange.mFirst)) + if (mCurrCheckpoint < HistoryManager::checkpointContainingLedger( + mRange.mFirst, mApp.getConfig())) { throw std::runtime_error( "Verification undershot first ledger in the range."); @@ -491,8 +490,8 @@ VerifyLedgerChainWork::onRun() // then there is no point retrying catchup - core will never be able to // recover if (result == HistoryManager::VERIFY_STATUS_OK && - mCurrCheckpoint == - mApp.getHistoryManager().checkpointContainingLedger(mRange.mFirst)) + mCurrCheckpoint == HistoryManager::checkpointContainingLedger( + mRange.mFirst, mApp.getConfig())) { if (mChainDisagreesWithLocalState) { @@ -503,14 +502,15 @@ VerifyLedgerChainWork::onRun() switch (result) { case HistoryManager::VERIFY_STATUS_OK: - if (mCurrCheckpoint == - mApp.getHistoryManager().checkpointContainingLedger(mRange.mFirst)) + if (mCurrCheckpoint == HistoryManager::checkpointContainingLedger( + mRange.mFirst, mApp.getConfig())) { CLOG_INFO(History, "History chain [{},{}] verified", mRange.mFirst, mRange.last()); return BasicWork::State::WORK_SUCCESS; } - mCurrCheckpoint -= mApp.getHistoryManager().getCheckpointFrequency(); + mCurrCheckpoint -= + HistoryManager::getCheckpointFrequency(mApp.getConfig()); return BasicWork::State::WORK_RUNNING; case HistoryManager::VERIFY_STATUS_ERR_BAD_LEDGER_VERSION: CLOG_ERROR(History, "Catchup material failed verification - " diff --git a/src/catchup/test/CatchupWorkTests.cpp b/src/catchup/test/CatchupWorkTests.cpp index 3726fb646e..ebe78ee350 100644 --- a/src/catchup/test/CatchupWorkTests.cpp +++ b/src/catchup/test/CatchupWorkTests.cpp @@ -158,8 +158,8 @@ TEST_CASE("compute CatchupRange from CatchupConfiguration", "[catchup]") REQUIRE(lastClosedLedger == LedgerManager::GENESIS_LEDGER_SEQ); // buckets can only by applied on checkpoint boundary - 
REQUIRE(historyManager.isLastLedgerInCheckpoint( - range.getBucketApplyLedger())); + REQUIRE(HistoryManager::isLastLedgerInCheckpoint( + range.getBucketApplyLedger(), app->getConfig())); // If we're applying buckets and replaying ledgers, we do // the latter immediately after the former. @@ -185,14 +185,16 @@ TEST_CASE("compute CatchupRange from CatchupConfiguration", "[catchup]") REQUIRE(range.count() >= configuration.count()); if (std::numeric_limits::max() - - historyManager.getCheckpointFrequency() >= + HistoryManager::getCheckpointFrequency( + app->getConfig()) >= configuration.count()) { // but at most count + getCheckpointFrequency // doing more would mean we are doing non-needed work REQUIRE(range.getReplayCount() <= configuration.count() + - historyManager.getCheckpointFrequency()); + HistoryManager::getCheckpointFrequency( + app->getConfig())); } } else diff --git a/src/herder/test/PendingEnvelopesTests.cpp b/src/herder/test/PendingEnvelopesTests.cpp index e690887e0d..b06b92f2ab 100644 --- a/src/herder/test/PendingEnvelopesTests.cpp +++ b/src/herder/test/PendingEnvelopesTests.cpp @@ -264,8 +264,8 @@ TEST_CASE("PendingEnvelopes recvSCPEnvelope", "[herder]") auto lclNum = app->getLedgerManager().getLastClosedLedgerNum(); auto lastCheckpointSeq = - app->getHistoryManager().lastLedgerBeforeCheckpointContaining( - lclNum); + HistoryManager::lastLedgerBeforeCheckpointContaining( + lclNum, app->getConfig()); SECTION("with slotIndex difference less or equal than " "MAX_SLOTS_TO_REMEMBER") diff --git a/src/history/CheckpointBuilder.cpp b/src/history/CheckpointBuilder.cpp index 8d2aa47c5e..918554b8a5 100644 --- a/src/history/CheckpointBuilder.cpp +++ b/src/history/CheckpointBuilder.cpp @@ -19,16 +19,16 @@ CheckpointBuilder::ensureOpen(uint32_t ledgerSeq) releaseAssert(!mLedgerHeaders); // Don't start writing checkpoint until proper checkpoint boundary // This can occur if a node enabled publish mid-checkpoint - if (mPublishWasDisabled && - !mApp.getHistoryManager().isFirstLedgerInCheckpoint(ledgerSeq)) + if (mPublishWasDisabled && !HistoryManager::isFirstLedgerInCheckpoint( + ledgerSeq, mApp.getConfig())) { return false; } mPublishWasDisabled = false; - auto checkpoint = - mApp.getHistoryManager().checkpointContainingLedger(ledgerSeq); + auto checkpoint = HistoryManager::checkpointContainingLedger( + ledgerSeq, mApp.getConfig()); auto res = FileTransferInfo(FileType::HISTORY_FILE_TYPE_RESULTS, checkpoint, mApp.getConfig()); auto txs = FileTransferInfo(FileType::HISTORY_FILE_TYPE_TRANSACTIONS, @@ -59,7 +59,7 @@ CheckpointBuilder::checkpointComplete(uint32_t checkpoint) ZoneScoped; releaseAssert(mApp.getHistoryArchiveManager().publishEnabled()); releaseAssert( - mApp.getHistoryManager().isLastLedgerInCheckpoint(checkpoint)); + HistoryManager::isLastLedgerInCheckpoint(checkpoint, mApp.getConfig())); // This will close and reset the streams mLedgerHeaders.reset(); @@ -206,7 +206,7 @@ CheckpointBuilder::cleanup(uint32_t lcl) mOpen = false; auto const& cfg = mApp.getConfig(); - auto checkpoint = mApp.getHistoryManager().checkpointContainingLedger(lcl); + auto checkpoint = HistoryManager::checkpointContainingLedger(lcl, cfg); auto res = FileTransferInfo(FileType::HISTORY_FILE_TYPE_RESULTS, checkpoint, cfg); auto txs = FileTransferInfo(FileType::HISTORY_FILE_TYPE_TRANSACTIONS, @@ -224,8 +224,7 @@ CheckpointBuilder::cleanup(uint32_t lcl) // Make sure any new checkpoints are deleted auto next = FileTransferInfo( ft.getType(), - mApp.getHistoryManager().checkpointContainingLedger(checkpoint + - 1), + 
HistoryManager::checkpointContainingLedger(checkpoint + 1, cfg), cfg); CLOG_INFO(History, "Deleting next checkpoint files {}", next.localPath_nogz_dirty()); diff --git a/src/history/HistoryArchive.cpp b/src/history/HistoryArchive.cpp index aa6da206a4..a5b345bf82 100644 --- a/src/history/HistoryArchive.cpp +++ b/src/history/HistoryArchive.cpp @@ -446,8 +446,7 @@ HistoryArchiveState::HistoryArchiveState(uint32_t ledgerSeq, } } -HistoryArchive::HistoryArchive(Application& app, - HistoryArchiveConfiguration const& config) +HistoryArchive::HistoryArchive(HistoryArchiveConfiguration const& config) : mConfig(config) { } diff --git a/src/history/HistoryArchive.h b/src/history/HistoryArchive.h index 378716118d..d6ca646b42 100644 --- a/src/history/HistoryArchive.h +++ b/src/history/HistoryArchive.h @@ -154,8 +154,7 @@ struct HistoryArchiveState class HistoryArchive : public std::enable_shared_from_this { public: - explicit HistoryArchive(Application& app, - HistoryArchiveConfiguration const& config); + explicit HistoryArchive(HistoryArchiveConfiguration const& config); ~HistoryArchive(); bool hasGetCmd() const; bool hasPutCmd() const; diff --git a/src/history/HistoryArchiveManager.cpp b/src/history/HistoryArchiveManager.cpp index 5f95c523a4..7e7a31e27f 100644 --- a/src/history/HistoryArchiveManager.cpp +++ b/src/history/HistoryArchiveManager.cpp @@ -25,7 +25,7 @@ HistoryArchiveManager::HistoryArchiveManager(Application& app) : mApp{app} { for (auto const& archiveConfiguration : mApp.getConfig().HISTORY) mArchives.push_back( - std::make_shared(app, archiveConfiguration.second)); + std::make_shared(archiveConfiguration.second)); } bool diff --git a/src/history/HistoryManager.h b/src/history/HistoryManager.h index 0bfd8adf0f..07b6f64fa9 100644 --- a/src/history/HistoryManager.h +++ b/src/history/HistoryManager.h @@ -215,15 +215,15 @@ class HistoryManager // Checkpoints are made every getCheckpointFrequency() ledgers. // This should normally be a constant (64) but in testing cases // may be different (see ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING). - virtual uint32_t getCheckpointFrequency() const = 0; + static uint32_t getCheckpointFrequency(Config const& cfg); // Return checkpoint that contains given ledger. Checkpoint is identified // by last ledger in range. This does not consult the network nor take // account of manual checkpoints. - uint32_t - checkpointContainingLedger(uint32_t ledger) const + static uint32_t + checkpointContainingLedger(uint32_t ledger, Config const& cfg) { - uint32_t freq = getCheckpointFrequency(); + uint32_t freq = getCheckpointFrequency(cfg); // Round-up to next multiple of freq, then subtract 1 since checkpoints // are numbered for (and cover ledgers up to) the last ledger in them, // which is one-before the next multiple of freq. @@ -233,29 +233,29 @@ class HistoryManager // Return true iff closing `ledger` should cause publishing a checkpoint. // Equivalent to `ledger == checkpointContainingLedger(ledger)` but a little // more obviously named. 
- bool - publishCheckpointOnLedgerClose(uint32_t ledger) const + static bool + publishCheckpointOnLedgerClose(uint32_t ledger, Config const& cfg) { - return checkpointContainingLedger(ledger) == ledger; + return checkpointContainingLedger(ledger, cfg) == ledger; } - bool - isFirstLedgerInCheckpoint(uint32_t ledger) const + static bool + isFirstLedgerInCheckpoint(uint32_t ledger, Config const& cfg) { - return firstLedgerInCheckpointContaining(ledger) == ledger; + return firstLedgerInCheckpointContaining(ledger, cfg) == ledger; } - bool - isLastLedgerInCheckpoint(uint32_t ledger) const + static bool + isLastLedgerInCheckpoint(uint32_t ledger, Config const& cfg) { - return checkpointContainingLedger(ledger) == ledger; + return checkpointContainingLedger(ledger, cfg) == ledger; } // Return the number of ledgers in the checkpoint containing a given ledger. - uint32_t - sizeOfCheckpointContaining(uint32_t ledger) const + static uint32_t + sizeOfCheckpointContaining(uint32_t ledger, Config const& cfg) { - uint32_t freq = getCheckpointFrequency(); + uint32_t freq = getCheckpointFrequency(cfg); if (ledger < freq) { return freq - 1; @@ -264,52 +264,58 @@ class HistoryManager } // Return the first ledger in the checkpoint containing a given ledger. - uint32_t - firstLedgerInCheckpointContaining(uint32_t ledger) const + static uint32_t + firstLedgerInCheckpointContaining(uint32_t ledger, Config const& cfg) { - uint32_t last = checkpointContainingLedger(ledger); // == 63, 127, 191 - uint32_t size = sizeOfCheckpointContaining(ledger); // == 63, 64, 64 - return last - (size - 1); // == 1, 64, 128 + uint32_t last = + checkpointContainingLedger(ledger, cfg); // == 63, 127, 191 + uint32_t size = + sizeOfCheckpointContaining(ledger, cfg); // == 63, 64, 64 + return last - (size - 1); // == 1, 64, 128 } // Return the first ledger after the checkpoint containing a given ledger. - uint32_t - firstLedgerAfterCheckpointContaining(uint32_t ledger) const + static uint32_t + firstLedgerAfterCheckpointContaining(uint32_t ledger, Config const& cfg) { uint32_t first = - firstLedgerInCheckpointContaining(ledger); // == 1, 64, 128 - uint32_t size = sizeOfCheckpointContaining(ledger); // == 63, 64, 64 - return first + size; // == 64, 128, 192 + firstLedgerInCheckpointContaining(ledger, cfg); // == 1, 64, 128 + uint32_t size = + sizeOfCheckpointContaining(ledger, cfg); // == 63, 64, 64 + return first + size; // == 64, 128, 192 } // Return the last ledger before the checkpoint containing a given ledger, // or zero if `ledger` is contained inside the first checkpoint. - uint32_t - lastLedgerBeforeCheckpointContaining(uint32_t ledger) const + static uint32_t + lastLedgerBeforeCheckpointContaining(uint32_t ledger, Config const& cfg) { - uint32_t last = checkpointContainingLedger(ledger); // == 63, 127, 191 - uint32_t size = sizeOfCheckpointContaining(ledger); // == 63, 64, 64 + uint32_t last = + checkpointContainingLedger(ledger, cfg); // == 63, 127, 191 + uint32_t size = + sizeOfCheckpointContaining(ledger, cfg); // == 63, 64, 64 releaseAssert(last >= size); return last - size; // == 0, 63, 127 } // Return the ledger to trigger the catchup machinery on, given a ledger // that is the start of a checkpoint buffered in the catchup manager. 
- uint32_t - ledgerToTriggerCatchup(uint32_t firstLedgerOfBufferedCheckpoint) + static uint32_t + ledgerToTriggerCatchup(uint32_t firstLedgerOfBufferedCheckpoint, + Config const& cfg) { releaseAssert( - isFirstLedgerInCheckpoint(firstLedgerOfBufferedCheckpoint)); + isFirstLedgerInCheckpoint(firstLedgerOfBufferedCheckpoint, cfg)); return firstLedgerOfBufferedCheckpoint + 1; } + // Return the length of the current publishing queue. + static size_t publishQueueLength(Config const& cfg); + // Emit a log message and set StatusManager HISTORY_PUBLISH status to // describe current publish state. virtual void logAndUpdatePublishStatus() = 0; - // Return the length of the current publishing queue. - virtual size_t publishQueueLength() const = 0; - // Calls queueCurrentHistory() if the current ledger is a multiple of // getCheckpointFrequency() -- equivalently, the LCL is one _less_ than // a multiple of getCheckpointFrequency(). Returns true if checkpoint @@ -324,11 +330,11 @@ class HistoryManager // Return the youngest ledger still in the outgoing publish queue; // returns 0 if the publish queue has nothing in it. - virtual uint32_t getMinLedgerQueuedToPublish() = 0; + static uint32_t getMinLedgerQueuedToPublish(Config const& cfg); // Return the oldest ledger still in the outgoing publish queue; // returns 0 if the publish queue has nothing in it. - virtual uint32_t getMaxLedgerQueuedToPublish() = 0; + static uint32_t getMaxLedgerQueuedToPublish(Config const& cfg); // Publish any checkpoints queued (in the database) for publication. // Returns the number of publishes initiated. @@ -349,11 +355,13 @@ class HistoryManager // Return the set of buckets referenced by the persistent (DB) publish // queue. - virtual std::vector getBucketsReferencedByPublishQueue() = 0; + static std::set + getBucketsReferencedByPublishQueue(Config const& cfg); // Return the full set of HistoryArchiveStates in the persistent (DB) // publish queue. - virtual std::vector getPublishQueueStates() = 0; + static std::vector + getPublishQueueStates(Config const& cfg); // Callback from Publication, indicates that a given snapshot was // published. The `success` parameter indicates whether _all_ the @@ -375,8 +383,7 @@ class HistoryManager // Cleanup published files. If core is reset to genesis, any unpublished // files will be cleaned by removal of the buckets directory. - virtual void deletePublishedFiles(uint32_t ledgerSeq, - Config const& cfg) = 0; + static void deletePublishedFiles(uint32_t ledgerSeq, Config const& cfg); // Return the name of the HistoryManager's tmpdir (used for storing files in // transit). 
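(Aside on the hunks above: turning these helpers into statics does not change the checkpoint arithmetic, it only moves the frequency lookup from the instance to the Config argument. The following is a minimal standalone sketch of that arithmetic, under the assumption that the hypothetical free functions below mirror the helpers and that `freq` stands in for `getCheckpointFrequency(cfg)`, which is normally 64 and 8 under ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING.)

#include <cassert>
#include <cstdint>

// Hypothetical free-function mirrors of the (now static) helpers above;
// `freq` stands in for HistoryManager::getCheckpointFrequency(cfg).
static uint32_t
checkpointContainingLedger(uint32_t ledger, uint32_t freq)
{
    // Round up to the next multiple of freq, then subtract 1: checkpoints are
    // numbered for (and cover ledgers up to) the last ledger they contain.
    return (ledger / freq) * freq + freq - 1;
}

static uint32_t
sizeOfCheckpointContaining(uint32_t ledger, uint32_t freq)
{
    // The first checkpoint is one ledger short, since ledger 0 does not exist.
    return ledger < freq ? freq - 1 : freq;
}

static uint32_t
firstLedgerInCheckpointContaining(uint32_t ledger, uint32_t freq)
{
    return checkpointContainingLedger(ledger, freq) -
           (sizeOfCheckpointContaining(ledger, freq) - 1);
}

int
main()
{
    uint32_t const freq = 64;
    assert(checkpointContainingLedger(0, freq) == 0x3f);
    assert(checkpointContainingLedger(63, freq) == 0x3f);
    assert(checkpointContainingLedger(64, freq) == 0x7f);
    assert(firstLedgerInCheckpointContaining(2, freq) == 1);
    assert(firstLedgerInCheckpointContaining(64, freq) == 64);
    assert(firstLedgerInCheckpointContaining(130, freq) == 128);
    return 0;
}

(The asserted values match the expectations exercised by the "checkpoint containing ledger" test in HistoryTests.cpp further down in this patch.)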
@@ -404,5 +411,7 @@ class HistoryManager #endif virtual ~HistoryManager(){}; + + virtual Config const& getConfig() const = 0; }; } diff --git a/src/history/HistoryManagerImpl.cpp b/src/history/HistoryManagerImpl.cpp index ed126b4d3d..4f1932898a 100644 --- a/src/history/HistoryManagerImpl.cpp +++ b/src/history/HistoryManagerImpl.cpp @@ -56,8 +56,8 @@ static std::string kSQLCreateStatement = void HistoryManager::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS publishqueue;"; - soci::statement st = db.getSession().prepare << kSQLCreateStatement; + db.getRawSession() << "DROP TABLE IF EXISTS publishqueue;"; + soci::statement st = db.getRawSession().prepare << kSQLCreateStatement; st.execute(true); } @@ -90,8 +90,8 @@ void writeCheckpointFile(Application& app, HistoryArchiveState const& has, bool finalize) { - releaseAssert( - app.getHistoryManager().isLastLedgerInCheckpoint(has.currentLedger)); + releaseAssert(HistoryManager::isLastLedgerInCheckpoint(has.currentLedger, + app.getConfig())); auto filename = publishQueueFileName(has.currentLedger); auto tmpOut = app.getHistoryManager().getTmpDir() / filename; { @@ -163,7 +163,8 @@ HistoryManagerImpl::dropSQLBasedPublish() // Migrate all the existing queued checkpoints to the new format { std::string state; - auto prep = db.getPreparedStatement("SELECT state FROM publishqueue;"); + auto prep = + db.getPreparedStatement("SELECT state FROM publishqueue;", sess); auto& st = prep.statement(); st.exchange(soci::into(state)); st.define_and_bind(); @@ -172,21 +173,21 @@ HistoryManagerImpl::dropSQLBasedPublish() { HistoryArchiveState has; has.fromString(state); - releaseAssert(isLastLedgerInCheckpoint(has.currentLedger)); + releaseAssert(isLastLedgerInCheckpoint(has.currentLedger, cfg)); checkpointLedgers.insert(has.currentLedger); writeCheckpointFile(mApp, has, /* finalize */ true); st.fetch(); } } - auto freq = getCheckpointFrequency(); + auto freq = getCheckpointFrequency(mApp.getConfig()); uint32_t lastQueued = 0; for (auto const& checkpoint : checkpointLedgers) { - auto begin = firstLedgerInCheckpointContaining(checkpoint); - populateCheckpointFilesFromDB(mApp, sess, begin, freq, + auto begin = firstLedgerInCheckpointContaining(checkpoint, cfg); + populateCheckpointFilesFromDB(mApp, sess.session(), begin, freq, mCheckpointBuilder); - LedgerHeaderUtils::copyToStream(db, sess, begin, freq, + LedgerHeaderUtils::copyToStream(db, sess.session(), begin, freq, mCheckpointBuilder); // Checkpoints in publish queue are complete, so we can finalize them mCheckpointBuilder.checkpointComplete(checkpoint); @@ -198,17 +199,17 @@ HistoryManagerImpl::dropSQLBasedPublish() { // Then, reconstruct any partial checkpoints that haven't yet been // queued - populateCheckpointFilesFromDB(mApp, sess, - firstLedgerInCheckpointContaining(lcl), - freq, mCheckpointBuilder); - LedgerHeaderUtils::copyToStream(db, sess, - firstLedgerInCheckpointContaining(lcl), - freq, mCheckpointBuilder); + populateCheckpointFilesFromDB( + mApp, sess.session(), firstLedgerInCheckpointContaining(lcl, cfg), + freq, mCheckpointBuilder); + LedgerHeaderUtils::copyToStream( + db, sess.session(), firstLedgerInCheckpointContaining(lcl, cfg), + freq, mCheckpointBuilder); } - db.clearPreparedStatementCache(); + db.clearPreparedStatementCache(sess); // Now it's safe to drop obsolete SQL tables - sess << "DROP TABLE IF EXISTS publishqueue;"; + sess.session() << "DROP TABLE IF EXISTS publishqueue;"; dropSupportTxHistory(db); dropSupportTxSetHistory(db); } @@ -238,9 +239,9 @@ 
HistoryManagerImpl::~HistoryManagerImpl() } uint32_t -HistoryManagerImpl::getCheckpointFrequency() const +HistoryManager::getCheckpointFrequency(Config const& cfg) { - if (mApp.getConfig().ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING) + if (cfg.ARTIFICIALLY_ACCELERATE_TIME_FOR_TESTING) { return 8; } @@ -256,10 +257,10 @@ HistoryManagerImpl::logAndUpdatePublishStatus() std::stringstream stateStr; if (mPublishWork) { - auto qlen = publishQueueLength(); + auto qlen = publishQueueLength(mApp.getConfig()); stateStr << "Publishing " << qlen << " queued checkpoints" - << " [" << getMinLedgerQueuedToPublish() << "-" - << getMaxLedgerQueuedToPublish() << "]" + << " [" << getMinLedgerQueuedToPublish(mApp.getConfig()) << "-" + << getMaxLedgerQueuedToPublish(mApp.getConfig()) << "]" << ": " << mPublishWork->getStatus(); auto current = stateStr.str(); @@ -327,10 +328,10 @@ forEveryTmpCheckpoint(std::string const& dir, } size_t -HistoryManagerImpl::publishQueueLength() const +HistoryManager::publishQueueLength(Config const& cfg) { ZoneScoped; - return findPublishFiles(publishQueuePath(mApp.getConfig()).string()).size(); + return findPublishFiles(publishQueuePath(cfg).string()).size(); } string const& @@ -352,23 +353,23 @@ HistoryManagerImpl::localFilename(std::string const& basename) } uint32_t -HistoryManagerImpl::getMinLedgerQueuedToPublish() +HistoryManager::getMinLedgerQueuedToPublish(Config const& cfg) { ZoneScoped; auto min = std::numeric_limits::max(); forEveryQueuedCheckpoint( - publishQueuePath(mApp.getConfig()).string(), + publishQueuePath(cfg).string(), [&](uint32_t seq, std::string const& f) { min = std::min(min, seq); }); return min; } uint32_t -HistoryManagerImpl::getMaxLedgerQueuedToPublish() +HistoryManager::getMaxLedgerQueuedToPublish(Config const& cfg) { ZoneScoped; auto max = std::numeric_limits::min(); forEveryQueuedCheckpoint( - publishQueuePath(mApp.getConfig()).string(), + publishQueuePath(cfg).string(), [&](uint32_t seq, std::string const& f) { max = std::max(max, seq); }); return max; } @@ -377,7 +378,7 @@ bool HistoryManagerImpl::maybeQueueHistoryCheckpoint() { uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); - if (!publishCheckpointOnLedgerClose(lcl)) + if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig())) { return false; } @@ -402,6 +403,8 @@ HistoryManagerImpl::queueCurrentHistory() LiveBucketList bl; if (mApp.getConfig().MODE_ENABLES_BUCKETLIST) { + // Only one thread can modify the bucketlist, access BL from the _same_ + // thread bl = mApp.getBucketManager().getLiveBucketList(); } @@ -422,13 +425,13 @@ HistoryManagerImpl::queueCurrentHistory() // merges-in-progress, avoid restarting them. 
mPublishQueued++; - mPublishQueueBuckets.addBuckets(has.allBuckets()); } void HistoryManagerImpl::takeSnapshotAndPublish(HistoryArchiveState const& has) { ZoneScoped; + releaseAssert(threadIsMain()); if (mPublishWork) { return; @@ -512,7 +515,7 @@ HistoryManagerImpl::publishQueuedHistory() #endif ZoneScoped; - auto seq = getMinLedgerQueuedToPublish(); + auto seq = getMinLedgerQueuedToPublish(mApp.getConfig()); if (seq == std::numeric_limits::max()) { return 0; @@ -527,7 +530,7 @@ void HistoryManagerImpl::maybeCheckpointComplete() { uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); - if (!publishCheckpointOnLedgerClose(lcl) || + if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig()) || !mApp.getHistoryArchiveManager().publishEnabled()) { return; @@ -556,61 +559,42 @@ HistoryManagerImpl::maybeCheckpointComplete() } std::vector -HistoryManagerImpl::getPublishQueueStates() +HistoryManager::getPublishQueueStates(Config const& cfg) { ZoneScoped; std::vector states; - forEveryQueuedCheckpoint( - publishQueuePath(mApp.getConfig()).string(), - [&](uint32_t seq, std::string const& f) { - HistoryArchiveState has; - auto fullPath = publishQueuePath(mApp.getConfig()) / f; - states.push_back(loadCheckpointHAS(fullPath.string())); - }); + forEveryQueuedCheckpoint(publishQueuePath(cfg).string(), + [&](uint32_t seq, std::string const& f) { + HistoryArchiveState has; + auto fullPath = publishQueuePath(cfg) / f; + states.push_back( + loadCheckpointHAS(fullPath.string())); + }); return states; } -PublishQueueBuckets::BucketCount -HistoryManagerImpl::loadBucketsReferencedByPublishQueue() +std::set +HistoryManager::getBucketsReferencedByPublishQueue(Config const& cfg) { ZoneScoped; - auto states = getPublishQueueStates(); - PublishQueueBuckets::BucketCount result{}; + auto states = getPublishQueueStates(cfg); + std::set result; for (auto const& s : states) { auto sb = s.allBuckets(); for (auto const& b : sb) { - result[b]++; + result.insert(b); } } return result; } -std::vector -HistoryManagerImpl::getBucketsReferencedByPublishQueue() -{ - ZoneScoped; - if (!mPublishQueueBucketsFilled) - { - mPublishQueueBuckets.setBuckets(loadBucketsReferencedByPublishQueue()); - mPublishQueueBucketsFilled = true; - } - - std::vector buckets; - for (auto const& s : mPublishQueueBuckets.map()) - { - buckets.push_back(s.first); - } - - return buckets; -} - std::vector HistoryManagerImpl::getMissingBucketsReferencedByPublishQueue() { ZoneScoped; - auto states = getPublishQueueStates(); + auto states = getPublishQueueStates(mApp.getConfig()); std::set buckets; for (auto const& s : states) { @@ -621,15 +605,14 @@ HistoryManagerImpl::getMissingBucketsReferencedByPublishQueue() } void -HistoryManagerImpl::deletePublishedFiles(uint32_t ledgerSeq, Config const& cfg) +HistoryManager::deletePublishedFiles(uint32_t ledgerSeq, Config const& cfg) { - releaseAssert(isLastLedgerInCheckpoint(ledgerSeq)); - FileTransferInfo res(FileType::HISTORY_FILE_TYPE_RESULTS, ledgerSeq, - mApp.getConfig()); + releaseAssert(HistoryManager::isLastLedgerInCheckpoint(ledgerSeq, cfg)); + FileTransferInfo res(FileType::HISTORY_FILE_TYPE_RESULTS, ledgerSeq, cfg); FileTransferInfo txs(FileType::HISTORY_FILE_TYPE_TRANSACTIONS, ledgerSeq, - mApp.getConfig()); + cfg); FileTransferInfo headers(FileType::HISTORY_FILE_TYPE_LEDGER, ledgerSeq, - mApp.getConfig()); + cfg); // Dirty files shouldn't exist, but cleanup just in case std::remove(res.localPath_nogz_dirty().c_str()); std::remove(txs.localPath_nogz_dirty().c_str()); @@ -664,7 +647,6 @@ 
HistoryManagerImpl::historyPublished( auto file = publishQueuePath(mApp.getConfig()) / publishQueueFileName(ledgerSeq); std::filesystem::remove(file); - mPublishQueueBuckets.removeBuckets(originalBuckets); deletePublishedFiles(ledgerSeq, mApp.getConfig()); } else @@ -751,4 +733,11 @@ HistoryManagerImpl::setPublicationEnabled(bool enabled) mPublicationEnabled = enabled; } #endif + +Config const& +HistoryManagerImpl::getConfig() const +{ + return mApp.getConfig(); +} + } diff --git a/src/history/HistoryManagerImpl.h b/src/history/HistoryManagerImpl.h index e03251f2f9..0ae2ae3378 100644 --- a/src/history/HistoryManagerImpl.h +++ b/src/history/HistoryManagerImpl.h @@ -4,7 +4,6 @@ // under the Apache License, Version 2.0. See the COPYING file at the root // of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 -#include "bucket/PublishQueueBuckets.h" #include "history/CheckpointBuilder.h" #include "history/HistoryManager.h" #include "util/TmpDir.h" @@ -29,10 +28,7 @@ class HistoryManagerImpl : public HistoryManager std::unique_ptr mWorkDir; std::shared_ptr mPublishWork; - PublishQueueBuckets mPublishQueueBuckets; - bool mPublishQueueBucketsFilled{false}; - - int mPublishQueued{0}; + std::atomic mPublishQueued{0}; medida::Meter& mPublishSuccess; medida::Meter& mPublishFailure; @@ -40,7 +36,6 @@ class HistoryManagerImpl : public HistoryManager UnorderedMap mEnqueueTimes; CheckpointBuilder mCheckpointBuilder; - PublishQueueBuckets::BucketCount loadBucketsReferencedByPublishQueue(); #ifdef BUILD_TESTS bool mPublicationEnabled{true}; #endif @@ -49,22 +44,14 @@ class HistoryManagerImpl : public HistoryManager HistoryManagerImpl(Application& app); ~HistoryManagerImpl() override; - uint32_t getCheckpointFrequency() const override; - void logAndUpdatePublishStatus() override; - size_t publishQueueLength() const override; - bool maybeQueueHistoryCheckpoint() override; void queueCurrentHistory() override; void takeSnapshotAndPublish(HistoryArchiveState const& has); - uint32_t getMinLedgerQueuedToPublish() override; - - uint32_t getMaxLedgerQueuedToPublish() override; - size_t publishQueuedHistory() override; void maybeCheckpointComplete() override; @@ -73,10 +60,6 @@ class HistoryManagerImpl : public HistoryManager std::vector getMissingBucketsReferencedByPublishQueue() override; - std::vector getBucketsReferencedByPublishQueue() override; - - std::vector getPublishQueueStates() override; - void historyPublished(uint32_t ledgerSeq, std::vector const& originalBuckets, bool success) override; @@ -85,8 +68,8 @@ class HistoryManagerImpl : public HistoryManager TransactionResultSet const& resultSet) override; void appendLedgerHeader(LedgerHeader const& header) override; void restoreCheckpoint(uint32_t lcl) override; - void deletePublishedFiles(uint32_t ledgerSeq, Config const& cfg) override; + Config const& getConfig() const override; std::string const& getTmpDir() override; std::string localFilename(std::string const& basename) override; diff --git a/src/history/StateSnapshot.cpp b/src/history/StateSnapshot.cpp index 0d5a3e5185..3ab2a9e66e 100644 --- a/src/history/StateSnapshot.cpp +++ b/src/history/StateSnapshot.cpp @@ -57,7 +57,8 @@ StateSnapshot::writeSCPMessages() const mApp.getDatabase().canUsePool() ? std::make_unique(mApp.getDatabase().getPool()) : nullptr); - soci::session& sess(snapSess ? *snapSess : mApp.getDatabase().getSession()); + soci::session& sess(snapSess ? 
*snapSess + : mApp.getDatabase().getRawSession()); soci::transaction tx(sess); // The current "history block" is stored in _four_ files, one just ledger @@ -76,9 +77,10 @@ StateSnapshot::writeSCPMessages() const XDROutputFileStream scpHistory(ctx, doFsync); scpHistory.open(mSCPHistorySnapFile->localPath_nogz()); - auto& hm = mApp.getHistoryManager(); - begin = hm.firstLedgerInCheckpointContaining(mLocalState.currentLedger); - count = hm.sizeOfCheckpointContaining(mLocalState.currentLedger); + begin = HistoryManager::firstLedgerInCheckpointContaining( + mLocalState.currentLedger, mApp.getConfig()); + count = HistoryManager::sizeOfCheckpointContaining( + mLocalState.currentLedger, mApp.getConfig()); CLOG_DEBUG(History, "Streaming {} ledgers worth of history, from {}", count, begin); diff --git a/src/history/test/HistoryTests.cpp b/src/history/test/HistoryTests.cpp index e2ea59c6db..c02968835d 100644 --- a/src/history/test/HistoryTests.cpp +++ b/src/history/test/HistoryTests.cpp @@ -41,42 +41,69 @@ TEST_CASE("checkpoint containing ledger", "[history]") { VirtualClock clock; auto app = createTestApplication(clock, getTestConfig()); - auto& hm = app->getHistoryManager(); // Technically ledger 0 doesn't exist so it's not "in" any checkpoint; but // the first checkpoint's ledger range covers ledger 0 so we consider it // "contained" in that checkpoint for the sake of this function. - CHECK(hm.checkpointContainingLedger(0) == 0x3f); - CHECK(hm.checkpointContainingLedger(1) == 0x3f); - CHECK(hm.checkpointContainingLedger(2) == 0x3f); - CHECK(hm.checkpointContainingLedger(3) == 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(0, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(1, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(2, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(3, app->getConfig()) == + 0x3f); // ... - CHECK(hm.checkpointContainingLedger(61) == 0x3f); - CHECK(hm.checkpointContainingLedger(62) == 0x3f); - CHECK(hm.checkpointContainingLedger(63) == 0x3f); - CHECK(hm.checkpointContainingLedger(64) == 0x7f); - CHECK(hm.checkpointContainingLedger(65) == 0x7f); - CHECK(hm.checkpointContainingLedger(66) == 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(61, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(62, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(63, app->getConfig()) == + 0x3f); + CHECK(HistoryManager::checkpointContainingLedger(64, app->getConfig()) == + 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(65, app->getConfig()) == + 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(66, app->getConfig()) == + 0x7f); // ... 
- CHECK(hm.checkpointContainingLedger(125) == 0x7f); - CHECK(hm.checkpointContainingLedger(126) == 0x7f); - CHECK(hm.checkpointContainingLedger(127) == 0x7f); - CHECK(hm.checkpointContainingLedger(128) == 0xbf); - CHECK(hm.checkpointContainingLedger(129) == 0xbf); - CHECK(hm.checkpointContainingLedger(130) == 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(125, app->getConfig()) == + 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(126, app->getConfig()) == + 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(127, app->getConfig()) == + 0x7f); + CHECK(HistoryManager::checkpointContainingLedger(128, app->getConfig()) == + 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(129, app->getConfig()) == + 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(130, app->getConfig()) == + 0xbf); // ... - CHECK(hm.checkpointContainingLedger(189) == 0xbf); - CHECK(hm.checkpointContainingLedger(190) == 0xbf); - CHECK(hm.checkpointContainingLedger(191) == 0xbf); - CHECK(hm.checkpointContainingLedger(192) == 0xff); - CHECK(hm.checkpointContainingLedger(193) == 0xff); - CHECK(hm.checkpointContainingLedger(194) == 0xff); + CHECK(HistoryManager::checkpointContainingLedger(189, app->getConfig()) == + 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(190, app->getConfig()) == + 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(191, app->getConfig()) == + 0xbf); + CHECK(HistoryManager::checkpointContainingLedger(192, app->getConfig()) == + 0xff); + CHECK(HistoryManager::checkpointContainingLedger(193, app->getConfig()) == + 0xff); + CHECK(HistoryManager::checkpointContainingLedger(194, app->getConfig()) == + 0xff); // ... - CHECK(hm.checkpointContainingLedger(253) == 0xff); - CHECK(hm.checkpointContainingLedger(254) == 0xff); - CHECK(hm.checkpointContainingLedger(255) == 0xff); - CHECK(hm.checkpointContainingLedger(256) == 0x13f); - CHECK(hm.checkpointContainingLedger(257) == 0x13f); - CHECK(hm.checkpointContainingLedger(258) == 0x13f); + CHECK(HistoryManager::checkpointContainingLedger(253, app->getConfig()) == + 0xff); + CHECK(HistoryManager::checkpointContainingLedger(254, app->getConfig()) == + 0xff); + CHECK(HistoryManager::checkpointContainingLedger(255, app->getConfig()) == + 0xff); + CHECK(HistoryManager::checkpointContainingLedger(256, app->getConfig()) == + 0x13f); + CHECK(HistoryManager::checkpointContainingLedger(257, app->getConfig()) == + 0x13f); + CHECK(HistoryManager::checkpointContainingLedger(258, app->getConfig()) == + 0x13f); } TEST_CASE("HistoryManager compress", "[history]") @@ -216,7 +243,8 @@ TEST_CASE("Ledger chain verification", "[ledgerheaderverification]") uint32_t initLedger = 127; auto ledgerRange = LedgerRange::inclusive( initLedger, - initLedger + (app->getHistoryManager().getCheckpointFrequency() * 10)); + initLedger + + (HistoryManager::getCheckpointFrequency(app->getConfig()) * 10)); CheckpointRange checkpointRange{ledgerRange, app->getHistoryManager()}; auto ledgerChainGenerator = TestLedgerChainGenerator{ *app, @@ -330,7 +358,7 @@ TEST_CASE("Ledger chain verification", "[ledgerheaderverification]") std::tie(lcl, last) = ledgerChainGenerator.makeLedgerChainFiles( HistoryManager::VERIFY_STATUS_OK); lcl.header.ledgerSeq += - app->getHistoryManager().getCheckpointFrequency() - 1; + HistoryManager::getCheckpointFrequency(app->getConfig()) - 1; lcl.hash = HashUtils::pseudoRandomForTesting(); auto w = checkExpectedBehavior(BasicWork::State::WORK_FAILURE, lcl, last); @@ -488,8 +516,7 @@ validateCheckpointFiles(Application& app, 
uint32_t ledger, bool isFinalized) REQUIRE(entry.header.ledgerSeq == ledger); }; - auto checkpoint = - app.getHistoryManager().checkpointContainingLedger(ledger); + auto checkpoint = HistoryManager::checkpointContainingLedger(ledger, cfg); FileTransferInfo res(FileType::HISTORY_FILE_TYPE_RESULTS, checkpoint, cfg); FileTransferInfo txs(FileType::HISTORY_FILE_TYPE_TRANSACTIONS, checkpoint, cfg); @@ -529,9 +556,8 @@ TEST_CASE("History publish with restart", "[history][publish]") LedgerManager::GENESIS_LEDGER_SEQ, 10, catchupSimulation.getLastCheckpointLedger(1), - catchupSimulation.getApp() - .getHistoryManager() - .firstLedgerInCheckpointContaining(checkpointLedger), + HistoryManager::firstLedgerInCheckpointContaining( + checkpointLedger, catchupSimulation.getApp().getConfig()), checkpointLedger - 1, checkpointLedger}; for (auto ledgerNum : ledgerNums) @@ -750,7 +776,7 @@ TEST_CASE("History catchup", "[history][catchup][acceptance]") auto offlineNonCheckpointDestinationLedger = checkpointLedger - - app->getHistoryManager().getCheckpointFrequency() / 2; + HistoryManager::getCheckpointFrequency(app->getConfig()) / 2; SECTION("when not enough publishes has been performed") { @@ -899,7 +925,6 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", std::string("Retriggering catchups after trimming mSyncingLedgers")); auto& lm = app->getLedgerManager(); - auto& hm = app->getHistoryManager(); auto& herder = static_cast(app->getHerder()); auto runCatchup = [&](uint32_t expectedDestination) { @@ -913,7 +938,7 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", CatchupConfiguration::Mode::ONLINE}, *app); - catchupSimulation.crankUntil( + testutil::crankUntil( app, [&]() { return app->getCatchupManager().catchupWorkIsDone(); }, std::chrono::seconds{ std::max(expectedCatchupWork.mTxSetsApplied + 15, 60)}); @@ -930,10 +955,11 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", // initLedger (inclusive), so that there's something to knit-up with. Do not // externalize anything we haven't yet published, of course. const uint32_t firstLedgerInCheckpoint = - hm.firstLedgerAfterCheckpointContaining(initLedger); + HistoryManager::firstLedgerAfterCheckpointContaining(initLedger, + app->getConfig()); - const uint32_t triggerLedger = - hm.ledgerToTriggerCatchup(firstLedgerInCheckpoint); + const uint32_t triggerLedger = HistoryManager::ledgerToTriggerCatchup( + firstLedgerInCheckpoint, app->getConfig()); // 1. The app hears initLedger, ..., dividingLedger - 1. // NB: dividingLedger must be chosen such that mSyncingLedgers gets @@ -954,7 +980,8 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", // have popped some elements in order to prevent it from growing // exponentially, and thus there is a gap between the LCL and // mSyncingLedgers. - runCatchup(hm.checkpointContainingLedger(initLedger - 1)); + runCatchup(HistoryManager::checkpointContainingLedger( + initLedger - 1, app->getConfig())); // As mentioned above, mSyncingLedgers must have been trimmed // after hearing up to (dividingLedger - 1). 
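(Aside before the publish-queue tests below: with the queue now stored as per-checkpoint files rather than SQL rows, the static getMinLedgerQueuedToPublish / getMaxLedgerQueuedToPublish walk the queue directory and report sentinel values when it is empty. The following small sketch illustrates that convention; forEachQueuedCheckpoint is a hypothetical stand-in for the real forEveryQueuedCheckpoint directory walk, and the ledger values are illustrative only.)

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <vector>

// Hypothetical stand-in for forEveryQueuedCheckpoint(publishQueuePath(cfg), ...):
// calls `f` once per checkpoint ledger currently sitting in the publish queue.
static void
forEachQueuedCheckpoint(std::vector<uint32_t> const& queued,
                        std::function<void(uint32_t)> const& f)
{
    for (auto seq : queued)
    {
        f(seq);
    }
}

// Mirrors the sentinel convention in the patch: an empty queue reports
// uint32_t max as the minimum and 0 as the maximum, so callers such as
// publishQueuedHistory() can bail out early when nothing is queued.
static uint32_t
minLedgerQueuedToPublish(std::vector<uint32_t> const& queued)
{
    auto min = std::numeric_limits<uint32_t>::max();
    forEachQueuedCheckpoint(queued,
                            [&](uint32_t seq) { min = std::min(min, seq); });
    return min;
}

static uint32_t
maxLedgerQueuedToPublish(std::vector<uint32_t> const& queued)
{
    auto max = std::numeric_limits<uint32_t>::min(); // i.e. 0
    forEachQueuedCheckpoint(queued,
                            [&](uint32_t seq) { max = std::max(max, seq); });
    return max;
}

int
main()
{
    std::vector<uint32_t> empty;
    assert(minLedgerQueuedToPublish(empty) ==
           std::numeric_limits<uint32_t>::max());
    assert(maxLedgerQueuedToPublish(empty) == 0);

    std::vector<uint32_t> queued = {7, 71, 135};
    assert(minLedgerQueuedToPublish(queued) == 7);
    assert(maxLedgerQueuedToPublish(queued) == 135);
    return 0;
}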
@@ -972,12 +999,21 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", catchupSimulation.externalizeLedger(herder, n); } - runCatchup(hm.lastLedgerBeforeCheckpointContaining(dividingLedger)); + runCatchup(HistoryManager::lastLedgerBeforeCheckpointContaining( + dividingLedger, app->getConfig())); REQUIRE(lm.getLastClosedLedgerNum() == triggerLedger + bufferLedgers); catchupSimulation.externalizeLedger(herder, triggerLedger + bufferLedgers + 1); + testutil::crankUntil( + app, + [&]() { + return lm.getLastClosedLedgerNum() == + triggerLedger + bufferLedgers + 1; + }, + std::chrono::seconds{60}); + REQUIRE(lm.isSynced()); REQUIRE(lm.getLastClosedLedgerNum() == @@ -1001,9 +1037,10 @@ TEST_CASE("Retriggering catchups after trimming mSyncingLedgers", // By setting dividingLedger to the second ledger in a checkpoint, // we can make sure that mSyncingLedgers gets trimmed between the first // and second Catchup run. - auto dividingLedger = hm.firstLedgerInCheckpointContaining( - triggerLedger + bufferLedgers) + - 1; + auto dividingLedger = + HistoryManager::firstLedgerInCheckpointContaining( + triggerLedger + bufferLedgers, app->getConfig()) + + 1; runTest(dividingLedger); } } @@ -1026,7 +1063,7 @@ TEST_CASE("History prefix catchup", "[history][catchup]") // simulate 5 buffered ledgers and at last we need one closing ledger to // get us into synced state. REQUIRE(catchupSimulation.catchupOnline(a, 10, 5)); - uint32_t freq = a->getHistoryManager().getCheckpointFrequency(); + uint32_t freq = HistoryManager::getCheckpointFrequency(a->getConfig()); REQUIRE(a->getLedgerManager().getLastClosedLedgerNum() == freq + 7); // Try to catchup to ledger 74, which is part of second checkpoint (ending @@ -1321,15 +1358,19 @@ TEST_CASE_VERSIONS( } // Capture publish queue's view of HAS right before taking snapshot - auto queuedHAS = hm.getPublishQueueStates()[0]; + auto queuedHAS = + HistoryManager::getPublishQueueStates(app->getConfig())[0]; // Now take snapshot and schedule publish, this should *not* modify // HAS in any way hm.publishQueuedHistory(); // First, ensure bucket references are intact - auto pqb = hm.getBucketsReferencedByPublishQueue(); - REQUIRE(queuedHAS.allBuckets() == pqb); + auto pqb = HistoryManager::getBucketsReferencedByPublishQueue( + app->getConfig()); + auto allBuckets = queuedHAS.allBuckets(); + REQUIRE(std::set(allBuckets.begin(), + allBuckets.end()) == pqb); // Second, ensure `next` is in the exact same state as when it was // queued @@ -1370,7 +1411,8 @@ TEST_CASE("persist publish queue", "[history][publish][acceptance]") // We should have published nothing and have the first // checkpoint still queued. REQUIRE(hm0.getPublishSuccessCount() == 0); - REQUIRE(hm0.getMinLedgerQueuedToPublish() == 7); + REQUIRE(HistoryManager::getMinLedgerQueuedToPublish(hm0.getConfig()) == + 7); // Trim history after publishing. 
app0->getMaintainer().performMaintenance(50000); @@ -1396,7 +1438,8 @@ TEST_CASE("persist publish queue", "[history][publish][acceptance]") } // We should have either an empty publish queue or a // ledger sometime after the 5th checkpoint - auto minLedger = hm1.getMinLedgerQueuedToPublish(); + auto minLedger = + HistoryManager::getMinLedgerQueuedToPublish(hm1.getConfig()); LOG_INFO(DEFAULT_LOG, "minLedger {}", minLedger); bool okQueue = minLedger == 0 || minLedger >= 35; REQUIRE(okQueue); @@ -1641,18 +1684,21 @@ TEST_CASE("Introduce and fix gap without starting catchup", catchupSimulation.externalizeLedger(herder, nextLedger + 2); catchupSimulation.externalizeLedger(herder, nextLedger + 3); catchupSimulation.externalizeLedger(herder, nextLedger + 5); + testutil::crankFor(app->getClock(), std::chrono::seconds(5)); REQUIRE(!lm.isSynced()); REQUIRE(cm.getLargestLedgerSeqHeard() > lm.getLastClosedLedgerNum()); // Fill in the first gap. There will still be buffered ledgers left because // of the second gap catchupSimulation.externalizeLedger(herder, nextLedger + 1); + testutil::crankFor(app->getClock(), std::chrono::seconds(5)); REQUIRE(!lm.isSynced()); REQUIRE(cm.getLargestLedgerSeqHeard() > lm.getLastClosedLedgerNum()); // Fill in the second gap. All buffered ledgers should be applied, but we // wait for another ledger to close to get in sync catchupSimulation.externalizeLedger(herder, nextLedger + 4); + testutil::crankFor(app->getClock(), std::chrono::seconds(5)); REQUIRE(lm.isSynced()); REQUIRE(cm.getLargestLedgerSeqHeard() == lm.getLastClosedLedgerNum()); REQUIRE(!cm.isCatchupInitialized()); @@ -1685,6 +1731,7 @@ TEST_CASE("Receive trigger and checkpoint ledger out of order", catchupSimulation.externalizeLedger(herder, checkpointLedger + 1); catchupSimulation.externalizeLedger(herder, checkpointLedger); catchupSimulation.externalizeLedger(herder, checkpointLedger + 2); + testutil::crankFor(app->getClock(), std::chrono::seconds(10)); REQUIRE(lm.isSynced()); REQUIRE(cm.getLargestLedgerSeqHeard() == lm.getLastClosedLedgerNum()); @@ -1763,10 +1810,10 @@ TEST_CASE("CheckpointBuilder", "[history][publish]") { generate(10); validateCheckpointFiles(*app, 10, false); - FileTransferInfo headers( - FileType::HISTORY_FILE_TYPE_LEDGER, - app->getHistoryManager().checkpointContainingLedger(10), - app->getConfig()); + FileTransferInfo headers(FileType::HISTORY_FILE_TYPE_LEDGER, + HistoryManager::checkpointContainingLedger( + 10, app->getConfig()), + app->getConfig()); auto sz = std::filesystem::file_size(headers.localPath_nogz_dirty()); std::filesystem::resize_file(headers.localPath_nogz_dirty(), @@ -1778,7 +1825,7 @@ TEST_CASE("CheckpointBuilder", "[history][publish]") } SECTION("checkpoint complete") { - auto ledgerSeq = hm.checkpointContainingLedger(1); + auto ledgerSeq = HistoryManager::checkpointContainingLedger(1, cfg); // Checkpoint not finalized generate(ledgerSeq); validateCheckpointFiles(*app, ledgerSeq, false); diff --git a/src/history/test/HistoryTestsUtils.cpp b/src/history/test/HistoryTestsUtils.cpp index 501e7016c2..0738a0372c 100644 --- a/src/history/test/HistoryTestsUtils.cpp +++ b/src/history/test/HistoryTestsUtils.cpp @@ -258,9 +258,10 @@ TestLedgerChainGenerator::makeOneLedgerFile( uint32_t currCheckpoint, Hash prevHash, HistoryManager::LedgerVerificationStatus state) { - auto& hm = mApp.getHistoryManager(); - auto initLedger = hm.firstLedgerInCheckpointContaining(currCheckpoint); - auto size = hm.sizeOfCheckpointContaining(currCheckpoint); + auto initLedger = 
HistoryManager::firstLedgerInCheckpointContaining( + currCheckpoint, mApp.getConfig()); + auto size = HistoryManager::sizeOfCheckpointContaining(currCheckpoint, + mApp.getConfig()); LedgerHeaderHistoryEntry first, last, lcl; lcl.header.ledgerSeq = initLedger; @@ -282,7 +283,7 @@ TestLedgerChainGenerator::makeLedgerChainFiles( LedgerHeaderHistoryEntry first, last; for (auto i = mCheckpointRange.mFirst; i < mCheckpointRange.limit(); - i += mApp.getHistoryManager().sizeOfCheckpointContaining(i)) + i += HistoryManager::sizeOfCheckpointContaining(i, mApp.getConfig())) { // Only corrupt first checkpoint (last to be verified) if (i != mCheckpointRange.mFirst) @@ -404,7 +405,7 @@ CatchupSimulation::~CatchupSimulation() uint32_t CatchupSimulation::getLastCheckpointLedger(uint32_t checkpointIndex) const { - return getApp().getHistoryManager().getCheckpointFrequency() * + return HistoryManager::getCheckpointFrequency(getApp().getConfig()) * checkpointIndex - 1; } @@ -532,6 +533,7 @@ CatchupSimulation::generateRandomLedger(uint32_t version) auto lastSucceeded = txsSucceeded.count(); lm.closeLedger(mLedgerCloseDatas.back()); + testutil::crankFor(getApp().getClock(), std::chrono::milliseconds(10)); if (check) { @@ -609,7 +611,8 @@ CatchupSimulation::ensureLedgerAvailable(uint32_t targetLedger, .header.ledgerVersion); } - if (getApp().getHistoryManager().publishCheckpointOnLedgerClose(lcl)) + if (HistoryManager::publishCheckpointOnLedgerClose( + lcl, getApp().getConfig())) { mBucketListAtLastPublish = getApp().getBucketManager().getLiveBucketList(); @@ -621,23 +624,26 @@ void CatchupSimulation::ensurePublishesComplete() { auto& hm = getApp().getHistoryManager(); - while (hm.publishQueueLength() > 0 && hm.getPublishFailureCount() == 0) + auto const& cfg = getApp().getConfig(); + while (HistoryManager::publishQueueLength(cfg) > 0 && + hm.getPublishFailureCount() == 0) { getApp().getClock().crank(true); } REQUIRE(hm.getPublishFailureCount() == 0); // Make sure all references to buckets were released - REQUIRE(hm.getBucketsReferencedByPublishQueue().empty()); + REQUIRE(HistoryManager::getBucketsReferencedByPublishQueue(cfg).empty()); // Make sure all published checkpoint files have been cleaned up auto lcl = getApp().getLedgerManager().getLastClosedLedgerNum(); - auto firstCheckpoint = - hm.checkpointContainingLedger(LedgerManager::GENESIS_LEDGER_SEQ); - auto lastCheckpoint = hm.lastLedgerBeforeCheckpointContaining(lcl); + auto firstCheckpoint = HistoryManager::checkpointContainingLedger( + LedgerManager::GENESIS_LEDGER_SEQ, cfg); + auto lastCheckpoint = + HistoryManager::lastLedgerBeforeCheckpointContaining(lcl, cfg); for (uint32_t i = firstCheckpoint; i <= lastCheckpoint; - i += hm.getCheckpointFrequency()) + i += HistoryManager::getCheckpointFrequency(cfg)) { FileTransferInfo res(FileType::HISTORY_FILE_TYPE_RESULTS, i, getApp().getConfig()); @@ -659,9 +665,9 @@ CatchupSimulation::ensureOfflineCatchupPossible( uint32_t targetLedger, std::optional restartLedger) { // One additional ledger is needed for publish. 
- auto target = - getApp().getHistoryManager().checkpointContainingLedger(targetLedger) + - 1; + auto target = HistoryManager::checkpointContainingLedger( + targetLedger, getApp().getConfig()) + + 1; ensureLedgerAvailable(target, restartLedger); ensurePublishesComplete(); } @@ -670,11 +676,10 @@ void CatchupSimulation::ensureOnlineCatchupPossible(uint32_t targetLedger, uint32_t bufferLedgers) { - auto& hm = getApp().getHistoryManager(); - // One additional ledger is needed for publish, one as a trigger ledger for // catchup, one as closing ledger. - ensureLedgerAvailable(hm.checkpointContainingLedger(targetLedger) + + ensureLedgerAvailable(HistoryManager::checkpointContainingLedger( + targetLedger, getApp().getConfig()) + bufferLedgers + 3); ensurePublishesComplete(); } @@ -686,10 +691,9 @@ CatchupSimulation::getAllPublishedCheckpoints() const assert(mLedgerHashes.size() == mLedgerSeqs.size()); auto hi = mLedgerHashes.begin(); auto si = mLedgerSeqs.begin(); - auto const& hm = getApp().getHistoryManager(); while (si != mLedgerSeqs.end()) { - if (hm.isLastLedgerInCheckpoint(*si)) + if (HistoryManager::isLastLedgerInCheckpoint(*si, getApp().getConfig())) { LedgerNumHashPair pair; pair.first = *si; @@ -709,10 +713,9 @@ CatchupSimulation::getLastPublishedCheckpoint() const assert(mLedgerHashes.size() == mLedgerSeqs.size()); auto hi = mLedgerHashes.rbegin(); auto si = mLedgerSeqs.rbegin(); - auto const& hm = getApp().getHistoryManager(); while (si != mLedgerSeqs.rend()) { - if (hm.isLastLedgerInCheckpoint(*si)) + if (HistoryManager::isLastLedgerInCheckpoint(*si, getApp().getConfig())) { pair.first = *si; pair.second = std::make_optional(*hi); @@ -724,24 +727,6 @@ CatchupSimulation::getLastPublishedCheckpoint() const return pair; } -void -CatchupSimulation::crankUntil(Application::pointer app, - std::function const& predicate, - VirtualClock::duration timeout) -{ - auto start = std::chrono::system_clock::now(); - while (!predicate()) - { - app->getClock().crank(false); - auto current = std::chrono::system_clock::now(); - auto diff = current - start; - if (diff > timeout) - { - break; - } - } -} - Application::pointer CatchupSimulation::createCatchupApplication( uint32_t count, Config::TestDbMode dbMode, std::string const& appName, @@ -789,9 +774,9 @@ CatchupSimulation::catchupOffline(Application::pointer app, uint32_t toLedger, auto expectedCatchupWork = computeCatchupPerformedWork(lastLedger, catchupConfiguration, *app); - crankUntil(app, finished, - std::chrono::seconds{std::max( - expectedCatchupWork.mTxSetsApplied + 15, 60)}); + testutil::crankUntil(app, finished, + std::chrono::seconds{std::max( + expectedCatchupWork.mTxSetsApplied + 15, 60)}); // Finished successfully auto success = cm.isCatchupInitialized() && @@ -826,11 +811,11 @@ CatchupSimulation::catchupOnline(Application::pointer app, uint32_t initLedger, auto& lm = app->getLedgerManager(); auto startCatchupMetrics = app->getCatchupManager().getCatchupMetrics(); - auto& hm = app->getHistoryManager(); auto& herder = static_cast(app->getHerder()); // catchup will run to the final ledger in the checkpoint - auto toLedger = hm.checkpointContainingLedger(initLedger - 1); + auto toLedger = HistoryManager::checkpointContainingLedger( + initLedger - 1, app->getConfig()); auto catchupConfiguration = CatchupConfiguration{toLedger, app->getConfig().CATCHUP_RECENT, @@ -861,17 +846,19 @@ CatchupSimulation::catchupOnline(Application::pointer app, uint32_t initLedger, // initLedger (inclusive), so that there's something to knit-up with. 
Do not // externalize anything we haven't yet published, of course. uint32_t firstLedgerInCheckpoint; - if (hm.isFirstLedgerInCheckpoint(initLedger)) + if (HistoryManager::isFirstLedgerInCheckpoint(initLedger, app->getConfig())) { firstLedgerInCheckpoint = initLedger; } else { firstLedgerInCheckpoint = - hm.firstLedgerAfterCheckpointContaining(initLedger); + HistoryManager::firstLedgerAfterCheckpointContaining( + initLedger, app->getConfig()); } - uint32_t triggerLedger = hm.ledgerToTriggerCatchup(firstLedgerInCheckpoint); + uint32_t triggerLedger = HistoryManager::ledgerToTriggerCatchup( + firstLedgerInCheckpoint, app->getConfig()); if (ledgersToInject.empty()) { @@ -904,9 +891,9 @@ CatchupSimulation::catchupOnline(Application::pointer app, uint32_t initLedger, auto expectedCatchupWork = computeCatchupPerformedWork(lastLedger, catchupConfiguration, *app); - crankUntil(app, catchupIsDone, - std::chrono::seconds{std::max( - expectedCatchupWork.mTxSetsApplied + 15, 60)}); + testutil::crankUntil(app, catchupIsDone, + std::chrono::seconds{std::max( + expectedCatchupWork.mTxSetsApplied + 15, 60)}); if (lm.getLastClosedLedgerNum() == triggerLedger + bufferLedgers) { @@ -914,6 +901,14 @@ CatchupSimulation::catchupOnline(Application::pointer app, uint32_t initLedger, externalize(triggerLedger + bufferLedgers + 1); } + testutil::crankUntil( + app, + [&]() { + return lm.getLastClosedLedgerNum() == + triggerLedger + bufferLedgers + 1; + }, + std::chrono::seconds{60}); + auto result = caughtUp(); if (result) { @@ -952,6 +947,8 @@ CatchupSimulation::externalizeLedger(HerderImpl& herder, uint32_t ledger) lcd.getLedgerSeq(), lcd.getTxSet()); herder.getHerderSCPDriver().valueExternalized( lcd.getLedgerSeq(), xdr::xdr_to_opaque(lcd.getValue())); + + // TODO: crank the clock } void @@ -1100,9 +1097,10 @@ CatchupSimulation::computeCatchupPerformedWork( txSetsDownloaded = 0; } - auto firstVerifiedLedger = std::max(LedgerManager::GENESIS_LEDGER_SEQ, - verifyCheckpointRange.mFirst + 1 - - hm.getCheckpointFrequency()); + auto firstVerifiedLedger = + std::max(LedgerManager::GENESIS_LEDGER_SEQ, + verifyCheckpointRange.mFirst + 1 - + HistoryManager::getCheckpointFrequency(app.getConfig())); auto ledgersVerified = catchupConfiguration.toLedger() - firstVerifiedLedger + 1; auto txSetsApplied = catchupRange.getReplayCount(); diff --git a/src/history/test/HistoryTestsUtils.h b/src/history/test/HistoryTestsUtils.h index d3cbc4100b..6c0d65ccdb 100644 --- a/src/history/test/HistoryTestsUtils.h +++ b/src/history/test/HistoryTestsUtils.h @@ -275,10 +275,6 @@ class CatchupSimulation // this method externalizes through herder void externalizeLedger(HerderImpl& herder, uint32_t ledger); - void crankUntil(Application::pointer app, - std::function const& predicate, - VirtualClock::duration duration); - void setUpgradeLedger(uint32_t ledger, ProtocolVersion upgradeVersion); void restartApp(); }; diff --git a/src/historywork/BatchDownloadWork.cpp b/src/historywork/BatchDownloadWork.cpp index e446ee2db3..757acafd79 100644 --- a/src/historywork/BatchDownloadWork.cpp +++ b/src/historywork/BatchDownloadWork.cpp @@ -57,7 +57,7 @@ BatchDownloadWork::yieldMoreWork() typeString(mFileType), mNext); auto getAndUnzip = std::make_shared(mApp, ft, mArchive); - mNext += mApp.getHistoryManager().getCheckpointFrequency(); + mNext += HistoryManager::getCheckpointFrequency(mApp.getConfig()); return getAndUnzip; } diff --git a/src/historywork/CheckSingleLedgerHeaderWork.cpp b/src/historywork/CheckSingleLedgerHeaderWork.cpp index 
677ccba36a..334578be9b 100644 --- a/src/historywork/CheckSingleLedgerHeaderWork.cpp +++ b/src/historywork/CheckSingleLedgerHeaderWork.cpp @@ -57,8 +57,8 @@ CheckSingleLedgerHeaderWork::doReset() mGetLedgerFileWork.reset(); mDownloadDir = std::make_unique(mApp.getTmpDirManager().tmpDir(getName())); - uint32_t checkpoint = mApp.getHistoryManager().checkpointContainingLedger( - mExpected.header.ledgerSeq); + uint32_t checkpoint = HistoryManager::checkpointContainingLedger( + mExpected.header.ledgerSeq, mApp.getConfig()); mFt = std::make_unique( *mDownloadDir, FileType::HISTORY_FILE_TYPE_LEDGER, checkpoint); } @@ -104,7 +104,8 @@ CheckSingleLedgerHeaderWork::doWork() XDRInputFileStream in; in.open(mFt->localPath_nogz()); LedgerHeaderHistoryEntry lhhe; - size_t headersToRead = mApp.getHistoryManager().getCheckpointFrequency(); + size_t headersToRead = + HistoryManager::getCheckpointFrequency(mApp.getConfig()); try { while (in && in.readOne(lhhe)) diff --git a/src/historywork/DownloadVerifyTxResultsWork.cpp b/src/historywork/DownloadVerifyTxResultsWork.cpp index 04ada07514..630af18dfc 100644 --- a/src/historywork/DownloadVerifyTxResultsWork.cpp +++ b/src/historywork/DownloadVerifyTxResultsWork.cpp @@ -74,7 +74,7 @@ DownloadVerifyTxResultsWork::yieldMoreWork() mCurrCheckpoint), seq); - mCurrCheckpoint += mApp.getHistoryManager().getCheckpointFrequency(); + mCurrCheckpoint += HistoryManager::getCheckpointFrequency(mApp.getConfig()); return w3; } } diff --git a/src/historywork/FetchRecentQsetsWork.cpp b/src/historywork/FetchRecentQsetsWork.cpp index 1d12a00cbe..e9c3e71955 100644 --- a/src/historywork/FetchRecentQsetsWork.cpp +++ b/src/historywork/FetchRecentQsetsWork.cpp @@ -51,7 +51,7 @@ FetchRecentQsetsWork::doWork() // about every active qset. A more sophisticated view would survey longer // time periods at lower resolution. uint32_t numCheckpoints = 100; - uint32_t step = mApp.getHistoryManager().getCheckpointFrequency(); + uint32_t step = HistoryManager::getCheckpointFrequency(mApp.getConfig()); uint32_t window = numCheckpoints * step; uint32_t lastSeq = mLedgerNum; uint32_t firstSeq = lastSeq < window ? (step - 1) : (lastSeq - window); diff --git a/src/historywork/Progress.cpp b/src/historywork/Progress.cpp index ebc13a5d9a..5e16f2abb4 100644 --- a/src/historywork/Progress.cpp +++ b/src/historywork/Progress.cpp @@ -16,7 +16,7 @@ std::string fmtProgress(Application& app, std::string const& task, LedgerRange const& range, uint32_t curr) { - auto step = app.getHistoryManager().getCheckpointFrequency(); + auto step = HistoryManager::getCheckpointFrequency(app.getConfig()); // Step is only ever 8 or 64. 
releaseAssert(step != 0); if (range.mCount == 0) diff --git a/src/historywork/VerifyTxResultsWork.cpp b/src/historywork/VerifyTxResultsWork.cpp index d1c70634e5..14c4dc0891 100644 --- a/src/historywork/VerifyTxResultsWork.cpp +++ b/src/historywork/VerifyTxResultsWork.cpp @@ -149,9 +149,8 @@ VerifyTxResultsWork::getCurrentTxResultSet(uint32_t ledger) if (res) { auto readLedger = mTxResultEntry.ledgerSeq; - auto const& hm = mApp.getHistoryManager(); - - auto low = hm.firstLedgerInCheckpointContaining(mCheckpoint); + auto low = HistoryManager::firstLedgerInCheckpointContaining( + mCheckpoint, mApp.getConfig()); if (readLedger > mCheckpoint || readLedger < low) { throw std::runtime_error("Results outside of checkpoint range"); diff --git a/src/historywork/WriteVerifiedCheckpointHashesWork.cpp b/src/historywork/WriteVerifiedCheckpointHashesWork.cpp index f1f86124eb..6c8eee5e80 100644 --- a/src/historywork/WriteVerifiedCheckpointHashesWork.cpp +++ b/src/historywork/WriteVerifiedCheckpointHashesWork.cpp @@ -151,8 +151,8 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork() throw std::runtime_error("nothing to iterate over"); } - auto const& hm = mApp.getHistoryManager(); - uint32_t const freq = hm.getCheckpointFrequency(); + uint32_t const freq = + HistoryManager::getCheckpointFrequency(mApp.getConfig()); auto const lclHe = mApp.getLedgerManager().getLastClosedLedgerHeader(); LedgerNumHashPair const lcl(lclHe.header.ledgerSeq, @@ -161,19 +161,22 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork() uint32_t const last = mCurrCheckpoint; uint32_t first = last <= span ? LedgerManager::GENESIS_LEDGER_SEQ - : hm.firstLedgerInCheckpointContaining(last - span); + : HistoryManager::firstLedgerInCheckpointContaining( + last - span, mApp.getConfig()); // If the first ledger in the range is less than mFromLedger then the // range should be constrained to start at mFromLedger, or the checkpoint // immediately before it if mFromLedger is not a checkpoint boundary. 
if (mFromLedger && first < *mFromLedger) { - if (hm.isLastLedgerInCheckpoint(*mFromLedger)) + if (HistoryManager::isLastLedgerInCheckpoint(*mFromLedger, + mApp.getConfig())) { first = *mFromLedger; } else { - first = hm.lastLedgerBeforeCheckpointContaining(*mFromLedger); + first = HistoryManager::lastLedgerBeforeCheckpointContaining( + *mFromLedger, mApp.getConfig()); } releaseAssertOrThrow(first <= *mFromLedger); } @@ -182,10 +185,12 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork() else if (mLatestTrustedHashPair && first < mLatestTrustedHashPair->first) { first = mLatestTrustedHashPair->first; - releaseAssertOrThrow(hm.isLastLedgerInCheckpoint(first)); + releaseAssertOrThrow( + HistoryManager::isLastLedgerInCheckpoint(first, mApp.getConfig())); } LedgerRange const ledgerRange = LedgerRange::inclusive(first, last); + auto const& hm = mApp.getHistoryManager(); CheckpointRange const checkpointRange(ledgerRange, hm); std::string const checkpointStr = std::to_string(mCurrCheckpoint); diff --git a/src/ledger/CheckpointRange.cpp b/src/ledger/CheckpointRange.cpp index 10790b8c5e..cd1203a21a 100644 --- a/src/ledger/CheckpointRange.cpp +++ b/src/ledger/CheckpointRange.cpp @@ -30,17 +30,20 @@ checkpointCount(uint32_t firstCheckpoint, LedgerRange const& r, { return 0; } - uint32_t lastCheckpoint = hm.checkpointContainingLedger(r.last()); - return 1 + - ((lastCheckpoint - firstCheckpoint) / hm.getCheckpointFrequency()); + uint32_t lastCheckpoint = + HistoryManager::checkpointContainingLedger(r.last(), hm.getConfig()); + return 1 + ((lastCheckpoint - firstCheckpoint) / + HistoryManager::getCheckpointFrequency(hm.getConfig())); } } CheckpointRange::CheckpointRange(LedgerRange const& ledgerRange, HistoryManager const& historyManager) - : mFirst{historyManager.checkpointContainingLedger(ledgerRange.mFirst)} + : mFirst{HistoryManager::checkpointContainingLedger( + ledgerRange.mFirst, historyManager.getConfig())} , mCount{checkpointCount(mFirst, ledgerRange, historyManager)} - , mFrequency{historyManager.getCheckpointFrequency()} + , mFrequency{ + HistoryManager::getCheckpointFrequency(historyManager.getConfig())} { releaseAssert(mFirst > 0); releaseAssert((mFirst + 1) % mFrequency == 0); diff --git a/src/main/CommandLine.cpp b/src/main/CommandLine.cpp index 0c2a3c717c..a061c0c14e 100644 --- a/src/main/CommandLine.cpp +++ b/src/main/CommandLine.cpp @@ -867,8 +867,8 @@ runCatchup(CommandLineArgs const& args) if (!trustedCheckpointHashesFile.empty()) { - auto const& hm = app->getHistoryManager(); - if (!hm.isLastLedgerInCheckpoint(cc.toLedger())) + if (!HistoryManager::isLastLedgerInCheckpoint( + cc.toLedger(), app->getConfig())) { throw std::runtime_error( "destination ledger is not a checkpoint boundary," @@ -1338,7 +1338,8 @@ runPrintPublishQueue(CommandLineArgs const& args) Application::pointer app = Application::create(clock, cfg, false); cereal::JSONOutputArchive archive(std::cout); archive.makeArray(); - for (auto const& has : app->getHistoryManager().getPublishQueueStates()) + for (auto const& has : + HistoryManager::getPublishQueueStates(app->getConfig())) { has.serialize(archive); } diff --git a/src/main/test/ApplicationUtilsTests.cpp b/src/main/test/ApplicationUtilsTests.cpp index 3e56609e37..b8a12017ad 100644 --- a/src/main/test/ApplicationUtilsTests.cpp +++ b/src/main/test/ApplicationUtilsTests.cpp @@ -165,8 +165,7 @@ class SimulationHelper /* txRate */ 1)); auto currLoadGenCount = loadGenDone.count(); - auto checkpoint = - mMainNode->getHistoryManager().getCheckpointFrequency(); + auto 
checkpoint = HistoryManager::getCheckpointFrequency(mMainCfg); // Make sure validator publishes something mSimulation->crankUntil( diff --git a/src/overlay/test/OverlayTests.cpp b/src/overlay/test/OverlayTests.cpp index 942f0a64a9..92985a2601 100644 --- a/src/overlay/test/OverlayTests.cpp +++ b/src/overlay/test/OverlayTests.cpp @@ -907,7 +907,8 @@ TEST_CASE("outbound queue filtering", "[overlay][connections]") { // Advance to next checkpoint auto nextCheckpoint = - node->getHistoryManager().firstLedgerAfterCheckpointContaining(lcl); + HistoryManager::firstLedgerAfterCheckpointContaining( + lcl, node->getConfig()); simulation->crankUntil( [&]() { return simulation->haveAllExternalized(nextCheckpoint, 1); @@ -917,7 +918,7 @@ TEST_CASE("outbound queue filtering", "[overlay][connections]") envs = herder.getSCP().getLatestMessagesSend(nextCheckpoint); auto checkpointFreq = - node->getHistoryManager().getCheckpointFrequency(); + HistoryManager::getCheckpointFrequency(node->getConfig()); for (auto& env : envs) { env.statement.slotIndex -= checkpointFreq; From 25c3ddd1534d600fde950e446502dee2ce69fbaa Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 12:44:01 -0800 Subject: [PATCH 06/10] Support new experimental parallel ledger close config --- src/main/Config.cpp | 34 +++++++++++++++++++++++++++++++++- src/main/Config.h | 10 ++++++++++ src/test/test.cpp | 4 ++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/main/Config.cpp b/src/main/Config.cpp index 0d13451681..19fef268fa 100644 --- a/src/main/Config.cpp +++ b/src/main/Config.cpp @@ -63,7 +63,8 @@ static const std::unordered_set TESTING_ONLY_OPTIONS = { "ARTIFICIALLY_SET_SURVEY_PHASE_DURATION_FOR_TESTING", "ARTIFICIALLY_DELAY_BUCKET_APPLICATION_FOR_TESTING", "ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING", - "ARTIFICIALLY_SKIP_CONNECTION_ADJUSTMENT_FOR_TESTING"}; + "ARTIFICIALLY_SKIP_CONNECTION_ADJUSTMENT_FOR_TESTING", + "ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING"}; // Options that should only be used for testing static const std::unordered_set TESTING_SUGGESTED_OPTIONS = { @@ -157,6 +158,7 @@ Config::Config() : NODE_SEED(SecretKey::random()) CATCHUP_COMPLETE = false; CATCHUP_RECENT = 0; BACKGROUND_OVERLAY_PROCESSING = true; + EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = false; BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT = 14; // 2^14 == 16 kb BUCKETLIST_DB_INDEX_CUTOFF = 20; // 20 mb BUCKETLIST_DB_PERSIST_INDEX = true; @@ -183,6 +185,7 @@ Config::Config() : NODE_SEED(SecretKey::random()) ARTIFICIALLY_REPLAY_WITH_NEWEST_BUCKET_LOGIC_FOR_TESTING = false; ARTIFICIALLY_DELAY_BUCKET_APPLICATION_FOR_TESTING = std::chrono::seconds::zero(); + ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = std::chrono::milliseconds(0); ALLOW_LOCALHOST_FOR_TESTING = false; USE_CONFIG_FOR_GENESIS = false; FAILURE_SAFETY = -1; @@ -1065,6 +1068,15 @@ Config::processConfig(std::shared_ptr t) }}, {"BACKGROUND_OVERLAY_PROCESSING", [&]() { BACKGROUND_OVERLAY_PROCESSING = readBool(item); }}, + {"EXPERIMENTAL_PARALLEL_LEDGER_CLOSE", + [&]() { + EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = readBool(item); + }}, + {"ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING", + [&]() { + ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(readInt(item)); + }}, // https://github.com/stellar/stellar-core/issues/4581 {"BACKGROUND_EVICTION_SCAN", [&]() { @@ -1774,6 +1786,15 @@ Config::processConfig(std::shared_ptr t) throw std::runtime_error(msg); } + if (EXPERIMENTAL_PARALLEL_LEDGER_CLOSE && !parallelLedgerClose()) + { + std::string msg = + "Invalid configuration: 
EXPERIMENTAL_PARALLEL_LEDGER_CLOSE " "does not support SQLite. Either switch to Postgres or set " "EXPERIMENTAL_PARALLEL_LEDGER_CLOSE=false"; throw std::runtime_error(msg); } + // Check all loadgen distributions verifyLoadGenOpCountForTestingConfigs(); verifyLoadGenDistribution( @@ -2076,6 +2097,10 @@ Config::logBasicInfo() const "BACKGROUND_OVERLAY_PROCESSING=" "{}", BACKGROUND_OVERLAY_PROCESSING ? "true" : "false"); + LOG_INFO(DEFAULT_LOG, + "EXPERIMENTAL_PARALLEL_LEDGER_CLOSE=" + "{}", + EXPERIMENTAL_PARALLEL_LEDGER_CLOSE ? "true" : "false"); } void @@ -2370,6 +2395,13 @@ Config::modeStoresAnyHistory() const return MODE_STORES_HISTORY_LEDGERHEADERS || MODE_STORES_HISTORY_MISC; } +bool +Config::parallelLedgerClose() const +{ + return EXPERIMENTAL_PARALLEL_LEDGER_CLOSE && + !(DATABASE.value.find("sqlite3://") != std::string::npos); +} + void Config::setNoListen() { diff --git a/src/main/Config.h b/src/main/Config.h index 6e4e1e7418..d2798e2dd7 100644 --- a/src/main/Config.h +++ b/src/main/Config.h @@ -163,6 +163,7 @@ class Config : public std::enable_shared_from_this #endif TESTDB_BUCKET_DB_VOLATILE, TESTDB_BUCKET_DB_PERSISTENT, + TESTDB_BUCKET_DB_PERSISTENT_POSTGRES, TESTDB_MODES }; @@ -271,6 +272,11 @@ class Config : public std::enable_shared_from_this // This config should only be enabled when testing. std::chrono::microseconds ARTIFICIALLY_SLEEP_MAIN_THREAD_FOR_TESTING; + // A config parameter that forces stellar-core to sleep every time it closes + // a ledger in order to simulate slow application. This config should only + // be enabled when testing. + std::chrono::milliseconds ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING; + // Timeout before publishing externalized values to archive std::chrono::seconds PUBLISH_TO_ARCHIVE_DELAY; @@ -466,6 +472,9 @@ class Config : public std::enable_shared_from_this // Enable parallel processing of overlay operations (experimental) bool BACKGROUND_OVERLAY_PROCESSING; + // Enable parallel block application (experimental) + bool EXPERIMENTAL_PARALLEL_LEDGER_CLOSE; + // When set to true, BucketListDB indexes are persisted on-disk so that the // BucketList does not need to be reindexed on startup. Defaults to true.
// This should only be set to false for testing purposes @@ -794,6 +803,7 @@ class Config : public std::enable_shared_from_this bool modeStoresAllHistory() const; bool modeStoresAnyHistory() const; void logBasicInfo() const; + bool parallelLedgerClose() const; void setNoListen(); void setNoPublish(); diff --git a/src/test/test.cpp b/src/test/test.cpp index 197e49fe50..3d46c55518 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -291,6 +291,10 @@ getTestConfig(int instanceNumber, Config::TestDbMode mode) thisConfig.DISABLE_XDR_FSYNC = false; break; #ifdef USE_POSTGRES + case Config::TESTDB_BUCKET_DB_PERSISTENT_POSTGRES: + dbname << "postgresql://dbname=test" << instanceNumber; + thisConfig.DISABLE_XDR_FSYNC = false; + break; case Config::TESTDB_POSTGRESQL: dbname << "postgresql://dbname=test" << instanceNumber; thisConfig.DISABLE_XDR_FSYNC = false; From a0e1413bd3ad570b3689b4339632a36c929a104b Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 12:45:43 -0800 Subject: [PATCH 07/10] Support multiple sessions in Database class --- src/database/Database.cpp | 121 ++++++++++++++++++++++---------------- src/database/Database.h | 75 ++++++++++++++++++----- 2 files changed, 129 insertions(+), 67 deletions(-) diff --git a/src/database/Database.cpp b/src/database/Database.cpp index a60d252936..e6288ee6e3 100644 --- a/src/database/Database.cpp +++ b/src/database/Database.cpp @@ -60,10 +60,6 @@ using namespace std; bool Database::gDriversRegistered = false; -// smallest schema version supported -static unsigned long const MIN_SCHEMA_VERSION = 21; -static unsigned long const SCHEMA_VERSION = 24; - // These should always match our compiled version precisely, since we are // using a bundled version to get access to carray(). But in case someone // overrides that or our build configuration changes, it's nicer to get a @@ -184,6 +180,7 @@ Database::Database(Application& app) : mApp(app) , mQueryMeter( app.getMetrics().NewMeter({"database", "query", "exec"}, "query")) + , mSession("main") , mStatementsSize( app.getMetrics().NewCounter({"database", "memory", "statements"})) { @@ -198,17 +195,17 @@ Database::Database(Application& app) void Database::open() { - mSession.open(mApp.getConfig().DATABASE.value); - DatabaseConfigureSessionOp op(mSession); - doDatabaseTypeSpecificOperation(op); + mSession.session().open(mApp.getConfig().DATABASE.value); + DatabaseConfigureSessionOp op(mSession.session()); + doDatabaseTypeSpecificOperation(op, mSession); } void Database::applySchemaUpgrade(unsigned long vers) { - clearPreparedStatementCache(); + clearPreparedStatementCache(mSession); - soci::transaction tx(mSession); + soci::transaction tx(mSession.session()); switch (vers) { case 22: @@ -220,6 +217,7 @@ Database::applySchemaUpgrade(unsigned long vers) break; case 24: getSession() << "DROP TABLE IF EXISTS pubsub;"; + mApp.getPersistentState().migrateToSlotStateTable(); break; default: throw std::runtime_error("Unknown DB schema version"); @@ -263,44 +261,46 @@ Database::upgradeToCurrentSchema() void Database::maybeUpgradeToBucketListDB() { - if (mApp.getPersistentState().getState(PersistentState::kDBBackend) != + if (mApp.getPersistentState().getState(PersistentState::kDBBackend, + getSession()) != BucketIndex::DB_BACKEND_STATE) { CLOG_INFO(Database, "Upgrading to BucketListDB"); // Drop all LedgerEntry tables except for offers CLOG_INFO(Database, "Dropping table accounts"); - getSession() << "DROP TABLE IF EXISTS accounts;"; + getRawSession() << "DROP TABLE IF EXISTS accounts;"; 
CLOG_INFO(Database, "Dropping table signers"); - getSession() << "DROP TABLE IF EXISTS signers;"; + getRawSession() << "DROP TABLE IF EXISTS signers;"; CLOG_INFO(Database, "Dropping table claimablebalance"); - getSession() << "DROP TABLE IF EXISTS claimablebalance;"; + getRawSession() << "DROP TABLE IF EXISTS claimablebalance;"; CLOG_INFO(Database, "Dropping table configsettings"); - getSession() << "DROP TABLE IF EXISTS configsettings;"; + getRawSession() << "DROP TABLE IF EXISTS configsettings;"; CLOG_INFO(Database, "Dropping table contractcode"); - getSession() << "DROP TABLE IF EXISTS contractcode;"; + getRawSession() << "DROP TABLE IF EXISTS contractcode;"; CLOG_INFO(Database, "Dropping table contractdata"); - getSession() << "DROP TABLE IF EXISTS contractdata;"; + getRawSession() << "DROP TABLE IF EXISTS contractdata;"; CLOG_INFO(Database, "Dropping table accountdata"); - getSession() << "DROP TABLE IF EXISTS accountdata;"; + getRawSession() << "DROP TABLE IF EXISTS accountdata;"; CLOG_INFO(Database, "Dropping table liquiditypool"); - getSession() << "DROP TABLE IF EXISTS liquiditypool;"; + getRawSession() << "DROP TABLE IF EXISTS liquiditypool;"; CLOG_INFO(Database, "Dropping table trustlines"); - getSession() << "DROP TABLE IF EXISTS trustlines;"; + getRawSession() << "DROP TABLE IF EXISTS trustlines;"; CLOG_INFO(Database, "Dropping table ttl"); - getSession() << "DROP TABLE IF EXISTS ttl;"; + getRawSession() << "DROP TABLE IF EXISTS ttl;"; mApp.getPersistentState().setState(PersistentState::kDBBackend, - BucketIndex::DB_BACKEND_STATE); + BucketIndex::DB_BACKEND_STATE, + getSession()); } } @@ -308,17 +308,19 @@ void Database::putSchemaVersion(unsigned long vers) { mApp.getPersistentState().setState(PersistentState::kDatabaseSchema, - std::to_string(vers)); + std::to_string(vers), + mApp.getDatabase().getSession()); } unsigned long Database::getDBSchemaVersion() { + releaseAssert(threadIsMain()); unsigned long vers = 0; try { auto vstr = mApp.getPersistentState().getState( - PersistentState::kDatabaseSchema); + PersistentState::kDatabaseSchema, getSession()); vers = std::stoul(vstr); } catch (...) 
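// Illustrative sketch, not part of this patch: how call sites are expected to
// choose between the two accessors after this change. getSession() returns the
// named SessionWrapper that helpers such as PersistentState and the prepared
// statement cache key their state on, while getRawSession() exposes the
// underlying soci::session for ad-hoc SQL (as the DROP TABLE statements above
// do). The wrapper function below is hypothetical.
void
exampleMainThreadQuery(Application& app)
{
    auto& db = app.getDatabase();

    // State helpers take the SessionWrapper so they run on the intended
    // session and use its per-session statement cache.
    auto vers = app.getPersistentState().getState(
        PersistentState::kDatabaseSchema, db.getSession());

    // One-off DDL/DML can still stream straight into the raw soci session.
    db.getRawSession() << "DROP TABLE IF EXISTS pubsub;";
}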
@@ -332,16 +334,9 @@ Database::getDBSchemaVersion() return vers; } -unsigned long -Database::getAppSchemaVersion() -{ - return SCHEMA_VERSION; -} - medida::TimerContext Database::getInsertTimer(std::string const& entityName) { - mEntityTypes.insert(entityName); mQueryMeter.Mark(); return mApp.getMetrics() .NewTimer({"database", "insert", entityName}) @@ -351,7 +346,6 @@ Database::getInsertTimer(std::string const& entityName) medida::TimerContext Database::getSelectTimer(std::string const& entityName) { - mEntityTypes.insert(entityName); mQueryMeter.Mark(); return mApp.getMetrics() .NewTimer({"database", "select", entityName}) @@ -361,7 +355,6 @@ Database::getSelectTimer(std::string const& entityName) medida::TimerContext Database::getDeleteTimer(std::string const& entityName) { - mEntityTypes.insert(entityName); mQueryMeter.Mark(); return mApp.getMetrics() .NewTimer({"database", "delete", entityName}) @@ -371,7 +364,6 @@ Database::getDeleteTimer(std::string const& entityName) medida::TimerContext Database::getUpdateTimer(std::string const& entityName) { - mEntityTypes.insert(entityName); mQueryMeter.Mark(); return mApp.getMetrics() .NewTimer({"database", "update", entityName}) @@ -381,7 +373,6 @@ Database::getUpdateTimer(std::string const& entityName) medida::TimerContext Database::getUpsertTimer(std::string const& entityName) { - mEntityTypes.insert(entityName); mQueryMeter.Mark(); return mApp.getMetrics() .NewTimer({"database", "upsert", entityName}) @@ -393,7 +384,8 @@ Database::setCurrentTransactionReadOnly() { if (!isSqlite()) { - auto prep = getPreparedStatement("SET TRANSACTION READ ONLY"); + auto prep = + getPreparedStatement("SET TRANSACTION READ ONLY", getSession()); auto& st = prep.statement(); st.define_and_bind(); st.execute(false); @@ -429,14 +421,31 @@ Database::canUsePool() const void Database::clearPreparedStatementCache() { + std::lock_guard lock(mStatementsMutex); + for (auto& c : mCaches) + { + for (auto& st : c.second) + { + st.second->clean_up(true); + } + } + mCaches.clear(); + mStatementsSize.set_count(0); +} + +void +Database::clearPreparedStatementCache(SessionWrapper& session) +{ + std::lock_guard lock(mStatementsMutex); + // Flush all prepared statements; in sqlite they represent open cursors // and will conflict with any DROP TABLE commands issued below - for (auto st : mStatements) + for (auto st : mCaches[session.getSessionName()]) { st.second->clean_up(true); + mStatementsSize.dec(); } - mStatements.clear(); - mStatementsSize.set_count(mStatements.size()); + mCaches.erase(session.getSessionName()); } void @@ -452,8 +461,9 @@ Database::initialize() int i; std::string databaseName, databaseLocation; soci::statement st = - (mSession.prepare << "PRAGMA database_list;", soci::into(i), - soci::into(databaseName), soci::into(databaseLocation)); + (getRawSession().prepare << "PRAGMA database_list;", + soci::into(i), soci::into(databaseName), + soci::into(databaseLocation)); st.execute(true); while (st.got_data()) { @@ -466,7 +476,7 @@ Database::initialize() } if (!fn.empty() && fs::exists(fn)) { - mSession.close(); + getRawSession().close(); std::remove(fn.c_str()); open(); } @@ -476,7 +486,7 @@ Database::initialize() // only time this section should be modified is when // consolidating changes found in applySchemaUpgrade here - Upgrades::dropAll(*this); + Upgrades::dropSupportUpgradeHistory(*this); OverlayManager::dropAll(*this); PersistentState::dropAll(*this); LedgerHeaderUtils::dropAll(*this); @@ -487,14 +497,13 @@ Database::initialize() 
HerderPersistence::dropAll(*this); BanManager::dropAll(*this); putSchemaVersion(MIN_SCHEMA_VERSION); - mApp.getHerderPersistence().createQuorumTrackingTable(mSession); LOG_INFO(DEFAULT_LOG, "* "); LOG_INFO(DEFAULT_LOG, "* The database has been initialized"); LOG_INFO(DEFAULT_LOG, "* "); } -soci::session& +SessionWrapper& Database::getSession() { // global session can only be used from the main thread @@ -502,6 +511,12 @@ Database::getSession() return mSession; } +soci::session& +Database::getRawSession() +{ + return getSession().session(); +} + soci::connection_pool& Database::getPool() { @@ -568,17 +583,21 @@ class SQLLogContext : NonCopyable }; StatementContext -Database::getPreparedStatement(std::string const& query) +Database::getPreparedStatement(std::string const& query, + SessionWrapper& session) { - auto i = mStatements.find(query); + std::lock_guard lock(mStatementsMutex); + + auto& cache = mCaches[session.getSessionName()]; + auto i = cache.find(query); std::shared_ptr p; - if (i == mStatements.end()) + if (i == cache.end()) { - p = std::make_shared(mSession); + p = std::make_shared(session.session()); p->alloc(); p->prepare(query); - mStatements.insert(std::make_pair(query, p)); - mStatementsSize.set_count(mStatements.size()); + cache.insert(std::make_pair(query, p)); + mStatementsSize.inc(); } else { @@ -591,6 +610,6 @@ Database::getPreparedStatement(std::string const& query) std::shared_ptr Database::captureAndLogSQL(std::string contextName) { - return make_shared(contextName, mSession); + return make_shared(contextName, mSession.session()); } } diff --git a/src/database/Database.h b/src/database/Database.h index fb6f694c2f..e58391b7a6 100644 --- a/src/database/Database.h +++ b/src/database/Database.h @@ -14,6 +14,7 @@ #include #include #include +#include #include namespace medida @@ -26,6 +27,12 @@ namespace stellar { class Application; class SQLLogContext; +using PreparedStatementCache = + std::map>; + +// smallest schema version supported +static constexpr unsigned long MIN_SCHEMA_VERSION = 21; +static constexpr unsigned long SCHEMA_VERSION = 24; /** * Helper class for borrowing a SOCI prepared statement handle into a local @@ -60,6 +67,33 @@ class StatementContext : NonCopyable } }; +class SessionWrapper : NonCopyable +{ + soci::session mSession; + std::string const mSessionName; + + public: + SessionWrapper(std::string sessionName) + : mSessionName(std::move(sessionName)) + { + } + SessionWrapper(std::string sessionName, soci::connection_pool& pool) + : mSession(pool), mSessionName(std::move(sessionName)) + { + } + + soci::session& + session() + { + return mSession; + } + std::string const& + getSessionName() const + { + return mSessionName; + } +}; + /** * Object that owns the database connection(s) that an application * uses to store the current ledger and other persistent state in. @@ -84,22 +118,30 @@ class StatementContext : NonCopyable * (SQL isolation level 'SERIALIZABLE' in Postgresql and Sqlite, neither of * which provide true serializability). 
*/ + class Database : NonMovableOrCopyable { Application& mApp; medida::Meter& mQueryMeter; - soci::session mSession; + SessionWrapper mSession; + std::unique_ptr mPool; - std::map> mStatements; - medida::Counter& mStatementsSize; + // Cache key -> session name <> query + using PreparedStatementCache = + std::unordered_map>; + std::unordered_map mCaches; - std::set mEntityTypes; + medida::Counter& mStatementsSize; static bool gDriversRegistered; static void registerDrivers(); void applySchemaUpgrade(unsigned long vers); void open(); + // Save `vers` as schema version. + void putSchemaVersion(unsigned long vers); + + std::mutex mutable mStatementsMutex; public: // Instantiate object and connect to app.getConfig().DATABASE; @@ -118,11 +160,14 @@ class Database : NonMovableOrCopyable // Return a helper object that borrows, from the Database, a prepared // statement handle for the provided query. The prepared statement handle // is created if necessary before borrowing, and reset (unbound from data) - // when the statement context is destroyed. - StatementContext getPreparedStatement(std::string const& query); + // when the statement context is destroyed. Prepared statements caches are + // per DB session. + StatementContext getPreparedStatement(std::string const& query, + SessionWrapper& session); // Purge all cached prepared statements, closing their handles with the // database. + void clearPreparedStatementCache(SessionWrapper& session); void clearPreparedStatementCache(); // Return metric-gathering timers for various families of SQL operation. @@ -151,7 +196,8 @@ class Database : NonMovableOrCopyable // Call `op` back with the specific database backend subtype in use. template - T doDatabaseTypeSpecificOperation(DatabaseTypeSpecificOperation& op); + T doDatabaseTypeSpecificOperation(DatabaseTypeSpecificOperation& op, + SessionWrapper& session); // Return true if a connection pool is available for worker threads // to read from the database through, otherwise false. @@ -161,23 +207,20 @@ class Database : NonMovableOrCopyable // by the new-db command on stellar-core. void initialize(); - // Save `vers` as schema version. - void putSchemaVersion(unsigned long vers); - // Get current schema version in DB. unsigned long getDBSchemaVersion(); - // Get current schema version of running application. - unsigned long getAppSchemaVersion(); - // Check schema version and apply any upgrades if necessary. void upgradeToCurrentSchema(); void dropTxMetaIfExists(); void maybeUpgradeToBucketListDB(); + // Soci named session wrapper + SessionWrapper& getSession(); // Access the underlying SOCI session object - soci::session& getSession(); + // Use these to directly access the soci session object + soci::session& getRawSession(); // Access the optional SOCI connection pool available for worker // threads. Throws an error if !canUsePool(). 
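// Illustrative sketch, not part of this patch: using a dedicated session for
// work off the main thread, assuming a pooled backend (canUsePool() == true,
// i.e. Postgres). Each SessionWrapper carries its own name, so statements
// prepared through it land in that session's cache and never collide with the
// main-thread "main" session. The function, session name, and query text here
// are assumptions for illustration only.
void
exampleBackgroundQuery(Database& db)
{
    // Named session drawn from the connection pool, mirroring what the
    // ledger-close path does with its "ledgerClose" session.
    SessionWrapper session("worker", db.getPool());

    auto prep =
        db.getPreparedStatement("SELECT COUNT(*) FROM ledgerheaders", session);
    auto& st = prep.statement();
    long long count = 0;
    st.exchange(soci::into(count));
    st.define_and_bind();
    st.execute(true);
}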
@@ -210,9 +253,9 @@ doDatabaseTypeSpecificOperation(soci::session& session, template T Database::doDatabaseTypeSpecificOperation(DatabaseTypeSpecificOperation& op, - soci::session& session) + SessionWrapper& session) { - return stellar::doDatabaseTypeSpecificOperation(session, op); + return stellar::doDatabaseTypeSpecificOperation(session.session(), op); } template From cf636be3461caf436bbad921f19e5c872cad78ab Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Wed, 18 Dec 2024 15:35:56 -0800 Subject: [PATCH 08/10] Remove dead code now that in-memory mode is dropped --- src/catchup/ApplyBucketsWork.cpp | 14 ------------ src/catchup/ApplyBucketsWork.h | 1 - src/ledger/LedgerManager.h | 5 ----- src/ledger/LedgerManagerImpl.cpp | 38 -------------------------------- src/ledger/LedgerManagerImpl.h | 2 -- 5 files changed, 60 deletions(-) diff --git a/src/catchup/ApplyBucketsWork.cpp b/src/catchup/ApplyBucketsWork.cpp index 8358384018..2e30f83e64 100644 --- a/src/catchup/ApplyBucketsWork.cpp +++ b/src/catchup/ApplyBucketsWork.cpp @@ -207,20 +207,6 @@ ApplyBucketsWork::doWork() if (!mAssumeStateWork) { // Step 2: apply buckets. - if (mApp.getLedgerManager().rebuildingInMemoryState() && !mDelayChecked) - { - mDelayChecked = true; - auto delay = mApp.getConfig() - .ARTIFICIALLY_DELAY_BUCKET_APPLICATION_FOR_TESTING; - if (delay != std::chrono::seconds::zero()) - { - CLOG_INFO(History, "Delay bucket application by {} seconds", - delay.count()); - setupWaitingCallback(delay); - return State::WORK_WAITING; - } - } - auto isCurr = mBucketToApplyIndex % 2 == 0; if (mBucketApplicator) { diff --git a/src/catchup/ApplyBucketsWork.h b/src/catchup/ApplyBucketsWork.h index bdff18bed1..e2eab2c518 100644 --- a/src/catchup/ApplyBucketsWork.h +++ b/src/catchup/ApplyBucketsWork.h @@ -41,7 +41,6 @@ class ApplyBucketsWork : public Work std::unordered_set mSeenKeys; std::vector> mBucketsToApply; std::unique_ptr mBucketApplicator; - bool mDelayChecked{false}; BucketApplicator::Counters mCounters; bool const mIsApplyInvariantEnabled; diff --git a/src/ledger/LedgerManager.h b/src/ledger/LedgerManager.h index f85ef02161..4a181701c4 100644 --- a/src/ledger/LedgerManager.h +++ b/src/ledger/LedgerManager.h @@ -161,11 +161,6 @@ class LedgerManager // and restart merges virtual void loadLastKnownLedger(bool restoreBucketlist) = 0; - // Return true if core is currently rebuilding in-memory state via local - // catchup - virtual bool rebuildingInMemoryState() = 0; - virtual void setupInMemoryStateRebuild() = 0; - // Forcibly switch the application into catchup mode, treating `toLedger` // as the destination ledger number and count as the number of past ledgers // that should be replayed. 
Normally this happens automatically when diff --git a/src/ledger/LedgerManagerImpl.cpp b/src/ledger/LedgerManagerImpl.cpp index ca0b0bf949..d62f5888bb 100644 --- a/src/ledger/LedgerManagerImpl.cpp +++ b/src/ledger/LedgerManagerImpl.cpp @@ -178,20 +178,6 @@ LedgerManagerImpl::setState(State s) { mApp.getCatchupManager().logAndUpdateCatchupStatus(true); } - - if (mState == LM_CATCHING_UP_STATE && !mStartCatchup) - { - mStartCatchup = std::make_unique( - mApp.getClock().now()); - } - else if (mState == LM_SYNCED_STATE && mStartCatchup) - { - std::chrono::nanoseconds duration = - mApp.getClock().now() - *mStartCatchup; - mCatchupDuration.Update(duration); - CLOG_DEBUG(Perf, "Caught up to the network in {} seconds", - std::chrono::duration(duration).count()); - } } } @@ -398,30 +384,6 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) } } -bool -LedgerManagerImpl::rebuildingInMemoryState() -{ - return mRebuildInMemoryState; -} - -void -LedgerManagerImpl::setupInMemoryStateRebuild() -{ - if (!mRebuildInMemoryState) - { - LedgerHeader lh; - HistoryArchiveState has; - auto& ps = mApp.getPersistentState(); - ps.setState(PersistentState::kLastClosedLedger, - binToHex(xdrSha256(lh))); - ps.setState(PersistentState::kHistoryArchiveState, has.toString()); - ps.setState(PersistentState::kLastSCPData, ""); - ps.setState(PersistentState::kLastSCPDataXDR, ""); - ps.setState(PersistentState::kLedgerUpgrades, ""); - mRebuildInMemoryState = true; - } -} - Database& LedgerManagerImpl::getDatabase() { diff --git a/src/ledger/LedgerManagerImpl.h b/src/ledger/LedgerManagerImpl.h index 5408c917c8..cb96a14e67 100644 --- a/src/ledger/LedgerManagerImpl.h +++ b/src/ledger/LedgerManagerImpl.h @@ -186,8 +186,6 @@ class LedgerManagerImpl : public LedgerManager void startNewLedger(LedgerHeader const& genesisLedger); void startNewLedger() override; void loadLastKnownLedger(bool restoreBucketlist) override; - virtual bool rebuildingInMemoryState() override; - virtual void setupInMemoryStateRebuild() override; LedgerHeaderHistoryEntry const& getLastClosedLedgerHeader() const override; From f5cde0eb7bcd8ec4fd4d56e810b3f9ac509d552d Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Mon, 6 Jan 2025 12:38:26 -0800 Subject: [PATCH 09/10] Implement parallel ledger close, off by default --- src/bucket/BucketListBase.cpp | 2 - src/bucket/BucketListSnapshotBase.cpp | 2 - src/bucket/BucketManager.cpp | 76 ++- src/bucket/BucketManager.h | 21 +- src/bucket/BucketSnapshotManager.cpp | 3 - src/bucket/LiveBucketList.h | 3 + src/bucket/SearchableBucketList.cpp | 2 - src/bucket/test/BucketListTests.cpp | 2 +- src/bucket/test/BucketManagerTests.cpp | 26 +- src/bucket/test/BucketTestUtils.cpp | 8 +- src/catchup/ApplyCheckpointWork.cpp | 8 +- src/catchup/ApplyLedgerWork.cpp | 3 +- src/catchup/CatchupManager.h | 11 +- src/catchup/CatchupManagerImpl.cpp | 204 ++++++-- src/catchup/CatchupManagerImpl.h | 35 +- src/catchup/CatchupWork.cpp | 15 +- src/catchup/ReplayDebugMetaWork.cpp | 3 +- src/database/Database.cpp | 2 +- src/database/test/DatabaseTests.cpp | 13 +- src/herder/Herder.h | 5 +- src/herder/HerderImpl.cpp | 71 ++- src/herder/HerderImpl.h | 5 +- src/herder/HerderPersistence.h | 1 - src/herder/HerderPersistenceImpl.cpp | 76 +-- src/herder/HerderSCPDriver.cpp | 31 +- src/herder/HerderSCPDriver.h | 1 + src/herder/PendingEnvelopes.cpp | 4 +- src/herder/PendingEnvelopes.h | 4 + src/herder/TxSetFrame.cpp | 24 +- src/herder/Upgrades.cpp | 20 +- src/herder/test/HerderTests.cpp | 116 +++-- src/history/HistoryManager.h | 6 +- 
src/history/HistoryManagerImpl.cpp | 13 +- src/history/HistoryManagerImpl.h | 6 +- .../BucketListIsConsistentWithDatabase.cpp | 5 +- src/ledger/LedgerHeaderUtils.cpp | 38 +- src/ledger/LedgerHeaderUtils.h | 5 +- src/ledger/LedgerManager.h | 22 +- src/ledger/LedgerManagerImpl.cpp | 478 +++++++++++------- src/ledger/LedgerManagerImpl.h | 59 ++- src/ledger/test/InMemoryLedgerTxnRoot.cpp | 2 +- src/main/AppConnector.cpp | 21 +- src/main/AppConnector.h | 7 +- src/main/Application.cpp | 8 +- src/main/Application.h | 3 + src/main/ApplicationImpl.cpp | 85 +++- src/main/ApplicationImpl.h | 12 +- src/main/ApplicationUtils.cpp | 20 +- src/main/Maintainer.cpp | 4 +- src/main/test/ApplicationUtilsTests.cpp | 4 +- src/overlay/BanManagerImpl.cpp | 21 +- src/overlay/PeerManager.cpp | 24 +- src/overlay/test/OverlayManagerTests.cpp | 4 +- src/simulation/CoreTests.cpp | 3 +- src/simulation/LoadGenerator.cpp | 7 + src/simulation/TxGenerator.cpp | 2 +- src/test/FuzzerImpl.cpp | 10 +- src/test/TestUtils.cpp | 24 + src/test/TestUtils.h | 6 +- src/test/TxTests.cpp | 18 +- .../ExtendFootprintTTLOpFrame.cpp | 3 +- .../InvokeHostFunctionOpFrame.cpp | 3 +- src/transactions/OperationFrame.cpp | 21 +- src/transactions/OperationFrame.h | 1 + src/transactions/RestoreFootprintOpFrame.cpp | 3 +- src/transactions/TransactionFrame.cpp | 10 +- src/transactions/TransactionSQL.cpp | 9 +- src/transactions/test/SorobanTxTestUtils.cpp | 2 +- 68 files changed, 1140 insertions(+), 626 deletions(-) diff --git a/src/bucket/BucketListBase.cpp b/src/bucket/BucketListBase.cpp index 86daf45421..647e65d07a 100644 --- a/src/bucket/BucketListBase.cpp +++ b/src/bucket/BucketListBase.cpp @@ -57,7 +57,6 @@ template void BucketLevel::setNext(FutureBucket const& fb) { - releaseAssert(threadIsMain()); mNextCurr = fb; } @@ -79,7 +78,6 @@ template void BucketLevel::setCurr(std::shared_ptr b) { - releaseAssert(threadIsMain()); mNextCurr.clear(); mCurr = b; } diff --git a/src/bucket/BucketListSnapshotBase.cpp b/src/bucket/BucketListSnapshotBase.cpp index df4511b28b..cf2504fcfb 100644 --- a/src/bucket/BucketListSnapshotBase.cpp +++ b/src/bucket/BucketListSnapshotBase.cpp @@ -19,8 +19,6 @@ BucketListSnapshot::BucketListSnapshot( BucketListBase const& bl, LedgerHeader header) : mHeader(std::move(header)) { - releaseAssert(threadIsMain()); - for (uint32_t i = 0; i < BucketListBase::kNumLevels; ++i) { auto const& level = bl.getLevel(i); diff --git a/src/bucket/BucketManager.cpp b/src/bucket/BucketManager.cpp index dad3dcf7de..209d828caa 100644 --- a/src/bucket/BucketManager.cpp +++ b/src/bucket/BucketManager.cpp @@ -62,6 +62,7 @@ void BucketManager::initialize() { ZoneScoped; + releaseAssert(threadIsMain()); std::string d = mConfig.BUCKET_DIR_PATH; if (!fs::exists(d)) @@ -729,7 +730,7 @@ BucketManager::getBucketListReferencedBuckets() const } std::set -BucketManager::getAllReferencedBuckets() const +BucketManager::getAllReferencedBuckets(HistoryArchiveState const& has) const { ZoneScoped; auto referenced = getBucketListReferencedBuckets(); @@ -740,8 +741,7 @@ BucketManager::getAllReferencedBuckets() const // retain any bucket referenced by the last closed ledger as recorded in the // database (as merges complete, the bucket list drifts from that state) - auto lclHas = mApp.getLedgerManager().getLastClosedLedgerHAS(); - auto lclBuckets = lclHas.allBuckets(); + auto lclBuckets = has.allBuckets(); for (auto const& h : lclBuckets) { auto rit = referenced.emplace(hexToBin256(h)); @@ -752,39 +752,38 @@ BucketManager::getAllReferencedBuckets() const } // retain 
buckets that are referenced by a state in the publish queue. - auto pub = mApp.getHistoryManager().getBucketsReferencedByPublishQueue(); + for (auto const& h : + HistoryManager::getBucketsReferencedByPublishQueue(mApp.getConfig())) { - for (auto const& h : pub) + auto rhash = hexToBin256(h); + auto rit = referenced.emplace(rhash); + if (rit.second) { - auto rhash = hexToBin256(h); - auto rit = referenced.emplace(rhash); - if (rit.second) - { - CLOG_TRACE(Bucket, "{} referenced by publish queue", h); - - // Project referenced bucket `rhash` -- which might be a merge - // input captured before a merge finished -- through our weak - // map of merge input/output relationships, to find any outputs - // we'll want to retain in order to resynthesize the merge in - // the future, rather than re-run it. - mFinishedMerges.getOutputsUsingInput(rhash, referenced); - } + CLOG_TRACE(Bucket, "{} referenced by publish queue", h); + + // Project referenced bucket `rhash` -- which might be a merge + // input captured before a merge finished -- through our weak + // map of merge input/output relationships, to find any outputs + // we'll want to retain in order to resynthesize the merge in + // the future, rather than re-run it. + mFinishedMerges.getOutputsUsingInput(rhash, referenced); } } return referenced; } void -BucketManager::cleanupStaleFiles() +BucketManager::cleanupStaleFiles(HistoryArchiveState const& has) { ZoneScoped; + releaseAssert(threadIsMain()); if (mConfig.DISABLE_BUCKET_GC) { return; } std::lock_guard lock(mBucketMutex); - auto referenced = getAllReferencedBuckets(); + auto referenced = getAllReferencedBuckets(has); std::transform(std::begin(mSharedLiveBuckets), std::end(mSharedLiveBuckets), std::inserter(referenced, std::end(referenced)), [](std::pair> const& p) { @@ -818,11 +817,11 @@ BucketManager::cleanupStaleFiles() } void -BucketManager::forgetUnreferencedBuckets() +BucketManager::forgetUnreferencedBuckets(HistoryArchiveState const& has) { ZoneScoped; std::lock_guard lock(mBucketMutex); - auto referenced = getAllReferencedBuckets(); + auto referenced = getAllReferencedBuckets(has); auto blReferenced = getBucketListReferencedBuckets(); auto bucketMapLoop = [&](auto& bucketMap, auto& futureMap) { @@ -867,7 +866,7 @@ BucketManager::forgetUnreferencedBuckets() Bucket, "BucketManager::forgetUnreferencedBuckets dropping {}", filename); - if (!filename.empty() && !mApp.getConfig().DISABLE_BUCKET_GC) + if (!filename.empty() && !mConfig.DISABLE_BUCKET_GC) { CLOG_TRACE(Bucket, "removing bucket file: {}", filename); std::filesystem::remove(filename); @@ -1049,7 +1048,8 @@ BucketManager::maybeSetIndex(std::shared_ptr b, void BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, - uint32_t ledgerVers) + uint32_t ledgerVers, + SorobanNetworkConfig const& cfg) { releaseAssert(mSnapshotManager); releaseAssert(!mEvictionFuture.valid()); @@ -1057,7 +1057,6 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, auto searchableBL = mSnapshotManager->copySearchableLiveBucketListSnapshot(); - auto const& cfg = mApp.getLedgerManager().getSorobanNetworkConfigForApply(); auto const& sas = cfg.stateArchivalSettings(); using task_t = std::packaged_task; @@ -1078,31 +1077,27 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq, } EvictedStateVectors -BucketManager::resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx, - uint32_t ledgerSeq, - LedgerKeySet const& modifiedKeys, - uint32_t ledgerVers) +BucketManager::resolveBackgroundEvictionScan( + AbstractLedgerTxn& ltx, uint32_t 
ledgerSeq, + LedgerKeySet const& modifiedKeys, uint32_t ledgerVers, + SorobanNetworkConfig& networkConfig) { ZoneScoped; - releaseAssert(threadIsMain()); releaseAssert(mEvictionStatistics); if (!mEvictionFuture.valid()) { - startBackgroundEvictionScan(ledgerSeq, ledgerVers); + startBackgroundEvictionScan(ledgerSeq, ledgerVers, networkConfig); } auto evictionCandidates = mEvictionFuture.get(); - auto const& networkConfig = - mApp.getLedgerManager().getSorobanNetworkConfigForApply(); - // If eviction related settings changed during the ledger, we have to // restart the scan if (!evictionCandidates.isValid(ledgerSeq, networkConfig.stateArchivalSettings())) { - startBackgroundEvictionScan(ledgerSeq, ledgerVers); + startBackgroundEvictionScan(ledgerSeq, ledgerVers, networkConfig); evictionCandidates = mEvictionFuture.get(); } @@ -1229,6 +1224,7 @@ BucketManager::assumeState(HistoryArchiveState const& has, uint32_t maxProtocolVersion, bool restartMerges) { ZoneScoped; + releaseAssert(threadIsMain()); releaseAssertOrThrow(mConfig.MODE_ENABLES_BUCKETLIST); // TODO: Assume archival bucket state @@ -1277,7 +1273,7 @@ BucketManager::assumeState(HistoryArchiveState const& has, mLiveBucketList->restartMerges(mApp, maxProtocolVersion, has.currentLedger); } - cleanupStaleFiles(); + cleanupStaleFiles(has); } void @@ -1378,7 +1374,7 @@ std::shared_ptr BucketManager::mergeBuckets(HistoryArchiveState const& has) { ZoneScoped; - + releaseAssert(threadIsMain()); std::map ledgerMap = loadCompleteLedgerState(has); BucketMetadata meta; MergeCounters mc; @@ -1568,9 +1564,11 @@ BucketManager::visitLedgerEntries( } std::shared_ptr -BucketManager::scheduleVerifyReferencedBucketsWork() +BucketManager::scheduleVerifyReferencedBucketsWork( + HistoryArchiveState const& has) { - std::set hashes = getAllReferencedBuckets(); + releaseAssert(threadIsMain()); + std::set hashes = getAllReferencedBuckets(has); std::vector> seq; for (auto const& h : hashes) { diff --git a/src/bucket/BucketManager.h b/src/bucket/BucketManager.h index 9f9724de78..d17517838f 100644 --- a/src/bucket/BucketManager.h +++ b/src/bucket/BucketManager.h @@ -70,6 +70,11 @@ class BucketManager : NonMovableOrCopyable static std::string const kLockFilename; + // NB: ideally, BucketManager should have no access to mApp, as it's too + // dangerous in the context of parallel application. BucketManager is quite + // bloated, with lots of legacy code, so to ensure safety, annotate all + // functions using mApp with `releaseAssert(threadIsMain())` and avoid + // accessing mApp in the background. Application& mApp; std::unique_ptr mLiveBucketList; std::unique_ptr mHotArchiveBucketList; @@ -124,7 +129,7 @@ class BucketManager : NonMovableOrCopyable std::atomic mIsShutdown{false}; - void cleanupStaleFiles(); + void cleanupStaleFiles(HistoryArchiveState const& has); void deleteTmpDirAndUnlockBucketDir(); void deleteEntireBucketDir(); @@ -260,7 +265,7 @@ class BucketManager : NonMovableOrCopyable // not immediately cause the buckets to delete themselves, if someone else // is using them via a shared_ptr<>, but the BucketManager will no longer // independently keep them alive. - void forgetUnreferencedBuckets(); + void forgetUnreferencedBuckets(HistoryArchiveState const& has); // Feed a new batch of entries to the bucket list. This interface expects to // be given separate init (created) and live (updated) entry vectors. 
The @@ -290,7 +295,8 @@ class BucketManager : NonMovableOrCopyable // Scans BucketList for non-live entries to evict starting at the entry // pointed to by EvictionIterator. Evicts until `maxEntriesToEvict` entries // have been evicted or maxEvictionScanSize bytes have been scanned. - void startBackgroundEvictionScan(uint32_t ledgerSeq, uint32_t ledgerVers); + void startBackgroundEvictionScan(uint32_t ledgerSeq, uint32_t ledgerVers, + SorobanNetworkConfig const& cfg); // Returns a pair of vectors representing entries evicted this ledger, where // the first vector constains all deleted keys (TTL and temporary), and @@ -300,7 +306,8 @@ class BucketManager : NonMovableOrCopyable EvictedStateVectors resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx, uint32_t ledgerSeq, LedgerKeySet const& modifiedKeys, - uint32_t ledgerVers); + uint32_t ledgerVers, + SorobanNetworkConfig& networkConfig); medida::Meter& getBloomMissMeter() const; medida::Meter& getBloomLookupMeter() const; @@ -325,7 +332,8 @@ class BucketManager : NonMovableOrCopyable // Return the set of buckets referenced by the BucketList, LCL HAS, // and publish queue. - std::set getAllReferencedBuckets() const; + std::set + getAllReferencedBuckets(HistoryArchiveState const& has) const; // Check for missing bucket files that would prevent `assumeState` from // succeeding @@ -382,7 +390,8 @@ class BucketManager : NonMovableOrCopyable // Schedule a Work class that verifies the hashes of all referenced buckets // on background threads. - std::shared_ptr scheduleVerifyReferencedBucketsWork(); + std::shared_ptr + scheduleVerifyReferencedBucketsWork(HistoryArchiveState const& has); Config const& getConfig() const; diff --git a/src/bucket/BucketSnapshotManager.cpp b/src/bucket/BucketSnapshotManager.cpp index 0dffcc31ea..aaa85a3e44 100644 --- a/src/bucket/BucketSnapshotManager.cpp +++ b/src/bucket/BucketSnapshotManager.cpp @@ -98,7 +98,6 @@ BucketSnapshotManager::recordBulkLoadMetrics(std::string const& label, { // For now, only keep metrics for the main thread. We can decide on what // metrics make sense when more background services are added later. - releaseAssert(threadIsMain()); if (numEntries != 0) { @@ -153,8 +152,6 @@ BucketSnapshotManager::updateCurrentSnapshot( SnapshotPtrT&& liveSnapshot, SnapshotPtrT&& hotArchiveSnapshot) { - releaseAssert(threadIsMain()); - auto updateSnapshot = [numHistoricalSnapshots = mNumHistoricalSnapshots]( auto& currentSnapshot, auto& historicalSnapshots, auto&& newSnapshot) { diff --git a/src/bucket/LiveBucketList.h b/src/bucket/LiveBucketList.h index 0f2a6ac268..688f0acd22 100644 --- a/src/bucket/LiveBucketList.h +++ b/src/bucket/LiveBucketList.h @@ -9,6 +9,9 @@ namespace stellar { + +class SorobanNetworkConfig; + // The LiveBucketList stores the current canonical state of the ledger. It is // made up of LiveBucket buckets, which in turn store individual entries of type // BucketEntry. 
When an entry is "evicted" from the ledger, it is removed from diff --git a/src/bucket/SearchableBucketList.cpp b/src/bucket/SearchableBucketList.cpp index 60c66c31a4..d225a7c732 100644 --- a/src/bucket/SearchableBucketList.cpp +++ b/src/bucket/SearchableBucketList.cpp @@ -109,7 +109,6 @@ SearchableLiveBucketListSnapshot::loadPoolShareTrustLinesByAccountAndAsset( ZoneScoped; // This query should only be called during TX apply - releaseAssert(threadIsMain()); releaseAssert(mSnapshot); LedgerKeySet trustlinesToLoad; @@ -154,7 +153,6 @@ SearchableLiveBucketListSnapshot::loadInflationWinners(size_t maxWinners, // This is a legacy query, should only be called by main thread during // catchup - releaseAssert(threadIsMain()); auto timer = mSnapshotManager.recordBulkLoadMetrics("inflationWinners", 0) .TimeScope(); diff --git a/src/bucket/test/BucketListTests.cpp b/src/bucket/test/BucketListTests.cpp index 0a5b545097..5dfb6572f6 100644 --- a/src/bucket/test/BucketListTests.cpp +++ b/src/bucket/test/BucketListTests.cpp @@ -869,7 +869,7 @@ TEST_CASE_VERSIONS("network config snapshots BucketList size", "[bucketlist]") LedgerManagerForBucketTests& lm = app->getLedgerManager(); auto& networkConfig = - app->getLedgerManager().getSorobanNetworkConfigReadOnly(); + app->getLedgerManager().getMutableSorobanNetworkConfig(); uint32_t windowSize = networkConfig.stateArchivalSettings() .bucketListSizeWindowSampleSize; diff --git a/src/bucket/test/BucketManagerTests.cpp b/src/bucket/test/BucketManagerTests.cpp index 8eb393c789..5c22b3b997 100644 --- a/src/bucket/test/BucketManagerTests.cpp +++ b/src/bucket/test/BucketManagerTests.cpp @@ -237,7 +237,8 @@ TEST_CASE_VERSIONS("bucketmanager ownership", "[bucket][bucketmanager]") CHECK(fs::exists(indexFilename)); b.reset(); - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); CHECK(!fs::exists(filename)); CHECK(!fs::exists(indexFilename)); }; @@ -260,7 +261,8 @@ TEST_CASE_VERSIONS("bucketmanager ownership", "[bucket][bucketmanager]") // This shouldn't change if we forget unreferenced buckets since // it's referenced by bucketlist. - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); CHECK(b1.use_count() == 3); // But if we mutate the curr bucket of the bucketlist, it should. 
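// Illustrative sketch, not part of this patch: after this change the caller
// supplies the HistoryArchiveState explicitly instead of BucketManager pulling
// it from LedgerManager, which keeps bucket garbage collection well-defined
// while a ledger is being applied on another thread. The helper below is
// hypothetical.
void
exampleBucketGc(Application& app)
{
    auto const& has = app.getLedgerManager().getLastClosedLedgerHAS();
    auto& bm = app.getBucketManager();

    // Drops shared references to any bucket not reachable from the live
    // bucket list, the supplied HAS, or the publish queue.
    bm.forgetUnreferencedBuckets(has);
}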
@@ -343,7 +345,8 @@ TEST_CASE_VERSIONS("bucketmanager reattach to finished merge", LedgerTestUtils::generateValidLedgerEntriesWithExclusions( {CONFIG_SETTING}, 10), {}); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } while (!LiveBucketList::levelShouldSpill(ledger, level - 1)); // Check that the merge on level isn't committed (we're in @@ -433,7 +436,8 @@ TEST_CASE_VERSIONS("bucketmanager reattach to running merge", {CONFIG_SETTING}, 100), {}); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); HistoryArchiveState has(ledger, bl, app->getConfig().NETWORK_PASSPHRASE); @@ -517,8 +521,10 @@ TEST_CASE("bucketmanager do not leak empty-merge futures", bl.resolveAnyReadyFutures(); std::this_thread::sleep_for(std::chrono::seconds(1)); } - bm.forgetUnreferencedBuckets(); - auto bmRefBuckets = bm.getAllReferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); + auto bmRefBuckets = bm.getAllReferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); auto bmDirBuckets = bm.getBucketHashesInBucketDirForTesting(); // Remove the 0 bucket in case it's "referenced"; it's never a file. @@ -574,16 +580,18 @@ TEST_CASE_VERSIONS( {CONFIG_SETTING}, 100), {}); clock.crank(false); - bm.forgetUnreferencedBuckets(); + bm.forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } // We should have published nothing and have the first // checkpoint still queued. REQUIRE(hm.getPublishSuccessCount() == 0); - REQUIRE(hm.getMinLedgerQueuedToPublish() == 7); + REQUIRE(HistoryManager::getMinLedgerQueuedToPublish(app->getConfig()) == + 7); auto oldReattachments = bm.readMergeCounters().mFinishedMergeReattachments; - auto HASs = hm.getPublishQueueStates(); + auto HASs = HistoryManager::getPublishQueueStates(app->getConfig()); REQUIRE(HASs.size() == 5); for (auto& has : HASs) { diff --git a/src/bucket/test/BucketTestUtils.cpp b/src/bucket/test/BucketTestUtils.cpp index e56fe35b2e..34122c63d2 100644 --- a/src/bucket/test/BucketTestUtils.cpp +++ b/src/bucket/test/BucketTestUtils.cpp @@ -101,6 +101,10 @@ closeLedger(Application& app, std::optional skToSignValue, app.getHerder().externalizeValue(TxSetXDRFrame::makeEmpty(lcl), ledgerNum, lcl.header.scpValue.closeTime, upgrades, skToSignValue); + testutil::crankUntil( + app, + [&lm, ledgerNum]() { return lm.getLastClosedLedgerNum() == ledgerNum; }, + std::chrono::seconds(10)); return lm.getLastClosedLedgerHeader().hash; } @@ -232,7 +236,9 @@ LedgerManagerForBucketTests::transferLedgerEntriesToBucketList( auto evictedState = mApp.getBucketManager().resolveBackgroundEvictionScan( - ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers); + ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers, + mApp.getLedgerManager() + .getMutableSorobanNetworkConfig()); if (protocolVersionStartsFrom( initialLedgerVers, diff --git a/src/catchup/ApplyCheckpointWork.cpp b/src/catchup/ApplyCheckpointWork.cpp index 8cdafece84..ed950a2672 100644 --- a/src/catchup/ApplyCheckpointWork.cpp +++ b/src/catchup/ApplyCheckpointWork.cpp @@ -31,13 +31,13 @@ ApplyCheckpointWork::ApplyCheckpointWork(Application& app, BasicWork::RETRY_NEVER) , mDownloadDir(downloadDir) , mLedgerRange(range) - , mCheckpoint( - app.getHistoryManager().checkpointContainingLedger(range.mFirst)) + , mCheckpoint(HistoryManager::checkpointContainingLedger(range.mFirst, + app.getConfig())) , mOnFailure(cb) { // Ledger range 
check to enforce application of a single checkpoint - auto const& hm = mApp.getHistoryManager(); - auto low = hm.firstLedgerInCheckpointContaining(mCheckpoint); + auto low = HistoryManager::firstLedgerInCheckpointContaining( + mCheckpoint, mApp.getConfig()); if (mLedgerRange.mFirst != low) { throw std::runtime_error( diff --git a/src/catchup/ApplyLedgerWork.cpp b/src/catchup/ApplyLedgerWork.cpp index 5d910f8bf5..bba96df816 100644 --- a/src/catchup/ApplyLedgerWork.cpp +++ b/src/catchup/ApplyLedgerWork.cpp @@ -23,7 +23,8 @@ BasicWork::State ApplyLedgerWork::onRun() { ZoneScoped; - mApp.getLedgerManager().closeLedger(mLedgerCloseData); + mApp.getLedgerManager().closeLedger(mLedgerCloseData, + /* externalize */ false); return BasicWork::State::WORK_SUCCESS; } diff --git a/src/catchup/CatchupManager.h b/src/catchup/CatchupManager.h index a6f5344880..f0b38d0ec2 100644 --- a/src/catchup/CatchupManager.h +++ b/src/catchup/CatchupManager.h @@ -49,11 +49,18 @@ class CatchupManager friend CatchupMetrics operator-(CatchupMetrics const& x, CatchupMetrics const& y); }; + + enum class ProcessLedgerResult + { + PROCESSED_ALL_LEDGERS_SEQUENTIALLY, + WAIT_TO_APPLY_BUFFERED_OR_CATCHUP + }; static std::unique_ptr create(Application& app); // Process ledgers that could not be applied, and determine if catchup // should run - virtual void processLedger(LedgerCloseData const& ledgerData) = 0; + virtual ProcessLedgerResult processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) = 0; // Forcibly switch the application into catchup mode, treating `toLedger` // as the destination ledger number and count as the number of past ledgers @@ -103,6 +110,8 @@ class CatchupManager // heard of. virtual uint32_t getLargestLedgerSeqHeard() const = 0; + virtual uint32_t getMaxScheduledToApply() = 0; + // Ensure any metrics that are "current state" gauge-like counters reflect // the current reality as best as possible. virtual void syncMetrics() = 0; diff --git a/src/catchup/CatchupManagerImpl.cpp b/src/catchup/CatchupManagerImpl.cpp index 9c101066cf..98985df685 100644 --- a/src/catchup/CatchupManagerImpl.cpp +++ b/src/catchup/CatchupManagerImpl.cpp @@ -8,6 +8,7 @@ #include "util/asio.h" #include "catchup/CatchupManagerImpl.h" #include "catchup/CatchupConfiguration.h" +#include "herder/Herder.h" #include "history/FileTransferInfo.h" #include "ledger/LedgerManager.h" #include "main/Application.h" @@ -23,6 +24,8 @@ namespace stellar { +const uint32_t CatchupManagerImpl::MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT = 12; + CatchupManagerImpl::CatchupMetrics::CatchupMetrics() : mHistoryArchiveStatesDownloaded{0} , mCheckpointsDownloaded{0} @@ -72,7 +75,8 @@ findFirstCheckpoint(T begin, T end, HistoryManager const& hm) { return std::find_if(begin, end, [&hm](std::pair const& kvp) { - return hm.isFirstLedgerInCheckpoint(kvp.first); + return HistoryManager::isFirstLedgerInCheckpoint( + kvp.first, hm.getConfig()); }); } @@ -89,6 +93,7 @@ CatchupManagerImpl::CatchupManagerImpl(Application& app) app.getMetrics().NewCounter({"ledger", "memory", "queued-ledgers"})) , mLargestLedgerSeqHeard(0) { + releaseAssert(threadIsMain()); } CatchupManagerImpl::~CatchupManagerImpl() @@ -98,14 +103,19 @@ CatchupManagerImpl::~CatchupManagerImpl() uint32_t CatchupManagerImpl::getCatchupCount() { + releaseAssert(threadIsMain()); return mApp.getConfig().CATCHUP_COMPLETE ? 
std::numeric_limits::max() : mApp.getConfig().CATCHUP_RECENT; } -void -CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) +CatchupManager::ProcessLedgerResult +CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) { + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + ZoneScoped; if (catchupWorkIsDone()) { @@ -119,26 +129,33 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) logAndUpdateCatchupStatus(true); } + // Always skip old ledgers uint32_t lastReceivedLedgerSeq = ledgerData.getLedgerSeq(); + if (lastReceivedLedgerSeq <= *mLastQueuedToApply) + { + // If last queued to apply is already at-or-ahead of the ledger we just + // received from the network, we're up to date. Return early, nothing to + // do. + CLOG_INFO( + Ledger, + "Skipping close ledger: local state is {}, more recent than {}", + *mLastQueuedToApply, ledgerData.getLedgerSeq()); + return ProcessLedgerResult::PROCESSED_ALL_LEDGERS_SEQUENTIALLY; + } + + // Always add a newer ledger, maybe apply + mSyncingLedgers.emplace(lastReceivedLedgerSeq, ledgerData); mLargestLedgerSeqHeard = std::max(mLargestLedgerSeqHeard, lastReceivedLedgerSeq); // 1. CatchupWork is not running yet - // 2. CatchupManager received ledger that was immediately applied by + // 2. CatchupManager received ledger that should be immediately applied by // LedgerManager: check if we have any sequential ledgers. // If so, attempt to apply mSyncingLedgers and possibly get back in sync - if (!mCatchupWork && lastReceivedLedgerSeq == - mApp.getLedgerManager().getLastClosedLedgerNum()) + if (!mCatchupWork && lastReceivedLedgerSeq == *mLastQueuedToApply + 1) { tryApplySyncingLedgers(); - return; - } - else if (lastReceivedLedgerSeq <= - mApp.getLedgerManager().getLastClosedLedgerNum()) - { - // If LCL is already at-or-ahead of the ledger we just received from the - // network, we're up to date. Return early, nothing to do. - return; + return ProcessLedgerResult::PROCESSED_ALL_LEDGERS_SEQUENTIALLY; } // For the rest of this method: we know LCL has fallen behind the network @@ -151,6 +168,9 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) // to history and commence catchup, running the (checkpoint-driven) catchup // state machine to ledger L-1 (the end of the checkpoint covering K) and // then replay buffered ledgers from L onwards. + CLOG_INFO(Ledger, + "Close of ledger {} buffered. mSyncingLedgers has {} ledgers", + ledgerData.getLedgerSeq(), mSyncingLedgers.size()); // First: if CatchupWork has started, just buffer and return early. if (mCatchupWork) @@ -160,17 +180,17 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) auto const& config = mCatchupWork->getCatchupConfiguration(); if (ledgerData.getLedgerSeq() <= config.toLedger()) { - return; + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } - addAndTrimSyncingLedgers(ledgerData); + trimSyncingLedgers(); logAndUpdateCatchupStatus(true); - return; + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } // Next, we buffer every out of sync ledger to allow us to get back in sync // in case the ledgers we're missing are received. - addAndTrimSyncingLedgers(ledgerData); + trimSyncingLedgers(); // Finally we wait some number of ledgers beyond the smallest buffered // checkpoint ledger before we trigger the CatchupWork. This could be any @@ -178,14 +198,14 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) // after the first buffered one. 
Since we can receive out of order ledgers, // we just check for any ledger larger than the checkpoint - auto& hm = mApp.getHistoryManager(); - std::string message; uint32_t firstLedgerInBuffer = mSyncingLedgers.begin()->first; uint32_t lastLedgerInBuffer = mSyncingLedgers.crbegin()->first; if (mApp.getConfig().modeDoesCatchupWithBucketList() && - hm.isFirstLedgerInCheckpoint(firstLedgerInBuffer) && - firstLedgerInBuffer < lastLedgerInBuffer) + HistoryManager::isFirstLedgerInCheckpoint(firstLedgerInBuffer, + mApp.getConfig()) && + firstLedgerInBuffer < lastLedgerInBuffer && + !mApp.getLedgerManager().isApplying()) { // No point in processing ledgers as catchup won't ever be able to // succeed @@ -207,16 +227,25 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) { // get the smallest checkpoint we need to start catchup uint32_t requiredFirstLedgerInCheckpoint = - hm.isFirstLedgerInCheckpoint(firstLedgerInBuffer) + HistoryManager::isFirstLedgerInCheckpoint(firstLedgerInBuffer, + mApp.getConfig()) ? firstLedgerInBuffer - : hm.firstLedgerAfterCheckpointContaining(firstLedgerInBuffer); + : HistoryManager::firstLedgerAfterCheckpointContaining( + firstLedgerInBuffer, mApp.getConfig()); - uint32_t catchupTriggerLedger = - hm.ledgerToTriggerCatchup(requiredFirstLedgerInCheckpoint); + uint32_t catchupTriggerLedger = HistoryManager::ledgerToTriggerCatchup( + requiredFirstLedgerInCheckpoint, mApp.getConfig()); + if (mApp.getLedgerManager().isApplying()) + { + message = + fmt::format(FMT_STRING("Waiting for ledger {:d} application to " + "complete before starting catchup"), + getMaxScheduledToApply()); + } // If the trigger ledger is behind the last ledger, that means we're // waiting for out of order ledgers, which should arrive quickly - if (catchupTriggerLedger > lastLedgerInBuffer) + else if (catchupTriggerLedger > lastLedgerInBuffer) { auto eta = (catchupTriggerLedger - lastLedgerInBuffer) * mApp.getConfig().getExpectedLedgerCloseTime(); @@ -233,6 +262,7 @@ CatchupManagerImpl::processLedger(LedgerCloseData const& ledgerData) } } logAndUpdateCatchupStatus(true, message); + return ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP; } void @@ -241,7 +271,10 @@ CatchupManagerImpl::startCatchup( std::set> bucketsToRetain) { ZoneScoped; - auto lastClosedLedger = mApp.getLedgerManager().getLastClosedLedgerNum(); + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + + auto lastClosedLedger = *mLastQueuedToApply; if ((configuration.toLedger() != CatchupConfiguration::CURRENT) && (configuration.toLedger() <= lastClosedLedger)) { @@ -250,13 +283,6 @@ CatchupManagerImpl::startCatchup( configuration.toLedger(), lastClosedLedger)); } - if (configuration.localBucketsOnly() != - mApp.getLedgerManager().rebuildingInMemoryState()) - { - throw std::invalid_argument( - "Local catchup is only valid when rebuilding ledger state"); - } - // Offline and local catchup types aren't triggered by buffered ledgers auto offlineCatchup = configuration.offline() || configuration.localBucketsOnly(); @@ -273,12 +299,14 @@ CatchupManagerImpl::startCatchup( std::string CatchupManagerImpl::getStatus() const { + releaseAssert(threadIsMain()); return mCatchupWork ? 
mCatchupWork->getStatus() : std::string{}; } BasicWork::State CatchupManagerImpl::getCatchupWorkState() const { + releaseAssert(threadIsMain()); releaseAssert(mCatchupWork); return mCatchupWork->getState(); } @@ -286,12 +314,14 @@ CatchupManagerImpl::getCatchupWorkState() const bool CatchupManagerImpl::catchupWorkIsDone() const { + releaseAssert(threadIsMain()); return mCatchupWork && mCatchupWork->isDone(); } bool CatchupManagerImpl::isCatchupInitialized() const { + releaseAssert(threadIsMain()); return mCatchupWork != nullptr; } @@ -299,6 +329,7 @@ void CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous, std::string const& message) { + releaseAssert(threadIsMain()); if (!message.empty()) { auto contiguousString = @@ -323,16 +354,20 @@ CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous, void CatchupManagerImpl::logAndUpdateCatchupStatus(bool contiguous) { + releaseAssert(threadIsMain()); logAndUpdateCatchupStatus(contiguous, getStatus()); } std::optional CatchupManagerImpl::maybeGetNextBufferedLedgerToApply() { + releaseAssert(threadIsMain()); + // Since we just applied a ledger, refresh mLastQueuedToApply + updateLastQueuedToApply(); + trimSyncingLedgers(); if (!mSyncingLedgers.empty() && - mSyncingLedgers.begin()->first == - mApp.getLedgerManager().getLastClosedLedgerNum() + 1) + mSyncingLedgers.begin()->first == *mLastQueuedToApply + 1) { return std::make_optional( mSyncingLedgers.begin()->second); @@ -346,6 +381,7 @@ CatchupManagerImpl::maybeGetNextBufferedLedgerToApply() std::optional CatchupManagerImpl::maybeGetLargestBufferedLedger() { + releaseAssert(threadIsMain()); if (!mSyncingLedgers.empty()) { return std::make_optional( @@ -360,29 +396,45 @@ CatchupManagerImpl::maybeGetLargestBufferedLedger() uint32_t CatchupManagerImpl::getLargestLedgerSeqHeard() const { + releaseAssert(threadIsMain()); return mLargestLedgerSeqHeard; } +uint32_t +CatchupManagerImpl::getMaxScheduledToApply() +{ + releaseAssert(threadIsMain()); + updateLastQueuedToApply(); + return *mLastQueuedToApply; +} + void CatchupManagerImpl::syncMetrics() { + releaseAssert(threadIsMain()); mSyncingLedgersSize.set_count(mSyncingLedgers.size()); } void -CatchupManagerImpl::addAndTrimSyncingLedgers(LedgerCloseData const& ledgerData) +CatchupManagerImpl::updateLastQueuedToApply() { - mSyncingLedgers.emplace(ledgerData.getLedgerSeq(), ledgerData); - trimSyncingLedgers(); - - CLOG_INFO(Ledger, - "Close of ledger {} buffered. mSyncingLedgers has {} ledgers", - ledgerData.getLedgerSeq(), mSyncingLedgers.size()); + releaseAssert(threadIsMain()); + if (!mLastQueuedToApply) + { + mLastQueuedToApply = mApp.getLedgerManager().getLastClosedLedgerNum(); + } + else + { + mLastQueuedToApply = + std::max(*mLastQueuedToApply, + mApp.getLedgerManager().getLastClosedLedgerNum()); + } } void CatchupManagerImpl::startOnlineCatchup() { + releaseAssert(threadIsMain()); releaseAssert(mSyncingLedgers.size() > 1); // catchup just before first buffered ledger that way we will have a @@ -399,7 +451,7 @@ CatchupManagerImpl::startOnlineCatchup() void CatchupManagerImpl::trimSyncingLedgers() { - + releaseAssert(threadIsMain()); auto removeLedgersLessThan = [&](uint32_t ledger) { // lower_bound returns an iterator pointing to the first element whose // key is not considered to go before k. Thus we get the iterator to @@ -408,12 +460,12 @@ CatchupManagerImpl::trimSyncingLedgers() // This erases [begin, it). 
mSyncingLedgers.erase(mSyncingLedgers.begin(), it); }; - removeLedgersLessThan(mApp.getLedgerManager().getLastClosedLedgerNum() + 1); - auto& hm = mApp.getHistoryManager(); + removeLedgersLessThan(*mLastQueuedToApply + 1); if (!mSyncingLedgers.empty()) { auto const lastBufferedLedger = mSyncingLedgers.rbegin()->first; - if (hm.isFirstLedgerInCheckpoint(lastBufferedLedger)) + if (HistoryManager::isFirstLedgerInCheckpoint(lastBufferedLedger, + mApp.getConfig())) { // The last ledger is the first ledger in the checkpoint. // This means that nodes may not have started publishing @@ -421,7 +473,8 @@ CatchupManagerImpl::trimSyncingLedgers() // We should only keep lastBufferedLedger _and_ the checkpoint // before that. removeLedgersLessThan( - hm.firstLedgerInCheckpointContaining(lastBufferedLedger - 1)); + HistoryManager::firstLedgerInCheckpointContaining( + lastBufferedLedger - 1, mApp.getConfig())); } else { @@ -430,7 +483,8 @@ CatchupManagerImpl::trimSyncingLedgers() // the checkpoint of lastBufferedLedger. // Therefore, we will delete all ledgers before the checkpoint. removeLedgersLessThan( - hm.firstLedgerInCheckpointContaining(lastBufferedLedger)); + HistoryManager::firstLedgerInCheckpointContaining( + lastBufferedLedger, mApp.getConfig())); } } } @@ -439,8 +493,9 @@ void CatchupManagerImpl::tryApplySyncingLedgers() { ZoneScoped; - auto const& ledgerHeader = - mApp.getLedgerManager().getLastClosedLedgerHeader(); + releaseAssert(threadIsMain()); + uint32_t nextToClose = *mLastQueuedToApply + 1; + auto lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); // We can apply multiple ledgers here, which might be slow. This is a rare // occurrence so we should be fine. @@ -450,16 +505,47 @@ CatchupManagerImpl::tryApplySyncingLedgers() auto const& lcd = it->second; // we still have a missing ledger - if (ledgerHeader.header.ledgerSeq + 1 != lcd.getLedgerSeq()) + if (nextToClose != lcd.getLedgerSeq()) + { + break; + } + + // If we have too many ledgers queued to apply, just stop scheduling + // more and let the node gracefully go into catchup. 
+ releaseAssert(mLastQueuedToApply >= lcl); + if (nextToClose - lcl >= MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT) { + CLOG_INFO(History, + "Next ledger to apply is {}, but LCL {} is too far " + "behind, waiting", + nextToClose, lcl); break; } - mApp.getLedgerManager().closeLedger(lcd); - CLOG_INFO(History, "Closed buffered ledger: {}", - LedgerManager::ledgerAbbrev(ledgerHeader)); + if (mApp.getConfig().parallelLedgerClose()) + { + // Notify LM that application has started + mApp.getLedgerManager().beginApply(); + mApp.postOnLedgerCloseThread( + [&app = mApp, lcd]() { + // No-op if app is shutting down + if (app.isStopping()) + { + return; + } + app.getLedgerManager().closeLedger(lcd, + /* externalize */ true); + }, + "closeLedger queue"); + } + else + { + mApp.getLedgerManager().closeLedger(lcd, /* externalize */ true); + } + mLastQueuedToApply = lcd.getLedgerSeq(); ++it; + ++nextToClose; } mSyncingLedgers.erase(mSyncingLedgers.cbegin(), it); @@ -468,35 +554,41 @@ CatchupManagerImpl::tryApplySyncingLedgers() void CatchupManagerImpl::historyArchiveStatesDownloaded(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mHistoryArchiveStatesDownloaded += num; } void CatchupManagerImpl::ledgersVerified(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mLedgersVerified += num; } void CatchupManagerImpl::ledgerChainsVerificationFailed(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mLedgerChainsVerificationFailed += num; } void CatchupManagerImpl::bucketsApplied(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mBucketsApplied += num; } void CatchupManagerImpl::txSetsApplied(uint32_t num) { + releaseAssert(threadIsMain()); mMetrics.mTxSetsApplied += num; } void CatchupManagerImpl::fileDownloaded(FileType type, uint32_t num) { + releaseAssert(threadIsMain()); if (type == FileType::HISTORY_FILE_TYPE_BUCKET) { mMetrics.mBucketsDownloaded += num; diff --git a/src/catchup/CatchupManagerImpl.h b/src/catchup/CatchupManagerImpl.h index b02876c7c7..1b049c33b6 100644 --- a/src/catchup/CatchupManagerImpl.h +++ b/src/catchup/CatchupManagerImpl.h @@ -22,6 +22,11 @@ class Work; class CatchupManagerImpl : public CatchupManager { + // Maximum number of ledgers that can be queued to apply (this only applies + // when Config.parallelLedgerClose() == true). If this number if exceeded, + // core stops scheduling new ledgers to apply, and goes into catchup mode. + static uint32_t const MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT; + Application& mApp; std::shared_ptr mCatchupWork; @@ -44,12 +49,26 @@ class CatchupManagerImpl : public CatchupManager std::map mSyncingLedgers; medida::Counter& mSyncingLedgersSize; - void addAndTrimSyncingLedgers(LedgerCloseData const& ledgerData); + // Conceptually, there are three ledger sequences that LedgerManager, Herder + // and CatchupManager rely on: + // - L (mLargestLedgerSeqHeard) = maximum ledger that core heard the + // network externalize, may or may not be applied. + // - Q (mLastQueuedToApply) = Only applicable when + // mConfig.parallelLedgerClose() == true. Maximum ledger that was + // externalized by the network and passed to background thread for + // application. + // - LCL = last closed ledger, the last ledger that was externalized, and + // applied by core. + // - Core maintains the following invariace LCL <= Q <= L. Eventually, + // every externalized ledger will be applied. 
+ std::optional mLastQueuedToApply; + uint32_t mLargestLedgerSeqHeard; + + void updateLastQueuedToApply(); void startOnlineCatchup(); void trimSyncingLedgers(); void tryApplySyncingLedgers(); uint32_t getCatchupCount(); - uint32_t mLargestLedgerSeqHeard; CatchupMetrics mMetrics; // Check if catchup can't be performed due to local version incompatibility @@ -61,7 +80,8 @@ class CatchupManagerImpl : public CatchupManager CatchupManagerImpl(Application& app); ~CatchupManagerImpl() override; - void processLedger(LedgerCloseData const& ledgerData) override; + ProcessLedgerResult processLedger(LedgerCloseData const& ledgerData, + bool isLatestSlot) override; void startCatchup( CatchupConfiguration configuration, std::shared_ptr archive, @@ -80,6 +100,7 @@ class CatchupManagerImpl : public CatchupManager std::optional maybeGetNextBufferedLedgerToApply() override; std::optional maybeGetLargestBufferedLedger() override; uint32_t getLargestLedgerSeqHeard() const override; + uint32_t getMaxScheduledToApply() override; void syncMetrics() override; @@ -114,6 +135,14 @@ class CatchupManagerImpl : public CatchupManager { return mCatchupFatalFailure; } + + std::optional mMaxExternalizeApplyBuffer; + uint32_t + getMaxExternalizeApplyBuffer() + { + return mMaxExternalizeApplyBuffer ? *mMaxExternalizeApplyBuffer + : MAX_EXTERNALIZE_LEDGER_APPLY_DRIFT; + } #endif }; } diff --git a/src/catchup/CatchupWork.cpp b/src/catchup/CatchupWork.cpp index e7434bdf0b..89fc839791 100644 --- a/src/catchup/CatchupWork.cpp +++ b/src/catchup/CatchupWork.cpp @@ -90,10 +90,6 @@ CatchupWork::CatchupWork(Application& app, CLOG_INFO(History, "CatchupWork: selected archive {}", mArchive->getName()); } - - // Local catchup is only valid if core is rebuilding state - releaseAssert(mCatchupConfiguration.localBucketsOnly() == - mApp.getLedgerManager().rebuildingInMemoryState()); } CatchupWork::~CatchupWork() @@ -323,8 +319,8 @@ CatchupWork::getAndMaybeSetHistoryArchiveState() mCatchupConfiguration.toLedger() == CatchupConfiguration::CURRENT ? CatchupConfiguration::CURRENT - : mApp.getHistoryManager().checkpointContainingLedger( - mCatchupConfiguration.toLedger()); + : HistoryManager::checkpointContainingLedger( + mCatchupConfiguration.toLedger(), mApp.getConfig()); // Set retries to 10 to ensure we retry enough in case current // checkpoint isn't published yet mGetHistoryArchiveStateWork = addWork( @@ -528,7 +524,7 @@ CatchupWork::runCatchupStep() // In this case we should actually have been caught-up during // the replay process and, if judged successful, our LCL should // be the one provided as well. 
- auto& lastClosed = + auto lastClosed = mApp.getLedgerManager().getLastClosedLedgerHeader(); releaseAssert(mLastApplied.hash == lastClosed.hash); releaseAssert(mLastApplied.header == lastClosed.header); @@ -575,9 +571,8 @@ CatchupWork::runCatchupStep() return true; } - auto checkpoint = - app.getHistoryManager().checkpointContainingLedger( - ledgerSeq); + auto checkpoint = HistoryManager::checkpointContainingLedger( + ledgerSeq, app.getConfig()); auto ft = FileTransferInfo( dir, FileType::HISTORY_FILE_TYPE_LEDGER, checkpoint); diff --git a/src/catchup/ReplayDebugMetaWork.cpp b/src/catchup/ReplayDebugMetaWork.cpp index 2d2dcd7fde..6fcbefc13e 100644 --- a/src/catchup/ReplayDebugMetaWork.cpp +++ b/src/catchup/ReplayDebugMetaWork.cpp @@ -164,7 +164,8 @@ ReplayDebugMetaWork::applyLastLedger() if (lcl + 1 == debugTxSet.ledgerSeq) { mApp.getLedgerManager().closeLedger( - LedgerCloseData::toLedgerCloseData(debugTxSet)); + LedgerCloseData::toLedgerCloseData(debugTxSet), + /* externalize */ false); } else { diff --git a/src/database/Database.cpp b/src/database/Database.cpp index e6288ee6e3..29738ba7a6 100644 --- a/src/database/Database.cpp +++ b/src/database/Database.cpp @@ -216,7 +216,7 @@ Database::applySchemaUpgrade(unsigned long vers) Upgrades::dropSupportUpgradeHistory(*this); break; case 24: - getSession() << "DROP TABLE IF EXISTS pubsub;"; + getRawSession() << "DROP TABLE IF EXISTS pubsub;"; mApp.getPersistentState().migrateToSlotStateTable(); break; default: diff --git a/src/database/test/DatabaseTests.cpp b/src/database/test/DatabaseTests.cpp index c2fc838bd3..62be8787e1 100644 --- a/src/database/test/DatabaseTests.cpp +++ b/src/database/test/DatabaseTests.cpp @@ -32,7 +32,7 @@ transactionTest(Application::pointer app) int a0 = a + 1; int a1 = a + 2; - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); session << "DROP TABLE IF EXISTS test"; session << "CREATE TABLE test (x INTEGER)"; @@ -104,7 +104,7 @@ checkMVCCIsolation(Application::pointer app) int s2r1 = 0, s2r2 = 0, s2r3 = 0, s2r4 = 0; - auto& sess1 = app->getDatabase().getSession(); + auto& sess1 = app->getDatabase().getRawSession(); sess1 << "DROP TABLE IF EXISTS test"; sess1 << "CREATE TABLE test (x INTEGER)"; @@ -217,7 +217,7 @@ TEST_CASE("postgres smoketest", "[db]") Application::pointer app = createTestApplication(clock, cfg); int a = 10, b = 0; - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); SECTION("round trip") { @@ -249,7 +249,7 @@ TEST_CASE("postgres smoketest", "[db]") SECTION("postgres MVCC test") { - app->getDatabase().getSession() << "drop table if exists test"; + app->getDatabase().getRawSession() << "drop table if exists test"; checkMVCCIsolation(app); } } @@ -279,7 +279,7 @@ TEST_CASE("postgres performance", "[db][pgperf][!hide]") try { Application::pointer app = createTestApplication(clock, cfg); - auto& session = app->getDatabase().getSession(); + auto& session = app->getDatabase().getRawSession(); session << "drop table if exists txtest;"; session << "create table txtest (a bigint, b bigint, c bigint, primary " @@ -356,6 +356,5 @@ TEST_CASE("schema test", "[db]") auto& db = app->getDatabase(); auto dbv = db.getDBSchemaVersion(); - auto av = db.getAppSchemaVersion(); - REQUIRE(dbv == av); + REQUIRE(dbv == SCHEMA_VERSION); } diff --git a/src/herder/Herder.h b/src/herder/Herder.h index 5ed657b5f3..ba64929101 100644 --- a/src/herder/Herder.h +++ b/src/herder/Herder.h @@ -114,7 +114,8 @@ class Herder // 
restores Herder's state from disk virtual void start() = 0; - virtual void lastClosedLedgerIncreased(bool latest) = 0; + virtual void lastClosedLedgerIncreased(bool latest, + TxSetXDRFrameConstPtr txSet) = 0; // Setup Herder's state to fully participate in consensus virtual void setTrackingSCPState(uint64_t index, StellarValue const& value, @@ -222,5 +223,7 @@ class Herder virtual bool isBannedTx(Hash const& hash) const = 0; virtual TransactionFrameBaseConstPtr getTx(Hash const& hash) const = 0; + + virtual void beginApply() = 0; }; } diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index a208f630bc..208c05f68c 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -3,6 +3,8 @@ // of this distribution or at http://www.apache.org/licenses/LICENSE-2.0 #include "herder/HerderImpl.h" +#include "bucket/BucketManager.h" +#include "bucket/BucketSnapshotManager.h" #include "crypto/Hex.h" #include "crypto/KeyUtils.h" #include "crypto/SHA.h" @@ -14,9 +16,6 @@ #include "herder/TxSetFrame.h" #include "herder/TxSetUtils.h" #include "ledger/LedgerManager.h" -#include "ledger/LedgerTxn.h" -#include "ledger/LedgerTxnEntry.h" -#include "ledger/LedgerTxnHeader.h" #include "lib/json/json.h" #include "main/Application.h" #include "main/Config.h" @@ -249,10 +248,6 @@ HerderImpl::newSlotExternalized(bool synchronous, StellarValue const& value) // start timing next externalize from this point mLastExternalize = mApp.getClock().now(); - // In order to update the transaction queue we need to get the - // applied transactions. - updateTransactionQueue(mPendingEnvelopes.getTxSet(value.txSetHash)); - // perform cleanups // Evict slots that are outside of our ledger validity bracket auto minSlotToRemember = getMinLedgerSeqToRemember(); @@ -359,7 +354,7 @@ HerderImpl::processExternalized(uint64 slotIndex, StellarValue const& value, writeDebugTxSet(ledgerData); } - mLedgerManager.valueExternalized(ledgerData); + mLedgerManager.valueExternalized(ledgerData, isLatestSlot); } void @@ -434,6 +429,15 @@ recordExternalizeAndCheckCloseTimeDrift( } } +void +HerderImpl::beginApply() +{ + // Tx set might be applied async: in this case, cancel the timer. It'll be + // restarted when the tx set is applied. This is needed to not mess with + // Herder's out of sync recovery mechanism. + mTrackingTimer.cancel(); +} + void HerderImpl::valueExternalized(uint64 slotIndex, StellarValue const& value, bool isLatestSlot) @@ -476,10 +480,6 @@ HerderImpl::valueExternalized(uint64 slotIndex, StellarValue const& value, // Check to see if quorums have changed and we need to reanalyze. checkAndMaybeReanalyzeQuorumMap(); - - // heart beat *after* doing all the work (ensures that we do not include - // the overhead of externalization in the way we track SCP) - trackingHeartBeat(); } else { @@ -1136,16 +1136,31 @@ HerderImpl::safelyProcessSCPQueue(bool synchronous) } void -HerderImpl::lastClosedLedgerIncreased(bool latest) +HerderImpl::lastClosedLedgerIncreased(bool latest, TxSetXDRFrameConstPtr txSet) { + releaseAssert(threadIsMain()); + maybeSetupSorobanQueue( mLedgerManager.getLastClosedLedgerHeader().header.ledgerVersion); // Ensure potential upgrades are handled in overlay maybeHandleUpgrade(); + // In order to update the transaction queue we need to get the + // applied transactions. + updateTransactionQueue(txSet); + + // If we're in sync and there are no buffered ledgers to apply, trigger next + // ledger if (latest) { + // Re-start heartbeat tracking _after_ applying the most up-to-date + // ledger. 
This guarantees out-of-sync timer won't fire while we have + // ledgers to apply. + trackingHeartBeat(); + + // Ensure out of sync recovery did not get triggered while we were + // applying releaseAssert(isTracking()); releaseAssert(trackingConsensusLedgerIndex() == mLedgerManager.getLastClosedLedgerNum()); @@ -1158,6 +1173,10 @@ HerderImpl::lastClosedLedgerIncreased(bool latest) void HerderImpl::setupTriggerNextLedger() { + // Invariant: core proceeds to vote for the next ledger only when it's _not_ + // applying to ensure block production does not conflict with ledger close. + releaseAssert(!mLedgerManager.isApplying()); + // Invariant: tracking is equal to LCL when we trigger. This helps ensure // core emits SCP messages only for slots it can fully validate // (any closed ledger is fully validated) @@ -1301,8 +1320,8 @@ uint32_t HerderImpl::getMostRecentCheckpointSeq() { auto lastIndex = trackingConsensusLedgerIndex(); - return mApp.getHistoryManager().firstLedgerInCheckpointContaining( - lastIndex); + return HistoryManager::firstLedgerInCheckpointContaining(lastIndex, + mApp.getConfig()); } void @@ -1347,8 +1366,18 @@ HerderImpl::triggerNextLedger(uint32_t ledgerSeqToTrigger, return; } + // If applying, the next ledger will trigger voting + if (mLedgerManager.isApplying()) + { + CLOG_DEBUG(Herder, "triggerNextLedger: skipping (applying) : {}", + mApp.getStateHuman()); + return; + } + // our first choice for this round's set is all the tx we have collected // during last few ledger closes + // Since we are not currently applying, it is safe to use read-only LCL, as + // it's guaranteed to be up-to-date auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); PerPhaseTransactionList txPhases; txPhases.emplace_back(mTransactionQueue.getTransactions(lcl.header)); @@ -1531,7 +1560,7 @@ HerderImpl::getUpgradesJson() void HerderImpl::forceSCPStateIntoSyncWithLastClosedLedger() { - auto const& header = mLedgerManager.getLastClosedLedgerHeader().header; + auto header = mLedgerManager.getLastClosedLedgerHeader().header; setTrackingSCPState(header.ledgerSeq, header.scpValue, /* isTrackingNetwork */ true); } @@ -2262,6 +2291,7 @@ HerderImpl::purgeOldPersistedTxSets() void HerderImpl::trackingHeartBeat() { + releaseAssert(threadIsMain()); if (mApp.getConfig().MANUAL_CLOSE) { return; @@ -2326,6 +2356,15 @@ void HerderImpl::herderOutOfSync() { ZoneScoped; + // State switch from "tracking" to "out of sync" should only happen if there + // are no ledgers queued to be applied. If there are ledgers queued, it's + // possible the rest of the network is waiting for this node to vote. In + // this case we should _still_ remain in tracking and emit nomination; If + // the nodes does not hear anything from the network after that, then node + // can go into out of sync recovery. 
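    // Sketch of the timer lifecycle under asynchronous apply, as pieced
    // together from the other hunks in this patch:
    //
    //   1. valueExternalized -> CatchupManager::processLedger queues the
    //      ledger; LedgerManager::beginApply() calls Herder::beginApply(),
    //      which cancels mTrackingTimer so it cannot fire mid-apply.
    //   2. closeLedger runs (possibly on the ledger-close thread) and posts
    //      a completion handler back to the main thread.
    //   3. The handler calls lastClosedLedgerIncreased(latest, txSet), which
    //      restarts trackingHeartBeat() only once the newest ledger heard
    //      from the network has been applied, re-arming the timer that
    //      guards this out-of-sync path.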
+ releaseAssert(threadIsMain()); + releaseAssert(!mLedgerManager.isApplying()); + CLOG_WARNING(Herder, "Lost track of consensus"); auto s = getJsonInfo(20).toStyledString(); diff --git a/src/herder/HerderImpl.h b/src/herder/HerderImpl.h index be1d3d8e12..015cde5e8f 100644 --- a/src/herder/HerderImpl.h +++ b/src/herder/HerderImpl.h @@ -75,7 +75,8 @@ class HerderImpl : public Herder void start() override; - void lastClosedLedgerIncreased(bool latest) override; + void lastClosedLedgerIncreased(bool latest, + TxSetXDRFrameConstPtr txSet) override; SCP& getSCP(); HerderSCPDriver& @@ -195,6 +196,8 @@ class HerderImpl : public Herder xdr::xvector const& upgrades, SecretKey const& s) override; + virtual void beginApply() override; + void startTxSetGCTimer(); #ifdef BUILD_TESTS diff --git a/src/herder/HerderPersistence.h b/src/herder/HerderPersistence.h index e5d44b0cc5..ea82cd747b 100644 --- a/src/herder/HerderPersistence.h +++ b/src/herder/HerderPersistence.h @@ -49,6 +49,5 @@ class HerderPersistence static void dropAll(Database& db); static void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count); - static void createQuorumTrackingTable(soci::session& sess); }; } diff --git a/src/herder/HerderPersistenceImpl.cpp b/src/herder/HerderPersistenceImpl.cpp index bd743e8072..8f7977212a 100644 --- a/src/herder/HerderPersistenceImpl.cpp +++ b/src/herder/HerderPersistenceImpl.cpp @@ -40,6 +40,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, QuorumTracker::QuorumMap const& qmap) { ZoneScoped; + releaseAssert(threadIsMain()); if (envs.empty()) { return; @@ -47,12 +48,13 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto usedQSets = UnorderedMap{}; auto& db = mApp.getDatabase(); + auto& sess = db.getSession(); - soci::transaction txscope(db.getSession()); + soci::transaction txscope(sess.session()); { auto prepClean = db.getPreparedStatement( - "DELETE FROM scphistory WHERE ledgerseq =:l"); + "DELETE FROM scphistory WHERE ledgerseq =:l", sess); auto& st = prepClean.statement(); st.exchange(soci::use(seq)); @@ -92,7 +94,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepEnv = db.getPreparedStatement("INSERT INTO scphistory " "(nodeid, ledgerseq, envelope) VALUES " - "(:n, :l, :e)"); + "(:n, :l, :e)", + sess); auto& st = prepEnv.statement(); st.exchange(soci::use(nodeIDs, "n")); st.exchange(soci::use(seqs, "l")); @@ -124,7 +127,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, std::string qSetHHex(binToHex(qSetH)); auto prep = db.getPreparedStatement( - "UPDATE quoruminfo SET qsethash = :h WHERE nodeid = :id"); + "UPDATE quoruminfo SET qsethash = :h WHERE nodeid = :id", sess); auto& st = prep.statement(); st.exchange(soci::use(qSetHHex)); st.exchange(soci::use(nodeIDStrKey)); @@ -136,7 +139,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, if (st.get_affected_rows() != 1) { auto prepI = db.getPreparedStatement( - "INSERT INTO quoruminfo (nodeid, qsethash) VALUES (:id, :h)"); + "INSERT INTO quoruminfo (nodeid, qsethash) VALUES (:id, :h)", + sess); auto& stI = prepI.statement(); stI.exchange(soci::use(nodeIDStrKey)); stI.exchange(soci::use(qSetHHex)); @@ -158,7 +162,7 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, uint32_t lastSeenSeq; auto prepSelQSet = db.getPreparedStatement( - "SELECT lastledgerseq FROM scpquorums WHERE qsethash = :h"); + "SELECT lastledgerseq FROM scpquorums WHERE qsethash = :h", sess); auto& stSel = prepSelQSet.statement(); stSel.exchange(soci::into(lastSeenSeq)); stSel.exchange(soci::use(qSetH)); @@ -177,7 +181,8 @@ 
HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepUpQSet = db.getPreparedStatement( "UPDATE scpquorums SET " - "lastledgerseq = :l WHERE qsethash = :h"); + "lastledgerseq = :l WHERE qsethash = :h", + sess); auto& stUp = prepUpQSet.statement(); stUp.exchange(soci::use(seq)); @@ -202,7 +207,8 @@ HerderPersistenceImpl::saveSCPHistory(uint32_t seq, auto prepInsQSet = db.getPreparedStatement( "INSERT INTO scpquorums " "(qsethash, lastledgerseq, qset) VALUES " - "(:h, :l, :v);"); + "(:h, :l, :v);", + sess); auto& stIns = prepInsQSet.statement(); stIns.exchange(soci::use(qSetH)); @@ -230,6 +236,10 @@ HerderPersistence::copySCPHistoryToStream(Database& db, soci::session& sess, XDROutputFileStream& scpHistory) { ZoneScoped; + // TODO: this may conflict with main thread, as this is done in the + // background (this is the case in master today, so can be fixed + // later). + uint32_t begin = ledgerSeq, end = ledgerSeq + ledgerCount; size_t n = 0; @@ -372,38 +382,34 @@ void HerderPersistence::dropAll(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS scphistory"; + db.getRawSession() << "DROP TABLE IF EXISTS scphistory"; - db.getSession() << "DROP TABLE IF EXISTS scpquorums"; + db.getRawSession() << "DROP TABLE IF EXISTS scpquorums"; - db.getSession() << "CREATE TABLE scphistory (" - "nodeid CHARACTER(56) NOT NULL," - "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0)," - "envelope TEXT NOT NULL" - ")"; + db.getRawSession() << "CREATE TABLE scphistory (" + "nodeid CHARACTER(56) NOT NULL," + "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0)," + "envelope TEXT NOT NULL" + ")"; - db.getSession() << "CREATE INDEX scpenvsbyseq ON scphistory(ledgerseq)"; + db.getRawSession() << "CREATE INDEX scpenvsbyseq ON scphistory(ledgerseq)"; - db.getSession() << "CREATE TABLE scpquorums (" - "qsethash CHARACTER(64) NOT NULL," - "lastledgerseq INT NOT NULL CHECK (lastledgerseq >= 0)," - "qset TEXT NOT NULL," - "PRIMARY KEY (qsethash)" - ")"; + db.getRawSession() + << "CREATE TABLE scpquorums (" + "qsethash CHARACTER(64) NOT NULL," + "lastledgerseq INT NOT NULL CHECK (lastledgerseq >= 0)," + "qset TEXT NOT NULL," + "PRIMARY KEY (qsethash)" + ")"; - db.getSession() + db.getRawSession() << "CREATE INDEX scpquorumsbyseq ON scpquorums(lastledgerseq)"; - db.getSession() << "DROP TABLE IF EXISTS quoruminfo"; -} - -void -HerderPersistence::createQuorumTrackingTable(soci::session& sess) -{ - sess << "CREATE TABLE quoruminfo (" - "nodeid CHARACTER(56) NOT NULL," - "qsethash CHARACTER(64) NOT NULL," - "PRIMARY KEY (nodeid))"; + db.getRawSession() << "DROP TABLE IF EXISTS quoruminfo"; + db.getRawSession() << "CREATE TABLE quoruminfo (" + "nodeid CHARACTER(56) NOT NULL," + "qsethash CHARACTER(64) NOT NULL," + "PRIMARY KEY (nodeid))"; } void @@ -411,9 +417,9 @@ HerderPersistence::deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(db.getRawSession(), ledgerSeq, count, "scphistory", "ledgerseq"); - DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(db.getRawSession(), ledgerSeq, count, "scpquorums", "lastledgerseq"); } } diff --git a/src/herder/HerderSCPDriver.cpp b/src/herder/HerderSCPDriver.cpp index 47f1453d11..13f5bde947 100644 --- a/src/herder/HerderSCPDriver.cpp +++ b/src/herder/HerderSCPDriver.cpp @@ -203,6 +203,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, { 
ZoneScoped; uint64_t lastCloseTime; + releaseAssert(threadIsMain()); if (b.ext.v() != STELLAR_VALUE_SIGNED) { CLOG_TRACE(Herder, @@ -220,15 +221,15 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, } } - auto const& lcl = mLedgerManager.getLastClosedLedgerHeader().header; + auto lhhe = mLedgerManager.getLastClosedLedgerHeader(); // when checking close time, start with what we have locally - lastCloseTime = lcl.scpValue.closeTime; + lastCloseTime = lhhe.header.scpValue.closeTime; // if this value is not for our local state, // perform as many checks as we can - if (slotIndex != (lcl.ledgerSeq + 1)) + if (slotIndex != (lhhe.header.ledgerSeq + 1)) { - if (slotIndex == lcl.ledgerSeq) + if (slotIndex == lhhe.header.ledgerSeq) { // previous ledger if (b.closeTime != lastCloseTime) @@ -239,7 +240,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, return SCPDriver::kInvalidValue; } } - else if (slotIndex < lcl.ledgerSeq) + else if (slotIndex < lhhe.header.ledgerSeq) { // basic sanity check on older value if (b.closeTime >= lastCloseTime) @@ -322,7 +323,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, res = SCPDriver::kInvalidValue; } - else if (!checkAndCacheTxSetValid(*txSet, closeTimeOffset)) + else if (!checkAndCacheTxSetValid(*txSet, lhhe, closeTimeOffset)) { CLOG_DEBUG(Herder, "HerderSCPDriver::validateValue i: {} invalid txSet {}", @@ -612,6 +613,7 @@ HerderSCPDriver::combineCandidates(uint64_t slotIndex, std::set aggSet; + releaseAssert(!mLedgerManager.isApplying()); auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); Hash candidatesHash; @@ -1226,11 +1228,11 @@ HerderSCPDriver::wrapStellarValue(StellarValue const& sv) bool HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, + LedgerHeaderHistoryEntry const& lcl, uint64_t closeTimeOffset) const { - auto key = TxSetValidityKey{ - mApp.getLedgerManager().getLastClosedLedgerHeader().hash, - txSet.getContentsHash(), closeTimeOffset, closeTimeOffset}; + auto key = TxSetValidityKey{lcl.hash, txSet.getContentsHash(), + closeTimeOffset, closeTimeOffset}; bool* pRes = mTxSetValidCache.maybeGet(key); if (pRes == nullptr) @@ -1241,8 +1243,7 @@ HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, // might end up with malformed tx set that doesn't refer to the // LCL. 
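        // Note that `lcl` here is the snapshot taken once in
        // validateValueHelper and passed down, rather than a fresh read of
        // getLastClosedLedgerHeader(): the cache key above and the
        // previousLedgerHash comparison below are therefore tied to the same
        // view of the LCL, presumably so the validation path does not depend
        // on LedgerManager's mutable LCL state while ledgers may be applied
        // asynchronously.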
ApplicableTxSetFrameConstPtr applicableTxSet; - if (txSet.previousLedgerHash() == - mApp.getLedgerManager().getLastClosedLedgerHeader().hash) + if (txSet.previousLedgerHash() == lcl.hash) { applicableTxSet = txSet.prepareForApply(mApp); } @@ -1250,10 +1251,9 @@ HerderSCPDriver::checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, bool res = true; if (applicableTxSet == nullptr) { - CLOG_ERROR(Herder, - "validateValue i:{} can't prepare txSet {} for apply", - (mApp.getLedgerManager().getLastClosedLedgerNum() + 1), - hexAbbrev(txSet.getContentsHash())); + CLOG_ERROR( + Herder, "validateValue i:{} can't prepare txSet {} for apply", + (lcl.header.ledgerSeq + 1), hexAbbrev(txSet.getContentsHash())); res = false; } else @@ -1286,6 +1286,7 @@ uint64 HerderSCPDriver::getNodeWeight(NodeID const& nodeID, SCPQuorumSet const& qset, bool const isLocalNode) const { + releaseAssert(!mLedgerManager.isApplying()); Config const& cfg = mApp.getConfig(); bool const unsupportedProtocol = protocolVersionIsBefore( mApp.getLedgerManager() diff --git a/src/herder/HerderSCPDriver.h b/src/herder/HerderSCPDriver.h index f5f74e0365..62443f8e27 100644 --- a/src/herder/HerderSCPDriver.h +++ b/src/herder/HerderSCPDriver.h @@ -241,6 +241,7 @@ class HerderSCPDriver : public SCPDriver uint64_t slotIndex); bool checkAndCacheTxSetValid(TxSetXDRFrame const& txSet, + LedgerHeaderHistoryEntry const& lcl, uint64_t closeTimeOffset) const; }; } diff --git a/src/herder/PendingEnvelopes.cpp b/src/herder/PendingEnvelopes.cpp index 4db92c5619..fe47c72937 100644 --- a/src/herder/PendingEnvelopes.cpp +++ b/src/herder/PendingEnvelopes.cpp @@ -745,7 +745,7 @@ PendingEnvelopes::getQSet(Hash const& hash) else { auto& db = mApp.getDatabase(); - qset = HerderPersistence::getQuorumSet(db, db.getSession(), hash); + qset = HerderPersistence::getQuorumSet(db, db.getRawSession(), hash); } if (qset) { @@ -814,7 +814,7 @@ PendingEnvelopes::rebuildQuorumTrackerState() // see if we had some information for that node auto& db = mApp.getDatabase(); auto h = HerderPersistence::getNodeQuorumSet( - db, db.getSession(), id); + db, db.getRawSession(), id); if (h) { res = getQSet(*h); diff --git a/src/herder/PendingEnvelopes.h b/src/herder/PendingEnvelopes.h index afb194dfce..4ffcdf12a6 100644 --- a/src/herder/PendingEnvelopes.h +++ b/src/herder/PendingEnvelopes.h @@ -62,6 +62,10 @@ class PendingEnvelopes using TxSetFramCacheItem = std::pair; // recent txsets + // Note on thread-safety: the cache must be maintained strictly by the main + // thread Other threads may reference TxSetXDRFrameConstPtr, which is safe, + // because shared_ptr ref counting is thread-safe TxSetXDRFrameConstPtr + // itself is immutable, and thus thread-safe RandomEvictionCache mTxSetCache; // weak references to all known txsets UnorderedMap> mKnownTxSets; diff --git a/src/herder/TxSetFrame.cpp b/src/herder/TxSetFrame.cpp index cf4afc218d..d80752e27a 100644 --- a/src/herder/TxSetFrame.cpp +++ b/src/herder/TxSetFrame.cpp @@ -430,14 +430,14 @@ phaseTxsAreValid(TxSetPhaseFrame const& phase, Application& app, uint64_t upperBoundCloseTimeOffset) { ZoneScoped; + releaseAssert(threadIsMain()); // This is done so minSeqLedgerGap is validated against the next // ledgerSeq, which is what will be used at apply time // Grab read-only latest ledger state; This is only used to validate tx sets // for LCL+1 LedgerSnapshot ls(app); - ls.getLedgerHeader().currentToModify().ledgerSeq = - app.getLedgerManager().getLastClosedLedgerNum() + 1; + ls.getLedgerHeader().currentToModify().ledgerSeq += 1; for (auto 
const& tx : phase) { auto txResult = tx->checkValid(app.getAppConnector(), ls, 0, @@ -526,7 +526,9 @@ std::pair> applySurgePricing(TxSetPhase phase, TxFrameList const& txs, Application& app) { ZoneScoped; - + releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); + auto const& lclHeader = app.getLedgerManager().getLastClosedLedgerHeader().header; std::vector hadTxNotFittingLane; @@ -703,6 +705,8 @@ makeTxSetFromTransactions(PerPhaseTransactionList const& txPhases, #endif ) { + releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); releaseAssert(txPhases.size() == invalidTxs.size()); releaseAssert(txPhases.size() <= static_cast(TxSetPhase::PHASE_COUNT)); @@ -875,6 +879,8 @@ makeTxSetFromTransactions(TxFrameList txs, Application& app, uint64_t upperBoundCloseTimeOffset, TxFrameList& invalidTxs, bool enforceTxsApplyOrder) { + releaseAssert(threadIsMain()); + releaseAssert(!app.getLedgerManager().isApplying()); auto lclHeader = app.getLedgerManager().getLastClosedLedgerHeader(); PerPhaseTransactionList perPhaseTxs; perPhaseTxs.resize(protocolVersionStartsFrom(lclHeader.header.ledgerVersion, @@ -1563,8 +1569,13 @@ ApplicableTxSetFrame::ApplicableTxSetFrame( , mPhases(phases) , mContentsHash(contentsHash) { - releaseAssert(previousLedgerHash == - app.getLedgerManager().getLastClosedLedgerHeader().hash); + // When applying in the background, the same check is performed in + // closeLedger already + if (threadIsMain()) + { + releaseAssert(previousLedgerHash == + app.getLedgerManager().getLastClosedLedgerHeader().hash); + } } ApplicableTxSetFrame::ApplicableTxSetFrame( @@ -1624,7 +1635,8 @@ ApplicableTxSetFrame::checkValid(Application& app, uint64_t upperBoundCloseTimeOffset) const { ZoneScoped; - auto& lcl = app.getLedgerManager().getLastClosedLedgerHeader(); + releaseAssert(threadIsMain()); + auto const& lcl = app.getLedgerManager().getLastClosedLedgerHeader(); // Start by checking previousLedgerHash if (lcl.hash != mPreviousLedgerHash) diff --git a/src/herder/Upgrades.cpp b/src/herder/Upgrades.cpp index e15080df62..07f64d1385 100644 --- a/src/herder/Upgrades.cpp +++ b/src/herder/Upgrades.cpp @@ -690,22 +690,22 @@ Upgrades::timeForUpgrade(uint64_t time) const void Upgrades::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS upgradehistory"; - db.getSession() << "CREATE TABLE upgradehistory (" - "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0), " - "upgradeindex INT NOT NULL, " - "upgrade TEXT NOT NULL, " - "changes TEXT NOT NULL, " - "PRIMARY KEY (ledgerseq, upgradeindex)" - ")"; - db.getSession() + db.getRawSession() << "DROP TABLE IF EXISTS upgradehistory"; + db.getRawSession() << "CREATE TABLE upgradehistory (" + "ledgerseq INT NOT NULL CHECK (ledgerseq >= 0), " + "upgradeindex INT NOT NULL, " + "upgrade TEXT NOT NULL, " + "changes TEXT NOT NULL, " + "PRIMARY KEY (ledgerseq, upgradeindex)" + ")"; + db.getRawSession() << "CREATE INDEX upgradehistbyseq ON upgradehistory (ledgerseq);"; } void Upgrades::dropSupportUpgradeHistory(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS upgradehistory"; + db.getRawSession() << "DROP TABLE IF EXISTS upgradehistory"; } static void diff --git a/src/herder/test/HerderTests.cpp b/src/herder/test/HerderTests.cpp index ca40142998..7df1b61e08 100644 --- a/src/herder/test/HerderTests.cpp +++ b/src/herder/test/HerderTests.cpp @@ -97,22 +97,9 @@ TEST_CASE_VERSIONS("standalone", "[herder][acceptance]") }; auto waitForExternalize = [&]() { - bool stop = false; auto prev = 
app->getLedgerManager().getLastClosedLedgerNum(); - VirtualTimer checkTimer(*app); - - auto check = [&](asio::error_code const& error) { - REQUIRE(!error); - REQUIRE(app->getLedgerManager().getLastClosedLedgerNum() > - prev); - stop = true; - }; - - checkTimer.expires_from_now( - Herder::EXP_LEDGER_TIMESPAN_SECONDS + - std::chrono::seconds(1)); - checkTimer.async_wait(check); - while (!stop) + while (app->getLedgerManager().getLastClosedLedgerNum() <= + prev + 1) { app->getClock().crank(true); } @@ -2552,10 +2539,10 @@ TEST_CASE("SCP State", "[herder]") REQUIRE(sim->getNode(nodeIDs[0]) ->getLedgerManager() - .getLastClosedLedgerNum() == expectedLedger); + .getLastClosedLedgerNum() >= expectedLedger); REQUIRE(sim->getNode(nodeIDs[1]) ->getLedgerManager() - .getLastClosedLedgerNum() == expectedLedger); + .getLastClosedLedgerNum() >= expectedLedger); lcl = sim->getNode(nodeIDs[0]) ->getLedgerManager() @@ -2653,7 +2640,7 @@ TEST_CASE("SCP State", "[herder]") // then let the nodes run a bit more, they should all externalize the // next ledger sim->crankUntil( - [&]() { return sim->haveAllExternalized(expectedLedger + 1, 5); }, + [&]() { return sim->haveAllExternalized(expectedLedger + 2, 6); }, 2 * numLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); // nodes are at least on ledger 7 (some may be on 8) @@ -2662,14 +2649,6 @@ TEST_CASE("SCP State", "[herder]") // All nodes are in sync REQUIRE(sim->getNode(nodeIDs[i])->getState() == Application::State::APP_SYNCED_STATE); - auto const& actual = sim->getNode(nodeIDs[i]) - ->getLedgerManager() - .getLastClosedLedgerHeader() - .header; - if (actual.ledgerSeq == expectedLedger + 1) - { - REQUIRE(actual.previousLedgerHash == lcl.hash); - } } } @@ -2783,8 +2762,8 @@ TEST_CASE("SCP checkpoint", "[catchup][herder]") auto mainNode = simulation->addNode(v0SecretKey, qSet, &cfg1); simulation->startAllNodes(); - auto& hm = mainNode->getHistoryManager(); - auto firstCheckpoint = hm.firstLedgerAfterCheckpointContaining(1); + auto firstCheckpoint = HistoryManager::firstLedgerAfterCheckpointContaining( + 1, mainNode->getConfig()); // Crank until we are halfway through the second checkpoint simulation->crankUntil( @@ -2803,7 +2782,8 @@ TEST_CASE("SCP checkpoint", "[catchup][herder]") mainNode->getConfig().MAX_SLOTS_TO_REMEMBER + 1); auto secondCheckpoint = - hm.firstLedgerAfterCheckpointContaining(firstCheckpoint); + HistoryManager::firstLedgerAfterCheckpointContaining( + firstCheckpoint, mainNode->getConfig()); // Crank until we complete the 2nd checkpoint simulation->crankUntil( @@ -3090,10 +3070,10 @@ TEST_CASE("soroban txs each parameter surge priced", "[soroban][herder]") bool hadSorobanSurgePricing = false; simulation->crankUntil( [&]() { - auto& lclHeader = nodes[0] - ->getLedgerManager() - .getLastClosedLedgerHeader() - .header; + auto const& lclHeader = nodes[0] + ->getLedgerManager() + .getLastClosedLedgerHeader() + .header; auto txSet = nodes[0]->getHerder().getTxSet( lclHeader.scpValue.txSetHash); GeneralizedTransactionSet xdrTxSet; @@ -3229,14 +3209,34 @@ TEST_CASE("overlay parallel processing") { auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE); - // Set threshold to 1 so all have to vote - auto simulation = - Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { - auto cfg = getTestConfig(i); - cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; - cfg.BACKGROUND_OVERLAY_PROCESSING = true; - return cfg; - }); + std::shared_ptr simulation; + + SECTION("background traffic processing") + { + // Set threshold to 1 so all have to vote 
+ simulation = + Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { + auto cfg = getTestConfig(i); + cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; + cfg.BACKGROUND_OVERLAY_PROCESSING = true; + return cfg; + }); + } + SECTION("background ledger close") + { + // Set threshold to 1 so all have to vote + simulation = + Topologies::core(4, 1, Simulation::OVER_TCP, networkID, [](int i) { + auto cfg = getTestConfig( + i, Config::TESTDB_BUCKET_DB_PERSISTENT_POSTGRES); + cfg.TESTING_UPGRADE_MAX_TX_SET_SIZE = 100; + cfg.EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = true; + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(500); + return cfg; + }); + } + simulation->startAllNodes(); auto nodes = simulation->getNodes(); uint32_t desiredTxRate = 1; @@ -3268,15 +3268,15 @@ TEST_CASE("overlay parallel processing") // soroban traffic currLoadGenCount = loadGenDone.count(); auto secondLoadGenCount = secondLoadGenDone.count(); - uint32_t const classicTxCount = 200; + uint32_t const txCount = 100; // Generate Soroban txs from one node loadGen.generateLoad(GeneratedLoadConfig::txLoad( LoadGenMode::SOROBAN_UPLOAD, 50, - /* nTxs */ 500, desiredTxRate, /* offset */ 0)); + /* nTxs */ txCount, desiredTxRate, /* offset */ 0)); // Generate classic txs from another node (with offset to prevent // overlapping accounts) secondLoadGen.generateLoad(GeneratedLoadConfig::txLoad( - LoadGenMode::PAY, 50, classicTxCount, desiredTxRate, + LoadGenMode::PAY, 50, txCount, desiredTxRate, /* offset */ 50)); simulation->crankUntil( @@ -3519,13 +3519,26 @@ checkHerder(Application& app, HerderImpl& herder, Herder::State expectedState, // received by a node, we fully control the state of Herder and LM (and whether // each component is in sync or out of sync) static void -herderExternalizesValuesWithProtocol(uint32_t version) +herderExternalizesValuesWithProtocol(uint32_t version, + bool parallelLedgerClose = false, + uint32_t delayCloseMs = 0) { auto networkID = sha256(getTestConfig().NETWORK_PASSPHRASE); auto simulation = std::make_shared( - Simulation::OVER_LOOPBACK, networkID, [version](int i) { - auto cfg = getTestConfig(i, Config::TESTDB_BUCKET_DB_PERSISTENT); + Simulation::OVER_LOOPBACK, networkID, [&](int i) { + auto cfg = getTestConfig( + i, parallelLedgerClose + ? 
Config::TESTDB_BUCKET_DB_PERSISTENT_POSTGRES + : Config::TESTDB_BUCKET_DB_PERSISTENT); cfg.TESTING_UPGRADE_LEDGER_PROTOCOL_VERSION = version; + if (parallelLedgerClose) + { + cfg.EXPERIMENTAL_PARALLEL_LEDGER_CLOSE = true; + // Add artifical delay to ledger close to increase chances of + // conflicts + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::milliseconds(delayCloseMs); + } return cfg; }); @@ -3583,7 +3596,7 @@ herderExternalizesValuesWithProtocol(uint32_t version) [&]() { return simulation->haveAllExternalized(destinationLedger, 100); }, - 2 * nLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); + 10 * nLedgers * Herder::EXP_LEDGER_TIMESPAN_SECONDS, false); return std::min(currentALedger(), currentCLedger()); }; @@ -3703,6 +3716,7 @@ herderExternalizesValuesWithProtocol(uint32_t version) Herder::ENVELOPE_STATUS_READY); REQUIRE(herder.recvSCPEnvelope(newMsgB.first, qset, newMsgB.second) == Herder::ENVELOPE_STATUS_READY); + simulation->crankForAtLeast(std::chrono::seconds(10), false); }; auto testOutOfOrder = [&](bool partial) { @@ -4283,6 +4297,7 @@ TEST_CASE("do not flood invalid transactions", "[herder]") VirtualClock clock; auto cfg = getTestConfig(); cfg.FLOOD_TX_PERIOD_MS = 1; // flood as fast as possible + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = std::chrono::seconds(0); auto app = createTestApplication(clock, cfg); auto& lm = app->getLedgerManager(); @@ -4335,6 +4350,8 @@ TEST_CASE("do not flood too many soroban transactions", cfg.FLOOD_OP_RATE_PER_LEDGER = 2.0; cfg.FLOOD_SOROBAN_TX_PERIOD_MS = 50; cfg.FLOOD_SOROBAN_RATE_PER_LEDGER = 2.0; + cfg.ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING = + std::chrono::seconds(0); return cfg; }); @@ -5304,7 +5321,8 @@ TEST_CASE("SCP message capture from previous ledger", "[herder]") // Prepare query auto& db = node->getDatabase(); auto prep = db.getPreparedStatement( - "SELECT envelope FROM scphistory WHERE ledgerseq = :l"); + "SELECT envelope FROM scphistory WHERE ledgerseq = :l", + db.getSession()); auto& st = prep.statement(); st.exchange(soci::use(ledgerNum)); std::string envStr; diff --git a/src/history/HistoryManager.h b/src/history/HistoryManager.h index 07b6f64fa9..4a17bb795d 100644 --- a/src/history/HistoryManager.h +++ b/src/history/HistoryManager.h @@ -320,13 +320,13 @@ class HistoryManager // getCheckpointFrequency() -- equivalently, the LCL is one _less_ than // a multiple of getCheckpointFrequency(). Returns true if checkpoint // publication of the LCL was queued, otherwise false. - virtual bool maybeQueueHistoryCheckpoint() = 0; + virtual bool maybeQueueHistoryCheckpoint(uint32_t lcl) = 0; // Checkpoint the LCL -- both the log of history from the previous // checkpoint to it, as well as the bucketlist of its state -- to a // publication-queue in the database. This should be followed shortly // (typically after commit) with a call to publishQueuedHistory. - virtual void queueCurrentHistory() = 0; + virtual void queueCurrentHistory(uint32_t lcl) = 0; // Return the youngest ledger still in the outgoing publish queue; // returns 0 if the publish queue has nothing in it. 
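The HistoryManager changes above and below follow a single pattern: callers now pass the relevant ledger sequence explicitly instead of letting HistoryManager read the LCL from LedgerManager, and the pure checkpoint arithmetic becomes static helpers parameterized by Config. A minimal sketch of the assumed call pattern follows; the free function name and the example values are illustrative and not part of the patch:

#include "history/HistoryManager.h"
#include "main/Config.h"

namespace stellar
{
// Sketch only: mirrors the call sites in closeLedger later in this patch.
void
afterLedgerApplied(HistoryManager& hm, Config const& cfg,
                   uint32_t closedLedgerSeq)
{
    // Queue a checkpoint for the ledger that was just applied, inside the
    // same SQL transaction as the ledger itself...
    hm.maybeQueueHistoryCheckpoint(closedLedgerSeq);

    // ...and, after the transaction commits, finalize checkpoint files.
    hm.maybeCheckpointComplete(closedLedgerSeq);

    // Checkpoint arithmetic no longer needs a HistoryManager instance;
    // e.g. with the default 64-ledger frequency, ledger 100 belongs to the
    // checkpoint ending at 127.
    auto checkpoint =
        HistoryManager::checkpointContainingLedger(closedLedgerSeq, cfg);
    (void)checkpoint;
}
}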
@@ -341,7 +341,7 @@ class HistoryManager virtual size_t publishQueuedHistory() = 0; // Prepare checkpoint files for publishing - virtual void maybeCheckpointComplete() = 0; + virtual void maybeCheckpointComplete(uint32_t lcl) = 0; // Migrate SQL-based publish queue to the new file format // (one-time call during database schema upgrade path) diff --git a/src/history/HistoryManagerImpl.cpp b/src/history/HistoryManagerImpl.cpp index 4f1932898a..a8d9e23c5c 100644 --- a/src/history/HistoryManagerImpl.cpp +++ b/src/history/HistoryManagerImpl.cpp @@ -375,9 +375,8 @@ HistoryManager::getMaxLedgerQueuedToPublish(Config const& cfg) } bool -HistoryManagerImpl::maybeQueueHistoryCheckpoint() +HistoryManagerImpl::maybeQueueHistoryCheckpoint(uint32_t lcl) { - uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig())) { return false; @@ -390,15 +389,14 @@ HistoryManagerImpl::maybeQueueHistoryCheckpoint() return false; } - queueCurrentHistory(); + queueCurrentHistory(lcl); return true; } void -HistoryManagerImpl::queueCurrentHistory() +HistoryManagerImpl::queueCurrentHistory(uint32_t ledger) { ZoneScoped; - auto ledger = mApp.getLedgerManager().getLastClosedLedgerNum(); LiveBucketList bl; if (mApp.getConfig().MODE_ENABLES_BUCKETLIST) @@ -527,9 +525,8 @@ HistoryManagerImpl::publishQueuedHistory() } void -HistoryManagerImpl::maybeCheckpointComplete() +HistoryManagerImpl::maybeCheckpointComplete(uint32_t lcl) { - uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); if (!publishCheckpointOnLedgerClose(lcl, mApp.getConfig()) || !mApp.getHistoryArchiveManager().publishEnabled()) { @@ -702,7 +699,7 @@ HistoryManagerImpl::restoreCheckpoint(uint32_t lcl) }); // Maybe finalize checkpoint if we're at a checkpoint boundary and // haven't rotated yet. 
No-op if checkpoint has been rotated already - maybeCheckpointComplete(); + maybeCheckpointComplete(lcl); } } diff --git a/src/history/HistoryManagerImpl.h b/src/history/HistoryManagerImpl.h index 0ae2ae3378..60c146fd26 100644 --- a/src/history/HistoryManagerImpl.h +++ b/src/history/HistoryManagerImpl.h @@ -46,15 +46,15 @@ class HistoryManagerImpl : public HistoryManager void logAndUpdatePublishStatus() override; - bool maybeQueueHistoryCheckpoint() override; + bool maybeQueueHistoryCheckpoint(uint32_t lcl) override; - void queueCurrentHistory() override; + void queueCurrentHistory(uint32_t lcl) override; void takeSnapshotAndPublish(HistoryArchiveState const& has); size_t publishQueuedHistory() override; - void maybeCheckpointComplete() override; + void maybeCheckpointComplete(uint32_t lcl) override; void dropSQLBasedPublish() override; std::vector diff --git a/src/invariant/BucketListIsConsistentWithDatabase.cpp b/src/invariant/BucketListIsConsistentWithDatabase.cpp index 798059d9de..529077e721 100644 --- a/src/invariant/BucketListIsConsistentWithDatabase.cpp +++ b/src/invariant/BucketListIsConsistentWithDatabase.cpp @@ -7,6 +7,8 @@ #include "bucket/BucketManager.h" #include "bucket/LiveBucket.h" #include "bucket/LiveBucketList.h" +#include "crypto/Hex.h" +#include "database/Database.h" #include "history/HistoryArchive.h" #include "invariant/InvariantManager.h" #include "ledger/LedgerManager.h" @@ -157,7 +159,8 @@ BucketListIsConsistentWithDatabase::checkEntireBucketlist() throw std::runtime_error(s); } - if (mApp.getPersistentState().getState(PersistentState::kDBBackend) != + if (mApp.getPersistentState().getState(PersistentState::kDBBackend, + mApp.getDatabase().getSession()) != BucketIndex::DB_BACKEND_STATE) { throw std::runtime_error( diff --git a/src/ledger/LedgerHeaderUtils.cpp b/src/ledger/LedgerHeaderUtils.cpp index 0835439355..952b0c34d2 100644 --- a/src/ledger/LedgerHeaderUtils.cpp +++ b/src/ledger/LedgerHeaderUtils.cpp @@ -43,7 +43,7 @@ isValid(LedgerHeader const& lh) } void -storeInDatabase(Database& db, LedgerHeader const& header) +storeInDatabase(Database& db, LedgerHeader const& header, SessionWrapper& sess) { ZoneScoped; if (!isValid(header)) @@ -64,7 +64,8 @@ storeInDatabase(Database& db, LedgerHeader const& header) "INSERT INTO ledgerheaders " "(ledgerhash, prevhash, bucketlisthash, ledgerseq, closetime, data) " "VALUES " - "(:h, :ph, :blh, :seq, :ct, :data)"); + "(:h, :ph, :blh, :seq, :ct, :data)", + sess); auto& st = prep.statement(); st.exchange(soci::use(hash)); st.exchange(soci::use(prevHash)); @@ -112,7 +113,8 @@ loadByHash(Database& db, Hash const& hash) std::string headerEncoded; auto prep = db.getPreparedStatement("SELECT data FROM ledgerheaders " - "WHERE ledgerhash = :h"); + "WHERE ledgerhash = :h", + db.getSession()); auto& st = prep.statement(); st.exchange(soci::into(headerEncoded)); st.exchange(soci::use(hash_s)); @@ -144,8 +146,8 @@ loadMaxLedgerSeq(Database& db) ZoneScoped; uint32_t seq = 0; soci::indicator maxIndicator; - auto prep = - db.getPreparedStatement("SELECT MAX(ledgerseq) FROM ledgerheaders"); + auto prep = db.getPreparedStatement( + "SELECT MAX(ledgerseq) FROM ledgerheaders", db.getSession()); auto& st = prep.statement(); st.exchange(soci::into(seq, maxIndicator)); st.define_and_bind(); @@ -188,10 +190,10 @@ loadBySequence(Database& db, soci::session& sess, uint32_t seq) } void -deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) +deleteOldEntries(soci::session& sess, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - 
DatabaseUtils::deleteOldEntriesHelper(db.getSession(), ledgerSeq, count, + DatabaseUtils::deleteOldEntriesHelper(sess, ledgerSeq, count, "ledgerheaders", "ledgerseq"); } @@ -232,17 +234,17 @@ dropAll(Database& db) { std::string coll = db.getSimpleCollationClause(); - db.getSession() << "DROP TABLE IF EXISTS ledgerheaders;"; - db.getSession() << "CREATE TABLE ledgerheaders (" - << "ledgerhash CHARACTER(64) " << coll - << " PRIMARY KEY," - << "prevhash CHARACTER(64) NOT NULL," - "bucketlisthash CHARACTER(64) NOT NULL," - "ledgerseq INT UNIQUE CHECK (ledgerseq >= 0)," - "closetime BIGINT NOT NULL CHECK (closetime >= 0)," - "data TEXT NOT NULL" - ");"; - db.getSession() + db.getRawSession() << "DROP TABLE IF EXISTS ledgerheaders;"; + db.getRawSession() + << "CREATE TABLE ledgerheaders (" + << "ledgerhash CHARACTER(64) " << coll << " PRIMARY KEY," + << "prevhash CHARACTER(64) NOT NULL," + "bucketlisthash CHARACTER(64) NOT NULL," + "ledgerseq INT UNIQUE CHECK (ledgerseq >= 0)," + "closetime BIGINT NOT NULL CHECK (closetime >= 0)," + "data TEXT NOT NULL" + ");"; + db.getRawSession() << "CREATE INDEX ledgersbyseq ON ledgerheaders ( ledgerseq );"; } } diff --git a/src/ledger/LedgerHeaderUtils.h b/src/ledger/LedgerHeaderUtils.h index e165570e1a..ae50c082d7 100644 --- a/src/ledger/LedgerHeaderUtils.h +++ b/src/ledger/LedgerHeaderUtils.h @@ -18,7 +18,8 @@ uint32_t getFlags(LedgerHeader const& lh); bool isValid(LedgerHeader const& lh); -void storeInDatabase(Database& db, LedgerHeader const& header); +void storeInDatabase(Database& db, LedgerHeader const& header, + SessionWrapper& sess); LedgerHeader decodeFromData(std::string const& data); @@ -29,7 +30,7 @@ std::shared_ptr loadBySequence(Database& db, soci::session& sess, uint32_t loadMaxLedgerSeq(Database& db); -void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count); +void deleteOldEntries(soci::session& sess, uint32_t ledgerSeq, uint32_t count); size_t copyToStream(Database& db, soci::session& sess, uint32_t ledgerSeq, uint32_t ledgerCount, CheckpointBuilder& checkpointBuilder); diff --git a/src/ledger/LedgerManager.h b/src/ledger/LedgerManager.h index 4a181701c4..aaf32b3de8 100644 --- a/src/ledger/LedgerManager.h +++ b/src/ledger/LedgerManager.h @@ -64,6 +64,7 @@ class LedgerManager }; virtual void moveToSynced() = 0; + virtual void beginApply() = 0; virtual State getState() const = 0; virtual std::string getStateHuman() const = 0; @@ -90,7 +91,8 @@ class LedgerManager // close event. This is the most common cause of LedgerManager advancing // from one ledger to the next: the network reached consensus on // `ledgerData`. - virtual void valueExternalized(LedgerCloseData const& ledgerData) = 0; + virtual void valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) = 0; // Return the LCL header and (complete, immutable) hash. virtual LedgerHeaderHistoryEntry const& @@ -101,6 +103,7 @@ class LedgerManager // return the HAS that corresponds to the last closed ledger as persisted in // the database + // This function return of copy of latest HAS, so it's thread-safe. virtual HistoryArchiveState getLastClosedLedgerHAS() = 0; // Return the sequence number of the LCL. @@ -174,9 +177,18 @@ class LedgerManager // Forcibly close the current ledger, applying `ledgerData` as the consensus // changes. This is normally done automatically as part of - // `valueExternalized()`; this method is present in the public interface to - // permit testing. 
- virtual void closeLedger(LedgerCloseData const& ledgerData) = 0; + // `valueExternalized()` during normal operation (in which case + // `calledViaExternalize` should be set to true), but can also be called + // directly by catchup (with `calledViaExternalize` false in this case). + virtual void closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) = 0; +#ifdef BUILD_TESTS + void + closeLedger(LedgerCloseData const& ledgerData) + { + closeLedger(ledgerData, /* externalize */ false); + } +#endif // deletes old entries stored in the database virtual void deleteOldEntries(Database& db, uint32_t ledgerSeq, @@ -192,5 +204,7 @@ class LedgerManager virtual ~LedgerManager() { } + + virtual bool isApplying() const = 0; }; } diff --git a/src/ledger/LedgerManagerImpl.cpp b/src/ledger/LedgerManagerImpl.cpp index d62f5888bb..2c35fb1e2f 100644 --- a/src/ledger/LedgerManagerImpl.cpp +++ b/src/ledger/LedgerManagerImpl.cpp @@ -40,6 +40,7 @@ #include "util/XDRCereal.h" #include "util/XDRStream.h" #include "work/WorkScheduler.h" +#include "xdrpp/printer.h" #include @@ -165,9 +166,20 @@ LedgerManagerImpl::moveToSynced() setState(LM_SYNCED_STATE); } +void +LedgerManagerImpl::beginApply() +{ + // Go into "applying" state, this will prevent catchup from starting + mCurrentlyApplyingLedger = true; + // Notify Herder that application star:ted, so it won't fire out of sync + // timer + mApp.getHerder().beginApply(); +} + void LedgerManagerImpl::setState(State s) { + releaseAssert(threadIsMain()); if (s != getState()) { std::string oldState = getStateHuman(); @@ -238,7 +250,10 @@ LedgerManagerImpl::startNewLedger(LedgerHeader const& genesisLedger) CLOG_INFO(Ledger, "Established genesis ledger, closing"); CLOG_INFO(Ledger, "Root account: {}", skey.getStrKeyPublic()); CLOG_INFO(Ledger, "Root account seed: {}", skey.getStrKeySeed().value); - ledgerClosed(ltx, /*ledgerCloseMeta*/ nullptr, /*initialLedgerVers*/ 0); + auto output = + ledgerClosed(ltx, /*ledgerCloseMeta*/ nullptr, /*initialLedgerVers*/ 0); + updateCurrentLedgerState(output); + ltx.commit(); } @@ -272,8 +287,8 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) ZoneScoped; // Step 1. Load LCL state from the DB and extract latest ledger hash - string lastLedger = - mApp.getPersistentState().getState(PersistentState::kLastClosedLedger); + string lastLedger = mApp.getPersistentState().getState( + PersistentState::kLastClosedLedger, mApp.getDatabase().getSession()); if (lastLedger.empty()) { @@ -284,6 +299,11 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) CLOG_INFO(Ledger, "Last closed ledger (LCL) hash is {}", lastLedger); Hash lastLedgerHash = hexToBin256(lastLedger); + HistoryArchiveState has; + has.fromString(mApp.getPersistentState().getState( + PersistentState::kHistoryArchiveState, + mApp.getDatabase().getSession())); + // Step 2. 
Restore LedgerHeader from DB based on the ledger hash derived // earlier, or verify we're at genesis if in no-history mode std::optional latestLedgerHeader; @@ -306,7 +326,7 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) { throw std::runtime_error("Could not load ledger from database"); } - HistoryArchiveState has = getLastClosedLedgerHAS(); + if (currentLedger->ledgerSeq != has.currentLedger) { throw std::runtime_error("Invalid database state: last known " @@ -333,7 +353,6 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) releaseAssert(latestLedgerHeader.has_value()); - HistoryArchiveState has = getLastClosedLedgerHAS(); auto missing = mApp.getBucketManager().checkForMissingBucketsFiles(has); auto pubmissing = mApp.getHistoryManager().getMissingBucketsReferencedByPublishQueue(); @@ -366,7 +385,8 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) } // Step 4. Restore LedgerManager's internal state - advanceLedgerPointers(*latestLedgerHeader); + auto output = advanceLedgerPointers(*latestLedgerHeader, has); + updateCurrentLedgerState(output); // Maybe truncate checkpoint files if we're restarting after a crash // in closeLedger (in which case any modifications to the ledger state have @@ -485,14 +505,8 @@ LedgerManagerImpl::getLastClosedLedgerHeader() const HistoryArchiveState LedgerManagerImpl::getLastClosedLedgerHAS() { - ZoneScoped; releaseAssert(threadIsMain()); - - string hasString = mApp.getPersistentState().getState( - PersistentState::kHistoryArchiveState); - HistoryArchiveState has; - has.fromString(hasString); - return has; + return mLastClosedLedgerHAS; } uint32_t @@ -513,7 +527,6 @@ LedgerManagerImpl::getSorobanNetworkConfigReadOnly() SorobanNetworkConfig const& LedgerManagerImpl::getSorobanNetworkConfigForApply() { - // Must be called from ledger close thread only releaseAssert(mSorobanNetworkConfigForApply); return *mSorobanNetworkConfigForApply; } @@ -597,12 +610,11 @@ LedgerManagerImpl::publishSorobanMetrics() // called by txherder void -LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData) +LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) { ZoneScoped; - - // Capture LCL before we do any processing (which may trigger ledger close) - auto lcl = getLastClosedLedgerNum(); + releaseAssert(threadIsMain()); CLOG_INFO(Ledger, "Got consensus: [seq={}, prev={}, txs={}, ops={}, sv: {}]", @@ -620,67 +632,20 @@ LedgerManagerImpl::valueExternalized(LedgerCloseData const& ledgerData) releaseAssert(false); } - closeLedgerIf(ledgerData); - auto& cm = mApp.getCatchupManager(); - - cm.processLedger(ledgerData); - - // We set the state to synced - // if we have closed the latest ledger we have heard of. - bool appliedLatest = false; - if (cm.getLargestLedgerSeqHeard() == getLastClosedLedgerNum()) - { - setState(LM_SYNCED_STATE); - appliedLatest = true; - } - - // New ledger(s) got closed, notify Herder - if (getLastClosedLedgerNum() > lcl) - { - CLOG_DEBUG(Ledger, - "LedgerManager::valueExternalized LCL advanced {} -> {}", - lcl, getLastClosedLedgerNum()); - mApp.getHerder().lastClosedLedgerIncreased(appliedLatest); - } -} - -void -LedgerManagerImpl::closeLedgerIf(LedgerCloseData const& ledgerData) -{ - ZoneScoped; - if (mLastClosedLedger.header.ledgerSeq + 1 == ledgerData.getLedgerSeq()) - { - auto& cm = mApp.getCatchupManager(); - // if catchup work is running, we don't want ledger manager to close - // this ledger and potentially cause issues. 
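        // Note: closeLedgerIf is deleted wholesale in this hunk; the
        // sequencing it used to implement (apply if next in order, skip if
        // stale, buffer and catch up otherwise) now lives in
        // CatchupManager::processLedger, whose ProcessLedgerResult drives
        // the reworked valueExternalized above.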
- if (cm.isCatchupInitialized() && !cm.catchupWorkIsDone()) - { - CLOG_INFO( - Ledger, - "Can't close ledger: {} in LM because catchup is running", - ledgerAbbrev(mLastClosedLedger)); - return; - } - - closeLedger(ledgerData); - CLOG_INFO(Ledger, "Closed ledger: {}", ledgerAbbrev(mLastClosedLedger)); - } - else if (ledgerData.getLedgerSeq() <= mLastClosedLedger.header.ledgerSeq) - { - CLOG_INFO( - Ledger, - "Skipping close ledger: local state is {}, more recent than {}", - mLastClosedLedger.header.ledgerSeq, ledgerData.getLedgerSeq()); - } - else + auto res = cm.processLedger(ledgerData, isLatestSlot); + // Go into catchup if we have any future ledgers we're unable to apply + // sequentially. + if (res == + CatchupManager::ProcessLedgerResult::WAIT_TO_APPLY_BUFFERED_OR_CATCHUP) { if (mState != LM_CATCHING_UP_STATE) { // Out of sync, buffer what we just heard and start catchup. - CLOG_INFO( - Ledger, "Lost sync, local LCL is {}, network closed ledger {}", - mLastClosedLedger.header.ledgerSeq, ledgerData.getLedgerSeq()); + CLOG_INFO(Ledger, + "Lost sync, local LCL is {}, network closed ledger {}", + getLastClosedLedgerHeader().header.ledgerSeq, + ledgerData.getLedgerSeq()); } setState(LM_CATCHING_UP_STATE); @@ -746,6 +711,85 @@ LedgerManagerImpl::emitNextMeta() mNextMetaToEmit.reset(); } +void +maybeSimulateSleep(Config const& cfg, size_t opSize, + LogSlowExecution& closeTime) +{ + if (!cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.empty()) + { + // Sleep for a parameterized amount of time in simulation mode + std::discrete_distribution distribution( + cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.begin(), + cfg.OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.end()); + std::chrono::microseconds sleepFor{0}; + for (size_t i = 0; i < opSize; i++) + { + sleepFor += + cfg.OP_APPLY_SLEEP_TIME_DURATION_FOR_TESTING[distribution( + gRandomEngine)]; + } + std::chrono::microseconds applicationTime = + closeTime.checkElapsedTime(); + if (applicationTime < sleepFor) + { + sleepFor -= applicationTime; + CLOG_DEBUG(Perf, "Simulate application: sleep for {} microseconds", + sleepFor.count()); + std::this_thread::sleep_for(sleepFor); + } + } +} + +asio::io_context& +getMetaIOContext(Application& app) +{ + return app.getConfig().parallelLedgerClose() + ? app.getLedgerCloseIOContext() + : app.getClock().getIOContext(); +} + +void +LedgerManagerImpl::ledgerCloseComplete(uint32_t lcl, bool calledViaExternalize, + LedgerCloseData const& ledgerData) +{ + // We just finished applying `lcl`, maybe change LM's state + // Also notify Herder so it can trigger next ledger. 
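    // A hypothetical numeric example of the checks below (illustrative
    // values):
    //
    //   lcl just applied             = 100
    //   latestQueuedToApply    (Q)   = 102   // two more ledgers queued
    //   latestHeardFromNetwork (L)   = 103
    //
    // When called via externalize, the asserts 100 <= 102 and 102 <= 103
    // hold; lcl != Q, so mCurrentlyApplyingLedger stays true; lcl != L, so
    // the node is not yet marked synced; Herder is still notified (with
    // appliedLatest == false), which updates the transaction queue but does
    // not yet restart heartbeat tracking or trigger the next ledger.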
+ + releaseAssert(threadIsMain()); + uint32_t latestHeardFromNetwork = + mApp.getCatchupManager().getLargestLedgerSeqHeard(); + uint32_t latestQueuedToApply = + mApp.getCatchupManager().getMaxScheduledToApply(); + if (calledViaExternalize) + { + releaseAssert(lcl <= latestQueuedToApply); + releaseAssert(latestQueuedToApply <= latestHeardFromNetwork); + } + + if (lcl == latestQueuedToApply) + { + mCurrentlyApplyingLedger = false; + } + + // Continue execution on the main thread + // if we have closed the latest ledger we have heard of, set state to + // "synced" + bool appliedLatest = false; + + if (latestHeardFromNetwork == lcl) + { + mApp.getLedgerManager().moveToSynced(); + appliedLatest = true; + } + + if (calledViaExternalize) + { + // New ledger(s) got closed, notify Herder + mApp.getHerder().lastClosedLedgerIncreased(appliedLatest, + ledgerData.getTxSet()); + } +} + /* This is the main method that closes the current ledger based on the close context that was computed by SCP or by the historical module @@ -753,8 +797,14 @@ during replays. */ void -LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) +LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) { + if (mApp.isStopping()) + { + return; + } + #ifdef BUILD_TESTS mLastLedgerTxMeta.clear(); #endif @@ -766,9 +816,13 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) LedgerTxn ltx(mApp.getLedgerTxnRoot()); auto header = ltx.loadHeader(); + auto prevHeader = + threadIsMain() ? getLastClosedLedgerHeader().header : header.current(); + auto prevHash = xdrSha256(prevHeader); + auto initialLedgerVers = header.current().ledgerVersion; ++header.current().ledgerSeq; - header.current().previousLedgerHash = mLastClosedLedger.hash; + header.current().previousLedgerHash = prevHash; CLOG_DEBUG(Ledger, "starting closeLedger() on ledgerSeq={}", header.current().ledgerSeq); @@ -776,6 +830,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) auto now = mApp.getClock().now(); mLedgerAgeClosed.Update(now - mLastClose); + // mLastClose is only accessed by a single thread mLastClose = now; mLedgerAge.set_count(0); @@ -793,15 +848,14 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) header.current().ledgerVersion)); } - if (txSet->previousLedgerHash() != getLastClosedLedgerHeader().hash) + if (txSet->previousLedgerHash() != prevHash) { CLOG_ERROR(Ledger, "TxSet mismatch: LCD wants {}, LCL is {}", ledgerAbbrev(ledgerData.getLedgerSeq() - 1, txSet->previousLedgerHash()), - ledgerAbbrev(getLastClosedLedgerHeader())); + ledgerAbbrev(prevHeader)); - CLOG_ERROR(Ledger, "{}", - xdrToCerealString(getLastClosedLedgerHeader(), "Full LCL")); + CLOG_ERROR(Ledger, "{}", xdrToCerealString(prevHeader, "Full LCL")); CLOG_ERROR(Ledger, "{}", POSSIBLY_CORRUPTED_LOCAL_DATA); throw std::runtime_error("txset mismatch"); @@ -845,8 +899,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) { if (mNextMetaToEmit) { - releaseAssert(mNextMetaToEmit->ledgerHeader().hash == - getLastClosedLedgerHeader().hash); + releaseAssert(mNextMetaToEmit->ledgerHeader().hash == prevHash); emitNextMeta(); } releaseAssert(!mNextMetaToEmit); @@ -860,11 +913,11 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) } // first, prefetch source accounts for txset, then charge fees - prefetchTxSourceIds(*applicableTxSet); - auto const mutableTxResults = - processFeesSeqNums(*applicableTxSet, ltx, ledgerCloseMeta, ledgerData); - // Subtle: after this call, `header` 
is invalidated, and is not safe to use
+    prefetchTxSourceIds(mApp.getLedgerTxnRoot(), *applicableTxSet, mApp.getConfig());
+    auto const mutableTxResults = processFeesSeqNums(
+        *applicableTxSet, ltx, ledgerCloseMeta, ledgerData);
+    // Subtle: after this call, `header` is invalidated, and is not safe to use
     auto txResultSet = applyTransactions(*applicableTxSet, mutableTxResults,
                                          ltx, ledgerCloseMeta);
     if (mApp.getConfig().MODE_STORES_HISTORY_MISC)
@@ -934,10 +987,11 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData)
         updateNetworkConfig(ltx);
     }
 
-    ledgerClosed(ltx, ledgerCloseMeta, initialLedgerVers);
+    auto closeLedgerResult =
+        ledgerClosed(ltx, ledgerCloseMeta, initialLedgerVers);
 
     if (ledgerData.getExpectedHash() &&
-        *ledgerData.getExpectedHash() != mLastClosedLedger.hash)
+        *ledgerData.getExpectedHash() != closeLedgerResult.ledgerHeader.hash)
     {
         throw std::runtime_error("Local node's ledger corrupted during close");
     }
@@ -945,7 +999,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData)
     if (mMetaStream || mMetaDebugStream)
     {
         releaseAssert(ledgerCloseMeta);
-        ledgerCloseMeta->ledgerHeader() = mLastClosedLedger;
+        ledgerCloseMeta->ledgerHeader() = closeLedgerResult.ledgerHeader;
 
         // At this point we've got a complete meta and we can store it to the
         // member variable: if we throw while committing below, we will at worst
@@ -958,7 +1012,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData)
     // This is unfortunate and it would be nice if we could make it not
     // be so subtle, but for the time being this is where we are.
     //
-    // 1. Queue any history-checkpoint to the database, _within_ the current
+    // 1. Queue any history-checkpoint, _within_ the current
     //    transaction. This way if there's a crash after commit and before
     //    we've published successfully, we'll re-publish on restart.
     //
@@ -969,20 +1023,25 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData)
     //    between commit and this step, core will attempt finalizing files again
     //    on restart.
     //
-    // 4. Start any queued checkpoint publishing, _after_ the commit so that
+    // 4. Start background eviction scan for the next ledger, _after_ the commit
+    //    so that it takes its snapshot of network setting from the
+    //    committed state.
+    //
+    // 5. Start any queued checkpoint publishing, _after_ the commit so that
     //    it takes its snapshot of history-rows from the committed state, but
     //    _before_ we GC any buckets (because this is the step where the
     //    bucket refcounts are incremented for the duration of the publish).
     //
-    // 5. Start background eviction scan for the next ledger, _after_ the commit
-    //    so that it takes its snapshot of network setting from the
-    //    committed state.
-    //
     // 6. GC unreferenced buckets. Only do this once publishes are in progress.
+    //
+    // 7. Finally, reflect newly closed ledger in LedgerManager's and Herder's
+    //    states: maybe move into SYNCED state, trigger next ledger, etc.
 
-    // step 1
+    // Step 1. Maybe queue the current checkpoint file for publishing; this
+    // should not race with main, since publish on main begins strictly _after_
+    // this call.
auto& hm = mApp.getHistoryManager(); - hm.maybeQueueHistoryCheckpoint(); + hm.maybeQueueHistoryCheckpoint(ledgerSeq); // step 2 ltx.commit(); @@ -992,64 +1051,75 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData) #endif // step 3 - hm.maybeCheckpointComplete(); + hm.maybeCheckpointComplete(ledgerSeq); - // step 4 - hm.publishQueuedHistory(); - hm.logAndUpdatePublishStatus(); - - // step 5 + // Step 4 if (protocolVersionStartsFrom(initialLedgerVers, SOROBAN_PROTOCOL_VERSION)) { - mApp.getBucketManager().startBackgroundEvictionScan(ledgerSeq + 1, - initialLedgerVers); + mApp.getBucketManager().startBackgroundEvictionScan( + ledgerSeq + 1, initialLedgerVers, + getSorobanNetworkConfigForApply()); } - // step 6 - mApp.getBucketManager().forgetUnreferencedBuckets(); + // Invoke completion handler on the _main_ thread: kick off publishing, + // cleanup bucket files, notify herder to trigger next ledger + auto completionHandler = + [this, ledgerSeq, calledViaExternalize, ledgerData, + ledgerOutput = std::move(closeLedgerResult)]() mutable { + releaseAssert(threadIsMain()); + updateCurrentLedgerState(ledgerOutput); + + // Step 5. Maybe kick off publishing on complete checkpoint files + auto& hm = mApp.getHistoryManager(); + hm.publishQueuedHistory(); + hm.logAndUpdatePublishStatus(); + + // Step 6. Clean up unreferenced buckets post-apply + { + // Ledger state might be updated at the same time, so protect GC + // call with state mutex + std::lock_guard guard(mLedgerStateMutex); + mApp.getBucketManager().forgetUnreferencedBuckets( + getLastClosedLedgerHAS()); + } + + // Step 7. Maybe set LedgerManager into synced state, maybe let + // Herder trigger next ledger + ledgerCloseComplete(ledgerSeq, calledViaExternalize, ledgerData); + CLOG_INFO(Ledger, "Ledger close complete: {}", ledgerSeq); + }; - if (!mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.empty()) + if (threadIsMain()) { - // Sleep for a parameterized amount of time in simulation mode - std::discrete_distribution distribution( - mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.begin(), - mApp.getConfig().OP_APPLY_SLEEP_TIME_WEIGHT_FOR_TESTING.end()); - std::chrono::microseconds sleepFor{0}; - auto txSetSizeOp = applicableTxSet->sizeOpTotal(); - for (size_t i = 0; i < txSetSizeOp; i++) - { - sleepFor += - mApp.getConfig() - .OP_APPLY_SLEEP_TIME_DURATION_FOR_TESTING[distribution( - gRandomEngine)]; - } - std::chrono::microseconds applicationTime = - closeLedgerTime.checkElapsedTime(); - if (applicationTime < sleepFor) - { - sleepFor -= applicationTime; - CLOG_DEBUG(Perf, "Simulate application: sleep for {} microseconds", - sleepFor.count()); - std::this_thread::sleep_for(sleepFor); - } + completionHandler(); + } + else + { + mApp.postOnMainThread(completionHandler, "ledgerCloseComplete"); } + maybeSimulateSleep(mApp.getConfig(), txSet->sizeOpTotalForLogging(), closeLedgerTime); std::chrono::duration ledgerTimeSeconds = ledgerTime.Stop(); - CLOG_DEBUG(Perf, "Applied ledger in {} seconds", ledgerTimeSeconds.count()); + CLOG_DEBUG(Perf, "Applied ledger {} in {} seconds", ledgerSeq, + ledgerTimeSeconds.count()); FrameMark; } - void LedgerManagerImpl::deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) { ZoneScoped; - soci::transaction txscope(db.getSession()); - db.clearPreparedStatementCache(); - LedgerHeaderUtils::deleteOldEntries(db, ledgerSeq, count); - HerderPersistence::deleteOldEntries(db, ledgerSeq, count); - db.clearPreparedStatementCache(); - txscope.commit(); + if 
(mApp.getConfig().parallelLedgerClose()) + { + auto session = + std::make_unique(mApp.getDatabase().getPool()); + LedgerHeaderUtils::deleteOldEntries(*session, ledgerSeq, count); + } + else + { + LedgerHeaderUtils::deleteOldEntries(db.getRawSession(), ledgerSeq, + count); + } } void @@ -1057,15 +1127,17 @@ LedgerManagerImpl::setLastClosedLedger( LedgerHeaderHistoryEntry const& lastClosed, bool storeInDB) { ZoneScoped; + releaseAssert(threadIsMain()); LedgerTxn ltx(mApp.getLedgerTxnRoot()); auto header = ltx.loadHeader(); header.current() = lastClosed.header; - storeCurrentLedger(header.current(), storeInDB, - /* appendToCheckpoint */ false); + auto has = storeCurrentLedger(header.current(), storeInDB, + /* appendToCheckpoint */ false); ltx.commit(); mRebuildInMemoryState = false; - advanceLedgerPointers(lastClosed.header); + updateCurrentLedgerState(advanceLedgerPointers(lastClosed.header, has)); + LedgerTxn ltx2(mApp.getLedgerTxnRoot()); if (protocolVersionStartsFrom(ltx2.loadHeader().current().ledgerVersion, SOROBAN_PROTOCOL_VERSION)) @@ -1083,7 +1155,12 @@ LedgerManagerImpl::manuallyAdvanceLedgerHeader(LedgerHeader const& header) "May only manually advance ledger header sequence number with " "MANUAL_CLOSE and RUN_STANDALONE"); } - advanceLedgerPointers(header, false); + HistoryArchiveState has; + has.fromString(mApp.getPersistentState().getState( + PersistentState::kHistoryArchiveState, + mApp.getDatabase().getSession())); + auto output = advanceLedgerPointers(header, has, false); + updateCurrentLedgerState(output); } void @@ -1100,9 +1177,9 @@ LedgerManagerImpl::setupLedgerCloseMetaStream() { // We can't be sure we're writing to a stream that supports fsync; // pipes typically error when you try. So we don't do it. - mMetaStream = std::make_unique( - mApp.getClock().getIOContext(), - /*fsyncOnClose=*/false); + mMetaStream = + std::make_unique(getMetaIOContext(mApp), + /*fsyncOnClose=*/false); std::regex fdrx("^fd:([0-9]+)$"); std::smatch sm; if (std::regex_match(cfg.METADATA_OUTPUT_STREAM, sm, fdrx)) @@ -1167,9 +1244,9 @@ LedgerManagerImpl::maybeResetLedgerCloseMetaDebugStream(uint32_t ledgerSeq) // such stream or a replacement for the one we just handed off to // flush-and-rotate. Either way, we should not have an existing one! 
releaseAssert(!mMetaDebugStream); - auto tmpStream = std::make_unique( - mApp.getClock().getIOContext(), - /*fsyncOnClose=*/true); + auto tmpStream = + std::make_unique(getMetaIOContext(mApp), + /*fsyncOnClose=*/true); auto metaDebugPath = metautils::getMetaDebugFilePath( mApp.getBucketManager().getBucketDir(), ledgerSeq); @@ -1230,23 +1307,32 @@ LedgerManagerImpl::getCurrentLedgerStateSnaphot() } void +LedgerManagerImpl::updateCurrentLedgerState(CloseLedgerOutput const& output) +{ + releaseAssert(threadIsMain()); + CLOG_DEBUG( + Ledger, "Advancing LCL: {} -> {}", ledgerAbbrev(mLastClosedLedger), + ledgerAbbrev(output.ledgerHeader.header, output.ledgerHeader.hash)); + + // Update ledger state as seen by the main thread + mLastClosedLedger = output.ledgerHeader; + mLastClosedLedgerHAS = output.has; + mSorobanNetworkConfigReadOnly = output.sorobanConfig; + mReadOnlyLedgerStateSnapshot = output.snapshot; +} + +LedgerManagerImpl::CloseLedgerOutput LedgerManagerImpl::advanceLedgerPointers(LedgerHeader const& header, + HistoryArchiveState const& has, bool debugLog) { auto ledgerHash = xdrSha256(header); - if (debugLog) - { - CLOG_DEBUG(Ledger, "Advancing LCL: {} -> {}", - ledgerAbbrev(mLastClosedLedger), - ledgerAbbrev(header, ledgerHash)); - } - - // NB: with parallel ledger close, this will have to be called strictly from - // the main thread, - mLastClosedLedger.hash = ledgerHash; - mLastClosedLedger.header = header; - mSorobanNetworkConfigReadOnly = mSorobanNetworkConfigForApply; + CloseLedgerOutput res; + res.ledgerHeader.hash = ledgerHash; + res.ledgerHeader.header = header; + res.has = has; + res.sorobanConfig = mSorobanNetworkConfigForApply; auto& bm = mApp.getBucketManager(); auto liveSnapshot = std::make_unique>( @@ -1254,22 +1340,20 @@ LedgerManagerImpl::advanceLedgerPointers(LedgerHeader const& header, auto hotArchiveSnapshot = std::make_unique>( bm.getHotArchiveBucketList(), header); + // Updating BL snapshot is thread-safe bm.getBucketSnapshotManager().updateCurrentSnapshot( std::move(liveSnapshot), std::move(hotArchiveSnapshot)); - // NB: with parallel ledger close, this will have to be called strictly from - // the main thread, - mReadOnlyLedgerStateSnapshot = + res.snapshot = bm.getBucketSnapshotManager().copySearchableLiveBucketListSnapshot(); + return res; } void -LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& rootLtx) +LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& ltx) { ZoneScoped; - releaseAssert(threadIsMain()); - - uint32_t ledgerVersion = rootLtx.loadHeader().current().ledgerVersion; + uint32_t ledgerVersion = ltx.loadHeader().current().ledgerVersion; if (protocolVersionStartsFrom(ledgerVersion, SOROBAN_PROTOCOL_VERSION)) { @@ -1279,7 +1363,7 @@ LedgerManagerImpl::updateNetworkConfig(AbstractLedgerTxn& rootLtx) std::make_shared(); } mSorobanNetworkConfigForApply->loadFromLedger( - rootLtx, mApp.getConfig().CURRENT_LEDGER_PROTOCOL_VERSION, + ltx, mApp.getConfig().CURRENT_LEDGER_PROTOCOL_VERSION, ledgerVersion); publishSorobanMetrics(); } @@ -1328,7 +1412,6 @@ LedgerManagerImpl::processFeesSeqNums( auto expectedResults = ledgerData.getExpectedResults(); if (expectedResults) { - releaseAssert(mApp.getCatchupManager().isCatchupInitialized()); expectedResultsIter = std::make_optional(expectedResults->results.begin()); } @@ -1426,10 +1509,11 @@ LedgerManagerImpl::processFeesSeqNums( } void -LedgerManagerImpl::prefetchTxSourceIds(ApplicableTxSetFrame const& txSet) +LedgerManagerImpl::prefetchTxSourceIds( + AbstractLedgerTxnParent& ltx,ApplicableTxSetFrame 
const& txSet, Config const& config) { ZoneScoped; - if (mApp.getConfig().PREFETCH_BATCH_SIZE > 0) + if (config.PREFETCH_BATCH_SIZE > 0) { UnorderedSet keys; for (auto const& phase : txSet.getPhases()) @@ -1439,15 +1523,16 @@ LedgerManagerImpl::prefetchTxSourceIds(ApplicableTxSetFrame const& txSet) tx->insertKeysForFeeProcessing(keys); } } - mApp.getLedgerTxnRoot().prefetchClassic(keys); + ltx.prefetchClassic(keys); } } void -LedgerManagerImpl::prefetchTransactionData(ApplicableTxSetFrame const& txSet) +LedgerManagerImpl::prefetchTransactionData( + AbstractLedgerTxnParent& ltx,ApplicableTxSetFrame const& txSet, Config const& config) { ZoneScoped; - if (mApp.getConfig().PREFETCH_BATCH_SIZE > 0) + if (config.PREFETCH_BATCH_SIZE > 0) { UnorderedSet sorobanKeys; auto lkMeter = make_unique(); @@ -1470,10 +1555,10 @@ LedgerManagerImpl::prefetchTransactionData(ApplicableTxSetFrame const& txSet) // visibility into the performance of each mode. if (!sorobanKeys.empty()) { - mApp.getLedgerTxnRoot().prefetchSoroban(sorobanKeys, lkMeter.get()); + ltx.prefetchSoroban(sorobanKeys, lkMeter.get()); } - mApp.getLedgerTxnRoot().prefetchClassic(classicKeys); + ltx.prefetchClassic(classicKeys); } } @@ -1504,8 +1589,9 @@ LedgerManagerImpl::applyTransactions( TransactionResultSet txResultSet; txResultSet.results.reserve(numTxs); - prefetchTransactionData(txSet); + prefetchTransactionData(mApp.getLedgerTxnRoot(), txSet, mApp.getConfig()); auto phases = txSet.getPhasesInApplyOrder(); + Hash sorobanBasePrngSeed = txSet.getContentsHash(); uint64_t txNum{0}; uint64_t txSucceeded{0}; @@ -1608,7 +1694,7 @@ LedgerManagerImpl::logTxApplyMetrics(AbstractLedgerTxn& ltx, size_t numTxs, TracyPlot("ledger.prefetch.hit-rate", hitRate); } -void +HistoryArchiveState LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, bool storeHeader, bool appendToCheckpoint) { @@ -1616,8 +1702,16 @@ LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, Hash hash = xdrSha256(header); releaseAssert(!isZero(hash)); + auto& sess = mApp.getLedgerTxnRoot().getSession(); mApp.getPersistentState().setState(PersistentState::kLastClosedLedger, - binToHex(hash)); + binToHex(hash), sess); + + if (mApp.getConfig().ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING.count() > + 0) + { + std::this_thread::sleep_for( + mApp.getConfig().ARTIFICIALLY_DELAY_LEDGER_CLOSE_FOR_TESTING); + } LiveBucketList bl; if (mApp.getConfig().MODE_ENABLES_BUCKETLIST) @@ -1631,16 +1725,18 @@ LedgerManagerImpl::storeCurrentLedger(LedgerHeader const& header, mApp.getConfig().NETWORK_PASSPHRASE); mApp.getPersistentState().setState(PersistentState::kHistoryArchiveState, - has.toString()); + has.toString(), sess); if (mApp.getConfig().MODE_STORES_HISTORY_LEDGERHEADERS && storeHeader) { - LedgerHeaderUtils::storeInDatabase(mApp.getDatabase(), header); + LedgerHeaderUtils::storeInDatabase(mApp.getDatabase(), header, sess); if (appendToCheckpoint) { mApp.getHistoryManager().appendLedgerHeader(header); } } + + return has; } // NB: This is a separate method so a testing subclass can override it. 
@@ -1651,6 +1747,7 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( LedgerHeader lh, uint32_t initialLedgerVers) { ZoneScoped; + // `ledgerClosed` protects this call with a mutex std::vector initEntries, liveEntries; std::vector deadEntries; auto blEnabled = mApp.getConfig().MODE_ENABLES_BUCKETLIST; @@ -1669,7 +1766,8 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( auto evictedState = mApp.getBucketManager().resolveBackgroundEvictionScan( - ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers); + ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers, + *mSorobanNetworkConfigForApply); if (protocolVersionStartsFrom( initialLedgerVers, @@ -1699,13 +1797,14 @@ LedgerManagerImpl::transferLedgerEntriesToBucketList( } } -void +LedgerManagerImpl::CloseLedgerOutput LedgerManagerImpl::ledgerClosed( AbstractLedgerTxn& ltx, std::unique_ptr const& ledgerCloseMeta, uint32_t initialLedgerVers) { ZoneScoped; + std::lock_guard guard(mLedgerStateMutex); auto ledgerSeq = ltx.loadHeader().current().ledgerSeq; auto currLedgerVers = ltx.loadHeader().current().ledgerVersion; CLOG_TRACE(Ledger, @@ -1739,15 +1838,18 @@ LedgerManagerImpl::ledgerClosed( protocolVersionStartsFrom(initialLedgerVers, SOROBAN_PROTOCOL_VERSION)) { ledgerCloseMeta->setNetworkConfiguration( - getSorobanNetworkConfigReadOnly(), + getSorobanNetworkConfigForApply(), mApp.getConfig().EMIT_LEDGER_CLOSE_META_EXT_V1); } - ltx.unsealHeader([this](LedgerHeader& lh) { + CloseLedgerOutput res; + ltx.unsealHeader([this, &res](LedgerHeader& lh) { mApp.getBucketManager().snapshotLedger(lh); - storeCurrentLedger(lh, /* storeHeader */ true, - /* appendToCheckpoint */ true); - advanceLedgerPointers(lh); + auto has = storeCurrentLedger(lh, /* storeHeader */ true, + /* appendToCheckpoint */ true); + res = advanceLedgerPointers(lh, has); }); + + return res; } } diff --git a/src/ledger/LedgerManagerImpl.h b/src/ledger/LedgerManagerImpl.h index cb96a14e67..c7c3931b06 100644 --- a/src/ledger/LedgerManagerImpl.h +++ b/src/ledger/LedgerManagerImpl.h @@ -42,6 +42,14 @@ class BasicWork; class LedgerManagerImpl : public LedgerManager { protected: + struct CloseLedgerOutput + { + LedgerHeaderHistoryEntry ledgerHeader; + std::shared_ptr sorobanConfig; + HistoryArchiveState has; + std::shared_ptr snapshot; + }; + Application& mApp; std::unique_ptr mMetaStream; std::unique_ptr mMetaDebugStream; @@ -49,6 +57,8 @@ class LedgerManagerImpl : public LedgerManager std::filesystem::path mMetaDebugPath; private: + // Cache LCL state, updates once a ledger (synchronized with + // mLedgerStateMutex) LedgerHeaderHistoryEntry mLastClosedLedger; // Read-only Soroban network configuration, accessible by main thread only. @@ -64,6 +74,7 @@ class LedgerManagerImpl : public LedgerManager // variable is not synchronized, since it should only be used by one thread // (main or ledger close). 
std::shared_ptr mSorobanNetworkConfigForApply; + HistoryArchiveState mLastClosedLedgerHAS; SorobanMetrics mSorobanMetrics; medida::Timer& mTransactionApply; @@ -83,13 +94,17 @@ class LedgerManagerImpl : public LedgerManager bool mRebuildInMemoryState{false}; SearchableSnapshotConstPtr mReadOnlyLedgerStateSnapshot; - std::unique_ptr mStartCatchup; + // Use mutex to guard read access to LCL and Soroban network config + mutable std::recursive_mutex mLedgerStateMutex; + medida::Timer& mCatchupDuration; std::unique_ptr mNextMetaToEmit; + bool mCurrentlyApplyingLedger{false}; - std::vector processFeesSeqNums( - ApplicableTxSetFrame const& txSet, AbstractLedgerTxn& ltxOuter, + + static std::vector processFeesSeqNums( + ApplicableTxSetFrame const& txSet, AbstractLedgerTxn& ltxOuter, std::unique_ptr const& ledgerCloseMeta, LedgerCloseData const& ledgerData); @@ -102,16 +117,19 @@ class LedgerManagerImpl : public LedgerManager // initialLedgerVers must be the ledger version at the start of the ledger. // On the ledger in which a protocol upgrade from vN to vN + 1 occurs, // initialLedgerVers must be vN. - void + CloseLedgerOutput ledgerClosed(AbstractLedgerTxn& ltx, std::unique_ptr const& ledgerCloseMeta, uint32_t initialLedgerVers); - void storeCurrentLedger(LedgerHeader const& header, bool storeHeader, - bool appendToCheckpoint); - void prefetchTransactionData(ApplicableTxSetFrame const& txSet); - void prefetchTxSourceIds(ApplicableTxSetFrame const& txSet); - void closeLedgerIf(LedgerCloseData const& ledgerData); + HistoryArchiveState storeCurrentLedger(LedgerHeader const& header, + bool storeHeader, + bool appendToCheckpoint); + static void + prefetchTransactionData(AbstractLedgerTxnParent& rootLtx, ApplicableTxSetFrame const& txSet, Config const& config); + static void + prefetchTxSourceIds(AbstractLedgerTxnParent& rootLtx, + ApplicableTxSetFrame const& txSet, Config const& config); State mState; @@ -127,6 +145,8 @@ class LedgerManagerImpl : public LedgerManager // as the actual ledger usage. void publishSorobanMetrics(); + void updateCurrentLedgerState(CloseLedgerOutput const& output); + protected: // initialLedgerVers must be the ledger version at the start of the ledger // and currLedgerVers is the ledger version in the current ltx header. These @@ -141,8 +161,11 @@ class LedgerManagerImpl : public LedgerManager std::unique_ptr const& ledgerCloseMeta, LedgerHeader lh, uint32_t initialLedgerVers); - void advanceLedgerPointers(LedgerHeader const& header, - bool debugLog = true); + // Update in-memory cached LCL state (this only happens at the end of ledger + // close) + CloseLedgerOutput advanceLedgerPointers(LedgerHeader const& header, + HistoryArchiveState const& has, + bool debugLog = true); void logTxApplyMetrics(AbstractLedgerTxn& ltx, size_t numTxs, size_t numOps); @@ -154,10 +177,12 @@ class LedgerManagerImpl : public LedgerManager // This call is read-only and hence `ltx` can be read-only. 
void updateNetworkConfig(AbstractLedgerTxn& ltx) override; void moveToSynced() override; + void beginApply() override; State getState() const override; std::string getStateHuman() const override; - void valueExternalized(LedgerCloseData const& ledgerData) override; + void valueExternalized(LedgerCloseData const& ledgerData, + bool isLatestSlot) override; uint32_t getLastMaxTxSetSize() const override; uint32_t getLastMaxTxSetSizeOps() const override; @@ -198,7 +223,10 @@ class LedgerManagerImpl : public LedgerManager std::shared_ptr archive, std::set> bucketsToRetain) override; - void closeLedger(LedgerCloseData const& ledgerData) override; + void closeLedger(LedgerCloseData const& ledgerData, + bool calledViaExternalize) override; + void ledgerCloseComplete(uint32_t lcl, bool calledViaExternalize, + LedgerCloseData const& ledgerData); void deleteOldEntries(Database& db, uint32_t ledgerSeq, uint32_t count) override; @@ -212,5 +240,10 @@ class LedgerManagerImpl : public LedgerManager SorobanMetrics& getSorobanMetrics() override; SearchableSnapshotConstPtr getCurrentLedgerStateSnaphot() override; + virtual bool + isApplying() const override + { + return mCurrentlyApplyingLedger; + } }; } diff --git a/src/ledger/test/InMemoryLedgerTxnRoot.cpp b/src/ledger/test/InMemoryLedgerTxnRoot.cpp index 75f1bb06c6..d2d2bd4ab1 100644 --- a/src/ledger/test/InMemoryLedgerTxnRoot.cpp +++ b/src/ledger/test/InMemoryLedgerTxnRoot.cpp @@ -140,7 +140,7 @@ void InMemoryLedgerTxnRoot::prepareNewObjects(size_t) SessionWrapper& InMemoryLedgerTxnRoot::getSession() const { - throw std::runtime_error("ERROR!!"); + throw std::runtime_error("called InMemoryLedgerTxnRoot::getSession"); } #ifdef BUILD_TESTS diff --git a/src/main/AppConnector.cpp b/src/main/AppConnector.cpp index d282e7eddd..904d6396f1 100644 --- a/src/main/AppConnector.cpp +++ b/src/main/AppConnector.cpp @@ -53,17 +53,22 @@ AppConnector::getSorobanNetworkConfigReadOnly() const return mApp.getLedgerManager().getSorobanNetworkConfigReadOnly(); } +SorobanNetworkConfig const& +AppConnector::getSorobanNetworkConfigForApply() const +{ + // releaseAssert(!threadIsMain() || !mConfig.parallelLedgerClose()); + return mApp.getLedgerManager().getSorobanNetworkConfigForApply(); +} + medida::MetricsRegistry& AppConnector::getMetrics() const { - releaseAssert(threadIsMain()); return mApp.getMetrics(); } SorobanMetrics& AppConnector::getSorobanMetrics() const { - releaseAssert(threadIsMain()); return mApp.getLedgerManager().getSorobanMetrics(); } @@ -72,7 +77,8 @@ AppConnector::checkOnOperationApply(Operation const& operation, OperationResult const& opres, LedgerTxnDelta const& ltxDelta) { - releaseAssert(threadIsMain()); + // Only one thread can call this method + releaseAssert(threadIsMain() || mConfig.parallelLedgerClose()); mApp.getInvariantManager().checkOnOperationApply(operation, opres, ltxDelta); } @@ -80,7 +86,7 @@ AppConnector::checkOnOperationApply(Operation const& operation, Hash const& AppConnector::getNetworkID() const { - releaseAssert(threadIsMain()); + // NetworkID is a const return mApp.getNetworkID(); } @@ -137,4 +143,11 @@ AppConnector::checkScheduledAndCache( return mApp.getOverlayManager().checkScheduledAndCache(msgTracker); } +LedgerHeaderHistoryEntry +AppConnector::getLastClosedLedgerHeader() const +{ + // LCL is thread-safe (it's a copy) + return mApp.getLedgerManager().getLastClosedLedgerHeader(); +} + } \ No newline at end of file diff --git a/src/main/AppConnector.h b/src/main/AppConnector.h index ec35925133..4f2c565982 100644 --- 
a/src/main/AppConnector.h +++ b/src/main/AppConnector.h @@ -34,8 +34,6 @@ class AppConnector OverlayManager& getOverlayManager(); BanManager& getBanManager(); bool shouldYield() const; - SorobanNetworkConfig const& getSorobanNetworkConfigReadOnly() const; - medida::MetricsRegistry& getMetrics() const; SorobanMetrics& getSorobanMetrics() const; void checkOnOperationApply(Operation const& operation, OperationResult const& opres, @@ -55,5 +53,10 @@ class AppConnector // This method is always exclusively called from one thread bool checkScheduledAndCache(std::shared_ptr msgTracker); + SorobanNetworkConfig const& getSorobanNetworkConfigReadOnly() const; + SorobanNetworkConfig const& getSorobanNetworkConfigForApply() const; + + medida::MetricsRegistry& getMetrics() const; + LedgerHeaderHistoryEntry getLastClosedLedgerHeader() const; }; } \ No newline at end of file diff --git a/src/main/Application.cpp b/src/main/Application.cpp index 991c11f48a..8ff31b3b4f 100644 --- a/src/main/Application.cpp +++ b/src/main/Application.cpp @@ -4,6 +4,7 @@ #include "Application.h" #include "ApplicationImpl.h" +#include "database/Database.h" #include "util/GlobalChecks.h" #include @@ -21,12 +22,13 @@ validateNetworkPassphrase(Application::pointer app) } auto& persistentState = app->getPersistentState(); - std::string prevNetworkPassphrase = - persistentState.getState(PersistentState::kNetworkPassphrase); + std::string prevNetworkPassphrase = persistentState.getState( + PersistentState::kNetworkPassphrase, app->getDatabase().getSession()); if (prevNetworkPassphrase.empty()) { persistentState.setState(PersistentState::kNetworkPassphrase, - networkPassphrase); + networkPassphrase, + app->getDatabase().getSession()); } else if (networkPassphrase != prevNetworkPassphrase) { diff --git a/src/main/Application.h b/src/main/Application.h index ae23517a57..ccaf4af374 100644 --- a/src/main/Application.h +++ b/src/main/Application.h @@ -229,6 +229,7 @@ class Application virtual asio::io_context& getWorkerIOContext() = 0; virtual asio::io_context& getEvictionIOContext() = 0; virtual asio::io_context& getOverlayIOContext() = 0; + virtual asio::io_context& getLedgerCloseIOContext() = 0; virtual void postOnMainThread( std::function&& f, std::string&& name, @@ -242,6 +243,8 @@ class Application std::string jobName) = 0; virtual void postOnOverlayThread(std::function&& f, std::string jobName) = 0; + virtual void postOnLedgerCloseThread(std::function&& f, + std::string jobName) = 0; // Perform actions necessary to transition from BOOTING_STATE to other // states. In particular: either reload or reinitialize the database, and diff --git a/src/main/ApplicationImpl.cpp b/src/main/ApplicationImpl.cpp index 482ceef177..022f09785f 100644 --- a/src/main/ApplicationImpl.cpp +++ b/src/main/ApplicationImpl.cpp @@ -92,6 +92,13 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg) , mOverlayWork(mOverlayIOContext ? std::make_unique( *mOverlayIOContext) : nullptr) + , mLedgerCloseIOContext(mConfig.parallelLedgerClose() + ? std::make_unique(1) + : nullptr) + , mLedgerCloseWork( + mLedgerCloseIOContext + ? 
std::make_unique(*mLedgerCloseIOContext)
+          : nullptr)
     , mWorkerThreads()
     , mEvictionThread()
     , mStopSignals(clock.getIOContext(), SIGINT)
@@ -107,6 +114,8 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg)
           mMetrics->NewTimer({"app", "post-on-background-thread", "delay"}))
     , mPostOnOverlayThreadDelay(
           mMetrics->NewTimer({"app", "post-on-overlay-thread", "delay"}))
+    , mPostOnLedgerCloseThreadDelay(
+          mMetrics->NewTimer({"app", "post-on-ledger-close-thread", "delay"}))
     , mStartedOn(clock.system_now())
 {
 #ifdef SIGQUIT
@@ -173,6 +182,12 @@ ApplicationImpl::ApplicationImpl(VirtualClock& clock, Config const& cfg)
         // Keep priority unchanged as overlay processes time-sensitive tasks
         mOverlayThread = std::thread{[this]() { mOverlayIOContext->run(); }};
     }
+
+    if (mConfig.parallelLedgerClose())
+    {
+        mLedgerCloseThread =
+            std::thread{[this]() { mLedgerCloseIOContext->run(); }};
+    }
 }
 
 static void
@@ -182,7 +197,7 @@ maybeRebuildLedger(Application& app, bool applyBuckets)
     if (ps.shouldRebuildForOfferTable())
     {
         app.getDatabase().clearPreparedStatementCache();
-        soci::transaction tx(app.getDatabase().getSession());
+        soci::transaction tx(app.getDatabase().getRawSession());
         LOG_INFO(DEFAULT_LOG, "Dropping offers");
        app.getLedgerTxnRoot().dropOffers();
         tx.commit();
@@ -536,10 +551,11 @@ ApplicationImpl::scheduleSelfCheck(bool waitUntilNextCheckpoint)
     {
         // Delay until a second full checkpoint-period after the next checkpoint
         // publication. The captured lhhe should usually be published by then.
-        auto& hm = getHistoryManager();
         auto targetLedger =
-            hm.firstLedgerAfterCheckpointContaining(lhhe.header.ledgerSeq);
-        targetLedger = hm.firstLedgerAfterCheckpointContaining(targetLedger);
+            HistoryManager::firstLedgerAfterCheckpointContaining(
+                lhhe.header.ledgerSeq, getConfig());
+        targetLedger = HistoryManager::firstLedgerAfterCheckpointContaining(
+            targetLedger, getConfig());
         auto cond = [targetLedger](Application& app) -> bool {
             auto& lm = app.getLedgerManager();
             return lm.getLastClosedLedgerNum() > targetLedger;
@@ -571,8 +587,13 @@ ApplicationImpl::getNetworkID() const
 ApplicationImpl::~ApplicationImpl()
 {
     LOG_INFO(DEFAULT_LOG, "Application destructing");
+    mStopping = true;
     try
     {
+        // First, shut down the ledger close queue _before_ shutting down all
+        // the subsystems. This ensures that any ledger currently being closed
+        // finishes cleanly.
+        shutdownLedgerCloseThread();
         shutdownWorkScheduler();
         if (mProcessManager)
         {
@@ -773,6 +794,7 @@ ApplicationImpl::gracefulStop()
         return;
     }
     mStopping = true;
+    shutdownLedgerCloseThread();
     if (mOverlayManager)
     {
         mOverlayManager->shutdown();
@@ -788,7 +810,8 @@ ApplicationImpl::gracefulStop()
         // This call happens in shutdown -- before destruction -- so that we can
         // be sure other subsystems (ledger etc.) are still alive and we can
         // call into them to figure out which buckets _are_ referenced.
- mBucketManager->forgetUnreferencedBuckets(); + mBucketManager->forgetUnreferencedBuckets( + mLedgerManager->getLastClosedLedgerHAS()); mBucketManager->shutdown(); } if (mHerder) @@ -819,6 +842,21 @@ ApplicationImpl::shutdownWorkScheduler() } } +void +ApplicationImpl::shutdownLedgerCloseThread() +{ + if (mLedgerCloseThread && !mLedgerCloseThreadStopped) + { + if (mLedgerCloseWork) + { + mLedgerCloseWork.reset(); + } + LOG_INFO(DEFAULT_LOG, "Joining the ledger close thread"); + mLedgerCloseThread->join(); + mLedgerCloseThreadStopped = true; + } +} + void ApplicationImpl::joinAllThreads() { @@ -833,6 +871,10 @@ ApplicationImpl::joinAllThreads() { mOverlayWork.reset(); } + if (mEvictionWork) + { + mEvictionWork.reset(); + } LOG_INFO(DEFAULT_LOG, "Joining {} worker threads", mWorkerThreads.size()); for (auto& w : mWorkerThreads) @@ -840,9 +882,10 @@ ApplicationImpl::joinAllThreads() w.join(); } - if (mEvictionWork) + if (mOverlayThread) { - mEvictionWork.reset(); + LOG_INFO(DEFAULT_LOG, "Joining the overlay thread"); + mOverlayThread->join(); } if (mEvictionThread) @@ -851,12 +894,6 @@ ApplicationImpl::joinAllThreads() mEvictionThread->join(); } - if (mOverlayThread) - { - LOG_INFO(DEFAULT_LOG, "Joining the overlay thread"); - mOverlayThread->join(); - } - LOG_INFO(DEFAULT_LOG, "Joined all {} threads", (mWorkerThreads.size() + 1)); } @@ -1345,6 +1382,13 @@ ApplicationImpl::getOverlayIOContext() return *mOverlayIOContext; } +asio::io_context& +ApplicationImpl::getLedgerCloseIOContext() +{ + releaseAssert(mLedgerCloseIOContext); + return *mLedgerCloseIOContext; +} + void ApplicationImpl::postOnMainThread(std::function&& f, std::string&& name, Scheduler::ActionType type) @@ -1402,6 +1446,19 @@ ApplicationImpl::postOnOverlayThread(std::function&& f, }); } +void +ApplicationImpl::postOnLedgerCloseThread(std::function&& f, + std::string jobName) +{ + releaseAssert(mLedgerCloseIOContext); + LogSlowExecution isSlow{std::move(jobName), LogSlowExecution::Mode::MANUAL, + "executed after"}; + asio::post(*mLedgerCloseIOContext, [this, f = std::move(f), isSlow]() { + mPostOnLedgerCloseThreadDelay.Update(isSlow.checkElapsedTime()); + f(); + }); +} + void ApplicationImpl::enableInvariantsFromConfig() { @@ -1444,8 +1501,6 @@ ApplicationImpl::createDatabase() AbstractLedgerTxnParent& ApplicationImpl::getLedgerTxnRoot() { - releaseAssert(threadIsMain()); - #ifdef BUILD_TESTS if (mConfig.MODE_USES_IN_MEMORY_LEDGER) { diff --git a/src/main/ApplicationImpl.h b/src/main/ApplicationImpl.h index 1fc7ea989c..b37093c831 100644 --- a/src/main/ApplicationImpl.h +++ b/src/main/ApplicationImpl.h @@ -82,6 +82,7 @@ class ApplicationImpl : public Application virtual asio::io_context& getWorkerIOContext() override; virtual asio::io_context& getEvictionIOContext() override; virtual asio::io_context& getOverlayIOContext() override; + virtual asio::io_context& getLedgerCloseIOContext() override; virtual void postOnMainThread(std::function&& f, std::string&& name, Scheduler::ActionType type) override; @@ -92,6 +93,8 @@ class ApplicationImpl : public Application virtual void postOnOverlayThread(std::function&& f, std::string jobName) override; + virtual void postOnLedgerCloseThread(std::function&& f, + std::string jobName) override; virtual void start() override; void startServices(); @@ -158,6 +161,9 @@ class ApplicationImpl : public Application std::unique_ptr mOverlayIOContext; std::unique_ptr mOverlayWork; + std::unique_ptr mLedgerCloseIOContext; + std::unique_ptr mLedgerCloseWork; + std::unique_ptr mBucketManager; 
std::unique_ptr mDatabase; std::unique_ptr mOverlayManager; @@ -206,6 +212,7 @@ class ApplicationImpl : public Application std::vector mWorkerThreads; std::optional mOverlayThread; + std::optional mLedgerCloseThread; // Unlike mWorkerThreads (which are low priority), eviction scans require a // medium priority thread. In the future, this may become a more general @@ -216,7 +223,8 @@ class ApplicationImpl : public Application asio::signal_set mStopSignals; bool mStarted; - bool mStopping; + std::atomic mStopping; + bool mLedgerCloseThreadStopped{false}; VirtualTimer mStoppingTimer; VirtualTimer mSelfCheckTimer; @@ -225,6 +233,7 @@ class ApplicationImpl : public Application medida::Timer& mPostOnMainThreadDelay; medida::Timer& mPostOnBackgroundThreadDelay; medida::Timer& mPostOnOverlayThreadDelay; + medida::Timer& mPostOnLedgerCloseThreadDelay; VirtualClock::system_time_point mStartedOn; @@ -258,5 +267,6 @@ class ApplicationImpl : public Application void upgradeToCurrentSchemaAndMaybeRebuildLedger(bool applyBuckets, bool forceRebuild); + void shutdownLedgerCloseThread(); }; } diff --git a/src/main/ApplicationUtils.cpp b/src/main/ApplicationUtils.cpp index 6898facd6e..81c0cacc13 100644 --- a/src/main/ApplicationUtils.cpp +++ b/src/main/ApplicationUtils.cpp @@ -166,8 +166,8 @@ bool applyBucketsForLCL(Application& app) { auto has = app.getLedgerManager().getLastClosedLedgerHAS(); - auto lclHash = - app.getPersistentState().getState(PersistentState::kLastClosedLedger); + auto lclHash = app.getPersistentState().getState( + PersistentState::kLastClosedLedger, app.getDatabase().getSession()); auto maxProtocolVersion = app.getConfig().LEDGER_PROTOCOL_VERSION; auto currentLedger = @@ -240,10 +240,9 @@ setAuthenticatedLedgerHashPair(Application::pointer app, uint32_t startLedger, std::string startHash) { auto const& lm = app->getLedgerManager(); - auto const& hm = app->getHistoryManager(); auto tryCheckpoint = [&](uint32_t seq, Hash h) { - if (hm.isLastLedgerInCheckpoint(seq)) + if (HistoryManager::isLastLedgerInCheckpoint(seq, app->getConfig())) { LOG_INFO(DEFAULT_LOG, "Found authenticated checkpoint hash {} for ledger {}", @@ -272,7 +271,7 @@ setAuthenticatedLedgerHashPair(Application::pointer app, if (lm.isSynced()) { - auto const& lhe = lm.getLastClosedLedgerHeader(); + auto lhe = lm.getLastClosedLedgerHeader(); tryCheckpoint(lhe.header.ledgerSeq, lhe.hash); } else @@ -307,7 +306,8 @@ selfCheck(Config cfg) // Then we scan all the buckets to check they have expected hashes. LOG_INFO(DEFAULT_LOG, "Self-check phase 2: bucket hash verification"); - auto seq2 = app->getBucketManager().scheduleVerifyReferencedBucketsWork(); + auto seq2 = app->getBucketManager().scheduleVerifyReferencedBucketsWork( + app->getLedgerManager().getLastClosedLedgerHAS()); while (clock.crank(true) && !seq2->isDone()) ; @@ -965,18 +965,20 @@ publish(Application::pointer app) asio::io_context::work mainWork(io); auto lcl = app->getLedgerManager().getLastClosedLedgerNum(); - auto isCheckpoint = app->getHistoryManager().isLastLedgerInCheckpoint(lcl); + auto isCheckpoint = + HistoryManager::isLastLedgerInCheckpoint(lcl, app->getConfig()); size_t expectedPublishQueueSize = isCheckpoint ? 
1 : 0; app->getHistoryManager().publishQueuedHistory(); - while (app->getHistoryManager().publishQueueLength() != + while (HistoryManager::publishQueueLength(app->getConfig()) != expectedPublishQueueSize && clock.crank(true)) { } // Cleanup buckets not referenced by publish queue anymore - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); LOG_INFO(DEFAULT_LOG, "*"); LOG_INFO(DEFAULT_LOG, "* Publish finished."); diff --git a/src/main/Maintainer.cpp b/src/main/Maintainer.cpp index 0b11d7658b..3130bc202a 100644 --- a/src/main/Maintainer.cpp +++ b/src/main/Maintainer.cpp @@ -76,14 +76,14 @@ Maintainer::performMaintenance(uint32_t count) // Calculate the minimum of the LCL and/or any queued checkpoint. uint32_t lcl = mApp.getLedgerManager().getLastClosedLedgerNum(); - uint32_t ql = mApp.getHistoryManager().getMinLedgerQueuedToPublish(); + uint32_t ql = HistoryManager::getMinLedgerQueuedToPublish(mApp.getConfig()); uint32_t qmin = ql == 0 ? lcl : std::min(ql, lcl); // Next calculate, given qmin, the first ledger it'd be _safe to // delete_ while still keeping everything required to publish. // So if qmin is (for example) 0x7f = 127, then we want to keep 64 // ledgers before that, and therefore can erase 0x3f = 63 and less. - uint32_t freq = mApp.getHistoryManager().getCheckpointFrequency(); + uint32_t freq = HistoryManager::getCheckpointFrequency(mApp.getConfig()); uint32_t lmin = qmin >= freq ? qmin - freq : 0; CLOG_INFO(History, "Trimming history <= ledger {}", lmin); diff --git a/src/main/test/ApplicationUtilsTests.cpp b/src/main/test/ApplicationUtilsTests.cpp index b8a12017ad..93f5616739 100644 --- a/src/main/test/ApplicationUtilsTests.cpp +++ b/src/main/test/ApplicationUtilsTests.cpp @@ -194,7 +194,7 @@ class SimulationHelper return std::make_pair(selectedLedger, selectedHash); } - LedgerHeaderHistoryEntry const& + LedgerHeaderHistoryEntry getMainNodeLCL() { return mSimulation->getNode(mMainNodeID) @@ -202,7 +202,7 @@ class SimulationHelper .getLastClosedLedgerHeader(); } - LedgerHeaderHistoryEntry const& + LedgerHeaderHistoryEntry getTestNodeLCL() { return mSimulation->getNode(mTestNodeID) diff --git a/src/overlay/BanManagerImpl.cpp b/src/overlay/BanManagerImpl.cpp index a433ef94e0..297af877dc 100644 --- a/src/overlay/BanManagerImpl.cpp +++ b/src/overlay/BanManagerImpl.cpp @@ -44,7 +44,8 @@ BanManagerImpl::banNode(NodeID nodeID) { ZoneNamedN(insertBanZone, "insert ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "INSERT INTO ban (nodeid) VALUES(:n)"); + "INSERT INTO ban (nodeid) VALUES(:n)", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::use(nodeIDString)); st.define_and_bind(); @@ -61,7 +62,8 @@ BanManagerImpl::unbanNode(NodeID nodeID) { ZoneNamedN(deleteBanZone, "delete ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "DELETE FROM ban WHERE nodeid = :n;"); + "DELETE FROM ban WHERE nodeid = :n;", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::use(nodeIDString)); st.define_and_bind(); @@ -77,7 +79,8 @@ BanManagerImpl::isBanned(NodeID nodeID) { ZoneNamedN(selectBanZone, "select ban", true); auto prep = mApp.getDatabase().getPreparedStatement( - "SELECT count(*) FROM ban WHERE nodeid = :n"); + "SELECT count(*) FROM ban WHERE nodeid = :n", + mApp.getDatabase().getSession()); uint32_t count; auto& st = prep.statement(); st.exchange(soci::into(count)); @@ -96,8 +99,8 @@ 
BanManagerImpl::getBans() std::string nodeIDString; { ZoneNamedN(selectBanZone, "select ban", true); - auto prep = - mApp.getDatabase().getPreparedStatement("SELECT nodeid FROM ban"); + auto prep = mApp.getDatabase().getPreparedStatement( + "SELECT nodeid FROM ban", mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(soci::into(nodeIDString)); st.define_and_bind(); @@ -114,10 +117,10 @@ BanManagerImpl::getBans() void BanManager::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS ban"; + db.getRawSession() << "DROP TABLE IF EXISTS ban"; - db.getSession() << "CREATE TABLE ban (" - "nodeid CHARACTER(56) NOT NULL PRIMARY KEY" - ")"; + db.getRawSession() << "CREATE TABLE ban (" + "nodeid CHARACTER(56) NOT NULL PRIMARY KEY" + ")"; } } diff --git a/src/overlay/PeerManager.cpp b/src/overlay/PeerManager.cpp index 4abfbc946c..a27eaec3ec 100644 --- a/src/overlay/PeerManager.cpp +++ b/src/overlay/PeerManager.cpp @@ -180,7 +180,8 @@ PeerManager::removePeersWithManyFailures(size_t minNumFailures, sql += " AND ip = :ip"; } - auto prep = db.getPreparedStatement(sql); + auto prep = + db.getPreparedStatement(sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(use(minNumFailures)); @@ -237,7 +238,8 @@ PeerManager::load(PeerBareAddress const& address) { auto prep = mApp.getDatabase().getPreparedStatement( "SELECT numfailures, nextattempt, type FROM peers " - "WHERE ip = :v1 AND port = :v2"); + "WHERE ip = :v1 AND port = :v2", + mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(into(result.mNumFailures)); st.exchange(into(result.mNextAttempt)); @@ -294,7 +296,8 @@ PeerManager::store(PeerBareAddress const& address, PeerRecord const& peerRecord, try { - auto prep = mApp.getDatabase().getPreparedStatement(query); + auto prep = mApp.getDatabase().getPreparedStatement( + query, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(use(peerRecord.mNextAttempt)); st.exchange(use(peerRecord.mNumFailures)); @@ -503,7 +506,8 @@ PeerManager::countPeers(std::string const& where, { std::string sql = "SELECT COUNT(*) FROM peers WHERE " + where; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); bind(st); @@ -533,7 +537,8 @@ PeerManager::loadPeers(size_t limit, size_t offset, std::string const& where, "FROM peers WHERE " + where + " LIMIT :limit OFFSET :offset"; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); bind(st); @@ -570,8 +575,8 @@ PeerManager::loadPeers(size_t limit, size_t offset, std::string const& where, void PeerManager::dropAll(Database& db) { - db.getSession() << "DROP TABLE IF EXISTS peers;"; - db.getSession() << kSQLCreateStatement; + db.getRawSession() << "DROP TABLE IF EXISTS peers;"; + db.getRawSession() << kSQLCreateStatement; } std::vector> @@ -588,7 +593,8 @@ PeerManager::loadAllPeers() int port; PeerRecord record; - auto prep = mApp.getDatabase().getPreparedStatement(sql); + auto prep = mApp.getDatabase().getPreparedStatement( + sql, mApp.getDatabase().getSession()); auto& st = prep.statement(); st.exchange(into(ip)); @@ -621,7 +627,7 @@ void PeerManager::storePeers( std::vector> peers) { - soci::transaction tx(mApp.getDatabase().getSession()); + soci::transaction tx(mApp.getDatabase().getRawSession()); for (auto const& peer 
: peers) { store(peer.first, peer.second, /* inDatabase */ false); diff --git a/src/overlay/test/OverlayManagerTests.cpp b/src/overlay/test/OverlayManagerTests.cpp index 5255100fba..afba967e8b 100644 --- a/src/overlay/test/OverlayManagerTests.cpp +++ b/src/overlay/test/OverlayManagerTests.cpp @@ -165,7 +165,7 @@ class OverlayManagerTests pm.storeConfigPeers(); } - rowset rs = app->getDatabase().getSession().prepare + rowset rs = app->getDatabase().getRawSession().prepare << "SELECT ip,port,type FROM peers ORDER BY ip, port"; auto& ppeers = pm.mConfigurationPreferredPeers; @@ -213,7 +213,7 @@ class OverlayManagerTests pm.mResolvedPeers.wait(); pm.tick(); - rowset rs = app->getDatabase().getSession().prepare + rowset rs = app->getDatabase().getRawSession().prepare << "SELECT ip,port,type FROM peers ORDER BY ip, port"; int found = 0; diff --git a/src/simulation/CoreTests.cpp b/src/simulation/CoreTests.cpp index ab91094f72..db4f8766a1 100644 --- a/src/simulation/CoreTests.cpp +++ b/src/simulation/CoreTests.cpp @@ -691,7 +691,8 @@ TEST_CASE("Bucket list entries vs write throughput", "[scalability][!hide]") batch.GetSnapshot().get99thPercentile(), batch.max(), (double)merges.count(), merges.max(), merges.mean()}); - app->getBucketManager().forgetUnreferencedBuckets(); + app->getBucketManager().forgetUnreferencedBuckets( + app->getLedgerManager().getLastClosedLedgerHAS()); } } } diff --git a/src/simulation/LoadGenerator.cpp b/src/simulation/LoadGenerator.cpp index 94063d83a8..949f40b40f 100644 --- a/src/simulation/LoadGenerator.cpp +++ b/src/simulation/LoadGenerator.cpp @@ -1214,6 +1214,13 @@ LoadGenerator::checkAccountSynced(Application& app, bool isCreate) account->getAccountId()); result.push_back(account); } + else if (app.getHerder().sourceAccountPending( + account->getPublicKey())) + { + CLOG_TRACE(LoadGen, "Account {} is pending!", + account->getAccountId()); + result.push_back(account); + } } else if (!reloadRes) { diff --git a/src/simulation/TxGenerator.cpp b/src/simulation/TxGenerator.cpp index 59794f1473..07e2f22108 100644 --- a/src/simulation/TxGenerator.cpp +++ b/src/simulation/TxGenerator.cpp @@ -1016,7 +1016,7 @@ TxGenerator::sorobanRandomUploadResources() // Estimate VM instantiation cost, with some additional buffer to increase // the chance that this instruction count is sufficient. 
- ContractCostParamEntry const& vmInstantiationCosts = + ContractCostParamEntry vmInstantiationCosts = mApp.getLedgerManager() .getSorobanNetworkConfigReadOnly() .cpuCostParams()[VmInstantiation]; diff --git a/src/test/FuzzerImpl.cpp b/src/test/FuzzerImpl.cpp index afa750f206..d8eea5d123 100644 --- a/src/test/FuzzerImpl.cpp +++ b/src/test/FuzzerImpl.cpp @@ -887,7 +887,8 @@ resetTxInternalState(Application& app) app.getLedgerTxnRoot().resetForFuzzer(); app.getInvariantManager().resetForFuzzer(); #endif // BUILD_TESTS - app.getDatabase().clearPreparedStatementCache(); + app.getDatabase().clearPreparedStatementCache( + app.getDatabase().getSession()); } // FuzzTransactionFrame is a specialized TransactionFrame that includes @@ -926,9 +927,10 @@ class FuzzTransactionFrame : public TransactionFrame LedgerSnapshot ltxStmt(ltx); // if any ill-formed Operations, do not attempt transaction application auto isInvalidOperation = [&](auto const& op, auto& opResult) { - return !op->checkValid(app.getAppConnector(), signatureChecker, - ltxStmt, false, opResult, - mTxResult->getSorobanData()); + return !op->checkValid( + app.getAppConnector(), signatureChecker, + app.getAppConnector().getSorobanNetworkConfigReadOnly(), + ltxStmt, false, opResult, mTxResult->getSorobanData()); }; auto const& ops = getOperations(); diff --git a/src/test/TestUtils.cpp b/src/test/TestUtils.cpp index 488d4a00bd..c73139d093 100644 --- a/src/test/TestUtils.cpp +++ b/src/test/TestUtils.cpp @@ -36,6 +36,30 @@ crankFor(VirtualClock& clock, VirtualClock::duration duration) ; } +void +crankUntil(Application::pointer app, std::function const& predicate, + VirtualClock::duration timeout) +{ + crankUntil(*app, predicate, timeout); +} + +void +crankUntil(Application& app, std::function const& predicate, + VirtualClock::duration timeout) +{ + auto start = std::chrono::system_clock::now(); + while (!predicate()) + { + app.getClock().crank(false); + auto current = std::chrono::system_clock::now(); + auto diff = current - start; + if (diff > timeout) + { + break; + } + } +} + void shutdownWorkScheduler(Application& app) { diff --git a/src/test/TestUtils.h b/src/test/TestUtils.h index d0175a4e0e..c54c6cf600 100644 --- a/src/test/TestUtils.h +++ b/src/test/TestUtils.h @@ -23,7 +23,11 @@ namespace testutil { void crankSome(VirtualClock& clock); void crankFor(VirtualClock& clock, VirtualClock::duration duration); - +void crankUntil(Application::pointer app, + std::function const& predicate, + VirtualClock::duration timeout); +void crankUntil(Application& app, std::function const& predicate, + VirtualClock::duration timeout); void shutdownWorkScheduler(Application& app); std::vector getInvalidAssets(SecretKey const& issuer); diff --git a/src/test/TxTests.cpp b/src/test/TxTests.cpp index 9cdcac2d2c..227596d932 100644 --- a/src/test/TxTests.cpp +++ b/src/test/TxTests.cpp @@ -372,6 +372,9 @@ applyCheck(TransactionTestFramePtr tx, Application& app, bool checkSeqNum) recordOrCheckGlobalTestTxMetadata(tm.getXDR()); } + // TODO: in-memory mode doesn't work with parallel ledger close because + // it manually modifies LedgerTxn without closing a ledger; this results + // in a different ledger header stored inside of LedgerTxn ltx.commit(); return res; @@ -631,15 +634,15 @@ loadAccount(AbstractLedgerTxn& ltx, PublicKey const& k, bool mustExist) bool doesAccountExist(Application& app, PublicKey const& k) { - LedgerTxn ltx(app.getLedgerTxnRoot()); - return (bool)stellar::loadAccountWithoutRecord(ltx, k); + LedgerSnapshot lss(app); + return 
(bool)lss.getAccount(k); } xdr::xvector getAccountSigners(PublicKey const& k, Application& app) { - LedgerTxn ltx(app.getLedgerTxnRoot()); - auto account = stellar::loadAccount(ltx, k); + LedgerSnapshot lss(app); + auto account = lss.getAccount(k); return account.current().data.account().signers; } @@ -699,11 +702,8 @@ transactionFromOperations(Application& app, SecretKey const& from, SequenceNumber seq, const std::vector& ops, uint32_t fee) { - uint32_t ledgerVersion; - { - LedgerTxn ltx(app.getLedgerTxnRoot()); - ledgerVersion = ltx.loadHeader().current().ledgerVersion; - } + auto ledgerVersion = + app.getLedgerManager().getLastClosedLedgerHeader().header.ledgerVersion; if (protocolVersionIsBefore(ledgerVersion, ProtocolVersion::V_13)) { return transactionFromOperationsV0(app, from, seq, ops, fee); diff --git a/src/transactions/ExtendFootprintTTLOpFrame.cpp b/src/transactions/ExtendFootprintTTLOpFrame.cpp index a4b00112be..9df14d4273 100644 --- a/src/transactions/ExtendFootprintTTLOpFrame.cpp +++ b/src/transactions/ExtendFootprintTTLOpFrame.cpp @@ -62,8 +62,7 @@ ExtendFootprintTTLOpFrame::doApply( auto const& resources = mParentTx.sorobanResources(); auto const& footprint = resources.footprint; - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); rust::Vec rustEntryRentChanges; rustEntryRentChanges.reserve(footprint.readOnly.size()); diff --git a/src/transactions/InvokeHostFunctionOpFrame.cpp b/src/transactions/InvokeHostFunctionOpFrame.cpp index 30cadc324c..ae76c16264 100644 --- a/src/transactions/InvokeHostFunctionOpFrame.cpp +++ b/src/transactions/InvokeHostFunctionOpFrame.cpp @@ -332,8 +332,7 @@ InvokeHostFunctionOpFrame::doApply( Config const& appConfig = app.getConfig(); HostFunctionMetrics metrics(app.getSorobanMetrics()); auto timeScope = metrics.getExecTimer(); - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); // Get the entries for the footprint rust::Vec ledgerEntryCxxBufs; diff --git a/src/transactions/OperationFrame.cpp b/src/transactions/OperationFrame.cpp index ff14610986..5108b15106 100644 --- a/src/transactions/OperationFrame.cpp +++ b/src/transactions/OperationFrame.cpp @@ -144,8 +144,11 @@ OperationFrame::apply(AppConnector& app, SignatureChecker& signatureChecker, CLOG_TRACE(Tx, "{}", xdrToCerealString(mOperation, "Operation")); LedgerSnapshot ltxState(ltx); - bool applyRes = - checkValid(app, signatureChecker, ltxState, true, res, sorobanData); + std::optional cfg = + isSoroban() ? 
std::make_optional(app.getSorobanNetworkConfigForApply()) + : std::nullopt; + bool applyRes = checkValid(app, signatureChecker, cfg, ltxState, true, res, + sorobanData); if (applyRes) { applyRes = doApply(app, ltx, sorobanBasePrngSeed, res, sorobanData); @@ -219,6 +222,7 @@ OperationFrame::getSourceID() const bool OperationFrame::checkValid(AppConnector& app, SignatureChecker& signatureChecker, + std::optional const& cfg, LedgerSnapshot const& ls, bool forApply, OperationResult& res, std::shared_ptr sorobanData) const @@ -226,8 +230,8 @@ OperationFrame::checkValid(AppConnector& app, ZoneScoped; bool validationResult = false; auto validate = [this, &res, forApply, &signatureChecker, &app, - &sorobanData, - &validationResult](LedgerSnapshot const& ls) { + &sorobanData, &validationResult, + &cfg](LedgerSnapshot const& ls) { if (!isOpSupported(ls.getLedgerHeader().current())) { res.code(opNOT_SUPPORTED); @@ -262,12 +266,9 @@ OperationFrame::checkValid(AppConnector& app, isSoroban()) { releaseAssertOrThrow(sorobanData); - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); - - validationResult = - doCheckValidForSoroban(sorobanConfig, app.getConfig(), - ledgerVersion, res, *sorobanData); + releaseAssertOrThrow(cfg); + validationResult = doCheckValidForSoroban( + cfg.value(), app.getConfig(), ledgerVersion, res, *sorobanData); } else { diff --git a/src/transactions/OperationFrame.h b/src/transactions/OperationFrame.h index 5cc8aa6641..c260d8f11f 100644 --- a/src/transactions/OperationFrame.h +++ b/src/transactions/OperationFrame.h @@ -74,6 +74,7 @@ class OperationFrame AccountID getSourceID() const; bool checkValid(AppConnector& app, SignatureChecker& signatureChecker, + std::optional const& cfg, LedgerSnapshot const& ls, bool forApply, OperationResult& res, std::shared_ptr sorobanData) const; diff --git a/src/transactions/RestoreFootprintOpFrame.cpp b/src/transactions/RestoreFootprintOpFrame.cpp index 354b894ae5..dc849b1c6f 100644 --- a/src/transactions/RestoreFootprintOpFrame.cpp +++ b/src/transactions/RestoreFootprintOpFrame.cpp @@ -63,8 +63,7 @@ RestoreFootprintOpFrame::doApply( auto const& resources = mParentTx.sorobanResources(); auto const& footprint = resources.footprint; auto ledgerSeq = ltx.loadHeader().current().ledgerSeq; - auto const& sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + auto const& sorobanConfig = app.getSorobanNetworkConfigForApply(); auto const& appConfig = app.getConfig(); auto const& archivalSettings = sorobanConfig.stateArchivalSettings(); diff --git a/src/transactions/TransactionFrame.cpp b/src/transactions/TransactionFrame.cpp index 223817b92c..4d9b2d6ae1 100644 --- a/src/transactions/TransactionFrame.cpp +++ b/src/transactions/TransactionFrame.cpp @@ -1465,8 +1465,8 @@ TransactionFrame::checkValidWithOptionallyChargedFee( auto const& op = mOperations[i]; auto& opResult = txResult->getOpResultAt(i); - if (!op->checkValid(app, signatureChecker, ls, false, opResult, - txResult->getSorobanData())) + if (!op->checkValid(app, signatureChecker, sorobanConfig, ls, false, + opResult, txResult->getSorobanData())) { // it's OK to just fast fail here and not try to call // checkValid on all operations as the resulting object @@ -1719,8 +1719,7 @@ TransactionFrame::applyOperations(SignatureChecker& signatureChecker, // If transaction fails, we don't charge for any // refundable resources. 
auto preApplyFee = computePreApplySorobanResourceFee( - ledgerVersion, - app.getLedgerManager().getSorobanNetworkConfigForApply(), + ledgerVersion, app.getSorobanNetworkConfigForApply(), app.getConfig()); txResult.getSorobanData()->setSorobanFeeRefund( @@ -1838,8 +1837,7 @@ TransactionFrame::apply(AppConnector& app, AbstractLedgerTxn& ltx, SOROBAN_PROTOCOL_VERSION) && isSoroban()) { - sorobanConfig = - app.getLedgerManager().getSorobanNetworkConfigForApply(); + sorobanConfig = app.getSorobanNetworkConfigForApply(); sorobanResourceFee = computePreApplySorobanResourceFee( ledgerVersion, *sorobanConfig, app.getConfig()); diff --git a/src/transactions/TransactionSQL.cpp b/src/transactions/TransactionSQL.cpp index c35a725f36..7d68016328 100644 --- a/src/transactions/TransactionSQL.cpp +++ b/src/transactions/TransactionSQL.cpp @@ -348,20 +348,23 @@ void dropSupportTransactionFeeHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txfeehistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txfeehistory"; } void dropSupportTxSetHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txsethistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txsethistory"; } void dropSupportTxHistory(Database& db) { ZoneScoped; - db.getSession() << "DROP TABLE IF EXISTS txhistory"; + releaseAssert(threadIsMain()); + db.getRawSession() << "DROP TABLE IF EXISTS txhistory"; } } diff --git a/src/transactions/test/SorobanTxTestUtils.cpp b/src/transactions/test/SorobanTxTestUtils.cpp index f16a7b9b4a..f583a09b09 100644 --- a/src/transactions/test/SorobanTxTestUtils.cpp +++ b/src/transactions/test/SorobanTxTestUtils.cpp @@ -1055,7 +1055,7 @@ SorobanTest::getDummyAccount() SorobanNetworkConfig const& SorobanTest::getNetworkCfg() { - return getApp().getLedgerManager().getSorobanNetworkConfigReadOnly(); + return getApp().getLedgerManager().getMutableSorobanNetworkConfig(); } uint32_t From e4173147572d8e65e0b0a949c323f04b9994f51e Mon Sep 17 00:00:00 2001 From: marta-lokhova Date: Mon, 6 Jan 2025 19:06:56 -0800 Subject: [PATCH 10/10] Some cleanup post-rebase, refresh comments --- src/bucket/BucketManager.cpp | 3 +- src/bucket/BucketManager.h | 3 +- src/bucket/LiveBucketList.h | 3 - src/bucket/test/BucketListTests.cpp | 2 +- src/bucket/test/BucketTestUtils.cpp | 6 +- src/catchup/CatchupWork.cpp | 2 +- src/database/Database.h | 2 + src/herder/HerderImpl.cpp | 17 ++- src/herder/HerderPersistenceImpl.cpp | 5 +- src/herder/HerderSCPDriver.cpp | 13 +- src/herder/TxSetFrame.cpp | 2 +- src/history/test/HistoryTestsUtils.cpp | 2 - .../BucketListIsConsistentWithDatabase.cpp | 1 - src/ledger/LedgerManagerImpl.cpp | 117 ++++++++++-------- src/ledger/LedgerManagerImpl.h | 38 +++--- src/main/AppConnector.cpp | 11 -- src/main/AppConnector.h | 1 - src/main/ApplicationImpl.cpp | 2 +- src/main/ApplicationUtils.cpp | 2 +- 19 files changed, 124 insertions(+), 108 deletions(-) diff --git a/src/bucket/BucketManager.cpp b/src/bucket/BucketManager.cpp index 209d828caa..9aecee8713 100644 --- a/src/bucket/BucketManager.cpp +++ b/src/bucket/BucketManager.cpp @@ -18,6 +18,7 @@ #include "ledger/LedgerManager.h" #include "ledger/LedgerTxn.h" #include "ledger/LedgerTypeUtils.h" +#include "ledger/NetworkConfig.h" #include "main/Application.h" #include "main/Config.h" #include "util/Fs.h" @@ -1080,7 +1081,7 @@ EvictedStateVectors BucketManager::resolveBackgroundEvictionScan( AbstractLedgerTxn& ltx, uint32_t ledgerSeq, LedgerKeySet const& 
modifiedKeys, uint32_t ledgerVers, - SorobanNetworkConfig& networkConfig) + SorobanNetworkConfig const& networkConfig) { ZoneScoped; releaseAssert(mEvictionStatistics); diff --git a/src/bucket/BucketManager.h b/src/bucket/BucketManager.h index d17517838f..32043b8870 100644 --- a/src/bucket/BucketManager.h +++ b/src/bucket/BucketManager.h @@ -36,6 +36,7 @@ class BucketSnapshotManager; class SearchableLiveBucketListSnapshot; struct BucketEntryCounters; enum class LedgerEntryTypeAndDurability : uint32_t; +class SorobanNetworkConfig; struct HistoryArchiveState; @@ -307,7 +308,7 @@ class BucketManager : NonMovableOrCopyable resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx, uint32_t ledgerSeq, LedgerKeySet const& modifiedKeys, uint32_t ledgerVers, - SorobanNetworkConfig& networkConfig); + SorobanNetworkConfig const& networkConfig); medida::Meter& getBloomMissMeter() const; medida::Meter& getBloomLookupMeter() const; diff --git a/src/bucket/LiveBucketList.h b/src/bucket/LiveBucketList.h index 688f0acd22..0f2a6ac268 100644 --- a/src/bucket/LiveBucketList.h +++ b/src/bucket/LiveBucketList.h @@ -9,9 +9,6 @@ namespace stellar { - -class SorobanNetworkConfig; - // The LiveBucketList stores the current canonical state of the ledger. It is // made up of LiveBucket buckets, which in turn store individual entries of type // BucketEntry. When an entry is "evicted" from the ledger, it is removed from diff --git a/src/bucket/test/BucketListTests.cpp b/src/bucket/test/BucketListTests.cpp index 5dfb6572f6..0a5b545097 100644 --- a/src/bucket/test/BucketListTests.cpp +++ b/src/bucket/test/BucketListTests.cpp @@ -869,7 +869,7 @@ TEST_CASE_VERSIONS("network config snapshots BucketList size", "[bucketlist]") LedgerManagerForBucketTests& lm = app->getLedgerManager(); auto& networkConfig = - app->getLedgerManager().getMutableSorobanNetworkConfig(); + app->getLedgerManager().getSorobanNetworkConfigReadOnly(); uint32_t windowSize = networkConfig.stateArchivalSettings() .bucketListSizeWindowSampleSize; diff --git a/src/bucket/test/BucketTestUtils.cpp b/src/bucket/test/BucketTestUtils.cpp index 34122c63d2..ea6d0f351f 100644 --- a/src/bucket/test/BucketTestUtils.cpp +++ b/src/bucket/test/BucketTestUtils.cpp @@ -101,10 +101,6 @@ closeLedger(Application& app, std::optional skToSignValue, app.getHerder().externalizeValue(TxSetXDRFrame::makeEmpty(lcl), ledgerNum, lcl.header.scpValue.closeTime, upgrades, skToSignValue); - testutil::crankUntil( - app, - [&lm, ledgerNum]() { return lm.getLastClosedLedgerNum() == ledgerNum; }, - std::chrono::seconds(10)); return lm.getLastClosedLedgerHeader().hash; } @@ -238,7 +234,7 @@ LedgerManagerForBucketTests::transferLedgerEntriesToBucketList( mApp.getBucketManager().resolveBackgroundEvictionScan( ltxEvictions, lh.ledgerSeq, keys, initialLedgerVers, mApp.getLedgerManager() - .getMutableSorobanNetworkConfig()); + .getSorobanNetworkConfigForApply()); if (protocolVersionStartsFrom( initialLedgerVers, diff --git a/src/catchup/CatchupWork.cpp b/src/catchup/CatchupWork.cpp index 89fc839791..dc46ae37c6 100644 --- a/src/catchup/CatchupWork.cpp +++ b/src/catchup/CatchupWork.cpp @@ -524,7 +524,7 @@ CatchupWork::runCatchupStep() // In this case we should actually have been caught-up during // the replay process and, if judged successful, our LCL should // be the one provided as well. 
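As an aside (illustration only, not part of the patch): the auto& in CatchupWork just below, like the auto const& reads of the last-closed header elsewhere in this series, avoids copying cached state on every access. A small self-contained example of the difference, with made-up Header and Manager types:

#include <cassert>

struct Header
{
    int seq = 0;
};

struct Manager
{
    Header mHeader;
    Header const&
    header() const
    {
        return mHeader;
    }
};

int
main()
{
    Manager m;
    auto copied = m.header();        // deduces Header: takes a snapshot copy
    auto const& cached = m.header(); // binds to the cached header, no copy
    m.mHeader.seq = 5;
    assert(copied.seq == 0); // the copy does not see later updates
    assert(cached.seq == 5); // the reference does
}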
- auto lastClosed = + auto& lastClosed = mApp.getLedgerManager().getLastClosedLedgerHeader(); releaseAssert(mLastApplied.hash == lastClosed.hash); releaseAssert(mLastApplied.header == lastClosed.header); diff --git a/src/database/Database.h b/src/database/Database.h index e58391b7a6..f96062a418 100644 --- a/src/database/Database.h +++ b/src/database/Database.h @@ -141,6 +141,8 @@ class Database : NonMovableOrCopyable // Save `vers` as schema version. void putSchemaVersion(unsigned long vers); + // Prepared statements cache may be accessed by mutliple threads (each using + // a different session), so use a mutex to synchronize access. std::mutex mutable mStatementsMutex; public: diff --git a/src/herder/HerderImpl.cpp b/src/herder/HerderImpl.cpp index 208c05f68c..5f01ea543a 100644 --- a/src/herder/HerderImpl.cpp +++ b/src/herder/HerderImpl.cpp @@ -480,6 +480,15 @@ HerderImpl::valueExternalized(uint64 slotIndex, StellarValue const& value, // Check to see if quorums have changed and we need to reanalyze. checkAndMaybeReanalyzeQuorumMap(); + + // heart beat *after* doing all the work (ensures that we do not include + // the overhead of externalization in the way we track SCP) + // Note: this only makes sense in the context of synchronous ledger + // application on the main thread. + if (!mApp.getConfig().parallelLedgerClose()) + { + trackingHeartBeat(); + } } else { @@ -1156,7 +1165,7 @@ HerderImpl::lastClosedLedgerIncreased(bool latest, TxSetXDRFrameConstPtr txSet) { // Re-start heartbeat tracking _after_ applying the most up-to-date // ledger. This guarantees out-of-sync timer won't fire while we have - // ledgers to apply. + // ledgers to apply (applicable during parallel ledger close). trackingHeartBeat(); // Ensure out of sync recovery did not get triggered while we were @@ -1369,6 +1378,8 @@ HerderImpl::triggerNextLedger(uint32_t ledgerSeqToTrigger, // If applying, the next ledger will trigger voting if (mLedgerManager.isApplying()) { + // This can only happen when closing ledgers in parallel + releaseAssert(mApp.getConfig().parallelLedgerClose()); CLOG_DEBUG(Herder, "triggerNextLedger: skipping (applying) : {}", mApp.getStateHuman()); return; @@ -1560,7 +1571,7 @@ HerderImpl::getUpgradesJson() void HerderImpl::forceSCPStateIntoSyncWithLastClosedLedger() { - auto header = mLedgerManager.getLastClosedLedgerHeader().header; + auto const& header = mLedgerManager.getLastClosedLedgerHeader().header; setTrackingSCPState(header.ledgerSeq, header.scpValue, /* isTrackingNetwork */ true); } @@ -2360,7 +2371,7 @@ HerderImpl::herderOutOfSync() // are no ledgers queued to be applied. If there are ledgers queued, it's // possible the rest of the network is waiting for this node to vote. In // this case we should _still_ remain in tracking and emit nomination; If - // the nodes does not hear anything from the network after that, then node + // the node does not hear anything from the network after that, then node // can go into out of sync recovery. 
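Aside, for illustration only (not part of the patch): the Database.h comment above describes one prepared-statement cache shared by per-thread sessions. A rough sketch of that shape, with a hypothetical StatementCache and a plain string standing in for a soci prepared statement:

#include <map>
#include <mutex>
#include <string>

class StatementCache
{
    // The cache itself is shared across threads, so guard it with a mutex;
    // each thread still prepares and executes against its own session.
    std::mutex mutable mMutex;
    std::map<std::string, std::string> mCache; // query text -> prepared handle

  public:
    std::string const&
    getOrPrepare(std::string const& query)
    {
        std::lock_guard<std::mutex> guard(mMutex);
        auto it = mCache.find(query);
        if (it == mCache.end())
        {
            // Stand-in for preparing the statement on first use.
            it = mCache.emplace(query, "prepared:" + query).first;
        }
        return it->second; // std::map references stay valid across inserts
    }
};

The returned reference stays usable after the lock is released because std::map does not invalidate references on insertion.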
releaseAssert(threadIsMain()); releaseAssert(!mLedgerManager.isApplying()); diff --git a/src/herder/HerderPersistenceImpl.cpp b/src/herder/HerderPersistenceImpl.cpp index 8f7977212a..65c38eed0b 100644 --- a/src/herder/HerderPersistenceImpl.cpp +++ b/src/herder/HerderPersistenceImpl.cpp @@ -236,9 +236,8 @@ HerderPersistence::copySCPHistoryToStream(Database& db, soci::session& sess, XDROutputFileStream& scpHistory) { ZoneScoped; - // TODO: this may conflict with main thread, as this is done in the - // background (this is the case in master today, so can be fixed - // later). + // Subtle: changing these queries may cause conflicts with main thread + // https://github.com/stellar/stellar-core/issues/4589 uint32_t begin = ledgerSeq, end = ledgerSeq + ledgerCount; size_t n = 0; diff --git a/src/herder/HerderSCPDriver.cpp b/src/herder/HerderSCPDriver.cpp index 13f5bde947..89e3fddd14 100644 --- a/src/herder/HerderSCPDriver.cpp +++ b/src/herder/HerderSCPDriver.cpp @@ -221,15 +221,15 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, } } - auto lhhe = mLedgerManager.getLastClosedLedgerHeader(); + auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); // when checking close time, start with what we have locally - lastCloseTime = lhhe.header.scpValue.closeTime; + lastCloseTime = lcl.header.scpValue.closeTime; // if this value is not for our local state, // perform as many checks as we can - if (slotIndex != (lhhe.header.ledgerSeq + 1)) + if (slotIndex != (lcl.header.ledgerSeq + 1)) { - if (slotIndex == lhhe.header.ledgerSeq) + if (slotIndex == lcl.header.ledgerSeq) { // previous ledger if (b.closeTime != lastCloseTime) @@ -240,7 +240,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, return SCPDriver::kInvalidValue; } } - else if (slotIndex < lhhe.header.ledgerSeq) + else if (slotIndex < lcl.header.ledgerSeq) { // basic sanity check on older value if (b.closeTime >= lastCloseTime) @@ -323,7 +323,7 @@ HerderSCPDriver::validateValueHelper(uint64_t slotIndex, StellarValue const& b, res = SCPDriver::kInvalidValue; } - else if (!checkAndCacheTxSetValid(*txSet, lhhe, closeTimeOffset)) + else if (!checkAndCacheTxSetValid(*txSet, lcl, closeTimeOffset)) { CLOG_DEBUG(Herder, "HerderSCPDriver::validateValue i: {} invalid txSet {}", @@ -614,6 +614,7 @@ HerderSCPDriver::combineCandidates(uint64_t slotIndex, std::set aggSet; releaseAssert(!mLedgerManager.isApplying()); + releaseAssert(threadIsMain()); auto const& lcl = mLedgerManager.getLastClosedLedgerHeader(); Hash candidatesHash; diff --git a/src/herder/TxSetFrame.cpp b/src/herder/TxSetFrame.cpp index d80752e27a..9e69d55a5f 100644 --- a/src/herder/TxSetFrame.cpp +++ b/src/herder/TxSetFrame.cpp @@ -528,7 +528,7 @@ applySurgePricing(TxSetPhase phase, TxFrameList const& txs, Application& app) ZoneScoped; releaseAssert(threadIsMain()); releaseAssert(!app.getLedgerManager().isApplying()); - + auto const& lclHeader = app.getLedgerManager().getLastClosedLedgerHeader().header; std::vector hadTxNotFittingLane; diff --git a/src/history/test/HistoryTestsUtils.cpp b/src/history/test/HistoryTestsUtils.cpp index 0738a0372c..565c8d00f6 100644 --- a/src/history/test/HistoryTestsUtils.cpp +++ b/src/history/test/HistoryTestsUtils.cpp @@ -947,8 +947,6 @@ CatchupSimulation::externalizeLedger(HerderImpl& herder, uint32_t ledger) lcd.getLedgerSeq(), lcd.getTxSet()); herder.getHerderSCPDriver().valueExternalized( lcd.getLedgerSeq(), xdr::xdr_to_opaque(lcd.getValue())); - - // TODO: crank the clock } void diff 
--git a/src/invariant/BucketListIsConsistentWithDatabase.cpp b/src/invariant/BucketListIsConsistentWithDatabase.cpp index 529077e721..d16a9bcdf3 100644 --- a/src/invariant/BucketListIsConsistentWithDatabase.cpp +++ b/src/invariant/BucketListIsConsistentWithDatabase.cpp @@ -7,7 +7,6 @@ #include "bucket/BucketManager.h" #include "bucket/LiveBucket.h" #include "bucket/LiveBucketList.h" -#include "crypto/Hex.h" #include "database/Database.h" #include "history/HistoryArchive.h" #include "invariant/InvariantManager.h" diff --git a/src/ledger/LedgerManagerImpl.cpp b/src/ledger/LedgerManagerImpl.cpp index 2c35fb1e2f..6bb090f91d 100644 --- a/src/ledger/LedgerManagerImpl.cpp +++ b/src/ledger/LedgerManagerImpl.cpp @@ -171,7 +171,8 @@ LedgerManagerImpl::beginApply() { // Go into "applying" state, this will prevent catchup from starting mCurrentlyApplyingLedger = true; - // Notify Herder that application star:ted, so it won't fire out of sync + + // Notify Herder that application started, so it won't fire out of sync // timer mApp.getHerder().beginApply(); } @@ -252,7 +253,7 @@ LedgerManagerImpl::startNewLedger(LedgerHeader const& genesisLedger) CLOG_INFO(Ledger, "Root account seed: {}", skey.getStrKeySeed().value); auto output = ledgerClosed(ltx, /*ledgerCloseMeta*/ nullptr, /*initialLedgerVers*/ 0); - updateCurrentLedgerState(output); + advanceLedgerPointers(output); ltx.commit(); } @@ -385,8 +386,8 @@ LedgerManagerImpl::loadLastKnownLedger(bool restoreBucketlist) } // Step 4. Restore LedgerManager's internal state - auto output = advanceLedgerPointers(*latestLedgerHeader, has); - updateCurrentLedgerState(output); + auto output = advanceLedgerStateSnapshot(*latestLedgerHeader, has); + advanceLedgerPointers(output); // Maybe truncate checkpoint files if we're restarting after a crash // in closeLedger (in which case any modifications to the ledger state have @@ -766,7 +767,10 @@ LedgerManagerImpl::ledgerCloseComplete(uint32_t lcl, bool calledViaExternalize, releaseAssert(latestQueuedToApply <= latestHeardFromNetwork); } - if (lcl == latestQueuedToApply) + // Without parallel ledger close, this should always be true + bool doneApplying = lcl == latestQueuedToApply; + releaseAssert(doneApplying || mApp.getConfig().parallelLedgerClose()); + if (doneApplying) { mCurrentlyApplyingLedger = false; } @@ -816,8 +820,17 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, LedgerTxn ltx(mApp.getLedgerTxnRoot()); auto header = ltx.loadHeader(); - auto prevHeader = - threadIsMain() ? getLastClosedLedgerHeader().header : header.current(); + // Note: closeLedger should be able to work correctly based on ledger header + // stored in LedgerTxn. The issue is that in tests LedgerTxn is sometimes + // modified manually, which changes ledger header hash compared to the + // cached one and causes tests to fail. 
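Illustrative aside (not part of the patch): the doneApplying check above encodes the invariant that, without parallel ledger close, the ledger just applied is always the latest one queued. A simplified, hypothetical restatement:

#include <cassert>
#include <cstdint>

// currentlyApplying is cleared only once the apply queue has drained; with
// synchronous close the queue can never run ahead of the applied ledger.
void
onLedgerApplied(uint32_t lcl, uint32_t latestQueuedToApply, bool parallelClose,
                bool& currentlyApplying)
{
    bool doneApplying = (lcl == latestQueuedToApply);
    assert(doneApplying || parallelClose);
    if (doneApplying)
    {
        currentlyApplying = false;
    }
}

int
main()
{
    bool applying = true;
    onLedgerApplied(10, 10, /*parallelClose=*/false, applying);
    assert(!applying);
}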
+ LedgerHeader prevHeader = header.current(); +#ifdef BUILD_TESTS + if (threadIsMain()) + { + prevHeader = getLastClosedLedgerHeader().header; + } +#endif auto prevHash = xdrSha256(prevHeader); auto initialLedgerVers = header.current().ledgerVersion; @@ -830,7 +843,8 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, auto now = mApp.getClock().now(); mLedgerAgeClosed.Update(now - mLastClose); - // mLastClose is only accessed by a single thread + // mLastClose is only accessed by a single thread, so no synchronization + // needed mLastClose = now; mLedgerAge.set_count(0); @@ -913,9 +927,10 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, } // first, prefetch source accounts for txset, then charge fees - prefetchTxSourceIds(mApp.getLedgerTxnRoot(), *applicableTxSet, mApp.getConfig()); - auto const mutableTxResults = processFeesSeqNums( - *applicableTxSet, ltx, ledgerCloseMeta, ledgerData); + prefetchTxSourceIds(mApp.getLedgerTxnRoot(), *applicableTxSet, + mApp.getConfig()); + auto const mutableTxResults = + processFeesSeqNums(*applicableTxSet, ltx, ledgerCloseMeta, ledgerData); // Subtle: after this call, `header` is invalidated, and is not safe to use auto txResultSet = applyTransactions(*applicableTxSet, mutableTxResults, @@ -1008,7 +1023,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, emitNextMeta(); } - // The next 5 steps happen in a relatively non-obvious, subtle order. + // The next 7 steps happen in a relatively non-obvious, subtle order. // This is unfortunate and it would be nice if we could make it not // be so subtle, but for the time being this is where we are. // @@ -1034,7 +1049,7 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, // // 6. GC unreferenced buckets. Only do this once publishes are in progress. // - // 7. Finally, relfect newly closed ledger in LedgerManager's and Herder's + // 7. Finally, reflect newly closed ledger in LedgerManager's and Herder's // states: maybe move into SYNCED state, trigger next ledger, etc. // Step 1. Maybe queue the current checkpoint file for publishing; this @@ -1063,31 +1078,31 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, // Invoke completion handler on the _main_ thread: kick off publishing, // cleanup bucket files, notify herder to trigger next ledger - auto completionHandler = - [this, ledgerSeq, calledViaExternalize, ledgerData, - ledgerOutput = std::move(closeLedgerResult)]() mutable { - releaseAssert(threadIsMain()); - updateCurrentLedgerState(ledgerOutput); - - // Step 5. Maybe kick off publishing on complete checkpoint files - auto& hm = mApp.getHistoryManager(); - hm.publishQueuedHistory(); - hm.logAndUpdatePublishStatus(); - - // Step 6. Clean up unreferenced buckets post-apply - { - // Ledger state might be updated at the same time, so protect GC - // call with state mutex - std::lock_guard guard(mLedgerStateMutex); - mApp.getBucketManager().forgetUnreferencedBuckets( - getLastClosedLedgerHAS()); - } + auto completionHandler = [this, ledgerSeq, calledViaExternalize, ledgerData, + ledgerOutput = + std::move(closeLedgerResult)]() mutable { + releaseAssert(threadIsMain()); + advanceLedgerPointers(ledgerOutput); + + // Step 5. Maybe kick off publishing on complete checkpoint files + auto& hm = mApp.getHistoryManager(); + hm.publishQueuedHistory(); + hm.logAndUpdatePublishStatus(); + + // Step 6. 
Clean up unreferenced buckets post-apply + { + // Ledger state might be updated at the same time, so protect GC + // call with state mutex + std::lock_guard guard(mLedgerStateMutex); + mApp.getBucketManager().forgetUnreferencedBuckets( + getLastClosedLedgerHAS()); + } - // Step 7. Maybe set LedgerManager into synced state, maybe let - // Herder trigger next ledger - ledgerCloseComplete(ledgerSeq, calledViaExternalize, ledgerData); - CLOG_INFO(Ledger, "Ledger close complete: {}", ledgerSeq); - }; + // Step 7. Maybe set LedgerManager into synced state, maybe let + // Herder trigger next ledger + ledgerCloseComplete(ledgerSeq, calledViaExternalize, ledgerData); + CLOG_INFO(Ledger, "Ledger close complete: {}", ledgerSeq); + }; if (threadIsMain()) { @@ -1098,7 +1113,8 @@ LedgerManagerImpl::closeLedger(LedgerCloseData const& ledgerData, mApp.postOnMainThread(completionHandler, "ledgerCloseComplete"); } - maybeSimulateSleep(mApp.getConfig(), txSet->sizeOpTotalForLogging(), closeLedgerTime); + maybeSimulateSleep(mApp.getConfig(), txSet->sizeOpTotalForLogging(), + closeLedgerTime); std::chrono::duration ledgerTimeSeconds = ledgerTime.Stop(); CLOG_DEBUG(Perf, "Applied ledger {} in {} seconds", ledgerSeq, ledgerTimeSeconds.count()); @@ -1136,7 +1152,7 @@ LedgerManagerImpl::setLastClosedLedger( ltx.commit(); mRebuildInMemoryState = false; - updateCurrentLedgerState(advanceLedgerPointers(lastClosed.header, has)); + advanceLedgerPointers(advanceLedgerStateSnapshot(lastClosed.header, has)); LedgerTxn ltx2(mApp.getLedgerTxnRoot()); if (protocolVersionStartsFrom(ltx2.loadHeader().current().ledgerVersion, @@ -1159,8 +1175,8 @@ LedgerManagerImpl::manuallyAdvanceLedgerHeader(LedgerHeader const& header) has.fromString(mApp.getPersistentState().getState( PersistentState::kHistoryArchiveState, mApp.getDatabase().getSession())); - auto output = advanceLedgerPointers(header, has, false); - updateCurrentLedgerState(output); + auto output = advanceLedgerStateSnapshot(header, has); + advanceLedgerPointers(output); } void @@ -1307,7 +1323,7 @@ LedgerManagerImpl::getCurrentLedgerStateSnaphot() } void -LedgerManagerImpl::updateCurrentLedgerState(CloseLedgerOutput const& output) +LedgerManagerImpl::advanceLedgerPointers(CloseLedgerOutput const& output) { releaseAssert(threadIsMain()); CLOG_DEBUG( @@ -1322,9 +1338,8 @@ LedgerManagerImpl::updateCurrentLedgerState(CloseLedgerOutput const& output) } LedgerManagerImpl::CloseLedgerOutput -LedgerManagerImpl::advanceLedgerPointers(LedgerHeader const& header, - HistoryArchiveState const& has, - bool debugLog) +LedgerManagerImpl::advanceLedgerStateSnapshot(LedgerHeader const& header, + HistoryArchiveState const& has) { auto ledgerHash = xdrSha256(header); @@ -1509,8 +1524,9 @@ LedgerManagerImpl::processFeesSeqNums( } void -LedgerManagerImpl::prefetchTxSourceIds( - AbstractLedgerTxnParent& ltx,ApplicableTxSetFrame const& txSet, Config const& config) +LedgerManagerImpl::prefetchTxSourceIds(AbstractLedgerTxnParent& ltx, + ApplicableTxSetFrame const& txSet, + Config const& config) { ZoneScoped; if (config.PREFETCH_BATCH_SIZE > 0) @@ -1528,8 +1544,9 @@ LedgerManagerImpl::prefetchTxSourceIds( } void -LedgerManagerImpl::prefetchTransactionData( - AbstractLedgerTxnParent& ltx,ApplicableTxSetFrame const& txSet, Config const& config) +LedgerManagerImpl::prefetchTransactionData(AbstractLedgerTxnParent& ltx, + ApplicableTxSetFrame const& txSet, + Config const& config) { ZoneScoped; if (config.PREFETCH_BATCH_SIZE > 0) @@ -1847,7 +1864,7 @@ LedgerManagerImpl::ledgerClosed( 
mApp.getBucketManager().snapshotLedger(lh); auto has = storeCurrentLedger(lh, /* storeHeader */ true, /* appendToCheckpoint */ true); - res = advanceLedgerPointers(lh, has); + res = advanceLedgerStateSnapshot(lh, has); }); return res; diff --git a/src/ledger/LedgerManagerImpl.h b/src/ledger/LedgerManagerImpl.h index c7c3931b06..3538021a04 100644 --- a/src/ledger/LedgerManagerImpl.h +++ b/src/ledger/LedgerManagerImpl.h @@ -57,8 +57,7 @@ class LedgerManagerImpl : public LedgerManager std::filesystem::path mMetaDebugPath; private: - // Cache LCL state, updates once a ledger (synchronized with - // mLedgerStateMutex) + // Cache LCL state, accessible only from main thread LedgerHeaderHistoryEntry mLastClosedLedger; // Read-only Soroban network configuration, accessible by main thread only. @@ -74,6 +73,8 @@ class LedgerManagerImpl : public LedgerManager // variable is not synchronized, since it should only be used by one thread // (main or ledger close). std::shared_ptr mSorobanNetworkConfigForApply; + + // Cache most recent HAS, accessible only from main thread HistoryArchiveState mLastClosedLedgerHAS; SorobanMetrics mSorobanMetrics; @@ -94,17 +95,19 @@ class LedgerManagerImpl : public LedgerManager bool mRebuildInMemoryState{false}; SearchableSnapshotConstPtr mReadOnlyLedgerStateSnapshot; - // Use mutex to guard read access to LCL and Soroban network config + // Use mutex to guard ledger state during apply mutable std::recursive_mutex mLedgerStateMutex; medida::Timer& mCatchupDuration; std::unique_ptr mNextMetaToEmit; - bool mCurrentlyApplyingLedger{false}; + // Use in the context of parallel ledger close to indicate background thread + // is currently closing a ledger or has ledgers queued to apply. + bool mCurrentlyApplyingLedger{false}; static std::vector processFeesSeqNums( - ApplicableTxSetFrame const& txSet, AbstractLedgerTxn& ltxOuter, + ApplicableTxSetFrame const& txSet, AbstractLedgerTxn& ltxOuter, std::unique_ptr const& ledgerCloseMeta, LedgerCloseData const& ledgerData); @@ -125,11 +128,12 @@ class LedgerManagerImpl : public LedgerManager HistoryArchiveState storeCurrentLedger(LedgerHeader const& header, bool storeHeader, bool appendToCheckpoint); - static void - prefetchTransactionData(AbstractLedgerTxnParent& rootLtx, ApplicableTxSetFrame const& txSet, Config const& config); - static void - prefetchTxSourceIds(AbstractLedgerTxnParent& rootLtx, - ApplicableTxSetFrame const& txSet, Config const& config); + static void prefetchTransactionData(AbstractLedgerTxnParent& rootLtx, + ApplicableTxSetFrame const& txSet, + Config const& config); + static void prefetchTxSourceIds(AbstractLedgerTxnParent& rootLtx, + ApplicableTxSetFrame const& txSet, + Config const& config); State mState; @@ -145,7 +149,8 @@ class LedgerManagerImpl : public LedgerManager // as the actual ledger usage. void publishSorobanMetrics(); - void updateCurrentLedgerState(CloseLedgerOutput const& output); + // Update cached ledger state values managed by this class. 
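One more illustrative aside (not part of the patch): the advanceLedgerStateSnapshot / advanceLedgerPointers split documented here separates building the post-close state, which may happen off the main thread, from installing it into main-thread-only caches. A rough sketch of that two-phase shape, with hypothetical names:

#include <cstdint>
#include <string>
#include <utility>

struct CloseLedgerOutput
{
    std::string ledgerHash;
    uint32_t ledgerSeq = 0;
};

class LedgerStateCache
{
    // Main-thread-only: analogous to the cached LCL header and HAS members.
    CloseLedgerOutput mLastClosed;

  public:
    // May run on the apply thread: a pure function of its inputs.
    static CloseLedgerOutput
    makeSnapshot(std::string hash, uint32_t seq)
    {
        return CloseLedgerOutput{std::move(hash), seq};
    }

    // Main thread only: publishes the snapshot into the cached state.
    void
    install(CloseLedgerOutput const& output)
    {
        mLastClosed = output;
    }
};

Keeping makeSnapshot free of cache access is what lets the completion handler defer the install step to the main thread.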
+ void advanceLedgerPointers(CloseLedgerOutput const& output); protected: // initialLedgerVers must be the ledger version at the start of the ledger @@ -161,11 +166,12 @@ class LedgerManagerImpl : public LedgerManager std::unique_ptr const& ledgerCloseMeta, LedgerHeader lh, uint32_t initialLedgerVers); - // Update in-memory cached LCL state (this only happens at the end of ledger - // close) - CloseLedgerOutput advanceLedgerPointers(LedgerHeader const& header, - HistoryArchiveState const& has, - bool debugLog = true); + // Update ledger state snapshot, and construct CloseLedgerOutput return + // value, which contains all information relevant to ledger state (HAS, + // ledger header, network config, bucketlist snapshot). + CloseLedgerOutput + advanceLedgerStateSnapshot(LedgerHeader const& header, + HistoryArchiveState const& has); void logTxApplyMetrics(AbstractLedgerTxn& ltx, size_t numTxs, size_t numOps); diff --git a/src/main/AppConnector.cpp b/src/main/AppConnector.cpp index 904d6396f1..49b24d31c7 100644 --- a/src/main/AppConnector.cpp +++ b/src/main/AppConnector.cpp @@ -56,7 +56,6 @@ AppConnector::getSorobanNetworkConfigReadOnly() const SorobanNetworkConfig const& AppConnector::getSorobanNetworkConfigForApply() const { - // releaseAssert(!threadIsMain() || !mConfig.parallelLedgerClose()); return mApp.getLedgerManager().getSorobanNetworkConfigForApply(); } @@ -77,8 +76,6 @@ AppConnector::checkOnOperationApply(Operation const& operation, OperationResult const& opres, LedgerTxnDelta const& ltxDelta) { - // Only one thread can call this method - releaseAssert(threadIsMain() || mConfig.parallelLedgerClose()); mApp.getInvariantManager().checkOnOperationApply(operation, opres, ltxDelta); } @@ -142,12 +139,4 @@ AppConnector::checkScheduledAndCache( { return mApp.getOverlayManager().checkScheduledAndCache(msgTracker); } - -LedgerHeaderHistoryEntry -AppConnector::getLastClosedLedgerHeader() const -{ - // LCL is thread-safe (it's a copy) - return mApp.getLedgerManager().getLastClosedLedgerHeader(); -} - } \ No newline at end of file diff --git a/src/main/AppConnector.h b/src/main/AppConnector.h index 4f2c565982..9cdeb9be30 100644 --- a/src/main/AppConnector.h +++ b/src/main/AppConnector.h @@ -57,6 +57,5 @@ class AppConnector SorobanNetworkConfig const& getSorobanNetworkConfigForApply() const; medida::MetricsRegistry& getMetrics() const; - LedgerHeaderHistoryEntry getLastClosedLedgerHeader() const; }; } \ No newline at end of file diff --git a/src/main/ApplicationImpl.cpp b/src/main/ApplicationImpl.cpp index 022f09785f..c39db76013 100644 --- a/src/main/ApplicationImpl.cpp +++ b/src/main/ApplicationImpl.cpp @@ -591,7 +591,7 @@ ApplicationImpl::~ApplicationImpl() try { // First, shutdown ledger close queue _before_ shutting down all the - // subsystems This ensures that any ledger currently being closed + // subsystems. This ensures that any ledger currently being closed // finishes okay shutdownLedgerCloseThread(); shutdownWorkScheduler(); diff --git a/src/main/ApplicationUtils.cpp b/src/main/ApplicationUtils.cpp index 81c0cacc13..54476fe79a 100644 --- a/src/main/ApplicationUtils.cpp +++ b/src/main/ApplicationUtils.cpp @@ -271,7 +271,7 @@ setAuthenticatedLedgerHashPair(Application::pointer app, if (lm.isSynced()) { - auto lhe = lm.getLastClosedLedgerHeader(); + auto const& lhe = lm.getLastClosedLedgerHeader(); tryCheckpoint(lhe.header.ledgerSeq, lhe.hash); } else