mirror of
https://github.com/pocoproject/poco.git
synced 2025-12-09 16:36:51 +01:00
enh(MongoDB): Replica set: More robust retry of failed MongoDB commands.
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the ReadPreference class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the ReplicaSet class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "Poco/MongoDB/ReadPreference.h"
|
||||
#include "Poco/MongoDB/TopologyDescription.h"
|
||||
#include "Poco/Net/SocketAddress.h"
|
||||
#include "Poco/Logger.h"
|
||||
#include "Poco/Timespan.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@@ -97,11 +98,20 @@ public:
|
||||
Poco::Timespan heartbeatFrequency{10, 0};
|
||||
/// Topology monitoring interval (default: 10 seconds)
|
||||
|
||||
std::size_t serverReconnectRetries { 10 };
|
||||
/// Number of attempts to reconnect to a server/replica set when no server is temporarily available (default: 10)
|
||||
|
||||
std::chrono::seconds serverReconnectDelay { 1 };
|
||||
/// Delay between reconnection attempts to a server/replica set when no server is temporarily available (default: 1 second)
|
||||
|
||||
bool enableMonitoring{true};
|
||||
/// Enable background topology monitoring (default: true)
|
||||
|
||||
Connection::SocketFactory* socketFactory{nullptr};
|
||||
/// Optional socket factory for SSL/TLS connections
|
||||
|
||||
Logger::Ptr logger;
|
||||
/// Optional logger to write important information about replica set activity
|
||||
};
|
||||
|
||||
explicit ReplicaSet(const Config& config);
|
||||
@@ -145,6 +155,9 @@ public:
|
||||
/// Returns a connection to a secondary server.
|
||||
/// Returns null if no secondary is available.
|
||||
|
||||
[[nodiscard]] Config configuration() const;
|
||||
/// Returns a copy of the replica set configuration.
|
||||
|
||||
[[nodiscard]] TopologyDescription topology() const;
|
||||
/// Returns a copy of the current topology description.
|
||||
|
||||
@@ -157,6 +170,9 @@ public:
|
||||
void stopMonitoring();
|
||||
/// Stops the background monitoring thread.
|
||||
|
||||
void setLogger(Logger::Ptr logger);
|
||||
/// Sets the logger to log important replica set activity.
|
||||
|
||||
void setReadPreference(const ReadPreference& pref);
|
||||
/// Sets the default read preference.
|
||||
|
||||
@@ -173,7 +189,7 @@ private:
|
||||
void discover();
|
||||
/// Performs initial topology discovery from seed servers.
|
||||
|
||||
void monitor();
|
||||
void monitor() noexcept;
|
||||
/// Background monitoring thread function.
|
||||
|
||||
Connection::Ptr selectServer(const ReadPreference& readPref);
|
||||
@@ -182,10 +198,10 @@ private:
|
||||
Connection::Ptr createConnection(const Net::SocketAddress& address);
|
||||
/// Creates a new connection to the specified address.
|
||||
|
||||
void updateTopologyFromHello(const Net::SocketAddress& address);
|
||||
void updateTopologyFromHello(const Net::SocketAddress& address) noexcept;
|
||||
/// Queries a server with 'hello' command and updates topology.
|
||||
|
||||
void updateTopologyFromAllServers();
|
||||
void updateTopologyFromAllServers() noexcept;
|
||||
/// Queries all known servers and updates topology.
|
||||
|
||||
void parseURI(const std::string& uri);
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the ReplicaSetConnection class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
@@ -121,6 +121,9 @@ private:
|
||||
void markServerFailed();
|
||||
/// Marks the current server as failed in the topology.
|
||||
|
||||
void logInfo(const std::string& message);
|
||||
void logDebug(const std::string& message);
|
||||
|
||||
ReplicaSet& _replicaSet;
|
||||
ReadPreference _readPreference;
|
||||
Connection::Ptr _connection;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the ReplicaSetPoolableConnectionFactory class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
@@ -65,7 +65,7 @@ public:
|
||||
// Check if the connection is still valid and matches the read preference.
|
||||
// This ensures that if a server changes role (e.g., primary becomes secondary),
|
||||
// the cached connection is invalidated and a new one is created.
|
||||
return pObject->isConnected() && pObject->matchesReadPreference();
|
||||
return pObject != nullptr && pObject->isConnected() && pObject->matchesReadPreference();
|
||||
}
|
||||
|
||||
void activateObject(MongoDB::ReplicaSetConnection::Ptr pObject)
|
||||
@@ -75,7 +75,7 @@ public:
|
||||
try {
|
||||
pObject->reconnect();
|
||||
}
|
||||
catch (Poco::Exception& e)
|
||||
catch (const Poco::Exception& e)
|
||||
{
|
||||
// Ignore connect error. c->isConnected() can be used to determine if the connection is valid.
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the ServerDescription class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//
|
||||
// Definition of the TopologyDescription class.
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// Package: MongoDB
|
||||
// Module: ReadPreference
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// Package: MongoDB
|
||||
// Module: ReplicaSet
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
@@ -146,6 +146,12 @@ TopologyDescription ReplicaSet::topology() const
|
||||
}
|
||||
|
||||
|
||||
ReplicaSet::Config ReplicaSet::configuration() const
{
	// The configuration is copied while the replica set mutex is held;
	// the caller receives its own copy by value.
	std::lock_guard<std::mutex> guard(_mutex);
	Config snapshot(_config);
	return snapshot;
}
|
||||
|
||||
void ReplicaSet::refreshTopology()
|
||||
{
|
||||
updateTopologyFromAllServers();
|
||||
@@ -183,6 +189,13 @@ void ReplicaSet::stopMonitoring()
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSet::setLogger(Logger::Ptr logger)
{
	// Replace the logger stored in the configuration. The write is done
	// while holding the replica set mutex.
	std::lock_guard<std::mutex> guard(_mutex);
	_config.logger = logger;
}
|
||||
|
||||
|
||||
void ReplicaSet::setReadPreference(const ReadPreference& pref)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
@@ -238,7 +251,7 @@ void ReplicaSet::discover()
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSet::monitor()
|
||||
void ReplicaSet::monitor() noexcept
|
||||
{
|
||||
while (!_stopMonitoring.load())
|
||||
{
|
||||
@@ -328,7 +341,7 @@ Connection::Ptr ReplicaSet::createConnection(const Net::SocketAddress& address)
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSet::updateTopologyFromHello(const Net::SocketAddress& address)
|
||||
void ReplicaSet::updateTopologyFromHello(const Net::SocketAddress& address) noexcept
|
||||
{
|
||||
Connection::Ptr conn = new Connection();
|
||||
|
||||
@@ -397,7 +410,7 @@ void ReplicaSet::updateTopologyFromHello(const Net::SocketAddress& address)
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSet::updateTopologyFromAllServers()
|
||||
void ReplicaSet::updateTopologyFromAllServers() noexcept
|
||||
{
|
||||
std::vector<ServerDescription> servers;
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// Package: MongoDB
|
||||
// Module: ReplicaSetConnection
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
@@ -17,8 +17,9 @@
|
||||
#include "Poco/Net/NetException.h"
|
||||
#include "Poco/Exception.h"
|
||||
#include <set>
|
||||
#include <thread>
|
||||
|
||||
using namespace std::string_literals;
|
||||
using namespace std::literals;
|
||||
|
||||
|
||||
namespace Poco {
|
||||
@@ -41,6 +42,9 @@ enum class ErrorCode
|
||||
SocketException = 9001
|
||||
};
|
||||
|
||||
// Minimum retry count to run the MongoDB command.
|
||||
static constexpr std::size_t lowExecuteRetryThreshold { 5 };
|
||||
|
||||
|
||||
ReplicaSetConnection::ReplicaSetConnection(ReplicaSet& replicaSet, const ReadPreference& readPref):
|
||||
_replicaSet(replicaSet),
|
||||
@@ -55,7 +59,6 @@ ReplicaSetConnection::~ReplicaSetConnection() = default;
|
||||
void ReplicaSetConnection::sendRequest(OpMsgMessage& request, OpMsgMessage& response)
|
||||
{
|
||||
executeWithRetry([&]() {
|
||||
ensureConnection();
|
||||
_connection->sendRequest(request, response);
|
||||
|
||||
// Check if response contains a retriable error
|
||||
@@ -170,9 +173,12 @@ void ReplicaSetConnection::executeWithRetry(std::function<void()> operation)
|
||||
std::exception_ptr lastException;
|
||||
std::set<Net::SocketAddress> triedServers;
|
||||
|
||||
// Retry with different servers until we've tried all available servers
|
||||
TopologyDescription topology = _replicaSet.topology();
|
||||
const std::size_t maxAttempts = topology.serverCount();
|
||||
// Retry with different servers until we've tried all available servers with a minimum
|
||||
// retry threshold to cover situations when single server topology or complete replica set
|
||||
// is not available temporarily.
|
||||
auto topology = _replicaSet.topology();
|
||||
const auto rsConfig = _replicaSet.configuration();
|
||||
const std::size_t maxAttempts = std::max(topology.serverCount(), lowExecuteRetryThreshold);
|
||||
std::size_t attempt = 0;
|
||||
|
||||
while (attempt < maxAttempts)
|
||||
@@ -182,30 +188,18 @@ void ReplicaSetConnection::executeWithRetry(std::function<void()> operation)
|
||||
ensureConnection();
|
||||
triedServers.insert(_connection->address());
|
||||
operation();
|
||||
if (attempt > 0)
|
||||
logDebug(Poco::format("Operation succeeded after %Lu retries."s, attempt));
|
||||
|
||||
return; // Success
|
||||
}
|
||||
catch (const Poco::Net::NetException& e)
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
if (!isRetriableError(e))
|
||||
{
|
||||
throw; // Non-retriable network error
|
||||
}
|
||||
lastException = std::current_exception();
|
||||
}
|
||||
catch (const Poco::TimeoutException& e)
|
||||
{
|
||||
if (!isRetriableError(e))
|
||||
{
|
||||
throw; // Non-retriable timeout
|
||||
}
|
||||
lastException = std::current_exception();
|
||||
}
|
||||
catch (const Poco::IOException& e)
|
||||
{
|
||||
if (!isRetriableError(e))
|
||||
{
|
||||
throw; // Non-retriable I/O error
|
||||
throw;
|
||||
}
|
||||
// Retriable error.
|
||||
lastException = std::current_exception();
|
||||
}
|
||||
catch (...)
|
||||
@@ -221,17 +215,26 @@ void ReplicaSetConnection::executeWithRetry(std::function<void()> operation)
|
||||
|
||||
// Get new connection, avoiding servers we've already tried
|
||||
bool foundNewServer = false;
|
||||
for (std::size_t i = 0; i < 10 && !foundNewServer; ++i) // Try up to 10 times
|
||||
for (std::size_t i = 0; i < rsConfig.serverReconnectRetries && !foundNewServer; ++i) // Try several times to connect
|
||||
{
|
||||
Connection::Ptr newConn = _replicaSet.getConnection(_readPreference);
|
||||
if (newConn.isNull())
|
||||
{
|
||||
break; // No servers available
|
||||
// No servers available at this moment. Wait briefly and retry.
|
||||
std::this_thread::sleep_for(rsConfig.serverReconnectDelay);
|
||||
triedServers.clear();
|
||||
_replicaSet.refreshTopology();
|
||||
topology = _replicaSet.topology();
|
||||
if (!topology.servers().empty())
|
||||
logInfo(Poco::format("Refreshed topology. Number of servers: %Lu"s, topology.servers().size()));
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
Net::SocketAddress addr = newConn->address();
|
||||
if (triedServers.find(addr) == triedServers.end())
|
||||
{
|
||||
logDebug(Poco::format("Connection reconnected to server: %s"s, addr.toString()));
|
||||
_connection = newConn;
|
||||
foundNewServer = true;
|
||||
}
|
||||
@@ -261,30 +264,21 @@ void ReplicaSetConnection::executeWithRetry(std::function<void()> operation)
|
||||
bool ReplicaSetConnection::isRetriableError(const std::exception& e)
|
||||
{
|
||||
// Network exceptions are generally retriable
|
||||
if (dynamic_cast<const Poco::Net::NetException*>(&e))
|
||||
if (dynamic_cast<const Poco::Net::NetException*>(&e) != nullptr)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Timeout exceptions are retriable
|
||||
if (dynamic_cast<const Poco::TimeoutException*>(&e))
|
||||
if (dynamic_cast<const Poco::TimeoutException*>(&e) != nullptr)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// I/O exceptions might be retriable
|
||||
const Poco::IOException* ioEx = dynamic_cast<const Poco::IOException*>(&e);
|
||||
if (ioEx)
|
||||
// I/O exceptions are retriable
|
||||
if (dynamic_cast<const Poco::IOException*>(&e) != nullptr)
|
||||
{
|
||||
const auto& msg = ioEx->message();
|
||||
// Check for specific retriable error messages
|
||||
if (msg.find("not master"s) != std::string::npos ||
|
||||
msg.find("NotMaster"s) != std::string::npos ||
|
||||
msg.find("Connection"s) != std::string::npos ||
|
||||
msg.find("connection"s) != std::string::npos)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -349,4 +343,22 @@ void ReplicaSetConnection::markServerFailed()
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSetConnection::logInfo(const std::string& message)
|
||||
{
|
||||
auto cfg { _replicaSet.configuration() };
|
||||
if (cfg.logger == nullptr) return;
|
||||
|
||||
cfg.logger->information("MongoDB replica set: "s + message);
|
||||
}
|
||||
|
||||
|
||||
void ReplicaSetConnection::logDebug(const std::string& message)
|
||||
{
|
||||
auto cfg { _replicaSet.configuration() };
|
||||
if (cfg.logger == nullptr) return;
|
||||
|
||||
cfg.logger->debug("MongoDB replica set: "s + message);
|
||||
}
|
||||
|
||||
|
||||
} } // namespace Poco::MongoDB
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// Package: MongoDB
|
||||
// Module: ServerDescription
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// Package: MongoDB
|
||||
// Module: TopologyDescription
|
||||
//
|
||||
// Copyright (c) 2012-2025, Applied Informatics Software Engineering GmbH.
|
||||
// Copyright (c) 2025, Applied Informatics Software Engineering GmbH.
|
||||
// and Contributors.
|
||||
//
|
||||
// SPDX-License-Identifier: BSL-1.0
|
||||
|
||||
Reference in New Issue
Block a user