From 7854242b7dbb9b398f8770fecebec61ed18fb504 Mon Sep 17 00:00:00 2001 From: Logan Gore Date: Thu, 1 Sep 2022 16:03:31 -0700 Subject: [PATCH] Use singleton per-region for S3Client (#384) Summary: Pull Request resolved: https://github.com/facebookresearch/fbpcf/pull/384 # What * Use a singleton for each region when constructing our S3Client instead of a _single_ singleton * This is kind of a follow-up to D36727729 (https://github.com/facebookresearch/fbpcf/commit/47182c669b94f972d401d2acab937e971219034c) # Why * Follow-up to S281873 - S3Client singleton breaks multi-region support in PCF IO NOTE: PCF should be owning this code in the long-term since it's out of PSI's scope, but since we owned the initial SEV, I took ownership of this follow-up. Differential Revision: D39174441 fbshipit-source-id: 73dd6d6c88e216da3f99573689ef4c4eaa7d16ed --- fbpcf/io/cloud_util/S3Client.cpp | 38 ++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/fbpcf/io/cloud_util/S3Client.cpp b/fbpcf/io/cloud_util/S3Client.cpp index 83fddb9c..9dd03577 100644 --- a/fbpcf/io/cloud_util/S3Client.cpp +++ b/fbpcf/io/cloud_util/S3Client.cpp @@ -7,12 +7,46 @@ #include "fbpcf/io/cloud_util/S3Client.h" +#include + #include #include +#include +#include + namespace fbpcf::cloudio { S3Client& S3Client::getInstance(const fbpcf::aws::S3ClientOption& option) { - static S3Client s3Client(option); - return s3Client; + /* Due to previous problems, we create a Singleton instance of the S3Client, + * but there's a catch: we need a distinct S3Client for each region, or we + * run into other issues. For that reason, we store this map from string to + * S3Client with the assumption that the keys are region names. Since region + * is optional, we also allow for a default empty string region. + * ***************************** NOT THREAD SAFE **************************** + * NOTE: Significant refactoring is required to make this thread safe + * Downstream usage wants a mutable reference, but a folly::Synchronized + * RWLock will return a const ref to a reader, meaning it's hard to refactor. + * Simply trying to use folly::Synchronized around the map isn't sufficient, + * because we'll leak a reference to an object in the map which is unsafe. + * ***************************** NOT THREAD SAFE **************************** + */ + static folly::Synchronized> m; + + std::string defaultStr{}; + auto region = option.region.value_or(defaultStr); + + m.withWLock([&](auto& clientMap) { + if (clientMap.find(region) == clientMap.end()) { + clientMap.at(region) = S3Client{option}; + } + }); + + /* You may see this and think, "Hey, the NOT THREAD SAFE warning above is + * outdated, it looks like we fixed it!", but you're wrong. This still does + * not fully solve the problem. Because the downstream consumer takes a + * mutable reference, there's no guarantee that this is thread safe. It's + * better than nothing, but you still shouldn't fully trust this code. + */ + return m.wlock()->at(region); } } // namespace fbpcf::cloudio