From a2a6b4ad932d31f0b485250409b3d451f7581b18 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Thu, 23 Oct 2025 16:07:33 -0700 Subject: [PATCH] [CAS] Cleanup chaining for UnifiedOnDiskCache Previously, the chaining of KeyValueDB and OnDiskGraphDB was not consistent. Some operations were implemented directly in the lowest layer, some in the UnifiedOnDiskCache layer, and some in the ActionCache/ObjectStore layer. This change moves all of the chaining logic down into the OnDiskGraphDB and OnDiskKeyValueDB layers, with the exception that KeyValueDB chaining still needs the help of functions in the UnifiedOnDiskCache layer. This cleans up the interfaces for UnifiedOnDiskCache member functions so it only contains database management functions. Old functions like `KVPut/Get` can be implemented directly on the underlying database with a small wrapper around it (see the libCASPluginTest.dylib implementation for the simple wrapper needed). --- .../llvm/CAS/BuiltinUnifiedCASDatabases.h | 6 +- llvm/include/llvm/CAS/ObjectStore.h | 18 +- llvm/include/llvm/CAS/OnDiskGraphDB.h | 14 +- llvm/include/llvm/CAS/OnDiskKeyValueDB.h | 13 +- llvm/include/llvm/CAS/UnifiedOnDiskCache.h | 43 +-- llvm/lib/CAS/ActionCaches.cpp | 53 +++- llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp | 4 +- llvm/lib/CAS/OnDiskCAS.cpp | 20 +- llvm/lib/CAS/OnDiskGraphDB.cpp | 34 +- llvm/lib/CAS/OnDiskKeyValueDB.cpp | 19 +- llvm/lib/CAS/UnifiedOnDiskCache.cpp | 291 ++++++++---------- .../libCASPluginTest/libCASPluginTest.cpp | 32 +- llvm/unittests/CAS/OnDiskCommonUtils.h | 21 ++ llvm/unittests/CAS/OnDiskGraphDBTest.cpp | 8 +- llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp | 17 +- 15 files changed, 319 insertions(+), 274 deletions(-) diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index bd0a0b65f57f8..6c165c421b168 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -1,4 +1,4 @@ -//===- 
BuiltinUnifiedCASDatabases.h -----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -42,7 +42,7 @@ enum class ValidationResult { /// marking the files for garbage collection. /// \param ForceValidation Whether to force validation to occur even if it /// should not be necessary. -/// \param LLVMCasBinary If provided, validation is performed out-of-process +/// \param LLVMCasBinaryPath If provided, validation is performed out-of-process /// using the given \c llvm-cas executable which protects against crashes /// during validation. Otherwise validation is performed in-process. /// @@ -52,7 +52,7 @@ enum class ValidationResult { /// in an invalid state because \p AllowRecovery is false. Expected validateOnDiskUnifiedCASDatabasesIfNeeded( StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, - std::optional LLVMCasBinary); + std::optional LLVMCasBinaryPath); } // namespace llvm::cas diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h index f451e613dccdd..a08f3982d3a19 100644 --- a/llvm/include/llvm/CAS/ObjectStore.h +++ b/llvm/include/llvm/CAS/ObjectStore.h @@ -5,6 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the ObjectStore class. +/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_OBJECTSTORE_H #define LLVM_CAS_OBJECTSTORE_H @@ -246,7 +251,7 @@ class ObjectStore { /// Set the size for limiting growth of on-disk storage. This has an effect /// for when the instance is closed. /// - /// Implementations may be not have this implemented. 
+ /// Implementations may leave this unimplemented. virtual Error setSizeLimit(std::optional SizeLimit) { return Error::success(); } @@ -262,7 +267,7 @@ class ObjectStore { /// Prune local storage to reduce its size according to the desired size /// limit. Pruning can happen concurrently with other operations. /// - /// Implementations may be not have this implemented. + /// Implementations may leave this unimplemented. virtual Error pruneStorageData() { return Error::success(); } /// Validate the whole node tree. @@ -291,13 +296,9 @@ class ObjectStore { /// Reference to an abstract hierarchical node, with data and references. /// Reference is passed by value and is expected to be valid as long as the \a /// ObjectStore is. -/// -/// TODO: Expose \a ObjectStore::readData() and only call \a -/// ObjectStore::getDataString() when asked. class ObjectProxy { public: - const ObjectStore &getCAS() const { return *CAS; } - ObjectStore &getCAS() { return *CAS; } + ObjectStore &getCAS() const { return *CAS; } CASID getID() const { return CAS->getID(Ref); } ObjectRef getRef() const { return Ref; } size_t getNumReferences() const { return CAS->getNumRefs(H); } @@ -352,12 +353,13 @@ class ObjectProxy { ObjectHandle H; }; +/// Create an in memory CAS. std::unique_ptr createInMemoryCAS(); /// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled. bool isOnDiskCASEnabled(); -/// Gets or creates a persistent on-disk path at \p Path. +/// Create a persistent on-disk path at \p Path. Expected> createOnDiskCAS(const Twine &Path); /// Set \p Path to a reasonable default on-disk path for a persistent CAS for diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 1dfb0d38daa27..2c3ee5a9b6ab3 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -341,13 +341,16 @@ class OnDiskGraphDB { /// \param HashByteSize Size for the object digest hash bytes. 
/// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes /// if they don't exist in the primary store. The upstream store is only used - /// for reading nodes, new nodes are only written to the primary store. + /// for reading nodes, new nodes are only written to the primary store. The + /// caller must make sure \p UpstreamDB outlives the current instance of + /// OnDiskGraphDB; the common usage is to have a \p UnifiedOnDiskCache + /// manage both. /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied /// to primary store. This is recorded at creation time and subsequent opens /// need to pass the same policy otherwise the \p open will fail. static Expected> open(StringRef Path, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB = nullptr, + OnDiskGraphDB *UpstreamDB = nullptr, std::shared_ptr Logger = nullptr, FaultInPolicy Policy = FaultInPolicy::FullTree); @@ -440,9 +443,8 @@ class OnDiskGraphDB { // Private constructor. OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, - OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, FaultInPolicy Policy, - std::shared_ptr Logger); + OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB, + FaultInPolicy Policy, std::shared_ptr Logger); /// Mapping from hash to object reference. /// @@ -461,7 +463,7 @@ class OnDiskGraphDB { std::string RootPath; /// Optional on-disk store to be used for faulting-in nodes. - std::unique_ptr UpstreamDB; + OnDiskGraphDB* UpstreamDB = nullptr; /// The policy used to fault in data from upstream. 
FaultInPolicy FIPolicy; diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h index fb2aba4b1526e..0dfd0345cd142 100644 --- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -19,6 +19,8 @@ namespace llvm::cas::ondisk { +class UnifiedOnDiskCache; + /// An on-disk key-value data store with the following properties: /// * Keys are fixed length binary hashes with expected normal distribution. /// * Values are buffers of the same size, specified at creation time. @@ -59,9 +61,13 @@ class OnDiskKeyValueDB { /// \param KeySize Size for the key hash bytes. /// \param ValueName Identifier name for the values. /// \param ValueSize Size for the value bytes. + /// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size + /// and lifetime of the CAS instance; if provided, it must own the + /// KeyValueDB being opened once initialization completes. static Expected> open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *UnifiedCache = nullptr, std::shared_ptr Logger = nullptr); using CheckValueT = @@ -71,11 +77,14 @@ class OnDiskKeyValueDB { Error validate(CheckValueT CheckValue) const; private: - OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) - : ValueSize(ValueSize), Cache(std::move(Cache)) {} + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache, + UnifiedOnDiskCache *UnifiedCache) + : ValueSize(ValueSize), Cache(std::move(Cache)), + UnifiedCache(UnifiedCache) {} const size_t ValueSize; OnDiskTrieRawHashMap Cache; + UnifiedOnDiskCache *UnifiedCache = nullptr; }; } // namespace llvm::cas::ondisk diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index f00ab92dd29c7..e34919a60c278 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.h 
-------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -33,7 +33,7 @@ class OnDiskKeyValueDB; /// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open /// for a limited period of time, e.g. for the duration of a build operation. /// For long-living processes that need periodic access to a -/// \p UnifiedOnDiskCache, the client should device a scheme where access is +/// \p UnifiedOnDiskCache, the client should devise a scheme where access is /// performed within some defined period. For example, if a service is designed /// to continuously wait for requests that access a \p UnifiedOnDiskCache, it /// could keep the instance alive while new requests are coming in but close it @@ -43,28 +43,8 @@ class UnifiedOnDiskCache { /// The \p OnDiskGraphDB instance for the open directory. OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; } - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// - /// \param Key the hash bytes for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. - Expected KVPut(ArrayRef Key, ObjectID Value); - - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// An \p ObjectID as a key is equivalent to its digest bytes. - /// - /// \param Key the \p ObjectID for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. 
- Expected KVPut(ObjectID Key, ObjectID Value); - - /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated - /// with the \p Key, or \p std::nullopt if the key does not exist. - Expected> KVGet(ArrayRef Key); + /// The \p OnDiskKeyValueDB instance for the open directory. + OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; } /// Open a \p UnifiedOnDiskCache instance for a directory. /// @@ -150,18 +130,23 @@ class UnifiedOnDiskCache { static Error collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger = nullptr); + /// Remove unused data from the current UnifiedOnDiskCache. Error collectGarbage(); - ~UnifiedOnDiskCache(); + /// Helper functions to convert between the value stored in KeyValueDB and ObjectID. + static ObjectID getObjectIDFromValue(ArrayRef Value); - Error validateActionCache(); + using ValueBytes = std::array; + static ValueBytes getValueFromObjectID(ObjectID ID); - OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; } + ~UnifiedOnDiskCache(); private: + friend class OnDiskGraphDB; + friend class OnDiskKeyValueDB; UnifiedOnDiskCache(); - Expected> + Expected>> faultInFromUpstreamKV(ArrayRef Key); /// \returns the storage size of the primary directory. 
@@ -175,7 +160,7 @@ class UnifiedOnDiskCache { std::atomic NeedsGarbageCollection; std::string PrimaryDBDir; - OnDiskGraphDB *UpstreamGraphDB = nullptr; + std::unique_ptr UpstreamGraphDB; std::unique_ptr PrimaryGraphDB; std::unique_ptr UpstreamKVDB; diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 3097d811d6a59..5589ccc4f3a38 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -13,15 +13,13 @@ #include "BuiltinCAS.h" #include "llvm/ADT/TrieRawHashMap.h" #include "llvm/CAS/ActionCache.h" -#include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" #define DEBUG_TYPE "cas-action-caches" @@ -67,6 +65,7 @@ class InMemoryActionCache final : public ActionCache { InMemoryCacheT Cache; }; +/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB. class OnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, @@ -87,6 +86,8 @@ class OnDiskActionCache final : public ActionCache { using DataT = CacheEntry; }; +/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide +/// access to its ActionCache. 
class UnifiedOnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, @@ -118,7 +119,8 @@ static Error createResultCachePoisonedError(ArrayRef KeyHash, } Expected> -InMemoryActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { +InMemoryActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { auto Result = Cache.find(Key); if (!Result) return std::nullopt; @@ -169,9 +171,9 @@ OnDiskActionCache::create(StringRef AbsPath) { ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger)) return std::move(E); std::unique_ptr DB; - if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(), - sizeof(HashType), getHashName(), - sizeof(DataT), std::move(Logger)) + if (Error E = ondisk::OnDiskKeyValueDB::open( + AbsPath, getHashName(), sizeof(HashType), getHashName(), + sizeof(DataT), /*UnifiedCache=*/nullptr, std::move(Logger)) .moveInto(DB)) return std::move(E); return std::unique_ptr( @@ -179,7 +181,8 @@ OnDiskActionCache::create(StringRef AbsPath) { } Expected> -OnDiskActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { +OnDiskActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { std::optional> Val; if (Error E = DB->get(Key).moveInto(Val)) return std::move(E); @@ -218,13 +221,14 @@ UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( Expected> UnifiedOnDiskActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { - std::optional Val; - if (Error E = UniDB->KVGet(Key).moveInto(Val)) + std::optional> Val; + if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val)) return std::move(E); if (!Val) return std::nullopt; + auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val); return CASID::create(&getContext(), - toStringRef(UniDB->getGraphDB().getDigest(*Val))); + toStringRef(UniDB->getGraphDB().getDigest(ID))); } Error UnifiedOnDiskActionCache::putImpl(ArrayRef Key, @@ -233,20 +237,35 @@ Error 
UnifiedOnDiskActionCache::putImpl(ArrayRef Key, auto Expected = UniDB->getGraphDB().getReference(Result.getHash()); if (LLVM_UNLIKELY(!Expected)) return Expected.takeError(); - std::optional Observed; - if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed)) + + auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected); + std::optional> Observed; + if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed)) return E; - if (*Expected == Observed) + auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed); + if (*Expected == ObservedID) return Error::success(); return createResultCachePoisonedError( - Key, getContext(), Result, - UniDB->getGraphDB().getDigest(*Observed)); + Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID)); } Error UnifiedOnDiskActionCache::validate() const { - return UniDB->validateActionCache(); + auto ValidateRef = [](FileOffset Offset, ArrayRef Value) -> Error { + auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value); + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + if (ID.getOpaqueData() == 0) + return formatError("zero is not a valid ref"); + return Error::success(); + }; + return UniDB->getKeyValueDB().validate(ValidateRef); } Expected> diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp index 40d898e4b7f56..f3f6fa043bc52 100644 --- a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -1,4 +1,4 @@ -//===- BuiltinUnifiedCASDatabases.cpp ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -35,4 +35,4 @@ Expected cas::validateOnDiskUnifiedCASDatabasesIfNeeded( #else return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); #endif -} \ No newline at end of file +} diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp index dee2f0ec76dea..ab5aec88ede60 100644 --- a/llvm/lib/CAS/OnDiskCAS.cpp +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -43,8 +43,8 @@ class OnDiskCAS : public BuiltinCAS { static Expected> open(StringRef Path); - OnDiskCAS(std::shared_ptr UniDB_) - : UniDB(std::move(UniDB_)), DB(&UniDB->getGraphDB()) {} + OnDiskCAS(std::shared_ptr UniDB) + : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {} private: ObjectHandle convertHandle(ondisk::ObjectHandle Node) const { @@ -67,10 +67,12 @@ class OnDiskCAS : public BuiltinCAS { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); return std::distance(RefsRange.begin(), RefsRange.end()); } + ObjectRef readRef(ObjectHandle Node, size_t I) const final { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); return convertRef(RefsRange.begin()[I]); } + Error forEachRef(ObjectHandle Node, function_ref Callback) const final; @@ -78,11 +80,11 @@ class OnDiskCAS : public BuiltinCAS { Expected> getStorageSize() const final; Error pruneStorageData() final; - OnDiskCAS(std::unique_ptr DB_) - : OwnedDB(std::move(DB_)), DB(OwnedDB.get()) {} + OnDiskCAS(std::unique_ptr GraphDB) + : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {} std::unique_ptr OwnedDB; - std::shared_ptr UniDB; + std::shared_ptr UnifiedDB; ondisk::OnDiskGraphDB *DB; }; @@ -99,8 +101,6 @@ Error OnDiskCAS::validate(bool CheckHash) const { if (auto E = DB->validate(CheckHash, Hasher)) return E; - if (UniDB && UniDB->getUpstreamGraphDB()) - return UniDB->getUpstreamGraphDB()->validate(CheckHash, Hasher); return Error::success(); } @@ -165,15 +165,15 @@ Error OnDiskCAS::forEachRef(ObjectHandle Node, } Error OnDiskCAS::setSizeLimit(std::optional SizeLimit) { - UniDB->setSizeLimit(SizeLimit); + 
UnifiedDB->setSizeLimit(SizeLimit); return Error::success(); } Expected> OnDiskCAS::getStorageSize() const { - return UniDB->getStorageSize(); + return UnifiedDB->getStorageSize(); } -Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); } +Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); } Expected> OnDiskCAS::open(StringRef AbsPath) { std::shared_ptr Logger; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 43fe8dc363098..8e123ca028e51 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -907,6 +907,10 @@ int64_t DataRecordHandle::getDataRelOffset() const { } Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { + if (UpstreamDB) { + if (auto E = UpstreamDB->validate(Deep, Hasher)) + return E; + } return Index.validate([&](FileOffset Offset, OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { @@ -1216,11 +1220,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { return I.takeError(); TrieRecord::Data Object = I->Ref.load(); - if (Object.SK == TrieRecord::StorageKind::Unknown) { - if (!UpstreamDB) - return std::nullopt; + if (Object.SK == TrieRecord::StorageKind::Unknown) return faultInFromUpstream(ExternalRef); - } if (Object.SK == TrieRecord::StorageKind::DataPool) return ObjectHandle::fromFileOffset(Object.Offset); @@ -1300,8 +1301,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, TrieRecord::Data Object = I->Ref.load(); if (Object.SK != TrieRecord::StorageKind::Unknown) return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) return ObjectPresence::Missing; + std::optional UpstreamID = UpstreamDB->getExistingReference(getDigest(*I)); return UpstreamID.has_value() ? 
ObjectPresence::OnlyInUpstreamDB @@ -1563,10 +1566,11 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { return std::max(IndexPercent, DataPercent); } -Expected> OnDiskGraphDB::open( - StringRef AbsPath, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB, - std::shared_ptr Logger, FaultInPolicy Policy) { +Expected> +OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName, + unsigned HashByteSize, OnDiskGraphDB *UpstreamDB, + std::shared_ptr Logger, + FaultInPolicy Policy) { if (std::error_code EC = sys::fs::create_directories(AbsPath)) return createFileError(AbsPath, EC); @@ -1621,17 +1625,16 @@ Expected> OnDiskGraphDB::open( return std::unique_ptr( new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), - std::move(UpstreamDB), Policy, std::move(Logger))); + UpstreamDB, Policy, std::move(Logger))); } OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, - FaultInPolicy Policy, + OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy, std::shared_ptr Logger) : Index(std::move(Index)), DataPool(std::move(DataPool)), - RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), - FIPolicy(Policy), Logger(std::move(Logger)) { + RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy), + Logger(std::move(Logger)) { /// Lifetime for "big" objects not in DataPool. /// /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something @@ -1654,7 +1657,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, // against the process dying during importing and leaving the database with an // incomplete tree. Note that if the upstream has missing nodes then the tree // will be copied with missing nodes as well, it won't be considered an error. 
- struct UpstreamCursor { ObjectHandle Node; size_t RefsCount; @@ -1736,7 +1738,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, // Copy the node data into the primary store. // FIXME: Use hard-link or cloning if the file-system supports it and data is // stored into a separate file. - auto Data = UpstreamDB->getObjectData(UpstreamNode); auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode); SmallVector Refs; @@ -1753,7 +1754,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, Expected> OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { - assert(UpstreamDB); + if (!UpstreamDB) + return std::nullopt; auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID)); if (LLVM_UNLIKELY(!UpstreamID)) diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index 709c639fa7012..d67d4455bcdd8 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -20,6 +20,7 @@ #include "llvm/CAS/OnDiskKeyValueDB.h" #include "OnDiskCommon.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" @@ -53,15 +54,21 @@ Expected>> OnDiskKeyValueDB::get(ArrayRef Key) { // Check the result cache. OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); - if (!ActionP) + if (ActionP) { + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; + } + if (!UnifiedCache || !UnifiedCache->UpstreamKVDB) return std::nullopt; - assert(isAddrAligned(Align(8), ActionP->Data.data())); - return ActionP->Data; + + // Try to fault in from upstream. 
+ return UnifiedCache->faultInFromUpstreamKV(Key); } Expected> OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *Cache, std::shared_ptr Logger) { if (std::error_code EC = sys::fs::create_directories(Path)) return createFileError(Path, EC); @@ -89,10 +96,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, return std::move(E); return std::unique_ptr( - new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache)); } Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + if (UnifiedCache && UnifiedCache->UpstreamKVDB) { + if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue)) + return E; + } return Cache.validate( [&](FileOffset Offset, OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index ec5ada21912f1..d4b19c3cb91f3 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.cpp -----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,60 +6,62 @@ // //===----------------------------------------------------------------------===// // -// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one -// directory while also restricting storage growth with a scheme of chaining the -// two most recent directories (primary & upstream), where the primary -// "faults-in" data from the upstream one. When the primary (most recent) -// directory exceeds its intended limit a new empty directory becomes the -// primary one. 
-// -// Within the top-level directory (the path that \p UnifiedOnDiskCache::open -// receives) there are directories named like this: -// -// 'v.' -// 'v..' -// ... -// -// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and -// the part after the dot is an increasing integer. The primary directory is the -// one with the highest integer and the upstream one is the directory before it. -// For example, if the sub-directories contained are: -// -// 'v1.5', 'v1.6', 'v1.7', 'v1.8' -// -// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are -// unused directories that can be safely deleted at any time and by any process. -// -// Contained within the top-level directory is a file named "lock" which is used -// for processes to take shared or exclusive locks for the contents of the top -// directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock for -// the top-level directory; when it closes, if the primary sub-directory -// exceeded its limit, it attempts to get an exclusive lock in order to create a -// new empty primary directory; if it can't get the exclusive lock it gives up -// and lets the next \p UnifiedOnDiskCache instance that closes to attempt -// again. -// -// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a -// directory, by any process, the storage size in that directory will keep -// growing unrestricted. But the major benefit is that garbage-collection can be -// triggered on a directory concurrently, at any time and by any process, -// without affecting any active readers/writers in the same process or other -// processes. -// -// The \c UnifiedOnDiskCache also provides validation and recovery on top of the -// underlying on-disk storage. The low-level storage is designed to remain -// coherent across regular process crashes, but may be invalid after power loss -// or similar system failures. 
\c UnifiedOnDiskCache::validateIfNeeded allows -// validating the contents once per boot and can recover by marking invalid -// data for garbage collection. -// -// The data recovery described above requires exclusive access to the CAS, and -// it is an error to attempt recovery if the CAS is open in any process/thread. -// In order to maximize backwards compatibility with tools that do not perform -// validation before opening the CAS, we do not attempt to get exclusive access -// until recovery is actually performed, meaning as long as the data is valid -// it will not conflict with concurrent use. +/// \file +/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one +/// directory while also restricting storage growth with a scheme of chaining +/// the two most recent directories (primary & upstream), where the primary +/// "faults-in" data from the upstream one. When the primary (most recent) +/// directory exceeds its intended limit a new empty directory becomes the +/// primary one. +/// +/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open +/// receives) there are directories named like this: +/// +/// 'v<version>.<x>' +/// 'v<version>.<x+1>' +/// 'v<version>.<x+2>' +/// ... +/// +/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and +/// the part after the dot is an increasing integer. The primary directory is +/// the one with the highest integer and the upstream one is the directory +/// before it. For example, if the sub-directories contained are: +/// +/// 'v1.5', 'v1.6', 'v1.7', 'v1.8' +/// +/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are +/// unused directories that can be safely deleted at any time and by any +/// process. +/// +/// Contained within the top-level directory is a file named "lock" which is +/// used for processes to take shared or exclusive locks for the contents of the +/// top directory. 
While a \p UnifiedOnDiskCache is open it keeps a shared lock +/// for the top-level directory; when it closes, if the primary sub-directory +/// exceeded its limit, it attempts to get an exclusive lock in order to create +/// a new empty primary directory; if it can't get the exclusive lock it gives +/// up and lets the next \p UnifiedOnDiskCache instance that closes to attempt +/// again. +/// +/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a +/// directory, by any process, the storage size in that directory will keep +/// growing unrestricted. But the major benefit is that garbage-collection can +/// be triggered on a directory concurrently, at any time and by any process, +/// without affecting any active readers/writers in the same process or other +/// processes. +/// +/// The \c UnifiedOnDiskCache also provides validation and recovery on top of +/// the underlying on-disk storage. The low-level storage is designed to remain +/// coherent across regular process crashes, but may be invalid after power loss +/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +/// validating the contents once per boot and can recover by marking invalid +/// data for garbage collection. +/// +/// The data recovery described above requires exclusive access to the CAS, and +/// it is an error to attempt recovery if the CAS is open in any process/thread. +/// In order to maximize backwards compatibility with tools that do not perform +/// validation before opening the CAS, we do not attempt to get exclusive access +/// until recovery is actually performed, meaning as long as the data is valid +/// it will not conflict with concurrent use. 
// //===----------------------------------------------------------------------===// @@ -73,6 +75,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/CAS/ActionCache.h" #include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Compiler.h" @@ -102,38 +105,22 @@ static constexpr StringLiteral DBDirPrefix = "v1."; static constexpr StringLiteral ValidationFilename = "v1.validation"; static constexpr StringLiteral CorruptPrefix = "corrupt."; -Expected UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) { - return KVPut(PrimaryGraphDB->getDigest(Key), Value); +ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef Value) { + // little endian encoded. + assert(Value.size() == sizeof(uint64_t)); + return ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); } -Expected UnifiedOnDiskCache::KVPut(ArrayRef Key, - ObjectID Value) { - static_assert(sizeof(Value.getOpaqueData()) == sizeof(uint64_t), - "unexpected return opaque type"); - std::array ValBytes; - support::endian::write64le(ValBytes.data(), Value.getOpaqueData()); - Expected> Existing = PrimaryKVDB->put(Key, ValBytes); - if (!Existing) - return Existing.takeError(); - assert(Existing->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Existing->data())); +UnifiedOnDiskCache::ValueBytes +UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) { + // little endian encoded. 
+ UnifiedOnDiskCache::ValueBytes ValBytes; + static_assert(ValBytes.size() == sizeof(ID.getOpaqueData())); + support::endian::write64le(ValBytes.data(), ID.getOpaqueData()); + return ValBytes; } -Expected> -UnifiedOnDiskCache::KVGet(ArrayRef Key) { - std::optional> Value; - if (Error E = PrimaryKVDB->get(Key).moveInto(Value)) - return std::move(E); - if (!Value) { - if (UpstreamKVDB) - return faultInFromUpstreamKV(Key); - return std::nullopt; - } - assert(Value->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Value->data())); -} - -Expected> +Expected>> UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { assert(UpstreamGraphDB); assert(UpstreamKVDB); @@ -147,48 +134,24 @@ UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { // The value is the \p ObjectID in the context of the upstream // \p OnDiskGraphDB instance. Translate it to the context of the primary // \p OnDiskGraphDB instance. - assert(UpstreamValue->size() == sizeof(uint64_t)); - ObjectID UpstreamID = ObjectID::fromOpaqueData( - support::endian::read64le(UpstreamValue->data())); + ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue); auto PrimaryID = PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID)); if (LLVM_UNLIKELY(!PrimaryID)) return PrimaryID.takeError(); - return KVPut(Key, *PrimaryID); -} - -Error UnifiedOnDiskCache::validateActionCache() { - auto ValidateRef = [&](FileOffset Offset, ArrayRef Value) -> Error { - assert(Value.size() == sizeof(uint64_t) && "should be validated already"); - auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); - auto formatError = [&](Twine Msg) { - return createStringError( - llvm::errc::illegal_byte_sequence, - "bad record at 0x" + - utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + - Msg.str()); - }; - if (ID.getOpaqueData() == 0) - return formatError("zero is not a valid ref"); - return Error::success(); - }; - if (Error E = 
PrimaryKVDB->validate(ValidateRef)) - return E; - if (UpstreamKVDB) - return UpstreamKVDB->validate(ValidateRef); - return Error::success(); + return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID)); } /// \returns all the 'v.' names of sub-directories, sorted with /// ascending order of the integer after the dot. Corrupt directories, if /// included, will come first. -static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, - bool IncludeCorrupt = false) { +static Expected> +getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) { struct DBDir { uint64_t Order; std::string Name; }; - SmallVector FoundDBDirs; + SmallVector FoundDBDirs; std::error_code EC; for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE; @@ -214,26 +177,28 @@ static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool { return LHS.Order <= RHS.Order; }); + + SmallVector DBDirs; for (DBDir &Dir : FoundDBDirs) DBDirs.push_back(std::move(Dir.Name)); - return Error::success(); + return DBDirs; } -static Error getAllGarbageDirs(StringRef Path, - SmallVectorImpl &DBDirs) { - if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true)) - return E; +static Expected> getAllGarbageDirs(StringRef Path) { + auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true); + if (!DBDirs) + return DBDirs.takeError(); // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure // out how to handle the leftover sub-directories of the previous version. 
- for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) { - StringRef Back(DBDirs.back()); + for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) { + StringRef Back(DBDirs->back()); if (Back.starts_with(CorruptPrefix)) break; - DBDirs.pop_back(); + DBDirs->pop_back(); } - return Error::success(); + return *DBDirs; } /// \returns Given a sub-directory named 'v.', it outputs the @@ -301,7 +266,8 @@ static Error validateInProcess(StringRef RootPath, StringRef HashName, auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); if (Error E = CAS->validate(CheckHash)) return E; - if (Error E = UniDB->validateActionCache()) + auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB); + if (Error E = Cache->validate()) return E; return Error::success(); } @@ -325,15 +291,14 @@ static Expected getBootTime() { return createFileError("/proc", EC); return Status.getLastModificationTime().time_since_epoch().count(); #else - llvm::report_fatal_error("unimplemented"); + llvm::report_fatal_error("getBootTime unimplemented"); #endif } -Expected -UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, - unsigned HashByteSize, bool CheckHash, - bool AllowRecovery, bool ForceValidation, - std::optional LLVMCasBinary) { +Expected UnifiedOnDiskCache::validateIfNeeded( + StringRef RootPath, StringRef HashName, unsigned HashByteSize, + bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinaryPath) { if (std::error_code EC = sys::fs::create_directories(RootPath)) return createFileError(RootPath, EC); @@ -381,7 +346,8 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, return; Logger->log_UnifiedOnDiskCache_validateIfNeeded( RootPath, BootTime, ValidationBootTime, CheckHash, AllowRecovery, - ForceValidation, LLVMCasBinary, LogValidationError, Skipped, Recovered); + ForceValidation, LLVMCasBinaryPath, LogValidationError, Skipped, + Recovered); }); if (ValidationBootTime == 
BootTime && !ForceValidation) { @@ -392,8 +358,8 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, // Validate! bool NeedsRecovery = false; Error E = - LLVMCasBinary - ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash) + LLVMCasBinaryPath + ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash) : validateInProcess(RootPath, HashName, HashByteSize, CheckHash); if (E) { if (Logger) @@ -425,11 +391,11 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, } auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); - for (StringRef DBDir : DBDirs) { + for (StringRef DBDir : *DBDirs) { sys::path::remove_filename(PathBuf); sys::path::append(PathBuf, DBDir); std::error_code EC; @@ -468,8 +434,7 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, return createFileError(PathBuf, OS.error()); } - return NeedsRecovery ? ValidationResult::Recovered - : ValidationResult::Valid; + return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid; } Expected> @@ -490,16 +455,15 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // from creating a new chain (essentially while a \p UnifiedOnDiskCache // instance holds a shared lock the storage for the primary directory will // grow unrestricted). 
- if (std::error_code EC = lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) + if (std::error_code EC = + lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) return createFileError(PathBuf, EC); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); - if (DBDirs.empty()) - DBDirs.push_back((Twine(DBDirPrefix) + "1").str()); - - assert(!DBDirs.empty()); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); + if (DBDirs->empty()) + DBDirs->push_back((Twine(DBDirPrefix) + "1").str()); std::shared_ptr Logger; if (Error E = @@ -510,10 +474,11 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, /// more directories, get the most recent directories and chain them, with the /// most recent being the primary one. The remaining directories are unused /// data than can be garbage-collected. + auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); std::unique_ptr UpstreamGraphDB; std::unique_ptr UpstreamKVDB; - if (DBDirs.size() > 1) { - StringRef UpstreamDir = *(DBDirs.end() - 2); + if (DBDirs->size() > 1) { + StringRef UpstreamDir = *(DBDirs->end() - 2); PathBuf = RootPath; sys::path::append(PathBuf, UpstreamDir); if (Error E = @@ -523,19 +488,19 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, return std::move(E); if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + /*UnifiedCache=*/nullptr, Logger) .moveInto(UpstreamKVDB)) return std::move(E); } - OnDiskGraphDB *UpstreamGraphDBPtr = UpstreamGraphDB.get(); - StringRef PrimaryDir = *(DBDirs.end() - 1); + StringRef PrimaryDir = *(DBDirs->end() - 1); PathBuf = RootPath; sys::path::append(PathBuf, PrimaryDir); std::unique_ptr PrimaryGraphDB; if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize, - std::move(UpstreamGraphDB), Logger, FaultInPolicy) + UpstreamGraphDB.get(), 
Logger, FaultInPolicy) .moveInto(PrimaryGraphDB)) return std::move(E); std::unique_ptr PrimaryKVDB; @@ -543,17 +508,17 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // including an extra translation step of the value during fault-in. if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + UniDB.get(), Logger) .moveInto(PrimaryKVDB)) return std::move(E); - auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); UniDB->RootPath = RootPath; UniDB->SizeLimit = SizeLimit.value_or(0); UniDB->LockFD = LockFD; - UniDB->NeedsGarbageCollection = DBDirs.size() > 2; + UniDB->NeedsGarbageCollection = DBDirs->size() > 2; UniDB->PrimaryDBDir = PrimaryDir; - UniDB->UpstreamGraphDB = UpstreamGraphDBPtr; + UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB); UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB); UniDB->UpstreamKVDB = std::move(UpstreamKVDB); UniDB->PrimaryKVDB = std::move(PrimaryKVDB); @@ -607,7 +572,7 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const { Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { if (LockFD == -1) return Error::success(); // already closed. - auto _1 = make_scope_exit([&]() { + auto CloseLock = make_scope_exit([&]() { assert(LockFD >= 0); sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); sys::fs::closeFile(LockFile); @@ -615,10 +580,10 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { }); bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false; - PrimaryKVDB.reset(); UpstreamKVDB.reset(); + PrimaryKVDB.reset(); + UpstreamGraphDB.reset(); PrimaryGraphDB.reset(); - UpstreamGraphDB = nullptr; if (std::error_code EC = unlockFileThreadSafe(LockFD)) return createFileError(RootPath, EC); @@ -635,7 +600,7 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { return Error::success(); // couldn't get exclusive lock, give up. 
return createFileError(RootPath, EC); } - auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); // Managed to get an exclusive lock which means there are no other open // \p UnifiedOnDiskCache instances for the same path, so we can safely start a @@ -661,12 +626,12 @@ UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } Error UnifiedOnDiskCache::collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger) { - SmallVector DBDirs; - if (Error E = getAllGarbageDirs(Path, DBDirs)) - return E; + auto DBDirs = getAllGarbageDirs(Path); + if (!DBDirs) + return DBDirs.takeError(); SmallString<256> PathBuf(Path); - for (StringRef UnusedSubDir : DBDirs) { + for (StringRef UnusedSubDir : *DBDirs) { sys::path::append(PathBuf, UnusedSubDir); if (Logger) Logger->log_UnifiedOnDiskCache_collectGarbage(PathBuf); diff --git a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp index bc60264bb58ba..726390a7acce5 100644 --- a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp +++ b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp @@ -13,6 +13,7 @@ #include "llvm-c/CAS/PluginAPI_functions.h" #include "llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/CASID.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Errc.h" @@ -318,13 +319,33 @@ Expected CASWrapper::downstreamNode(ObjectID Node) { return importNode(Node, FromDB, ToDB); } +static Expected cachePut(OnDiskKeyValueDB &DB, ArrayRef Key, + ObjectID ID) { + auto Value = UnifiedOnDiskCache::getValueFromObjectID(ID); + auto Result = DB.put(Key, Value); + if (!Result) + return Result.takeError(); + return UnifiedOnDiskCache::getObjectIDFromValue(*Result); +} + +static Expected> cacheGet(OnDiskKeyValueDB &DB, + ArrayRef Key) { + auto Result = DB.get(Key); + if (!Result) + return 
Result.takeError(); + if (!*Result) + return std::nullopt; + return UnifiedOnDiskCache::getObjectIDFromValue(**Result); +} + Error CASWrapper::upstreamKey(ArrayRef Key, ObjectID Value) { if (!UpstreamDB) return Error::success(); Expected UpstreamVal = upstreamNode(Value); if (!UpstreamVal) return UpstreamVal.takeError(); - Expected PutValue = UpstreamDB->KVPut(Key, *UpstreamVal); + Expected PutValue = + cachePut(UpstreamDB->getKeyValueDB(), Key, *UpstreamVal); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *UpstreamVal); @@ -336,7 +357,8 @@ CASWrapper::downstreamKey(ArrayRef Key) { if (!UpstreamDB) return std::nullopt; std::optional UpstreamValue; - if (Error E = UpstreamDB->KVGet(Key).moveInto(UpstreamValue)) + if (Error E = + cacheGet(UpstreamDB->getKeyValueDB(), Key).moveInto(UpstreamValue)) return std::move(E); if (!UpstreamValue) return std::nullopt; @@ -345,7 +367,7 @@ CASWrapper::downstreamKey(ArrayRef Key) { UpstreamDB->getGraphDB().getDigest(*UpstreamValue)); if (!Value) return Value.takeError(); - Expected PutValue = DB->KVPut(Key, *Value); + Expected PutValue = cachePut(DB->getKeyValueDB(), Key, *Value); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *Value); @@ -628,7 +650,7 @@ llcas_actioncache_get_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ArrayRef Key(c_key.data, c_key.size); std::optional Value; - if (Error E = DB.KVGet(Key).moveInto(Value)) + if (Error E = cacheGet(DB.getKeyValueDB(), Key).moveInto(Value)) return reportError(std::move(E), error, LLCAS_LOOKUP_RESULT_ERROR); if (!Value) { if (!globally) @@ -684,7 +706,7 @@ bool llcas_actioncache_put_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ObjectID Value = ObjectID::fromOpaqueData(c_value.opaque); ArrayRef Key(c_key.data, c_key.size); - Expected Ret = DB.KVPut(Key, Value); + Expected Ret = cachePut(DB.getKeyValueDB(), Key, Value); if (!Ret) return reportError(Ret.takeError(), error, true); if (*Ret != 
Value) diff --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h index 89f93e08366c9..48a1830f9b219 100644 --- a/llvm/unittests/CAS/OnDiskCommonUtils.h +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -12,6 +12,8 @@ #include "llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Testing/Support/Error.h" @@ -58,6 +60,25 @@ inline Expected store(OnDiskGraphDB &DB, StringRef Data, return ID; } +inline Expected cachePut(OnDiskKeyValueDB &DB, ArrayRef Key, + ObjectID ID) { + auto Value = UnifiedOnDiskCache::getValueFromObjectID(ID); + auto Result = DB.put(Key, Value); + if (!Result) + return Result.takeError(); + return UnifiedOnDiskCache::getObjectIDFromValue(*Result); +} + +inline Expected> cacheGet(OnDiskKeyValueDB &DB, + ArrayRef Key) { + auto Result = DB.get(Key); + if (!Result) + return Result.takeError(); + if (!*Result) + return std::nullopt; + return UnifiedOnDiskCache::getObjectIDFromValue(**Result); +} + inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS, unsigned Indent = 0) { std::optional Obj; diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp index 9c7c6a929fd39..68af80715cd8c 100644 --- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -102,7 +102,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) { std::unique_ptr DB; ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, + UpstreamDB.get(), /*Logger=*/nullptr, OnDiskGraphDB::FaultInPolicy::SingleNode) .moveInto(DB), Succeeded()); @@ -208,7 +208,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) { unittest::TempDir Temp("ondiskcas", /*Unique=*/true); std::unique_ptr DB; ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", 
sizeof(HashType), - std::move(UpstreamDB), + UpstreamDB.get(), /*Logger=*/nullptr, OnDiskGraphDB::FaultInPolicy::FullTree) .moveInto(DB), @@ -267,13 +267,13 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { std::unique_ptr DB; ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, Policy1) + UpstreamDB.get(), /*Logger=*/nullptr, Policy1) .moveInto(DB), Succeeded()); DB.reset(); ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, Policy2) + UpstreamDB.get(), /*Logger=*/nullptr, Policy2) .moveInto(DB), Failed()); }; diff --git a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp index e25288a26eb92..09aebc2d4bc19 100644 --- a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp +++ b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp @@ -82,14 +82,18 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { Key1Hash = digest("key1"); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVPut(Key1Hash, *IDRoot).moveInto(Val), - Succeeded()); + ASSERT_THAT_ERROR( + cachePut(UniDB->getKeyValueDB(), Key1Hash, *IDRoot).moveInto(Val), + Succeeded()); EXPECT_EQ(IDRoot, Val); Key2Hash = digest("key2"); std::optional KeyID; ASSERT_THAT_ERROR(DB.getReference(Key2Hash).moveInto(KeyID), Succeeded()); - ASSERT_THAT_ERROR(UniDB->KVPut(*KeyID, *ID1).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cachePut(UniDB->getKeyValueDB(), + UniDB->getGraphDB().getDigest(*KeyID), *ID1) + .moveInto(Val), + Succeeded()); } auto checkTree = [&](const HashType &Digest, StringRef ExpectedTree) { @@ -108,7 +112,9 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { auto checkKey = [&](const HashType &Key, StringRef ExpectedData) { OnDiskGraphDB &DB = UniDB->getGraphDB(); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVGet(Key).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key).moveInto(Val), + 
Succeeded()); + ASSERT_TRUE(Val.has_value()); std::optional Obj; ASSERT_THAT_ERROR(DB.load(*Val).moveInto(Obj), Succeeded()); @@ -185,7 +191,8 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { ASSERT_THAT_ERROR(DB.getReference(OtherHash).moveInto(ID), Succeeded()); EXPECT_FALSE(DB.containsObject(*ID)); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVGet(Key2Hash).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key2Hash).moveInto(Val), + Succeeded()); EXPECT_FALSE(Val.has_value()); } }