LLVM 23.0.0git
CachePruning.cpp
Go to the documentation of this file.
1//===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the pruning of a directory based on least recently used.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Debug.h"
16#include "llvm/Support/Errc.h"
17#include "llvm/Support/Error.h"
19#include "llvm/Support/Path.h"
22
23#define DEBUG_TYPE "cache-pruning"
24
25#include <set>
26#include <system_error>
27
28using namespace llvm;
29
30namespace {
// Describes one cache file that was not removed by the expiration check and
// is therefore a candidate for count/size-based pruning. pruneCache() keeps
// these records in a std::set, so the ordering defined below decides both the
// removal order and which records are considered equal.
// NOTE(review): the declarations of the Time and Size members used by
// operator< are not visible in this listing.
31struct FileInfo {
34 std::string Path;
35
36 /// Used to determine which files to prune first. Also used to determine
37 /// set membership, so must take into account all fields.
// Time and Path are compared in ascending order. Size is swapped between the
// two tuples, so for equal Time the record with the larger Size orders first.
38 bool operator<(const FileInfo &Other) const {
39 return std::tie(Time, Other.Size, Path) <
40 std::tie(Other.Time, Size, Other.Path);
41 }
42};
43} // anonymous namespace
44
45/// Write a new timestamp file with the given path. This is used for the pruning
46/// interval option.
47static void writeTimestampFile(StringRef TimestampFile) {
48 std::error_code EC;
49 raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None);
50}
51
// Body of parseDuration(StringRef Duration): converts text of the form
// <integer><unit>, where <unit> is 's', 'm' or 'h', into a chrono duration.
// Every failure is reported as a StringError.
// NOTE(review): the signature line and the continuation lines of the error
// returns are missing from this listing.
53 if (Duration.empty())
54 return make_error<StringError>("Duration must not be empty",
56
// Everything except the final character is the numeric part.
57 StringRef NumStr = Duration.slice(0, Duration.size()-1);
58 uint64_t Num;
// getAsInteger() yields true when parsing fails. The radix argument is 0 --
// presumably "auto-detect the base"; confirm against StringRef's docs.
59 if (NumStr.getAsInteger(0, Num))
60 return make_error<StringError>("'" + NumStr + "' not an integer",
62
// The final character selects the unit; anything else is rejected.
63 switch (Duration.back()) {
64 case 's':
65 return std::chrono::seconds(Num);
66 case 'm':
67 return std::chrono::minutes(Num);
68 case 'h':
69 return std::chrono::hours(Num);
70 default:
72 "' must end with one of 's', 'm' or 'h'",
74 }
75}
76
// Body of parseCachePruningPolicy(StringRef PolicyStr): walks the
// ':'-separated entries of PolicyStr, splits each entry at '=' into Key and
// Value, and stores the parsed Value in the matching field of Policy.
// An unknown key or a malformed value stops parsing with a StringError.
// NOTE(review): the signature and several declaration/continuation lines are
// missing from this listing.
79 CachePruningPolicy Policy;
// P.second always holds the part of the string that has not been parsed yet.
80 std::pair<StringRef, StringRef> P = {"", PolicyStr};
81 while (!P.second.empty()) {
82 P = P.second.split(':');
83
85 std::tie(Key, Value) = P.first.split('=');
86 if (Key == "prune_interval") {
87 auto DurationOrErr = parseDuration(Value);
88 if (!DurationOrErr)
89 return DurationOrErr.takeError();
90 Policy.Interval = *DurationOrErr;
91 } else if (Key == "prune_after") {
92 auto DurationOrErr = parseDuration(Value);
93 if (!DurationOrErr)
94 return DurationOrErr.takeError();
95 Policy.Expiration = *DurationOrErr;
96 } else if (Key == "cache_size") {
// The value must be an integer percentage written with a trailing '%'.
// NOTE(review): Value is empty for an entry such as "cache_size="; back() is
// then called on an empty StringRef -- confirm that this is acceptable.
97 if (Value.back() != '%')
98 return make_error<StringError>("'" + Value + "' must be a percentage",
100 StringRef SizeStr = Value.drop_back();
102 if (SizeStr.getAsInteger(0, Size))
103 return make_error<StringError>("'" + SizeStr + "' not an integer",
105 if (Size > 100)
106 return make_error<StringError>("'" + SizeStr +
107 "' must be between 0 and 100",
110 } else if (Key == "cache_size_bytes") {
// An optional k/m/g suffix (matched case-insensitively through tolower)
// selects a power-of-1024 multiplier and is stripped before the number is
// parsed. Without a suffix the multiplier stays 1.
// NOTE(review): as above, back() is reached even when Value is empty.
111 uint64_t Mult = 1;
112 switch (tolower(Value.back())) {
113 case 'k':
114 Mult = 1024;
115 Value = Value.drop_back();
116 break;
117 case 'm':
118 Mult = 1024 * 1024;
119 Value = Value.drop_back();
120 break;
121 case 'g':
122 Mult = 1024 * 1024 * 1024;
123 Value = Value.drop_back();
124 break;
125 }
127 if (Value.getAsInteger(0, Size))
128 return make_error<StringError>("'" + Value + "' not an integer",
// NOTE(review): the product is not checked for overflow.
130 Policy.MaxSizeBytes = Size * Mult;
131 } else if (Key == "cache_size_files") {
// Parsed directly into the policy field; no intermediate variable is used.
132 if (Value.getAsInteger(0, Policy.MaxSizeFiles))
133 return make_error<StringError>("'" + Value + "' not an integer",
135 } else {
136 return make_error<StringError>("Unknown key: '" + Key + "'",
138 }
139 }
140
141 return Policy;
142}
143
144/// Prune the cache of files that haven't been accessed in a long time.
// Returns false when Path is empty or not a directory, when no pruning limit
// is set, or when the timestamp-file checks below decide against pruning.
// Returns true after the directory scan has run, and an error if the
// available disk space cannot be queried.
// NOTE(review): the first lines of the signature, and a few statement lines
// further down, are missing from this listing.
147 const std::vector<std::unique_ptr<MemoryBuffer>> &Files) {
148 using namespace std::chrono;
149
150 if (Path.empty())
151 return false;
152
// A truthy result from this two-argument is_directory() call is handled the
// same way as "not a directory".
153 bool isPathDir;
154 if (sys::fs::is_directory(Path, isPathDir))
155 return false;
156
157 if (!isPathDir)
158 return false;
159
// Cap the percentage at 100 (the start of this statement is on a line that is
// missing from this listing).
161 std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
162
163 if (Policy.Expiration == seconds(0) &&
165 Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) {
166 LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
167 // Nothing will be pruned, early exit
168 return false;
169 }
170
171 // Try to stat() the timestamp file.
172 SmallString<128> TimestampFile(Path);
173 sys::path::append(TimestampFile, "llvmcache.timestamp");
174 sys::fs::file_status FileStatus;
// Sampled once so that every age computed below uses the same reference.
175 const auto CurrentTime = system_clock::now();
176 if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
178 // If the timestamp file wasn't there, create one now.
179 writeTimestampFile(TimestampFile);
180 } else {
181 // Unknown error?
182 return false;
183 }
184 } else {
// Interval is an optional: when it holds no value, an existing timestamp file
// means pruning is never performed.
185 if (!Policy.Interval)
186 return false;
// An interval of exactly zero seconds skips the age check, so pruning runs on
// every call.
187 if (Policy.Interval != seconds(0)) {
188 // Check whether the time stamp is older than our pruning interval.
189 // If not, do nothing.
190 const auto TimeStampModTime = FileStatus.getLastModificationTime();
191 auto TimeStampAge = CurrentTime - TimeStampModTime;
192 if (TimeStampAge <= *Policy.Interval) {
193 LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
194 << duration_cast<seconds>(TimeStampAge).count()
195 << "s old), do not prune.\n");
196 return false;
197 }
198 }
199 // Write a new timestamp file so that nobody else attempts to prune.
200 // There is a benign race condition here, if two processes happen to
201 // notice at the same time that the timestamp is out-of-date.
202 writeTimestampFile(TimestampFile);
203 }
204
205 // Keep track of files to delete to get below the size limit.
206 // Order by time of last use so that recently used files are preserved.
207 std::set<FileInfo> FileInfos;
208 uint64_t TotalSize = 0;
209
210 // Walk the entire directory cache, looking for unused files.
211 std::error_code EC;
212 SmallString<128> CachePathNative;
213 sys::path::native(Path, CachePathNative);
214 // Walk all of the files within this directory.
// The loop stops at the first iterator error; EC is not reported afterwards.
215 for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
216 File != FileEnd && !EC; File.increment(EC)) {
217 // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
218 // includes the timestamp file as well as any files created by the user.
219 // This acts as a safeguard against data loss if the user specifies the
220 // wrong directory as their cache directory.
221 StringRef filename = sys::path::filename(File->path());
222 if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-"))
223 continue;
224
225 // Look at this file. If we can't stat it, there's nothing interesting
226 // there.
227 ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status();
228 if (!StatusOrErr) {
229 LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
230 continue;
231 }
232
233 // If the file hasn't been used recently enough, delete it
// An Expiration of zero seconds disables age-based removal.
234 const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
235 auto FileAge = CurrentTime - FileAccessTime;
236 if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) {
237 LLVM_DEBUG(dbgs() << "Remove " << File->path() << " ("
238 << duration_cast<seconds>(FileAge).count()
239 << "s old)\n");
// Best effort: the error_code returned by remove() is discarded.
240 sys::fs::remove(File->path());
241 continue;
242 }
243
244 // Leave it here for now, but add it to the list of size-based pruning.
245 TotalSize += StatusOrErr->getSize();
246 FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
247 }
248
// Iterator to the next removal candidate, starting at the first record in set
// order. This local shadows the FileInfo struct name for the rest of the
// function. NumFiles counts the records that have not been removed yet.
249 auto FileInfo = FileInfos.begin();
250 size_t NumFiles = FileInfos.size();
251
// Deletes the current candidate, updates the running totals and advances to
// the next candidate. The result of remove() is discarded, so the totals are
// updated even if the deletion fails.
// NOTE(review): the debug message prints TotalSize, a byte count, followed by
// a '%' sign.
252 auto RemoveCacheFile = [&]() {
253 // Remove the file.
254 sys::fs::remove(FileInfo->Path);
255 // Update size
256 TotalSize -= FileInfo->Size;
257 NumFiles--;
258 LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
259 << FileInfo->Size << "), new occupancy is " << TotalSize
260 << "%\n");
261 ++FileInfo;
262 };
263
264// Files.size() is greater than the number of inputs by one. However, a timestamp
265 // file is created and stored in the cache directory if --thinlto-cache-policy
266 // option is used. Therefore, files.size() is used as ActualNums.
// This check only produces a diagnostic; the first line of the stream
// expression is missing from this listing.
267 const size_t ActualNums = Files.size();
268 if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles)
270 << "ThinLTO cache pruning happens since the number of created files ("
271 << ActualNums << ") exceeds the maximum number of files ("
272 << Policy.MaxSizeFiles
273 << "); consider adjusting --thinlto-cache-policy\n";
274
275 // Prune for number of files.
// No end-of-set check is needed here: NumFiles starts at FileInfos.size() and
// drops by one per removal, so the loop condition fails before the iterator
// can pass the last record.
276 if (Policy.MaxSizeFiles)
277 while (NumFiles > Policy.MaxSizeFiles)
278 RemoveCacheFile();
279
280 // Prune for size now if needed
281 if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) {
282 auto SpaceInfoOrErr = sys::fs::disk_space(Path);
283 if (!SpaceInfoOrErr) {
284 auto EC = SpaceInfoOrErr.getError();
285 return createStringError(EC,
286 "cannot get available disk space for '%s': '%s'",
287 Path.str().c_str(), EC.message().c_str());
288 }
289 sys::fs::space_info SpaceInfo = SpaceInfoOrErr.get();
// Space the cache could occupy: what it currently uses plus what is free.
290 auto AvailableSpace = TotalSize + SpaceInfo.free;
291
// A byte limit of 0 is replaced by AvailableSpace so that it does not
// constrain the min() below. The statement guarded by the percentage check is
// on a line that is missing from this listing.
292 if (Policy.MaxSizePercentageOfAvailableSpace == 0)
294 if (Policy.MaxSizeBytes == 0)
295 Policy.MaxSizeBytes = AvailableSpace;
296 auto TotalSizeTarget = std::min<uint64_t>(
297 AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
298 Policy.MaxSizeBytes);
299
// NOTE(review): the debug expression divides by AvailableSpace, which is zero
// when no cache file was counted and the disk reports no free space.
300 LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
301 << "% target is: "
302 << Policy.MaxSizePercentageOfAvailableSpace << "%, "
303 << Policy.MaxSizeBytes << " bytes\n");
304
// Total buffer size of the non-null entries of Files; used only for the
// diagnostic that follows.
305 size_t ActualSizes = 0;
306 for (const auto &File : Files)
307 if (File)
308 ActualSizes += File->getBufferSize();
309
310 if (ActualSizes > TotalSizeTarget)
312 << "ThinLTO cache pruning happens since the total size of the cache "
313 "files consumed by the current link job ("
314 << ActualSizes << " bytes) exceeds maximum cache size ("
315 << TotalSizeTarget
316 << " bytes); consider adjusting --thinlto-cache-policy\n";
317
318 // Remove the oldest accessed files first, till we get below the threshold.
319 while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
320 RemoveCacheFile();
321 }
// Reached whenever the scan ran, whether or not any file was removed.
322 return true;
323}
static Expected< std::chrono::seconds > parseDuration(StringRef Duration)
static void writeTimestampFile(StringRef TimestampFile)
Write a new timestamp file with the given path.
#define P(N)
#define LLVM_DEBUG(...)
Definition Debug.h:119
Represents either an error or a value T.
Definition ErrorOr.h:56
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
LLVM Value Representation.
Definition Value.h:75
static LLVM_ABI raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition WithColor.cpp:85
A raw_ostream that writes to a file descriptor.
LLVM_ABI TimePoint getLastModificationTime() const
The file modification time as reported from the underlying file system.
directory_iterator - Iterates through the entries in path.
Represents the result of a call to sys::fs::status().
Definition FileSystem.h:222
LLVM_ABI ErrorOr< space_info > disk_space(const Twine &Path)
Get disk space usage information.
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI bool is_directory(const basic_file_status &status)
Does status represent a directory?
Definition Path.cpp:1112
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:584
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
std::chrono::time_point< std::chrono::system_clock, D > TimePoint
A time point on the system clock.
Definition Chrono.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
LLVM_ABI Expected< CachePruningPolicy > parseCachePruningPolicy(StringRef PolicyStr)
Parse the given string as a cache pruning policy.
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
@ no_such_file_or_directory
Definition Errc.h:65
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
@ Other
Any other memory.
Definition ModRef.h:68
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
LLVM_ABI Expected< bool > pruneCache(StringRef Path, CachePruningPolicy Policy, const std::vector< std::unique_ptr< MemoryBuffer > > &Files={})
Perform pruning using the supplied policy, returns true if pruning occurred, i.e.
Policy for the pruneCache() function.
uint64_t MaxSizeFiles
The maximum number of files in the cache directory.
std::optional< std::chrono::seconds > Interval
The pruning interval.
std::chrono::seconds Expiration
The expiration for a file.
uint64_t MaxSizeBytes
The maximum size for the cache directory in bytes.
unsigned MaxSizePercentageOfAvailableSpace
The maximum size for the cache directory, in terms of percentage of the available space on the disk.
space_info - Self explanatory.
Definition FileSystem.h:76