LLVM 22.0.0git
SpecialCaseList.cpp
Go to the documentation of this file.
1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <stdio.h>
#include <string>
#include <system_error>
#include <utility>
30
31namespace llvm {
32
33Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
34 unsigned LineNumber) {
35 if (Pattern.empty())
37 "Supplied regex was blank");
38
39 // Replace * with .*
40 auto Regexp = Pattern.str();
41 for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
42 pos += strlen(".*")) {
43 Regexp.replace(pos, strlen("*"), ".*");
44 }
45
46 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
47
48 // Check that the regexp is valid.
49 Regex CheckRE(Regexp);
50 std::string REError;
51 if (!CheckRE.isValid(REError))
53
54 RegExes.emplace_back(Pattern, LineNumber, std::move(CheckRE));
55 return Error::success();
56}
57
58void SpecialCaseList::RegexMatcher::preprocess(bool BySize) {
59 if (BySize) {
60 llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) {
61 return A.Name.size() < B.Name.size();
62 });
63 }
64}
65
66void SpecialCaseList::RegexMatcher::match(
67 StringRef Query,
68 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
69 for (const auto &R : reverse(RegExes))
70 if (R.Rg.match(Query))
71 return Cb(R.Name, R.LineNo);
72}
73
74Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
75 unsigned LineNumber) {
76 if (Pattern.empty())
77 return createStringError(errc::invalid_argument, "Supplied glob was blank");
78
79 auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
80 if (auto Err = Res.takeError())
81 return Err;
82 Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
83 return Error::success();
84}
85
86void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
87 if (BySize) {
88 llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) {
89 return A.Name.size() < B.Name.size();
90 });
91 }
92
93 for (const auto &G : reverse(Globs)) {
94 StringRef Prefix = G.Pattern.prefix();
95 StringRef Suffix = G.Pattern.suffix();
96
97 if (Suffix.empty() && Prefix.empty()) {
98 // If both prefix and suffix are empty put into special tree to search by
99 // substring in a middle.
100 StringRef Substr = G.Pattern.longest_substr();
101 if (!Substr.empty()) {
102 // But only if substring is not empty. Searching this tree is more
103 // expensive.
104 auto &V = SubstrToGlob.emplace(Substr).first->second;
105 V.emplace_back(&G);
106 continue;
107 }
108 }
109
110 auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
111 auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
112 V.emplace_back(&G);
113 }
114}
115
116void SpecialCaseList::GlobMatcher::match(
117 StringRef Query,
118 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
119 if (!PrefixSuffixToGlob.empty()) {
120 for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) {
121 for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) {
122 for (const auto *G : V) {
123 if (G->Pattern.match(Query)) {
124 Cb(G->Name, G->LineNo);
125 // As soon as we find a match in the vector, we can break for this
126 // vector, since the globs are already sorted by priority within the
127 // prefix group. However, we continue searching other prefix groups
128 // in the map, as they may contain a better match overall.
129 break;
130 }
131 }
132 }
133 }
134 }
135
136 if (!SubstrToGlob.empty()) {
137 // As we don't know when substring exactly starts, we will try all
138 // possibilities. In most cases search will fail on first characters.
139 for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
140 for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
141 for (const auto *G : V) {
142 if (G->Pattern.match(Query)) {
143 Cb(G->Name, G->LineNo);
144 // As soon as we find a match in the vector, we can break for this
145 // vector, since the globs are already sorted by priority within the
146 // prefix group. However, we continue searching other prefix groups
147 // in the map, as they may contain a better match overall.
148 break;
149 }
150 }
151 }
152 }
153 }
154}
155
156SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
157 : RemoveDotSlash(RemoveDotSlash) {
158 if (UseGlobs)
159 M.emplace<GlobMatcher>();
160 else
161 M.emplace<RegexMatcher>();
162}
163
164Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
165 return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
166}
167
168void SpecialCaseList::Matcher::preprocess(bool BySize) {
169 return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
170}
171
172void SpecialCaseList::Matcher::match(
173 StringRef Query,
174 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
175 if (RemoveDotSlash)
177 return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
178}
179
180// TODO: Refactor this to return Expected<...>
181std::unique_ptr<SpecialCaseList>
182SpecialCaseList::create(const std::vector<std::string> &Paths,
183 llvm::vfs::FileSystem &FS, std::string &Error) {
184 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
185 if (SCL->createInternal(Paths, FS, Error))
186 return SCL;
187 return nullptr;
188}
189
190std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
191 std::string &Error) {
192 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
193 if (SCL->createInternal(MB, Error))
194 return SCL;
195 return nullptr;
196}
197
198std::unique_ptr<SpecialCaseList>
199SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
201 std::string Error;
202 if (auto SCL = create(Paths, FS, Error))
203 return SCL;
205}
206
207bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
208 vfs::FileSystem &VFS, std::string &Error) {
209 for (size_t i = 0; i < Paths.size(); ++i) {
210 const auto &Path = Paths[i];
212 VFS.getBufferForFile(Path);
213 if (std::error_code EC = FileOrErr.getError()) {
214 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
215 return false;
216 }
217 std::string ParseError;
218 if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) {
219 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
220 return false;
221 }
222 }
223 return true;
224}
225
227 bool OrderBySize) {
228 if (!parse(0, MB, Error, OrderBySize))
229 return false;
230 return true;
231}
232
234SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
235 unsigned LineNo, bool UseGlobs) {
236 Sections.emplace_back(SectionStr, FileNo, UseGlobs);
237 auto &Section = Sections.back();
238
239 SectionStr = SectionStr.copy(StrAlloc);
240 if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
242 "malformed section at line " + Twine(LineNo) +
243 ": '" + SectionStr +
244 "': " + toString(std::move(Err)));
245 }
246
247 return &Section;
248}
249
250bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
251 std::string &Error, bool OrderBySize) {
252 unsigned long long Version = 2;
253
254 StringRef Header = MB->getBuffer();
255 if (Header.consume_front("#!special-case-list-v"))
256 consumeUnsignedInteger(Header, 10, Version);
257
258 // In https://reviews.llvm.org/D154014 we added glob support and planned
259 // to remove regex support in patterns. We temporarily support the
260 // original behavior using regexes if "#!special-case-list-v1" is the
261 // first line of the file. For more details, see
262 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
263 bool UseGlobs = Version > 1;
264
265 bool RemoveDotSlash = Version > 2;
266
267 Section *CurrentSection;
268 if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
269 Error = toString(std::move(Err));
270 return false;
271 }
272
273 // This is the current list of prefixes for all existing users matching file
274 // path. We may need parametrization in constructor in future.
275 constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
276
277 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
278 !LineIt.is_at_eof(); LineIt++) {
279 unsigned LineNo = LineIt.line_number();
280 StringRef Line = LineIt->trim();
281 if (Line.empty())
282 continue;
283
284 // Save section names
285 if (Line.starts_with("[")) {
286 if (!Line.ends_with("]")) {
287 Error =
288 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
289 .str();
290 return false;
291 }
292
293 if (auto Err = addSection(Line.drop_front().drop_back(), FileIdx, LineNo,
294 UseGlobs)
295 .moveInto(CurrentSection)) {
296 Error = toString(std::move(Err));
297 return false;
298 }
299 continue;
300 }
301
302 // Get our prefix and unparsed glob.
303 auto [Prefix, Postfix] = Line.split(":");
304 if (Postfix.empty()) {
305 // Missing ':' in the line.
306 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
307 return false;
308 }
309
310 auto [Pattern, Category] = Postfix.split("=");
311 auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
312 Category, UseGlobs,
313 RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
314 Pattern = Pattern.copy(StrAlloc);
315 if (auto Err = It->second.insert(Pattern, LineNo)) {
316 Error =
317 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
318 Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
319 .str();
320 return false;
321 }
322 }
323
324 for (Section &S : Sections)
325 S.preprocess(OrderBySize);
326
327 return true;
328}
329
330SpecialCaseList::~SpecialCaseList() = default;
331
333 StringRef Query, StringRef Category) const {
334 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
335 return LineNo;
336}
337
338std::pair<unsigned, unsigned>
340 StringRef Query, StringRef Category) const {
341 for (const auto &S : reverse(Sections)) {
342 if (S.SectionMatcher.matchAny(Section)) {
343 unsigned Blame = S.getLastMatch(Prefix, Query, Category);
344 if (Blame)
345 return {S.FileIdx, Blame};
346 }
347 }
348 return NotFound;
349}
350
351const SpecialCaseList::Matcher *
352SpecialCaseList::Section::findMatcher(StringRef Prefix,
353 StringRef Category) const {
354 SectionEntries::const_iterator I = Entries.find(Prefix);
355 if (I == Entries.end())
356 return nullptr;
357 StringMap<Matcher>::const_iterator II = I->second.find(Category);
358 if (II == I->second.end())
359 return nullptr;
360
361 return &II->second;
362}
363
364LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) {
365 SectionMatcher.preprocess(false);
366 for (auto &[K1, E] : Entries)
367 for (auto &[K2, M] : E)
368 M.preprocess(OrderBySize);
369}
370
372 StringRef Query,
373 StringRef Category) const {
374 unsigned LastLine = 0;
375 if (const Matcher *M = findMatcher(Prefix, Category)) {
376 M->match(Query, [&](StringRef, unsigned LineNo) {
377 LastLine = std::max(LastLine, LineNo);
378 });
379 }
380 return LastLine;
381}
382
384 StringRef Query,
385 StringRef Category) const {
386 StringRef LongestRule;
387 if (const Matcher *M = findMatcher(Prefix, Category)) {
388 M->match(Query, [&](StringRef Rule, unsigned) {
389 if (LongestRule.size() < Rule.size())
390 LongestRule = Rule;
391 });
392 }
393 return LongestRule;
394}
395
396} // namespace llvm
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define _
static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Definition LineTable.cpp:54
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static const char * toString(MIToken::TokenKind TokenKind)
Definition MIParser.cpp:624
static Error addSection(const NewSectionInfo &NewSection, Object &Obj)
Register Reg
uint64_t IntrinsicInst * II
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
Defines the virtual file system interface vfs::FileSystem.
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
This interface provides simple read-only access to a block of memory, and provides simple methods for...
static constexpr std::pair< unsigned, unsigned > NotFound
LLVM_ABI std::pair< unsigned, unsigned > inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns the file index and the line number <FileIdx, LineNo> corresponding to the special case list e...
LLVM_ABI bool createInternal(const std::vector< std::string > &Paths, vfs::FileSystem &VFS, std::string &Error)
static LLVM_ABI std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
static LLVM_ABI std::unique_ptr< SpecialCaseList > create(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS, std::string &Error)
Parses the special case list entries from files.
LLVM_ABI bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns true, if special case list contains a line.
StringMapIterBase< ValueTy, true > const_iterator
Definition StringMap.h:220
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
StringRef copy(Allocator &A) const
Definition StringRef.h:162
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
An efficient, type-erasing, non-owning reference to a callable.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
LLVM_ABI StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Remove redundant leading "./" pieces and consecutive separators.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
Definition STLExtras.h:2058
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, unsigned long long &Result)
@ invalid_argument
Definition Errc.h:56
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
LLVM_ABI StringRef getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const
LLVM_ABI unsigned getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const