LLVM 23.0.0git
DataExtractor.cpp
Go to the documentation of this file.
1//===-- DataExtractor.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/Support/Errc.h"
13#include "llvm/Support/LEB128.h"
15
16using namespace llvm;
17
18bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
19 Error *E) const {
21 return true;
22 if (E) {
23 if (Offset <= Data.size())
26 "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
27 ", 0x%" PRIx64 ")",
28 Data.size(), Offset, Offset + Size);
29 else
31 "offset 0x%" PRIx64
32 " is beyond the end of data at 0x%zx",
33 Offset, Data.size());
34 }
35 return false;
36}
37
38static bool isError(Error *E) { return E && *E; }
39
40template <typename T>
41T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
42 ErrorAsOutParameter ErrAsOut(Err);
43 T val = 0;
44 if (isError(Err))
45 return val;
46
47 uint64_t offset = *offset_ptr;
48 if (!prepareRead(offset, sizeof(T), Err))
49 return val;
50 std::memcpy(&val, &Data.data()[offset], sizeof(val));
51 if (sys::IsLittleEndianHost != IsLittleEndian)
53
54 // Advance the offset
55 *offset_ptr += sizeof(val);
56 return val;
57}
58
59template <typename T>
60T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
61 Error *Err) const {
62 ErrorAsOutParameter ErrAsOut(Err);
63 if (isError(Err))
64 return nullptr;
65
66 uint64_t offset = *offset_ptr;
67
68 if (!prepareRead(offset, sizeof(*dst) * count, Err))
69 return nullptr;
70 for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
71 ++value_ptr, offset += sizeof(*dst))
72 *value_ptr = getU<T>(offset_ptr, Err);
73 // Advance the offset
74 *offset_ptr = offset;
75 // Return a non-NULL pointer to the converted data as an indicator of
76 // success
77 return dst;
78}
79
81 return getU<uint8_t>(offset_ptr, Err);
82}
83
85 uint32_t count) const {
86 return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
87}
88
90 return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
91}
92
94 return getU<uint16_t>(offset_ptr, Err);
95}
96
98 uint32_t count) const {
99 return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
100}
101
103 uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
104 // The 3 bytes are in the correct byte order for the host.
105 return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
106}
107
109 return getU<uint32_t>(offset_ptr, Err);
110}
111
113 uint32_t count) const {
114 return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
115}
116
118 return getU<uint64_t>(offset_ptr, Err);
119}
120
122 uint32_t count) const {
123 return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
124}
125
127 llvm::Error *Err) const {
128 switch (byte_size) {
129 case 1:
130 return getU8(offset_ptr, Err);
131 case 2:
132 return getU16(offset_ptr, Err);
133 case 4:
134 return getU32(offset_ptr, Err);
135 case 8:
136 return getU64(offset_ptr, Err);
137 }
138
139 // For any other byte size, read the bytes and swap/shift if necessary.
140 ErrorAsOutParameter ErrAsOut(Err);
141 uint64_t val = 0;
142 if (isError(Err))
143 return val;
144 uint64_t offset = *offset_ptr;
145 if (!prepareRead(offset, byte_size, Err))
146 return val;
147 std::memcpy(&val, &Data.data()[offset], byte_size);
148 if (sys::IsLittleEndianHost != IsLittleEndian)
149 // Say byte_size is 3.
150 // high low
151 // Read bytes: 00 00 00 00 00 AA BB CC
152 // Swapped bytes: CC BB AA 00 00 00 00 00
153 // Shifted bytes: 00 00 00 00 00 CC BB AA
154 val = sys::getSwappedBytes(val) >> (8 * (8 - byte_size));
155
156 // Advance the offset
157 *offset_ptr += byte_size;
158 return val;
159}
160
161int64_t
162DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
163 switch (byte_size) {
164 case 1:
165 return getS8(offset_ptr);
166 case 2:
167 return getS16(offset_ptr);
168 case 4:
169 return getS32(offset_ptr);
170 case 8:
171 return getS64(offset_ptr);
172 }
173 llvm_unreachable("getSigned unhandled case!");
174}
175
177 ErrorAsOutParameter ErrAsOut(Err);
178 if (isError(Err))
179 return StringRef();
180
181 uint64_t Start = *OffsetPtr;
182 StringRef::size_type Pos = Data.find('\0', Start);
183 if (Pos != StringRef::npos) {
184 *OffsetPtr = Pos + 1;
185 return StringRef(Data.data() + Start, Pos - Start);
186 }
187 if (Err)
189 "no null terminated string at offset 0x%" PRIx64,
190 Start);
191 return StringRef();
192}
193
196 StringRef TrimChars) const {
197 StringRef Bytes(getBytes(OffsetPtr, Length));
198 return Bytes.trim(TrimChars);
199}
200
202 Error *Err) const {
203 ErrorAsOutParameter ErrAsOut(Err);
204 if (isError(Err))
205 return StringRef();
206
207 if (!prepareRead(*OffsetPtr, Length, Err))
208 return StringRef();
209
210 StringRef Result = Data.substr(*OffsetPtr, Length);
211 *OffsetPtr += Length;
212 return Result;
213}
214
215template <typename T>
216static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
217 T (&Decoder)(const uint8_t *p, unsigned *n,
218 const uint8_t *end, const char **error)) {
220 assert(*OffsetPtr <= Bytes.size());
221 ErrorAsOutParameter ErrAsOut(Err);
222 if (isError(Err))
223 return T();
224
225 const char *error = nullptr;
226 unsigned bytes_read;
227 T result =
228 Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
229 if (error) {
230 if (Err)
232 "unable to decode LEB128 at offset 0x%8.8" PRIx64
233 ": %s",
234 *OffsetPtr, error);
235 return T();
236 }
237 *OffsetPtr += bytes_read;
238 return result;
239}
240
242 return getLEB128(Data, offset_ptr, Err, decodeULEB128);
243}
244
245int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
246 return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
247}
248
250 ErrorAsOutParameter ErrAsOut(C.Err);
251 if (isError(&C.Err))
252 return;
253
254 if (prepareRead(C.Offset, Length, &C.Err))
255 C.Offset += Length;
256}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err, T(&Decoder)(const uint8_t *p, unsigned *n, const uint8_t *end, const char **error))
static bool isError(Error *E)
#define T
This file contains some functions that are useful when dealing with strings.
#define error(X)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
const T * data() const
Definition ArrayRef.h:139
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI StringRef getFixedLengthString(uint64_t *OffsetPtr, uint64_t Length, StringRef TrimChars={"\0", 1}) const
Extract a fixed length string from *OffsetPtr and consume Length bytes.
LLVM_ABI uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, Error *Err=nullptr) const
Extract an unsigned integer of size byte_size from *offset_ptr.
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
int16_t getS16(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a int16_t value from *OffsetPtr.
int8_t getS8(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a int8_t value from *OffsetPtr.
int32_t getS32(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a int32_t value from *OffsetPtr.
LLVM_ABI StringRef getCStrRef(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a C string from *offset_ptr.
LLVM_ABI uint8_t getU8(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint8_t value from *offset_ptr.
LLVM_ABI int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const
Extract an signed integer of size byte_size from *offset_ptr.
int64_t getS64(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a int64_t value from *OffsetPtr.
LLVM_ABI uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err=nullptr) const
Extract a unsigned LEB128 value from *offset_ptr.
LLVM_ABI int64_t getSLEB128(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a signed LEB128 value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI uint64_t getU64(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint64_t value from *offset_ptr.
bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const
Test the availability of length bytes of data from offset.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
LLVM_ABI uint32_t getU24(uint64_t *OffsetPtr, Error *Err=nullptr) const
Extract a 24-bit unsigned value from *offset_ptr and return it in a uint32_t.
Helper for Errors used as out-parameters.
Definition Error.h:1144
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
size_t size_type
Definition StringRef.h:61
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
iterator end() const
Definition BasicBlock.h:89
constexpr bool IsLittleEndianHost
unsigned char getSwappedBytes(unsigned char C)
void swapByteOrder(T &Value)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct a string ref from an array ref of unsigned chars.
uint64_t decodeULEB128(const uint8_t *p, unsigned *n=nullptr, const uint8_t *end=nullptr, const char **error=nullptr)
Utility function to decode a ULEB128 value.
Definition LEB128.h:130
int64_t decodeSLEB128(const uint8_t *p, unsigned *n=nullptr, const uint8_t *end=nullptr, const char **error=nullptr)
Utility function to decode a SLEB128 value.
Definition LEB128.h:164
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Uint24 uint24_t
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
uint32_t getAsUint32(bool IsLittleEndian) const