1use std::{array::TryFromSliceError, ops::Range, str};
18
19use crate::VariantPathElement;
20use arrow_schema::ArrowError;
21
22use std::cmp::Ordering;
23use std::fmt::Debug;
24use std::slice::SliceIndex;
25
26pub(crate) fn overflow_error(msg: &str) -> ArrowError {
28 ArrowError::InvalidArgumentError(format!("Integer overflow computing {msg}"))
29}
30
31#[inline]
32pub(crate) fn slice_from_slice<I: SliceIndex<[u8]> + Clone + Debug>(
33 bytes: &[u8],
34 index: I,
35) -> Result<&I::Output, ArrowError> {
36 bytes.get(index.clone()).ok_or_else(|| {
37 ArrowError::InvalidArgumentError(format!(
38 "Tried to extract byte(s) {index:?} from {}-byte buffer",
39 bytes.len(),
40 ))
41 })
42}
43
44#[inline]
49pub(crate) fn slice_from_slice_at_offset(
50 bytes: &[u8],
51 base_offset: usize,
52 range: Range<usize>,
53) -> Result<&[u8], ArrowError> {
54 let start_byte = base_offset
55 .checked_add(range.start)
56 .ok_or_else(|| overflow_error("slice start"))?;
57 let end_byte = base_offset
58 .checked_add(range.end)
59 .ok_or_else(|| overflow_error("slice end"))?;
60 slice_from_slice(bytes, start_byte..end_byte)
61}
62
63pub(crate) fn array_from_slice<const N: usize>(
64 bytes: &[u8],
65 offset: usize,
66) -> Result<[u8; N], ArrowError> {
67 slice_from_slice_at_offset(bytes, offset, 0..N)?
68 .try_into()
69 .map_err(|e: TryFromSliceError| ArrowError::InvalidArgumentError(e.to_string()))
70}
71
72pub(crate) fn first_byte_from_slice(slice: &[u8]) -> Result<u8, ArrowError> {
73 slice
74 .first()
75 .copied()
76 .ok_or_else(|| ArrowError::InvalidArgumentError("Received empty bytes".to_string()))
77}
78
79#[inline]
81pub(crate) fn string_from_slice(
82 slice: &[u8],
83 offset: usize,
84 range: Range<usize>,
85) -> Result<&str, ArrowError> {
86 let offset_buffer = slice_from_slice_at_offset(slice, offset, range)?;
87
88 #[cfg(feature = "simdutf8")]
90 {
91 simdutf8::basic::from_utf8(offset_buffer).map_err(|_| {
92 let e = simdutf8::compat::from_utf8(offset_buffer).unwrap_err();
94 ArrowError::InvalidArgumentError(format!("encountered non UTF-8 data: {e}"))
95 })
96 }
97
98 #[cfg(not(feature = "simdutf8"))]
100 str::from_utf8(offset_buffer)
101 .map_err(|_| ArrowError::InvalidArgumentError("invalid UTF-8 string".to_string()))
102}
103
/// Binary search over the indices in `range`, driven by a fallible comparator.
///
/// `cmp(i)` reports how the element at index `i` orders relative to the
/// target (`Less` moves the search to higher indices, `Greater` to lower
/// ones), or `None` to abort the search.
///
/// Returns:
/// - `None` if any call to `cmp` returned `None`;
/// - `Some(Ok(i))` if `cmp(i)` returned `Ordering::Equal`;
/// - `Some(Err(i))` if no index compared equal, where `i` is the index at
///   which the search bounds met.
pub(crate) fn try_binary_search_range_by<F>(
    range: Range<usize>,
    cmp: F,
) -> Option<Result<usize, usize>>
where
    F: Fn(usize) -> Option<Ordering>,
{
    let (mut lo, mut hi) = (range.start, range.end);
    loop {
        if lo >= hi {
            return Some(Err(lo));
        }
        // Midpoint computed from the width so that `lo + hi` is never formed.
        let probe = lo + (hi - lo) / 2;
        let ordering = cmp(probe)?;
        if ordering == Ordering::Equal {
            return Some(Ok(probe));
        }
        if ordering == Ordering::Less {
            lo = probe + 1;
        } else {
            hi = probe;
        }
    }
}
139
/// Checks that `T` occupies exactly `expected` bytes.
///
/// Does nothing when the sizes match. On a mismatch it indexes an empty array
/// with the actual size, which is always out of bounds: the call panics at
/// runtime, or fails const evaluation when used in a const context.
#[allow(unused)]
pub(crate) const fn expect_size_of<T>(expected: usize) {
    let actual = std::mem::size_of::<T>();
    if actual == expected {
        return;
    }
    // Deliberate out-of-bounds access; the actual size shows up as the
    // offending index in the resulting error.
    let _ = [""; 0][actual];
}
148
/// Returns `true` if the magnitude (absolute value) of `n` can be represented
/// in at most `N` bits.
///
/// The check compares the number of leading zero bits in `|n|` against the
/// number of bits that must remain unused in a 64-bit value. For `N >= 64`
/// every `i64` fits, so the function returns `true`.
pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
    // `saturating_sub` keeps the threshold at 0 for N > 64; a plain
    // subtraction would overflow there (panic in debug builds, wrap to a huge
    // threshold and wrongly return `false` in release builds).
    let required_leading_zeros = i64::BITS.saturating_sub(N);
    n.into().unsigned_abs().leading_zeros() >= required_leading_zeros
}
152
153#[inline]
186pub(crate) fn parse_path(s: &str) -> Result<Vec<VariantPathElement<'_>>, ArrowError> {
187 let scan_field = |start: usize| {
188 s[start..]
189 .find(['.', '[', ']'])
190 .map_or_else(|| s.len(), |p| start + p)
191 };
192
193 let bytes = s.as_bytes();
194 if let Some(b'.') = bytes.first() {
195 return Err(ArrowError::ParseError("Unexpected leading '.'".into()));
196 }
197
198 let mut elements = Vec::new();
199 let mut i = 0;
200
201 while i < bytes.len() {
202 let (elem, end) = match bytes[i] {
203 b'.' => {
204 i += 1; let end = scan_field(i);
206 if end == i {
207 return Err(ArrowError::ParseError(match bytes.get(i) {
208 None => "Unexpected trailing '.'".into(),
209 Some(&c) => format!("Unexpected '{}' at byte {i}", c as char),
210 }));
211 }
212 (VariantPathElement::field(&s[i..end]), end)
213 }
214 b'[' => {
215 let (element, end) = parse_in_bracket(s, i)?;
216 (element, end)
217 }
218 b']' => {
219 return Err(ArrowError::ParseError(format!(
220 "Unexpected ']' at byte {i}"
221 )));
222 }
223 _ => {
224 let end = scan_field(i);
225 (VariantPathElement::field(&s[i..end]), end)
226 }
227 };
228 elements.push(elem);
229 i = end;
230 }
231
232 Ok(elements)
233}
234
235fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize), ArrowError> {
238 let start = i + 1; let mut unescaped = String::new();
241 let mut chars = s[start..].char_indices().peekable();
242 let mut end = None;
243
244 while let Some((offset, c)) = chars.next() {
245 match c {
246 '\\' => {
248 if let Some((_, next)) = chars.next() {
249 unescaped.push(next);
250 }
251 }
253 ']' => {
254 end = Some(start + offset);
256 break;
257 }
258 _ => {
259 unescaped.push(c);
260 }
261 }
262 }
263
264 let end = match end {
265 Some(e) => e,
266 None => {
267 return Err(ArrowError::ParseError(format!("Unclosed '[' at byte {i}")));
268 }
269 };
270
271 let element = if let Some(inner) = unescaped
272 .strip_prefix('\'')
273 .and_then(|s| s.strip_suffix('\''))
274 .or_else(|| {
275 unescaped
276 .strip_prefix('"')
277 .and_then(|s| s.strip_suffix('"'))
278 }) {
279 VariantPathElement::field(inner.to_string())
281 } else {
282 let Ok(idx) = unescaped.parse() else {
283 return Err(ArrowError::ParseError(format!(
284 "Invalid token in bracket request: `{unescaped}`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
285 )));
286 };
287 VariantPathElement::index(idx)
288 };
289
290 Ok((element, end + 1))
291}
292
#[cfg(test)]
mod test {
    use super::*;

    /// With a 10-bit precision, magnitudes up to 1023 fit and 1024 does not,
    /// for both positive and negative values.
    #[test]
    fn test_fits_precision() {
        let cases = [(1023, true), (1024, false), (-1023, true), (-1024, false)];
        for (value, expected) in cases {
            assert_eq!(fits_precision::<10>(value), expected, "value = {value}");
        }
    }
}