arrow_avro/reader/
cursor.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::reader::vlq::read_varint;
19use arrow_schema::ArrowError;
20
/// Forward-only reader over a borrowed byte slice that decodes the
/// primitive values of the Avro binary encoding
///
/// Each successful read shrinks the borrowed slice from the front, so the
/// cursor never copies or owns the underlying data.
///
/// <https://avro.apache.org/docs/1.11.1/specification/#encodings>
#[derive(Debug)]
pub(crate) struct AvroCursor<'a> {
    /// Bytes that have not been consumed yet
    buf: &'a [u8],
    /// Length of the slice the cursor was created with, kept so the number
    /// of consumed bytes can be derived from the remaining length
    start_len: usize,
}
29
30impl<'a> AvroCursor<'a> {
31    pub(crate) fn new(buf: &'a [u8]) -> Self {
32        Self {
33            buf,
34            start_len: buf.len(),
35        }
36    }
37
38    /// Returns the current cursor position
39    #[inline]
40    pub(crate) fn position(&self) -> usize {
41        self.start_len - self.buf.len()
42    }
43
44    /// Read a single `u8`
45    #[inline]
46    pub(crate) fn get_u8(&mut self) -> Result<u8, ArrowError> {
47        match self.buf.first().copied() {
48            Some(x) => {
49                self.buf = &self.buf[1..];
50                Ok(x)
51            }
52            None => Err(ArrowError::ParseError("Unexpected EOF".to_string())),
53        }
54    }
55
56    #[inline]
57    pub(crate) fn get_bool(&mut self) -> Result<bool, ArrowError> {
58        Ok(self.get_u8()? != 0)
59    }
60
61    pub(crate) fn read_vlq(&mut self) -> Result<u64, ArrowError> {
62        let (val, offset) = read_varint(self.buf)
63            .ok_or_else(|| ArrowError::ParseError("bad varint".to_string()))?;
64        self.buf = &self.buf[offset..];
65        Ok(val)
66    }
67
68    #[inline]
69    pub(crate) fn get_int(&mut self) -> Result<i32, ArrowError> {
70        let varint = self.read_vlq()?;
71        let val: u32 = varint
72            .try_into()
73            .map_err(|_| ArrowError::ParseError("varint overflow".to_string()))?;
74        Ok((val >> 1) as i32 ^ -((val & 1) as i32))
75    }
76
77    #[inline]
78    pub(crate) fn get_long(&mut self) -> Result<i64, ArrowError> {
79        let val = self.read_vlq()?;
80        Ok((val >> 1) as i64 ^ -((val & 1) as i64))
81    }
82
83    pub(crate) fn get_bytes(&mut self) -> Result<&'a [u8], ArrowError> {
84        let len: usize = self.get_long()?.try_into().map_err(|_| {
85            ArrowError::ParseError("offset overflow reading avro bytes".to_string())
86        })?;
87
88        if (self.buf.len() < len) {
89            return Err(ArrowError::ParseError(
90                "Unexpected EOF reading bytes".to_string(),
91            ));
92        }
93        let ret = &self.buf[..len];
94        self.buf = &self.buf[len..];
95        Ok(ret)
96    }
97
98    #[inline]
99    pub(crate) fn get_float(&mut self) -> Result<f32, ArrowError> {
100        if (self.buf.len() < 4) {
101            return Err(ArrowError::ParseError(
102                "Unexpected EOF reading float".to_string(),
103            ));
104        }
105        let ret = f32::from_le_bytes(self.buf[..4].try_into().unwrap());
106        self.buf = &self.buf[4..];
107        Ok(ret)
108    }
109
110    #[inline]
111    pub(crate) fn get_double(&mut self) -> Result<f64, ArrowError> {
112        if (self.buf.len() < 8) {
113            return Err(ArrowError::ParseError(
114                "Unexpected EOF reading float".to_string(),
115            ));
116        }
117        let ret = f64::from_le_bytes(self.buf[..8].try_into().unwrap());
118        self.buf = &self.buf[8..];
119        Ok(ret)
120    }
121}