1use arrow_schema::ArrowError;
18use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
19use std::array::TryFromSliceError;
20
21use crate::utils::{array_from_slice, slice_from_slice, string_from_slice};
22
/// The basic type of a variant value, stored in the two least-significant bits of a value
/// header byte (see `get_basic_type`).
///
/// The explicit discriminants are the raw two-bit values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariantBasicType {
    /// A primitive value; the upper six header bits carry a [`VariantPrimitiveType`] id
    /// (see `get_primitive_type`).
    Primitive = 0,
    /// A short string; the upper six header bits carry its length in bytes
    /// (see `decode_short_string`).
    ShortString = 1,
    /// An object value.
    Object = 2,
    /// An array value.
    Array = 3,
}
30
/// The primitive type id of a variant value whose basic type is
/// [`VariantBasicType::Primitive`].
///
/// The explicit discriminants are the raw ids obtained from the upper six bits of the value
/// header byte (see `get_primitive_type`). Where a decoder for the payload exists in this
/// module, the variant documentation names it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VariantPrimitiveType {
    /// Null value.
    Null = 0,
    /// Boolean `true`.
    BooleanTrue = 1,
    /// Boolean `false`.
    BooleanFalse = 2,
    /// Little-endian `i8`, decoded by `decode_int8`.
    Int8 = 3,
    /// Little-endian `i16`, decoded by `decode_int16`.
    Int16 = 4,
    /// Little-endian `i32`, decoded by `decode_int32`.
    Int32 = 5,
    /// Little-endian `i64`, decoded by `decode_int64`.
    Int64 = 6,
    /// Little-endian `f64`, decoded by `decode_double`.
    Double = 7,
    /// One scale byte followed by a little-endian `i32`, decoded by `decode_decimal4`.
    Decimal4 = 8,
    /// One scale byte followed by a little-endian `i64`, decoded by `decode_decimal8`.
    Decimal8 = 9,
    /// One scale byte followed by a little-endian `i128`, decoded by `decode_decimal16`.
    Decimal16 = 10,
    /// Little-endian `i32` count of days since the Unix epoch, decoded by `decode_date`.
    Date = 11,
    /// Little-endian `i64` count of microseconds since the Unix epoch, decoded by
    /// `decode_timestamp_micros` into a UTC timestamp.
    TimestampMicros = 12,
    /// Little-endian `i64` count of microseconds since the Unix epoch, decoded by
    /// `decode_timestampntz_micros` into a timestamp without a time zone.
    TimestampNtzMicros = 13,
    /// Little-endian `f32`, decoded by `decode_float`.
    Float = 14,
    /// Little-endian `u32` length prefix followed by that many raw bytes, decoded by
    /// `decode_binary`.
    Binary = 15,
    /// Little-endian `u32` length prefix followed by that many bytes of string data,
    /// decoded by `decode_long_string`.
    String = 16,
}
51
52pub(crate) fn get_basic_type(header: u8) -> Result<VariantBasicType, ArrowError> {
54 let basic_type = header & 0x03; let basic_type = match basic_type {
57 0 => VariantBasicType::Primitive,
58 1 => VariantBasicType::ShortString,
59 2 => VariantBasicType::Object,
60 3 => VariantBasicType::Array,
61 _ => {
62 unreachable!();
65 }
66 };
67 Ok(basic_type)
68}
69
70impl TryFrom<u8> for VariantPrimitiveType {
71 type Error = ArrowError;
72
73 fn try_from(value: u8) -> Result<Self, Self::Error> {
74 match value {
75 0 => Ok(VariantPrimitiveType::Null),
76 1 => Ok(VariantPrimitiveType::BooleanTrue),
77 2 => Ok(VariantPrimitiveType::BooleanFalse),
78 3 => Ok(VariantPrimitiveType::Int8),
79 4 => Ok(VariantPrimitiveType::Int16),
80 5 => Ok(VariantPrimitiveType::Int32),
81 6 => Ok(VariantPrimitiveType::Int64),
82 7 => Ok(VariantPrimitiveType::Double),
83 8 => Ok(VariantPrimitiveType::Decimal4),
84 9 => Ok(VariantPrimitiveType::Decimal8),
85 10 => Ok(VariantPrimitiveType::Decimal16),
86 11 => Ok(VariantPrimitiveType::Date),
87 12 => Ok(VariantPrimitiveType::TimestampMicros),
88 13 => Ok(VariantPrimitiveType::TimestampNtzMicros),
89 14 => Ok(VariantPrimitiveType::Float),
90 15 => Ok(VariantPrimitiveType::Binary),
91 16 => Ok(VariantPrimitiveType::String),
92 _ => Err(ArrowError::InvalidArgumentError(format!(
93 "unknown primitive type: {}",
94 value
95 ))),
96 }
97 }
98}
99pub(crate) fn get_primitive_type(metadata: u8) -> Result<VariantPrimitiveType, ArrowError> {
101 VariantPrimitiveType::try_from(metadata >> 2)
103}
104
105fn map_try_from_slice_error(e: TryFromSliceError) -> ArrowError {
107 ArrowError::InvalidArgumentError(e.to_string())
108}
109
110pub(crate) fn decode_int8(data: &[u8]) -> Result<i8, ArrowError> {
112 Ok(i8::from_le_bytes(array_from_slice(data, 0)?))
113}
114
115pub(crate) fn decode_int16(data: &[u8]) -> Result<i16, ArrowError> {
117 Ok(i16::from_le_bytes(array_from_slice(data, 0)?))
118}
119
120pub(crate) fn decode_int32(data: &[u8]) -> Result<i32, ArrowError> {
122 Ok(i32::from_le_bytes(array_from_slice(data, 0)?))
123}
124
125pub(crate) fn decode_int64(data: &[u8]) -> Result<i64, ArrowError> {
127 Ok(i64::from_le_bytes(array_from_slice(data, 0)?))
128}
129
130pub(crate) fn decode_decimal4(data: &[u8]) -> Result<(i32, u8), ArrowError> {
132 let scale = u8::from_le_bytes(array_from_slice(data, 0)?);
133 let integer = i32::from_le_bytes(array_from_slice(data, 1)?);
134 Ok((integer, scale))
135}
136
137pub(crate) fn decode_decimal8(data: &[u8]) -> Result<(i64, u8), ArrowError> {
139 let scale = u8::from_le_bytes(array_from_slice(data, 0)?);
140 let integer = i64::from_le_bytes(array_from_slice(data, 1)?);
141 Ok((integer, scale))
142}
143
144pub(crate) fn decode_decimal16(data: &[u8]) -> Result<(i128, u8), ArrowError> {
146 let scale = u8::from_le_bytes(array_from_slice(data, 0)?);
147 let integer = i128::from_le_bytes(array_from_slice(data, 1)?);
148 Ok((integer, scale))
149}
150
151pub(crate) fn decode_float(data: &[u8]) -> Result<f32, ArrowError> {
153 Ok(f32::from_le_bytes(array_from_slice(data, 0)?))
154}
155
156pub(crate) fn decode_double(data: &[u8]) -> Result<f64, ArrowError> {
158 Ok(f64::from_le_bytes(array_from_slice(data, 0)?))
159}
160
161pub(crate) fn decode_date(data: &[u8]) -> Result<NaiveDate, ArrowError> {
163 let days_since_epoch = i32::from_le_bytes(array_from_slice(data, 0)?);
164 let value = DateTime::UNIX_EPOCH + Duration::days(i64::from(days_since_epoch));
165 Ok(value.date_naive())
166}
167
168pub(crate) fn decode_timestamp_micros(data: &[u8]) -> Result<DateTime<Utc>, ArrowError> {
170 let micros_since_epoch = i64::from_le_bytes(array_from_slice(data, 0)?);
171 DateTime::from_timestamp_micros(micros_since_epoch).ok_or_else(|| {
172 ArrowError::CastError(format!(
173 "Could not cast `{micros_since_epoch}` microseconds into a DateTime<Utc>"
174 ))
175 })
176}
177
178pub(crate) fn decode_timestampntz_micros(data: &[u8]) -> Result<NaiveDateTime, ArrowError> {
180 let micros_since_epoch = i64::from_le_bytes(array_from_slice(data, 0)?);
181 DateTime::from_timestamp_micros(micros_since_epoch)
182 .ok_or_else(|| {
183 ArrowError::CastError(format!(
184 "Could not cast `{micros_since_epoch}` microseconds into a NaiveDateTime"
185 ))
186 })
187 .map(|v| v.naive_utc())
188}
189
190pub(crate) fn decode_binary(data: &[u8]) -> Result<&[u8], ArrowError> {
192 let len = u32::from_le_bytes(array_from_slice(data, 0)?) as usize;
193 let value = slice_from_slice(data, 4..4 + len)?;
194 Ok(value)
195}
196
197pub(crate) fn decode_long_string(data: &[u8]) -> Result<&str, ArrowError> {
199 let len = u32::from_le_bytes(array_from_slice(data, 0)?) as usize;
200 let string = string_from_slice(data, 4..4 + len)?;
201 Ok(string)
202}
203
204pub(crate) fn decode_short_string(metadata: u8, data: &[u8]) -> Result<&str, ArrowError> {
206 let len = (metadata >> 2) as usize;
207 let string = string_from_slice(data, 0..len)?;
208 Ok(string)
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_type() -> Result<(), ArrowError> {
        assert!(matches!(
            get_basic_type(0b0000_0000)?,
            VariantBasicType::Primitive
        ));
        assert!(matches!(
            get_basic_type(0b0000_0001)?,
            VariantBasicType::ShortString
        ));
        assert!(matches!(
            get_basic_type(0b0000_0010)?,
            VariantBasicType::Object
        ));
        assert!(matches!(
            get_basic_type(0b0000_0011)?,
            VariantBasicType::Array
        ));
        // The upper six bits must not influence the basic type.
        assert!(matches!(
            get_basic_type(0b1111_1110)?,
            VariantBasicType::Object
        ));
        Ok(())
    }

    #[test]
    fn test_primitive_type() -> Result<(), ArrowError> {
        assert!(matches!(
            get_primitive_type(0)?,
            VariantPrimitiveType::Null
        ));
        assert!(matches!(
            get_primitive_type(3 << 2)?,
            VariantPrimitiveType::Int8
        ));
        assert!(matches!(
            get_primitive_type(16 << 2)?,
            VariantPrimitiveType::String
        ));
        // The two low (basic type) bits must not influence the primitive type.
        assert!(matches!(
            get_primitive_type((16 << 2) | 0b11)?,
            VariantPrimitiveType::String
        ));
        Ok(())
    }

    #[test]
    fn test_unknown_primitive_type() {
        // 17 is the first id without a matching variant; 63 is the largest six-bit id.
        assert!(matches!(
            get_primitive_type(17 << 2),
            Err(ArrowError::InvalidArgumentError(_))
        ));
        assert!(matches!(
            get_primitive_type(u8::MAX),
            Err(ArrowError::InvalidArgumentError(_))
        ));
    }

    #[test]
    fn test_i8() -> Result<(), ArrowError> {
        let data = [0x2a];
        let result = decode_int8(&data)?;
        assert_eq!(result, 42);
        Ok(())
    }

    #[test]
    fn test_i16() -> Result<(), ArrowError> {
        let data = [0xd2, 0x04];
        let result = decode_int16(&data)?;
        assert_eq!(result, 1234);
        Ok(())
    }

    #[test]
    fn test_i32() -> Result<(), ArrowError> {
        let data = [0x40, 0xe2, 0x01, 0x00];
        let result = decode_int32(&data)?;
        assert_eq!(result, 123456);
        Ok(())
    }

    #[test]
    fn test_i64() -> Result<(), ArrowError> {
        let data = [0x15, 0x81, 0xe9, 0x7d, 0xf4, 0x10, 0x22, 0x11];
        let result = decode_int64(&data)?;
        assert_eq!(result, 1234567890123456789);
        Ok(())
    }

    #[test]
    fn test_decimal4() -> Result<(), ArrowError> {
        let data = [
            0x02, // scale
            0xd2, 0x04, 0x00, 0x00, // unscaled value (little-endian i32)
        ];
        let result = decode_decimal4(&data)?;
        assert_eq!(result, (1234, 2));
        Ok(())
    }

    #[test]
    fn test_decimal8() -> Result<(), ArrowError> {
        let data = [
            0x02, // scale
            0xd2, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00, // unscaled value (little-endian i64)
        ];
        let result = decode_decimal8(&data)?;
        assert_eq!(result, (1234567890, 2));
        Ok(())
    }

    #[test]
    fn test_decimal16() -> Result<(), ArrowError> {
        let data = [
            0x02, // scale
            // unscaled value (little-endian i128)
            0xd2, 0xb6, 0x23, 0xc0, 0xf4, 0x10, 0x22, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
        ];
        let result = decode_decimal16(&data)?;
        assert_eq!(result, (1234567891234567890, 2));
        Ok(())
    }

    #[test]
    fn test_float() -> Result<(), ArrowError> {
        let data = [0x06, 0x2c, 0x93, 0x4e];
        let result = decode_float(&data)?;
        assert_eq!(result, 1234567890.1234);
        Ok(())
    }

    #[test]
    fn test_double() -> Result<(), ArrowError> {
        let data = [0xc9, 0xe5, 0x87, 0xb4, 0x80, 0x65, 0xd2, 0x41];
        let result = decode_double(&data)?;
        assert_eq!(result, 1234567890.1234);
        Ok(())
    }

    #[test]
    fn test_date() -> Result<(), ArrowError> {
        let data = [0xe2, 0x4e, 0x0, 0x0];
        let result = decode_date(&data)?;
        assert_eq!(result, NaiveDate::from_ymd_opt(2025, 4, 16).unwrap());
        Ok(())
    }

    #[test]
    fn test_timestamp_micros() -> Result<(), ArrowError> {
        let data = [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00];
        let result = decode_timestamp_micros(&data)?;
        assert_eq!(
            result,
            NaiveDate::from_ymd_opt(2025, 4, 16)
                .unwrap()
                .and_hms_milli_opt(16, 34, 56, 780)
                .unwrap()
                .and_utc()
        );
        Ok(())
    }

    #[test]
    fn test_timestampntz_micros() -> Result<(), ArrowError> {
        let data = [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00];
        let result = decode_timestampntz_micros(&data)?;
        assert_eq!(
            result,
            NaiveDate::from_ymd_opt(2025, 4, 16)
                .unwrap()
                .and_hms_milli_opt(16, 34, 56, 780)
                .unwrap()
        );
        Ok(())
    }

    #[test]
    fn test_binary() -> Result<(), ArrowError> {
        let data = [
            0x09, 0, 0, 0, // payload length (little-endian u32)
            0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, // payload
        ];
        let result = decode_binary(&data)?;
        assert_eq!(
            result,
            [0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe]
        );
        Ok(())
    }

    #[test]
    fn test_short_string() -> Result<(), ArrowError> {
        // One trailing byte beyond the declared length; it must be ignored.
        let data = [b'H', b'e', b'l', b'l', b'o', b'o'];
        // Header: basic type 1 in the low two bits, length 5 in the upper six bits.
        let result = decode_short_string(1 | (5 << 2), &data)?;
        assert_eq!(result, "Hello");
        Ok(())
    }

    #[test]
    fn test_string() -> Result<(), ArrowError> {
        let data = [
            0x05, 0, 0, 0, // string length (little-endian u32)
            b'H', b'e', b'l', b'l', b'o', // string data
            b'o', // trailing byte beyond the declared length; it must be ignored
        ];
        let result = decode_long_string(&data)?;
        assert_eq!(result, "Hello");
        Ok(())
    }
}