parquet_variant/variant/
object.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
18use crate::utils::{
19    first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
20};
21use crate::variant::{Variant, VariantMetadata};
22
23use arrow_schema::ArrowError;
24
// Number of bytes taken by the value header at the very start of an object's value buffer.
// The header is a single byte; the named constant keeps the offset arithmetic below readable.
const NUM_HEADER_BYTES: u32 = 1;
27
/// Header structure for [`VariantObject`]
///
/// Holds the three width parameters that [`Self::try_new`] decodes from an object's header byte.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct VariantObjectHeader {
    /// Width of the `num_elements` field: four bytes when the header's `is_large` bit is set,
    /// one byte otherwise.
    num_elements_size: OffsetSizeBytes,
    /// Width of each entry in the field id array.
    field_id_size: OffsetSizeBytes,
    /// Width of each entry in the field offset array.
    field_offset_size: OffsetSizeBytes,
}
35
36impl VariantObjectHeader {
37    // Hide the ugly casting
38    const fn num_elements_size(&self) -> u32 {
39        self.num_elements_size as _
40    }
41    const fn field_id_size(&self) -> u32 {
42        self.field_id_size as _
43    }
44    const fn field_offset_size(&self) -> u32 {
45        self.field_offset_size as _
46    }
47
48    // Avoid materializing this offset, since it's cheaply and safely computable
49    const fn field_ids_start_byte(&self) -> u32 {
50        NUM_HEADER_BYTES + self.num_elements_size()
51    }
52
53    pub(crate) fn try_new(header_byte: u8) -> Result<Self, ArrowError> {
54        // Parse the header byte to get object parameters
55        let value_header = header_byte >> 2;
56        let field_offset_size_minus_one = value_header & 0x03; // Last 2 bits
57        let field_id_size_minus_one = (value_header >> 2) & 0x03; // Next 2 bits
58        let is_large = (value_header & 0x10) != 0; // 5th bit
59        let num_elements_size = match is_large {
60            true => OffsetSizeBytes::Four,
61            false => OffsetSizeBytes::One,
62        };
63        Ok(Self {
64            num_elements_size,
65            field_id_size: OffsetSizeBytes::try_new(field_id_size_minus_one)?,
66            field_offset_size: OffsetSizeBytes::try_new(field_offset_size_minus_one)?,
67        })
68    }
69}
70
/// A [`Variant`] Object (struct with named fields).
///
/// See the [Variant spec] file for more information.
///
/// # Validation
///
/// Every instance of variant object is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant object subtype (see below).
///
/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully (and recursively)
/// _validated_. They always contain _valid_ data, and infallible accesses such as iteration and
/// indexing are panic-free. The validation cost is linear in the number of underlying bytes.
///
/// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or
/// _invalid_ data. Infallible accesses such as iteration, indexing and [`Self::get`] will panic if
/// the underlying bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`] and
/// [`Self::try_field`] are provided as panic-free alternatives. [`Self::with_full_validation`] can
/// also be used to _validate_ an _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. They can be useful if the caller
/// knows the underlying bytes were already validated previously, or if the caller intends to
/// perform a small number of (fallible) field accesses against a large object.
///
/// A _validated_ instance guarantees that:
///
/// - header byte is valid
/// - num_elements is in bounds
/// - field id array is in bounds
/// - field offset array is in bounds
/// - field value array is in bounds
/// - all field ids are valid metadata dictionary entries (*)
/// - field ids are lexically ordered by their corresponding string values (*)
/// - all field offsets are in bounds (*)
/// - all field values are (recursively) _valid_ variant values (*)
/// - the associated variant metadata is [valid] (*)
///
/// NOTE: [`Self::new`] only skips expensive (non-constant cost) validation checks (marked by `(*)`
/// in the list above); it panics if any of the other checks fails.
///
/// # Safety
///
/// Even an _invalid_ variant object instance is still _safe_ to use in the Rust sense. Accessing it
/// with infallible methods may cause panics but will never lead to undefined behavior.
///
/// [valid]: VariantMetadata#Validation
/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-object-basic_type2
#[derive(Debug, Clone, PartialEq)]
pub struct VariantObject<'m, 'v> {
    /// Metadata dictionary used to resolve this object's field ids to field names.
    pub metadata: VariantMetadata<'m>,
    /// The object's value bytes, starting at the header byte. Construction narrows this slice so
    /// that it ends where the last field offset says the last value ends.
    pub value: &'v [u8],
    /// Widths decoded from the header byte.
    header: VariantObjectHeader,
    /// Number of fields, as read from the bytes that follow the header byte.
    num_elements: u32,
    /// Byte offset within `value` where the field offset array begins (the end of the field id
    /// array).
    first_field_offset_byte: u32,
    /// Byte offset within `value` where the field values begin (the end of the field offset
    /// array).
    first_value_byte: u32,
    /// Starts out `false`; set to `true` once [`Self::with_full_validation`] has succeeded.
    validated: bool,
}
127
// We don't want this to grow because it could increase the size of `Variant` and hurt performance.
// NOTE(review): `expect_size_of` is defined in `crate::utils`; presumably it is a compile-time
// assertion that `VariantObject` occupies exactly 64 bytes — confirm against its definition.
const _: () = crate::utils::expect_size_of::<VariantObject>(64);
130
131impl<'m, 'v> VariantObject<'m, 'v> {
132    pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
133        Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant object")
134    }
135
136    /// Attempts to interpet `metadata` and `value` as a variant object.
137    ///
138    /// # Validation
139    ///
140    /// This constructor verifies that `value` points to a valid variant object value. In
141    /// particular, that all field ids exist in `metadata`, and all offsets are in-bounds and point
142    /// to valid objects.
143    pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> {
144        Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation()
145    }
146
147    /// Attempts to interpet `metadata` and `value` as a variant object, performing only basic
148    /// (constant-cost) [validation].
149    ///
150    /// [validation]: Self#Validation
151    pub(crate) fn try_new_with_shallow_validation(
152        metadata: VariantMetadata<'m>,
153        value: &'v [u8],
154    ) -> Result<Self, ArrowError> {
155        let header_byte = first_byte_from_slice(value)?;
156        let header = VariantObjectHeader::try_new(header_byte)?;
157
158        // Determine num_elements size based on is_large flag and fetch the value
159        let num_elements =
160            header
161                .num_elements_size
162                .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?;
163
164        // Calculate byte offsets for field offsets and values with overflow protection, and verify
165        // they're in bounds
166        let first_field_offset_byte = num_elements
167            .checked_mul(header.field_id_size())
168            .and_then(|n| n.checked_add(header.field_ids_start_byte()))
169            .ok_or_else(|| overflow_error("offset of variant object field offsets"))?;
170
171        let first_value_byte = num_elements
172            .checked_add(1)
173            .and_then(|n| n.checked_mul(header.field_offset_size()))
174            .and_then(|n| n.checked_add(first_field_offset_byte))
175            .ok_or_else(|| overflow_error("offset of variant object field values"))?;
176
177        let mut new_self = Self {
178            metadata,
179            value,
180            header,
181            num_elements,
182            first_field_offset_byte,
183            first_value_byte,
184            validated: false,
185        };
186
187        // Spec says: "The last field_offset points to the byte after the end of the last value"
188        //
189        // Use it to upper-bound the value bytes, which also verifies that the field id and field
190        // offset arrays are in bounds.
191        let last_offset = new_self
192            .get_offset(num_elements as _)?
193            .checked_add(first_value_byte)
194            .ok_or_else(|| overflow_error("variant object size"))?;
195        new_self.value = slice_from_slice(value, ..last_offset as _)?;
196        Ok(new_self)
197    }
198
    /// True if this instance is fully [validated] for panic-free infallible accesses.
    ///
    /// The flag starts out `false` at construction and is set once
    /// [`Self::with_full_validation`] succeeds.
    ///
    /// [validated]: Self#Validation
    pub fn is_fully_validated(&self) -> bool {
        self.validated
    }
205
206    /// Performs a full [validation] of this variant object.
207    ///
208    /// [validation]: Self#Validation
209    pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
210        if !self.validated {
211            // Validate the metadata dictionary first, if not already validated, because we pass it
212            // by value to all the children (who would otherwise re-validate it repeatedly).
213            self.metadata = self.metadata.with_full_validation()?;
214
215            let field_id_buffer = slice_from_slice(
216                self.value,
217                self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _,
218            )?;
219
220            let field_ids = map_bytes_to_offsets(field_id_buffer, self.header.field_id_size)
221                .collect::<Vec<_>>();
222
223            // Validate all field ids exist in the metadata dictionary and the corresponding field names are lexicographically sorted
224            if self.metadata.is_sorted() {
225                // Since the metadata dictionary has unique and sorted field names, we can also guarantee this object's field names
226                // are lexicographically sorted by their field id ordering
227                if !field_ids.is_sorted() {
228                    return Err(ArrowError::InvalidArgumentError(
229                        "field names not sorted".to_string(),
230                    ));
231                }
232
233                // Since field ids are sorted, if the last field is smaller than the dictionary size,
234                // we also know all field ids are smaller than the dictionary size and in-bounds.
235                if let Some(&last_field_id) = field_ids.last() {
236                    if last_field_id >= self.metadata.dictionary_size() {
237                        return Err(ArrowError::InvalidArgumentError(
238                            "field id is not valid".to_string(),
239                        ));
240                    }
241                }
242            } else {
243                // The metadata dictionary can't guarantee uniqueness or sortedness, so we have to parse out the corresponding field names
244                // to check lexicographical order
245                //
246                // Since we are probing the metadata dictionary by field id, this also verifies field ids are in-bounds
247                let are_field_names_sorted = field_ids
248                    .iter()
249                    .map(|&i| self.metadata.get(i))
250                    .collect::<Result<Vec<_>, _>>()?
251                    .is_sorted();
252
253                if !are_field_names_sorted {
254                    return Err(ArrowError::InvalidArgumentError(
255                        "field names not sorted".to_string(),
256                    ));
257                }
258            }
259
260            // Validate whether values are valid variant objects
261            let field_offset_buffer = slice_from_slice(
262                self.value,
263                self.first_field_offset_byte as _..self.first_value_byte as _,
264            )?;
265            let num_offsets = field_offset_buffer.len() / self.header.field_offset_size() as usize;
266
267            let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
268
269            map_bytes_to_offsets(field_offset_buffer, self.header.field_offset_size)
270                .take(num_offsets.saturating_sub(1))
271                .try_for_each(|offset| {
272                    let value_bytes = slice_from_slice(value_buffer, offset..)?;
273                    Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?;
274
275                    Ok::<_, ArrowError>(())
276                })?;
277
278            self.validated = true;
279        }
280        Ok(self)
281    }
282
283    /// Returns the number of key-value pairs in this object
284    pub fn len(&self) -> usize {
285        self.num_elements as _
286    }
287
288    /// Returns true if the object contains no key-value pairs
289    pub fn is_empty(&self) -> bool {
290        self.len() == 0
291    }
292
293    /// Get a field's value by index in `0..self.len()`
294    ///
295    /// # Panics
296    ///
297    /// If the index is out of bounds. Also if variant object is corrupted (e.g., invalid offsets or
298    /// field IDs). The latter can only happen when working with an unvalidated object produced by
299    /// [`Self::new`].
300    pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
301        (i < self.len()).then(|| {
302            self.try_field_with_shallow_validation(i)
303                .expect("Invalid object field value")
304        })
305    }
306
307    /// Fallible version of `field`. Returns field value by index, capturing validation errors
308    pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
309        self.try_field_with_shallow_validation(i)?
310            .with_full_validation()
311    }
312
313    // Attempts to retrieve the ith field value from the value region of the byte buffer; it
314    // performs only basic (constant-cost) validation.
315    fn try_field_with_shallow_validation(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
316        let value_bytes = slice_from_slice(self.value, self.first_value_byte as _..)?;
317        let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)? as _..)?;
318        Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes)
319    }
320
321    // Attempts to retrieve the ith offset from the field offset region of the byte buffer.
322    fn get_offset(&self, i: usize) -> Result<u32, ArrowError> {
323        let byte_range = self.first_field_offset_byte as _..self.first_value_byte as _;
324        let field_offsets = slice_from_slice(self.value, byte_range)?;
325        self.header.field_offset_size.unpack_u32(field_offsets, i)
326    }
327
328    /// Get a field's name by index in `0..self.len()`
329    ///
330    /// # Panics
331    /// If the variant object is corrupted (e.g., invalid offsets or field IDs).
332    /// This should never happen since the constructor validates all data upfront.
333    pub fn field_name(&self, i: usize) -> Option<&'m str> {
334        (i < self.len()).then(|| {
335            self.try_field_name(i)
336                .expect("Invalid variant object field name")
337        })
338    }
339
340    /// Fallible version of `field_name`. Returns field name by index, capturing validation errors
341    fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
342        let byte_range = self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _;
343        let field_id_bytes = slice_from_slice(self.value, byte_range)?;
344        let field_id = self.header.field_id_size.unpack_u32(field_id_bytes, i)?;
345        self.metadata.get(field_id as _)
346    }
347
348    /// Returns an iterator of (name, value) pairs over the fields of this object.
349    pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> + '_ {
350        self.iter_try_with_shallow_validation()
351            .map(|result| result.expect("Invalid variant object field value"))
352    }
353
354    /// Fallible iteration over the fields of this object.
355    pub fn iter_try(
356        &self,
357    ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
358        self.iter_try_with_shallow_validation().map(|result| {
359            let (name, value) = result?;
360            Ok((name, value.with_full_validation()?))
361        })
362    }
363
364    // Fallible iteration over the fields of this object that performs only shallow (constant-cost)
365    // validation of field values.
366    fn iter_try_with_shallow_validation(
367        &self,
368    ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
369        (0..self.len()).map(|i| {
370            let field = self.try_field_with_shallow_validation(i)?;
371            Ok((self.try_field_name(i)?, field))
372        })
373    }
374
375    /// Returns the value of the field with the specified name, if any.
376    ///
377    /// `Ok(None)` means the field does not exist; `Err` means the search encountered an error.
378    pub fn get(&self, name: &str) -> Option<Variant<'m, 'v>> {
379        // Binary search through the field IDs of this object to find the requested field name.
380        //
381        // NOTE: This does not require a sorted metadata dictionary, because the variant spec
382        // requires object field ids to be lexically sorted by their corresponding string values,
383        // and probing the dictionary for a field id is always O(1) work.
384        let i = try_binary_search_range_by(0..self.len(), &name, |i| self.field_name(i))?.ok()?;
385
386        self.field(i)
387    }
388}
389
#[cfg(test)]
mod tests {
    use crate::VariantBuilder;

    use super::*;

    // Builds a three-field object by hand and exercises lookup by name, lookup by index
    // (including out-of-range indexes) and iteration.
    #[test]
    fn test_variant_object_simple() {
        // Create metadata with field names: "active", "age", "name" (sorted)
        // Header: version=1, sorted=1, offset_size=1 (offset_size_minus_one=0)
        // So header byte = 00_0_1_0001 = 0x11
        let metadata_bytes = vec![
            0b0001_0001,
            3, // dictionary size
            0, // "active"
            6, // "age"
            9, // "name"
            13, // end offset of the string bytes
            b'a',
            b'c',
            b't',
            b'i',
            b'v',
            b'e',
            b'a',
            b'g',
            b'e',
            b'n',
            b'a',
            b'm',
            b'e',
        ];
        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

        // Create object value data for: {"active": true, "age": 42, "name": "hello"}
        // Field IDs in sorted order: [0, 1, 2] (active, age, name)
        // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0
        // value_header = 0000_00_00 = 0x00
        // So header byte = (0x00 << 2) | 2 = 0x02
        let object_value = vec![
            0x02, // header: basic_type=2, value_header=0x00
            3,    // num_elements = 3
            // Field IDs (1 byte each): active=0, age=1, name=2
            0, 1, 2,
            // Field offsets (1 byte each): 4 offsets total
            0, // offset to first value (boolean true)
            1, // offset to second value (int8)
            3, // offset to third value (short string)
            9, // end offset
            // Values:
            0x04, // boolean true: primitive_header=1, basic_type=0 -> (1 << 2) | 0 = 0x04
            0x0C,
            42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42
            0x15, b'h', b'e', b'l', b'l',
            b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15
        ];

        let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();

        // Test basic properties
        assert_eq!(variant_obj.len(), 3);
        assert!(!variant_obj.is_empty());

        // Test field access
        let active_field = variant_obj.get("active");
        assert!(active_field.is_some());
        assert_eq!(active_field.unwrap().as_boolean(), Some(true));

        let age_field = variant_obj.get("age");
        assert!(age_field.is_some());
        assert_eq!(age_field.unwrap().as_int8(), Some(42));

        let name_field = variant_obj.get("name");
        assert!(name_field.is_some());
        assert_eq!(name_field.unwrap().as_string(), Some("hello"));

        // Test non-existent field
        let missing_field = variant_obj.get("missing");
        assert!(missing_field.is_none());

        // Out-of-range indexes yield `None` rather than panicking
        let missing_field_name = variant_obj.field_name(3);
        assert!(missing_field_name.is_none());

        let missing_field_name = variant_obj.field_name(300);
        assert!(missing_field_name.is_none());

        let missing_field_value = variant_obj.field(3);
        assert!(missing_field_value.is_none());

        let missing_field_value = variant_obj.field(300);
        assert!(missing_field_value.is_none());

        // Test fields iterator
        let fields: Vec<_> = variant_obj.iter().collect();
        assert_eq!(fields.len(), 3);

        // Fields should be in sorted order: active, age, name
        assert_eq!(fields[0].0, "active");
        assert_eq!(fields[0].1.as_boolean(), Some(true));

        assert_eq!(fields[1].0, "age");
        assert_eq!(fields[1].1.as_int8(), Some(42));

        assert_eq!(fields[2].0, "name");
        assert_eq!(fields[2].1.as_string(), Some("hello"));

        // Test field access by index
        // Fields should be in sorted order: active, age, name
        assert_eq!(variant_obj.field_name(0), Some("active"));
        assert_eq!(variant_obj.field(0).unwrap().as_boolean(), Some(true));

        assert_eq!(variant_obj.field_name(1), Some("age"));
        assert_eq!(variant_obj.field(1).unwrap().as_int8(), Some(42));

        assert_eq!(variant_obj.field_name(2), Some("name"));
        assert_eq!(variant_obj.field(2).unwrap().as_string(), Some("hello"));
    }

    // An object with zero fields still validates, and lookups and iteration come back empty.
    #[test]
    fn test_variant_object_empty() {
        // Create metadata with no fields
        let metadata_bytes = vec![
            0x11, // header: version=1, sorted=1, offset_size_minus_one=0
            0,    // dictionary_size = 0
            0,    // offset[0] = 0 (end of dictionary)
        ];
        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

        // Create empty object value data: {}
        let object_value = vec![
            0x02, // header: basic_type=2, value_header=0x00
            0,    // num_elements = 0
            0,    // single offset pointing to end
                  // No field IDs, no values
        ];

        let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();

        // Test basic properties
        assert_eq!(variant_obj.len(), 0);
        assert!(variant_obj.is_empty());

        // Test field access on empty object
        let missing_field = variant_obj.get("anything");
        assert!(missing_field.is_none());

        // Test fields iterator on empty object
        let fields: Vec<_> = variant_obj.iter().collect();
        assert_eq!(fields.len(), 0);
    }

    // Metadata whose final string offset points past the end of the buffer must be rejected.
    #[test]
    fn test_variant_object_invalid_metadata_end_offset() {
        // Create metadata with field names: "age", "name" (sorted)
        let metadata_bytes = vec![
            0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
            2,           // dictionary size
            0,           // "age"
            3,           // "name"
            8,           // Invalid end offset (should be 7)
            b'a',
            b'g',
            b'e',
            b'n',
            b'a',
            b'm',
            b'e',
        ];
        let err = VariantMetadata::try_new(&metadata_bytes);
        let err = err.unwrap_err();
        // 5 bytes of header/offsets + end offset 8 = byte 13, but the buffer holds only 12 bytes
        assert!(matches!(
            err,
            ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..13 from 12-byte buffer")
        ));
    }

    // An object whose final field offset points past the end of the buffer must be rejected.
    #[test]
    fn test_variant_object_invalid_end_offset() {
        // Create metadata with field names: "age", "name" (sorted)
        let metadata_bytes = vec![
            0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
            2,           // dictionary size
            0,           // "age"
            3,           // "name"
            7,           // end offset of the string bytes
            b'a',
            b'g',
            b'e',
            b'n',
            b'a',
            b'm',
            b'e',
        ];
        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

        // Create object value data for: {"age": 42, "name": "hello"}
        // Field IDs in sorted order: [0, 1] (age, name)
        // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0
        // value_header = 0000_00_00 = 0x00
        let object_value = vec![
            0x02, // header: basic_type=2, value_header=0x00
            2,    // num_elements = 2
            // Field IDs (1 byte each): age=0, name=1
            0, 1,
            // Field offsets (1 byte each): 3 offsets total
            0, // offset to first value (int8)
            2, // offset to second value (short string)
            9, // invalid end offset (correct would be 8)
            // Values:
            0x0C,
            42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42
            0x15, b'h', b'e', b'l', b'l',
            b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15
        ];

        let err = VariantObject::try_new(metadata, &object_value);
        let err = err.unwrap_err();
        // Values start at byte 7; 7 + end offset 9 = byte 16, but the buffer holds only 15 bytes
        assert!(matches!(
            err,
            ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer")
        ));
    }

    // Helper: builds an object with `count` int32 fields named "0", "1", ... and checks the
    // element count, the first and last field values, and the width chosen for field ids.
    fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) {
        let field_names: Vec<_> = (0..count).map(|val| val.to_string()).collect();
        let mut builder =
            VariantBuilder::new().with_field_names(field_names.iter().map(|s| s.as_str()));

        let mut obj = builder.new_object();

        for i in 0..count {
            obj.insert(&field_names[i as usize], i);
        }

        obj.finish().unwrap();
        let (metadata, value) = builder.finish();
        let variant = Variant::new(&metadata, &value);

        if let Variant::Object(obj) = variant {
            assert_eq!(obj.len(), count as usize);

            assert_eq!(obj.get(&field_names[0]).unwrap(), Variant::Int32(0));
            assert_eq!(
                obj.get(&field_names[(count - 1) as usize]).unwrap(),
                Variant::Int32(count - 1)
            );
            assert_eq!(
                obj.header.field_id_size, expected_field_id_size,
                "Expected {}-byte field IDs, got {}-byte field IDs",
                expected_field_id_size as usize, obj.header.field_id_size as usize
            );
        } else {
            panic!("Expected object variant");
        }
    }

    #[test]
    fn test_variant_object_257_elements() {
        test_variant_object_with_count((1 << 8) + 1, OffsetSizeBytes::Two); // 2^8 + 1, expected 2-byte field IDs
    }

    #[test]
    fn test_variant_object_65537_elements() {
        test_variant_object_with_count((1 << 16) + 1, OffsetSizeBytes::Three);
        // 2^16 + 1, expected 3-byte field IDs
    }

    /* Can't run this test now as it takes 45x longer than other tests
    #[test]
    fn test_variant_object_16777217_elements() {
        test_variant_object_with_count((1 << 24) + 1, OffsetSizeBytes::Four);
        // 2^24 + 1, expected 4-byte field IDs
    }
     */

    #[test]
    fn test_variant_object_small_sizes_255_elements() {
        test_variant_object_with_count(255, OffsetSizeBytes::One); // expected 1-byte field IDs
    }

    // Helper: builds an object with 20 string fields of `data_size_per_field` bytes each and
    // checks the element count and the width chosen for field offsets.
    fn test_variant_object_with_large_data(
        data_size_per_field: usize,
        expected_field_offset_size: OffsetSizeBytes,
    ) {
        let num_fields = 20;
        let mut builder = VariantBuilder::new();
        let mut obj = builder.new_object();

        let str_val = "a".repeat(data_size_per_field);

        for val in 0..num_fields {
            let key = format!("id_{val}");
            obj.insert(&key, str_val.as_str());
        }

        obj.finish().unwrap();
        let (metadata, value) = builder.finish();
        let variant = Variant::new(&metadata, &value);

        if let Variant::Object(obj) = variant {
            assert_eq!(obj.len(), num_fields);
            assert_eq!(
                obj.header.field_offset_size, expected_field_offset_size,
                "Expected {}-byte field offsets, got {}-byte field offsets",
                expected_field_offset_size as usize, obj.header.field_offset_size as usize
            );
        } else {
            panic!("Expected object variant");
        }
    }

    // NOTE(review): the names of the four tests below do not match what they assert (the
    // expected offset width is the second argument); consider renaming them.

    // 10 bytes per field: expects 1-byte field offsets.
    #[test]
    fn test_variant_object_child_data_0_byte_offsets_minus_one() {
        test_variant_object_with_large_data(10, OffsetSizeBytes::One);
    }

    // 2^8 + 1 bytes per field: expects 2-byte field offsets (the name says 3).
    #[test]
    fn test_variant_object_256_bytes_child_data_3_byte_offsets() {
        test_variant_object_with_large_data(256 + 1, OffsetSizeBytes::Two); // 2^8 + 1 bytes per field
    }

    // 2^16 + 1 bytes per field: expects 3-byte field offsets (the name says 4).
    #[test]
    fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() {
        test_variant_object_with_large_data(65536 + 1, OffsetSizeBytes::Three); // 2^16 + 1 bytes per field
    }

    // 2^24 + 1 bytes per field: expects 4-byte field offsets (the name says 2).
    // NOTE(review): 20 fields of ~16 MiB each means roughly 320 MiB of value data; this is
    // likely slow and memory hungry — confirm it is meant to run by default.
    #[test]
    fn test_variant_object_65535_bytes_child_data_2_byte_offsets() {
        test_variant_object_with_large_data(16777216 + 1, OffsetSizeBytes::Four);
        // 2^24 + 1 bytes per field
    }
}