parquet_variant/variant/object.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
18use crate::utils::{
19 first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
20};
21use crate::variant::{Variant, VariantMetadata};
22
23use arrow_schema::ArrowError;
24
/// Size in bytes of the value header that begins every encoded variant object. A named
/// constant keeps the offset arithmetic in this file readable.
const NUM_HEADER_BYTES: u32 = 1;
27
/// Header structure for [`VariantObject`]
///
/// Holds the byte widths of the three variable-width parts of an encoded object.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct VariantObjectHeader {
    // Width of the `num_elements` count that follows the header byte.
    num_elements_size: OffsetSizeBytes,
    // Width of each entry in the field id array.
    field_id_size: OffsetSizeBytes,
    // Width of each entry in the field offset array.
    field_offset_size: OffsetSizeBytes,
}
35
36impl VariantObjectHeader {
37 // Hide the ugly casting
38 const fn num_elements_size(&self) -> u32 {
39 self.num_elements_size as _
40 }
41 const fn field_id_size(&self) -> u32 {
42 self.field_id_size as _
43 }
44 const fn field_offset_size(&self) -> u32 {
45 self.field_offset_size as _
46 }
47
48 // Avoid materializing this offset, since it's cheaply and safely computable
49 const fn field_ids_start_byte(&self) -> u32 {
50 NUM_HEADER_BYTES + self.num_elements_size()
51 }
52
53 pub(crate) fn try_new(header_byte: u8) -> Result<Self, ArrowError> {
54 // Parse the header byte to get object parameters
55 let value_header = header_byte >> 2;
56 let field_offset_size_minus_one = value_header & 0x03; // Last 2 bits
57 let field_id_size_minus_one = (value_header >> 2) & 0x03; // Next 2 bits
58 let is_large = (value_header & 0x10) != 0; // 5th bit
59 let num_elements_size = match is_large {
60 true => OffsetSizeBytes::Four,
61 false => OffsetSizeBytes::One,
62 };
63 Ok(Self {
64 num_elements_size,
65 field_id_size: OffsetSizeBytes::try_new(field_id_size_minus_one)?,
66 field_offset_size: OffsetSizeBytes::try_new(field_offset_size_minus_one)?,
67 })
68 }
69}
70
/// A [`Variant`] Object (struct with named fields).
///
/// See the [Variant spec] file for more information.
///
/// # Validation
///
/// Every instance of variant object is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant object subtype (see below).
///
/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully (and recursively)
/// _validated_. They always contain _valid_ data, and infallible accesses such as iteration and
/// indexing are panic-free. The validation cost is linear in the number of underlying bytes.
///
/// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or
/// _invalid_ data. Infallible accesses such as iteration and indexing will panic if the underlying
/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`] and
/// [`Self::try_field`] are provided as panic-free alternatives. [`Self::with_full_validation`] can
/// also be used to _validate_ an _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. They can be useful if the caller
/// knows the underlying bytes were already validated previously, or if the caller intends to
/// perform a small number of (fallible) field accesses against a large object.
///
/// A _validated_ instance guarantees that:
///
/// - header byte is valid
/// - num_elements is in bounds
/// - field id array is in bounds
/// - field offset array is in bounds
/// - field value array is in bounds
/// - all field ids are valid metadata dictionary entries (*)
/// - field ids are lexically ordered according to their corresponding string values (*)
/// - all field offsets are in bounds (*)
/// - all field values are (recursively) _valid_ variant values (*)
/// - the associated variant metadata is [valid] (*)
///
/// NOTE: [`Self::new`] only skips expensive (non-constant cost) validation checks (marked by `(*)`
/// in the list above); it panics if any of the other checks fail.
///
/// # Safety
///
/// Even an _invalid_ variant object instance is still _safe_ to use in the Rust sense. Accessing it
/// with infallible methods may cause panics but will never lead to undefined behavior.
///
/// [valid]: VariantMetadata#Validation
/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-object-basic_type2
#[derive(Debug, Clone, PartialEq)]
pub struct VariantObject<'m, 'v> {
    /// Metadata dictionary used to resolve this object's field ids to field names.
    pub metadata: VariantMetadata<'m>,
    /// Encoded bytes of this object, truncated at construction to end where the last field
    /// offset says the values end.
    pub value: &'v [u8],
    // Byte widths parsed from the first byte of `value`.
    header: VariantObjectHeader,
    // Number of fields, as read from `value` right after the header byte.
    num_elements: u32,
    // Byte position in `value` where the field offset array starts (and the field id array ends).
    first_field_offset_byte: u32,
    // Byte position in `value` where the field values start (and the field offset array ends).
    first_value_byte: u32,
    // Set once `with_full_validation` has succeeded.
    validated: bool,
}
127
// We don't want this to grow because it could increase the size of `Variant` and hurt performance.
// NOTE(review): `expect_size_of` is presumably a compile-time check that the type is exactly
// 64 bytes -- confirm against `crate::utils`.
const _: () = crate::utils::expect_size_of::<VariantObject>(64);
130
131impl<'m, 'v> VariantObject<'m, 'v> {
132 pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
133 Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant object")
134 }
135
136 /// Attempts to interpet `metadata` and `value` as a variant object.
137 ///
138 /// # Validation
139 ///
140 /// This constructor verifies that `value` points to a valid variant object value. In
141 /// particular, that all field ids exist in `metadata`, and all offsets are in-bounds and point
142 /// to valid objects.
143 pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> {
144 Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation()
145 }
146
147 /// Attempts to interpet `metadata` and `value` as a variant object, performing only basic
148 /// (constant-cost) [validation].
149 ///
150 /// [validation]: Self#Validation
151 pub(crate) fn try_new_with_shallow_validation(
152 metadata: VariantMetadata<'m>,
153 value: &'v [u8],
154 ) -> Result<Self, ArrowError> {
155 let header_byte = first_byte_from_slice(value)?;
156 let header = VariantObjectHeader::try_new(header_byte)?;
157
158 // Determine num_elements size based on is_large flag and fetch the value
159 let num_elements =
160 header
161 .num_elements_size
162 .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?;
163
164 // Calculate byte offsets for field offsets and values with overflow protection, and verify
165 // they're in bounds
166 let first_field_offset_byte = num_elements
167 .checked_mul(header.field_id_size())
168 .and_then(|n| n.checked_add(header.field_ids_start_byte()))
169 .ok_or_else(|| overflow_error("offset of variant object field offsets"))?;
170
171 let first_value_byte = num_elements
172 .checked_add(1)
173 .and_then(|n| n.checked_mul(header.field_offset_size()))
174 .and_then(|n| n.checked_add(first_field_offset_byte))
175 .ok_or_else(|| overflow_error("offset of variant object field values"))?;
176
177 let mut new_self = Self {
178 metadata,
179 value,
180 header,
181 num_elements,
182 first_field_offset_byte,
183 first_value_byte,
184 validated: false,
185 };
186
187 // Spec says: "The last field_offset points to the byte after the end of the last value"
188 //
189 // Use it to upper-bound the value bytes, which also verifies that the field id and field
190 // offset arrays are in bounds.
191 let last_offset = new_self
192 .get_offset(num_elements as _)?
193 .checked_add(first_value_byte)
194 .ok_or_else(|| overflow_error("variant object size"))?;
195 new_self.value = slice_from_slice(value, ..last_offset as _)?;
196 Ok(new_self)
197 }
198
    /// True if this instance is fully [validated] for panic-free infallible accesses.
    ///
    /// The flag starts out `false` and is set by a successful [`Self::with_full_validation`].
    ///
    /// [validated]: Self#Validation
    pub fn is_fully_validated(&self) -> bool {
        self.validated
    }
205
206 /// Performs a full [validation] of this variant object.
207 ///
208 /// [validation]: Self#Validation
209 pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
210 if !self.validated {
211 // Validate the metadata dictionary first, if not already validated, because we pass it
212 // by value to all the children (who would otherwise re-validate it repeatedly).
213 self.metadata = self.metadata.with_full_validation()?;
214
215 let field_id_buffer = slice_from_slice(
216 self.value,
217 self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _,
218 )?;
219
220 let field_ids = map_bytes_to_offsets(field_id_buffer, self.header.field_id_size)
221 .collect::<Vec<_>>();
222
223 // Validate all field ids exist in the metadata dictionary and the corresponding field names are lexicographically sorted
224 if self.metadata.is_sorted() {
225 // Since the metadata dictionary has unique and sorted field names, we can also guarantee this object's field names
226 // are lexicographically sorted by their field id ordering
227 if !field_ids.is_sorted() {
228 return Err(ArrowError::InvalidArgumentError(
229 "field names not sorted".to_string(),
230 ));
231 }
232
233 // Since field ids are sorted, if the last field is smaller than the dictionary size,
234 // we also know all field ids are smaller than the dictionary size and in-bounds.
235 if let Some(&last_field_id) = field_ids.last() {
236 if last_field_id >= self.metadata.dictionary_size() {
237 return Err(ArrowError::InvalidArgumentError(
238 "field id is not valid".to_string(),
239 ));
240 }
241 }
242 } else {
243 // The metadata dictionary can't guarantee uniqueness or sortedness, so we have to parse out the corresponding field names
244 // to check lexicographical order
245 //
246 // Since we are probing the metadata dictionary by field id, this also verifies field ids are in-bounds
247 let are_field_names_sorted = field_ids
248 .iter()
249 .map(|&i| self.metadata.get(i))
250 .collect::<Result<Vec<_>, _>>()?
251 .is_sorted();
252
253 if !are_field_names_sorted {
254 return Err(ArrowError::InvalidArgumentError(
255 "field names not sorted".to_string(),
256 ));
257 }
258 }
259
260 // Validate whether values are valid variant objects
261 let field_offset_buffer = slice_from_slice(
262 self.value,
263 self.first_field_offset_byte as _..self.first_value_byte as _,
264 )?;
265 let num_offsets = field_offset_buffer.len() / self.header.field_offset_size() as usize;
266
267 let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
268
269 map_bytes_to_offsets(field_offset_buffer, self.header.field_offset_size)
270 .take(num_offsets.saturating_sub(1))
271 .try_for_each(|offset| {
272 let value_bytes = slice_from_slice(value_buffer, offset..)?;
273 Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?;
274
275 Ok::<_, ArrowError>(())
276 })?;
277
278 self.validated = true;
279 }
280 Ok(self)
281 }
282
283 /// Returns the number of key-value pairs in this object
284 pub fn len(&self) -> usize {
285 self.num_elements as _
286 }
287
288 /// Returns true if the object contains no key-value pairs
289 pub fn is_empty(&self) -> bool {
290 self.len() == 0
291 }
292
293 /// Get a field's value by index in `0..self.len()`
294 ///
295 /// # Panics
296 ///
297 /// If the index is out of bounds. Also if variant object is corrupted (e.g., invalid offsets or
298 /// field IDs). The latter can only happen when working with an unvalidated object produced by
299 /// [`Self::new`].
300 pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
301 (i < self.len()).then(|| {
302 self.try_field_with_shallow_validation(i)
303 .expect("Invalid object field value")
304 })
305 }
306
307 /// Fallible version of `field`. Returns field value by index, capturing validation errors
308 pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
309 self.try_field_with_shallow_validation(i)?
310 .with_full_validation()
311 }
312
313 // Attempts to retrieve the ith field value from the value region of the byte buffer; it
314 // performs only basic (constant-cost) validation.
315 fn try_field_with_shallow_validation(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
316 let value_bytes = slice_from_slice(self.value, self.first_value_byte as _..)?;
317 let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)? as _..)?;
318 Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes)
319 }
320
321 // Attempts to retrieve the ith offset from the field offset region of the byte buffer.
322 fn get_offset(&self, i: usize) -> Result<u32, ArrowError> {
323 let byte_range = self.first_field_offset_byte as _..self.first_value_byte as _;
324 let field_offsets = slice_from_slice(self.value, byte_range)?;
325 self.header.field_offset_size.unpack_u32(field_offsets, i)
326 }
327
328 /// Get a field's name by index in `0..self.len()`
329 ///
330 /// # Panics
331 /// If the variant object is corrupted (e.g., invalid offsets or field IDs).
332 /// This should never happen since the constructor validates all data upfront.
333 pub fn field_name(&self, i: usize) -> Option<&'m str> {
334 (i < self.len()).then(|| {
335 self.try_field_name(i)
336 .expect("Invalid variant object field name")
337 })
338 }
339
340 /// Fallible version of `field_name`. Returns field name by index, capturing validation errors
341 fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
342 let byte_range = self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _;
343 let field_id_bytes = slice_from_slice(self.value, byte_range)?;
344 let field_id = self.header.field_id_size.unpack_u32(field_id_bytes, i)?;
345 self.metadata.get(field_id as _)
346 }
347
348 /// Returns an iterator of (name, value) pairs over the fields of this object.
349 pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> + '_ {
350 self.iter_try_with_shallow_validation()
351 .map(|result| result.expect("Invalid variant object field value"))
352 }
353
354 /// Fallible iteration over the fields of this object.
355 pub fn iter_try(
356 &self,
357 ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
358 self.iter_try_with_shallow_validation().map(|result| {
359 let (name, value) = result?;
360 Ok((name, value.with_full_validation()?))
361 })
362 }
363
364 // Fallible iteration over the fields of this object that performs only shallow (constant-cost)
365 // validation of field values.
366 fn iter_try_with_shallow_validation(
367 &self,
368 ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
369 (0..self.len()).map(|i| {
370 let field = self.try_field_with_shallow_validation(i)?;
371 Ok((self.try_field_name(i)?, field))
372 })
373 }
374
375 /// Returns the value of the field with the specified name, if any.
376 ///
377 /// `Ok(None)` means the field does not exist; `Err` means the search encountered an error.
378 pub fn get(&self, name: &str) -> Option<Variant<'m, 'v>> {
379 // Binary search through the field IDs of this object to find the requested field name.
380 //
381 // NOTE: This does not require a sorted metadata dictionary, because the variant spec
382 // requires object field ids to be lexically sorted by their corresponding string values,
383 // and probing the dictionary for a field id is always O(1) work.
384 let i = try_binary_search_range_by(0..self.len(), &name, |i| self.field_name(i))?.ok()?;
385
386 self.field(i)
387 }
388}
389
390#[cfg(test)]
391mod tests {
392 use crate::VariantBuilder;
393
394 use super::*;
395
396 #[test]
397 fn test_variant_object_simple() {
398 // Create metadata with field names: "age", "name", "active" (sorted)
399 // Header: version=1, sorted=1, offset_size=1 (offset_size_minus_one=0)
400 // So header byte = 00_0_1_0001 = 0x11
401 let metadata_bytes = vec![
402 0b0001_0001,
403 3, // dictionary size
404 0, // "active"
405 6, // "age"
406 9, // "name"
407 13,
408 b'a',
409 b'c',
410 b't',
411 b'i',
412 b'v',
413 b'e',
414 b'a',
415 b'g',
416 b'e',
417 b'n',
418 b'a',
419 b'm',
420 b'e',
421 ];
422 let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
423
424 // Create object value data for: {"active": true, "age": 42, "name": "hello"}
425 // Field IDs in sorted order: [0, 1, 2] (active, age, name)
426 // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0
427 // value_header = 0000_00_00 = 0x00
428 // So header byte = (0x00 << 2) | 2 = 0x02
429 let object_value = vec![
430 0x02, // header: basic_type=2, value_header=0x00
431 3, // num_elements = 3
432 // Field IDs (1 byte each): active=0, age=1, name=2
433 0, 1, 2,
434 // Field offsets (1 byte each): 4 offsets total
435 0, // offset to first value (boolean true)
436 1, // offset to second value (int8)
437 3, // offset to third value (short string)
438 9, // end offset
439 // Values:
440 0x04, // boolean true: primitive_header=1, basic_type=0 -> (1 << 2) | 0 = 0x04
441 0x0C,
442 42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42
443 0x15, b'h', b'e', b'l', b'l',
444 b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15
445 ];
446
447 let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();
448
449 // Test basic properties
450 assert_eq!(variant_obj.len(), 3);
451 assert!(!variant_obj.is_empty());
452
453 // Test field access
454 let active_field = variant_obj.get("active");
455 assert!(active_field.is_some());
456 assert_eq!(active_field.unwrap().as_boolean(), Some(true));
457
458 let age_field = variant_obj.get("age");
459 assert!(age_field.is_some());
460 assert_eq!(age_field.unwrap().as_int8(), Some(42));
461
462 let name_field = variant_obj.get("name");
463 assert!(name_field.is_some());
464 assert_eq!(name_field.unwrap().as_string(), Some("hello"));
465
466 // Test non-existent field
467 let missing_field = variant_obj.get("missing");
468 assert!(missing_field.is_none());
469
470 let missing_field_name = variant_obj.field_name(3);
471 assert!(missing_field_name.is_none());
472
473 let missing_field_name = variant_obj.field_name(300);
474 assert!(missing_field_name.is_none());
475
476 let missing_field_value = variant_obj.field(3);
477 assert!(missing_field_value.is_none());
478
479 let missing_field_value = variant_obj.field(300);
480 assert!(missing_field_value.is_none());
481
482 // Test fields iterator
483 let fields: Vec<_> = variant_obj.iter().collect();
484 assert_eq!(fields.len(), 3);
485
486 // Fields should be in sorted order: active, age, name
487 assert_eq!(fields[0].0, "active");
488 assert_eq!(fields[0].1.as_boolean(), Some(true));
489
490 assert_eq!(fields[1].0, "age");
491 assert_eq!(fields[1].1.as_int8(), Some(42));
492
493 assert_eq!(fields[2].0, "name");
494 assert_eq!(fields[2].1.as_string(), Some("hello"));
495
496 // Test field access by index
497 // Fields should be in sorted order: active, age, name
498 assert_eq!(variant_obj.field_name(0), Some("active"));
499 assert_eq!(variant_obj.field(0).unwrap().as_boolean(), Some(true));
500
501 assert_eq!(variant_obj.field_name(1), Some("age"));
502 assert_eq!(variant_obj.field(1).unwrap().as_int8(), Some(42));
503
504 assert_eq!(variant_obj.field_name(2), Some("name"));
505 assert_eq!(variant_obj.field(2).unwrap().as_string(), Some("hello"));
506 }
507
508 #[test]
509 fn test_variant_object_empty() {
510 // Create metadata with no fields
511 let metadata_bytes = vec![
512 0x11, // header: version=1, sorted=0, offset_size_minus_one=0
513 0, // dictionary_size = 0
514 0, // offset[0] = 0 (end of dictionary)
515 ];
516 let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
517
518 // Create empty object value data: {}
519 let object_value = vec![
520 0x02, // header: basic_type=2, value_header=0x00
521 0, // num_elements = 0
522 0, // single offset pointing to end
523 // No field IDs, no values
524 ];
525
526 let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();
527
528 // Test basic properties
529 assert_eq!(variant_obj.len(), 0);
530 assert!(variant_obj.is_empty());
531
532 // Test field access on empty object
533 let missing_field = variant_obj.get("anything");
534 assert!(missing_field.is_none());
535
536 // Test fields iterator on empty object
537 let fields: Vec<_> = variant_obj.iter().collect();
538 assert_eq!(fields.len(), 0);
539 }
540
541 #[test]
542 fn test_variant_object_invalid_metadata_end_offset() {
543 // Create metadata with field names: "age", "name" (sorted)
544 let metadata_bytes = vec![
545 0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
546 2, // dictionary size
547 0, // "age"
548 3, // "name"
549 8, // Invalid end offset (should be 7)
550 b'a',
551 b'g',
552 b'e',
553 b'n',
554 b'a',
555 b'm',
556 b'e',
557 ];
558 let err = VariantMetadata::try_new(&metadata_bytes);
559 let err = err.unwrap_err();
560 assert!(matches!(
561 err,
562 ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..13 from 12-byte buffer")
563 ));
564 }
565
566 #[test]
567 fn test_variant_object_invalid_end_offset() {
568 // Create metadata with field names: "age", "name" (sorted)
569 let metadata_bytes = vec![
570 0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
571 2, // dictionary size
572 0, // "age"
573 3, // "name"
574 7,
575 b'a',
576 b'g',
577 b'e',
578 b'n',
579 b'a',
580 b'm',
581 b'e',
582 ];
583 let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
584
585 // Create object value data for: {"age": 42, "name": "hello"}
586 // Field IDs in sorted order: [0, 1] (age, name)
587 // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0
588 // value_header = 0000_00_00 = 0x00
589 let object_value = vec![
590 0x02, // header: basic_type=2, value_header=0x00
591 2, // num_elements = 2
592 // Field IDs (1 byte each): age=0, name=1
593 0, 1,
594 // Field offsets (1 byte each): 3 offsets total
595 0, // offset to first value (int8)
596 2, // offset to second value (short string)
597 9, // invalid end offset (correct would be 8)
598 // Values:
599 0x0C,
600 42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42
601 0x15, b'h', b'e', b'l', b'l',
602 b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15
603 ];
604
605 let err = VariantObject::try_new(metadata, &object_value);
606 let err = err.unwrap_err();
607 assert!(matches!(
608 err,
609 ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer")
610 ));
611 }
612
613 fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) {
614 let field_names: Vec<_> = (0..count).map(|val| val.to_string()).collect();
615 let mut builder =
616 VariantBuilder::new().with_field_names(field_names.iter().map(|s| s.as_str()));
617
618 let mut obj = builder.new_object();
619
620 for i in 0..count {
621 obj.insert(&field_names[i as usize], i);
622 }
623
624 obj.finish().unwrap();
625 let (metadata, value) = builder.finish();
626 let variant = Variant::new(&metadata, &value);
627
628 if let Variant::Object(obj) = variant {
629 assert_eq!(obj.len(), count as usize);
630
631 assert_eq!(obj.get(&field_names[0]).unwrap(), Variant::Int32(0));
632 assert_eq!(
633 obj.get(&field_names[(count - 1) as usize]).unwrap(),
634 Variant::Int32(count - 1)
635 );
636 assert_eq!(
637 obj.header.field_id_size, expected_field_id_size,
638 "Expected {}-byte field IDs, got {}-byte field IDs",
639 expected_field_id_size as usize, obj.header.field_id_size as usize
640 );
641 } else {
642 panic!("Expected object variant");
643 }
644 }
645
646 #[test]
647 fn test_variant_object_257_elements() {
648 test_variant_object_with_count((1 << 8) + 1, OffsetSizeBytes::Two); // 2^8 + 1, expected 2-byte field IDs
649 }
650
651 #[test]
652 fn test_variant_object_65537_elements() {
653 test_variant_object_with_count((1 << 16) + 1, OffsetSizeBytes::Three);
654 // 2^16 + 1, expected 3-byte field IDs
655 }
656
657 /* Can't run this test now as it takes 45x longer than other tests
658 #[test]
659 fn test_variant_object_16777217_elements() {
660 test_variant_object_with_count((1 << 24) + 1, OffsetSizeBytes::Four);
661 // 2^24 + 1, expected 4-byte field IDs
662 }
663 */
664
665 #[test]
666 fn test_variant_object_small_sizes_255_elements() {
667 test_variant_object_with_count(255, OffsetSizeBytes::One);
668 }
669
670 fn test_variant_object_with_large_data(
671 data_size_per_field: usize,
672 expected_field_offset_size: OffsetSizeBytes,
673 ) {
674 let num_fields = 20;
675 let mut builder = VariantBuilder::new();
676 let mut obj = builder.new_object();
677
678 let str_val = "a".repeat(data_size_per_field);
679
680 for val in 0..num_fields {
681 let key = format!("id_{val}");
682 obj.insert(&key, str_val.as_str());
683 }
684
685 obj.finish().unwrap();
686 let (metadata, value) = builder.finish();
687 let variant = Variant::new(&metadata, &value);
688
689 if let Variant::Object(obj) = variant {
690 assert_eq!(obj.len(), num_fields);
691 assert_eq!(
692 obj.header.field_offset_size, expected_field_offset_size,
693 "Expected {}-byte field offsets, got {}-byte field offsets",
694 expected_field_offset_size as usize, obj.header.field_offset_size as usize
695 );
696 } else {
697 panic!("Expected object variant");
698 }
699 }
700
701 #[test]
702 fn test_variant_object_child_data_0_byte_offsets_minus_one() {
703 test_variant_object_with_large_data(10, OffsetSizeBytes::One);
704 }
705
706 #[test]
707 fn test_variant_object_256_bytes_child_data_3_byte_offsets() {
708 test_variant_object_with_large_data(256 + 1, OffsetSizeBytes::Two); // 2^8 - 2^16 elements
709 }
710
711 #[test]
712 fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() {
713 test_variant_object_with_large_data(65536 + 1, OffsetSizeBytes::Three); // 2^16 - 2^24 elements
714 }
715
716 #[test]
717 fn test_variant_object_65535_bytes_child_data_2_byte_offsets() {
718 test_variant_object_with_large_data(16777216 + 1, OffsetSizeBytes::Four);
719 // 2^24
720 }
721}