1use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
19use crate::utils::{
20 first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
21};
22use crate::variant::{Variant, VariantMetadata};
23
24use arrow_schema::ArrowError;
25
/// Size in bytes of the header byte that starts every encoded object. The element count is read
/// at this offset and the field id section begins after the header and the element count.
const NUM_HEADER_BYTES: u32 = 1;
28
/// Layout parameters decoded from the header byte of an encoded variant object.
///
/// Each field records how many bytes a single entry of the corresponding section occupies.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct VariantObjectHeader {
    /// Width of the element count that follows the header byte.
    num_elements_size: OffsetSizeBytes,
    /// Width of each entry in the field id array.
    field_id_size: OffsetSizeBytes,
    /// Width of each entry in the field offset array.
    field_offset_size: OffsetSizeBytes,
}
36
37impl VariantObjectHeader {
38 const fn num_elements_size(&self) -> u32 {
40 self.num_elements_size as _
41 }
42 const fn field_id_size(&self) -> u32 {
43 self.field_id_size as _
44 }
45 const fn field_offset_size(&self) -> u32 {
46 self.field_offset_size as _
47 }
48
49 const fn field_ids_start_byte(&self) -> u32 {
51 NUM_HEADER_BYTES + self.num_elements_size()
52 }
53
54 pub(crate) fn try_new(header_byte: u8) -> Result<Self, ArrowError> {
55 let value_header = header_byte >> 2;
57 let field_offset_size_minus_one = value_header & 0x03; let field_id_size_minus_one = (value_header >> 2) & 0x03; let is_large = (value_header & 0x10) != 0; let num_elements_size = match is_large {
61 true => OffsetSizeBytes::Four,
62 false => OffsetSizeBytes::One,
63 };
64 Ok(Self {
65 num_elements_size,
66 field_id_size: OffsetSizeBytes::try_new(field_id_size_minus_one)?,
67 field_offset_size: OffsetSizeBytes::try_new(field_offset_size_minus_one)?,
68 })
69 }
70}
71
/// A borrowed view over an encoded variant object: a collection of named fields whose names are
/// resolved through a metadata dictionary.
///
/// The encoded bytes are laid out as: one header byte, the element count, `num_elements` field
/// ids, `num_elements + 1` field offsets, and finally the field values.
#[derive(Debug, Clone)]
pub struct VariantObject<'m, 'v> {
    /// Dictionary used to turn field ids into field names.
    pub metadata: VariantMetadata<'m>,
    /// Encoded bytes of this object; the constructor truncates the slice at the end of the last
    /// field value.
    pub value: &'v [u8],
    /// Section widths decoded from the header byte.
    header: VariantObjectHeader,
    /// Number of fields stored in the object.
    num_elements: u32,
    /// Byte position within `value` where the field offset array starts.
    first_field_offset_byte: u32,
    /// Byte position within `value` where the field values start.
    first_value_byte: u32,
    /// Set once `with_full_validation` has succeeded.
    validated: bool,
}
128
// Evaluated at compile time: pins the expected size of `VariantObject` at 64 bytes.
// NOTE(review): presumably `expect_size_of` fails the build on a mismatch — confirm in
// `crate::utils`.
const _: () = crate::utils::expect_size_of::<VariantObject>(64);
131
132impl<'m, 'v> VariantObject<'m, 'v> {
133 pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
134 Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant object")
135 }
136
137 pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> {
145 Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation()
146 }
147
148 pub(crate) fn try_new_with_shallow_validation(
153 metadata: VariantMetadata<'m>,
154 value: &'v [u8],
155 ) -> Result<Self, ArrowError> {
156 let header_byte = first_byte_from_slice(value)?;
157 let header = VariantObjectHeader::try_new(header_byte)?;
158
159 let num_elements =
161 header
162 .num_elements_size
163 .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?;
164
165 let first_field_offset_byte = num_elements
168 .checked_mul(header.field_id_size())
169 .and_then(|n| n.checked_add(header.field_ids_start_byte()))
170 .ok_or_else(|| overflow_error("offset of variant object field offsets"))?;
171
172 let first_value_byte = num_elements
173 .checked_add(1)
174 .and_then(|n| n.checked_mul(header.field_offset_size()))
175 .and_then(|n| n.checked_add(first_field_offset_byte))
176 .ok_or_else(|| overflow_error("offset of variant object field values"))?;
177
178 let mut new_self = Self {
179 metadata,
180 value,
181 header,
182 num_elements,
183 first_field_offset_byte,
184 first_value_byte,
185 validated: false,
186 };
187
188 let last_offset = new_self
193 .get_offset(num_elements as _)?
194 .checked_add(first_value_byte)
195 .ok_or_else(|| overflow_error("variant object size"))?;
196 new_self.value = slice_from_slice(value, ..last_offset as _)?;
197 Ok(new_self)
198 }
199
/// Returns `true` once this object has passed `with_full_validation`; objects that have only
/// been shallowly validated report `false`.
pub fn is_fully_validated(&self) -> bool {
    self.validated
}
206
207 pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
211 if !self.validated {
212 self.metadata = self.metadata.with_full_validation()?;
215
216 let field_id_buffer = slice_from_slice(
217 self.value,
218 self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _,
219 )?;
220
221 let mut field_ids_iter =
222 map_bytes_to_offsets(field_id_buffer, self.header.field_id_size);
223
224 if self.metadata.is_sorted() {
226 let dictionary_size = self.metadata.len();
229
230 if let Some(mut current_id) = field_ids_iter.next() {
231 for next_id in field_ids_iter {
232 if current_id >= dictionary_size {
233 return Err(ArrowError::InvalidArgumentError(
234 "field id is not valid".to_string(),
235 ));
236 }
237
238 if next_id <= current_id {
239 return Err(ArrowError::InvalidArgumentError(
240 "field names not sorted".to_string(),
241 ));
242 }
243 current_id = next_id;
244 }
245
246 if current_id >= dictionary_size {
247 return Err(ArrowError::InvalidArgumentError(
248 "field id is not valid".to_string(),
249 ));
250 }
251 }
252 } else {
253 let mut current_field_name = match field_ids_iter.next() {
258 Some(field_id) => Some(self.metadata.get(field_id)?),
259 None => None,
260 };
261
262 for field_id in field_ids_iter {
263 let next_field_name = self.metadata.get(field_id)?;
264
265 if let Some(current_name) = current_field_name {
266 if next_field_name < current_name {
267 return Err(ArrowError::InvalidArgumentError(
268 "field names not sorted".to_string(),
269 ));
270 }
271 }
272 current_field_name = Some(next_field_name);
273 }
274 }
275
276 let field_offset_buffer = slice_from_slice(
278 self.value,
279 self.first_field_offset_byte as _..self.first_value_byte as _,
280 )?;
281 let num_offsets = field_offset_buffer.len() / self.header.field_offset_size() as usize;
282
283 let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
284
285 map_bytes_to_offsets(field_offset_buffer, self.header.field_offset_size)
286 .take(num_offsets.saturating_sub(1))
287 .try_for_each(|offset| {
288 let value_bytes = slice_from_slice(value_buffer, offset..)?;
289 Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?;
290
291 Ok::<_, ArrowError>(())
292 })?;
293
294 self.validated = true;
295 }
296 Ok(self)
297 }
298
299 pub fn len(&self) -> usize {
301 self.num_elements as _
302 }
303
304 pub fn is_empty(&self) -> bool {
306 self.len() == 0
307 }
308
309 pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
317 (i < self.len()).then(|| {
318 self.try_field_with_shallow_validation(i)
319 .expect("Invalid object field value")
320 })
321 }
322
323 pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
325 self.try_field_with_shallow_validation(i)?
326 .with_full_validation()
327 }
328
329 fn try_field_with_shallow_validation(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
332 let value_bytes = slice_from_slice(self.value, self.first_value_byte as _..)?;
333 let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)? as _..)?;
334 Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes)
335 }
336
337 fn get_offset(&self, i: usize) -> Result<u32, ArrowError> {
339 let byte_range = self.first_field_offset_byte as _..self.first_value_byte as _;
340 let field_offsets = slice_from_slice(self.value, byte_range)?;
341 self.header.field_offset_size.unpack_u32(field_offsets, i)
342 }
343
344 pub fn field_name(&self, i: usize) -> Option<&'m str> {
350 (i < self.len()).then(|| {
351 self.try_field_name(i)
352 .expect("Invalid variant object field name")
353 })
354 }
355
356 fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
358 let byte_range = self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _;
359 let field_id_bytes = slice_from_slice(self.value, byte_range)?;
360 let field_id = self.header.field_id_size.unpack_u32(field_id_bytes, i)?;
361 self.metadata.get(field_id as _)
362 }
363
364 pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> + '_ {
366 self.iter_try_with_shallow_validation()
367 .map(|result| result.expect("Invalid variant object field value"))
368 }
369
370 pub fn iter_try(
372 &self,
373 ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
374 self.iter_try_with_shallow_validation().map(|result| {
375 let (name, value) = result?;
376 Ok((name, value.with_full_validation()?))
377 })
378 }
379
380 fn iter_try_with_shallow_validation(
383 &self,
384 ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
385 (0..self.len()).map(|i| {
386 let field = self.try_field_with_shallow_validation(i)?;
387 Ok((self.try_field_name(i)?, field))
388 })
389 }
390
391 pub fn get(&self, name: &str) -> Option<Variant<'m, 'v>> {
395 let cmp = |i| Some(self.field_name(i)?.cmp(name));
401 let i = try_binary_search_range_by(0..self.len(), cmp)?.ok()?;
402 self.field(i)
403 }
404}
405
406impl<'m, 'v> PartialEq for VariantObject<'m, 'v> {
414 fn eq(&self, other: &Self) -> bool {
415 if self.num_elements != other.num_elements {
416 return false;
417 }
418
419 self.iter()
423 .zip(other.iter())
424 .all(|((name_a, value_a), (name_b, value_b))| name_a == name_b && value_a == value_b)
425 }
426}
427
428#[cfg(test)]
429mod tests {
430 use crate::VariantBuilder;
431
432 use super::*;
433
/// Decodes a hand-written three-field object and checks lookup by name, lookup by position,
/// out-of-range access, and iteration.
#[test]
fn test_variant_object_simple() {
    // Dictionary: header byte, 3 entries, four one-byte offsets, then the names back to back.
    let mut metadata_bytes: Vec<u8> = vec![0b0001_0001, 3, 0, 6, 9, 13];
    metadata_bytes.extend_from_slice(b"activeagename");
    let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

    // Object: header byte and element count ...
    let mut object_value: Vec<u8> = vec![0x02, 3];
    // ... one field id per field ...
    object_value.extend_from_slice(&[0, 1, 2]);
    // ... three value offsets followed by the end offset ...
    object_value.extend_from_slice(&[0, 1, 3, 9]);
    // ... and the values for "active", "age" and "name".
    object_value.extend_from_slice(&[0x04]);
    object_value.extend_from_slice(&[0x0C, 42]);
    object_value.extend_from_slice(&[0x15]);
    object_value.extend_from_slice(b"hello");

    let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();

    assert_eq!(variant_obj.len(), 3);
    assert!(!variant_obj.is_empty());

    // Lookup by name.
    assert_eq!(variant_obj.get("active").unwrap().as_boolean(), Some(true));
    assert_eq!(variant_obj.get("age").unwrap().as_int8(), Some(42));
    assert_eq!(variant_obj.get("name").unwrap().as_string(), Some("hello"));
    assert!(variant_obj.get("missing").is_none());

    // Positions at or beyond `len()` yield `None` rather than panicking.
    for out_of_range in [3, 300] {
        assert!(variant_obj.field_name(out_of_range).is_none());
        assert!(variant_obj.field(out_of_range).is_none());
    }

    // Iteration yields the fields in storage order.
    let fields: Vec<_> = variant_obj.iter().collect();
    assert_eq!(fields.len(), 3);

    let (active, age, name) = (&fields[0], &fields[1], &fields[2]);
    assert_eq!(active.0, "active");
    assert_eq!(active.1.as_boolean(), Some(true));
    assert_eq!(age.0, "age");
    assert_eq!(age.1.as_int8(), Some(42));
    assert_eq!(name.0, "name");
    assert_eq!(name.1.as_string(), Some("hello"));

    // Lookup by position.
    assert_eq!(variant_obj.field_name(0), Some("active"));
    assert_eq!(variant_obj.field_name(1), Some("age"));
    assert_eq!(variant_obj.field_name(2), Some("name"));
    assert_eq!(variant_obj.field(0).unwrap().as_boolean(), Some(true));
    assert_eq!(variant_obj.field(1).unwrap().as_int8(), Some(42));
    assert_eq!(variant_obj.field(2).unwrap().as_string(), Some("hello"));
}
545
/// An empty string is accepted as a field name and can be looked up.
#[test]
fn test_variant_object_empty_fields() {
    let mut builder = VariantBuilder::new();
    builder.new_object().with_field("", 42).finish();
    let (metadata, value) = builder.finish();

    let variant = Variant::try_new(&metadata, &value).unwrap();
    let variant_obj = variant.as_object().unwrap();

    let expected = Variant::from(42);
    assert_eq!(variant_obj.len(), 1);
    assert_eq!(variant_obj.get(""), Some(expected));
}
558
/// An object with zero fields decodes, reports itself empty, and yields nothing.
#[test]
fn test_variant_object_empty() {
    // Dictionary with no entries: header byte, size 0, single end offset 0.
    let metadata_bytes: Vec<u8> = vec![0x11, 0, 0];
    let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

    // Object with no fields: header byte, element count 0, single end offset 0.
    let object_value: Vec<u8> = vec![0x02, 0, 0];
    let variant_obj = VariantObject::try_new(metadata, &object_value).unwrap();

    assert_eq!(variant_obj.len(), 0);
    assert!(variant_obj.is_empty());
    assert!(variant_obj.get("anything").is_none());

    let fields: Vec<_> = variant_obj.iter().collect();
    assert_eq!(fields.len(), 0);
}
591
/// A dictionary whose final offset (8) overshoots its 7 bytes of string data is rejected.
#[test]
fn test_variant_object_invalid_metadata_end_offset() {
    let mut metadata_bytes: Vec<u8> = vec![0b0001_0001, 2, 0, 3, 8];
    metadata_bytes.extend_from_slice(b"agename");

    let err = VariantMetadata::try_new(&metadata_bytes).unwrap_err();
    let is_expected_error = matches!(
        err,
        ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..13 from 12-byte buffer")
    );
    assert!(is_expected_error);
}
616
/// An object whose final field offset (9) overshoots its 8 bytes of value data is rejected.
#[test]
fn test_variant_object_invalid_end_offset() {
    let mut metadata_bytes: Vec<u8> = vec![0b0001_0001, 2, 0, 3, 7];
    metadata_bytes.extend_from_slice(b"agename");
    let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

    // Header byte and element count, field ids, field offsets (the last one is wrong), values.
    let mut object_value: Vec<u8> = vec![0x02, 2];
    object_value.extend_from_slice(&[0, 1]);
    object_value.extend_from_slice(&[0, 2, 9]);
    object_value.extend_from_slice(&[0x0C, 42]);
    object_value.extend_from_slice(&[0x15]);
    object_value.extend_from_slice(b"hello");

    let err = VariantObject::try_new(metadata, &object_value).unwrap_err();
    let is_expected_error = matches!(
        err,
        ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer")
    );
    assert!(is_expected_error);
}
663
664 fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) {
665 let field_names: Vec<_> = (0..count).map(|val| val.to_string()).collect();
666 let mut builder =
667 VariantBuilder::new().with_field_names(field_names.iter().map(|s| s.as_str()));
668
669 let mut obj = builder.new_object();
670
671 for i in 0..count {
672 obj.insert(&field_names[i as usize], i);
673 }
674
675 obj.finish();
676 let (metadata, value) = builder.finish();
677 let variant = Variant::new(&metadata, &value);
678
679 if let Variant::Object(obj) = variant {
680 assert_eq!(obj.len(), count as usize);
681
682 assert_eq!(obj.get(&field_names[0]).unwrap(), Variant::Int32(0));
683 assert_eq!(
684 obj.get(&field_names[(count - 1) as usize]).unwrap(),
685 Variant::Int32(count - 1)
686 );
687 assert_eq!(
688 obj.header.field_id_size, expected_field_id_size,
689 "Expected {}-byte field IDs, got {}-byte field IDs",
690 expected_field_id_size as usize, obj.header.field_id_size as usize
691 );
692 } else {
693 panic!("Expected object variant");
694 }
695 }
696
/// 257 fields (2^8 + 1) are expected to use two-byte field ids.
#[test]
fn test_variant_object_257_elements() {
    test_variant_object_with_count(257, OffsetSizeBytes::Two);
}
701
/// 65537 fields (2^16 + 1) are expected to use three-byte field ids.
#[test]
fn test_variant_object_65537_elements() {
    test_variant_object_with_count(65_537, OffsetSizeBytes::Three);
}
707
/// 255 fields are expected to use one-byte field ids.
#[test]
fn test_variant_object_small_sizes_255_elements() {
    let count = i32::from(u8::MAX);
    test_variant_object_with_count(count, OffsetSizeBytes::One);
}
720
721 fn test_variant_object_with_large_data(
722 data_size_per_field: usize,
723 expected_field_offset_size: OffsetSizeBytes,
724 ) {
725 let num_fields = 20;
726 let mut builder = VariantBuilder::new();
727 let mut obj = builder.new_object();
728
729 let str_val = "a".repeat(data_size_per_field);
730
731 for val in 0..num_fields {
732 let key = format!("id_{val}");
733 obj.insert(&key, str_val.as_str());
734 }
735
736 obj.finish();
737 let (metadata, value) = builder.finish();
738 let variant = Variant::new(&metadata, &value);
739
740 if let Variant::Object(obj) = variant {
741 assert_eq!(obj.len(), num_fields);
742 assert_eq!(
743 obj.header.field_offset_size, expected_field_offset_size,
744 "Expected {}-byte field offsets, got {}-byte field offsets",
745 expected_field_offset_size as usize, obj.header.field_offset_size as usize
746 );
747 } else {
748 panic!("Expected object variant");
749 }
750 }
751
/// Twenty 10-byte string fields are expected to use one-byte field offsets.
#[test]
fn test_variant_object_child_data_0_byte_offsets_minus_one() {
    let data_size_per_field = 10;
    test_variant_object_with_large_data(data_size_per_field, OffsetSizeBytes::One);
}
756
/// Twenty 257-byte string fields are expected to use two-byte field offsets.
///
/// NOTE(review): the function name says "3_byte_offsets" but the assertion is for two-byte
/// offsets; the name is kept as-is.
#[test]
fn test_variant_object_256_bytes_child_data_3_byte_offsets() {
    test_variant_object_with_large_data(257, OffsetSizeBytes::Two);
}
761
/// Twenty 65537-byte string fields are expected to use three-byte field offsets.
///
/// NOTE(review): the function name mentions 16777216 bytes and four-byte offsets, but the test
/// uses 65537-byte fields and asserts three-byte offsets; the name is kept as-is.
#[test]
fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() {
    test_variant_object_with_large_data(65_537, OffsetSizeBytes::Three);
}
766
/// Twenty 16777217-byte string fields are expected to use four-byte field offsets.
///
/// NOTE(review): the function name mentions 65535 bytes and two-byte offsets, but the test uses
/// 16777217-byte fields and asserts four-byte offsets; the name is kept as-is.
#[test]
fn test_variant_object_65535_bytes_child_data_2_byte_offsets() {
    test_variant_object_with_large_data(16_777_217, OffsetSizeBytes::Four);
}
772
/// Two variants decoded from the same metadata and value buffers compare equal.
#[test]
fn test_objects_with_same_fields_are_equal() {
    let (metadata, value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("b", ());
        object.insert("c", ());
        object.insert("a", ());
        object.finish();
        builder.finish()
    };

    let first = Variant::try_new(&metadata, &value).unwrap();
    let second = Variant::try_new(&metadata, &value).unwrap();

    assert_eq!(first, second);
}
791
/// Identical objects produced by two independent builders compare equal.
#[test]
fn test_same_objects_with_different_builder_are_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", false);
        object.finish();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();

    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", false);
        object.finish();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();

    assert_eq!(first, second);
}
818
/// Objects with the same field names but different values (a float versus a nested object under
/// "b") compare unequal, even though their metadata dictionaries are equal.
#[test]
fn test_objects_with_different_values_are_not_equal() {
    let (flat_metadata, flat_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", 4.3);
        object.finish();
        builder.finish()
    };
    let flat = Variant::try_new(&flat_metadata, &flat_value).unwrap();

    let (nested_metadata, nested_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        let mut inner_object = object.new_object("b");
        inner_object.insert("a", 3.3);
        inner_object.finish();
        object.finish();
        builder.finish()
    };
    let nested = Variant::try_new(&nested_metadata, &nested_value).unwrap();

    // Both dictionaries contain the same keys ...
    assert_eq!(flat.metadata(), nested.metadata());

    // ... but the values stored under "b" differ.
    assert_ne!(flat, nested);
}
856
/// Objects with the same number of fields but different field names compare unequal.
#[test]
fn test_objects_with_different_field_names_are_not_equal() {
    let (short_metadata, short_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", 4.3);
        object.finish();
        builder.finish()
    };
    let short_names = Variant::try_new(&short_metadata, &short_value).unwrap();

    let (long_metadata, long_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("aardvark", ());
        object.insert("barracuda", 3.3);
        object.finish();
        builder.finish()
    };
    let long_names = Variant::try_new(&long_metadata, &long_value).unwrap();

    assert_ne!(short_names, long_names);
}
885
/// Objects holding the same fields compare equal regardless of the order in which the fields
/// were inserted; both dictionaries here are unsorted.
#[test]
fn test_objects_with_different_insertion_order_are_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("b", false);
        object.insert("a", ());
        object.finish();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();
    assert!(!first.metadata().is_sorted());

    let (second_metadata, second_value) = {
        // Pre-registering "b" before "a" keeps this dictionary unsorted as well.
        let mut builder = VariantBuilder::new().with_field_names(["b", "a"]);
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", false);
        object.finish();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();
    assert!(!second.metadata().is_sorted());

    assert_eq!(first, second);
}
920
/// Objects with the same fields compare equal even when their metadata dictionaries differ (one
/// sorted, one unsorted with extra unused names).
#[test]
fn test_objects_with_differing_metadata_are_equal() {
    let (sorted_metadata, sorted_value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", ());
        object.insert("b", 4.3);
        object.finish();
        builder.finish()
    };
    let with_sorted_dictionary = Variant::try_new(&sorted_metadata, &sorted_value).unwrap();
    assert!(with_sorted_dictionary.metadata().is_sorted());

    let (unsorted_metadata, unsorted_value) = {
        let mut builder = VariantBuilder::new().with_field_names(["d", "c", "b", "a"]);
        let mut object = builder.new_object();
        object.insert("b", 4.3);
        object.insert("a", ());
        object.finish();
        builder.finish()
    };
    let with_unsorted_dictionary = Variant::try_new(&unsorted_metadata, &unsorted_value).unwrap();
    assert!(!with_unsorted_dictionary.metadata().is_sorted());

    // The dictionaries differ ...
    assert_ne!(
        with_sorted_dictionary.metadata(),
        with_unsorted_dictionary.metadata()
    );

    // ... yet the objects are logically the same.
    assert_eq!(with_sorted_dictionary, with_unsorted_dictionary);
}
958
/// The same value bytes interpreted against a builder-produced dictionary and against a
/// hand-written unsorted dictionary ("a", "b", "a") compare equal.
#[test]
fn test_compare_object_with_unsorted_dictionary_vs_sorted_dictionary() {
    let (built_metadata, value) = {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", false);
        object.insert("b", false);
        object.finish();
        builder.finish()
    };
    let with_built_dictionary = Variant::try_new(&built_metadata, &value).unwrap();

    // Header byte without the sorted flag, 3 entries, four one-byte offsets, then the names.
    let mut metadata_bytes: Vec<u8> = vec![0b0000_0001, 3, 0, 1, 2, 3];
    metadata_bytes.extend_from_slice(b"aba");
    let unsorted_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
    assert!(!unsorted_metadata.is_sorted());

    let with_unsorted_dictionary = Variant::new_with_metadata(unsorted_metadata, &value);
    assert_eq!(with_built_dictionary, with_unsorted_dictionary);
}
993}