Skip to main content

parquet_variant/
builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{OffsetSizeBytes, VariantBasicType, VariantPrimitiveType};
18use crate::{
19    ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList,
20    VariantMetadata, VariantObject,
21};
22use arrow_schema::ArrowError;
23use chrono::Timelike;
24use uuid::Uuid;
25
26mod list;
27mod metadata;
28mod object;
29
30pub use list::*;
31pub use metadata::*;
32pub use object::*;
33
// NOTE(review): presumably the width, in bits, of the basic-type field in a value header byte
// (the header helpers in this file shift by the same amount) -- confirm against the decoder.
pub(crate) const BASIC_TYPE_BITS: u8 = 2;
/// 1970-01-01, the reference date from which `append_date` counts days.
pub(crate) const UNIX_EPOCH_DATE: chrono::NaiveDate =
    chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
37
38fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
39    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
40}
41
42fn short_string_header(len: usize) -> u8 {
43    (len as u8) << 2 | VariantBasicType::ShortString as u8
44}
45
46pub(crate) fn int_size(v: usize) -> OffsetSizeBytes {
47    match v {
48        0..=0xFF => OffsetSizeBytes::One,
49        0x100..=0xFFFF => OffsetSizeBytes::Two,
50        0x10000..=0xFFFFFF => OffsetSizeBytes::Three,
51        _ => OffsetSizeBytes::Four,
52    }
53}
54
/// Wrapper around a `Vec<u8>` that provides methods for appending
/// primitive values, variant types, and metadata.
///
/// This is used internally by the builders to construct the
/// `value` field for [`Variant`] values.
///
/// You can reuse an existing `Vec<u8>` by using the `from` impl
#[derive(Debug, Default)]
pub struct ValueBuilder(Vec<u8>);
64
65impl ValueBuilder {
66    /// Construct a ValueBuffer that will write to a new underlying `Vec`
67    pub fn new() -> Self {
68        Default::default()
69    }
70}
71
/// Macro to generate the match statement shared by append_variant, try_append_variant, and
/// append_variant_bytes -- each of which handles objects and lists slightly differently.
///
/// Arguments:
/// - `$builder`: expression substituted into every primitive arm; the matching `append_*`
///   method is invoked on it
/// - `$value`: the [`Variant`] being matched
/// - `$object_pat => $object_arm`: caller-supplied arm for the object case
/// - `$list_pat => $list_arm`: caller-supplied arm for the list case
macro_rules! variant_append_value {
    ($builder:expr, $value:expr, $object_pat:pat => $object_arm:expr, $list_pat:pat => $list_arm:expr) => {
        match $value {
            Variant::Null => $builder.append_null(),
            Variant::BooleanTrue => $builder.append_bool(true),
            Variant::BooleanFalse => $builder.append_bool(false),
            Variant::Int8(v) => $builder.append_int8(v),
            Variant::Int16(v) => $builder.append_int16(v),
            Variant::Int32(v) => $builder.append_int32(v),
            Variant::Int64(v) => $builder.append_int64(v),
            Variant::Date(v) => $builder.append_date(v),
            Variant::Time(v) => $builder.append_time_micros(v),
            Variant::TimestampMicros(v) => $builder.append_timestamp_micros(v),
            Variant::TimestampNtzMicros(v) => $builder.append_timestamp_ntz_micros(v),
            Variant::TimestampNanos(v) => $builder.append_timestamp_nanos(v),
            Variant::TimestampNtzNanos(v) => $builder.append_timestamp_ntz_nanos(v),
            Variant::Decimal4(decimal4) => $builder.append_decimal4(decimal4),
            Variant::Decimal8(decimal8) => $builder.append_decimal8(decimal8),
            Variant::Decimal16(decimal16) => $builder.append_decimal16(decimal16),
            Variant::Float(v) => $builder.append_float(v),
            Variant::Double(v) => $builder.append_double(v),
            Variant::Binary(v) => $builder.append_binary(v),
            Variant::String(s) => $builder.append_string(s),
            Variant::ShortString(s) => $builder.append_short_string(s),
            Variant::Uuid(v) => $builder.append_uuid(v),
            // Object and list handling is supplied by the caller.
            $object_pat => $object_arm,
            $list_pat => $list_arm,
        }
    };
}
104
105impl ValueBuilder {
106    fn append_u8(&mut self, term: u8) {
107        self.0.push(term);
108    }
109
110    fn append_slice(&mut self, other: &[u8]) {
111        self.0.extend_from_slice(other);
112    }
113
114    fn append_primitive_header(&mut self, primitive_type: VariantPrimitiveType) {
115        self.0.push(primitive_header(primitive_type));
116    }
117
118    /// Returns the underlying buffer, consuming self
119    pub fn into_inner(self) -> Vec<u8> {
120        self.0
121    }
122
123    pub(crate) fn inner_mut(&mut self) -> &mut Vec<u8> {
124        &mut self.0
125    }
126
127    // Variant types below
128
129    fn append_null(&mut self) {
130        self.append_primitive_header(VariantPrimitiveType::Null);
131    }
132
133    fn append_bool(&mut self, value: bool) {
134        let primitive_type = if value {
135            VariantPrimitiveType::BooleanTrue
136        } else {
137            VariantPrimitiveType::BooleanFalse
138        };
139        self.append_primitive_header(primitive_type);
140    }
141
142    fn append_int8(&mut self, value: i8) {
143        self.append_primitive_header(VariantPrimitiveType::Int8);
144        self.append_u8(value as u8);
145    }
146
147    fn append_int16(&mut self, value: i16) {
148        self.append_primitive_header(VariantPrimitiveType::Int16);
149        self.append_slice(&value.to_le_bytes());
150    }
151
152    fn append_int32(&mut self, value: i32) {
153        self.append_primitive_header(VariantPrimitiveType::Int32);
154        self.append_slice(&value.to_le_bytes());
155    }
156
157    fn append_int64(&mut self, value: i64) {
158        self.append_primitive_header(VariantPrimitiveType::Int64);
159        self.append_slice(&value.to_le_bytes());
160    }
161
162    fn append_float(&mut self, value: f32) {
163        self.append_primitive_header(VariantPrimitiveType::Float);
164        self.append_slice(&value.to_le_bytes());
165    }
166
167    fn append_double(&mut self, value: f64) {
168        self.append_primitive_header(VariantPrimitiveType::Double);
169        self.append_slice(&value.to_le_bytes());
170    }
171
172    fn append_date(&mut self, value: chrono::NaiveDate) {
173        self.append_primitive_header(VariantPrimitiveType::Date);
174        let days_since_epoch = value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
175        self.append_slice(&days_since_epoch.to_le_bytes());
176    }
177
178    fn append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) {
179        self.append_primitive_header(VariantPrimitiveType::TimestampMicros);
180        let micros = value.timestamp_micros();
181        self.append_slice(&micros.to_le_bytes());
182    }
183
184    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
185        self.append_primitive_header(VariantPrimitiveType::TimestampNtzMicros);
186        let micros = value.and_utc().timestamp_micros();
187        self.append_slice(&micros.to_le_bytes());
188    }
189
190    fn append_time_micros(&mut self, value: chrono::NaiveTime) {
191        self.append_primitive_header(VariantPrimitiveType::Time);
192        let micros_from_midnight = value.num_seconds_from_midnight() as u64 * 1_000_000
193            + value.nanosecond() as u64 / 1_000;
194        self.append_slice(&micros_from_midnight.to_le_bytes());
195    }
196
197    fn append_timestamp_nanos(&mut self, value: chrono::DateTime<chrono::Utc>) {
198        self.append_primitive_header(VariantPrimitiveType::TimestampNanos);
199        let nanos = value.timestamp_nanos_opt().unwrap();
200        self.append_slice(&nanos.to_le_bytes());
201    }
202
203    fn append_timestamp_ntz_nanos(&mut self, value: chrono::NaiveDateTime) {
204        self.append_primitive_header(VariantPrimitiveType::TimestampNtzNanos);
205        let nanos = value.and_utc().timestamp_nanos_opt().unwrap();
206        self.append_slice(&nanos.to_le_bytes());
207    }
208
209    fn append_uuid(&mut self, value: Uuid) {
210        self.append_primitive_header(VariantPrimitiveType::Uuid);
211        self.append_slice(&value.into_bytes());
212    }
213
214    fn append_decimal4(&mut self, decimal4: VariantDecimal4) {
215        self.append_primitive_header(VariantPrimitiveType::Decimal4);
216        self.append_u8(decimal4.scale());
217        self.append_slice(&decimal4.integer().to_le_bytes());
218    }
219
220    fn append_decimal8(&mut self, decimal8: VariantDecimal8) {
221        self.append_primitive_header(VariantPrimitiveType::Decimal8);
222        self.append_u8(decimal8.scale());
223        self.append_slice(&decimal8.integer().to_le_bytes());
224    }
225
226    fn append_decimal16(&mut self, decimal16: VariantDecimal16) {
227        self.append_primitive_header(VariantPrimitiveType::Decimal16);
228        self.append_u8(decimal16.scale());
229        self.append_slice(&decimal16.integer().to_le_bytes());
230    }
231
232    fn append_binary(&mut self, value: &[u8]) {
233        self.append_primitive_header(VariantPrimitiveType::Binary);
234        self.append_slice(&(value.len() as u32).to_le_bytes());
235        self.append_slice(value);
236    }
237
238    fn append_short_string(&mut self, value: ShortString) {
239        let inner = value.0;
240        self.append_u8(short_string_header(inner.len()));
241        self.append_slice(inner.as_bytes());
242    }
243
244    fn append_string(&mut self, value: &str) {
245        self.append_primitive_header(VariantPrimitiveType::String);
246        self.append_slice(&(value.len() as u32).to_le_bytes());
247        self.append_slice(value.as_bytes());
248    }
249
250    fn append_object<S: BuilderSpecificState>(state: ParentState<'_, S>, obj: VariantObject) {
251        let mut object_builder = ObjectBuilder::new(state, false);
252        object_builder.extend(obj.iter());
253        object_builder.finish();
254    }
255
256    fn try_append_object<S: BuilderSpecificState>(
257        state: ParentState<'_, S>,
258        obj: VariantObject,
259    ) -> Result<(), ArrowError> {
260        let mut object_builder = ObjectBuilder::new(state, false);
261
262        for res in obj.iter_try() {
263            let (field_name, value) = res?;
264            object_builder.try_insert(field_name, value)?;
265        }
266
267        object_builder.finish();
268        Ok(())
269    }
270
271    fn append_list<S: BuilderSpecificState>(state: ParentState<'_, S>, list: VariantList) {
272        let mut list_builder = ListBuilder::new(state, false);
273        list_builder.extend(list.iter());
274        list_builder.finish();
275    }
276
277    fn try_append_list<S: BuilderSpecificState>(
278        state: ParentState<'_, S>,
279        list: VariantList,
280    ) -> Result<(), ArrowError> {
281        let mut list_builder = ListBuilder::new(state, false);
282        for res in list.iter_try() {
283            let value = res?;
284            list_builder.try_append_value(value)?;
285        }
286
287        list_builder.finish();
288
289        Ok(())
290    }
291
292    /// Returns the current size of the underlying buffer
293    pub fn offset(&self) -> usize {
294        self.0.len()
295    }
296
297    /// Appends a variant to the builder.
298    ///
299    /// # Panics
300    ///
301    /// This method will panic if the variant contains duplicate field names in objects
302    /// when validation is enabled. For a fallible version, use [`ValueBuilder::try_append_variant`]
303    pub fn append_variant<S: BuilderSpecificState>(
304        mut state: ParentState<'_, S>,
305        variant: Variant<'_, '_>,
306    ) {
307        variant_append_value!(
308            state.value_builder(),
309            variant,
310            Variant::Object(obj) => return Self::append_object(state, obj),
311            Variant::List(list) => return Self::append_list(state, list)
312        );
313        state.finish();
314    }
315
316    /// Tries to append a variant to the provided [`ParentState`] instance.
317    ///
318    /// The attempt fails if the variant contains duplicate field names in objects when validation
319    /// is enabled.
320    pub fn try_append_variant<S: BuilderSpecificState>(
321        mut state: ParentState<'_, S>,
322        variant: Variant<'_, '_>,
323    ) -> Result<(), ArrowError> {
324        variant_append_value!(
325            state.value_builder(),
326            variant,
327            Variant::Object(obj) => return Self::try_append_object(state, obj),
328            Variant::List(list) => return Self::try_append_list(state, list)
329        );
330        state.finish();
331        Ok(())
332    }
333
334    /// Appends a variant to the buffer by copying raw bytes when possible.
335    ///
336    /// For objects and lists, this directly copies their underlying byte representation instead of
337    /// performing a logical copy and without touching the metadata builder. For other variant
338    /// types, this falls back to the standard append behavior.
339    ///
340    /// The caller must ensure that the metadata dictionary is already built and correct for
341    /// any objects or lists being appended.
342    pub fn append_variant_bytes<S: BuilderSpecificState>(
343        mut state: ParentState<'_, S>,
344        variant: Variant<'_, '_>,
345    ) {
346        let builder = state.value_builder();
347        variant_append_value!(
348            builder,
349            variant,
350            Variant::Object(obj) => builder.append_slice(obj.value),
351            Variant::List(list) => builder.append_slice(list.value)
352        );
353        state.finish();
354    }
355}
356
/// A trait for managing state specific to different builder types.
///
/// Both methods have empty default bodies, so an implementor only overrides the hooks it needs.
pub trait BuilderSpecificState: std::fmt::Debug {
    /// Called by [`ParentState::finish`] to apply any pending builder-specific changes.
    ///
    /// The provided implementation does nothing by default.
    ///
    /// Parameters:
    /// - `metadata_builder`: The metadata builder that was used
    /// - `value_builder`: The value builder that was used
    fn finish(
        &mut self,
        _metadata_builder: &mut dyn MetadataBuilder,
        _value_builder: &mut ValueBuilder,
    ) {
    }

    /// Called by [`ParentState::drop`] to revert any changes that were eagerly applied, if
    /// [`ParentState::finish`] was never invoked.
    ///
    /// The provided implementation does nothing by default.
    ///
    /// The base [`ParentState`] will handle rolling back the value and metadata builders
    /// (it truncates both before invoking this hook), but builder-specific state may need to
    /// revert its own changes.
    fn rollback(&mut self) {}
}
382
/// Empty no-op implementation for top-level variant building; it relies entirely on the
/// trait's default `finish` and `rollback` bodies.
impl BuilderSpecificState for () {}
385
/// Tracks information needed to correctly finalize a nested builder.
///
/// A child builder has no effect on its parent unless/until its `finish` method is called, at
/// which point the child appends the new value to the parent. As a (desirable) side effect,
/// creating a parent state instance captures mutable references to a subset of the parent's fields,
/// rendering the parent object completely unusable until the parent state goes out of scope. This
/// ensures that at most one child builder can exist at a time.
///
/// The redundancy in `value_builder` and `metadata_builder` is because all the references come from
/// the parent, and we cannot "split" a mutable reference across two objects (parent state and the
/// child builder that uses it). So everything has to be here.
#[derive(Debug)]
pub struct ParentState<'a, S: BuilderSpecificState> {
    // Value buffer borrowed from the parent.
    pub(crate) value_builder: &'a mut ValueBuilder,
    // `value_builder.offset()` captured at construction; the buffer is truncated back to this
    // length on rollback.
    pub(crate) saved_value_builder_offset: usize,
    // Metadata builder borrowed from the parent.
    pub(crate) metadata_builder: &'a mut dyn MetadataBuilder,
    // `metadata_builder.num_field_names()` captured at construction; field names are truncated
    // back to this count on rollback.
    pub(crate) saved_metadata_builder_dict_size: usize,
    // Builder-specific hooks invoked from `finish` and `rollback`.
    pub(crate) builder_state: S,
    // Set by `finish`; once true, `rollback` (and therefore drop) does nothing.
    pub(crate) finished: bool,
}
406
407impl<'a, S: BuilderSpecificState> ParentState<'a, S> {
408    /// Creates a new ParentState instance. The value and metadata builder
409    /// state is checkpointed and will roll back on drop, unless [`Self::finish`] is called. The
410    /// builder-specific state is governed by its own `finish` and `rollback` calls.
411    pub fn new(
412        value_builder: &'a mut ValueBuilder,
413        metadata_builder: &'a mut dyn MetadataBuilder,
414        builder_state: S,
415    ) -> Self {
416        Self {
417            saved_value_builder_offset: value_builder.offset(),
418            value_builder,
419            saved_metadata_builder_dict_size: metadata_builder.num_field_names(),
420            metadata_builder,
421            builder_state,
422            finished: false,
423        }
424    }
425
426    /// Marks the insertion as having succeeded and invokes
427    /// [`BuilderSpecificState::finish`]. Internal state will no longer roll back on drop.
428    pub fn finish(&mut self) {
429        self.builder_state
430            .finish(self.metadata_builder, self.value_builder);
431        self.finished = true
432    }
433
434    // Rolls back value and metadata builder changes and invokes [`BuilderSpecificState::rollback`].
435    fn rollback(&mut self) {
436        if self.finished {
437            return;
438        }
439
440        self.value_builder
441            .inner_mut()
442            .truncate(self.saved_value_builder_offset);
443        self.metadata_builder
444            .truncate_field_names(self.saved_metadata_builder_dict_size);
445        self.builder_state.rollback();
446    }
447
448    // Useful because e.g. `let b = self.value_builder;` fails compilation.
449    pub(crate) fn value_builder(&mut self) -> &mut ValueBuilder {
450        self.value_builder
451    }
452
453    // Useful because e.g. `let b = self.metadata_builder;` fails compilation.
454    pub(crate) fn metadata_builder(&mut self) -> &mut dyn MetadataBuilder {
455        self.metadata_builder
456    }
457}
458
459impl<'a> ParentState<'a, ()> {
460    /// Creates a new instance suitable for a top-level variant builder
461    /// (e.g. [`VariantBuilder`]). The value and metadata builder state is checkpointed and will
462    /// roll back on drop, unless [`Self::finish`] is called.
463    pub fn variant(
464        value_builder: &'a mut ValueBuilder,
465        metadata_builder: &'a mut dyn MetadataBuilder,
466    ) -> Self {
467        Self::new(value_builder, metadata_builder, ())
468    }
469}
470
471/// Automatically rolls back any unfinished `ParentState`.
472impl<S: BuilderSpecificState> Drop for ParentState<'_, S> {
473    fn drop(&mut self) {
474        self.rollback()
475    }
476}
477
478/// Top level builder for [`Variant`] values
479///
480/// # Example: create a Primitive Int8
481/// ```
482/// # use parquet_variant::{Variant, VariantBuilder};
483/// let mut builder = VariantBuilder::new();
484/// builder.append_value(Variant::Int8(42));
485/// // Finish the builder to get the metadata and value
486/// let (metadata, value) = builder.finish();
487/// // use the Variant API to verify the result
488/// let variant = Variant::try_new(&metadata, &value).unwrap();
489/// assert_eq!(variant, Variant::Int8(42));
490/// ```
491///
492/// # Example: Create a [`Variant::Object`]
493///
494/// This example shows how to create an object with two fields:
495/// ```json
496/// {
497///  "first_name": "Jiaying",
498///  "last_name": "Li"
499/// }
500/// ```
501///
502/// ```
503/// # use parquet_variant::{Variant, VariantBuilder};
504/// let mut builder = VariantBuilder::new();
505/// // Create an object builder that will write fields to the object
506/// let mut object_builder = builder.new_object();
507/// object_builder.insert("first_name", "Jiaying");
508/// object_builder.insert("last_name", "Li");
509/// object_builder.finish(); // call finish to finalize the object
510/// // Finish the builder to get the metadata and value
511/// let (metadata, value) = builder.finish();
512/// // use the Variant API to verify the result
513/// let variant = Variant::try_new(&metadata, &value).unwrap();
514/// let variant_object = variant.as_object().unwrap();
515/// assert_eq!(
516///   variant_object.get("first_name"),
517///   Some(Variant::from("Jiaying"))
518/// );
519/// assert_eq!(
520///   variant_object.get("last_name"),
521///   Some(Variant::from("Li"))
522/// );
523/// ```
524///
525///
526/// You can also use the [`ObjectBuilder::with_field`] to add fields to the
527/// object
528/// ```
529/// # use parquet_variant::{Variant, VariantBuilder};
530/// // build the same object as above
531/// let mut builder = VariantBuilder::new();
532/// builder.new_object()
533///   .with_field("first_name", "Jiaying")
534///   .with_field("last_name", "Li")
535///   .finish();
536/// let (metadata, value) = builder.finish();
537/// let variant = Variant::try_new(&metadata, &value).unwrap();
538/// let variant_object = variant.as_object().unwrap();
539/// assert_eq!(
540///   variant_object.get("first_name"),
541///   Some(Variant::from("Jiaying"))
542/// );
543/// assert_eq!(
544///   variant_object.get("last_name"),
545///   Some(Variant::from("Li"))
546/// );
547/// ```
548/// # Example: Create a [`Variant::List`] (an Array)
549///
550/// This example shows how to create an array of integers: `[1, 2, 3]`.
551/// ```
552///  # use parquet_variant::{Variant, VariantBuilder};
553///  let mut builder = VariantBuilder::new();
554///  // Create a builder that will write elements to the list
555///  let mut list_builder = builder.new_list();
556///  list_builder.append_value(1i8);
557///  list_builder.append_value(2i8);
558///  list_builder.append_value(3i8);
559/// // call finish to finalize the list
560///  list_builder.finish();
561/// // Finish the builder to get the metadata and value
562/// let (metadata, value) = builder.finish();
563/// // use the Variant API to verify the result
564/// let variant = Variant::try_new(&metadata, &value).unwrap();
565/// let variant_list = variant.as_list().unwrap();
566/// // Verify the list contents
567/// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
568/// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
569/// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
570/// ```
571///
572/// You can also use the [`ListBuilder::with_value`] to append values to the
573/// list.
574/// ```
575///  # use parquet_variant::{Variant, VariantBuilder};
576///  let mut builder = VariantBuilder::new();
577///  builder.new_list()
578///      .with_value(1i8)
579///      .with_value(2i8)
580///      .with_value(3i8)
581///      .finish();
582/// let (metadata, value) = builder.finish();
583/// let variant = Variant::try_new(&metadata, &value).unwrap();
584/// let variant_list = variant.as_list().unwrap();
585/// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
586/// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
587/// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
588/// ```
589///
590/// # Example: [`Variant::List`] of  [`Variant::Object`]s
591///
/// This example shows how to create a list of objects:
593/// ```json
594/// [
595///   {
596///      "id": 1,
597///      "type": "Cauliflower"
598///   },
599///   {
600///      "id": 2,
601///      "type": "Beets"
602///   }
603/// ]
604/// ```
605/// ```
606/// use parquet_variant::{Variant, VariantBuilder};
607/// let mut builder = VariantBuilder::new();
608///
609/// // Create a builder that will write elements to the list
610/// let mut list_builder = builder.new_list();
611///
612/// {
613///     let mut object_builder = list_builder.new_object();
614///     object_builder.insert("id", 1);
615///     object_builder.insert("type", "Cauliflower");
616///     object_builder.finish();
617/// }
618///
619/// {
620///     let mut object_builder = list_builder.new_object();
621///     object_builder.insert("id", 2);
622///     object_builder.insert("type", "Beets");
623///     object_builder.finish();
624/// }
625///
626/// list_builder.finish();
627/// // Finish the builder to get the metadata and value
628/// let (metadata, value) = builder.finish();
629/// // use the Variant API to verify the result
630/// let variant = Variant::try_new(&metadata, &value).unwrap();
631/// let variant_list = variant.as_list().unwrap();
632///
633///
634/// let obj1_variant = variant_list.get(0).unwrap();
635/// let obj1 = obj1_variant.as_object().unwrap();
636/// assert_eq!(
637///     obj1.get("id"),
638///     Some(Variant::from(1))
639/// );
640/// assert_eq!(
641///     obj1.get("type"),
642///     Some(Variant::from("Cauliflower"))
643/// );
644///
645/// let obj2_variant = variant_list.get(1).unwrap();
646/// let obj2 = obj2_variant.as_object().unwrap();
647///
648/// assert_eq!(
649///     obj2.get("id"),
650///     Some(Variant::from(2))
651/// );
652/// assert_eq!(
653///     obj2.get("type"),
654///     Some(Variant::from("Beets"))
655/// );
656///
657/// ```
658/// # Example: Unique Field Validation
659///
660/// This example shows how enabling unique field validation will cause an error
661/// if the same field is inserted more than once.
662/// ```
663/// # use parquet_variant::VariantBuilder;
664/// #
665/// let mut builder = VariantBuilder::new().with_validate_unique_fields(true);
666///
667/// // When validation is enabled, try_with_field will return an error
668/// let result = builder
669///     .new_object()
670///     .with_field("a", 1)
671///     .try_with_field("a", 2);
672/// assert!(result.is_err());
673/// ```
674///
675/// # Example: Sorted dictionaries
676///
677/// This example shows how to create a [`VariantBuilder`] with a pre-sorted field dictionary
678/// to improve field access performance when reading [`Variant`] objects.
679///
680/// You can use [`VariantBuilder::with_field_names`] to add multiple field names at once:
681/// ```
682/// use parquet_variant::{Variant, VariantBuilder};
683/// let mut builder = VariantBuilder::new()
684///     .with_field_names(["age", "name", "score"].into_iter());
685///
686/// let mut obj = builder.new_object();
687/// obj.insert("name", "Alice");
688/// obj.insert("age", 30);
689/// obj.insert("score", 95.5);
690/// obj.finish();
691///
692/// let (metadata, value) = builder.finish();
693/// let variant = Variant::try_new(&metadata, &value).unwrap();
694/// ```
695///
696/// Alternatively, you can use [`VariantBuilder::add_field_name`] to add field names one by one:
697/// ```
698/// use parquet_variant::{Variant, VariantBuilder};
699/// let mut builder = VariantBuilder::new();
700/// builder.add_field_name("age"); // field id = 0
701/// builder.add_field_name("name"); // field id = 1
702/// builder.add_field_name("score"); // field id = 2
703///
704/// let mut obj = builder.new_object();
/// obj.insert("name", "Bob"); // reuses field id = 1
706/// obj.insert("age", 25);
707/// obj.insert("score", 88.0);
708/// obj.finish();
709///
710/// let (metadata, value) = builder.finish();
711/// let variant = Variant::try_new(&metadata, &value).unwrap();
712/// ```
#[derive(Default, Debug)]
pub struct VariantBuilder {
    // Buffer that receives the encoded value bytes.
    value_builder: ValueBuilder,
    // Field-name dictionary under construction.
    metadata_builder: WritableMetadataBuilder,
    // Passed to every `ListBuilder`/`ObjectBuilder` created by `new_list`/`new_object`.
    validate_unique_fields: bool,
}
719
720impl VariantBuilder {
721    /// Create a new VariantBuilder with new underlying buffers
722    pub fn new() -> Self {
723        Self {
724            value_builder: ValueBuilder::new(),
725            metadata_builder: WritableMetadataBuilder::default(),
726            validate_unique_fields: false,
727        }
728    }
729
730    /// Create a new VariantBuilder with pre-existing [`VariantMetadata`].
731    pub fn with_metadata(mut self, metadata: VariantMetadata) -> Self {
732        self.metadata_builder.extend(metadata.iter());
733
734        self
735    }
736
    /// Enables validation of unique field keys in nested objects.
    ///
    /// This setting is propagated to all [`ObjectBuilder`]s created through this [`VariantBuilder`]
    /// (including via any [`ListBuilder`]). With validation enabled, inserting a duplicate key
    /// is reported as an error by the fallible insertion APIs (for example
    /// [`ObjectBuilder::try_with_field`]).
    // NOTE(review): this doc previously said `ObjectBuilder::finish()` returns the error, but the
    // callers in this file invoke `finish()` as an infallible statement -- confirm in `object.rs`.
    pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
        self.validate_unique_fields = validate_unique_fields;
        self
    }
746
747    /// This method pre-populates the field name directory in the Variant metadata with
748    /// the specific field names, in order.
749    ///
750    /// You can use this to pre-populate a [`VariantBuilder`] with a sorted dictionary if you
751    /// know the field names beforehand. Sorted dictionaries can accelerate field access when
752    /// reading [`Variant`]s.
753    pub fn with_field_names<'a>(mut self, field_names: impl IntoIterator<Item = &'a str>) -> Self {
754        self.metadata_builder.extend(field_names);
755
756        self
757    }
758
    /// Builder-style API for appending a value to the builder and returning self to enable method chaining.
    ///
    /// # Panics
    ///
    /// This method will panic if the variant contains duplicate field names in objects
    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_with_value`].
    pub fn with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, value: T) -> Self {
        self.append_value(value);
        self
    }
769
    /// Builder-style API for appending a value to the builder and returning self for method chaining.
    ///
    /// This is the fallible version of [`VariantBuilder::with_value`].
    ///
    /// # Errors
    ///
    /// Returns whatever error [`VariantBuilder::try_append_value`] reports.
    pub fn try_with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
        mut self,
        value: T,
    ) -> Result<Self, ArrowError> {
        self.try_append_value(value)?;
        Ok(self)
    }
780
781    /// This method reserves capacity for field names in the Variant metadata,
782    /// which can improve performance when you know the approximate number of unique field
783    /// names that will be used across all objects in the [`Variant`].
784    pub fn reserve(&mut self, capacity: usize) {
785        self.metadata_builder.field_names.reserve(capacity);
786    }
787
    /// Adds a single field name to the field name directory in the Variant metadata.
    ///
    /// This method does the same thing as [`VariantBuilder::with_field_names`] but adds one field name at a time.
    /// The name is passed to the metadata builder's `upsert_field_name`, whose return value is
    /// discarded here.
    pub fn add_field_name(&mut self, field_name: &str) {
        self.metadata_builder.upsert_field_name(field_name);
    }
794
795    /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
796    ///
797    /// See the examples on [`VariantBuilder`] for usage.
798    pub fn new_list(&mut self) -> ListBuilder<'_, ()> {
799        let parent_state =
800            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
801        ListBuilder::new(parent_state, self.validate_unique_fields)
802    }
803
804    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
805    ///
806    /// See the examples on [`VariantBuilder`] for usage.
807    pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> {
808        let parent_state =
809            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
810        ObjectBuilder::new(parent_state, self.validate_unique_fields)
811    }
812
813    /// Append a value to the builder.
814    ///
815    /// # Panics
816    ///
817    /// This method will panic if the variant contains duplicate field names in objects
818    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_append_value`]
819    ///
820    /// # Example
821    /// ```
822    /// # use parquet_variant::{Variant, VariantBuilder};
823    /// let mut builder = VariantBuilder::new();
824    /// // most primitive types can be appended directly as they implement `Into<Variant>`
825    /// builder.append_value(42i8);
826    /// ```
827    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
828        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
829        ValueBuilder::append_variant(state, value.into())
830    }
831
832    /// Append a value to the builder.
833    pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
834        &mut self,
835        value: T,
836    ) -> Result<(), ArrowError> {
837        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
838        ValueBuilder::try_append_variant(state, value.into())
839    }
840
841    /// Appends a variant value to the builder by copying raw bytes when possible.
842    ///
843    /// For objects and lists, this directly copies their underlying byte representation instead of
844    /// performing a logical copy and without touching the metadata builder. For other variant
845    /// types, this falls back to the standard append behavior.
846    ///
847    /// The caller must ensure that the metadata dictionary entries are already built and correct for
848    /// any objects or lists being appended.
849    pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
850        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
851        ValueBuilder::append_variant_bytes(state, value.into());
852    }
853
854    /// Finish the builder and return the metadata and value buffers.
855    pub fn finish(mut self) -> (Vec<u8>, Vec<u8>) {
856        self.metadata_builder.finish();
857        (
858            self.metadata_builder.into_inner(),
859            self.value_builder.into_inner(),
860        )
861    }
862}
863
864/// Extends [`VariantBuilder`] to help building nested [`Variant`]s
865///
866/// Allows users to append values to a [`VariantBuilder`], [`ListBuilder`] or
867/// [`ObjectBuilder`]. using the same interface.
868pub trait VariantBuilderExt {
869    /// The builder specific state used by nested builders
870    type State<'a>: BuilderSpecificState + 'a
871    where
872        Self: 'a;
873
874    /// Appends a NULL value to this builder. The semantics depend on the implementation, but will
875    /// often translate to appending a [`Variant::Null`] value.
876    fn append_null(&mut self);
877
878    /// Appends a new variant value to this builder. See e.g. [`VariantBuilder::append_value`].
879    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>);
880
881    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Panics if the nested
882    /// builder cannot be created, see e.g. [`ObjectBuilder::new_list`].
883    fn new_list(&mut self) -> ListBuilder<'_, Self::State<'_>> {
884        self.try_new_list().unwrap()
885    }
886
887    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Panics if the
888    /// nested builder cannot be created, see e.g. [`ObjectBuilder::new_object`].
889    fn new_object(&mut self) -> ObjectBuilder<'_, Self::State<'_>> {
890        self.try_new_object().unwrap()
891    }
892
893    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Returns an error if
894    /// the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_list`].
895    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError>;
896
897    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Returns an error
898    /// if the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_object`].
899    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError>;
900}
901
902impl VariantBuilderExt for VariantBuilder {
903    type State<'a>
904        = ()
905    where
906        Self: 'a;
907
908    /// Variant values cannot encode NULL, only [`Variant::Null`]. This is different from the column
909    /// that holds variant values being NULL at some positions.
910    fn append_null(&mut self) {
911        self.append_value(Variant::Null);
912    }
913    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
914        self.append_value(value);
915    }
916
917    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
918        Ok(self.new_list())
919    }
920
921    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
922        Ok(self.new_object())
923    }
924}
925
926#[cfg(test)]
927mod tests {
928    use crate::{VariantMetadata, builder::metadata::ReadOnlyMetadataBuilder};
929
930    use super::*;
931    #[test]
932    fn test_simple_usage() {
933        test_variant_roundtrip((), Variant::Null);
934        test_variant_roundtrip(true, Variant::BooleanTrue);
935        test_variant_roundtrip(false, Variant::BooleanFalse);
936        test_variant_roundtrip(42i8, Variant::Int8(42));
937        test_variant_roundtrip(1234i16, Variant::Int16(1234));
938        test_variant_roundtrip(123456i32, Variant::Int32(123456));
939        test_variant_roundtrip(123456789i64, Variant::Int64(123456789));
940        test_variant_roundtrip(1.5f32, Variant::Float(1.5));
941        test_variant_roundtrip(2.5f64, Variant::Double(2.5));
942        test_variant_roundtrip("hello", Variant::ShortString(ShortString("hello")));
943
944        // Test long string (> 63 bytes)
945        let long_string = "This is a very long string that exceeds the short string limit of 63 bytes and should be encoded as a regular string type instead of a short string";
946        test_variant_roundtrip(long_string, Variant::String(long_string));
947
948        // Test binary data
949        let binary_data = b"binary data";
950        test_variant_roundtrip(
951            binary_data.as_slice(),
952            Variant::Binary(binary_data.as_slice()),
953        );
954    }
955
956    /// Helper function to test that a value can be built and reconstructed correctly
957    fn test_variant_roundtrip<'m, 'd, T: Into<Variant<'m, 'd>>>(input: T, expected: Variant) {
958        let mut builder = VariantBuilder::new();
959        builder.append_value(input);
960        let (metadata, value) = builder.finish();
961        let variant = Variant::try_new(&metadata, &value).unwrap_or_else(|_| {
962            panic!("Failed to create variant from metadata and value: {metadata:?}, {value:?}")
963        });
964        assert_eq!(variant, expected);
965    }
966
967    #[test]
968    fn test_nested_object_with_lists() {
969        /*
970        {
971            "door 1": {
972                "items": ["apple", false ]
973            }
974        }
975
976        */
977
978        let mut builder = VariantBuilder::new();
979        {
980            let mut outer_object_builder = builder.new_object();
981            {
982                let mut inner_object_builder = outer_object_builder.new_object("door 1");
983
984                // create inner_object_list
985                inner_object_builder
986                    .new_list("items")
987                    .with_value("apple")
988                    .with_value(false)
989                    .finish();
990
991                inner_object_builder.finish();
992            }
993
994            outer_object_builder.finish();
995        }
996
997        let (metadata, value) = builder.finish();
998        let variant = Variant::try_new(&metadata, &value).unwrap();
999        let outer_object = variant.as_object().unwrap();
1000
1001        assert_eq!(outer_object.len(), 1);
1002        assert_eq!(outer_object.field_name(0).unwrap(), "door 1");
1003
1004        let inner_object_variant = outer_object.field(0).unwrap();
1005        let inner_object = inner_object_variant.as_object().unwrap();
1006
1007        assert_eq!(inner_object.len(), 1);
1008        assert_eq!(inner_object.field_name(0).unwrap(), "items");
1009
1010        let items_variant = inner_object.field(0).unwrap();
1011        let items_list = items_variant.as_list().unwrap();
1012
1013        assert_eq!(items_list.len(), 2);
1014        assert_eq!(items_list.get(0).unwrap(), Variant::from("apple"));
1015        assert_eq!(items_list.get(1).unwrap(), Variant::from(false));
1016    }
1017
1018    #[test]
1019    fn test_sorted_dictionary() {
1020        // check if variant metadatabuilders are equivalent from different ways of constructing them
1021        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
1022
1023        let mut variant2 = {
1024            let mut builder = VariantBuilder::new();
1025
1026            builder.add_field_name("b");
1027            builder.add_field_name("c");
1028            builder.add_field_name("d");
1029
1030            builder
1031        };
1032
1033        assert_eq!(
1034            variant1.metadata_builder.field_names,
1035            variant2.metadata_builder.field_names
1036        );
1037
1038        // check metadata builders say it's sorted
1039        assert!(variant1.metadata_builder.is_sorted);
1040        assert!(variant2.metadata_builder.is_sorted);
1041
1042        {
1043            // test the bad case and break the sort order
1044            variant2.add_field_name("a");
1045            assert!(!variant2.metadata_builder.is_sorted);
1046
1047            // per the spec, make sure the variant will fail to build if only metadata is provided
1048            let (m, v) = variant2.finish();
1049            let res = Variant::try_new(&m, &v);
1050            assert!(res.is_err());
1051
1052            // since it is not sorted, make sure the metadata says so
1053            let header = VariantMetadata::try_new(&m).unwrap();
1054            assert!(!header.is_sorted());
1055        }
1056
1057        // write out variant1 and make sure the sorted flag is properly encoded
1058        variant1.append_value(false);
1059
1060        let (m, v) = variant1.finish();
1061        let res = Variant::try_new(&m, &v);
1062        assert!(res.is_ok());
1063
1064        let header = VariantMetadata::try_new(&m).unwrap();
1065        assert!(header.is_sorted());
1066    }
1067
1068    #[test]
1069    fn test_object_sorted_dictionary() {
1070        // predefine the list of field names
1071        let mut variant1 = VariantBuilder::new().with_field_names(["a", "b", "c"]);
1072        let mut obj = variant1.new_object();
1073
1074        obj.insert("c", true);
1075        obj.insert("a", false);
1076        obj.insert("b", ());
1077
1078        // verify the field ids are correctly
1079        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
1080        assert_eq!(field_ids_by_insert_order, vec![2, 0, 1]);
1081
1082        // add a field name that wasn't pre-defined but doesn't break the sort order
1083        obj.insert("d", 2);
1084        obj.finish();
1085
1086        let (metadata, value) = variant1.finish();
1087        let variant = Variant::try_new(&metadata, &value).unwrap();
1088
1089        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1090        assert!(metadata.is_sorted());
1091
1092        // verify object is sorted by field name order
1093        let object = variant.as_object().unwrap();
1094        let field_names = object
1095            .iter()
1096            .map(|(field_name, _)| field_name)
1097            .collect::<Vec<_>>();
1098
1099        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
1100    }
1101
1102    #[test]
1103    fn test_object_not_sorted_dictionary() {
1104        // predefine the list of field names
1105        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
1106        let mut obj = variant1.new_object();
1107
1108        obj.insert("c", true);
1109        obj.insert("d", false);
1110        obj.insert("b", ());
1111
1112        // verify the field ids are correctly
1113        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
1114        assert_eq!(field_ids_by_insert_order, vec![1, 2, 0]);
1115
1116        // add a field name that wasn't pre-defined but breaks the sort order
1117        obj.insert("a", 2);
1118        obj.finish();
1119
1120        let (metadata, value) = variant1.finish();
1121        let variant = Variant::try_new(&metadata, &value).unwrap();
1122
1123        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1124        assert!(!metadata.is_sorted());
1125
1126        // verify object field names are sorted by field name order
1127        let object = variant.as_object().unwrap();
1128        let field_names = object
1129            .iter()
1130            .map(|(field_name, _)| field_name)
1131            .collect::<Vec<_>>();
1132
1133        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
1134    }
1135
1136    #[test]
1137    fn test_building_sorted_dictionary() {
1138        let mut builder = VariantBuilder::new();
1139        assert!(!builder.metadata_builder.is_sorted);
1140        assert_eq!(builder.metadata_builder.num_field_names(), 0);
1141
1142        builder.add_field_name("a");
1143
1144        assert!(builder.metadata_builder.is_sorted);
1145        assert_eq!(builder.metadata_builder.num_field_names(), 1);
1146
1147        let builder = builder.with_field_names(["b", "c", "d"]);
1148
1149        assert!(builder.metadata_builder.is_sorted);
1150        assert_eq!(builder.metadata_builder.num_field_names(), 4);
1151
1152        let builder = builder.with_field_names(["z", "y"]);
1153        assert!(!builder.metadata_builder.is_sorted);
1154        assert_eq!(builder.metadata_builder.num_field_names(), 6);
1155    }
1156
1157    #[test]
1158    fn test_variant_builder_to_list_builder_no_finish() {
1159        // Create a list builder but never finish it
1160        let mut builder = VariantBuilder::new();
1161        let mut list_builder = builder.new_list();
1162        list_builder.append_value("hi");
1163        drop(list_builder);
1164
1165        builder.append_value(42i8);
1166
1167        // The original builder should be unchanged
1168        let (metadata, value) = builder.finish();
1169        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1170        assert!(metadata.is_empty());
1171
1172        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1173        assert_eq!(variant, Variant::Int8(42));
1174    }
1175
1176    #[test]
1177    fn test_variant_builder_to_object_builder_no_finish() {
1178        // Create an object builder but never finish it
1179        let mut builder = VariantBuilder::new();
1180        let mut object_builder = builder.new_object();
1181        object_builder.insert("name", "unknown");
1182        drop(object_builder);
1183
1184        builder.append_value(42i8);
1185
1186        // The original builder should be unchanged
1187        let (metadata, value) = builder.finish();
1188        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1189        assert!(metadata.is_empty()); // rolled back
1190
1191        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1192        assert_eq!(variant, Variant::Int8(42));
1193    }
1194
1195    #[test]
1196    fn test_list_builder_to_list_builder_inner_no_finish() {
1197        let mut builder = VariantBuilder::new();
1198        let mut list_builder = builder.new_list();
1199        list_builder.append_value(1i8);
1200
1201        // Create a nested list builder but never finish it
1202        let mut nested_list_builder = list_builder.new_list();
1203        nested_list_builder.append_value("hi");
1204        drop(nested_list_builder);
1205
1206        list_builder.append_value(2i8);
1207
1208        // The parent list should only contain the original values
1209        list_builder.finish();
1210        let (metadata, value) = builder.finish();
1211        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1212        assert!(metadata.is_empty());
1213
1214        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1215        let list = variant.as_list().unwrap();
1216        assert_eq!(list.len(), 2);
1217        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
1218        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
1219    }
1220
1221    #[test]
1222    fn test_list_builder_to_list_builder_outer_no_finish() {
1223        let mut builder = VariantBuilder::new();
1224        let mut list_builder = builder.new_list();
1225        list_builder.append_value(1i8);
1226
1227        // Create a nested list builder and finish it
1228        let mut nested_list_builder = list_builder.new_list();
1229        nested_list_builder.append_value("hi");
1230        nested_list_builder.finish();
1231
1232        // Drop the outer list builder without finishing it
1233        drop(list_builder);
1234
1235        builder.append_value(2i8);
1236
1237        // Only the second attempt should appear in the final variant
1238        let (metadata, value) = builder.finish();
1239        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1240        assert!(metadata.is_empty());
1241
1242        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1243        assert_eq!(variant, Variant::Int8(2));
1244    }
1245
1246    #[test]
1247    fn test_list_builder_to_object_builder_inner_no_finish() {
1248        let mut builder = VariantBuilder::new();
1249        let mut list_builder = builder.new_list();
1250        list_builder.append_value(1i8);
1251
1252        // Create a nested object builder but never finish it
1253        let mut nested_object_builder = list_builder.new_object();
1254        nested_object_builder.insert("name", "unknown");
1255        drop(nested_object_builder);
1256
1257        list_builder.append_value(2i8);
1258
1259        // The parent list should only contain the original values
1260        list_builder.finish();
1261        let (metadata, value) = builder.finish();
1262        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1263        assert!(metadata.is_empty());
1264
1265        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1266        let list = variant.as_list().unwrap();
1267        assert_eq!(list.len(), 2);
1268        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
1269        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
1270    }
1271
1272    #[test]
1273    fn test_list_builder_to_object_builder_outer_no_finish() {
1274        let mut builder = VariantBuilder::new();
1275        let mut list_builder = builder.new_list();
1276        list_builder.append_value(1i8);
1277
1278        // Create a nested object builder and finish it
1279        let mut nested_object_builder = list_builder.new_object();
1280        nested_object_builder.insert("name", "unknown");
1281        nested_object_builder.finish();
1282
1283        // Drop the outer list builder without finishing it
1284        drop(list_builder);
1285
1286        builder.append_value(2i8);
1287
1288        // Only the second attempt should appear in the final variant
1289        let (metadata, value) = builder.finish();
1290        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1291        assert!(metadata.is_empty()); // rolled back
1292
1293        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1294        assert_eq!(variant, Variant::Int8(2));
1295    }
1296
1297    #[test]
1298    fn test_object_builder_to_list_builder_inner_no_finish() {
1299        let mut builder = VariantBuilder::new();
1300        let mut object_builder = builder.new_object();
1301        object_builder.insert("first", 1i8);
1302
1303        // Create a nested list builder but never finish it
1304        let mut nested_list_builder = object_builder.new_list("nested");
1305        nested_list_builder.append_value("hi");
1306        drop(nested_list_builder);
1307
1308        object_builder.insert("second", 2i8);
1309
1310        // The parent object should only contain the original fields
1311        object_builder.finish();
1312        let (metadata, value) = builder.finish();
1313
1314        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1315        assert_eq!(metadata.len(), 2);
1316        assert_eq!(&metadata[0], "first");
1317        assert_eq!(&metadata[1], "second");
1318
1319        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1320        let obj = variant.as_object().unwrap();
1321        assert_eq!(obj.len(), 2);
1322        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
1323        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
1324    }
1325
1326    #[test]
1327    fn test_object_builder_to_list_builder_outer_no_finish() {
1328        let mut builder = VariantBuilder::new();
1329        let mut object_builder = builder.new_object();
1330        object_builder.insert("first", 1i8);
1331
1332        // Create a nested list builder and finish it
1333        let mut nested_list_builder = object_builder.new_list("nested");
1334        nested_list_builder.append_value("hi");
1335        nested_list_builder.finish();
1336
1337        // Drop the outer object builder without finishing it
1338        drop(object_builder);
1339
1340        builder.append_value(2i8);
1341
1342        // Only the second attempt should appear in the final variant
1343        let (metadata, value) = builder.finish();
1344        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1345        assert!(metadata.is_empty()); // rolled back
1346
1347        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1348        assert_eq!(variant, Variant::Int8(2));
1349    }
1350
1351    #[test]
1352    fn test_object_builder_to_object_builder_inner_no_finish() {
1353        let mut builder = VariantBuilder::new();
1354        let mut object_builder = builder.new_object();
1355        object_builder.insert("first", 1i8);
1356
1357        // Create a nested object builder but never finish it
1358        let mut nested_object_builder = object_builder.new_object("nested");
1359        nested_object_builder.insert("name", "unknown");
1360        drop(nested_object_builder);
1361
1362        object_builder.insert("second", 2i8);
1363
1364        // The parent object should only contain the original fields
1365        object_builder.finish();
1366        let (metadata, value) = builder.finish();
1367
1368        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1369        assert_eq!(metadata.len(), 2); // the fields of nested_object_builder has been rolled back
1370        assert_eq!(&metadata[0], "first");
1371        assert_eq!(&metadata[1], "second");
1372
1373        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1374        let obj = variant.as_object().unwrap();
1375        assert_eq!(obj.len(), 2);
1376        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
1377        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
1378    }
1379
1380    #[test]
1381    fn test_object_builder_to_object_builder_outer_no_finish() {
1382        let mut builder = VariantBuilder::new();
1383        let mut object_builder = builder.new_object();
1384        object_builder.insert("first", 1i8);
1385
1386        // Create a nested object builder and finish it
1387        let mut nested_object_builder = object_builder.new_object("nested");
1388        nested_object_builder.insert("name", "unknown");
1389        nested_object_builder.finish();
1390
1391        // Drop the outer object builder without finishing it
1392        drop(object_builder);
1393
1394        builder.append_value(2i8);
1395
1396        // Only the second attempt should appear in the final variant
1397        let (metadata, value) = builder.finish();
1398        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1399        assert_eq!(metadata.len(), 0); // rolled back
1400
1401        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1402        assert_eq!(variant, Variant::Int8(2));
1403    }
1404
1405    // Make sure that we can correctly build deeply nested objects even when some of the nested
1406    // builders don't finish.
1407    #[test]
1408    fn test_append_list_object_list_object() {
1409        // An infinite counter
1410        let mut counter = 0..;
1411        let mut take = move |i| (&mut counter).take(i).collect::<Vec<_>>();
1412        let mut builder = VariantBuilder::new();
1413        let skip = 5;
1414        {
1415            let mut list = builder.new_list();
1416            for i in take(4) {
1417                let mut object = list.new_object();
1418                for i in take(4) {
1419                    let field_name = format!("field{i}");
1420                    let mut list = object.new_list(&field_name);
1421                    for i in take(3) {
1422                        let mut object = list.new_object();
1423                        for i in take(3) {
1424                            if i % skip != 0 {
1425                                object.insert(&format!("field{i}"), i);
1426                            }
1427                        }
1428                        if i % skip != 0 {
1429                            object.finish();
1430                        }
1431                    }
1432                    if i % skip != 0 {
1433                        list.finish();
1434                    }
1435                }
1436                if i % skip != 0 {
1437                    object.finish();
1438                }
1439            }
1440            list.finish();
1441        }
1442        let (metadata, value) = builder.finish();
1443        let v1 = Variant::try_new(&metadata, &value).unwrap();
1444
1445        let (metadata, value) = VariantBuilder::new().with_value(v1.clone()).finish();
1446        let v2 = Variant::try_new(&metadata, &value).unwrap();
1447
1448        assert_eq!(format!("{v1:?}"), format!("{v2:?}"));
1449    }
1450
1451    #[test]
1452    fn test_append_variant_bytes_round_trip() {
1453        // Create a complex variant with the normal builder
1454        let mut builder = VariantBuilder::new();
1455        {
1456            let mut obj = builder.new_object();
1457            obj.insert("name", "Alice");
1458            obj.insert("age", 30i32);
1459            {
1460                let mut scores_list = obj.new_list("scores");
1461                scores_list.append_value(95i32);
1462                scores_list.append_value(87i32);
1463                scores_list.append_value(92i32);
1464                scores_list.finish();
1465            }
1466            {
1467                let mut address = obj.new_object("address");
1468                address.insert("street", "123 Main St");
1469                address.insert("city", "Anytown");
1470                address.finish();
1471            }
1472            obj.finish();
1473        }
1474        let (metadata, value1) = builder.finish();
1475        let variant1 = Variant::try_new(&metadata, &value1).unwrap();
1476
1477        // Copy using the new bytes API
1478        let metadata = VariantMetadata::new(&metadata);
1479        let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
1480        let mut builder2 = ValueBuilder::new();
1481        let state = ParentState::variant(&mut builder2, &mut metadata);
1482        ValueBuilder::append_variant_bytes(state, variant1);
1483        let value2 = builder2.into_inner();
1484
1485        // The bytes should be identical, we merely copied them across.
1486        assert_eq!(value1, value2);
1487    }
1488
1489    #[test]
1490    fn test_object_insert_bytes_subset() {
1491        // Create an original object, making sure to inject the field names we'll add later.
1492        let mut builder = VariantBuilder::new().with_field_names(["new_field", "another_field"]);
1493        {
1494            let mut obj = builder.new_object();
1495            obj.insert("field1", "value1");
1496            obj.insert("field2", 42i32);
1497            obj.insert("field3", true);
1498            obj.insert("field4", "value4");
1499            obj.finish();
1500        }
1501        let (metadata1, value1) = builder.finish();
1502        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
1503        let original_obj = original_variant.as_object().unwrap();
1504
1505        // Create a new object copying subset of fields interleaved with new ones
1506        let metadata2 = VariantMetadata::new(&metadata1);
1507        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
1508        let mut builder2 = ValueBuilder::new();
1509        let state = ParentState::variant(&mut builder2, &mut metadata2);
1510        {
1511            let mut obj = ObjectBuilder::new(state, true);
1512
1513            // Copy field1 using bytes API
1514            obj.insert_bytes("field1", original_obj.get("field1").unwrap());
1515
1516            // Add new field
1517            obj.insert("new_field", "new_value");
1518
1519            // Copy field3 using bytes API
1520            obj.insert_bytes("field3", original_obj.get("field3").unwrap());
1521
1522            // Add another new field
1523            obj.insert("another_field", 99i32);
1524
1525            // Copy field2 using bytes API
1526            obj.insert_bytes("field2", original_obj.get("field2").unwrap());
1527
1528            obj.finish();
1529        }
1530        let value2 = builder2.into_inner();
1531        let result_variant = Variant::try_new(&metadata1, &value2).unwrap();
1532        let result_obj = result_variant.as_object().unwrap();
1533
1534        // Verify the object contains expected fields
1535        assert_eq!(result_obj.len(), 5);
1536        assert_eq!(
1537            result_obj.get("field1").unwrap().as_string().unwrap(),
1538            "value1"
1539        );
1540        assert_eq!(result_obj.get("field2").unwrap().as_int32().unwrap(), 42);
1541        assert!(result_obj.get("field3").unwrap().as_boolean().unwrap());
1542        assert_eq!(
1543            result_obj.get("new_field").unwrap().as_string().unwrap(),
1544            "new_value"
1545        );
1546        assert_eq!(
1547            result_obj.get("another_field").unwrap().as_int32().unwrap(),
1548            99
1549        );
1550    }
1551
1552    #[test]
1553    fn test_complex_nested_filtering_injection() {
1554        // Create a complex nested structure: object -> list -> objects. Make sure to pre-register
1555        // the extra field names we'll need later while manipulating variant bytes.
1556        let mut builder = VariantBuilder::new().with_field_names([
1557            "active_count",
1558            "active_users",
1559            "computed_score",
1560            "processed_at",
1561            "status",
1562        ]);
1563
1564        {
1565            let mut root_obj = builder.new_object();
1566            root_obj.insert("metadata", "original");
1567
1568            {
1569                let mut users_list = root_obj.new_list("users");
1570
1571                // User 1
1572                {
1573                    let mut user1 = users_list.new_object();
1574                    user1.insert("id", 1i32);
1575                    user1.insert("name", "Alice");
1576                    user1.insert("active", true);
1577                    user1.finish();
1578                }
1579
1580                // User 2
1581                {
1582                    let mut user2 = users_list.new_object();
1583                    user2.insert("id", 2i32);
1584                    user2.insert("name", "Bob");
1585                    user2.insert("active", false);
1586                    user2.finish();
1587                }
1588
1589                // User 3
1590                {
1591                    let mut user3 = users_list.new_object();
1592                    user3.insert("id", 3i32);
1593                    user3.insert("name", "Charlie");
1594                    user3.insert("active", true);
1595                    user3.finish();
1596                }
1597
1598                users_list.finish();
1599            }
1600
1601            root_obj.insert("total_count", 3i32);
1602            root_obj.finish();
1603        }
1604        let (metadata1, value1) = builder.finish();
1605        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
1606        let original_obj = original_variant.as_object().unwrap();
1607        let original_users = original_obj.get("users").unwrap();
1608        let original_users = original_users.as_list().unwrap();
1609
1610        // Create filtered/modified version: only copy active users and inject new data
1611        let metadata2 = VariantMetadata::new(&metadata1);
1612        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
1613        let mut builder2 = ValueBuilder::new();
1614        let state = ParentState::variant(&mut builder2, &mut metadata2);
1615        {
1616            let mut root_obj = ObjectBuilder::new(state, true);
1617
1618            // Copy metadata using bytes API
1619            root_obj.insert_bytes("metadata", original_obj.get("metadata").unwrap());
1620
1621            // Add processing timestamp
1622            root_obj.insert("processed_at", "2024-01-01T00:00:00Z");
1623
1624            {
1625                let mut filtered_users = root_obj.new_list("active_users");
1626
1627                // Copy only active users and inject additional data
1628                for i in 0..original_users.len() {
1629                    let user = original_users.get(i).unwrap();
1630                    let user = user.as_object().unwrap();
1631                    if user.get("active").unwrap().as_boolean().unwrap() {
1632                        {
1633                            let mut new_user = filtered_users.new_object();
1634
1635                            // Copy existing fields using bytes API
1636                            new_user.insert_bytes("id", user.get("id").unwrap());
1637                            new_user.insert_bytes("name", user.get("name").unwrap());
1638
1639                            // Inject new computed field
1640                            let user_id = user.get("id").unwrap().as_int32().unwrap();
1641                            new_user.insert("computed_score", user_id * 10);
1642
1643                            // Add status transformation (don't copy the 'active' field)
1644                            new_user.insert("status", "verified");
1645
1646                            new_user.finish();
1647                        }
1648                    }
1649                }
1650
1651                // Inject a completely new user
1652                {
1653                    let mut new_user = filtered_users.new_object();
1654                    new_user.insert("id", 999i32);
1655                    new_user.insert("name", "System User");
1656                    new_user.insert("computed_score", 0i32);
1657                    new_user.insert("status", "system");
1658                    new_user.finish();
1659                }
1660
1661                filtered_users.finish();
1662            }
1663
1664            // Update count
1665            root_obj.insert("active_count", 3i32); // 2 active + 1 new
1666
1667            root_obj.finish();
1668        }
1669        let value2 = builder2.into_inner();
1670        let result_variant = Variant::try_new(&metadata1, &value2).unwrap();
1671        let result_obj = result_variant.as_object().unwrap();
1672
1673        // Verify the filtered/modified structure
1674        assert_eq!(
1675            result_obj.get("metadata").unwrap().as_string().unwrap(),
1676            "original"
1677        );
1678        assert_eq!(
1679            result_obj.get("processed_at").unwrap().as_string().unwrap(),
1680            "2024-01-01T00:00:00Z"
1681        );
1682        assert_eq!(
1683            result_obj.get("active_count").unwrap().as_int32().unwrap(),
1684            3
1685        );
1686
1687        let active_users = result_obj.get("active_users").unwrap();
1688        let active_users = active_users.as_list().unwrap();
1689        assert_eq!(active_users.len(), 3);
1690
1691        // Verify Alice (id=1, was active)
1692        let alice = active_users.get(0).unwrap();
1693        let alice = alice.as_object().unwrap();
1694        assert_eq!(alice.get("id").unwrap().as_int32().unwrap(), 1);
1695        assert_eq!(alice.get("name").unwrap().as_string().unwrap(), "Alice");
1696        assert_eq!(alice.get("computed_score").unwrap().as_int32().unwrap(), 10);
1697        assert_eq!(
1698            alice.get("status").unwrap().as_string().unwrap(),
1699            "verified"
1700        );
1701        assert!(alice.get("active").is_none()); // This field was not copied
1702
1703        // Verify Charlie (id=3, was active) - Bob (id=2) was not active so not included
1704        let charlie = active_users.get(1).unwrap();
1705        let charlie = charlie.as_object().unwrap();
1706        assert_eq!(charlie.get("id").unwrap().as_int32().unwrap(), 3);
1707        assert_eq!(charlie.get("name").unwrap().as_string().unwrap(), "Charlie");
1708        assert_eq!(
1709            charlie.get("computed_score").unwrap().as_int32().unwrap(),
1710            30
1711        );
1712        assert_eq!(
1713            charlie.get("status").unwrap().as_string().unwrap(),
1714            "verified"
1715        );
1716
1717        // Verify injected system user
1718        let system_user = active_users.get(2).unwrap();
1719        let system_user = system_user.as_object().unwrap();
1720        assert_eq!(system_user.get("id").unwrap().as_int32().unwrap(), 999);
1721        assert_eq!(
1722            system_user.get("name").unwrap().as_string().unwrap(),
1723            "System User"
1724        );
1725        assert_eq!(
1726            system_user
1727                .get("computed_score")
1728                .unwrap()
1729                .as_int32()
1730                .unwrap(),
1731            0
1732        );
1733        assert_eq!(
1734            system_user.get("status").unwrap().as_string().unwrap(),
1735            "system"
1736        );
1737    }
1738}