parquet_variant/
builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{VariantBasicType, VariantPrimitiveType};
18use crate::{
19    ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList,
20    VariantMetadata, VariantObject,
21};
22use arrow_schema::ArrowError;
23use chrono::Timelike;
24use uuid::Uuid;
25
26mod list;
27mod metadata;
28mod object;
29
30pub use list::*;
31pub use metadata::*;
32pub use object::*;
33
/// Width, in bits, of the basic-type tag that occupies the low end of a value header byte.
pub(crate) const BASIC_TYPE_BITS: u8 = 2;
/// The Unix epoch (1970-01-01); dates are encoded as a day count relative to this date.
pub(crate) const UNIX_EPOCH_DATE: chrono::NaiveDate =
    chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
37
38fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
39    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
40}
41
42fn short_string_header(len: usize) -> u8 {
43    (len as u8) << 2 | VariantBasicType::ShortString as u8
44}
45
/// Returns how many bytes (1 to 4) are used to encode `v` as a little-endian offset.
///
/// Anything above `0xFF_FFFF` maps to 4.
pub(crate) fn int_size(v: usize) -> u8 {
    if v <= 0xFF {
        1
    } else if v <= 0xFFFF {
        2
    } else if v <= 0xFF_FFFF {
        3
    } else {
        4
    }
}
54
/// Overwrites `nbytes` bytes of `buf`, starting at `start_pos`, with the low-order
/// little-endian bytes of `value`.
///
/// Panics if the destination range lies outside `buf` or if `nbytes` exceeds the width of
/// `usize`.
fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, value: usize, nbytes: u8) {
    let width = nbytes as usize;
    let le_bytes = value.to_le_bytes();
    let end_pos = start_pos + width;
    buf[start_pos..end_pos].copy_from_slice(&le_bytes[..width]);
}
60
/// Wrapper around a `Vec<u8>` that provides methods for appending
/// primitive values, variant types, and metadata.
///
/// This is used internally by the builders to construct
/// the `value` field for [`Variant`] values.
///
/// You can reuse an existing `Vec<u8>` by using the `from` impl.
#[derive(Debug, Default)]
pub struct ValueBuilder(Vec<u8>);
70
71impl ValueBuilder {
72    /// Construct a ValueBuffer that will write to a new underlying `Vec`
73    pub fn new() -> Self {
74        Default::default()
75    }
76}
77
/// Expands to the `match` shared by `append_variant`, `try_append_variant`, and
/// `append_variant_bytes`.
///
/// Every non-nested variant is forwarded to the corresponding `append_*` method on the builder
/// expression; the caller supplies its own pattern and arm for objects and for lists, because
/// those are handled differently by each of the three callers.
macro_rules! variant_append_value {
    (
        $sink:expr,
        $variant:expr,
        $obj_pattern:pat => $obj_handler:expr,
        $list_pattern:pat => $list_handler:expr
    ) => {
        match $variant {
            Variant::Null => $sink.append_null(),
            Variant::BooleanTrue => $sink.append_bool(true),
            Variant::BooleanFalse => $sink.append_bool(false),
            Variant::Int8(int) => $sink.append_int8(int),
            Variant::Int16(int) => $sink.append_int16(int),
            Variant::Int32(int) => $sink.append_int32(int),
            Variant::Int64(int) => $sink.append_int64(int),
            Variant::Date(date) => $sink.append_date(date),
            Variant::Time(time) => $sink.append_time_micros(time),
            Variant::TimestampMicros(ts) => $sink.append_timestamp_micros(ts),
            Variant::TimestampNtzMicros(ts) => $sink.append_timestamp_ntz_micros(ts),
            Variant::TimestampNanos(ts) => $sink.append_timestamp_nanos(ts),
            Variant::TimestampNtzNanos(ts) => $sink.append_timestamp_ntz_nanos(ts),
            Variant::Decimal4(dec) => $sink.append_decimal4(dec),
            Variant::Decimal8(dec) => $sink.append_decimal8(dec),
            Variant::Decimal16(dec) => $sink.append_decimal16(dec),
            Variant::Float(float) => $sink.append_float(float),
            Variant::Double(float) => $sink.append_double(float),
            Variant::Binary(bytes) => $sink.append_binary(bytes),
            Variant::String(text) => $sink.append_string(text),
            Variant::ShortString(text) => $sink.append_short_string(text),
            Variant::Uuid(id) => $sink.append_uuid(id),
            $obj_pattern => $obj_handler,
            $list_pattern => $list_handler,
        }
    };
}
110
111impl ValueBuilder {
112    fn append_u8(&mut self, term: u8) {
113        self.0.push(term);
114    }
115
116    fn append_slice(&mut self, other: &[u8]) {
117        self.0.extend_from_slice(other);
118    }
119
120    fn append_primitive_header(&mut self, primitive_type: VariantPrimitiveType) {
121        self.0.push(primitive_header(primitive_type));
122    }
123
124    /// Returns the underlying buffer, consuming self
125    pub fn into_inner(self) -> Vec<u8> {
126        self.0
127    }
128
129    pub(crate) fn inner_mut(&mut self) -> &mut Vec<u8> {
130        &mut self.0
131    }
132
133    // Variant types below
134
135    fn append_null(&mut self) {
136        self.append_primitive_header(VariantPrimitiveType::Null);
137    }
138
139    fn append_bool(&mut self, value: bool) {
140        let primitive_type = if value {
141            VariantPrimitiveType::BooleanTrue
142        } else {
143            VariantPrimitiveType::BooleanFalse
144        };
145        self.append_primitive_header(primitive_type);
146    }
147
148    fn append_int8(&mut self, value: i8) {
149        self.append_primitive_header(VariantPrimitiveType::Int8);
150        self.append_u8(value as u8);
151    }
152
153    fn append_int16(&mut self, value: i16) {
154        self.append_primitive_header(VariantPrimitiveType::Int16);
155        self.append_slice(&value.to_le_bytes());
156    }
157
158    fn append_int32(&mut self, value: i32) {
159        self.append_primitive_header(VariantPrimitiveType::Int32);
160        self.append_slice(&value.to_le_bytes());
161    }
162
163    fn append_int64(&mut self, value: i64) {
164        self.append_primitive_header(VariantPrimitiveType::Int64);
165        self.append_slice(&value.to_le_bytes());
166    }
167
168    fn append_float(&mut self, value: f32) {
169        self.append_primitive_header(VariantPrimitiveType::Float);
170        self.append_slice(&value.to_le_bytes());
171    }
172
173    fn append_double(&mut self, value: f64) {
174        self.append_primitive_header(VariantPrimitiveType::Double);
175        self.append_slice(&value.to_le_bytes());
176    }
177
178    fn append_date(&mut self, value: chrono::NaiveDate) {
179        self.append_primitive_header(VariantPrimitiveType::Date);
180        let days_since_epoch = value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
181        self.append_slice(&days_since_epoch.to_le_bytes());
182    }
183
184    fn append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) {
185        self.append_primitive_header(VariantPrimitiveType::TimestampMicros);
186        let micros = value.timestamp_micros();
187        self.append_slice(&micros.to_le_bytes());
188    }
189
190    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
191        self.append_primitive_header(VariantPrimitiveType::TimestampNtzMicros);
192        let micros = value.and_utc().timestamp_micros();
193        self.append_slice(&micros.to_le_bytes());
194    }
195
196    fn append_time_micros(&mut self, value: chrono::NaiveTime) {
197        self.append_primitive_header(VariantPrimitiveType::Time);
198        let micros_from_midnight = value.num_seconds_from_midnight() as u64 * 1_000_000
199            + value.nanosecond() as u64 / 1_000;
200        self.append_slice(&micros_from_midnight.to_le_bytes());
201    }
202
203    fn append_timestamp_nanos(&mut self, value: chrono::DateTime<chrono::Utc>) {
204        self.append_primitive_header(VariantPrimitiveType::TimestampNanos);
205        let nanos = value.timestamp_nanos_opt().unwrap();
206        self.append_slice(&nanos.to_le_bytes());
207    }
208
209    fn append_timestamp_ntz_nanos(&mut self, value: chrono::NaiveDateTime) {
210        self.append_primitive_header(VariantPrimitiveType::TimestampNtzNanos);
211        let nanos = value.and_utc().timestamp_nanos_opt().unwrap();
212        self.append_slice(&nanos.to_le_bytes());
213    }
214
215    fn append_uuid(&mut self, value: Uuid) {
216        self.append_primitive_header(VariantPrimitiveType::Uuid);
217        self.append_slice(&value.into_bytes());
218    }
219
220    fn append_decimal4(&mut self, decimal4: VariantDecimal4) {
221        self.append_primitive_header(VariantPrimitiveType::Decimal4);
222        self.append_u8(decimal4.scale());
223        self.append_slice(&decimal4.integer().to_le_bytes());
224    }
225
226    fn append_decimal8(&mut self, decimal8: VariantDecimal8) {
227        self.append_primitive_header(VariantPrimitiveType::Decimal8);
228        self.append_u8(decimal8.scale());
229        self.append_slice(&decimal8.integer().to_le_bytes());
230    }
231
232    fn append_decimal16(&mut self, decimal16: VariantDecimal16) {
233        self.append_primitive_header(VariantPrimitiveType::Decimal16);
234        self.append_u8(decimal16.scale());
235        self.append_slice(&decimal16.integer().to_le_bytes());
236    }
237
238    fn append_binary(&mut self, value: &[u8]) {
239        self.append_primitive_header(VariantPrimitiveType::Binary);
240        self.append_slice(&(value.len() as u32).to_le_bytes());
241        self.append_slice(value);
242    }
243
244    fn append_short_string(&mut self, value: ShortString) {
245        let inner = value.0;
246        self.append_u8(short_string_header(inner.len()));
247        self.append_slice(inner.as_bytes());
248    }
249
250    fn append_string(&mut self, value: &str) {
251        self.append_primitive_header(VariantPrimitiveType::String);
252        self.append_slice(&(value.len() as u32).to_le_bytes());
253        self.append_slice(value.as_bytes());
254    }
255
256    fn append_object<S: BuilderSpecificState>(state: ParentState<'_, S>, obj: VariantObject) {
257        let mut object_builder = ObjectBuilder::new(state, false);
258        object_builder.extend(obj.iter());
259        object_builder.finish();
260    }
261
262    fn try_append_object<S: BuilderSpecificState>(
263        state: ParentState<'_, S>,
264        obj: VariantObject,
265    ) -> Result<(), ArrowError> {
266        let mut object_builder = ObjectBuilder::new(state, false);
267
268        for res in obj.iter_try() {
269            let (field_name, value) = res?;
270            object_builder.try_insert(field_name, value)?;
271        }
272
273        object_builder.finish();
274        Ok(())
275    }
276
277    fn append_list<S: BuilderSpecificState>(state: ParentState<'_, S>, list: VariantList) {
278        let mut list_builder = ListBuilder::new(state, false);
279        list_builder.extend(list.iter());
280        list_builder.finish();
281    }
282
283    fn try_append_list<S: BuilderSpecificState>(
284        state: ParentState<'_, S>,
285        list: VariantList,
286    ) -> Result<(), ArrowError> {
287        let mut list_builder = ListBuilder::new(state, false);
288        for res in list.iter_try() {
289            let value = res?;
290            list_builder.try_append_value(value)?;
291        }
292
293        list_builder.finish();
294
295        Ok(())
296    }
297
298    /// Returns the current size of the underlying buffer
299    pub fn offset(&self) -> usize {
300        self.0.len()
301    }
302
303    /// Appends a variant to the builder.
304    ///
305    /// # Panics
306    ///
307    /// This method will panic if the variant contains duplicate field names in objects
308    /// when validation is enabled. For a fallible version, use [`ValueBuilder::try_append_variant`]
309    pub fn append_variant<S: BuilderSpecificState>(
310        mut state: ParentState<'_, S>,
311        variant: Variant<'_, '_>,
312    ) {
313        variant_append_value!(
314            state.value_builder(),
315            variant,
316            Variant::Object(obj) => return Self::append_object(state, obj),
317            Variant::List(list) => return Self::append_list(state, list)
318        );
319        state.finish();
320    }
321
322    /// Tries to append a variant to the provided [`ParentState`] instance.
323    ///
324    /// The attempt fails if the variant contains duplicate field names in objects when validation
325    /// is enabled.
326    pub fn try_append_variant<S: BuilderSpecificState>(
327        mut state: ParentState<'_, S>,
328        variant: Variant<'_, '_>,
329    ) -> Result<(), ArrowError> {
330        variant_append_value!(
331            state.value_builder(),
332            variant,
333            Variant::Object(obj) => return Self::try_append_object(state, obj),
334            Variant::List(list) => return Self::try_append_list(state, list)
335        );
336        state.finish();
337        Ok(())
338    }
339
340    /// Appends a variant to the buffer by copying raw bytes when possible.
341    ///
342    /// For objects and lists, this directly copies their underlying byte representation instead of
343    /// performing a logical copy and without touching the metadata builder. For other variant
344    /// types, this falls back to the standard append behavior.
345    ///
346    /// The caller must ensure that the metadata dictionary is already built and correct for
347    /// any objects or lists being appended.
348    pub fn append_variant_bytes<S: BuilderSpecificState>(
349        mut state: ParentState<'_, S>,
350        variant: Variant<'_, '_>,
351    ) {
352        let builder = state.value_builder();
353        variant_append_value!(
354            builder,
355            variant,
356            Variant::Object(obj) => builder.append_slice(obj.value),
357            Variant::List(list) => builder.append_slice(list.value)
358        );
359        state.finish();
360    }
361
362    /// Writes out the header byte for a variant object or list, from the starting position
363    /// of the builder, will return the position after this write
364    pub(crate) fn append_header_start_from_buf_pos(
365        &mut self,
366        start_pos: usize, // the start position where the header will be inserted
367        header_byte: u8,
368        is_large: bool,
369        num_fields: usize,
370    ) -> usize {
371        let buffer = self.inner_mut();
372
373        // Write header at the original start position
374        let mut header_pos = start_pos;
375
376        // Write header byte
377        buffer[header_pos] = header_byte;
378        header_pos += 1;
379
380        // Write number of fields
381        if is_large {
382            buffer[header_pos..header_pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes());
383            header_pos += 4;
384        } else {
385            buffer[header_pos] = num_fields as u8;
386            header_pos += 1;
387        }
388
389        header_pos
390    }
391
392    /// Writes out the offsets for an array of offsets, including the final offset (data size).
393    /// from the starting position of the buffer, will return the position after this write
394    pub(crate) fn append_offset_array_start_from_buf_pos(
395        &mut self,
396        start_pos: usize,
397        offsets: impl IntoIterator<Item = usize>,
398        data_size: Option<usize>,
399        nbytes: u8,
400    ) -> usize {
401        let buf = self.inner_mut();
402
403        let mut current_pos = start_pos;
404        for relative_offset in offsets {
405            write_offset_at_pos(buf, current_pos, relative_offset, nbytes);
406            current_pos += nbytes as usize;
407        }
408
409        // Write data_size
410        if let Some(data_size) = data_size {
411            // Write data_size at the end of the offsets
412            write_offset_at_pos(buf, current_pos, data_size, nbytes);
413            current_pos += nbytes as usize;
414        }
415
416        current_pos
417    }
418}
419
/// A trait for managing state specific to different builder types.
///
/// [`ParentState`] owns one value of this type and drives it: `finish` is invoked when the
/// insertion is committed, `rollback` when the state is dropped without having been finished.
pub trait BuilderSpecificState: std::fmt::Debug {
    /// Called by [`ParentState::finish`] to apply any pending builder-specific changes.
    ///
    /// The provided implementation does nothing by default.
    ///
    /// Parameters:
    /// - `metadata_builder`: The metadata builder that was used
    /// - `value_builder`: The value builder that was used
    fn finish(
        &mut self,
        _metadata_builder: &mut dyn MetadataBuilder,
        _value_builder: &mut ValueBuilder,
    ) {
    }

    /// Called by [`ParentState::drop`] to revert any changes that were eagerly applied, if
    /// [`ParentState::finish`] was never invoked.
    ///
    /// The provided implementation does nothing by default.
    ///
    /// The base [`ParentState`] will handle rolling back the value and metadata builders,
    /// but builder-specific state may need to revert its own changes.
    fn rollback(&mut self) {}
}
445
/// Empty no-op implementation for top-level variant building: `()` carries no state and keeps
/// the provided do-nothing `finish` and `rollback`.
impl BuilderSpecificState for () {}
448
/// Tracks information needed to correctly finalize a nested builder.
///
/// A child builder has no effect on its parent unless/until its `finish` method is called, at
/// which point the child appends the new value to the parent. As a (desirable) side effect,
/// creating a parent state instance captures mutable references to a subset of the parent's fields,
/// rendering the parent object completely unusable until the parent state goes out of scope. This
/// ensures that at most one child builder can exist at a time.
///
/// The redundancy in `value_builder` and `metadata_builder` is because all the references come from
/// the parent, and we cannot "split" a mutable reference across two objects (parent state and the
/// child builder that uses it). So everything has to be here.
#[derive(Debug)]
pub struct ParentState<'a, S: BuilderSpecificState> {
    // Value buffer borrowed from the parent.
    pub(crate) value_builder: &'a mut ValueBuilder,
    // Length of the value buffer when this state was created; rollback truncates back to it.
    pub(crate) saved_value_builder_offset: usize,
    // Metadata builder borrowed from the parent.
    pub(crate) metadata_builder: &'a mut dyn MetadataBuilder,
    // Number of field names when this state was created; rollback truncates back to it.
    pub(crate) saved_metadata_builder_dict_size: usize,
    // Builder-specific state, driven through `BuilderSpecificState::finish` / `rollback`.
    pub(crate) builder_state: S,
    // Set by `finish`; when true, dropping this state no longer rolls anything back.
    pub(crate) finished: bool,
}
469
470impl<'a, S: BuilderSpecificState> ParentState<'a, S> {
471    /// Creates a new ParentState instance. The value and metadata builder
472    /// state is checkpointed and will roll back on drop, unless [`Self::finish`] is called. The
473    /// builder-specific state is governed by its own `finish` and `rollback` calls.
474    pub fn new(
475        value_builder: &'a mut ValueBuilder,
476        metadata_builder: &'a mut dyn MetadataBuilder,
477        builder_state: S,
478    ) -> Self {
479        Self {
480            saved_value_builder_offset: value_builder.offset(),
481            value_builder,
482            saved_metadata_builder_dict_size: metadata_builder.num_field_names(),
483            metadata_builder,
484            builder_state,
485            finished: false,
486        }
487    }
488
489    /// Marks the insertion as having succeeded and invokes
490    /// [`BuilderSpecificState::finish`]. Internal state will no longer roll back on drop.
491    pub fn finish(&mut self) {
492        self.builder_state
493            .finish(self.metadata_builder, self.value_builder);
494        self.finished = true
495    }
496
497    // Rolls back value and metadata builder changes and invokes [`BuilderSpecificState::rollback`].
498    fn rollback(&mut self) {
499        if self.finished {
500            return;
501        }
502
503        self.value_builder
504            .inner_mut()
505            .truncate(self.saved_value_builder_offset);
506        self.metadata_builder
507            .truncate_field_names(self.saved_metadata_builder_dict_size);
508        self.builder_state.rollback();
509    }
510
511    // Useful because e.g. `let b = self.value_builder;` fails compilation.
512    pub(crate) fn value_builder(&mut self) -> &mut ValueBuilder {
513        self.value_builder
514    }
515
516    // Useful because e.g. `let b = self.metadata_builder;` fails compilation.
517    pub(crate) fn metadata_builder(&mut self) -> &mut dyn MetadataBuilder {
518        self.metadata_builder
519    }
520}
521
522impl<'a> ParentState<'a, ()> {
523    /// Creates a new instance suitable for a top-level variant builder
524    /// (e.g. [`VariantBuilder`]). The value and metadata builder state is checkpointed and will
525    /// roll back on drop, unless [`Self::finish`] is called.
526    pub fn variant(
527        value_builder: &'a mut ValueBuilder,
528        metadata_builder: &'a mut dyn MetadataBuilder,
529    ) -> Self {
530        Self::new(value_builder, metadata_builder, ())
531    }
532}
533
534/// Automatically rolls back any unfinished `ParentState`.
535impl<S: BuilderSpecificState> Drop for ParentState<'_, S> {
536    fn drop(&mut self) {
537        self.rollback()
538    }
539}
540
541/// Top level builder for [`Variant`] values
542///
543/// # Example: create a Primitive Int8
544/// ```
545/// # use parquet_variant::{Variant, VariantBuilder};
546/// let mut builder = VariantBuilder::new();
547/// builder.append_value(Variant::Int8(42));
548/// // Finish the builder to get the metadata and value
549/// let (metadata, value) = builder.finish();
550/// // use the Variant API to verify the result
551/// let variant = Variant::try_new(&metadata, &value).unwrap();
552/// assert_eq!(variant, Variant::Int8(42));
553/// ```
554///
555/// # Example: Create a [`Variant::Object`]
556///
557/// This example shows how to create an object with two fields:
558/// ```json
559/// {
560///  "first_name": "Jiaying",
561///  "last_name": "Li"
562/// }
563/// ```
564///
565/// ```
566/// # use parquet_variant::{Variant, VariantBuilder};
567/// let mut builder = VariantBuilder::new();
568/// // Create an object builder that will write fields to the object
569/// let mut object_builder = builder.new_object();
570/// object_builder.insert("first_name", "Jiaying");
571/// object_builder.insert("last_name", "Li");
572/// object_builder.finish(); // call finish to finalize the object
573/// // Finish the builder to get the metadata and value
574/// let (metadata, value) = builder.finish();
575/// // use the Variant API to verify the result
576/// let variant = Variant::try_new(&metadata, &value).unwrap();
577/// let variant_object = variant.as_object().unwrap();
578/// assert_eq!(
579///   variant_object.get("first_name"),
580///   Some(Variant::from("Jiaying"))
581/// );
582/// assert_eq!(
583///   variant_object.get("last_name"),
584///   Some(Variant::from("Li"))
585/// );
586/// ```
587///
588///
589/// You can also use the [`ObjectBuilder::with_field`] to add fields to the
590/// object
591/// ```
592/// # use parquet_variant::{Variant, VariantBuilder};
593/// // build the same object as above
594/// let mut builder = VariantBuilder::new();
595/// builder.new_object()
596///   .with_field("first_name", "Jiaying")
597///   .with_field("last_name", "Li")
598///   .finish();
599/// let (metadata, value) = builder.finish();
600/// let variant = Variant::try_new(&metadata, &value).unwrap();
601/// let variant_object = variant.as_object().unwrap();
602/// assert_eq!(
603///   variant_object.get("first_name"),
604///   Some(Variant::from("Jiaying"))
605/// );
606/// assert_eq!(
607///   variant_object.get("last_name"),
608///   Some(Variant::from("Li"))
609/// );
610/// ```
611/// # Example: Create a [`Variant::List`] (an Array)
612///
613/// This example shows how to create an array of integers: `[1, 2, 3]`.
614/// ```
615///  # use parquet_variant::{Variant, VariantBuilder};
616///  let mut builder = VariantBuilder::new();
617///  // Create a builder that will write elements to the list
618///  let mut list_builder = builder.new_list();
619///  list_builder.append_value(1i8);
620///  list_builder.append_value(2i8);
621///  list_builder.append_value(3i8);
622/// // call finish to finalize the list
623///  list_builder.finish();
624/// // Finish the builder to get the metadata and value
625/// let (metadata, value) = builder.finish();
626/// // use the Variant API to verify the result
627/// let variant = Variant::try_new(&metadata, &value).unwrap();
628/// let variant_list = variant.as_list().unwrap();
629/// // Verify the list contents
630/// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
631/// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
632/// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
633/// ```
634///
635/// You can also use the [`ListBuilder::with_value`] to append values to the
636/// list.
637/// ```
638///  # use parquet_variant::{Variant, VariantBuilder};
639///  let mut builder = VariantBuilder::new();
640///  builder.new_list()
641///      .with_value(1i8)
642///      .with_value(2i8)
643///      .with_value(3i8)
644///      .finish();
645/// let (metadata, value) = builder.finish();
646/// let variant = Variant::try_new(&metadata, &value).unwrap();
647/// let variant_list = variant.as_list().unwrap();
648/// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
649/// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
650/// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
651/// ```
652///
653/// # Example: [`Variant::List`] of  [`Variant::Object`]s
654///
/// This example shows how to create a list of objects:
656/// ```json
657/// [
658///   {
659///      "id": 1,
660///      "type": "Cauliflower"
661///   },
662///   {
663///      "id": 2,
664///      "type": "Beets"
665///   }
666/// ]
667/// ```
668/// ```
669/// use parquet_variant::{Variant, VariantBuilder};
670/// let mut builder = VariantBuilder::new();
671///
672/// // Create a builder that will write elements to the list
673/// let mut list_builder = builder.new_list();
674///
675/// {
676///     let mut object_builder = list_builder.new_object();
677///     object_builder.insert("id", 1);
678///     object_builder.insert("type", "Cauliflower");
679///     object_builder.finish();
680/// }
681///
682/// {
683///     let mut object_builder = list_builder.new_object();
684///     object_builder.insert("id", 2);
685///     object_builder.insert("type", "Beets");
686///     object_builder.finish();
687/// }
688///
689/// list_builder.finish();
690/// // Finish the builder to get the metadata and value
691/// let (metadata, value) = builder.finish();
692/// // use the Variant API to verify the result
693/// let variant = Variant::try_new(&metadata, &value).unwrap();
694/// let variant_list = variant.as_list().unwrap();
695///
696///
697/// let obj1_variant = variant_list.get(0).unwrap();
698/// let obj1 = obj1_variant.as_object().unwrap();
699/// assert_eq!(
700///     obj1.get("id"),
701///     Some(Variant::from(1))
702/// );
703/// assert_eq!(
704///     obj1.get("type"),
705///     Some(Variant::from("Cauliflower"))
706/// );
707///
708/// let obj2_variant = variant_list.get(1).unwrap();
709/// let obj2 = obj2_variant.as_object().unwrap();
710///
711/// assert_eq!(
712///     obj2.get("id"),
713///     Some(Variant::from(2))
714/// );
715/// assert_eq!(
716///     obj2.get("type"),
717///     Some(Variant::from("Beets"))
718/// );
719///
720/// ```
721/// # Example: Unique Field Validation
722///
723/// This example shows how enabling unique field validation will cause an error
724/// if the same field is inserted more than once.
725/// ```
726/// # use parquet_variant::VariantBuilder;
727/// #
728/// let mut builder = VariantBuilder::new().with_validate_unique_fields(true);
729///
730/// // When validation is enabled, try_with_field will return an error
731/// let result = builder
732///     .new_object()
733///     .with_field("a", 1)
734///     .try_with_field("a", 2);
735/// assert!(result.is_err());
736/// ```
737///
738/// # Example: Sorted dictionaries
739///
740/// This example shows how to create a [`VariantBuilder`] with a pre-sorted field dictionary
741/// to improve field access performance when reading [`Variant`] objects.
742///
743/// You can use [`VariantBuilder::with_field_names`] to add multiple field names at once:
744/// ```
745/// use parquet_variant::{Variant, VariantBuilder};
746/// let mut builder = VariantBuilder::new()
747///     .with_field_names(["age", "name", "score"].into_iter());
748///
749/// let mut obj = builder.new_object();
750/// obj.insert("name", "Alice");
751/// obj.insert("age", 30);
752/// obj.insert("score", 95.5);
753/// obj.finish();
754///
755/// let (metadata, value) = builder.finish();
756/// let variant = Variant::try_new(&metadata, &value).unwrap();
757/// ```
758///
759/// Alternatively, you can use [`VariantBuilder::add_field_name`] to add field names one by one:
760/// ```
761/// use parquet_variant::{Variant, VariantBuilder};
762/// let mut builder = VariantBuilder::new();
763/// builder.add_field_name("age"); // field id = 0
764/// builder.add_field_name("name"); // field id = 1
765/// builder.add_field_name("score"); // field id = 2
766///
767/// let mut obj = builder.new_object();
/// obj.insert("name", "Bob"); // reuses the pre-registered field id = 1
769/// obj.insert("age", 25);
770/// obj.insert("score", 88.0);
771/// obj.finish();
772///
773/// let (metadata, value) = builder.finish();
774/// let variant = Variant::try_new(&metadata, &value).unwrap();
775/// ```
#[derive(Default, Debug)]
pub struct VariantBuilder {
    // Buffer that accumulates the encoded `value` bytes.
    value_builder: ValueBuilder,
    // Builder for the metadata, including the field-name dictionary.
    metadata_builder: WritableMetadataBuilder,
    // Passed on to the builders created by `new_list` (see `with_validate_unique_fields`).
    validate_unique_fields: bool,
}
782
783impl VariantBuilder {
784    /// Create a new VariantBuilder with new underlying buffers
785    pub fn new() -> Self {
786        Self {
787            value_builder: ValueBuilder::new(),
788            metadata_builder: WritableMetadataBuilder::default(),
789            validate_unique_fields: false,
790        }
791    }
792
    /// Seeds this builder's metadata with the entries of a pre-existing [`VariantMetadata`]
    /// and returns the builder for chaining.
    ///
    /// Everything yielded by `metadata.iter()` is added to the metadata builder.
    pub fn with_metadata(mut self, metadata: VariantMetadata) -> Self {
        self.metadata_builder.extend(metadata.iter());

        self
    }
799
    /// Enables validation of unique field keys in nested objects.
    ///
    /// This setting is propagated to all [`ObjectBuilder`]s created through this [`VariantBuilder`]
    /// (including via any [`ListBuilder`]). With validation enabled, fallible insertion methods
    /// such as [`ObjectBuilder::try_with_field`] return an error when a key is inserted twice.
    pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
        self.validate_unique_fields = validate_unique_fields;
        self
    }
809
    /// This method pre-populates the field name dictionary in the Variant metadata with
    /// the specific field names, in order.
    ///
    /// You can use this to pre-populate a [`VariantBuilder`] with a sorted dictionary if you
    /// know the field names beforehand. Sorted dictionaries can accelerate field access when
    /// reading [`Variant`]s.
    pub fn with_field_names<'a>(mut self, field_names: impl IntoIterator<Item = &'a str>) -> Self {
        self.metadata_builder.extend(field_names);

        self
    }
821
    /// Builder-style API for appending a value to this builder and returning self to enable
    /// method chaining.
    ///
    /// # Panics
    ///
    /// This method will panic if the variant contains duplicate field names in objects
    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_with_value`].
    pub fn with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, value: T) -> Self {
        self.append_value(value);
        self
    }
832
833    /// Builder-style API for appending a value to the list and returns self for method chaining.
834    ///
835    /// This is the fallible version of [`ListBuilder::with_value`].
836    pub fn try_with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
837        mut self,
838        value: T,
839    ) -> Result<Self, ArrowError> {
840        self.try_append_value(value)?;
841        Ok(self)
842    }
843
844    /// This method reserves capacity for field names in the Variant metadata,
845    /// which can improve performance when you know the approximate number of unique field
846    /// names that will be used across all objects in the [`Variant`].
847    pub fn reserve(&mut self, capacity: usize) {
848        self.metadata_builder.field_names.reserve(capacity);
849    }
850
851    /// Adds a single field name to the field name directory in the Variant metadata.
852    ///
853    /// This method does the same thing as [`VariantBuilder::with_field_names`] but adds one field name at a time.
854    pub fn add_field_name(&mut self, field_name: &str) {
855        self.metadata_builder.upsert_field_name(field_name);
856    }
857
858    /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
859    ///
860    /// See the examples on [`VariantBuilder`] for usage.
861    pub fn new_list(&mut self) -> ListBuilder<'_, ()> {
862        let parent_state =
863            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
864        ListBuilder::new(parent_state, self.validate_unique_fields)
865    }
866
867    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
868    ///
869    /// See the examples on [`VariantBuilder`] for usage.
870    pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> {
871        let parent_state =
872            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
873        ObjectBuilder::new(parent_state, self.validate_unique_fields)
874    }
875
876    /// Append a value to the builder.
877    ///
878    /// # Panics
879    ///
880    /// This method will panic if the variant contains duplicate field names in objects
881    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_append_value`]
882    ///
883    /// # Example
884    /// ```
885    /// # use parquet_variant::{Variant, VariantBuilder};
886    /// let mut builder = VariantBuilder::new();
887    /// // most primitive types can be appended directly as they implement `Into<Variant>`
888    /// builder.append_value(42i8);
889    /// ```
890    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
891        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
892        ValueBuilder::append_variant(state, value.into())
893    }
894
895    /// Append a value to the builder.
896    pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
897        &mut self,
898        value: T,
899    ) -> Result<(), ArrowError> {
900        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
901        ValueBuilder::try_append_variant(state, value.into())
902    }
903
904    /// Appends a variant value to the builder by copying raw bytes when possible.
905    ///
906    /// For objects and lists, this directly copies their underlying byte representation instead of
907    /// performing a logical copy and without touching the metadata builder. For other variant
908    /// types, this falls back to the standard append behavior.
909    ///
910    /// The caller must ensure that the metadata dictionary entries are already built and correct for
911    /// any objects or lists being appended.
912    pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
913        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
914        ValueBuilder::append_variant_bytes(state, value.into());
915    }
916
917    /// Finish the builder and return the metadata and value buffers.
918    pub fn finish(mut self) -> (Vec<u8>, Vec<u8>) {
919        self.metadata_builder.finish();
920        (
921            self.metadata_builder.into_inner(),
922            self.value_builder.into_inner(),
923        )
924    }
925}
926
927/// Extends [`VariantBuilder`] to help building nested [`Variant`]s
928///
929/// Allows users to append values to a [`VariantBuilder`], [`ListBuilder`] or
930/// [`ObjectBuilder`]. using the same interface.
931pub trait VariantBuilderExt {
932    /// The builder specific state used by nested builders
933    type State<'a>: BuilderSpecificState + 'a
934    where
935        Self: 'a;
936
937    /// Appends a NULL value to this builder. The semantics depend on the implementation, but will
938    /// often translate to appending a [`Variant::Null`] value.
939    fn append_null(&mut self);
940
941    /// Appends a new variant value to this builder. See e.g. [`VariantBuilder::append_value`].
942    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>);
943
944    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Panics if the nested
945    /// builder cannot be created, see e.g. [`ObjectBuilder::new_list`].
946    fn new_list(&mut self) -> ListBuilder<'_, Self::State<'_>> {
947        self.try_new_list().unwrap()
948    }
949
950    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Panics if the
951    /// nested builder cannot be created, see e.g. [`ObjectBuilder::new_object`].
952    fn new_object(&mut self) -> ObjectBuilder<'_, Self::State<'_>> {
953        self.try_new_object().unwrap()
954    }
955
956    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Returns an error if
957    /// the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_list`].
958    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError>;
959
960    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Returns an error
961    /// if the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_object`].
962    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError>;
963}
964
965impl VariantBuilderExt for VariantBuilder {
966    type State<'a>
967        = ()
968    where
969        Self: 'a;
970
971    /// Variant values cannot encode NULL, only [`Variant::Null`]. This is different from the column
972    /// that holds variant values being NULL at some positions.
973    fn append_null(&mut self) {
974        self.append_value(Variant::Null);
975    }
976    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
977        self.append_value(value);
978    }
979
980    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
981        Ok(self.new_list())
982    }
983
984    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
985        Ok(self.new_object())
986    }
987}
988
989#[cfg(test)]
990mod tests {
991    use crate::{VariantMetadata, builder::metadata::ReadOnlyMetadataBuilder};
992
993    use super::*;
994    #[test]
995    fn test_simple_usage() {
996        test_variant_roundtrip((), Variant::Null);
997        test_variant_roundtrip(true, Variant::BooleanTrue);
998        test_variant_roundtrip(false, Variant::BooleanFalse);
999        test_variant_roundtrip(42i8, Variant::Int8(42));
1000        test_variant_roundtrip(1234i16, Variant::Int16(1234));
1001        test_variant_roundtrip(123456i32, Variant::Int32(123456));
1002        test_variant_roundtrip(123456789i64, Variant::Int64(123456789));
1003        test_variant_roundtrip(1.5f32, Variant::Float(1.5));
1004        test_variant_roundtrip(2.5f64, Variant::Double(2.5));
1005        test_variant_roundtrip("hello", Variant::ShortString(ShortString("hello")));
1006
1007        // Test long string (> 63 bytes)
1008        let long_string = "This is a very long string that exceeds the short string limit of 63 bytes and should be encoded as a regular string type instead of a short string";
1009        test_variant_roundtrip(long_string, Variant::String(long_string));
1010
1011        // Test binary data
1012        let binary_data = b"binary data";
1013        test_variant_roundtrip(
1014            binary_data.as_slice(),
1015            Variant::Binary(binary_data.as_slice()),
1016        );
1017    }
1018
1019    /// Helper function to test that a value can be built and reconstructed correctly
1020    fn test_variant_roundtrip<'m, 'd, T: Into<Variant<'m, 'd>>>(input: T, expected: Variant) {
1021        let mut builder = VariantBuilder::new();
1022        builder.append_value(input);
1023        let (metadata, value) = builder.finish();
1024        let variant = Variant::try_new(&metadata, &value).unwrap_or_else(|_| {
1025            panic!("Failed to create variant from metadata and value: {metadata:?}, {value:?}")
1026        });
1027        assert_eq!(variant, expected);
1028    }
1029
1030    #[test]
1031    fn test_nested_object_with_lists() {
1032        /*
1033        {
1034            "door 1": {
1035                "items": ["apple", false ]
1036            }
1037        }
1038
1039        */
1040
1041        let mut builder = VariantBuilder::new();
1042        {
1043            let mut outer_object_builder = builder.new_object();
1044            {
1045                let mut inner_object_builder = outer_object_builder.new_object("door 1");
1046
1047                // create inner_object_list
1048                inner_object_builder
1049                    .new_list("items")
1050                    .with_value("apple")
1051                    .with_value(false)
1052                    .finish();
1053
1054                inner_object_builder.finish();
1055            }
1056
1057            outer_object_builder.finish();
1058        }
1059
1060        let (metadata, value) = builder.finish();
1061        let variant = Variant::try_new(&metadata, &value).unwrap();
1062        let outer_object = variant.as_object().unwrap();
1063
1064        assert_eq!(outer_object.len(), 1);
1065        assert_eq!(outer_object.field_name(0).unwrap(), "door 1");
1066
1067        let inner_object_variant = outer_object.field(0).unwrap();
1068        let inner_object = inner_object_variant.as_object().unwrap();
1069
1070        assert_eq!(inner_object.len(), 1);
1071        assert_eq!(inner_object.field_name(0).unwrap(), "items");
1072
1073        let items_variant = inner_object.field(0).unwrap();
1074        let items_list = items_variant.as_list().unwrap();
1075
1076        assert_eq!(items_list.len(), 2);
1077        assert_eq!(items_list.get(0).unwrap(), Variant::from("apple"));
1078        assert_eq!(items_list.get(1).unwrap(), Variant::from(false));
1079    }
1080
1081    #[test]
1082    fn test_sorted_dictionary() {
1083        // check if variant metadatabuilders are equivalent from different ways of constructing them
1084        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
1085
1086        let mut variant2 = {
1087            let mut builder = VariantBuilder::new();
1088
1089            builder.add_field_name("b");
1090            builder.add_field_name("c");
1091            builder.add_field_name("d");
1092
1093            builder
1094        };
1095
1096        assert_eq!(
1097            variant1.metadata_builder.field_names,
1098            variant2.metadata_builder.field_names
1099        );
1100
1101        // check metadata builders say it's sorted
1102        assert!(variant1.metadata_builder.is_sorted);
1103        assert!(variant2.metadata_builder.is_sorted);
1104
1105        {
1106            // test the bad case and break the sort order
1107            variant2.add_field_name("a");
1108            assert!(!variant2.metadata_builder.is_sorted);
1109
1110            // per the spec, make sure the variant will fail to build if only metadata is provided
1111            let (m, v) = variant2.finish();
1112            let res = Variant::try_new(&m, &v);
1113            assert!(res.is_err());
1114
1115            // since it is not sorted, make sure the metadata says so
1116            let header = VariantMetadata::try_new(&m).unwrap();
1117            assert!(!header.is_sorted());
1118        }
1119
1120        // write out variant1 and make sure the sorted flag is properly encoded
1121        variant1.append_value(false);
1122
1123        let (m, v) = variant1.finish();
1124        let res = Variant::try_new(&m, &v);
1125        assert!(res.is_ok());
1126
1127        let header = VariantMetadata::try_new(&m).unwrap();
1128        assert!(header.is_sorted());
1129    }
1130
1131    #[test]
1132    fn test_object_sorted_dictionary() {
1133        // predefine the list of field names
1134        let mut variant1 = VariantBuilder::new().with_field_names(["a", "b", "c"]);
1135        let mut obj = variant1.new_object();
1136
1137        obj.insert("c", true);
1138        obj.insert("a", false);
1139        obj.insert("b", ());
1140
1141        // verify the field ids are correctly
1142        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
1143        assert_eq!(field_ids_by_insert_order, vec![2, 0, 1]);
1144
1145        // add a field name that wasn't pre-defined but doesn't break the sort order
1146        obj.insert("d", 2);
1147        obj.finish();
1148
1149        let (metadata, value) = variant1.finish();
1150        let variant = Variant::try_new(&metadata, &value).unwrap();
1151
1152        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1153        assert!(metadata.is_sorted());
1154
1155        // verify object is sorted by field name order
1156        let object = variant.as_object().unwrap();
1157        let field_names = object
1158            .iter()
1159            .map(|(field_name, _)| field_name)
1160            .collect::<Vec<_>>();
1161
1162        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
1163    }
1164
1165    #[test]
1166    fn test_object_not_sorted_dictionary() {
1167        // predefine the list of field names
1168        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
1169        let mut obj = variant1.new_object();
1170
1171        obj.insert("c", true);
1172        obj.insert("d", false);
1173        obj.insert("b", ());
1174
1175        // verify the field ids are correctly
1176        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
1177        assert_eq!(field_ids_by_insert_order, vec![1, 2, 0]);
1178
1179        // add a field name that wasn't pre-defined but breaks the sort order
1180        obj.insert("a", 2);
1181        obj.finish();
1182
1183        let (metadata, value) = variant1.finish();
1184        let variant = Variant::try_new(&metadata, &value).unwrap();
1185
1186        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1187        assert!(!metadata.is_sorted());
1188
1189        // verify object field names are sorted by field name order
1190        let object = variant.as_object().unwrap();
1191        let field_names = object
1192            .iter()
1193            .map(|(field_name, _)| field_name)
1194            .collect::<Vec<_>>();
1195
1196        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
1197    }
1198
1199    #[test]
1200    fn test_building_sorted_dictionary() {
1201        let mut builder = VariantBuilder::new();
1202        assert!(!builder.metadata_builder.is_sorted);
1203        assert_eq!(builder.metadata_builder.num_field_names(), 0);
1204
1205        builder.add_field_name("a");
1206
1207        assert!(builder.metadata_builder.is_sorted);
1208        assert_eq!(builder.metadata_builder.num_field_names(), 1);
1209
1210        let builder = builder.with_field_names(["b", "c", "d"]);
1211
1212        assert!(builder.metadata_builder.is_sorted);
1213        assert_eq!(builder.metadata_builder.num_field_names(), 4);
1214
1215        let builder = builder.with_field_names(["z", "y"]);
1216        assert!(!builder.metadata_builder.is_sorted);
1217        assert_eq!(builder.metadata_builder.num_field_names(), 6);
1218    }
1219
1220    #[test]
1221    fn test_variant_builder_to_list_builder_no_finish() {
1222        // Create a list builder but never finish it
1223        let mut builder = VariantBuilder::new();
1224        let mut list_builder = builder.new_list();
1225        list_builder.append_value("hi");
1226        drop(list_builder);
1227
1228        builder.append_value(42i8);
1229
1230        // The original builder should be unchanged
1231        let (metadata, value) = builder.finish();
1232        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1233        assert!(metadata.is_empty());
1234
1235        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1236        assert_eq!(variant, Variant::Int8(42));
1237    }
1238
1239    #[test]
1240    fn test_variant_builder_to_object_builder_no_finish() {
1241        // Create an object builder but never finish it
1242        let mut builder = VariantBuilder::new();
1243        let mut object_builder = builder.new_object();
1244        object_builder.insert("name", "unknown");
1245        drop(object_builder);
1246
1247        builder.append_value(42i8);
1248
1249        // The original builder should be unchanged
1250        let (metadata, value) = builder.finish();
1251        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1252        assert!(metadata.is_empty()); // rolled back
1253
1254        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1255        assert_eq!(variant, Variant::Int8(42));
1256    }
1257
1258    #[test]
1259    fn test_list_builder_to_list_builder_inner_no_finish() {
1260        let mut builder = VariantBuilder::new();
1261        let mut list_builder = builder.new_list();
1262        list_builder.append_value(1i8);
1263
1264        // Create a nested list builder but never finish it
1265        let mut nested_list_builder = list_builder.new_list();
1266        nested_list_builder.append_value("hi");
1267        drop(nested_list_builder);
1268
1269        list_builder.append_value(2i8);
1270
1271        // The parent list should only contain the original values
1272        list_builder.finish();
1273        let (metadata, value) = builder.finish();
1274        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1275        assert!(metadata.is_empty());
1276
1277        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1278        let list = variant.as_list().unwrap();
1279        assert_eq!(list.len(), 2);
1280        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
1281        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
1282    }
1283
1284    #[test]
1285    fn test_list_builder_to_list_builder_outer_no_finish() {
1286        let mut builder = VariantBuilder::new();
1287        let mut list_builder = builder.new_list();
1288        list_builder.append_value(1i8);
1289
1290        // Create a nested list builder and finish it
1291        let mut nested_list_builder = list_builder.new_list();
1292        nested_list_builder.append_value("hi");
1293        nested_list_builder.finish();
1294
1295        // Drop the outer list builder without finishing it
1296        drop(list_builder);
1297
1298        builder.append_value(2i8);
1299
1300        // Only the second attempt should appear in the final variant
1301        let (metadata, value) = builder.finish();
1302        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1303        assert!(metadata.is_empty());
1304
1305        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1306        assert_eq!(variant, Variant::Int8(2));
1307    }
1308
1309    #[test]
1310    fn test_list_builder_to_object_builder_inner_no_finish() {
1311        let mut builder = VariantBuilder::new();
1312        let mut list_builder = builder.new_list();
1313        list_builder.append_value(1i8);
1314
1315        // Create a nested object builder but never finish it
1316        let mut nested_object_builder = list_builder.new_object();
1317        nested_object_builder.insert("name", "unknown");
1318        drop(nested_object_builder);
1319
1320        list_builder.append_value(2i8);
1321
1322        // The parent list should only contain the original values
1323        list_builder.finish();
1324        let (metadata, value) = builder.finish();
1325        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1326        assert!(metadata.is_empty());
1327
1328        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1329        let list = variant.as_list().unwrap();
1330        assert_eq!(list.len(), 2);
1331        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
1332        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
1333    }
1334
1335    #[test]
1336    fn test_list_builder_to_object_builder_outer_no_finish() {
1337        let mut builder = VariantBuilder::new();
1338        let mut list_builder = builder.new_list();
1339        list_builder.append_value(1i8);
1340
1341        // Create a nested object builder and finish it
1342        let mut nested_object_builder = list_builder.new_object();
1343        nested_object_builder.insert("name", "unknown");
1344        nested_object_builder.finish();
1345
1346        // Drop the outer list builder without finishing it
1347        drop(list_builder);
1348
1349        builder.append_value(2i8);
1350
1351        // Only the second attempt should appear in the final variant
1352        let (metadata, value) = builder.finish();
1353        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1354        assert!(metadata.is_empty()); // rolled back
1355
1356        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1357        assert_eq!(variant, Variant::Int8(2));
1358    }
1359
1360    #[test]
1361    fn test_object_builder_to_list_builder_inner_no_finish() {
1362        let mut builder = VariantBuilder::new();
1363        let mut object_builder = builder.new_object();
1364        object_builder.insert("first", 1i8);
1365
1366        // Create a nested list builder but never finish it
1367        let mut nested_list_builder = object_builder.new_list("nested");
1368        nested_list_builder.append_value("hi");
1369        drop(nested_list_builder);
1370
1371        object_builder.insert("second", 2i8);
1372
1373        // The parent object should only contain the original fields
1374        object_builder.finish();
1375        let (metadata, value) = builder.finish();
1376
1377        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1378        assert_eq!(metadata.len(), 2);
1379        assert_eq!(&metadata[0], "first");
1380        assert_eq!(&metadata[1], "second");
1381
1382        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1383        let obj = variant.as_object().unwrap();
1384        assert_eq!(obj.len(), 2);
1385        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
1386        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
1387    }
1388
1389    #[test]
1390    fn test_object_builder_to_list_builder_outer_no_finish() {
1391        let mut builder = VariantBuilder::new();
1392        let mut object_builder = builder.new_object();
1393        object_builder.insert("first", 1i8);
1394
1395        // Create a nested list builder and finish it
1396        let mut nested_list_builder = object_builder.new_list("nested");
1397        nested_list_builder.append_value("hi");
1398        nested_list_builder.finish();
1399
1400        // Drop the outer object builder without finishing it
1401        drop(object_builder);
1402
1403        builder.append_value(2i8);
1404
1405        // Only the second attempt should appear in the final variant
1406        let (metadata, value) = builder.finish();
1407        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1408        assert!(metadata.is_empty()); // rolled back
1409
1410        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1411        assert_eq!(variant, Variant::Int8(2));
1412    }
1413
1414    #[test]
1415    fn test_object_builder_to_object_builder_inner_no_finish() {
1416        let mut builder = VariantBuilder::new();
1417        let mut object_builder = builder.new_object();
1418        object_builder.insert("first", 1i8);
1419
1420        // Create a nested object builder but never finish it
1421        let mut nested_object_builder = object_builder.new_object("nested");
1422        nested_object_builder.insert("name", "unknown");
1423        drop(nested_object_builder);
1424
1425        object_builder.insert("second", 2i8);
1426
1427        // The parent object should only contain the original fields
1428        object_builder.finish();
1429        let (metadata, value) = builder.finish();
1430
1431        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1432        assert_eq!(metadata.len(), 2); // the fields of nested_object_builder has been rolled back
1433        assert_eq!(&metadata[0], "first");
1434        assert_eq!(&metadata[1], "second");
1435
1436        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1437        let obj = variant.as_object().unwrap();
1438        assert_eq!(obj.len(), 2);
1439        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
1440        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
1441    }
1442
1443    #[test]
1444    fn test_object_builder_to_object_builder_outer_no_finish() {
1445        let mut builder = VariantBuilder::new();
1446        let mut object_builder = builder.new_object();
1447        object_builder.insert("first", 1i8);
1448
1449        // Create a nested object builder and finish it
1450        let mut nested_object_builder = object_builder.new_object("nested");
1451        nested_object_builder.insert("name", "unknown");
1452        nested_object_builder.finish();
1453
1454        // Drop the outer object builder without finishing it
1455        drop(object_builder);
1456
1457        builder.append_value(2i8);
1458
1459        // Only the second attempt should appear in the final variant
1460        let (metadata, value) = builder.finish();
1461        let metadata = VariantMetadata::try_new(&metadata).unwrap();
1462        assert_eq!(metadata.len(), 0); // rolled back
1463
1464        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
1465        assert_eq!(variant, Variant::Int8(2));
1466    }
1467
1468    // Make sure that we can correctly build deeply nested objects even when some of the nested
1469    // builders don't finish.
1470    #[test]
1471    fn test_append_list_object_list_object() {
1472        // An infinite counter
1473        let mut counter = 0..;
1474        let mut take = move |i| (&mut counter).take(i).collect::<Vec<_>>();
1475        let mut builder = VariantBuilder::new();
1476        let skip = 5;
1477        {
1478            let mut list = builder.new_list();
1479            for i in take(4) {
1480                let mut object = list.new_object();
1481                for i in take(4) {
1482                    let field_name = format!("field{i}");
1483                    let mut list = object.new_list(&field_name);
1484                    for i in take(3) {
1485                        let mut object = list.new_object();
1486                        for i in take(3) {
1487                            if i % skip != 0 {
1488                                object.insert(&format!("field{i}"), i);
1489                            }
1490                        }
1491                        if i % skip != 0 {
1492                            object.finish();
1493                        }
1494                    }
1495                    if i % skip != 0 {
1496                        list.finish();
1497                    }
1498                }
1499                if i % skip != 0 {
1500                    object.finish();
1501                }
1502            }
1503            list.finish();
1504        }
1505        let (metadata, value) = builder.finish();
1506        let v1 = Variant::try_new(&metadata, &value).unwrap();
1507
1508        let (metadata, value) = VariantBuilder::new().with_value(v1.clone()).finish();
1509        let v2 = Variant::try_new(&metadata, &value).unwrap();
1510
1511        assert_eq!(format!("{v1:?}"), format!("{v2:?}"));
1512    }
1513
1514    #[test]
1515    fn test_append_variant_bytes_round_trip() {
1516        // Create a complex variant with the normal builder
1517        let mut builder = VariantBuilder::new();
1518        {
1519            let mut obj = builder.new_object();
1520            obj.insert("name", "Alice");
1521            obj.insert("age", 30i32);
1522            {
1523                let mut scores_list = obj.new_list("scores");
1524                scores_list.append_value(95i32);
1525                scores_list.append_value(87i32);
1526                scores_list.append_value(92i32);
1527                scores_list.finish();
1528            }
1529            {
1530                let mut address = obj.new_object("address");
1531                address.insert("street", "123 Main St");
1532                address.insert("city", "Anytown");
1533                address.finish();
1534            }
1535            obj.finish();
1536        }
1537        let (metadata, value1) = builder.finish();
1538        let variant1 = Variant::try_new(&metadata, &value1).unwrap();
1539
1540        // Copy using the new bytes API
1541        let metadata = VariantMetadata::new(&metadata);
1542        let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
1543        let mut builder2 = ValueBuilder::new();
1544        let state = ParentState::variant(&mut builder2, &mut metadata);
1545        ValueBuilder::append_variant_bytes(state, variant1);
1546        let value2 = builder2.into_inner();
1547
1548        // The bytes should be identical, we merely copied them across.
1549        assert_eq!(value1, value2);
1550    }
1551
1552    #[test]
1553    fn test_object_insert_bytes_subset() {
1554        // Create an original object, making sure to inject the field names we'll add later.
1555        let mut builder = VariantBuilder::new().with_field_names(["new_field", "another_field"]);
1556        {
1557            let mut obj = builder.new_object();
1558            obj.insert("field1", "value1");
1559            obj.insert("field2", 42i32);
1560            obj.insert("field3", true);
1561            obj.insert("field4", "value4");
1562            obj.finish();
1563        }
1564        let (metadata1, value1) = builder.finish();
1565        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
1566        let original_obj = original_variant.as_object().unwrap();
1567
1568        // Create a new object copying subset of fields interleaved with new ones
1569        let metadata2 = VariantMetadata::new(&metadata1);
1570        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
1571        let mut builder2 = ValueBuilder::new();
1572        let state = ParentState::variant(&mut builder2, &mut metadata2);
1573        {
1574            let mut obj = ObjectBuilder::new(state, true);
1575
1576            // Copy field1 using bytes API
1577            obj.insert_bytes("field1", original_obj.get("field1").unwrap());
1578
1579            // Add new field
1580            obj.insert("new_field", "new_value");
1581
1582            // Copy field3 using bytes API
1583            obj.insert_bytes("field3", original_obj.get("field3").unwrap());
1584
1585            // Add another new field
1586            obj.insert("another_field", 99i32);
1587
1588            // Copy field2 using bytes API
1589            obj.insert_bytes("field2", original_obj.get("field2").unwrap());
1590
1591            obj.finish();
1592        }
1593        let value2 = builder2.into_inner();
1594        let result_variant = Variant::try_new(&metadata1, &value2).unwrap();
1595        let result_obj = result_variant.as_object().unwrap();
1596
1597        // Verify the object contains expected fields
1598        assert_eq!(result_obj.len(), 5);
1599        assert_eq!(
1600            result_obj.get("field1").unwrap().as_string().unwrap(),
1601            "value1"
1602        );
1603        assert_eq!(result_obj.get("field2").unwrap().as_int32().unwrap(), 42);
1604        assert!(result_obj.get("field3").unwrap().as_boolean().unwrap());
1605        assert_eq!(
1606            result_obj.get("new_field").unwrap().as_string().unwrap(),
1607            "new_value"
1608        );
1609        assert_eq!(
1610            result_obj.get("another_field").unwrap().as_int32().unwrap(),
1611            99
1612        );
1613    }
1614
1615    #[test]
1616    fn test_complex_nested_filtering_injection() {
1617        // Create a complex nested structure: object -> list -> objects. Make sure to pre-register
1618        // the extra field names we'll need later while manipulating variant bytes.
1619        let mut builder = VariantBuilder::new().with_field_names([
1620            "active_count",
1621            "active_users",
1622            "computed_score",
1623            "processed_at",
1624            "status",
1625        ]);
1626
1627        {
1628            let mut root_obj = builder.new_object();
1629            root_obj.insert("metadata", "original");
1630
1631            {
1632                let mut users_list = root_obj.new_list("users");
1633
1634                // User 1
1635                {
1636                    let mut user1 = users_list.new_object();
1637                    user1.insert("id", 1i32);
1638                    user1.insert("name", "Alice");
1639                    user1.insert("active", true);
1640                    user1.finish();
1641                }
1642
1643                // User 2
1644                {
1645                    let mut user2 = users_list.new_object();
1646                    user2.insert("id", 2i32);
1647                    user2.insert("name", "Bob");
1648                    user2.insert("active", false);
1649                    user2.finish();
1650                }
1651
1652                // User 3
1653                {
1654                    let mut user3 = users_list.new_object();
1655                    user3.insert("id", 3i32);
1656                    user3.insert("name", "Charlie");
1657                    user3.insert("active", true);
1658                    user3.finish();
1659                }
1660
1661                users_list.finish();
1662            }
1663
1664            root_obj.insert("total_count", 3i32);
1665            root_obj.finish();
1666        }
1667        let (metadata1, value1) = builder.finish();
1668        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
1669        let original_obj = original_variant.as_object().unwrap();
1670        let original_users = original_obj.get("users").unwrap();
1671        let original_users = original_users.as_list().unwrap();
1672
1673        // Create filtered/modified version: only copy active users and inject new data
1674        let metadata2 = VariantMetadata::new(&metadata1);
1675        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
1676        let mut builder2 = ValueBuilder::new();
1677        let state = ParentState::variant(&mut builder2, &mut metadata2);
1678        {
1679            let mut root_obj = ObjectBuilder::new(state, true);
1680
1681            // Copy metadata using bytes API
1682            root_obj.insert_bytes("metadata", original_obj.get("metadata").unwrap());
1683
1684            // Add processing timestamp
1685            root_obj.insert("processed_at", "2024-01-01T00:00:00Z");
1686
1687            {
1688                let mut filtered_users = root_obj.new_list("active_users");
1689
1690                // Copy only active users and inject additional data
1691                for i in 0..original_users.len() {
1692                    let user = original_users.get(i).unwrap();
1693                    let user = user.as_object().unwrap();
1694                    if user.get("active").unwrap().as_boolean().unwrap() {
1695                        {
1696                            let mut new_user = filtered_users.new_object();
1697
1698                            // Copy existing fields using bytes API
1699                            new_user.insert_bytes("id", user.get("id").unwrap());
1700                            new_user.insert_bytes("name", user.get("name").unwrap());
1701
1702                            // Inject new computed field
1703                            let user_id = user.get("id").unwrap().as_int32().unwrap();
1704                            new_user.insert("computed_score", user_id * 10);
1705
1706                            // Add status transformation (don't copy the 'active' field)
1707                            new_user.insert("status", "verified");
1708
1709                            new_user.finish();
1710                        }
1711                    }
1712                }
1713
1714                // Inject a completely new user
1715                {
1716                    let mut new_user = filtered_users.new_object();
1717                    new_user.insert("id", 999i32);
1718                    new_user.insert("name", "System User");
1719                    new_user.insert("computed_score", 0i32);
1720                    new_user.insert("status", "system");
1721                    new_user.finish();
1722                }
1723
1724                filtered_users.finish();
1725            }
1726
1727            // Update count
1728            root_obj.insert("active_count", 3i32); // 2 active + 1 new
1729
1730            root_obj.finish();
1731        }
1732        let value2 = builder2.into_inner();
1733        let result_variant = Variant::try_new(&metadata1, &value2).unwrap();
1734        let result_obj = result_variant.as_object().unwrap();
1735
1736        // Verify the filtered/modified structure
1737        assert_eq!(
1738            result_obj.get("metadata").unwrap().as_string().unwrap(),
1739            "original"
1740        );
1741        assert_eq!(
1742            result_obj.get("processed_at").unwrap().as_string().unwrap(),
1743            "2024-01-01T00:00:00Z"
1744        );
1745        assert_eq!(
1746            result_obj.get("active_count").unwrap().as_int32().unwrap(),
1747            3
1748        );
1749
1750        let active_users = result_obj.get("active_users").unwrap();
1751        let active_users = active_users.as_list().unwrap();
1752        assert_eq!(active_users.len(), 3);
1753
1754        // Verify Alice (id=1, was active)
1755        let alice = active_users.get(0).unwrap();
1756        let alice = alice.as_object().unwrap();
1757        assert_eq!(alice.get("id").unwrap().as_int32().unwrap(), 1);
1758        assert_eq!(alice.get("name").unwrap().as_string().unwrap(), "Alice");
1759        assert_eq!(alice.get("computed_score").unwrap().as_int32().unwrap(), 10);
1760        assert_eq!(
1761            alice.get("status").unwrap().as_string().unwrap(),
1762            "verified"
1763        );
1764        assert!(alice.get("active").is_none()); // This field was not copied
1765
1766        // Verify Charlie (id=3, was active) - Bob (id=2) was not active so not included
1767        let charlie = active_users.get(1).unwrap();
1768        let charlie = charlie.as_object().unwrap();
1769        assert_eq!(charlie.get("id").unwrap().as_int32().unwrap(), 3);
1770        assert_eq!(charlie.get("name").unwrap().as_string().unwrap(), "Charlie");
1771        assert_eq!(
1772            charlie.get("computed_score").unwrap().as_int32().unwrap(),
1773            30
1774        );
1775        assert_eq!(
1776            charlie.get("status").unwrap().as_string().unwrap(),
1777            "verified"
1778        );
1779
1780        // Verify injected system user
1781        let system_user = active_users.get(2).unwrap();
1782        let system_user = system_user.as_object().unwrap();
1783        assert_eq!(system_user.get("id").unwrap().as_int32().unwrap(), 999);
1784        assert_eq!(
1785            system_user.get("name").unwrap().as_string().unwrap(),
1786            "System User"
1787        );
1788        assert_eq!(
1789            system_user
1790                .get("computed_score")
1791                .unwrap()
1792                .as_int32()
1793                .unwrap(),
1794            0
1795        );
1796        assert_eq!(
1797            system_user.get("status").unwrap().as_string().unwrap(),
1798            "system"
1799        );
1800    }
1801}