Skip to main content

arrow_json/writer/
encoder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use std::io::Write;
18use std::sync::Arc;
19
20use crate::StructMode;
21use arrow_array::cast::AsArray;
22use arrow_array::types::*;
23use arrow_array::*;
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
25use arrow_cast::display::{ArrayFormatter, FormatOptions};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use half::f16;
28use lexical_core::FormattedSize;
29use serde_core::Serializer;
30
31/// Configuration options for the JSON encoder.
32#[derive(Debug, Clone, Default)]
33pub struct EncoderOptions {
34    /// Whether to include nulls in the output or elide them.
35    explicit_nulls: bool,
36    /// Whether to encode structs as JSON objects or JSON arrays of their values.
37    struct_mode: StructMode,
38    /// An optional hook for customizing encoding behavior.
39    encoder_factory: Option<Arc<dyn EncoderFactory>>,
40    /// Optional date format for date arrays
41    date_format: Option<String>,
42    /// Optional datetime format for datetime arrays
43    datetime_format: Option<String>,
44    /// Optional timestamp format for timestamp arrays
45    timestamp_format: Option<String>,
46    /// Optional timestamp format for timestamp with timezone arrays
47    timestamp_tz_format: Option<String>,
48    /// Optional time format for time arrays
49    time_format: Option<String>,
50}
51
52impl EncoderOptions {
53    /// Set whether to include nulls in the output or elide them.
54    pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self {
55        self.explicit_nulls = explicit_nulls;
56        self
57    }
58
59    /// Set whether to encode structs as JSON objects or JSON arrays of their values.
60    pub fn with_struct_mode(mut self, struct_mode: StructMode) -> Self {
61        self.struct_mode = struct_mode;
62        self
63    }
64
65    /// Set an optional hook for customizing encoding behavior.
66    pub fn with_encoder_factory(mut self, encoder_factory: Arc<dyn EncoderFactory>) -> Self {
67        self.encoder_factory = Some(encoder_factory);
68        self
69    }
70
71    /// Get whether to include nulls in the output or elide them.
72    pub fn explicit_nulls(&self) -> bool {
73        self.explicit_nulls
74    }
75
76    /// Get whether to encode structs as JSON objects or JSON arrays of their values.
77    pub fn struct_mode(&self) -> StructMode {
78        self.struct_mode
79    }
80
81    /// Get the optional hook for customizing encoding behavior.
82    pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
83        self.encoder_factory.as_ref()
84    }
85
86    /// Set the JSON file's date format
87    pub fn with_date_format(mut self, format: String) -> Self {
88        self.date_format = Some(format);
89        self
90    }
91
92    /// Get the JSON file's date format if set, defaults to RFC3339
93    pub fn date_format(&self) -> Option<&str> {
94        self.date_format.as_deref()
95    }
96
97    /// Set the JSON file's datetime format
98    pub fn with_datetime_format(mut self, format: String) -> Self {
99        self.datetime_format = Some(format);
100        self
101    }
102
103    /// Get the JSON file's datetime format if set, defaults to RFC3339
104    pub fn datetime_format(&self) -> Option<&str> {
105        self.datetime_format.as_deref()
106    }
107
108    /// Set the JSON file's time format
109    pub fn with_time_format(mut self, format: String) -> Self {
110        self.time_format = Some(format);
111        self
112    }
113
114    /// Get the JSON file's datetime time if set, defaults to RFC3339
115    pub fn time_format(&self) -> Option<&str> {
116        self.time_format.as_deref()
117    }
118
119    /// Set the JSON file's timestamp format
120    pub fn with_timestamp_format(mut self, format: String) -> Self {
121        self.timestamp_format = Some(format);
122        self
123    }
124
125    /// Get the JSON file's timestamp format if set, defaults to RFC3339
126    pub fn timestamp_format(&self) -> Option<&str> {
127        self.timestamp_format.as_deref()
128    }
129
130    /// Set the JSON file's timestamp tz format
131    pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
132        self.timestamp_tz_format = Some(tz_format);
133        self
134    }
135
136    /// Get the JSON file's timestamp tz format if set, defaults to RFC3339
137    pub fn timestamp_tz_format(&self) -> Option<&str> {
138        self.timestamp_tz_format.as_deref()
139    }
140}
141
142/// A trait to create custom encoders for specific data types.
143///
144/// This allows overriding the default encoders for specific data types,
145/// or adding new encoders for custom data types.
146///
147/// # Examples
148///
149/// ```
150/// use std::io::Write;
151/// use arrow_array::{ArrayAccessor, Array, BinaryArray, Float64Array, RecordBatch};
152/// use arrow_array::cast::AsArray;
153/// use arrow_schema::{DataType, Field, Schema, FieldRef};
154/// use arrow_json::{writer::{WriterBuilder, JsonArray, NullableEncoder}, StructMode};
155/// use arrow_json::{Encoder, EncoderFactory, EncoderOptions};
156/// use arrow_schema::ArrowError;
157/// use std::sync::Arc;
158/// use serde_json::json;
159/// use serde_json::Value;
160///
161/// struct IntArrayBinaryEncoder<B> {
162///     array: B,
163/// }
164///
165/// impl<'a, B> Encoder for IntArrayBinaryEncoder<B>
166/// where
167///     B: ArrayAccessor<Item = &'a [u8]>,
168/// {
169///     fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
170///         out.push(b'[');
171///         let child = self.array.value(idx);
172///         for (idx, byte) in child.iter().enumerate() {
173///             write!(out, "{byte}").unwrap();
174///             if idx < child.len() - 1 {
175///                 out.push(b',');
176///             }
177///         }
178///         out.push(b']');
179///     }
180/// }
181///
182/// #[derive(Debug)]
183/// struct IntArayBinaryEncoderFactory;
184///
185/// impl EncoderFactory for IntArayBinaryEncoderFactory {
186///     fn make_default_encoder<'a>(
187///         &self,
188///         _field: &'a FieldRef,
189///         array: &'a dyn Array,
190///         _options: &'a EncoderOptions,
191///     ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
192///         match array.data_type() {
193///             DataType::Binary => {
194///                 let array = array.as_binary::<i32>();
195///                 let encoder = IntArrayBinaryEncoder { array };
196///                 let array_encoder = Box::new(encoder) as Box<dyn Encoder + 'a>;
197///                 let nulls = array.nulls().cloned();
198///                 Ok(Some(NullableEncoder::new(array_encoder, nulls)))
199///             }
200///             _ => Ok(None),
201///         }
202///     }
203/// }
204///
205/// let binary_array = BinaryArray::from_iter([Some(b"a".as_slice()), None, Some(b"b".as_slice())]);
206/// let float_array = Float64Array::from(vec![Some(1.0), Some(2.3), None]);
207/// let fields = vec![
208///     Field::new("bytes", DataType::Binary, true),
209///     Field::new("float", DataType::Float64, true),
210/// ];
211/// let batch = RecordBatch::try_new(
212///     Arc::new(Schema::new(fields)),
213///     vec![
214///         Arc::new(binary_array) as Arc<dyn Array>,
215///         Arc::new(float_array) as Arc<dyn Array>,
216///     ],
217/// )
218/// .unwrap();
219///
220/// let json_value: Value = {
221///     let mut buf = Vec::new();
222///     let mut writer = WriterBuilder::new()
223///         .with_encoder_factory(Arc::new(IntArayBinaryEncoderFactory))
224///         .build::<_, JsonArray>(&mut buf);
225///     writer.write_batches(&[&batch]).unwrap();
226///     writer.finish().unwrap();
227///     serde_json::from_slice(&buf).unwrap()
228/// };
229///
230/// let expected = json!([
231///     {"bytes": [97], "float": 1.0},
232///     {"float": 2.3},
233///     {"bytes": [98]},
234/// ]);
235///
236/// assert_eq!(json_value, expected);
237/// ```
238pub trait EncoderFactory: std::fmt::Debug + Send + Sync {
239    /// Make an encoder that overrides the default encoder for a specific field and array or provides an encoder for a custom data type.
240    /// This can be used to override how e.g. binary data is encoded so that it is an encoded string or an array of integers.
241    ///
242    /// Note that the type of the field may not match the type of the array: for dictionary arrays unless the top-level dictionary is handled this
243    /// will be called again for the keys and values of the dictionary, at which point the field type will still be the outer dictionary type but the
244    /// array will have a different type.
245    /// For example, `field`` might have the type `Dictionary(i32, Utf8)` but `array` will be `Utf8`.
246    fn make_default_encoder<'a>(
247        &self,
248        _field: &'a FieldRef,
249        _array: &'a dyn Array,
250        _options: &'a EncoderOptions,
251    ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
252        Ok(None)
253    }
254}
255
256/// An encoder + a null buffer.
257/// This is packaged together into a wrapper struct to minimize dynamic dispatch for null checks.
258pub struct NullableEncoder<'a> {
259    encoder: Box<dyn Encoder + 'a>,
260    nulls: Option<NullBuffer>,
261}
262
263impl<'a> NullableEncoder<'a> {
264    /// Create a new encoder with a null buffer.
265    pub fn new(encoder: Box<dyn Encoder + 'a>, nulls: Option<NullBuffer>) -> Self {
266        Self { encoder, nulls }
267    }
268
269    /// Encode the value at index `idx` to `out`.
270    pub fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
271        self.encoder.encode(idx, out)
272    }
273
274    /// Returns whether the value at index `idx` is null.
275    pub fn is_null(&self, idx: usize) -> bool {
276        self.nulls.as_ref().is_some_and(|nulls| nulls.is_null(idx))
277    }
278
279    /// Returns whether the encoder has any nulls.
280    pub fn has_nulls(&self) -> bool {
281        match self.nulls {
282            Some(ref nulls) => nulls.null_count() > 0,
283            None => false,
284        }
285    }
286}
287
288impl Encoder for NullableEncoder<'_> {
289    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
290        self.encoder.encode(idx, out)
291    }
292}
293
/// Writes individual array values as JSON.
///
/// Implementations only deal with non-null values. Deciding what to do with nulls is left to
/// the caller so that they can be written implicitly, i.e. `{}` instead of `{"a": null}`.
pub trait Encoder {
    /// Append the JSON form of the non-null value at index `idx` to `out`.
    ///
    /// What happens when `idx` refers to a null slot is unspecified.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>);
}
303
304/// Creates an encoder for the given array and field.
305///
306/// This first calls the EncoderFactory if one is provided, and then falls back to the default encoders.
307pub fn make_encoder<'a>(
308    field: &'a FieldRef,
309    array: &'a dyn Array,
310    options: &'a EncoderOptions,
311) -> Result<NullableEncoder<'a>, ArrowError> {
312    macro_rules! primitive_helper {
313        ($t:ty) => {{
314            let array = array.as_primitive::<$t>();
315            let nulls = array.nulls().cloned();
316            NullableEncoder::new(Box::new(PrimitiveEncoder::new(array)), nulls)
317        }};
318    }
319
320    if let Some(factory) = options.encoder_factory() {
321        if let Some(encoder) = factory.make_default_encoder(field, array, options)? {
322            return Ok(encoder);
323        }
324    }
325
326    let nulls = array.nulls().cloned();
327    let encoder = downcast_integer! {
328        array.data_type() => (primitive_helper),
329        DataType::Float16 => primitive_helper!(Float16Type),
330        DataType::Float32 => primitive_helper!(Float32Type),
331        DataType::Float64 => primitive_helper!(Float64Type),
332        DataType::Boolean => {
333            let array = array.as_boolean();
334            NullableEncoder::new(Box::new(BooleanEncoder(array)), array.nulls().cloned())
335        }
336        DataType::Null => NullableEncoder::new(Box::new(NullEncoder), array.logical_nulls()),
337        DataType::Utf8 => {
338            let array = array.as_string::<i32>();
339            NullableEncoder::new(Box::new(StringEncoder(array)), array.nulls().cloned())
340        }
341        DataType::LargeUtf8 => {
342            let array = array.as_string::<i64>();
343            NullableEncoder::new(Box::new(StringEncoder(array)), array.nulls().cloned())
344        }
345        DataType::Utf8View => {
346            let array = array.as_string_view();
347            NullableEncoder::new(Box::new(StringViewEncoder(array)), array.nulls().cloned())
348        }
349        DataType::BinaryView => {
350            let array = array.as_binary_view();
351            NullableEncoder::new(Box::new(BinaryViewEncoder(array)), array.nulls().cloned())
352        }
353        DataType::List(_) => {
354            let array = array.as_list::<i32>();
355            NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned())
356        }
357        DataType::LargeList(_) => {
358            let array = array.as_list::<i64>();
359            NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned())
360        }
361        DataType::FixedSizeList(_, _) => {
362            let array = array.as_fixed_size_list();
363            NullableEncoder::new(Box::new(FixedSizeListEncoder::try_new(field, array, options)?), array.nulls().cloned())
364        }
365
366        DataType::Dictionary(_, _) => downcast_dictionary_array! {
367            array => {
368                NullableEncoder::new(Box::new(DictionaryEncoder::try_new(field, array, options)?), array.nulls().cloned())
369            },
370            _ => unreachable!()
371        }
372
373        DataType::RunEndEncoded(_, _) => downcast_run_array! {
374            array => {
375                NullableEncoder::new(
376                    Box::new(RunEndEncodedEncoder::try_new(field, array, options)?),
377                    array.logical_nulls(),
378                )
379            },
380            _ => unreachable!()
381        }
382
383        DataType::Map(_, _) => {
384            let array = array.as_map();
385            NullableEncoder::new(Box::new(MapEncoder::try_new(field, array, options)?), array.nulls().cloned())
386        }
387
388        DataType::FixedSizeBinary(_) => {
389            let array = array.as_fixed_size_binary();
390            NullableEncoder::new(Box::new(BinaryEncoder::new(array)) as _, array.nulls().cloned())
391        }
392
393        DataType::Binary => {
394            let array: &BinaryArray = array.as_binary();
395            NullableEncoder::new(Box::new(BinaryEncoder::new(array)), array.nulls().cloned())
396        }
397
398        DataType::LargeBinary => {
399            let array: &LargeBinaryArray = array.as_binary();
400            NullableEncoder::new(Box::new(BinaryEncoder::new(array)), array.nulls().cloned())
401        }
402
403        DataType::Struct(fields) => {
404            let array = array.as_struct();
405            let encoders = fields.iter().zip(array.columns()).map(|(field, array)| {
406                let encoder = make_encoder(field, array, options)?;
407                Ok(FieldEncoder{
408                    field: field.clone(),
409                    encoder,
410                })
411            }).collect::<Result<Vec<_>, ArrowError>>()?;
412
413            let encoder = StructArrayEncoder{
414                encoders,
415                explicit_nulls: options.explicit_nulls(),
416                struct_mode: options.struct_mode(),
417            };
418            let nulls = array.nulls().cloned();
419            NullableEncoder::new(Box::new(encoder) as Box<dyn Encoder + 'a>, nulls)
420        }
421        DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
422            let options = FormatOptions::new().with_display_error(true);
423            let formatter = JsonArrayFormatter::new(ArrayFormatter::try_new(array, &options)?);
424            NullableEncoder::new(Box::new(RawArrayFormatter(formatter)) as Box<dyn Encoder + 'a>, nulls)
425        }
426        d => match d.is_temporal() {
427            true => {
428                // Note: the implementation of Encoder for ArrayFormatter assumes it does not produce
429                // characters that would need to be escaped within a JSON string, e.g. `'"'`.
430                // If support for user-provided format specifications is added, this assumption
431                // may need to be revisited
432                let fops = FormatOptions::new().with_display_error(true)
433                .with_date_format(options.date_format.as_deref())
434                .with_datetime_format(options.datetime_format.as_deref())
435                .with_timestamp_format(options.timestamp_format.as_deref())
436                .with_timestamp_tz_format(options.timestamp_tz_format.as_deref())
437                .with_time_format(options.time_format.as_deref());
438
439                let formatter = ArrayFormatter::try_new(array, &fops)?;
440                let formatter = JsonArrayFormatter::new(formatter);
441                NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 'a>, nulls)
442            }
443            false => return Err(ArrowError::JsonError(format!(
444                "Unsupported data type for JSON encoding: {d:?}",
445            )))
446        }
447    };
448
449    Ok(encoder)
450}
451
452fn encode_string(s: &str, out: &mut Vec<u8>) {
453    let mut serializer = serde_json::Serializer::new(out);
454    serializer.serialize_str(s).unwrap();
455}
456
/// Appends `bytes` to `out` as a double-quoted, lowercase hexadecimal string
/// (two digits per byte).
fn encode_binary(bytes: &[u8], out: &mut Vec<u8>) {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    out.push(b'"');
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(HEX_DIGITS[usize::from(byte >> 4)]);
        out.push(HEX_DIGITS[usize::from(byte & 0x0f)]);
    }
    out.push(b'"');
}
464
465struct FieldEncoder<'a> {
466    field: FieldRef,
467    encoder: NullableEncoder<'a>,
468}
469
470impl FieldEncoder<'_> {
471    fn is_null(&self, idx: usize) -> bool {
472        self.encoder.is_null(idx)
473    }
474}
475
476struct StructArrayEncoder<'a> {
477    encoders: Vec<FieldEncoder<'a>>,
478    explicit_nulls: bool,
479    struct_mode: StructMode,
480}
481
482impl Encoder for StructArrayEncoder<'_> {
483    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
484        match self.struct_mode {
485            StructMode::ObjectOnly => out.push(b'{'),
486            StructMode::ListOnly => out.push(b'['),
487        }
488        let mut is_first = true;
489        // Nulls can only be dropped in explicit mode
490        let drop_nulls = (self.struct_mode == StructMode::ObjectOnly) && !self.explicit_nulls;
491
492        for field_encoder in self.encoders.iter_mut() {
493            let is_null = field_encoder.is_null(idx);
494            if is_null && drop_nulls {
495                continue;
496            }
497
498            if !is_first {
499                out.push(b',');
500            }
501            is_first = false;
502
503            if self.struct_mode == StructMode::ObjectOnly {
504                encode_string(field_encoder.field.name(), out);
505                out.push(b':');
506            }
507
508            if is_null {
509                out.extend_from_slice(b"null");
510            } else {
511                field_encoder.encoder.encode(idx, out);
512            }
513        }
514        match self.struct_mode {
515            StructMode::ObjectOnly => out.push(b'}'),
516            StructMode::ListOnly => out.push(b']'),
517        }
518    }
519}
520
521trait PrimitiveEncode: ArrowNativeType {
522    type Buffer;
523
524    // Workaround https://github.com/rust-lang/rust/issues/61415
525    fn init_buffer() -> Self::Buffer;
526
527    /// Encode the primitive value as bytes, returning a reference to that slice.
528    ///
529    /// `buf` is temporary space that may be used
530    fn encode(self, buf: &mut Self::Buffer) -> &[u8];
531}
532
533macro_rules! integer_encode {
534    ($($t:ty),*) => {
535        $(
536            impl PrimitiveEncode for $t {
537                type Buffer = [u8; Self::FORMATTED_SIZE];
538
539                fn init_buffer() -> Self::Buffer {
540                    [0; Self::FORMATTED_SIZE]
541                }
542
543                fn encode(self, buf: &mut Self::Buffer) -> &[u8] {
544                    lexical_core::write(self, buf)
545                }
546            }
547        )*
548    };
549}
550integer_encode!(i8, i16, i32, i64, u8, u16, u32, u64);
551
552macro_rules! float_encode {
553    ($($t:ty),*) => {
554        $(
555            impl PrimitiveEncode for $t {
556                type Buffer = [u8; Self::FORMATTED_SIZE];
557
558                fn init_buffer() -> Self::Buffer {
559                    [0; Self::FORMATTED_SIZE]
560                }
561
562                fn encode(self, buf: &mut Self::Buffer) -> &[u8] {
563                    if self.is_infinite() || self.is_nan() {
564                        b"null"
565                    } else {
566                        lexical_core::write(self, buf)
567                    }
568                }
569            }
570        )*
571    };
572}
573float_encode!(f32, f64);
574
575impl PrimitiveEncode for f16 {
576    type Buffer = <f32 as PrimitiveEncode>::Buffer;
577
578    fn init_buffer() -> Self::Buffer {
579        f32::init_buffer()
580    }
581
582    fn encode(self, buf: &mut Self::Buffer) -> &[u8] {
583        self.to_f32().encode(buf)
584    }
585}
586
587struct PrimitiveEncoder<N: PrimitiveEncode> {
588    values: ScalarBuffer<N>,
589    buffer: N::Buffer,
590}
591
592impl<N: PrimitiveEncode> PrimitiveEncoder<N> {
593    fn new<P: ArrowPrimitiveType<Native = N>>(array: &PrimitiveArray<P>) -> Self {
594        Self {
595            values: array.values().clone(),
596            buffer: N::init_buffer(),
597        }
598    }
599}
600
601impl<N: PrimitiveEncode> Encoder for PrimitiveEncoder<N> {
602    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
603        out.extend_from_slice(self.values[idx].encode(&mut self.buffer));
604    }
605}
606
607struct BooleanEncoder<'a>(&'a BooleanArray);
608
609impl Encoder for BooleanEncoder<'_> {
610    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
611        match self.0.value(idx) {
612            true => out.extend_from_slice(b"true"),
613            false => out.extend_from_slice(b"false"),
614        }
615    }
616}
617
618struct StringEncoder<'a, O: OffsetSizeTrait>(&'a GenericStringArray<O>);
619
620impl<O: OffsetSizeTrait> Encoder for StringEncoder<'_, O> {
621    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
622        encode_string(self.0.value(idx), out);
623    }
624}
625
626struct StringViewEncoder<'a>(&'a StringViewArray);
627
628impl Encoder for StringViewEncoder<'_> {
629    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
630        encode_string(self.0.value(idx), out);
631    }
632}
633
634struct BinaryViewEncoder<'a>(&'a BinaryViewArray);
635
636impl Encoder for BinaryViewEncoder<'_> {
637    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
638        encode_binary(self.0.value(idx), out);
639    }
640}
641
642struct ListEncoder<'a, O: OffsetSizeTrait> {
643    offsets: OffsetBuffer<O>,
644    encoder: NullableEncoder<'a>,
645}
646
647impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> {
648    fn try_new(
649        field: &'a FieldRef,
650        array: &'a GenericListArray<O>,
651        options: &'a EncoderOptions,
652    ) -> Result<Self, ArrowError> {
653        let encoder = make_encoder(field, array.values().as_ref(), options)?;
654        Ok(Self {
655            offsets: array.offsets().clone(),
656            encoder,
657        })
658    }
659}
660
661impl<O: OffsetSizeTrait> Encoder for ListEncoder<'_, O> {
662    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
663        let end = self.offsets[idx + 1].as_usize();
664        let start = self.offsets[idx].as_usize();
665        out.push(b'[');
666
667        if self.encoder.has_nulls() {
668            for idx in start..end {
669                if idx != start {
670                    out.push(b',')
671                }
672                if self.encoder.is_null(idx) {
673                    out.extend_from_slice(b"null");
674                } else {
675                    self.encoder.encode(idx, out);
676                }
677            }
678        } else {
679            for idx in start..end {
680                if idx != start {
681                    out.push(b',')
682                }
683                self.encoder.encode(idx, out);
684            }
685        }
686        out.push(b']');
687    }
688}
689
690struct FixedSizeListEncoder<'a> {
691    value_length: usize,
692    encoder: NullableEncoder<'a>,
693}
694
695impl<'a> FixedSizeListEncoder<'a> {
696    fn try_new(
697        field: &'a FieldRef,
698        array: &'a FixedSizeListArray,
699        options: &'a EncoderOptions,
700    ) -> Result<Self, ArrowError> {
701        let encoder = make_encoder(field, array.values().as_ref(), options)?;
702        Ok(Self {
703            encoder,
704            value_length: array.value_length().as_usize(),
705        })
706    }
707}
708
709impl Encoder for FixedSizeListEncoder<'_> {
710    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
711        let start = idx * self.value_length;
712        let end = start + self.value_length;
713        out.push(b'[');
714        if self.encoder.has_nulls() {
715            for idx in start..end {
716                if idx != start {
717                    out.push(b',')
718                }
719                if self.encoder.is_null(idx) {
720                    out.extend_from_slice(b"null");
721                } else {
722                    self.encoder.encode(idx, out);
723                }
724            }
725        } else {
726            for idx in start..end {
727                if idx != start {
728                    out.push(b',')
729                }
730                self.encoder.encode(idx, out);
731            }
732        }
733        out.push(b']');
734    }
735}
736
737struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
738    keys: ScalarBuffer<K::Native>,
739    encoder: NullableEncoder<'a>,
740}
741
742impl<'a, K: ArrowDictionaryKeyType> DictionaryEncoder<'a, K> {
743    fn try_new(
744        field: &'a FieldRef,
745        array: &'a DictionaryArray<K>,
746        options: &'a EncoderOptions,
747    ) -> Result<Self, ArrowError> {
748        let encoder = make_encoder(field, array.values().as_ref(), options)?;
749
750        Ok(Self {
751            keys: array.keys().values().clone(),
752            encoder,
753        })
754    }
755}
756
757impl<K: ArrowDictionaryKeyType> Encoder for DictionaryEncoder<'_, K> {
758    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
759        self.encoder.encode(self.keys[idx].as_usize(), out)
760    }
761}
762
763struct RunEndEncodedEncoder<'a, R: RunEndIndexType> {
764    run_array: &'a RunArray<R>,
765    encoder: NullableEncoder<'a>,
766}
767
768impl<'a, R: RunEndIndexType> RunEndEncodedEncoder<'a, R> {
769    fn try_new(
770        field: &'a FieldRef,
771        array: &'a RunArray<R>,
772        options: &'a EncoderOptions,
773    ) -> Result<Self, ArrowError> {
774        let encoder = make_encoder(field, array.values().as_ref(), options)?;
775        Ok(Self {
776            run_array: array,
777            encoder,
778        })
779    }
780}
781
782impl<R: RunEndIndexType> Encoder for RunEndEncodedEncoder<'_, R> {
783    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
784        let physical_idx = self.run_array.get_physical_index(idx);
785        self.encoder.encode(physical_idx, out)
786    }
787}
788
789/// A newtype wrapper around [`ArrayFormatter`] to keep our usage of it private and not implement `Encoder` for the public type
790struct JsonArrayFormatter<'a> {
791    formatter: ArrayFormatter<'a>,
792}
793
794impl<'a> JsonArrayFormatter<'a> {
795    fn new(formatter: ArrayFormatter<'a>) -> Self {
796        Self { formatter }
797    }
798}
799
800impl Encoder for JsonArrayFormatter<'_> {
801    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
802        out.push(b'"');
803        // Should be infallible
804        // Note: We are making an assumption that the formatter does not produce characters that require escaping
805        let _ = write!(out, "{}", self.formatter.value(idx));
806        out.push(b'"')
807    }
808}
809
810/// A newtype wrapper around [`JsonArrayFormatter`] that skips surrounding the value with `"`
811struct RawArrayFormatter<'a>(JsonArrayFormatter<'a>);
812
813impl Encoder for RawArrayFormatter<'_> {
814    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
815        let _ = write!(out, "{}", self.0.formatter.value(idx));
816    }
817}
818
819struct NullEncoder;
820
821impl Encoder for NullEncoder {
822    fn encode(&mut self, _idx: usize, _out: &mut Vec<u8>) {
823        unreachable!()
824    }
825}
826
827struct MapEncoder<'a> {
828    offsets: OffsetBuffer<i32>,
829    keys: NullableEncoder<'a>,
830    values: NullableEncoder<'a>,
831    explicit_nulls: bool,
832}
833
834impl<'a> MapEncoder<'a> {
835    fn try_new(
836        field: &'a FieldRef,
837        array: &'a MapArray,
838        options: &'a EncoderOptions,
839    ) -> Result<Self, ArrowError> {
840        let values = array.values();
841        let keys = array.keys();
842
843        if !matches!(
844            keys.data_type(),
845            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
846        ) {
847            return Err(ArrowError::JsonError(format!(
848                "Only UTF8 keys supported by JSON MapArray Writer: got {:?}",
849                keys.data_type()
850            )));
851        }
852
853        let keys = make_encoder(field, keys, options)?;
854        let values = make_encoder(field, values, options)?;
855
856        // We sanity check nulls as these are currently not enforced by MapArray (#1697)
857        if keys.has_nulls() {
858            return Err(ArrowError::InvalidArgumentError(
859                "Encountered nulls in MapArray keys".to_string(),
860            ));
861        }
862
863        if array.entries().nulls().is_some_and(|x| x.null_count() != 0) {
864            return Err(ArrowError::InvalidArgumentError(
865                "Encountered nulls in MapArray entries".to_string(),
866            ));
867        }
868
869        Ok(Self {
870            offsets: array.offsets().clone(),
871            keys,
872            values,
873            explicit_nulls: options.explicit_nulls(),
874        })
875    }
876}
877
878impl Encoder for MapEncoder<'_> {
879    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
880        let end = self.offsets[idx + 1].as_usize();
881        let start = self.offsets[idx].as_usize();
882
883        let mut is_first = true;
884
885        out.push(b'{');
886
887        for idx in start..end {
888            let is_null = self.values.is_null(idx);
889            if is_null && !self.explicit_nulls {
890                continue;
891            }
892
893            if !is_first {
894                out.push(b',');
895            }
896            is_first = false;
897
898            self.keys.encode(idx, out);
899            out.push(b':');
900
901            if is_null {
902                out.extend_from_slice(b"null");
903            } else {
904                self.values.encode(idx, out);
905            }
906        }
907        out.push(b'}');
908    }
909}
910
911/// New-type wrapper for encoding the binary types in arrow: `Binary`, `LargeBinary`
912/// and `FixedSizeBinary` as hex strings in JSON.
913struct BinaryEncoder<B>(B);
914
915impl<'a, B> BinaryEncoder<B>
916where
917    B: ArrayAccessor<Item = &'a [u8]>,
918{
919    fn new(array: B) -> Self {
920        Self(array)
921    }
922}
923
924impl<'a, B> Encoder for BinaryEncoder<B>
925where
926    B: ArrayAccessor<Item = &'a [u8]>,
927{
928    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
929        out.push(b'"');
930        for byte in self.0.value(idx) {
931            // this write is infallible
932            write!(out, "{byte:02x}").unwrap();
933        }
934        out.push(b'"');
935    }
936}