parquet_variant_json/
to_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for converting Variant data to JSON format
19use arrow_schema::ArrowError;
20use base64::{engine::general_purpose, Engine as _};
21use chrono::Timelike;
22use parquet_variant::{Variant, VariantList, VariantObject};
23use serde_json::Value;
24use std::io::Write;
25
26/// Extension trait for converting Variants to JSON
27pub trait VariantToJson {
28    ///
29    /// This function writes JSON directly to any type that implements [`Write`],
30    /// making it efficient for streaming or when you want to control the output destination.
31    ///
32    /// See [`VariantToJson::to_json_string`] for a convenience function that returns a
33    /// JSON string.
34    ///
35    /// # Arguments
36    ///
37    /// * `writer` - Writer to output JSON to
38    /// * `variant` - The Variant value to convert
39    ///
40    /// # Returns
41    ///
42    /// * `Ok(())` if successful
43    /// * `Err` with error details if conversion fails
44    ///
45    /// # Examples
46    ///
47    ///
48    /// ```rust
49    /// # use parquet_variant::{Variant};
50    /// # use parquet_variant_json::VariantToJson;
51    /// # use arrow_schema::ArrowError;
52    /// let variant = Variant::from("Hello, World!");
53    /// let mut buffer = Vec::new();
54    /// variant.to_json(&mut buffer)?;
55    /// assert_eq!(String::from_utf8(buffer).unwrap(), "\"Hello, World!\"");
56    /// # Ok::<(), ArrowError>(())
57    /// ```
58    ///
59    /// # Example: Create a [`Variant::Object`] and convert to JSON
60    /// ```rust
61    /// # use parquet_variant::{Variant, VariantBuilder};
62    /// # use parquet_variant_json::VariantToJson;
63    /// # use arrow_schema::ArrowError;
64    /// let mut builder = VariantBuilder::new();
65    /// // Create an object builder that will write fields to the object
66    /// let mut object_builder = builder.new_object();
67    /// object_builder.insert("first_name", "Jiaying");
68    /// object_builder.insert("last_name", "Li");
69    /// object_builder.finish();
70    /// // Finish the builder to get the metadata and value
71    /// let (metadata, value) = builder.finish();
72    /// // Create the Variant and convert to JSON
73    /// let variant = Variant::try_new(&metadata, &value)?;
74    /// let mut writer = Vec::new();
75    /// variant.to_json(&mut writer)?;
76    /// assert_eq!(br#"{"first_name":"Jiaying","last_name":"Li"}"#, writer.as_slice());
77    /// # Ok::<(), ArrowError>(())
78    /// ```
79    fn to_json(&self, buffer: &mut impl Write) -> Result<(), ArrowError>;
80
81    /// Convert [`Variant`] to JSON [`String`]
82    ///
83    /// This is a convenience function that converts a Variant to a JSON string.
84    /// This is the same as calling [`VariantToJson::to_json`] with a [`Vec`].
85    /// It's the simplest way to get a JSON representation when you just need a String result.
86    ///
87    /// # Arguments
88    ///
89    /// * `variant` - The Variant value to convert
90    ///
91    /// # Returns
92    ///
93    /// * `Ok(String)` containing the JSON representation
94    /// * `Err` with error details if conversion fails
95    ///
96    /// # Examples
97    ///
98    /// ```rust
99    /// # use parquet_variant::{Variant};
100    /// # use parquet_variant_json::VariantToJson;
101    /// # use arrow_schema::ArrowError;
102    /// let variant = Variant::Int32(42);
103    /// let json = variant.to_json_string()?;
104    /// assert_eq!(json, "42");
105    /// # Ok::<(), ArrowError>(())
106    /// ```
107    ///
108    /// # Example: Create a [`Variant::Object`] and convert to JSON
109    ///
110    /// This example shows how to create an object with two fields and convert it to JSON:
111    /// ```json
112    /// {
113    ///   "first_name": "Jiaying",
114    ///   "last_name": "Li"
115    /// }
116    /// ```
117    ///
118    /// ```rust
119    /// # use parquet_variant::{Variant, VariantBuilder};
120    /// # use parquet_variant_json::VariantToJson;
121    /// # use arrow_schema::ArrowError;
122    /// let mut builder = VariantBuilder::new();
123    /// // Create an object builder that will write fields to the object
124    /// let mut object_builder = builder.new_object();
125    /// object_builder.insert("first_name", "Jiaying");
126    /// object_builder.insert("last_name", "Li");
127    /// object_builder.finish();
128    /// // Finish the builder to get the metadata and value
129    /// let (metadata, value) = builder.finish();
130    /// // Create the Variant and convert to JSON
131    /// let variant = Variant::try_new(&metadata, &value)?;
132    /// let json = variant.to_json_string()?;
133    /// assert_eq!(r#"{"first_name":"Jiaying","last_name":"Li"}"#, json);
134    /// # Ok::<(), ArrowError>(())
135    /// ```
136    fn to_json_string(&self) -> Result<String, ArrowError>;
137
138    /// Convert [`Variant`] to [`serde_json::Value`]
139    ///
140    /// This function converts a Variant to a [`serde_json::Value`], which is useful
141    /// when you need to work with the JSON data programmatically or integrate with
142    /// other serde-based JSON processing.
143    ///
144    /// # Arguments
145    ///
146    /// * `variant` - The Variant value to convert
147    ///
148    /// # Returns
149    ///
150    /// * `Ok(Value)` containing the JSON value
151    /// * `Err` with error details if conversion fails
152    ///
153    /// # Examples
154    ///
155    /// ```rust
156    /// # use parquet_variant::{Variant};
157    /// # use parquet_variant_json::VariantToJson;
158    /// # use serde_json::Value;
159    /// # use arrow_schema::ArrowError;
160    /// let variant = Variant::from("hello");
161    /// let json_value = variant.to_json_value()?;
162    /// assert_eq!(json_value, Value::String("hello".to_string()));
163    /// # Ok::<(), ArrowError>(())
164    /// ```
165    fn to_json_value(&self) -> Result<Value, ArrowError>;
166}
167
168impl<'m, 'v> VariantToJson for Variant<'m, 'v> {
169    fn to_json(&self, buffer: &mut impl Write) -> Result<(), ArrowError> {
170        match self {
171            Variant::Null => write!(buffer, "null")?,
172            Variant::BooleanTrue => write!(buffer, "true")?,
173            Variant::BooleanFalse => write!(buffer, "false")?,
174            Variant::Int8(i) => write!(buffer, "{i}")?,
175            Variant::Int16(i) => write!(buffer, "{i}")?,
176            Variant::Int32(i) => write!(buffer, "{i}")?,
177            Variant::Int64(i) => write!(buffer, "{i}")?,
178            Variant::Float(f) => write!(buffer, "{f}")?,
179            Variant::Double(f) => write!(buffer, "{f}")?,
180            Variant::Decimal4(decimal) => write!(buffer, "{decimal}")?,
181            Variant::Decimal8(decimal) => write!(buffer, "{decimal}")?,
182            Variant::Decimal16(decimal) => write!(buffer, "{decimal}")?,
183            Variant::Date(date) => write!(buffer, "\"{}\"", format_date_string(date))?,
184            Variant::TimestampMicros(ts) | Variant::TimestampNanos(ts) => {
185                write!(buffer, "\"{}\"", ts.to_rfc3339())?
186            }
187            Variant::TimestampNtzMicros(ts) => {
188                write!(buffer, "\"{}\"", format_timestamp_ntz_string(ts, 6))?
189            }
190            Variant::TimestampNtzNanos(ts) => {
191                write!(buffer, "\"{}\"", format_timestamp_ntz_string(ts, 9))?
192            }
193            Variant::Time(time) => write!(buffer, "\"{}\"", format_time_ntz_str(time))?,
194            Variant::Binary(bytes) => {
195                // Encode binary as base64 string
196                let base64_str = format_binary_base64(bytes);
197                let json_str = serde_json::to_string(&base64_str).map_err(|e| {
198                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
199                })?;
200                write!(buffer, "{json_str}")?
201            }
202            Variant::String(s) => {
203                // Use serde_json to properly escape the string
204                let json_str = serde_json::to_string(s).map_err(|e| {
205                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
206                })?;
207                write!(buffer, "{json_str}")?
208            }
209            Variant::ShortString(s) => {
210                // Use serde_json to properly escape the string
211                let json_str = serde_json::to_string(s.as_str()).map_err(|e| {
212                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
213                })?;
214                write!(buffer, "{json_str}")?
215            }
216            Variant::Uuid(uuid) => {
217                write!(buffer, "\"{uuid}\"")?;
218            }
219            Variant::Object(obj) => {
220                convert_object_to_json(buffer, obj)?;
221            }
222            Variant::List(arr) => {
223                convert_array_to_json(buffer, arr)?;
224            }
225        }
226        Ok(())
227    }
228
229    fn to_json_string(&self) -> Result<String, ArrowError> {
230        let mut buffer = Vec::new();
231        self.to_json(&mut buffer)?;
232        String::from_utf8(buffer)
233            .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {e}")))
234    }
235
236    fn to_json_value(&self) -> Result<Value, ArrowError> {
237        match self {
238            Variant::Null => Ok(Value::Null),
239            Variant::BooleanTrue => Ok(Value::Bool(true)),
240            Variant::BooleanFalse => Ok(Value::Bool(false)),
241            Variant::Int8(i) => Ok(Value::Number((*i).into())),
242            Variant::Int16(i) => Ok(Value::Number((*i).into())),
243            Variant::Int32(i) => Ok(Value::Number((*i).into())),
244            Variant::Int64(i) => Ok(Value::Number((*i).into())),
245            Variant::Float(f) => serde_json::Number::from_f64((*f).into())
246                .map(Value::Number)
247                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float value".to_string())),
248            Variant::Double(f) => serde_json::Number::from_f64(*f)
249                .map(Value::Number)
250                .ok_or_else(|| {
251                    ArrowError::InvalidArgumentError("Invalid double value".to_string())
252                }),
253            Variant::Decimal4(decimal4) => {
254                let scale = decimal4.scale();
255                let integer = decimal4.integer();
256
257                let integer = if scale == 0 {
258                    integer
259                } else {
260                    let divisor = 10_i32.pow(scale as u32);
261                    if integer % divisor != 0 {
262                        // fall back to floating point
263                        return Ok(Value::from(integer as f64 / divisor as f64));
264                    }
265                    integer / divisor
266                };
267                Ok(Value::from(integer))
268            }
269            Variant::Decimal8(decimal8) => {
270                let scale = decimal8.scale();
271                let integer = decimal8.integer();
272
273                let integer = if scale == 0 {
274                    integer
275                } else {
276                    let divisor = 10_i64.pow(scale as u32);
277                    if integer % divisor != 0 {
278                        // fall back to floating point
279                        return Ok(Value::from(integer as f64 / divisor as f64));
280                    }
281                    integer / divisor
282                };
283                Ok(Value::from(integer))
284            }
285            Variant::Decimal16(decimal16) => {
286                let scale = decimal16.scale();
287                let integer = decimal16.integer();
288
289                let integer = if scale == 0 {
290                    integer
291                } else {
292                    let divisor = 10_i128.pow(scale as u32);
293                    if integer % divisor != 0 {
294                        // fall back to floating point
295                        return Ok(Value::from(integer as f64 / divisor as f64));
296                    }
297                    integer / divisor
298                };
299                // i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
300                // i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
301                let value = i64::try_from(integer)
302                    .map(Value::from)
303                    .or_else(|_| u64::try_from(integer).map(Value::from))
304                    .unwrap_or_else(|_| Value::from(integer as f64));
305                Ok(value)
306            }
307            Variant::Date(date) => Ok(Value::String(format_date_string(date))),
308            Variant::TimestampMicros(ts) | Variant::TimestampNanos(ts) => {
309                Ok(Value::String(ts.to_rfc3339()))
310            }
311            Variant::TimestampNtzMicros(ts) => {
312                Ok(Value::String(format_timestamp_ntz_string(ts, 6)))
313            }
314            Variant::TimestampNtzNanos(ts) => Ok(Value::String(format_timestamp_ntz_string(ts, 9))),
315            Variant::Time(time) => Ok(Value::String(format_time_ntz_str(time))),
316            Variant::Binary(bytes) => Ok(Value::String(format_binary_base64(bytes))),
317            Variant::String(s) => Ok(Value::String(s.to_string())),
318            Variant::ShortString(s) => Ok(Value::String(s.to_string())),
319            Variant::Uuid(uuid) => Ok(Value::String(uuid.to_string())),
320            Variant::Object(obj) => {
321                let map = obj
322                    .iter()
323                    .map(|(k, v)| v.to_json_value().map(|json_val| (k.to_string(), json_val)))
324                    .collect::<Result<_, _>>()?;
325                Ok(Value::Object(map))
326            }
327            Variant::List(arr) => {
328                let vec = arr
329                    .iter()
330                    .map(|element| element.to_json_value())
331                    .collect::<Result<_, _>>()?;
332                Ok(Value::Array(vec))
333            }
334        }
335    }
336}
337
338// Format string constants to avoid duplication and reduce errors
339const DATE_FORMAT: &str = "%Y-%m-%d";
340
341// Helper functions for consistent formatting
342fn format_date_string(date: &chrono::NaiveDate) -> String {
343    date.format(DATE_FORMAT).to_string()
344}
345
346fn format_timestamp_ntz_string(ts: &chrono::NaiveDateTime, precision: usize) -> String {
347    let format_str = format!(
348        "{}",
349        ts.format(&format!("%Y-%m-%dT%H:%M:%S%.{}f", precision))
350    );
351    ts.format(format_str.as_str()).to_string()
352}
353
354fn format_binary_base64(bytes: &[u8]) -> String {
355    general_purpose::STANDARD.encode(bytes)
356}
357
358fn format_time_ntz_str(time: &chrono::NaiveTime) -> String {
359    let base = time.format("%H:%M:%S").to_string();
360    let micros = time.nanosecond() / 1000;
361    match micros {
362        0 => format!("{}.{}", base, 0),
363        _ => {
364            let micros_str = format!("{:06}", micros);
365            let micros_str_trimmed = micros_str.trim_matches('0');
366            format!("{}.{}", base, micros_str_trimmed)
367        }
368    }
369}
370
371/// Convert object fields to JSON
372fn convert_object_to_json(buffer: &mut impl Write, obj: &VariantObject) -> Result<(), ArrowError> {
373    write!(buffer, "{{")?;
374
375    // Get all fields from the object
376    let mut first = true;
377
378    for (key, value) in obj.iter() {
379        if !first {
380            write!(buffer, ",")?;
381        }
382        first = false;
383
384        // Write the key (properly escaped)
385        let json_key = serde_json::to_string(key).map_err(|e| {
386            ArrowError::InvalidArgumentError(format!("JSON key encoding error: {e}"))
387        })?;
388        write!(buffer, "{json_key}:")?;
389
390        // Recursively convert the value
391        value.to_json(buffer)?;
392    }
393
394    write!(buffer, "}}")?;
395    Ok(())
396}
397
398/// Convert array elements to JSON
399fn convert_array_to_json(buffer: &mut impl Write, arr: &VariantList) -> Result<(), ArrowError> {
400    write!(buffer, "[")?;
401
402    let mut first = true;
403    for element in arr.iter() {
404        if !first {
405            write!(buffer, ",")?;
406        }
407        first = false;
408
409        element.to_json(buffer)?;
410    }
411
412    write!(buffer, "]")?;
413    Ok(())
414}
415
416#[cfg(test)]
417mod tests {
418    use super::*;
419    use chrono::{DateTime, NaiveDate, NaiveTime, Utc};
420    use parquet_variant::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
421
422    #[test]
423    fn test_decimal_edge_cases() -> Result<(), ArrowError> {
424        // Test negative decimal
425        let negative_variant = Variant::from(VariantDecimal4::try_new(-12345, 3)?);
426        let negative_json = negative_variant.to_json_string()?;
427        assert_eq!(negative_json, "-12.345");
428
429        // Test large scale decimal
430        let large_scale_variant = Variant::from(VariantDecimal8::try_new(123456789, 6)?);
431        let large_scale_json = large_scale_variant.to_json_string()?;
432        assert_eq!(large_scale_json, "123.456789");
433
434        Ok(())
435    }
436
437    #[test]
438    fn test_decimal16_to_json() -> Result<(), ArrowError> {
439        let variant = Variant::from(VariantDecimal16::try_new(123456789012345, 4)?);
440        let json = variant.to_json_string()?;
441        assert_eq!(json, "12345678901.2345");
442
443        let json_value = variant.to_json_value()?;
444        assert!(matches!(json_value, Value::Number(_)));
445
446        // Test very large number
447        let large_variant = Variant::from(VariantDecimal16::try_new(999999999999999999, 2)?);
448        let large_json = large_variant.to_json_string()?;
449        // Due to f64 precision limits, very large numbers may lose precision
450        assert!(
451            large_json.starts_with("9999999999999999")
452                || large_json.starts_with("10000000000000000")
453        );
454        Ok(())
455    }
456
457    #[test]
458    fn test_date_to_json() -> Result<(), ArrowError> {
459        let date = NaiveDate::from_ymd_opt(2023, 12, 25).unwrap();
460        let variant = Variant::Date(date);
461        let json = variant.to_json_string()?;
462        assert_eq!(json, "\"2023-12-25\"");
463
464        let json_value = variant.to_json_value()?;
465        assert_eq!(json_value, Value::String("2023-12-25".to_string()));
466
467        // Test leap year date
468        let leap_date = NaiveDate::from_ymd_opt(2024, 2, 29).unwrap();
469        let leap_variant = Variant::Date(leap_date);
470        let leap_json = leap_variant.to_json_string()?;
471        assert_eq!(leap_json, "\"2024-02-29\"");
472        Ok(())
473    }
474
475    #[test]
476    fn test_timestamp_micros_to_json() -> Result<(), ArrowError> {
477        let timestamp = DateTime::parse_from_rfc3339("2023-12-25T10:30:45Z")
478            .unwrap()
479            .with_timezone(&Utc);
480        let variant = Variant::TimestampMicros(timestamp);
481        let json = variant.to_json_string()?;
482        assert!(json.contains("2023-12-25T10:30:45"));
483        assert!(json.starts_with('"') && json.ends_with('"'));
484
485        let json_value = variant.to_json_value()?;
486        assert!(matches!(json_value, Value::String(_)));
487        Ok(())
488    }
489
490    #[test]
491    fn test_timestamp_ntz_micros_to_json() -> Result<(), ArrowError> {
492        let naive_timestamp = DateTime::from_timestamp(1703505045, 123456)
493            .unwrap()
494            .naive_utc();
495        let variant = Variant::TimestampNtzMicros(naive_timestamp);
496        let json = variant.to_json_string()?;
497        assert!(json.contains("2023-12-25"));
498        assert!(json.starts_with('"') && json.ends_with('"'));
499
500        let json_value = variant.to_json_value()?;
501        assert!(matches!(json_value, Value::String(_)));
502        Ok(())
503    }
504
505    #[test]
506    fn test_time_to_json() -> Result<(), ArrowError> {
507        let naive_time = NaiveTime::from_num_seconds_from_midnight_opt(12345, 123460708).unwrap();
508        let variant = Variant::Time(naive_time);
509        let json = variant.to_json_string()?;
510        assert_eq!("\"03:25:45.12346\"", json);
511
512        let json_value = variant.to_json_value()?;
513        assert!(matches!(json_value, Value::String(_)));
514        Ok(())
515    }
516
517    #[test]
518    fn test_timestamp_nanos_to_json() -> Result<(), ArrowError> {
519        let timestamp = DateTime::parse_from_rfc3339("2023-12-25T10:30:45.123456789Z")
520            .unwrap()
521            .with_timezone(&Utc);
522        let variant = Variant::TimestampNanos(timestamp);
523        let json = variant.to_json_string()?;
524        assert_eq!(json, "\"2023-12-25T10:30:45.123456789+00:00\"");
525
526        let json_value = variant.to_json_value()?;
527        assert!(matches!(json_value, Value::String(_)));
528        Ok(())
529    }
530
531    #[test]
532    fn test_timestamp_ntz_nanos_to_json() -> Result<(), ArrowError> {
533        let naive_timestamp = DateTime::from_timestamp(1703505045, 123456789)
534            .unwrap()
535            .naive_utc();
536        let variant = Variant::TimestampNtzNanos(naive_timestamp);
537        let json = variant.to_json_string()?;
538        assert_eq!(json, "\"2023-12-25T11:50:45.123456789\"");
539
540        let json_value = variant.to_json_value()?;
541        assert!(matches!(json_value, Value::String(_)));
542        Ok(())
543    }
544
545    #[test]
546    fn test_binary_to_json() -> Result<(), ArrowError> {
547        let binary_data = b"Hello, World!";
548        let variant = Variant::Binary(binary_data);
549        let json = variant.to_json_string()?;
550
551        // Should be base64 encoded and quoted
552        assert!(json.starts_with('"') && json.ends_with('"'));
553        assert!(json.len() > 2); // Should have content
554
555        let json_value = variant.to_json_value()?;
556        assert!(matches!(json_value, Value::String(_)));
557
558        // Test empty binary
559        let empty_variant = Variant::Binary(b"");
560        let empty_json = empty_variant.to_json_string()?;
561        assert_eq!(empty_json, "\"\"");
562
563        // Test binary with special bytes
564        let special_variant = Variant::Binary(&[0, 255, 128, 64]);
565        let special_json = special_variant.to_json_string()?;
566        assert!(special_json.starts_with('"') && special_json.ends_with('"'));
567        Ok(())
568    }
569
570    #[test]
571    fn test_string_to_json() -> Result<(), ArrowError> {
572        let variant = Variant::from("hello world");
573        let json = variant.to_json_string()?;
574        assert_eq!(json, "\"hello world\"");
575
576        let json_value = variant.to_json_value()?;
577        assert_eq!(json_value, Value::String("hello world".to_string()));
578        Ok(())
579    }
580
581    #[test]
582    fn test_short_string_to_json() -> Result<(), ArrowError> {
583        use parquet_variant::ShortString;
584        let short_string = ShortString::try_new("short")?;
585        let variant = Variant::ShortString(short_string);
586        let json = variant.to_json_string()?;
587        assert_eq!(json, "\"short\"");
588
589        let json_value = variant.to_json_value()?;
590        assert_eq!(json_value, Value::String("short".to_string()));
591        Ok(())
592    }
593
594    #[test]
595    fn test_uuid_to_json() -> Result<(), ArrowError> {
596        let uuid = uuid::Uuid::parse_str("123e4567-e89b-12d3-a456-426614174000").unwrap();
597        let variant = Variant::Uuid(uuid);
598        let json = variant.to_json_string()?;
599        assert_eq!(json, "\"123e4567-e89b-12d3-a456-426614174000\"");
600
601        let json_value = variant.to_json_value()?;
602        assert_eq!(
603            json_value,
604            Value::String("123e4567-e89b-12d3-a456-426614174000".to_string())
605        );
606        Ok(())
607    }
608
609    #[test]
610    fn test_string_escaping() -> Result<(), ArrowError> {
611        let variant = Variant::from("hello\nworld\t\"quoted\"");
612        let json = variant.to_json_string()?;
613        assert_eq!(json, "\"hello\\nworld\\t\\\"quoted\\\"\"");
614
615        let json_value = variant.to_json_value()?;
616        assert_eq!(
617            json_value,
618            Value::String("hello\nworld\t\"quoted\"".to_string())
619        );
620        Ok(())
621    }
622
623    #[test]
624    fn test_json_buffer_writing() -> Result<(), ArrowError> {
625        let variant = Variant::Int8(123);
626        let mut buffer = Vec::new();
627        variant.to_json(&mut buffer)?;
628
629        let result = String::from_utf8(buffer)
630            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
631        assert_eq!(result, "123");
632        Ok(())
633    }
634
635    /// Reusable test structure for JSON conversion testing
636    struct JsonTest {
637        variant: Variant<'static, 'static>,
638        expected_json: &'static str,
639        expected_value: Value,
640    }
641
642    impl JsonTest {
643        fn run(self) {
644            let json_string = self
645                .variant
646                .to_json_string()
647                .expect("variant_to_json_string should succeed");
648            assert_eq!(
649                json_string, self.expected_json,
650                "JSON string mismatch for variant: {:?}",
651                self.variant
652            );
653
654            let json_value = self
655                .variant
656                .to_json_value()
657                .expect("variant_to_json_value should succeed");
658
659            // For floating point numbers, we need special comparison due to JSON number representation
660            match (&json_value, &self.expected_value) {
661                (Value::Number(actual), Value::Number(expected)) => {
662                    let actual_f64 = actual.as_f64().unwrap_or(0.0);
663                    let expected_f64 = expected.as_f64().unwrap_or(0.0);
664                    assert!(
665                        (actual_f64 - expected_f64).abs() < f64::EPSILON,
666                        "JSON value mismatch for variant: {:?}, got {}, expected {}",
667                        self.variant,
668                        actual_f64,
669                        expected_f64
670                    );
671                }
672                _ => {
673                    assert_eq!(
674                        json_value, self.expected_value,
675                        "JSON value mismatch for variant: {:?}",
676                        self.variant
677                    );
678                }
679            }
680
681            // Verify roundtrip: JSON string should parse to same value
682            let parsed: Value =
683                serde_json::from_str(&json_string).expect("Generated JSON should be valid");
684            // Same floating point handling for roundtrip
685            match (&parsed, &self.expected_value) {
686                (Value::Number(actual), Value::Number(expected)) => {
687                    let actual_f64 = actual.as_f64().unwrap_or(0.0);
688                    let expected_f64 = expected.as_f64().unwrap_or(0.0);
689                    assert!(
690                        (actual_f64 - expected_f64).abs() < f64::EPSILON,
691                        "Parsed JSON mismatch for variant: {:?}, got {}, expected {}",
692                        self.variant,
693                        actual_f64,
694                        expected_f64
695                    );
696                }
697                _ => {
698                    assert_eq!(
699                        parsed, self.expected_value,
700                        "Parsed JSON mismatch for variant: {:?}",
701                        self.variant
702                    );
703                }
704            }
705        }
706    }
707
708    #[test]
709    fn test_primitive_json_conversion() {
710        use parquet_variant::ShortString;
711
712        // Null
713        JsonTest {
714            variant: Variant::Null,
715            expected_json: "null",
716            expected_value: Value::Null,
717        }
718        .run();
719
720        // Booleans
721        JsonTest {
722            variant: Variant::BooleanTrue,
723            expected_json: "true",
724            expected_value: Value::Bool(true),
725        }
726        .run();
727
728        JsonTest {
729            variant: Variant::BooleanFalse,
730            expected_json: "false",
731            expected_value: Value::Bool(false),
732        }
733        .run();
734
735        // Integers - positive and negative edge cases
736        JsonTest {
737            variant: Variant::Int8(42),
738            expected_json: "42",
739            expected_value: Value::Number(42.into()),
740        }
741        .run();
742
743        JsonTest {
744            variant: Variant::Int8(-128),
745            expected_json: "-128",
746            expected_value: Value::Number((-128).into()),
747        }
748        .run();
749
750        JsonTest {
751            variant: Variant::Int16(32767),
752            expected_json: "32767",
753            expected_value: Value::Number(32767.into()),
754        }
755        .run();
756
757        JsonTest {
758            variant: Variant::Int16(-32768),
759            expected_json: "-32768",
760            expected_value: Value::Number((-32768).into()),
761        }
762        .run();
763
764        JsonTest {
765            variant: Variant::Int32(2147483647),
766            expected_json: "2147483647",
767            expected_value: Value::Number(2147483647.into()),
768        }
769        .run();
770
771        JsonTest {
772            variant: Variant::Int32(-2147483648),
773            expected_json: "-2147483648",
774            expected_value: Value::Number((-2147483648).into()),
775        }
776        .run();
777
778        JsonTest {
779            variant: Variant::Int64(9223372036854775807),
780            expected_json: "9223372036854775807",
781            expected_value: Value::Number(9223372036854775807i64.into()),
782        }
783        .run();
784
785        JsonTest {
786            variant: Variant::Int64(-9223372036854775808),
787            expected_json: "-9223372036854775808",
788            expected_value: Value::Number((-9223372036854775808i64).into()),
789        }
790        .run();
791
792        // Floats
793        JsonTest {
794            variant: Variant::Float(3.5),
795            expected_json: "3.5",
796            expected_value: serde_json::Number::from_f64(3.5)
797                .map(Value::Number)
798                .unwrap(),
799        }
800        .run();
801
802        JsonTest {
803            variant: Variant::Float(0.0),
804            expected_json: "0",
805            expected_value: Value::Number(0.into()), // Use integer 0 to match JSON parsing
806        }
807        .run();
808
809        JsonTest {
810            variant: Variant::Float(-1.5),
811            expected_json: "-1.5",
812            expected_value: serde_json::Number::from_f64(-1.5)
813                .map(Value::Number)
814                .unwrap(),
815        }
816        .run();
817
818        JsonTest {
819            variant: Variant::Double(std::f64::consts::E),
820            expected_json: "2.718281828459045",
821            expected_value: serde_json::Number::from_f64(std::f64::consts::E)
822                .map(Value::Number)
823                .unwrap(),
824        }
825        .run();
826
827        // Decimals
828        JsonTest {
829            variant: Variant::from(VariantDecimal4::try_new(12345, 2).unwrap()),
830            expected_json: "123.45",
831            expected_value: serde_json::Number::from_f64(123.45)
832                .map(Value::Number)
833                .unwrap(),
834        }
835        .run();
836
837        JsonTest {
838            variant: Variant::from(VariantDecimal4::try_new(42, 0).unwrap()),
839            expected_json: "42",
840            expected_value: serde_json::Number::from_f64(42.0)
841                .map(Value::Number)
842                .unwrap(),
843        }
844        .run();
845
846        JsonTest {
847            variant: Variant::from(VariantDecimal8::try_new(1234567890, 3).unwrap()),
848            expected_json: "1234567.89",
849            expected_value: serde_json::Number::from_f64(1234567.89)
850                .map(Value::Number)
851                .unwrap(),
852        }
853        .run();
854
855        JsonTest {
856            variant: Variant::from(VariantDecimal16::try_new(123456789012345, 4).unwrap()),
857            expected_json: "12345678901.2345",
858            expected_value: serde_json::Number::from_f64(12345678901.2345)
859                .map(Value::Number)
860                .unwrap(),
861        }
862        .run();
863
864        // Strings
865        JsonTest {
866            variant: Variant::from("hello world"),
867            expected_json: "\"hello world\"",
868            expected_value: Value::String("hello world".to_string()),
869        }
870        .run();
871
872        JsonTest {
873            variant: Variant::from(""),
874            expected_json: "\"\"",
875            expected_value: Value::String("".to_string()),
876        }
877        .run();
878
879        JsonTest {
880            variant: Variant::ShortString(ShortString::try_new("test").unwrap()),
881            expected_json: "\"test\"",
882            expected_value: Value::String("test".to_string()),
883        }
884        .run();
885
886        // Date and timestamps
887        JsonTest {
888            variant: Variant::Date(NaiveDate::from_ymd_opt(2023, 12, 25).unwrap()),
889            expected_json: "\"2023-12-25\"",
890            expected_value: Value::String("2023-12-25".to_string()),
891        }
892        .run();
893
894        // Binary data (base64 encoded)
895        JsonTest {
896            variant: Variant::Binary(b"test"),
897            expected_json: "\"dGVzdA==\"", // base64 encoded "test"
898            expected_value: Value::String("dGVzdA==".to_string()),
899        }
900        .run();
901
902        JsonTest {
903            variant: Variant::Binary(b""),
904            expected_json: "\"\"", // empty base64
905            expected_value: Value::String("".to_string()),
906        }
907        .run();
908
909        JsonTest {
910            variant: Variant::Binary(b"binary data"),
911            expected_json: "\"YmluYXJ5IGRhdGE=\"", // base64 encoded "binary data"
912            expected_value: Value::String("YmluYXJ5IGRhdGE=".to_string()),
913        }
914        .run();
915    }
916
917    #[test]
918    fn test_string_escaping_comprehensive() {
919        // Test comprehensive string escaping scenarios
920        JsonTest {
921            variant: Variant::from("line1\nline2\ttab\"quote\"\\backslash"),
922            expected_json: "\"line1\\nline2\\ttab\\\"quote\\\"\\\\backslash\"",
923            expected_value: Value::String("line1\nline2\ttab\"quote\"\\backslash".to_string()),
924        }
925        .run();
926
927        JsonTest {
928            variant: Variant::from("Hello δΈ–η•Œ 🌍"),
929            expected_json: "\"Hello δΈ–η•Œ 🌍\"",
930            expected_value: Value::String("Hello δΈ–η•Œ 🌍".to_string()),
931        }
932        .run();
933    }
934
935    #[test]
936    fn test_buffer_writing_variants() -> Result<(), ArrowError> {
937        let variant = Variant::from("test buffer writing");
938
939        // Test writing to a Vec<u8>
940        let mut buffer = Vec::new();
941        variant.to_json(&mut buffer)?;
942        let result = String::from_utf8(buffer)
943            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
944        assert_eq!(result, "\"test buffer writing\"");
945
946        // Test writing to vec![]
947        let mut buffer = vec![];
948        variant.to_json(&mut buffer)?;
949        let result = String::from_utf8(buffer)
950            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
951        assert_eq!(result, "\"test buffer writing\"");
952
953        Ok(())
954    }
955
956    #[test]
957    fn test_simple_object_to_json() -> Result<(), ArrowError> {
958        use parquet_variant::VariantBuilder;
959
960        // Create a simple object with various field types
961        let mut builder = VariantBuilder::new();
962
963        builder
964            .new_object()
965            .with_field("name", "Alice")
966            .with_field("age", 30i32)
967            .with_field("active", true)
968            .with_field("score", 95.5f64)
969            .finish();
970
971        let (metadata, value) = builder.finish();
972        let variant = Variant::try_new(&metadata, &value)?;
973        let json = variant.to_json_string()?;
974
975        // Parse the JSON to verify structure - handle JSON parsing errors manually
976        let parsed: Value = serde_json::from_str(&json).unwrap();
977        let obj = parsed.as_object().expect("expected JSON object");
978        assert_eq!(obj.get("name"), Some(&Value::String("Alice".to_string())));
979        assert_eq!(obj.get("age"), Some(&Value::Number(30.into())));
980        assert_eq!(obj.get("active"), Some(&Value::Bool(true)));
981        assert!(matches!(obj.get("score"), Some(Value::Number(_))));
982        assert_eq!(obj.len(), 4);
983
984        // Test variant_to_json_value as well
985        let json_value = variant.to_json_value()?;
986        assert!(matches!(json_value, Value::Object(_)));
987
988        Ok(())
989    }
990
991    #[test]
992    fn test_empty_object_to_json() -> Result<(), ArrowError> {
993        use parquet_variant::VariantBuilder;
994
995        let mut builder = VariantBuilder::new();
996
997        {
998            let obj = builder.new_object();
999            obj.finish();
1000        }
1001
1002        let (metadata, value) = builder.finish();
1003        let variant = Variant::try_new(&metadata, &value)?;
1004        let json = variant.to_json_string()?;
1005        assert_eq!(json, "{}");
1006
1007        let json_value = variant.to_json_value()?;
1008        assert_eq!(json_value, Value::Object(serde_json::Map::new()));
1009
1010        Ok(())
1011    }
1012
1013    #[test]
1014    fn test_object_with_special_characters_to_json() -> Result<(), ArrowError> {
1015        use parquet_variant::VariantBuilder;
1016
1017        let mut builder = VariantBuilder::new();
1018
1019        builder
1020            .new_object()
1021            .with_field("message", "Hello \"World\"\nWith\tTabs")
1022            .with_field("path", "C:\\Users\\Alice\\Documents")
1023            .with_field("unicode", "πŸ˜€ Smiley")
1024            .finish();
1025
1026        let (metadata, value) = builder.finish();
1027        let variant = Variant::try_new(&metadata, &value)?;
1028        let json = variant.to_json_string()?;
1029
1030        // Verify that special characters are properly escaped
1031        assert!(json.contains("Hello \\\"World\\\"\\nWith\\tTabs"));
1032        assert!(json.contains("C:\\\\Users\\\\Alice\\\\Documents"));
1033        assert!(json.contains("πŸ˜€ Smiley"));
1034
1035        // Verify that the JSON can be parsed back
1036        let parsed: Value = serde_json::from_str(&json).unwrap();
1037        assert!(matches!(parsed, Value::Object(_)));
1038
1039        Ok(())
1040    }
1041
1042    #[test]
1043    fn test_simple_list_to_json() -> Result<(), ArrowError> {
1044        use parquet_variant::VariantBuilder;
1045
1046        let mut builder = VariantBuilder::new();
1047
1048        builder
1049            .new_list()
1050            .with_value(1i32)
1051            .with_value(2i32)
1052            .with_value(3i32)
1053            .with_value(4i32)
1054            .with_value(5i32)
1055            .finish();
1056
1057        let (metadata, value) = builder.finish();
1058        let variant = Variant::try_new(&metadata, &value)?;
1059        let json = variant.to_json_string()?;
1060        assert_eq!(json, "[1,2,3,4,5]");
1061
1062        let json_value = variant.to_json_value()?;
1063        let arr = json_value.as_array().expect("expected JSON array");
1064        assert_eq!(arr.len(), 5);
1065        assert_eq!(arr[0], Value::Number(1.into()));
1066        assert_eq!(arr[4], Value::Number(5.into()));
1067
1068        Ok(())
1069    }
1070
1071    #[test]
1072    fn test_empty_list_to_json() -> Result<(), ArrowError> {
1073        use parquet_variant::VariantBuilder;
1074
1075        let mut builder = VariantBuilder::new();
1076
1077        {
1078            let list = builder.new_list();
1079            list.finish();
1080        }
1081
1082        let (metadata, value) = builder.finish();
1083        let variant = Variant::try_new(&metadata, &value)?;
1084        let json = variant.to_json_string()?;
1085        assert_eq!(json, "[]");
1086
1087        let json_value = variant.to_json_value()?;
1088        assert_eq!(json_value, Value::Array(vec![]));
1089
1090        Ok(())
1091    }
1092
1093    #[test]
1094    fn test_mixed_type_list_to_json() -> Result<(), ArrowError> {
1095        use parquet_variant::VariantBuilder;
1096
1097        let mut builder = VariantBuilder::new();
1098
1099        builder
1100            .new_list()
1101            .with_value("hello")
1102            .with_value(42i32)
1103            .with_value(true)
1104            .with_value(()) // null
1105            .with_value(std::f64::consts::PI)
1106            .finish();
1107
1108        let (metadata, value) = builder.finish();
1109        let variant = Variant::try_new(&metadata, &value)?;
1110        let json = variant.to_json_string()?;
1111
1112        let parsed: Value = serde_json::from_str(&json).unwrap();
1113        let arr = parsed.as_array().expect("expected JSON array");
1114        assert_eq!(arr.len(), 5);
1115        assert_eq!(arr[0], Value::String("hello".to_string()));
1116        assert_eq!(arr[1], Value::Number(42.into()));
1117        assert_eq!(arr[2], Value::Bool(true));
1118        assert_eq!(arr[3], Value::Null);
1119        assert!(matches!(arr[4], Value::Number(_)));
1120
1121        Ok(())
1122    }
1123
1124    #[test]
1125    fn test_object_field_ordering_in_json() -> Result<(), ArrowError> {
1126        use parquet_variant::VariantBuilder;
1127
1128        let mut builder = VariantBuilder::new();
1129
1130        {
1131            let mut obj = builder.new_object();
1132            // Add fields in non-alphabetical order
1133            obj.insert("zebra", "last");
1134            obj.insert("alpha", "first");
1135            obj.insert("beta", "second");
1136            obj.finish();
1137        }
1138
1139        let (metadata, value) = builder.finish();
1140        let variant = Variant::try_new(&metadata, &value)?;
1141        let json = variant.to_json_string()?;
1142
1143        // Parse and verify all fields are present
1144        let parsed: Value = serde_json::from_str(&json).unwrap();
1145        let obj = parsed.as_object().expect("expected JSON object");
1146        assert_eq!(obj.len(), 3);
1147        assert_eq!(obj.get("alpha"), Some(&Value::String("first".to_string())));
1148        assert_eq!(obj.get("beta"), Some(&Value::String("second".to_string())));
1149        assert_eq!(obj.get("zebra"), Some(&Value::String("last".to_string())));
1150
1151        Ok(())
1152    }
1153
1154    #[test]
1155    fn test_list_with_various_primitive_types_to_json() -> Result<(), ArrowError> {
1156        use parquet_variant::VariantBuilder;
1157
1158        let mut builder = VariantBuilder::new();
1159
1160        builder
1161            .new_list()
1162            .with_value("string_value")
1163            .with_value(42i32)
1164            .with_value(true)
1165            .with_value(std::f64::consts::PI)
1166            .with_value(false)
1167            .with_value(()) // null
1168            .with_value(100i64)
1169            .finish();
1170
1171        let (metadata, value) = builder.finish();
1172        let variant = Variant::try_new(&metadata, &value)?;
1173        let json = variant.to_json_string()?;
1174
1175        let parsed: Value = serde_json::from_str(&json).unwrap();
1176        let arr = parsed.as_array().expect("expected JSON array");
1177        assert_eq!(arr.len(), 7);
1178        assert_eq!(arr[0], Value::String("string_value".to_string()));
1179        assert_eq!(arr[1], Value::Number(42.into()));
1180        assert_eq!(arr[2], Value::Bool(true));
1181        assert!(matches!(arr[3], Value::Number(_))); // float
1182        assert_eq!(arr[4], Value::Bool(false));
1183        assert_eq!(arr[5], Value::Null);
1184        assert_eq!(arr[6], Value::Number(100.into()));
1185
1186        Ok(())
1187    }
1188
1189    #[test]
1190    fn test_object_with_various_primitive_types_to_json() -> Result<(), ArrowError> {
1191        use parquet_variant::VariantBuilder;
1192
1193        let mut builder = VariantBuilder::new();
1194
1195        {
1196            let mut obj = builder.new_object();
1197            obj.insert("string_field", "test_string");
1198            obj.insert("int_field", 123i32);
1199            obj.insert("bool_field", true);
1200            obj.insert("float_field", 2.71f64);
1201            obj.insert("null_field", ());
1202            obj.insert("long_field", 999i64);
1203            obj.finish();
1204        }
1205
1206        let (metadata, value) = builder.finish();
1207        let variant = Variant::try_new(&metadata, &value)?;
1208        let json = variant.to_json_string()?;
1209
1210        let parsed: Value = serde_json::from_str(&json).unwrap();
1211        let obj = parsed.as_object().expect("expected JSON object");
1212        assert_eq!(obj.len(), 6);
1213        assert_eq!(
1214            obj.get("string_field"),
1215            Some(&Value::String("test_string".to_string()))
1216        );
1217        assert_eq!(obj.get("int_field"), Some(&Value::Number(123.into())));
1218        assert_eq!(obj.get("bool_field"), Some(&Value::Bool(true)));
1219        assert!(matches!(obj.get("float_field"), Some(Value::Number(_))));
1220        assert_eq!(obj.get("null_field"), Some(&Value::Null));
1221        assert_eq!(obj.get("long_field"), Some(&Value::Number(999.into())));
1222
1223        Ok(())
1224    }
1225
1226    #[test]
1227    fn test_decimal_precision_behavior() -> Result<(), ArrowError> {
1228        // Test case that demonstrates f64 precision limits
1229        // This is a 63-bit precision decimal8 value that f64 cannot represent exactly
1230        let high_precision_decimal8 = Variant::from(VariantDecimal8::try_new(
1231            9007199254740993, // 2^53 + 1, exceeds f64 precision
1232            6,
1233        )?);
1234
1235        let json_string = high_precision_decimal8.to_json_string()?;
1236        let json_value = high_precision_decimal8.to_json_value()?;
1237
1238        // Due to f64 precision limits, we expect precision loss for values > 2^53
1239        // Both functions should produce consistent results (even if not exact)
1240        let parsed: Value = serde_json::from_str(&json_string).unwrap();
1241        assert_eq!(parsed, json_value);
1242
1243        // Test a case that can be exactly represented (integer result)
1244        let exact_decimal = Variant::from(VariantDecimal8::try_new(
1245            1234567890000, // Should result in 1234567.89 (trailing zeros trimmed)
1246            6,
1247        )?);
1248
1249        let json_string_exact = exact_decimal.to_json_string()?;
1250        assert_eq!(json_string_exact, "1234567.89");
1251
1252        // Test integer case (should be exact)
1253        let integer_decimal = Variant::from(VariantDecimal8::try_new(
1254            42000000, // Should result in 42 (integer)
1255            6,
1256        )?);
1257
1258        let json_string_integer = integer_decimal.to_json_string()?;
1259        assert_eq!(json_string_integer, "42");
1260
1261        Ok(())
1262    }
1263
1264    #[test]
1265    fn test_float_nan_inf_handling() -> Result<(), ArrowError> {
1266        // Test NaN handling - should return an error since JSON doesn't support NaN
1267        let nan_variant = Variant::Float(f32::NAN);
1268        let nan_result = nan_variant.to_json_value();
1269        assert!(nan_result.is_err());
1270        assert!(nan_result
1271            .unwrap_err()
1272            .to_string()
1273            .contains("Invalid float value"));
1274
1275        // Test positive infinity - should return an error since JSON doesn't support Infinity
1276        let pos_inf_variant = Variant::Float(f32::INFINITY);
1277        let pos_inf_result = pos_inf_variant.to_json_value();
1278        assert!(pos_inf_result.is_err());
1279        assert!(pos_inf_result
1280            .unwrap_err()
1281            .to_string()
1282            .contains("Invalid float value"));
1283
1284        // Test negative infinity - should return an error since JSON doesn't support -Infinity
1285        let neg_inf_variant = Variant::Float(f32::NEG_INFINITY);
1286        let neg_inf_result = neg_inf_variant.to_json_value();
1287        assert!(neg_inf_result.is_err());
1288        assert!(neg_inf_result
1289            .unwrap_err()
1290            .to_string()
1291            .contains("Invalid float value"));
1292
1293        // Test the same for Double variants
1294        let nan_double_variant = Variant::Double(f64::NAN);
1295        let nan_double_result = nan_double_variant.to_json_value();
1296        assert!(nan_double_result.is_err());
1297        assert!(nan_double_result
1298            .unwrap_err()
1299            .to_string()
1300            .contains("Invalid double value"));
1301
1302        let pos_inf_double_variant = Variant::Double(f64::INFINITY);
1303        let pos_inf_double_result = pos_inf_double_variant.to_json_value();
1304        assert!(pos_inf_double_result.is_err());
1305        assert!(pos_inf_double_result
1306            .unwrap_err()
1307            .to_string()
1308            .contains("Invalid double value"));
1309
1310        let neg_inf_double_variant = Variant::Double(f64::NEG_INFINITY);
1311        let neg_inf_double_result = neg_inf_double_variant.to_json_value();
1312        assert!(neg_inf_double_result.is_err());
1313        assert!(neg_inf_double_result
1314            .unwrap_err()
1315            .to_string()
1316            .contains("Invalid double value"));
1317
1318        // Test normal float values still work
1319        let normal_float = Variant::Float(std::f32::consts::PI);
1320        let normal_result = normal_float.to_json_value()?;
1321        assert!(matches!(normal_result, Value::Number(_)));
1322
1323        let normal_double = Variant::Double(std::f64::consts::E);
1324        let normal_double_result = normal_double.to_json_value()?;
1325        assert!(matches!(normal_double_result, Value::Number(_)));
1326
1327        Ok(())
1328    }
1329}