use std::fmt;
use chrono::{TimeZone, Utc};
use half::f16;
use num::traits::Float;
use num_bigint::{BigInt, Sign};
use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
use crate::data_type::{ByteArray, Decimal, Int96};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
#[cfg(any(feature = "json", test))]
use serde_json::Value;
/// Aborts with `unimplemented!` for a physical/converted type combination that
/// has no conversion in this file.
///
/// The panic message contains `$column_descr.physical_type()` and
/// `$column_descr.converted_type()` (both formatted with `{}`) followed by
/// `$value` formatted with `{:?}`.
macro_rules! nyi {
($column_descr:ident, $value:ident) => {{
unimplemented!(
"Conversion for physical type {}, converted type {}, value {:?}",
$column_descr.physical_type(),
$column_descr.converted_type(),
$value
);
}};
}
/// A record made of named fields, kept in the order they were supplied.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
// Each entry pairs a column name with the value stored for that column.
fields: Vec<(String, Field)>,
}
#[allow(clippy::len_without_is_empty)]
impl Row {
pub fn len(&self) -> usize {
self.fields.len()
}
pub fn into_columns(self) -> Vec<(String, Field)> {
self.fields
}
pub fn get_column_iter(&self) -> RowColumnIter {
RowColumnIter {
fields: &self.fields,
curr: 0,
count: self.fields.len(),
}
}
#[cfg(any(feature = "json", test))]
pub fn to_json_value(&self) -> Value {
Value::Object(
self.fields
.iter()
.map(|(key, field)| (key.to_owned(), field.to_json_value()))
.collect(),
)
}
}
/// Iterator over the `(name, value)` pairs of a `Row`, created by
/// `Row::get_column_iter`.
pub struct RowColumnIter<'a> {
// Borrowed backing storage of the row being iterated.
fields: &'a Vec<(String, Field)>,
// Index of the next entry to yield.
curr: usize,
// Number of entries; iteration stops once `curr` reaches it.
count: usize,
}
impl<'a> Iterator for RowColumnIter<'a> {
    type Item = (&'a String, &'a Field);

    /// Yields the next `(name, value)` pair, or `None` once `count` entries
    /// have been produced.
    fn next(&mut self) -> Option<Self::Item> {
        if self.curr >= self.count {
            return None;
        }
        let (name, field) = &self.fields[self.curr];
        self.curr += 1;
        Some((name, field))
    }
}
/// Typed, index-based access to the fields of a row.
///
/// Every getter takes the position `i` of a field and returns its value when
/// the field has the requested type. The `Row` implementation in this file
/// returns an error when the field holds a different variant, and panics when
/// `i` is out of bounds because it indexes its backing `Vec` directly.
pub trait RowAccessor {
/// Returns the field at `i` as a `bool`.
fn get_bool(&self, i: usize) -> Result<bool>;
/// Returns the field at `i` as an `i8`.
fn get_byte(&self, i: usize) -> Result<i8>;
/// Returns the field at `i` as an `i16`.
fn get_short(&self, i: usize) -> Result<i16>;
/// Returns the field at `i` as an `i32`.
fn get_int(&self, i: usize) -> Result<i32>;
/// Returns the field at `i` as an `i64`.
fn get_long(&self, i: usize) -> Result<i64>;
/// Returns the field at `i` as a `u8`.
fn get_ubyte(&self, i: usize) -> Result<u8>;
/// Returns the field at `i` as a `u16`.
fn get_ushort(&self, i: usize) -> Result<u16>;
/// Returns the field at `i` as a `u32`.
fn get_uint(&self, i: usize) -> Result<u32>;
/// Returns the field at `i` as a `u64`.
fn get_ulong(&self, i: usize) -> Result<u64>;
/// Returns the field at `i` as an `f16`.
fn get_float16(&self, i: usize) -> Result<f16>;
/// Returns the field at `i` as an `f32`.
fn get_float(&self, i: usize) -> Result<f32>;
/// Returns the field at `i` as an `f64`.
fn get_double(&self, i: usize) -> Result<f64>;
/// Returns the raw `i64` of a millisecond timestamp field at `i`.
fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
/// Returns the raw `i64` of a microsecond timestamp field at `i`.
fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
/// Borrows the field at `i` as a `Decimal`.
fn get_decimal(&self, i: usize) -> Result<&Decimal>;
/// Borrows the field at `i` as a `String`.
fn get_string(&self, i: usize) -> Result<&String>;
/// Borrows the field at `i` as a `ByteArray`.
fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
/// Borrows the field at `i` as a nested `Row`.
fn get_group(&self, i: usize) -> Result<&Row>;
/// Borrows the field at `i` as a `List`.
fn get_list(&self, i: usize) -> Result<&List>;
/// Borrows the field at `i` as a `Map`.
fn get_map(&self, i: usize) -> Result<&Map>;
}
/// Gives access to a displayable view of a single field of a row.
pub trait RowFormatter {
/// Returns the field at position `i` as a `Display` trait object.
fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
/// Generates a `RowAccessor` getter named `$METHOD` that returns the payload
/// of `Field::$VARIANT` by value (as `$TY`) for the field at index `i`.
///
/// The generated method returns an error naming the actual and the requested
/// variant when the field holds something else, and panics if `i` is out of
/// bounds.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            match &self.fields[i].1 {
                Field::$VARIANT(payload) => Ok(*payload),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
/// Generates a `RowAccessor` getter named `$METHOD` that borrows the payload
/// of `Field::$VARIANT` (as `&$TY`) for the field at index `i`.
///
/// The generated method returns an error naming the actual and the requested
/// variant when the field holds something else, and panics if `i` is out of
/// bounds.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.fields[i].1 {
                Field::$VARIANT(payload) => Ok(payload),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
impl RowFormatter for Row {
    /// Returns the value stored at position `i` as a `Display` trait object.
    ///
    /// Panics if `i` is out of bounds.
    fn fmt(&self, i: usize) -> &dyn fmt::Display {
        let (_, field) = &self.fields[i];
        field
    }
}
// All getters are generated by macros: the "primitive" form returns the payload
// by value, the "complex" form returns a reference to it.
impl RowAccessor for Row {
row_primitive_accessor!(get_bool, Bool, bool);
row_primitive_accessor!(get_byte, Byte, i8);
row_primitive_accessor!(get_short, Short, i16);
row_primitive_accessor!(get_int, Int, i32);
row_primitive_accessor!(get_long, Long, i64);
row_primitive_accessor!(get_ubyte, UByte, u8);
row_primitive_accessor!(get_ushort, UShort, u16);
row_primitive_accessor!(get_uint, UInt, u32);
row_primitive_accessor!(get_ulong, ULong, u64);
row_primitive_accessor!(get_float16, Float16, f16);
row_primitive_accessor!(get_float, Float, f32);
row_primitive_accessor!(get_double, Double, f64);
row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
row_complex_accessor!(get_decimal, Decimal, Decimal);
row_complex_accessor!(get_string, Str, String);
row_complex_accessor!(get_bytes, Bytes, ByteArray);
row_complex_accessor!(get_group, Group, Row);
row_complex_accessor!(get_list, ListInternal, List);
row_complex_accessor!(get_map, MapInternal, Map);
}
/// Builds a `Row` from `(name, value)` pairs, keeping them in the given order.
#[inline]
pub fn make_row(fields: Vec<(String, Field)>) -> Row {
Row { fields }
}
impl fmt::Display for Row {
    /// Writes the row as `{name: value, name: value}`, with fields in stored
    /// order and separated by `", "`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("{")?;
        for (idx, (name, field)) in self.fields.iter().enumerate() {
            // The separator goes between entries, never after the last one.
            if idx != 0 {
                f.write_str(", ")?;
            }
            fmt::Display::fmt(name, f)?;
            f.write_str(": ")?;
            fmt::Display::fmt(field, f)?;
        }
        f.write_str("}")
    }
}
/// An ordered sequence of `Field` values.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
// Elements in the order they were supplied.
elements: Vec<Field>,
}
#[allow(clippy::len_without_is_empty)]
impl List {
    /// Returns the number of elements in the list.
    pub fn len(&self) -> usize {
        self.elements().len()
    }

    /// Borrows the elements as a slice.
    pub fn elements(&self) -> &[Field] {
        &self.elements
    }
}
/// Builds a `List` that owns the given elements, keeping their order.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
List { elements }
}
/// Typed, index-based access to the elements of a list.
///
/// Every getter takes the position `i` of an element and returns its value
/// when the element has the requested type. The implementations in this file
/// return an error when the element holds a different variant, and panic when
/// `i` is out of bounds because they index their backing `Vec` directly.
pub trait ListAccessor {
/// Returns the element at `i` as a `bool`.
fn get_bool(&self, i: usize) -> Result<bool>;
/// Returns the element at `i` as an `i8`.
fn get_byte(&self, i: usize) -> Result<i8>;
/// Returns the element at `i` as an `i16`.
fn get_short(&self, i: usize) -> Result<i16>;
/// Returns the element at `i` as an `i32`.
fn get_int(&self, i: usize) -> Result<i32>;
/// Returns the element at `i` as an `i64`.
fn get_long(&self, i: usize) -> Result<i64>;
/// Returns the element at `i` as a `u8`.
fn get_ubyte(&self, i: usize) -> Result<u8>;
/// Returns the element at `i` as a `u16`.
fn get_ushort(&self, i: usize) -> Result<u16>;
/// Returns the element at `i` as a `u32`.
fn get_uint(&self, i: usize) -> Result<u32>;
/// Returns the element at `i` as a `u64`.
fn get_ulong(&self, i: usize) -> Result<u64>;
/// Returns the element at `i` as an `f16`.
fn get_float16(&self, i: usize) -> Result<f16>;
/// Returns the element at `i` as an `f32`.
fn get_float(&self, i: usize) -> Result<f32>;
/// Returns the element at `i` as an `f64`.
fn get_double(&self, i: usize) -> Result<f64>;
/// Returns the raw `i64` of a millisecond timestamp element at `i`.
fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
/// Returns the raw `i64` of a microsecond timestamp element at `i`.
fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
/// Borrows the element at `i` as a `Decimal`.
fn get_decimal(&self, i: usize) -> Result<&Decimal>;
/// Borrows the element at `i` as a `String`.
fn get_string(&self, i: usize) -> Result<&String>;
/// Borrows the element at `i` as a `ByteArray`.
fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
/// Borrows the element at `i` as a `Row`.
fn get_group(&self, i: usize) -> Result<&Row>;
/// Borrows the element at `i` as a nested `List`.
fn get_list(&self, i: usize) -> Result<&List>;
/// Borrows the element at `i` as a `Map`.
fn get_map(&self, i: usize) -> Result<&Map>;
}
/// Generates a `ListAccessor` getter named `$METHOD` that returns the payload
/// of `Field::$VARIANT` by value (as `$TY`) for the element at index `i`.
///
/// The generated method returns an error naming the actual and the requested
/// variant when the element holds something else, and panics if `i` is out of
/// bounds.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(payload) = element {
                return Ok(*payload);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
/// Generates a `ListAccessor` getter named `$METHOD` that borrows the payload
/// of `Field::$VARIANT` (as `&$TY`) for the element at index `i`.
///
/// The macro is expanded both for containers that own their elements and for
/// containers that store `&Field`, so the pattern below relies on reference
/// patterns being looked through automatically. The generated method returns
/// an error naming the actual and the requested variant when the element holds
/// something else, and panics if `i` is out of bounds.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(payload) = element {
                return Ok(payload);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
// All getters are generated by macros: the "primitive" form returns the payload
// by value, the "complex" form returns a reference to it.
impl ListAccessor for List {
list_primitive_accessor!(get_bool, Bool, bool);
list_primitive_accessor!(get_byte, Byte, i8);
list_primitive_accessor!(get_short, Short, i16);
list_primitive_accessor!(get_int, Int, i32);
list_primitive_accessor!(get_long, Long, i64);
list_primitive_accessor!(get_ubyte, UByte, u8);
list_primitive_accessor!(get_ushort, UShort, u16);
list_primitive_accessor!(get_uint, UInt, u32);
list_primitive_accessor!(get_ulong, ULong, u64);
list_primitive_accessor!(get_float16, Float16, f16);
list_primitive_accessor!(get_float, Float, f32);
list_primitive_accessor!(get_double, Double, f64);
list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
list_complex_accessor!(get_decimal, Decimal, Decimal);
list_complex_accessor!(get_string, Str, String);
list_complex_accessor!(get_bytes, Bytes, ByteArray);
list_complex_accessor!(get_group, Group, Row);
list_complex_accessor!(get_list, ListInternal, List);
list_complex_accessor!(get_map, MapInternal, Map);
}
/// An ordered collection of key/value pairs, where keys and values are both
/// `Field`s.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
// `(key, value)` pairs in the order they were supplied.
entries: Vec<(Field, Field)>,
}
#[allow(clippy::len_without_is_empty)]
impl Map {
    /// Returns the number of key/value pairs in the map.
    pub fn len(&self) -> usize {
        self.entries().len()
    }

    /// Borrows the `(key, value)` pairs as a slice.
    pub fn entries(&self) -> &[(Field, Field)] {
        &self.entries
    }
}
/// Builds a `Map` that owns the given `(key, value)` pairs, keeping their order.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
Map { entries }
}
/// Access to the keys and the values of a map as two separate lists.
pub trait MapAccessor {
/// Returns a list accessor over the keys, borrowing from `self`.
fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
/// Returns a list accessor over the values, borrowing from `self`.
fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
/// Borrowed list view used by `Map` to expose either its keys or its values
/// through `ListAccessor`.
struct MapList<'a> {
// References into the entries of the map this view was created from.
elements: Vec<&'a Field>,
}
/// Generates a `ListAccessor` getter named `$METHOD` for a container that
/// stores `&Field` elements, returning the payload of `Field::$VARIANT` by
/// value (as `$TY`) for the element at index `i`.
///
/// The generated method returns an error naming the actual and the requested
/// variant when the element holds something else, and panics if `i` is out of
/// bounds.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element: &Field = self.elements[i];
            match element {
                Field::$VARIANT(payload) => Ok(*payload),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
// Getters are generated by macros. By-value getters use the map-specific macro
// because the elements here are `&Field`; by-reference getters reuse the macro
// that `List` uses.
impl ListAccessor for MapList<'_> {
map_list_primitive_accessor!(get_bool, Bool, bool);
map_list_primitive_accessor!(get_byte, Byte, i8);
map_list_primitive_accessor!(get_short, Short, i16);
map_list_primitive_accessor!(get_int, Int, i32);
map_list_primitive_accessor!(get_long, Long, i64);
map_list_primitive_accessor!(get_ubyte, UByte, u8);
map_list_primitive_accessor!(get_ushort, UShort, u16);
map_list_primitive_accessor!(get_uint, UInt, u32);
map_list_primitive_accessor!(get_ulong, ULong, u64);
map_list_primitive_accessor!(get_float16, Float16, f16);
map_list_primitive_accessor!(get_float, Float, f32);
map_list_primitive_accessor!(get_double, Double, f64);
map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
list_complex_accessor!(get_decimal, Decimal, Decimal);
list_complex_accessor!(get_string, Str, String);
list_complex_accessor!(get_bytes, Bytes, ByteArray);
list_complex_accessor!(get_group, Group, Row);
list_complex_accessor!(get_list, ListInternal, List);
list_complex_accessor!(get_map, MapInternal, Map);
}
impl MapAccessor for Map {
fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
let map_list = MapList {
elements: self.entries.iter().map(|v| &v.0).collect(),
};
Box::new(map_list)
}
fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
let map_list = MapList {
elements: self.entries.iter().map(|v| &v.1).collect(),
};
Box::new(map_list)
}
}
/// A single value inside a row, list or map: either a scalar or a nested
/// group, list or map.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
/// Absent value; displayed as `null`.
Null,
/// Boolean value.
Bool(bool),
/// Signed 8-bit integer.
Byte(i8),
/// Signed 16-bit integer.
Short(i16),
/// Signed 32-bit integer.
Int(i32),
/// Signed 64-bit integer.
Long(i64),
/// Unsigned 8-bit integer.
UByte(u8),
/// Unsigned 16-bit integer.
UShort(u16),
/// Unsigned 32-bit integer.
UInt(u32),
/// Unsigned 64-bit integer.
ULong(u64),
/// 16-bit floating point value.
Float16(f16),
/// 32-bit floating point value.
Float(f32),
/// 64-bit floating point value.
Double(f64),
/// Decimal value.
Decimal(Decimal),
/// UTF-8 string.
Str(String),
/// Raw bytes.
Bytes(ByteArray),
/// Date stored as a day count; displayed by converting `days * 86400`
/// seconds to a UTC calendar date.
Date(i32),
/// Timestamp stored as an `i64`; displayed after dividing by 1000 to get
/// seconds.
TimestampMillis(i64),
/// Timestamp stored as an `i64`; displayed after dividing by 1000000 to get
/// seconds.
TimestampMicros(i64),
/// Nested record with named fields.
Group(Row),
/// Nested list of values.
ListInternal(List),
/// Nested list of key/value pairs.
MapInternal(Map),
}
impl Field {
/// Returns the name of the variant held by `self`; used in the error messages
/// produced by the accessor macros.
fn get_type_name(&self) -> &'static str {
match *self {
Field::Null => "Null",
Field::Bool(_) => "Bool",
Field::Byte(_) => "Byte",
Field::Short(_) => "Short",
Field::Int(_) => "Int",
Field::Long(_) => "Long",
Field::UByte(_) => "UByte",
Field::UShort(_) => "UShort",
Field::UInt(_) => "UInt",
Field::ULong(_) => "ULong",
Field::Float16(_) => "Float16",
Field::Float(_) => "Float",
Field::Double(_) => "Double",
Field::Decimal(_) => "Decimal",
Field::Date(_) => "Date",
Field::Str(_) => "Str",
Field::Bytes(_) => "Bytes",
Field::TimestampMillis(_) => "TimestampMillis",
Field::TimestampMicros(_) => "TimestampMicros",
Field::Group(_) => "Group",
Field::ListInternal(_) => "ListInternal",
Field::MapInternal(_) => "MapInternal",
}
}
/// Returns `true` for every variant except `Group`, `ListInternal` and
/// `MapInternal`.
pub fn is_primitive(&self) -> bool {
!matches!(
*self,
Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
)
}
/// Wraps `value` in `Field::Bool`; the column descriptor is not consulted.
#[inline]
pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
Field::Bool(value)
}
/// Converts an `i32` into the variant selected by the column's converted type.
///
/// Narrower or unsigned targets are produced with `as` casts, so values
/// outside the target range wrap instead of failing.
///
/// # Panics
///
/// Panics (through `nyi!`) for converted types not listed in the match.
#[inline]
pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
match descr.converted_type() {
ConvertedType::INT_8 => Field::Byte(value as i8),
ConvertedType::INT_16 => Field::Short(value as i16),
ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
ConvertedType::UINT_8 => Field::UByte(value as u8),
ConvertedType::UINT_16 => Field::UShort(value as u16),
ConvertedType::UINT_32 => Field::UInt(value as u32),
ConvertedType::DATE => Field::Date(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
}
}
/// Converts an `i64` into the variant selected by the column's converted type.
///
/// `UINT_64` is produced with an `as` cast, so negative inputs wrap.
///
/// # Panics
///
/// Panics (through `nyi!`) for converted types not listed in the match.
#[inline]
pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
match descr.converted_type() {
ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
ConvertedType::UINT_64 => Field::ULong(value as u64),
ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
}
}
/// Converts an `Int96` into `Field::TimestampMillis` using `Int96::to_i64`;
/// the column descriptor is not consulted.
// NOTE(review): presumably `to_i64` yields milliseconds since the Unix epoch —
// confirm against `Int96`.
#[inline]
pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
Field::TimestampMillis(value.to_i64())
}
/// Wraps `value` in `Field::Float`; the column descriptor is not consulted.
#[inline]
pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
Field::Float(value)
}
/// Wraps `value` in `Field::Double`; the column descriptor is not consulted.
#[inline]
pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
Field::Double(value)
}
/// Converts a byte array into the variant selected by the column's physical
/// and converted (or logical) type.
///
/// # Errors
///
/// Returns an error when a `UTF8`/`ENUM`/`JSON` value is not valid UTF-8, or
/// when a `Float16` column value is not exactly 2 bytes long.
///
/// # Panics
///
/// Panics (through `nyi!`) for type combinations not listed in the matches.
#[inline]
pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
let field = match descr.physical_type() {
PhysicalType::BYTE_ARRAY => match descr.converted_type() {
ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
general_err!(
"Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
value.data(),
e
)
})?;
Field::Str(value)
}
ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
value,
descr.type_precision(),
descr.type_scale(),
)),
_ => nyi!(descr, value),
},
PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
value,
descr.type_precision(),
descr.type_scale(),
)),
// This guarded arm must stay ahead of the plain `NONE` arm below so that
// Float16 columns are not returned as raw bytes.
ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
if value.len() != 2 {
return Err(general_err!(
"Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
value.len()
));
}
// The two bytes are interpreted as little-endian.
let bytes = [value.data()[0], value.data()[1]];
Field::Float16(f16::from_le_bytes(bytes))
}
ConvertedType::NONE => Field::Bytes(value),
_ => nyi!(descr, value),
},
_ => nyi!(descr, value),
};
Ok(field)
}
/// Converts the field into a `serde_json::Value`.
///
/// Integers and floats become JSON numbers; a float for which
/// `Number::from_f64` returns `None` becomes `null`. Decimals, dates and
/// timestamps become strings, bytes become a base64 string, groups and maps
/// become objects and lists become arrays. A map key that is not a JSON
/// string is replaced by its JSON text.
#[cfg(any(feature = "json", test))]
pub fn to_json_value(&self) -> Value {
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
match &self {
Field::Null => Value::Null,
Field::Bool(b) => Value::Bool(*b),
Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
.map(Value::Number)
.unwrap_or(Value::Null),
Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
.map(Value::Number)
.unwrap_or(Value::Null),
Field::Double(n) => serde_json::Number::from_f64(*n)
.map(Value::Number)
.unwrap_or(Value::Null),
Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
Field::Str(s) => Value::String(s.to_owned()),
Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
Field::Date(d) => Value::String(convert_date_to_string(*d)),
Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
Field::Group(row) => row.to_json_value(),
Field::ListInternal(fields) => {
Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
}
Field::MapInternal(map) => Value::Object(
map.entries
.iter()
.map(|(key_field, value_field)| {
// JSON object keys must be strings: use string keys as they are and
// fall back to the serialized JSON text for any other key type.
let key_val = key_field.to_json_value();
let key_str = key_val
.as_str()
.map(|s| s.to_owned())
.unwrap_or_else(|| key_val.to_string());
(key_str, value_field.to_json_value())
})
.collect(),
),
}
}
}
impl fmt::Display for Field {
    /// Writes a human-readable form of the field.
    ///
    /// `f32`/`f64` values outside `1e-15..=1e19` use upper-case exponent
    /// notation and whole numbers inside it get a trailing `.0`. Strings are
    /// wrapped in double quotes, lists are written as `[a, b]` and maps as
    /// `{k -> v, k -> v}`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Field::Null => f.write_str("null"),
            Field::Bool(v) => write!(f, "{v}"),
            Field::Byte(v) => write!(f, "{v}"),
            Field::Short(v) => write!(f, "{v}"),
            Field::Int(v) => write!(f, "{v}"),
            Field::Long(v) => write!(f, "{v}"),
            Field::UByte(v) => write!(f, "{v}"),
            Field::UShort(v) => write!(f, "{v}"),
            Field::UInt(v) => write!(f, "{v}"),
            Field::ULong(v) => write!(f, "{v}"),
            Field::Float16(v) => {
                let number = *v;
                // Only finite whole numbers need the explicit ".0" suffix.
                if number.is_finite() && number.trunc() == number {
                    write!(f, "{number}.0")
                } else {
                    write!(f, "{number}")
                }
            }
            Field::Float(v) => {
                let number = *v;
                let in_plain_range = (1e-15..=1e19).contains(&number);
                match (in_plain_range, number.trunc() == number) {
                    (false, _) => write!(f, "{number:E}"),
                    (true, true) => write!(f, "{number}.0"),
                    (true, false) => write!(f, "{number}"),
                }
            }
            Field::Double(v) => {
                let number = *v;
                let in_plain_range = (1e-15..=1e19).contains(&number);
                match (in_plain_range, number.trunc() == number) {
                    (false, _) => write!(f, "{number:E}"),
                    (true, true) => write!(f, "{number}.0"),
                    (true, false) => write!(f, "{number}"),
                }
            }
            Field::Decimal(decimal) => {
                write!(f, "{}", convert_decimal_to_string(decimal))
            }
            Field::Str(text) => write!(f, "\"{text}\""),
            Field::Bytes(bytes) => write!(f, "{:?}", bytes.data()),
            Field::Date(days) => write!(f, "{}", convert_date_to_string(*days)),
            Field::TimestampMillis(millis) => {
                write!(f, "{}", convert_timestamp_millis_to_string(*millis))
            }
            Field::TimestampMicros(micros) => {
                write!(f, "{}", convert_timestamp_micros_to_string(*micros))
            }
            Field::Group(row) => write!(f, "{row}"),
            Field::ListInternal(list) => {
                f.write_str("[")?;
                for (idx, element) in list.elements.iter().enumerate() {
                    // The separator goes between elements, never after the last.
                    if idx != 0 {
                        f.write_str(", ")?;
                    }
                    fmt::Display::fmt(element, f)?;
                }
                f.write_str("]")
            }
            Field::MapInternal(map) => {
                f.write_str("{")?;
                for (idx, (key, value)) in map.entries.iter().enumerate() {
                    if idx != 0 {
                        f.write_str(", ")?;
                    }
                    fmt::Display::fmt(key, f)?;
                    f.write_str(" -> ")?;
                    fmt::Display::fmt(value, f)?;
                }
                f.write_str("}")
            }
        }
    }
}
/// Formats a day count as a `YYYY-MM-DD` UTC date, treating `value` as the
/// number of days to multiply by 86400 seconds before building the timestamp.
///
/// # Panics
///
/// Panics if `timestamp_opt` does not yield a timestamp that can be unwrapped
/// for the computed number of seconds.
#[inline]
fn convert_date_to_string(value: i32) -> String {
    const SECONDS_PER_DAY: i64 = 24 * 60 * 60;
    let seconds = i64::from(value) * SECONDS_PER_DAY;
    let midnight = Utc.timestamp_opt(seconds, 0).unwrap();
    midnight.format("%Y-%m-%d").to_string()
}
/// Formats a number of seconds as a UTC date-time using the pattern
/// `%Y-%m-%d %H:%M:%S %:z`.
///
/// # Panics
///
/// Panics if `timestamp_opt` does not yield a timestamp that can be unwrapped
/// for `value`.
#[inline]
fn convert_timestamp_secs_to_string(value: i64) -> String {
    let moment = Utc.timestamp_opt(value, 0).unwrap();
    moment.format("%Y-%m-%d %H:%M:%S %:z").to_string()
}
/// Formats a millisecond timestamp as a UTC date-time with second resolution.
///
/// The sub-second part is dropped by rounding down to the containing second.
/// Floor division is used because plain `/` truncates toward zero, which would
/// show a negative (pre-epoch) value with a fractional second one second too
/// late: `-1` ms belongs to 1969-12-31 23:59:59, not 1970-01-01 00:00:00.
///
/// # Panics
///
/// Panics if the resulting seconds value cannot be converted by
/// `convert_timestamp_secs_to_string`.
#[inline]
fn convert_timestamp_millis_to_string(value: i64) -> String {
    convert_timestamp_secs_to_string(value.div_euclid(1000))
}
/// Formats a microsecond timestamp as a UTC date-time with second resolution.
///
/// The sub-second part is dropped by rounding down to the containing second.
/// Floor division is used because plain `/` truncates toward zero, which would
/// show a negative (pre-epoch) value with a fractional second one second too
/// late: `-1` µs belongs to 1969-12-31 23:59:59, not 1970-01-01 00:00:00.
///
/// # Panics
///
/// Panics if the resulting seconds value cannot be converted by
/// `convert_timestamp_secs_to_string`.
#[inline]
fn convert_timestamp_micros_to_string(value: i64) -> String {
    convert_timestamp_secs_to_string(value.div_euclid(1_000_000))
}
/// Renders a decimal as plain digits with a decimal point (no exponent
/// notation), e.g. unscaled `-12344` with scale 2 becomes `-123.44` and with
/// scale 8 becomes `-0.00012344`.
///
/// The unscaled value is read from `decimal.data()` as a big-endian signed
/// integer.
///
/// # Panics
///
/// Panics if the scale is negative or the precision is not greater than the
/// scale.
#[inline]
fn convert_decimal_to_string(decimal: &Decimal) -> String {
    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
    let unscaled = BigInt::from_signed_bytes_be(decimal.data());
    // Length of the leading "-" (0 or 1); digits start right after it.
    let sign_len = usize::from(unscaled.sign() == Sign::Minus);
    let mut text = unscaled.to_string();
    // Number of digits that end up to the left of the decimal point.
    let int_digits = text.len() as i32 - decimal.scale() - sign_len as i32;
    if int_digits > 0 {
        text.insert(int_digits as usize + sign_len, '.');
    } else {
        // Not enough digits: pad with zeros after a leading "0.".
        let mut prefix = String::from("0.");
        prefix.push_str(&"0".repeat((-int_digits) as usize));
        text.insert_str(sign_len, &prefix);
    }
    text
}
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod tests {
use super::*;
use std::f64::consts::PI;
use std::sync::Arc;
use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
macro_rules! make_column_descr {
($physical_type:expr, $logical_type:expr) => {{
let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
.with_converted_type($logical_type)
.build()
.unwrap();
Arc::new(ColumnDescriptor::new(
Arc::new(tpe),
0,
0,
ColumnPath::from("col"),
))
}};
($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
.with_converted_type($logical_type)
.with_length($len)
.with_precision($prec)
.with_scale($scale)
.build()
.unwrap();
Arc::new(ColumnDescriptor::new(
Arc::new(tpe),
0,
0,
ColumnPath::from("col"),
))
}};
}
#[test]
fn test_row_convert_bool() {
let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
let row = Field::convert_bool(&descr, true);
assert_eq!(row, Field::Bool(true));
let row = Field::convert_bool(&descr, false);
assert_eq!(row, Field::Bool(false));
}
#[test]
fn test_row_convert_int32() {
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
let row = Field::convert_int32(&descr, 111);
assert_eq!(row, Field::Byte(111));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
let row = Field::convert_int32(&descr, 222);
assert_eq!(row, Field::Short(222));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
let row = Field::convert_int32(&descr, 333);
assert_eq!(row, Field::Int(333));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
let row = Field::convert_int32(&descr, -1);
assert_eq!(row, Field::UByte(255));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
let row = Field::convert_int32(&descr, 256);
assert_eq!(row, Field::UShort(256));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
let row = Field::convert_int32(&descr, 1234);
assert_eq!(row, Field::UInt(1234));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
let row = Field::convert_int32(&descr, 444);
assert_eq!(row, Field::Int(444));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
let row = Field::convert_int32(&descr, 14611);
assert_eq!(row, Field::Date(14611));
let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
let row = Field::convert_int32(&descr, 444);
assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
}
#[test]
fn test_row_convert_int64() {
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
let row = Field::convert_int64(&descr, 1111);
assert_eq!(row, Field::Long(1111));
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
let row = Field::convert_int64(&descr, 78239823);
assert_eq!(row, Field::ULong(78239823));
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
let row = Field::convert_int64(&descr, 1541186529153);
assert_eq!(row, Field::TimestampMillis(1541186529153));
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
let row = Field::convert_int64(&descr, 1541186529153123);
assert_eq!(row, Field::TimestampMicros(1541186529153123));
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
let row = Field::convert_int64(&descr, 2222);
assert_eq!(row, Field::Long(2222));
let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
let row = Field::convert_int64(&descr, 3333);
assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
}
#[test]
fn test_row_convert_int96() {
let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
let value = Int96::from(vec![0, 0, 2454923]);
let row = Field::convert_int96(&descr, value);
assert_eq!(row, Field::TimestampMillis(1238544000000));
let value = Int96::from(vec![4165425152, 13, 2454923]);
let row = Field::convert_int96(&descr, value);
assert_eq!(row, Field::TimestampMillis(1238544060000));
}
#[test]
fn test_row_convert_float() {
let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
let row = Field::convert_float(&descr, 2.31);
assert_eq!(row, Field::Float(2.31));
}
#[test]
fn test_row_convert_double() {
let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
let row = Field::convert_double(&descr, 1.56);
assert_eq!(row, Field::Double(1.56));
}
#[test]
fn test_row_convert_byte_array() {
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
let row = Field::convert_byte_array(&descr, value);
assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
let value = ByteArray::from(vec![b'1', b'2', b'3']);
let row = Field::convert_byte_array(&descr, value);
assert_eq!(row.unwrap(), Field::Str("123".to_string()));
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
let row = Field::convert_byte_array(&descr, value);
assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(row.unwrap(), Field::Bytes(value));
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(row.unwrap(), Field::Bytes(value));
let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
let value = ByteArray::from(vec![207, 200]);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(
row.unwrap(),
Field::Decimal(Decimal::from_bytes(value, 8, 2))
);
let descr = make_column_descr![
PhysicalType::FIXED_LEN_BYTE_ARRAY,
ConvertedType::DECIMAL,
8,
17,
5
];
let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(
row.unwrap(),
Field::Decimal(Decimal::from_bytes(value, 17, 5))
);
let descr = {
let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
.with_logical_type(Some(LogicalType::Float16))
.with_length(2)
.build()
.unwrap();
Arc::new(ColumnDescriptor::new(
Arc::new(tpe),
0,
0,
ColumnPath::from("col"),
))
};
let value = ByteArray::from(f16::PI);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(row.unwrap(), Field::Float16(f16::PI));
let descr = make_column_descr![
PhysicalType::FIXED_LEN_BYTE_ARRAY,
ConvertedType::NONE,
6,
0,
0
];
let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
let row = Field::convert_byte_array(&descr, value.clone());
assert_eq!(row.unwrap(), Field::Bytes(value));
}
#[test]
fn test_convert_date_to_string() {
fn check_date_conversion(y: u32, m: u32, d: u32) {
let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap();
let dt = Utc.from_utc_datetime(&datetime);
let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
let exp = format!("{}", dt.format("%Y-%m-%d"));
assert_eq!(res, exp);
}
check_date_conversion(1969, 12, 31);
check_date_conversion(2010, 1, 2);
check_date_conversion(2014, 5, 1);
check_date_conversion(2016, 2, 29);
check_date_conversion(2017, 9, 12);
check_date_conversion(2018, 3, 31);
}
#[test]
fn test_convert_timestamp_millis_to_string() {
fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
.unwrap()
.and_hms_opt(h, mi, s)
.unwrap();
let dt = Utc.from_utc_datetime(&datetime);
let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
assert_eq!(res, exp);
}
check_datetime_conversion(1969, 9, 10, 1, 2, 3);
check_datetime_conversion(2010, 1, 2, 13, 12, 54);
check_datetime_conversion(2011, 1, 3, 8, 23, 1);
check_datetime_conversion(2012, 4, 5, 11, 6, 32);
check_datetime_conversion(2013, 5, 12, 16, 38, 0);
check_datetime_conversion(2014, 11, 28, 21, 15, 12);
}
#[test]
fn test_convert_timestamp_micros_to_string() {
fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
.unwrap()
.and_hms_opt(h, mi, s)
.unwrap();
let dt = Utc.from_utc_datetime(&datetime);
let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
assert_eq!(res, exp);
}
check_datetime_conversion(1969, 9, 10, 1, 2, 3);
check_datetime_conversion(2010, 1, 2, 13, 12, 54);
check_datetime_conversion(2011, 1, 3, 8, 23, 1);
check_datetime_conversion(2012, 4, 5, 11, 6, 32);
check_datetime_conversion(2013, 5, 12, 16, 38, 0);
check_datetime_conversion(2014, 11, 28, 21, 15, 12);
}
#[test]
fn test_convert_float16_to_string() {
assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
}
#[test]
fn test_convert_float_to_string() {
assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
}
#[test]
fn test_convert_double_to_string() {
assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
assert_eq!(
format!("{}", Field::Double(1.79769313486E308)),
"1.79769313486E308"
);
assert_eq!(
format!("{}", Field::Double(-1.79769313486E308)),
"-1.79769313486E308"
);
}
#[test]
fn test_convert_decimal_to_string() {
fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
assert_eq!(convert_decimal_to_string(&decimal), res);
}
check_decimal(
vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
38,
18,
"1.000000000000000000",
);
check_decimal(
vec![
249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
],
36,
28,
"-12344.0242342304923409234234293432",
);
check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
check_decimal(vec![207, 200], 10, 2, "-123.44");
check_decimal(vec![207, 200], 10, 8, "-0.00012344");
}
#[test]
fn test_row_display() {
    // Variants whose `Display` output is a fixed literal.
    let literal_cases = [
        (Field::Null, "null"),
        (Field::Bool(true), "true"),
        (Field::Bool(false), "false"),
        (Field::Byte(1), "1"),
        (Field::Short(2), "2"),
        (Field::Int(3), "3"),
        (Field::Long(4), "4"),
        (Field::UByte(1), "1"),
        (Field::UShort(2), "2"),
        (Field::UInt(3), "3"),
        (Field::ULong(4), "4"),
        (Field::Float16(f16::E), "2.71875"),
        (Field::Float(5.0), "5.0"),
        (Field::Float(5.1234), "5.1234"),
        (Field::Double(6.0), "6.0"),
        (Field::Double(6.1234), "6.1234"),
        (Field::Str("abc".to_string()), "\"abc\""),
        (Field::Bytes(ByteArray::from(vec![1, 2, 3])), "[1, 2, 3]"),
    ];
    for (field, rendered) in &literal_cases {
        assert_eq!(format!("{field}"), *rendered);
    }

    // Date, timestamp and decimal variants must render exactly like the
    // corresponding conversion helpers.
    let date = Field::Date(14611);
    assert_eq!(format!("{date}"), convert_date_to_string(14611));

    let millis = Field::TimestampMillis(1262391174000);
    assert_eq!(
        format!("{millis}"),
        convert_timestamp_millis_to_string(1262391174000)
    );

    let micros = Field::TimestampMicros(1262391174000000);
    assert_eq!(
        format!("{micros}"),
        convert_timestamp_micros_to_string(1262391174000000)
    );

    let decimal = Field::Decimal(Decimal::from_i32(4, 8, 2));
    assert_eq!(
        format!("{decimal}"),
        convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
    );

    // Nested group: `{name: value, ...}`.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
        ("z".to_string(), Field::Float(3.1)),
        ("a".to_string(), Field::Str("abc".to_string())),
    ]));
    assert_eq!(format!("{group}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");

    // List: `[elem, ...]`.
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    assert_eq!(format!("{list}"), "[2, 1, null, 12]");

    // Map: `{key -> value, ...}`.
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    assert_eq!(format!("{map}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
}
#[test]
fn test_is_primitive() {
    // Every non-nested variant must report itself as primitive. `Field::Date`
    // is included so that all scalar variants used in this file are covered.
    let primitives = [
        Field::Null,
        Field::Bool(true),
        Field::Bool(false),
        Field::Byte(1),
        Field::Short(2),
        Field::Int(3),
        Field::Long(4),
        Field::UByte(1),
        Field::UShort(2),
        Field::UInt(3),
        Field::ULong(4),
        Field::Float16(f16::E),
        Field::Float(5.0),
        Field::Float(5.1234),
        Field::Double(6.0),
        Field::Double(6.1234),
        Field::Str("abc".to_string()),
        Field::Bytes(ByteArray::from(vec![1, 2, 3])),
        Field::Date(14611),
        Field::TimestampMillis(12345678),
        Field::TimestampMicros(12345678901),
        Field::Decimal(Decimal::from_i32(4, 8, 2)),
    ];
    for field in &primitives {
        assert!(field.is_primitive(), "{field:?} should be primitive");
    }

    // Groups, lists and maps are the nested variants and must not be primitive.
    let nested = [
        Field::Group(make_row(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
            ("z".to_string(), Field::Float(3.1)),
            ("a".to_string(), Field::Str("abc".to_string())),
        ])),
        Field::ListInternal(make_list(vec![
            Field::Int(2),
            Field::Int(1),
            Field::Null,
            Field::Int(12),
        ])),
        Field::MapInternal(make_map(vec![
            (Field::Int(1), Field::Float(1.2)),
            (Field::Int(2), Field::Float(4.5)),
            (Field::Int(3), Field::Float(2.3)),
        ])),
    ];
    for field in &nested {
        assert!(!field.is_primitive(), "{field:?} should not be primitive");
    }
}
#[test]
fn test_row_primitive_field_fmt() {
    // One column per primitive variant; each column name is its own index.
    let columns = vec![
        ("00".to_string(), Field::Null),
        ("01".to_string(), Field::Bool(false)),
        ("02".to_string(), Field::Byte(3)),
        ("03".to_string(), Field::Short(4)),
        ("04".to_string(), Field::Int(5)),
        ("05".to_string(), Field::Long(6)),
        ("06".to_string(), Field::UByte(7)),
        ("07".to_string(), Field::UShort(8)),
        ("08".to_string(), Field::UInt(9)),
        ("09".to_string(), Field::ULong(10)),
        ("10".to_string(), Field::Float(11.1)),
        ("11".to_string(), Field::Double(12.1)),
        ("12".to_string(), Field::Str("abc".to_string())),
        (
            "13".to_string(),
            Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        ),
        ("14".to_string(), Field::Date(14611)),
        ("15".to_string(), Field::TimestampMillis(1262391174000)),
        ("16".to_string(), Field::TimestampMicros(1262391174000000)),
        ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
        ("18".to_string(), Field::Float16(f16::PI)),
    ];
    let row = make_row(columns);

    // Expected rendering of every column, listed in column order.
    let expected: Vec<String> = vec![
        "null".to_string(),
        "false".to_string(),
        "3".to_string(),
        "4".to_string(),
        "5".to_string(),
        "6".to_string(),
        "7".to_string(),
        "8".to_string(),
        "9".to_string(),
        "10".to_string(),
        "11.1".to_string(),
        "12.1".to_string(),
        "\"abc\"".to_string(),
        "[1, 2, 3, 4, 5]".to_string(),
        convert_date_to_string(14611),
        convert_timestamp_millis_to_string(1262391174000),
        convert_timestamp_micros_to_string(1262391174000000),
        "0.04".to_string(),
        "3.140625".to_string(),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, format!("{}", row.fmt(idx)));
    }
}
#[test]
fn test_row_complex_field_fmt() {
    // Build one column for each nested variant: group, list and map.
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("00".to_string(), group),
        ("01".to_string(), list),
        ("02".to_string(), map),
    ]);

    // Formatting a column by index must yield the nested value's rendering.
    let rendered_group = format!("{}", row.fmt(0));
    let rendered_list = format!("{}", row.fmt(1));
    let rendered_map = format!("{}", row.fmt(2));
    assert_eq!("{x: null, Y: 2}", rendered_group);
    assert_eq!("[2, 1, null, 12]", rendered_list);
    assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", rendered_map);
}
#[test]
fn test_row_primitive_accessors() {
    // One column per primitive variant; the accessors below address them by index.
    let columns = vec![
        ("a".to_string(), Field::Null),
        ("b".to_string(), Field::Bool(false)),
        ("c".to_string(), Field::Byte(3)),
        ("d".to_string(), Field::Short(4)),
        ("e".to_string(), Field::Int(5)),
        ("f".to_string(), Field::Long(6)),
        ("g".to_string(), Field::UByte(3)),
        ("h".to_string(), Field::UShort(4)),
        ("i".to_string(), Field::UInt(5)),
        ("j".to_string(), Field::ULong(6)),
        ("k".to_string(), Field::Float(7.1)),
        ("l".to_string(), Field::Double(8.1)),
        ("m".to_string(), Field::Str("abc".to_string())),
        (
            "n".to_string(),
            Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        ),
        ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
        ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
    ];
    let row = make_row(columns);

    // Boolean and integer accessors return the stored value unchanged.
    assert!(!row.get_bool(1).unwrap());
    assert_eq!(row.get_byte(2).unwrap(), 3);
    assert_eq!(row.get_short(3).unwrap(), 4);
    assert_eq!(row.get_int(4).unwrap(), 5);
    assert_eq!(row.get_long(5).unwrap(), 6);
    assert_eq!(row.get_ubyte(6).unwrap(), 3);
    assert_eq!(row.get_ushort(7).unwrap(), 4);
    assert_eq!(row.get_uint(8).unwrap(), 5);
    assert_eq!(row.get_ulong(9).unwrap(), 6);

    // Floating-point values are compared within one machine epsilon.
    let float_delta = (row.get_float(10).unwrap() - 7.1).abs();
    assert!(float_delta < f32::EPSILON);
    let double_delta = (row.get_double(11).unwrap() - 8.1).abs();
    assert!(double_delta < f64::EPSILON);

    // String, bytes and decimal accessors hand back the stored values.
    assert_eq!(row.get_string(12).unwrap(), "abc");
    assert_eq!(row.get_bytes(13).unwrap().len(), 5);
    assert_eq!(row.get_decimal(14).unwrap().precision(), 7);

    let half_delta = (row.get_float16(15).unwrap() - f16::from_f32(9.1)).abs();
    assert!(half_delta < f16::EPSILON);
}
#[test]
fn test_row_primitive_invalid_accessors() {
    // A row made exclusively of primitive columns.
    let columns = vec![
        ("a".to_string(), Field::Null),
        ("b".to_string(), Field::Bool(false)),
        ("c".to_string(), Field::Byte(3)),
        ("d".to_string(), Field::Short(4)),
        ("e".to_string(), Field::Int(5)),
        ("f".to_string(), Field::Long(6)),
        ("g".to_string(), Field::UByte(3)),
        ("h".to_string(), Field::UShort(4)),
        ("i".to_string(), Field::UInt(5)),
        ("j".to_string(), Field::ULong(6)),
        ("k".to_string(), Field::Float(7.1)),
        ("l".to_string(), Field::Double(8.1)),
        ("m".to_string(), Field::Str("abc".to_string())),
        (
            "n".to_string(),
            Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
        ),
        ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
        ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
    ];
    let row = make_row(columns);

    // No column holds a group, so the group accessor must fail at every index.
    (0..row.len()).for_each(|idx| assert!(row.get_group(idx).is_err()));
}
#[test]
fn test_row_complex_accessors() {
    // Nested values placed at columns 0 (group), 1 (list) and 2 (map).
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Each matching accessor exposes the nested container with its full length.
    let group_len = row.get_group(0).unwrap().len();
    let list_len = row.get_list(1).unwrap().len();
    let map_len = row.get_map(2).unwrap().len();
    assert_eq!(2, group_len);
    assert_eq!(4, list_len);
    assert_eq!(3, map_len);
}
#[test]
fn test_row_complex_invalid_accessors() {
    // Nested values placed at columns 0 (group), 1 (list) and 2 (map).
    let group = Field::Group(make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]));
    let list = Field::ListInternal(make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]));
    let map = Field::MapInternal(make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]));
    let row = make_row(vec![
        ("a".to_string(), group),
        ("b".to_string(), list),
        ("c".to_string(), map),
    ]);

    // Reading a nested column as a float must fail with a message naming the
    // actual variant stored in that column.
    let expectations = [
        (0, "Parquet error: Cannot access Group as Float"),
        (1, "Parquet error: Cannot access ListInternal as Float"),
        (2, "Parquet error: Cannot access MapInternal as Float"),
    ];
    for (idx, message) in &expectations {
        let error = row.get_float(*idx).unwrap_err();
        assert_eq!(error.to_string(), *message);
    }
}
#[test]
fn test_list_primitive_accessors() {
    // Each section builds a homogeneous list and reads one element back
    // through the accessor matching the element type.
    let bools = make_list(vec![Field::Bool(false)]);
    assert!(!bools.get_bool(0).unwrap());

    let bytes = make_list(vec![Field::Byte(3), Field::Byte(4)]);
    assert_eq!(bytes.get_byte(1).unwrap(), 4);

    let shorts = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
    assert_eq!(shorts.get_short(2).unwrap(), 6);

    let ints = make_list(vec![Field::Int(5)]);
    assert_eq!(ints.get_int(0).unwrap(), 5);

    let longs = make_list(vec![Field::Long(6), Field::Long(7)]);
    assert_eq!(longs.get_long(1).unwrap(), 7);

    let ubytes = make_list(vec![Field::UByte(3), Field::UByte(4)]);
    assert_eq!(ubytes.get_ubyte(1).unwrap(), 4);

    let ushorts = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
    assert_eq!(ushorts.get_ushort(2).unwrap(), 6);

    let uints = make_list(vec![Field::UInt(5)]);
    assert_eq!(uints.get_uint(0).unwrap(), 5);

    let ulongs = make_list(vec![Field::ULong(6), Field::ULong(7)]);
    assert_eq!(ulongs.get_ulong(1).unwrap(), 7);

    // Floating-point elements are compared within one machine epsilon.
    let halves = make_list(vec![Field::Float16(f16::PI)]);
    let half_delta = (halves.get_float16(0).unwrap() - f16::PI).abs();
    assert!(half_delta < f16::EPSILON);

    let floats = make_list(vec![
        Field::Float(8.1),
        Field::Float(9.2),
        Field::Float(10.3),
    ]);
    let float_delta = (floats.get_float(2).unwrap() - 10.3).abs();
    assert!(float_delta < f32::EPSILON);

    let doubles = make_list(vec![Field::Double(PI)]);
    let double_delta = (doubles.get_double(0).unwrap() - PI).abs();
    assert!(double_delta < f64::EPSILON);

    let strings = make_list(vec![Field::Str("abc".to_string())]);
    assert_eq!(strings.get_string(0).unwrap(), "abc");

    let blobs = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
    assert_eq!(&[1, 2, 3, 4, 5], blobs.get_bytes(0).unwrap().data());

    let decimals = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
    assert_eq!(&[0, 0, 0, 4], decimals.get_decimal(0).unwrap().data());
}
#[test]
fn test_list_primitive_invalid_accessors() {
    // Each section reads an element through an accessor for a different
    // type than the one stored, which must yield an error.
    let bools = make_list(vec![Field::Bool(false)]);
    assert!(bools.get_byte(0).is_err());

    let bytes = make_list(vec![Field::Byte(3), Field::Byte(4)]);
    assert!(bytes.get_short(1).is_err());

    let shorts = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
    assert!(shorts.get_int(2).is_err());

    let ints = make_list(vec![Field::Int(5)]);
    assert!(ints.get_long(0).is_err());

    let longs = make_list(vec![Field::Long(6), Field::Long(7)]);
    assert!(longs.get_float(1).is_err());

    let ubytes = make_list(vec![Field::UByte(3), Field::UByte(4)]);
    assert!(ubytes.get_short(1).is_err());

    let ushorts = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
    assert!(ushorts.get_int(2).is_err());

    let uints = make_list(vec![Field::UInt(5)]);
    assert!(uints.get_long(0).is_err());

    let ulongs = make_list(vec![Field::ULong(6), Field::ULong(7)]);
    assert!(ulongs.get_float(1).is_err());

    let halves = make_list(vec![Field::Float16(f16::PI)]);
    assert!(halves.get_string(0).is_err());

    let floats = make_list(vec![
        Field::Float(8.1),
        Field::Float(9.2),
        Field::Float(10.3),
    ]);
    assert!(floats.get_double(2).is_err());

    let doubles = make_list(vec![Field::Double(PI)]);
    assert!(doubles.get_string(0).is_err());

    let strings = make_list(vec![Field::Str("abc".to_string())]);
    assert!(strings.get_bytes(0).is_err());

    let blobs = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
    assert!(blobs.get_bool(0).is_err());

    let decimals = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
    assert!(decimals.get_bool(0).is_err());
}
#[test]
fn test_list_complex_accessors() {
    // A list holding a single group element.
    let inner_group = make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]);
    let groups = make_list(vec![Field::Group(inner_group)]);
    assert_eq!(2, groups.get_group(0).unwrap().len());

    // A list holding a single nested list element.
    let inner_list = make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]);
    let lists = make_list(vec![Field::ListInternal(inner_list)]);
    assert_eq!(4, lists.get_list(0).unwrap().len());

    // A list holding a single map element.
    let inner_map = make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]);
    let maps = make_list(vec![Field::MapInternal(inner_map)]);
    assert_eq!(3, maps.get_map(0).unwrap().len());
}
#[test]
fn test_list_complex_invalid_accessors() {
    // Reading a nested element as a float must fail, and the error text must
    // name the variant that is actually stored.
    let inner_group = make_row(vec![
        ("x".to_string(), Field::Null),
        ("Y".to_string(), Field::Int(2)),
    ]);
    let groups = make_list(vec![Field::Group(inner_group)]);
    let group_error = groups.get_float(0).unwrap_err();
    assert_eq!(
        group_error.to_string(),
        "Parquet error: Cannot access Group as Float"
    );

    let inner_list = make_list(vec![
        Field::Int(2),
        Field::Int(1),
        Field::Null,
        Field::Int(12),
    ]);
    let lists = make_list(vec![Field::ListInternal(inner_list)]);
    let list_error = lists.get_float(0).unwrap_err();
    assert_eq!(
        list_error.to_string(),
        "Parquet error: Cannot access ListInternal as Float"
    );

    let inner_map = make_map(vec![
        (Field::Int(1), Field::Float(1.2)),
        (Field::Int(2), Field::Float(4.5)),
        (Field::Int(3), Field::Float(2.3)),
    ]);
    let maps = make_list(vec![Field::MapInternal(inner_map)]);
    let map_error = maps.get_float(0).unwrap_err();
    assert_eq!(
        map_error.to_string(),
        "Parquet error: Cannot access MapInternal as Float"
    );
}
#[test]
fn test_map_accessors() {
    // Single source of truth for the map contents, in insertion order.
    let expected = [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")];
    let entries = expected
        .iter()
        .map(|(key, value)| (Field::Int(*key), Field::Str((*value).to_string())))
        .collect();
    let map = make_map(entries);
    assert_eq!(5, map.len());

    // Keys and values must come back at the same positions they were inserted.
    for (idx, (key, value)) in expected.iter().enumerate() {
        assert_eq!(*key, map.get_keys().get_int(idx).unwrap());
        assert_eq!(*value, map.get_values().get_string(idx).unwrap());
    }
}
#[test]
fn test_to_json_value() {
    // Shorthand constructors for the expected JSON values.
    let int = |n: i32| Value::Number(serde_json::Number::from(n));
    let float = |x: f64| Value::Number(serde_json::Number::from_f64(x).unwrap());
    let text = |s: &str| Value::String(s.to_string());

    // Non-nested variants paired with the JSON value they must convert to.
    let scalar_cases = vec![
        (Field::Null, Value::Null),
        (Field::Bool(true), Value::Bool(true)),
        (Field::Bool(false), Value::Bool(false)),
        (Field::Byte(1), int(1)),
        (Field::Short(2), int(2)),
        (Field::Int(3), int(3)),
        (Field::Long(4), int(4)),
        (Field::UByte(1), int(1)),
        (Field::UShort(2), int(2)),
        (Field::UInt(3), int(3)),
        (Field::ULong(4), int(4)),
        (Field::Float16(f16::from_f32(5.0)), float(5.0)),
        (Field::Float(5.0), float(5.0)),
        // The expected value is the f32 literal widened to f64.
        (Field::Float(5.1234), float(5.1234_f32 as f64)),
        (Field::Double(6.0), float(6.0)),
        (Field::Double(6.1234), float(6.1234)),
        (Field::Str("abc".to_string()), text("abc")),
        (Field::Decimal(Decimal::from_i32(4, 8, 2)), text("0.04")),
        (Field::Bytes(ByteArray::from(vec![1, 2, 3])), text("AQID")),
        (
            Field::TimestampMillis(12345678),
            text("1970-01-01 03:25:45 +00:00"),
        ),
        (
            Field::TimestampMicros(12345678901),
            Value::String(convert_timestamp_micros_to_string(12345678901)),
        ),
    ];
    for (field, expected) in scalar_cases {
        assert_eq!(field.to_json_value(), expected);
    }

    // A group converts to a JSON object keyed by column name.
    let group = Field::Group(make_row(vec![
        ("X".to_string(), Field::Int(1)),
        ("Y".to_string(), Field::Double(2.2)),
        ("Z".to_string(), Field::Str("abc".to_string())),
    ]));
    assert_eq!(
        group.to_json_value(),
        serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
    );

    // A list converts to a JSON array, preserving order and nulls.
    let list = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
    let expected_array = Value::Array(vec![int(1), int(12), Value::Null]);
    assert_eq!(list.to_json_value(), expected_array);

    // A map with string keys converts to a JSON object.
    let map = Field::MapInternal(make_map(vec![
        (Field::Str("k1".to_string()), Field::Double(1.2)),
        (Field::Str("k2".to_string()), Field::Double(3.4)),
        (Field::Str("k3".to_string()), Field::Double(4.5)),
    ]));
    assert_eq!(
        map.to_json_value(),
        serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
    );
}
}
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    // These tests import only the constructor helpers and `Field`, and check
    // the contents of rows, lists and maps through their accessor methods.
    use super::{make_list, make_map, make_row};
    use crate::record::Field;

    #[test]
    fn test_field_visibility() {
        let nested = Field::Group(make_row(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]));
        let row = make_row(vec![("a".to_string(), nested)]);

        // The column iterator must yield the single (name, field) pair.
        let (name, field) = match row.get_column_iter().next() {
            Some(column) => column,
            None => panic!("Expected at least one column"),
        };
        assert_eq!("a", name);

        // The field must be matchable as a group and expose its inner row.
        if let Field::Group(inner) = field {
            let expected_inner = make_row(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ]);
            assert_eq!(&expected_inner, inner);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    #[test]
    fn test_list_element_access() {
        // `elements()` must return exactly the fields the list was built from.
        let inner_row = make_row(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let expected = vec![Field::Int(1), Field::Group(inner_row)];
        let list = make_list(expected.clone());
        assert_eq!(expected.as_slice(), list.elements());
    }

    #[test]
    fn test_map_entry_access() {
        // `entries()` must return exactly the pairs the map was built from.
        let first = (Field::Str("one".to_owned()), Field::Int(1));
        let second = (Field::Str("two".to_owned()), Field::Int(2));
        let expected = vec![first, second];
        let map = make_map(expected.clone());
        assert_eq!(expected.as_slice(), map.entries());
    }
}