#![doc(
    html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
    html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_docs)]
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::iter::Map;
use std::slice::Windows;
use std::sync::Arc;

use arrow_array::cast::*;
use arrow_array::types::{ArrowDictionaryKeyType, ByteArrayType, ByteViewType};
use arrow_array::*;
use arrow_buffer::{ArrowNativeType, Buffer, OffsetBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::*;
use variable::{decode_binary_view, decode_string_view};

use crate::fixed::{decode_bool, decode_fixed_size_binary, decode_primitive};
use crate::list::{compute_lengths_fixed_size_list, encode_fixed_size_list};
use crate::variable::{decode_binary, decode_string};
use arrow_array::types::{Int16Type, Int32Type, Int64Type};

mod fixed;
mod list;
mod run;
mod variable;
185
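/// Converts [`ArrayRef`] columns into a comparable, variable-length row format.
///
/// The byte-wise ordering of the converted [`Rows`] matches the lexicographic
/// ordering of the original columns under the provided [`SortField`] options,
/// so rows can be compared and hashed directly as byte slices.
///
/// A minimal usage sketch (a doc-test assuming this crate is published as
/// `arrow_row`; the column values are illustrative):
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayRef, Int32Array, StringArray};
/// # use arrow_schema::DataType;
/// # use arrow_row::{RowConverter, SortField};
/// let converter = RowConverter::new(vec![
///     SortField::new(DataType::Int32),
///     SortField::new(DataType::Utf8),
/// ])
/// .unwrap();
///
/// let cols = [
///     Arc::new(Int32Array::from(vec![3, 1, 2])) as ArrayRef,
///     Arc::new(StringArray::from(vec!["a", "c", "b"])) as ArrayRef,
/// ];
/// let rows = converter.convert_columns(&cols).unwrap();
///
/// // Rows compare the same way the original columns would sort
/// assert!(rows.row(1) < rows.row(2));
/// assert!(rows.row(2) < rows.row(0));
///
/// // The rows can be converted back into the original columns
/// let back = converter.convert_rows(&rows).unwrap();
/// assert_eq!(back.len(), 2);
/// ```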
#[derive(Debug)]
pub struct RowConverter {
    fields: Arc<[SortField]>,
    codecs: Vec<Codec>,
}
537
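/// Per-field encoding state of a [`RowConverter`]
///
/// Stateless codecs encode a column directly, whereas dictionary and nested
/// codecs hold a child `RowConverter` (and, where needed, the pre-encoded row
/// used for null values).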
#[derive(Debug)]
enum Codec {
    Stateless,
    Dictionary(RowConverter, OwnedRow),
    Struct(RowConverter, OwnedRow),
    List(RowConverter),
    RunEndEncoded(RowConverter),
    Union(Vec<RowConverter>, Vec<i8>, Vec<OwnedRow>),
}
556
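/// Computes the bounds `(min_offset, max_end)` of the child values actually
/// referenced by a [`GenericListViewArray`], so that only that slice of the
/// values needs to be converted. Returns `(0, 0)` if no non-empty views exist.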
557fn compute_list_view_bounds<O: OffsetSizeTrait>(array: &GenericListViewArray<O>) -> (usize, usize) {
560 if array.is_empty() {
561 return (0, 0);
562 }
563
564 let offsets = array.value_offsets();
565 let sizes = array.value_sizes();
566 let values_len = array.values().len();
567
568 let mut min_offset = usize::MAX;
569 let mut max_end = 0usize;
570
571 for i in 0..array.len() {
572 let offset = offsets[i].as_usize();
573 let size = sizes[i].as_usize();
574 let end = offset + size;
575
576 if size > 0 {
577 min_offset = min_offset.min(offset);
578 max_end = max_end.max(end);
579 }
580
581 if min_offset == 0 && max_end == values_len {
585 break;
586 }
587 }
588
589 if min_offset == usize::MAX {
590 (0, 0)
592 } else {
593 (min_offset, max_end)
594 }
595}
596
597impl Codec {
598 fn new(sort_field: &SortField) -> Result<Self, ArrowError> {
599 match &sort_field.data_type {
600 DataType::Dictionary(_, values) => {
601 let sort_field =
602 SortField::new_with_options(values.as_ref().clone(), sort_field.options);
603
604 let converter = RowConverter::new(vec![sort_field])?;
605 let null_array = new_null_array(values.as_ref(), 1);
606 let nulls = converter.convert_columns(&[null_array])?;
607
608 let owned = OwnedRow {
609 data: nulls.buffer.into(),
610 config: nulls.config,
611 };
612 Ok(Self::Dictionary(converter, owned))
613 }
614 DataType::RunEndEncoded(_, values) => {
615 let options = SortOptions {
617 descending: false,
618 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
619 };
620
621 let field = SortField::new_with_options(values.data_type().clone(), options);
622 let converter = RowConverter::new(vec![field])?;
623 Ok(Self::RunEndEncoded(converter))
624 }
625 d if !d.is_nested() => Ok(Self::Stateless),
626 DataType::List(f)
627 | DataType::LargeList(f)
628 | DataType::ListView(f)
629 | DataType::LargeListView(f) => {
630 let options = SortOptions {
634 descending: false,
635 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
636 };
637
638 let field = SortField::new_with_options(f.data_type().clone(), options);
639 let converter = RowConverter::new(vec![field])?;
640 Ok(Self::List(converter))
641 }
642 DataType::FixedSizeList(f, _) => {
643 let field = SortField::new_with_options(f.data_type().clone(), sort_field.options);
644 let converter = RowConverter::new(vec![field])?;
645 Ok(Self::List(converter))
646 }
647 DataType::Struct(f) => {
648 let sort_fields = f
649 .iter()
650 .map(|x| SortField::new_with_options(x.data_type().clone(), sort_field.options))
651 .collect();
652
653 let converter = RowConverter::new(sort_fields)?;
654 let nulls: Vec<_> = f.iter().map(|x| new_null_array(x.data_type(), 1)).collect();
655
656 let nulls = converter.convert_columns(&nulls)?;
657 let owned = OwnedRow {
658 data: nulls.buffer.into(),
659 config: nulls.config,
660 };
661
662 Ok(Self::Struct(converter, owned))
663 }
664 DataType::Union(fields, _mode) => {
665 let options = SortOptions {
668 descending: false,
669 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
670 };
671
672 let mut converters = Vec::with_capacity(fields.len());
673 let mut type_ids = Vec::with_capacity(fields.len());
674 let mut null_rows = Vec::with_capacity(fields.len());
675
676 for (type_id, field) in fields.iter() {
677 let sort_field =
678 SortField::new_with_options(field.data_type().clone(), options);
679 let converter = RowConverter::new(vec![sort_field])?;
680
681 let null_array = new_null_array(field.data_type(), 1);
682 let nulls = converter.convert_columns(&[null_array])?;
683 let owned = OwnedRow {
684 data: nulls.buffer.into(),
685 config: nulls.config,
686 };
687
688 converters.push(converter);
689 type_ids.push(type_id);
690 null_rows.push(owned);
691 }
692
693 Ok(Self::Union(converters, type_ids, null_rows))
694 }
695 _ => Err(ArrowError::NotYetImplemented(format!(
696 "not yet implemented: {:?}",
697 sort_field.data_type
698 ))),
699 }
700 }
701
702 fn encoder(&self, array: &dyn Array) -> Result<Encoder<'_>, ArrowError> {
703 match self {
704 Codec::Stateless => Ok(Encoder::Stateless),
705 Codec::Dictionary(converter, nulls) => {
706 let values = array.as_any_dictionary().values().clone();
707 let rows = converter.convert_columns(&[values])?;
708 Ok(Encoder::Dictionary(rows, nulls.row()))
709 }
710 Codec::Struct(converter, null) => {
711 let v = as_struct_array(array);
712 let rows = converter.convert_columns(v.columns())?;
713 Ok(Encoder::Struct(rows, null.row()))
714 }
715 Codec::List(converter) => {
716 let values = match array.data_type() {
717 DataType::List(_) => {
718 let list_array = as_list_array(array);
719 let first_offset = list_array.offsets()[0] as usize;
720 let last_offset =
721 list_array.offsets()[list_array.offsets().len() - 1] as usize;
722
723 list_array
726 .values()
727 .slice(first_offset, last_offset - first_offset)
728 }
729 DataType::LargeList(_) => {
730 let list_array = as_large_list_array(array);
731
732 let first_offset = list_array.offsets()[0] as usize;
733 let last_offset =
734 list_array.offsets()[list_array.offsets().len() - 1] as usize;
735
736 list_array
739 .values()
740 .slice(first_offset, last_offset - first_offset)
741 }
742 DataType::ListView(_) => {
743 let list_view_array = array.as_list_view::<i32>();
744 let (min_offset, max_end) = compute_list_view_bounds(list_view_array);
745 list_view_array
746 .values()
747 .slice(min_offset, max_end - min_offset)
748 }
749 DataType::LargeListView(_) => {
750 let list_view_array = array.as_list_view::<i64>();
751 let (min_offset, max_end) = compute_list_view_bounds(list_view_array);
752 list_view_array
753 .values()
754 .slice(min_offset, max_end - min_offset)
755 }
756 DataType::FixedSizeList(_, _) => {
757 as_fixed_size_list_array(array).values().clone()
758 }
759 _ => unreachable!(),
760 };
761 let rows = converter.convert_columns(&[values])?;
762 Ok(Encoder::List(rows))
763 }
764 Codec::RunEndEncoded(converter) => {
765 let values = match array.data_type() {
766 DataType::RunEndEncoded(r, _) => match r.data_type() {
767 DataType::Int16 => array.as_run::<Int16Type>().values_slice(),
768 DataType::Int32 => array.as_run::<Int32Type>().values_slice(),
769 DataType::Int64 => array.as_run::<Int64Type>().values_slice(),
770 _ => unreachable!("Unsupported run end index type: {r:?}"),
771 },
772 _ => unreachable!(),
773 };
774 let rows = converter.convert_columns(std::slice::from_ref(&values))?;
775 Ok(Encoder::RunEndEncoded(rows))
776 }
777 Codec::Union(converters, field_to_type_ids, _) => {
778 let union_array = array
779 .as_any()
780 .downcast_ref::<UnionArray>()
781 .expect("expected Union array");
782
783 let type_ids = union_array.type_ids().clone();
784 let offsets = union_array.offsets().cloned();
785
786 let mut child_rows = Vec::with_capacity(converters.len());
787 for (field_idx, converter) in converters.iter().enumerate() {
788 let type_id = field_to_type_ids[field_idx];
789 let child_array = union_array.child(type_id);
790 let rows = converter.convert_columns(std::slice::from_ref(child_array))?;
791 child_rows.push(rows);
792 }
793
794 Ok(Encoder::Union {
795 child_rows,
796 field_to_type_ids: field_to_type_ids.clone(),
797 type_ids,
798 offsets,
799 })
800 }
801 }
802 }
803
804 fn size(&self) -> usize {
805 match self {
806 Codec::Stateless => 0,
807 Codec::Dictionary(converter, nulls) => converter.size() + nulls.data.len(),
808 Codec::Struct(converter, nulls) => converter.size() + nulls.data.len(),
809 Codec::List(converter) => converter.size(),
810 Codec::RunEndEncoded(converter) => converter.size(),
811 Codec::Union(converters, _, null_rows) => {
812 converters.iter().map(|c| c.size()).sum::<usize>()
813 + null_rows.iter().map(|n| n.data.len()).sum::<usize>()
814 }
815 }
816 }
817}
818
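/// Per-array encoding state produced by `Codec::encoder`, holding any
/// pre-converted child rows needed by `encode_column`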
819#[derive(Debug)]
820enum Encoder<'a> {
821 Stateless,
823 Dictionary(Rows, Row<'a>),
825 Struct(Rows, Row<'a>),
831 List(Rows),
833 RunEndEncoded(Rows),
835 Union {
837 child_rows: Vec<Rows>,
838 field_to_type_ids: Vec<i8>,
839 type_ids: ScalarBuffer<i8>,
840 offsets: Option<ScalarBuffer<i32>>,
841 },
842}
843
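/// The data type and sort options of a single column handled by a [`RowConverter`]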
844#[derive(Debug, Clone, PartialEq, Eq)]
846pub struct SortField {
847 options: SortOptions,
849 data_type: DataType,
851}
852
853impl SortField {
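    /// Creates a new `SortField` with the default [`SortOptions`]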
854 pub fn new(data_type: DataType) -> Self {
856 Self::new_with_options(data_type, Default::default())
857 }
858
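    /// Creates a new `SortField` with the provided [`SortOptions`]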
859 pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
861 Self { options, data_type }
862 }
863
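    /// Returns the in-memory size of this `SortField` in bytes, including any
    /// heap allocations of its [`DataType`]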
864 pub fn size(&self) -> usize {
868 self.data_type.size() + std::mem::size_of::<Self>() - std::mem::size_of::<DataType>()
869 }
870}
871
872impl RowConverter {
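    /// Creates a new [`RowConverter`] for the provided fields, returning an
    /// error if any of the data types are not yet supported by the row format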
873 pub fn new(fields: Vec<SortField>) -> Result<Self, ArrowError> {
875 if !Self::supports_fields(&fields) {
876 return Err(ArrowError::NotYetImplemented(format!(
877 "Row format support not yet implemented for: {fields:?}"
878 )));
879 }
880
881 let codecs = fields.iter().map(Codec::new).collect::<Result<_, _>>()?;
882 Ok(Self {
883 fields: fields.into(),
884 codecs,
885 })
886 }
887
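    /// Returns `true` if [`Self::new`] would succeed for the provided fields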
888 pub fn supports_fields(fields: &[SortField]) -> bool {
890 fields.iter().all(|x| Self::supports_datatype(&x.data_type))
891 }
892
893 fn supports_datatype(d: &DataType) -> bool {
894 match d {
895 _ if !d.is_nested() => true,
896 DataType::List(f)
897 | DataType::LargeList(f)
898 | DataType::ListView(f)
899 | DataType::LargeListView(f)
900 | DataType::FixedSizeList(f, _) => Self::supports_datatype(f.data_type()),
901 DataType::Struct(f) => f.iter().all(|x| Self::supports_datatype(x.data_type())),
902 DataType::RunEndEncoded(_, values) => Self::supports_datatype(values.data_type()),
903 DataType::Union(fs, _mode) => fs
904 .iter()
905 .all(|(_, f)| Self::supports_datatype(f.data_type())),
906 _ => false,
907 }
908 }
909
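    /// Converts the provided columns into [`Rows`]
    ///
    /// See [`RowConverter::append`] for the errors returned when the columns
    /// do not match this converter's fields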
910 pub fn convert_columns(&self, columns: &[ArrayRef]) -> Result<Rows, ArrowError> {
920 let num_rows = columns.first().map(|x| x.len()).unwrap_or(0);
921 let mut rows = self.empty_rows(num_rows, 0);
922 self.append(&mut rows, columns)?;
923 Ok(rows)
924 }
925
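    /// Appends the provided columns to an existing [`Rows`] instance
    ///
    /// # Panics
    ///
    /// Panics if `rows` was not created by this [`RowConverter`]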
926 pub fn append(&self, rows: &mut Rows, columns: &[ArrayRef]) -> Result<(), ArrowError> {
957 assert!(
958 Arc::ptr_eq(&rows.config.fields, &self.fields),
959 "rows were not produced by this RowConverter"
960 );
961
962 if columns.len() != self.fields.len() {
963 return Err(ArrowError::InvalidArgumentError(format!(
964 "Incorrect number of arrays provided to RowConverter, expected {} got {}",
965 self.fields.len(),
966 columns.len()
967 )));
968 }
        for column in columns.iter().skip(1) {
            if column.len() != columns[0].len() {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "RowConverter columns must all have the same length, expected {} got {}",
                    columns[0].len(),
                    column.len()
                )));
            }
        }
978
979 let encoders = columns
980 .iter()
981 .zip(&self.codecs)
982 .zip(self.fields.iter())
983 .map(|((column, codec), field)| {
984 if !column.data_type().equals_datatype(&field.data_type) {
985 return Err(ArrowError::InvalidArgumentError(format!(
986 "RowConverter column schema mismatch, expected {} got {}",
987 field.data_type,
988 column.data_type()
989 )));
990 }
991 codec.encoder(column.as_ref())
992 })
993 .collect::<Result<Vec<_>, _>>()?;
994
995 let write_offset = rows.num_rows();
996 let lengths = row_lengths(columns, &encoders);
997 let total = lengths.extend_offsets(rows.offsets[write_offset], &mut rows.offsets);
998 rows.buffer.resize(total, 0);
999
1000 for ((column, field), encoder) in columns.iter().zip(self.fields.iter()).zip(encoders) {
1001 encode_column(
1003 &mut rows.buffer,
1004 &mut rows.offsets[write_offset..],
1005 column.as_ref(),
1006 field.options,
1007 &encoder,
1008 )
1009 }
1010
1011 if cfg!(debug_assertions) {
1012 assert_eq!(*rows.offsets.last().unwrap(), rows.buffer.len());
1013 rows.offsets
1014 .windows(2)
1015 .for_each(|w| assert!(w[0] <= w[1], "offsets should be monotonic"));
1016 }
1017
1018 Ok(())
1019 }
1020
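    /// Converts [`Rows`] back into their original column representation
    ///
    /// # Panics
    ///
    /// Panics if any row was not produced by this [`RowConverter`]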
1021 pub fn convert_rows<'a, I>(&self, rows: I) -> Result<Vec<ArrayRef>, ArrowError>
1029 where
1030 I: IntoIterator<Item = Row<'a>>,
1031 {
1032 let mut validate_utf8 = false;
1033 let mut rows: Vec<_> = rows
1034 .into_iter()
1035 .map(|row| {
1036 assert!(
1037 Arc::ptr_eq(&row.config.fields, &self.fields),
1038 "rows were not produced by this RowConverter"
1039 );
1040 validate_utf8 |= row.config.validate_utf8;
1041 row.data
1042 })
1043 .collect();
1044
1045 let result = unsafe { self.convert_raw(&mut rows, validate_utf8) }?;
1049
1050 if cfg!(debug_assertions) {
1051 for (i, row) in rows.iter().enumerate() {
1052 if !row.is_empty() {
1053 return Err(ArrowError::InvalidArgumentError(format!(
1054 "Codecs {codecs:?} did not consume all bytes for row {i}, remaining bytes: {row:?}",
1055 codecs = &self.codecs
1056 )));
1057 }
1058 }
1059 }
1060
1061 Ok(result)
1062 }
1063
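    /// Creates an empty [`Rows`] with capacity for `row_capacity` rows and
    /// `data_capacity` bytes of row data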
1064 pub fn empty_rows(&self, row_capacity: usize, data_capacity: usize) -> Rows {
1093 let mut offsets = Vec::with_capacity(row_capacity.saturating_add(1));
1094 offsets.push(0);
1095
1096 Rows {
1097 offsets,
1098 buffer: Vec::with_capacity(data_capacity),
1099 config: RowConfig {
1100 fields: self.fields.clone(),
1101 validate_utf8: false,
1102 },
1103 }
1104 }
1105
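    /// Recreates [`Rows`] from a [`BinaryArray`] previously produced by
    /// [`Rows::try_into_binary`], re-enabling UTF-8 validation because the
    /// bytes are no longer trusted
    ///
    /// # Panics
    ///
    /// Panics if the array contains nulls
    ///
    /// A round-trip sketch (a doc-test assuming this crate is published as
    /// `arrow_row`):
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow_array::{ArrayRef, Int32Array};
    /// # use arrow_schema::DataType;
    /// # use arrow_row::{RowConverter, SortField};
    /// let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
    /// let rows = converter
    ///     .convert_columns(&[Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef])
    ///     .unwrap();
    ///
    /// let binary = rows.try_into_binary().expect("known-small rows");
    /// let rows = converter.from_binary(binary);
    /// assert_eq!(rows.num_rows(), 3);
    /// ```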
1106 pub fn from_binary(&self, array: BinaryArray) -> Rows {
1133 assert_eq!(
1134 array.null_count(),
1135 0,
1136 "can't construct Rows instance from array with nulls"
1137 );
1138 let (offsets, values, _) = array.into_parts();
1139 let offsets = offsets.iter().map(|&i| i.as_usize()).collect();
1140 let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec());
1142 Rows {
1143 buffer,
1144 offsets,
1145 config: RowConfig {
1146 fields: Arc::clone(&self.fields),
1147 validate_utf8: true,
1148 },
1149 }
1150 }
1151
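    /// Decodes raw row bytes back into arrays, advancing each slice in `rows`
    /// past the bytes it consumes
    ///
    /// # Safety
    ///
    /// The rows must have been produced by a converter with identical fields,
    /// and `validate_utf8` must be `true` if the bytes are not trusted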
1152 unsafe fn convert_raw(
1158 &self,
1159 rows: &mut [&[u8]],
1160 validate_utf8: bool,
1161 ) -> Result<Vec<ArrayRef>, ArrowError> {
1162 self.fields
1163 .iter()
1164 .zip(&self.codecs)
1165 .map(|(field, codec)| unsafe { decode_column(field, rows, codec, validate_utf8) })
1166 .collect()
1167 }
1168
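    /// Returns a [`RowParser`] that can reinterpret raw bytes as a [`Row`] for
    /// this converter's fields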
1169 pub fn parser(&self) -> RowParser {
1171 RowParser::new(Arc::clone(&self.fields))
1172 }
1173
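    /// Returns the memory usage of this `RowConverter` in bytes, including its
    /// fields and any child converters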
1174 pub fn size(&self) -> usize {
1178 std::mem::size_of::<Self>()
1179 + self.fields.iter().map(|x| x.size()).sum::<usize>()
1180 + self.codecs.capacity() * std::mem::size_of::<Codec>()
1181 + self.codecs.iter().map(Codec::size).sum::<usize>()
1182 }
1183}
1184
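/// Parses [`Row`]s from raw bytes, created by [`RowConverter::parser`]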
1185#[derive(Debug)]
1187pub struct RowParser {
1188 config: RowConfig,
1189}
1190
1191impl RowParser {
1192 fn new(fields: Arc<[SortField]>) -> Self {
1193 Self {
1194 config: RowConfig {
1195 fields,
1196 validate_utf8: true,
1197 },
1198 }
1199 }
1200
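    /// Creates a [`Row`] from the provided `bytes`, typically obtained from
    /// [`Row::data`] or a [`Rows::try_into_binary`] round trip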
1201 pub fn parse<'a>(&'a self, bytes: &'a [u8]) -> Row<'a> {
1206 Row {
1207 data: bytes,
1208 config: &self.config,
1209 }
1210 }
1211}
1212
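/// Configuration shared between [`Rows`] and the [`RowConverter`] that created them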
1213#[derive(Debug, Clone)]
1215struct RowConfig {
1216 fields: Arc<[SortField]>,
1218 validate_utf8: bool,
1220}
1221
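/// A row-oriented representation of a collection of columns; row `i` is the
/// byte slice `buffer[offsets[i]..offsets[i + 1]]`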
1222#[derive(Debug)]
1226pub struct Rows {
1227 buffer: Vec<u8>,
1229 offsets: Vec<usize>,
1231 config: RowConfig,
1233}
1234
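/// Iterator over the length in bytes of each row, returned by [`Rows::lengths`]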
1235pub type RowLengthIter<'a> = Map<Windows<'a, usize>, fn(&'a [usize]) -> usize>;
1237
1238impl Rows {
1239 pub fn push(&mut self, row: Row<'_>) {
1241 assert!(
1242 Arc::ptr_eq(&row.config.fields, &self.config.fields),
1243 "row was not produced by this RowConverter"
1244 );
1245 self.config.validate_utf8 |= row.config.validate_utf8;
1246 self.buffer.extend_from_slice(row.data);
1247 self.offsets.push(self.buffer.len())
1248 }
1249
1250 pub fn reserve(&mut self, row_capacity: usize, data_capacity: usize) {
1252 self.buffer.reserve(data_capacity);
1253 self.offsets.reserve(row_capacity);
1254 }
1255
1256 pub fn row(&self, row: usize) -> Row<'_> {
1258 assert!(row + 1 < self.offsets.len());
1259 unsafe { self.row_unchecked(row) }
1260 }
1261
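    /// Returns the row at `index` without bounds checking
    ///
    /// # Safety
    ///
    /// Caller must ensure `index` is less than [`Self::num_rows`]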
1262 pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
1267 let end = unsafe { self.offsets.get_unchecked(index + 1) };
1268 let start = unsafe { self.offsets.get_unchecked(index) };
1269 let data = unsafe { self.buffer.get_unchecked(*start..*end) };
1270 Row {
1271 data,
1272 config: &self.config,
1273 }
1274 }
1275
1276 pub fn row_len(&self, row: usize) -> usize {
1279 assert!(row + 1 < self.offsets.len());
1280
1281 self.offsets[row + 1] - self.offsets[row]
1282 }
1283
1284 pub fn lengths(&self) -> RowLengthIter<'_> {
1286 self.offsets.windows(2).map(|w| w[1] - w[0])
1287 }
1288
1289 pub fn clear(&mut self) {
1291 self.offsets.truncate(1);
1292 self.buffer.clear();
1293 }
1294
1295 pub fn num_rows(&self) -> usize {
1297 self.offsets.len() - 1
1298 }
1299
1300 pub fn iter(&self) -> RowsIter<'_> {
1302 self.into_iter()
1303 }
1304
1305 pub fn size(&self) -> usize {
1309 std::mem::size_of::<Self>()
1311 + self.buffer.capacity()
1312 + self.offsets.capacity() * std::mem::size_of::<usize>()
1313 }
1314
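    /// Converts this `Rows` into a [`BinaryArray`] with one value per row,
    /// erroring if the buffer exceeds `i32::MAX` bytes; see
    /// [`RowConverter::from_binary`] for the reverse conversion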
1315 pub fn try_into_binary(self) -> Result<BinaryArray, ArrowError> {
1345 if self.buffer.len() > i32::MAX as usize {
1346 return Err(ArrowError::InvalidArgumentError(format!(
1347 "{}-byte rows buffer too long to convert into a i32-indexed BinaryArray",
1348 self.buffer.len()
1349 )));
1350 }
1351 let offsets_scalar = ScalarBuffer::from_iter(self.offsets.into_iter().map(i32::usize_as));
1353 let array = unsafe {
1355 BinaryArray::new_unchecked(
1356 OffsetBuffer::new_unchecked(offsets_scalar),
1357 Buffer::from_vec(self.buffer),
1358 None,
1359 )
1360 };
1361 Ok(array)
1362 }
1363}
1364
1365impl<'a> IntoIterator for &'a Rows {
1366 type Item = Row<'a>;
1367 type IntoIter = RowsIter<'a>;
1368
1369 fn into_iter(self) -> Self::IntoIter {
1370 RowsIter {
1371 rows: self,
1372 start: 0,
1373 end: self.num_rows(),
1374 }
1375 }
1376}
1377
1378#[derive(Debug)]
1380pub struct RowsIter<'a> {
1381 rows: &'a Rows,
1382 start: usize,
1383 end: usize,
1384}
1385
1386impl<'a> Iterator for RowsIter<'a> {
1387 type Item = Row<'a>;
1388
1389 fn next(&mut self) -> Option<Self::Item> {
1390 if self.end == self.start {
1391 return None;
1392 }
1393
1394 let row = unsafe { self.rows.row_unchecked(self.start) };
1396 self.start += 1;
1397 Some(row)
1398 }
1399
1400 fn size_hint(&self) -> (usize, Option<usize>) {
1401 let len = self.len();
1402 (len, Some(len))
1403 }
1404}
1405
1406impl ExactSizeIterator for RowsIter<'_> {
1407 fn len(&self) -> usize {
1408 self.end - self.start
1409 }
1410}
1411
impl DoubleEndedIterator for RowsIter<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.end == self.start {
            return None;
        }
        // `end` is an exclusive bound, so step it back before reading the row
        self.end -= 1;
        let row = unsafe { self.rows.row_unchecked(self.end) };
        Some(row)
    }
}
1423
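/// A borrowed, comparable view of a single row in [`Rows`]; equality, ordering
/// and hashing are all defined in terms of the underlying bytes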
1424#[derive(Debug, Copy, Clone)]
1433pub struct Row<'a> {
1434 data: &'a [u8],
1435 config: &'a RowConfig,
1436}
1437
1438impl<'a> Row<'a> {
1439 pub fn owned(&self) -> OwnedRow {
1441 OwnedRow {
1442 data: self.data.into(),
1443 config: self.config.clone(),
1444 }
1445 }
1446
1447 pub fn data(&self) -> &'a [u8] {
1449 self.data
1450 }
1451}
1452
1453impl PartialEq for Row<'_> {
1456 #[inline]
1457 fn eq(&self, other: &Self) -> bool {
1458 self.data.eq(other.data)
1459 }
1460}
1461
1462impl Eq for Row<'_> {}
1463
1464impl PartialOrd for Row<'_> {
1465 #[inline]
1466 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1467 Some(self.cmp(other))
1468 }
1469}
1470
1471impl Ord for Row<'_> {
1472 #[inline]
1473 fn cmp(&self, other: &Self) -> Ordering {
1474 self.data.cmp(other.data)
1475 }
1476}
1477
1478impl Hash for Row<'_> {
1479 #[inline]
1480 fn hash<H: Hasher>(&self, state: &mut H) {
1481 self.data.hash(state)
1482 }
1483}
1484
1485impl AsRef<[u8]> for Row<'_> {
1486 #[inline]
1487 fn as_ref(&self) -> &[u8] {
1488 self.data
1489 }
1490}
1491
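/// An owned, heap-allocated version of [`Row`] that does not borrow from [`Rows`]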
1492#[derive(Debug, Clone)]
1496pub struct OwnedRow {
1497 data: Box<[u8]>,
1498 config: RowConfig,
1499}
1500
1501impl OwnedRow {
1502 pub fn row(&self) -> Row<'_> {
1506 Row {
1507 data: &self.data,
1508 config: &self.config,
1509 }
1510 }
1511}
1512
1513impl PartialEq for OwnedRow {
1516 #[inline]
1517 fn eq(&self, other: &Self) -> bool {
1518 self.row().eq(&other.row())
1519 }
1520}
1521
1522impl Eq for OwnedRow {}
1523
1524impl PartialOrd for OwnedRow {
1525 #[inline]
1526 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1527 Some(self.cmp(other))
1528 }
1529}
1530
1531impl Ord for OwnedRow {
1532 #[inline]
1533 fn cmp(&self, other: &Self) -> Ordering {
1534 self.row().cmp(&other.row())
1535 }
1536}
1537
1538impl Hash for OwnedRow {
1539 #[inline]
1540 fn hash<H: Hasher>(&self, state: &mut H) {
1541 self.row().hash(state)
1542 }
1543}
1544
1545impl AsRef<[u8]> for OwnedRow {
1546 #[inline]
1547 fn as_ref(&self) -> &[u8] {
1548 &self.data
1549 }
1550}
1551
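/// Returns the byte used to encode a null value, so that nulls sort first or
/// last according to the provided [`SortOptions`]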
1552#[inline]
1554fn null_sentinel(options: SortOptions) -> u8 {
1555 match options.nulls_first {
1556 true => 0,
1557 false => 0xFF,
1558 }
1559}
1560
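/// Tracks the encoded length of each row, staying in the cheaper `Fixed`
/// representation until a variable-length column forces per-row lengths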
1561enum LengthTracker {
1563 Fixed { length: usize, num_rows: usize },
1565 Variable {
1567 fixed_length: usize,
1568 lengths: Vec<usize>,
1569 },
1570}
1571
1572impl LengthTracker {
1573 fn new(num_rows: usize) -> Self {
1574 Self::Fixed {
1575 length: 0,
1576 num_rows,
1577 }
1578 }
1579
1580 fn push_fixed(&mut self, new_length: usize) {
1582 match self {
1583 LengthTracker::Fixed { length, .. } => *length += new_length,
1584 LengthTracker::Variable { fixed_length, .. } => *fixed_length += new_length,
1585 }
1586 }
1587
1588 fn push_variable(&mut self, new_lengths: impl ExactSizeIterator<Item = usize>) {
1590 match self {
1591 LengthTracker::Fixed { length, .. } => {
1592 *self = LengthTracker::Variable {
1593 fixed_length: *length,
1594 lengths: new_lengths.collect(),
1595 }
1596 }
1597 LengthTracker::Variable { lengths, .. } => {
1598 assert_eq!(lengths.len(), new_lengths.len());
1599 lengths
1600 .iter_mut()
1601 .zip(new_lengths)
1602 .for_each(|(length, new_length)| *length += new_length);
1603 }
1604 }
1605 }
1606
1607 fn materialized(&mut self) -> &mut [usize] {
1609 if let LengthTracker::Fixed { length, num_rows } = *self {
1610 *self = LengthTracker::Variable {
1611 fixed_length: length,
1612 lengths: vec![0; num_rows],
1613 };
1614 }
1615
1616 match self {
1617 LengthTracker::Variable { lengths, .. } => lengths,
1618 LengthTracker::Fixed { .. } => unreachable!(),
1619 }
1620 }
1621
1622 fn extend_offsets(&self, initial_offset: usize, offsets: &mut Vec<usize>) -> usize {
1640 match self {
1641 LengthTracker::Fixed { length, num_rows } => {
1642 offsets.extend((0..*num_rows).map(|i| initial_offset + i * length));
1643
1644 initial_offset + num_rows * length
1645 }
1646 LengthTracker::Variable {
1647 fixed_length,
1648 lengths,
1649 } => {
1650 let mut acc = initial_offset;
1651
1652 offsets.extend(lengths.iter().map(|length| {
1653 let current = acc;
1654 acc += length + fixed_length;
1655 current
1656 }));
1657
1658 acc
1659 }
1660 }
1661 }
1662}
1663
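/// Computes the encoded length of each row in `cols` using the prepared `encoders`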
1664fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
1666 use fixed::FixedLengthEncoding;
1667
1668 let num_rows = cols.first().map(|x| x.len()).unwrap_or(0);
1669 let mut tracker = LengthTracker::new(num_rows);
1670
1671 for (array, encoder) in cols.iter().zip(encoders) {
1672 match encoder {
1673 Encoder::Stateless => {
1674 downcast_primitive_array! {
1675 array => tracker.push_fixed(fixed::encoded_len(array)),
                    DataType::Null => tracker.push_fixed(2),
1677 DataType::Boolean => tracker.push_fixed(bool::ENCODED_LEN),
1678 DataType::Binary => push_generic_byte_array_lengths(&mut tracker, as_generic_binary_array::<i32>(array)),
1679 DataType::LargeBinary => push_generic_byte_array_lengths(&mut tracker, as_generic_binary_array::<i64>(array)),
1680 DataType::BinaryView => push_byte_view_array_lengths(&mut tracker, array.as_binary_view()),
1681 DataType::Utf8 => push_generic_byte_array_lengths(&mut tracker, array.as_string::<i32>()),
1682 DataType::LargeUtf8 => push_generic_byte_array_lengths(&mut tracker, array.as_string::<i64>()),
1683 DataType::Utf8View => push_byte_view_array_lengths(&mut tracker, array.as_string_view()),
1684 DataType::FixedSizeBinary(len) => {
1685 let len = len.to_usize().unwrap();
1686 tracker.push_fixed(1 + len)
1687 }
1688 _ => unimplemented!("unsupported data type: {}", array.data_type()),
1689 }
1690 }
1691 Encoder::Dictionary(values, null) => {
1692 downcast_dictionary_array! {
1693 array => {
1694 tracker.push_variable(
1695 array.keys().iter().map(|v| match v {
1696 Some(k) => values.row_len(k.as_usize()),
1697 None => null.data.len(),
1698 })
1699 )
1700 }
1701 _ => unreachable!(),
1702 }
1703 }
1704 Encoder::Struct(rows, null) => {
1705 let array = as_struct_array(array);
1706 if rows.num_rows() > 0 {
1707 tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
1709 true => 1 + rows.row_len(idx),
1710 false => 1 + null.data.len(),
1711 }));
1712 } else {
1713 tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
1715 true => 1,
1716 false => 1 + null.data.len(),
1717 }));
1718 }
1719 }
1720 Encoder::List(rows) => match array.data_type() {
1721 DataType::List(_) => {
1722 list::compute_lengths(tracker.materialized(), rows, as_list_array(array))
1723 }
1724 DataType::LargeList(_) => {
1725 list::compute_lengths(tracker.materialized(), rows, as_large_list_array(array))
1726 }
1727 DataType::ListView(_) => {
1728 let list_view = array.as_list_view::<i32>();
1729 let (min_offset, _) = compute_list_view_bounds(list_view);
1730 list::compute_lengths_list_view(
1731 tracker.materialized(),
1732 rows,
1733 list_view,
1734 min_offset,
1735 )
1736 }
1737 DataType::LargeListView(_) => {
1738 let list_view = array.as_list_view::<i64>();
1739 let (min_offset, _) = compute_list_view_bounds(list_view);
1740 list::compute_lengths_list_view(
1741 tracker.materialized(),
1742 rows,
1743 list_view,
1744 min_offset,
1745 )
1746 }
1747 DataType::FixedSizeList(_, _) => compute_lengths_fixed_size_list(
1748 &mut tracker,
1749 rows,
1750 as_fixed_size_list_array(array),
1751 ),
1752 _ => unreachable!(),
1753 },
1754 Encoder::RunEndEncoded(rows) => match array.data_type() {
1755 DataType::RunEndEncoded(r, _) => match r.data_type() {
1756 DataType::Int16 => run::compute_lengths(
1757 tracker.materialized(),
1758 rows,
1759 array.as_run::<Int16Type>(),
1760 ),
1761 DataType::Int32 => run::compute_lengths(
1762 tracker.materialized(),
1763 rows,
1764 array.as_run::<Int32Type>(),
1765 ),
1766 DataType::Int64 => run::compute_lengths(
1767 tracker.materialized(),
1768 rows,
1769 array.as_run::<Int64Type>(),
1770 ),
1771 _ => unreachable!("Unsupported run end index type: {r:?}"),
1772 },
1773 _ => unreachable!(),
1774 },
1775 Encoder::Union {
1776 child_rows,
1777 field_to_type_ids,
1778 type_ids,
1779 offsets,
1780 } => {
1781 let union_array = array
1782 .as_any()
1783 .downcast_ref::<UnionArray>()
1784 .expect("expected UnionArray");
1785
1786 let mut type_id_to_field_idx = [0usize; 128];
1787 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
1788 type_id_to_field_idx[type_id as usize] = field_idx;
1789 }
1790
1791 let lengths = (0..union_array.len()).map(|i| {
1792 let type_id = type_ids[i];
1793 let field_idx = type_id_to_field_idx[type_id as usize];
1794 let child_row_i = offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
1795 let child_row_len = child_rows[field_idx].row_len(child_row_i);
1796
1797 1 + child_row_len
1799 });
1800
1801 tracker.push_variable(lengths);
1802 }
1803 }
1804 }
1805
1806 tracker
1807}
1808
1809fn push_generic_byte_array_lengths<T: ByteArrayType>(
1811 tracker: &mut LengthTracker,
1812 array: &GenericByteArray<T>,
1813) {
1814 if let Some(nulls) = array.nulls().filter(|n| n.null_count() > 0) {
1815 tracker.push_variable(
1816 array
1817 .offsets()
1818 .lengths()
1819 .zip(nulls.iter())
1820 .map(|(length, is_valid)| if is_valid { Some(length) } else { None })
1821 .map(variable::padded_length),
1822 )
1823 } else {
1824 tracker.push_variable(
1825 array
1826 .offsets()
1827 .lengths()
1828 .map(variable::non_null_padded_length),
1829 )
1830 }
1831}
1832
1833fn push_byte_view_array_lengths<T: ByteViewType>(
1835 tracker: &mut LengthTracker,
1836 array: &GenericByteViewArray<T>,
1837) {
1838 if let Some(nulls) = array.nulls().filter(|n| n.null_count() > 0) {
1839 tracker.push_variable(
1840 array
1841 .lengths()
1842 .zip(nulls.iter())
1843 .map(|(length, is_valid)| {
1844 if is_valid {
1845 Some(length as usize)
1846 } else {
1847 None
1848 }
1849 })
1850 .map(variable::padded_length),
1851 )
1852 } else {
1853 tracker.push_variable(
1854 array
1855 .lengths()
1856 .map(|len| variable::padded_length(Some(len as usize))),
1857 )
1858 }
1859}
1860
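/// Encodes a single column into `data`, advancing the per-row `offsets` as
/// bytes are written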
1861fn encode_column(
1863 data: &mut [u8],
1864 offsets: &mut [usize],
1865 column: &dyn Array,
1866 opts: SortOptions,
1867 encoder: &Encoder<'_>,
1868) {
1869 match encoder {
1870 Encoder::Stateless => {
1871 downcast_primitive_array! {
1872 column => {
1873 if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){
1874 fixed::encode(data, offsets, column.values(), nulls, opts)
1875 } else {
1876 fixed::encode_not_null(data, offsets, column.values(), opts)
1877 }
1878 }
1879 DataType::Null => {
1880 for offset in offsets.iter_mut().skip(1) {
1881 variable::encode_null_value(&mut data[*offset..], opts);
1882 *offset += 2;
1883 }
1884 }
1885 DataType::Boolean => {
1886 if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){
1887 fixed::encode_boolean(data, offsets, column.as_boolean().values(), nulls, opts)
1888 } else {
1889 fixed::encode_boolean_not_null(data, offsets, column.as_boolean().values(), opts)
1890 }
1891 }
1892 DataType::Binary => {
1893 variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i32>(column), opts)
1894 }
1895 DataType::BinaryView => {
1896 variable::encode(data, offsets, column.as_binary_view().iter(), opts)
1897 }
1898 DataType::LargeBinary => {
1899 variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i64>(column), opts)
1900 }
1901 DataType::Utf8 => variable::encode_generic_byte_array(
1902 data, offsets,
1903 column.as_string::<i32>(),
1904 opts,
1905 ),
1906 DataType::LargeUtf8 => variable::encode_generic_byte_array(
1907 data, offsets,
1908 column.as_string::<i64>(),
1909 opts,
1910 ),
1911 DataType::Utf8View => variable::encode(
1912 data, offsets,
1913 column.as_string_view().iter().map(|x| x.map(|x| x.as_bytes())),
1914 opts,
1915 ),
1916 DataType::FixedSizeBinary(_) => {
1917 let array = column.as_any().downcast_ref().unwrap();
1918 fixed::encode_fixed_size_binary(data, offsets, array, opts)
1919 }
1920 _ => unimplemented!("unsupported data type: {}", column.data_type()),
1921 }
1922 }
1923 Encoder::Dictionary(values, nulls) => {
1924 downcast_dictionary_array! {
1925 column => encode_dictionary_values(data, offsets, column, values, nulls),
1926 _ => unreachable!()
1927 }
1928 }
1929 Encoder::Struct(rows, null) => {
1930 fn struct_encode_helper<const NO_CHILD_FIELDS: bool>(
1931 array: &StructArray,
1932 offsets: &mut [usize],
1933 null_sentinel: u8,
1934 rows: &Rows,
1935 null: &Row<'_>,
1936 data: &mut [u8],
1937 ) {
1938 let empty_row = Row {
1939 data: &[],
1940 config: &rows.config,
1941 };
1942
1943 offsets
1944 .iter_mut()
1945 .skip(1)
1946 .enumerate()
1947 .for_each(|(idx, offset)| {
1948 let (row, sentinel) = match array.is_valid(idx) {
1949 true => (
1950 if NO_CHILD_FIELDS {
1951 empty_row
1952 } else {
1953 rows.row(idx)
1954 },
1955 0x01,
1956 ),
1957 false => (*null, null_sentinel),
1958 };
1959 let end_offset = *offset + 1 + row.as_ref().len();
1960 data[*offset] = sentinel;
1961 data[*offset + 1..end_offset].copy_from_slice(row.as_ref());
1962 *offset = end_offset;
1963 })
1964 }
1965
1966 let array = as_struct_array(column);
1967 let null_sentinel = null_sentinel(opts);
1968 if rows.num_rows() == 0 {
1969 struct_encode_helper::<true>(array, offsets, null_sentinel, rows, null, data);
1971 } else {
1972 struct_encode_helper::<false>(array, offsets, null_sentinel, rows, null, data);
1973 }
1974 }
1975 Encoder::List(rows) => match column.data_type() {
1976 DataType::List(_) => list::encode(data, offsets, rows, opts, as_list_array(column)),
1977 DataType::LargeList(_) => {
1978 list::encode(data, offsets, rows, opts, as_large_list_array(column))
1979 }
1980 DataType::ListView(_) => {
1981 let list_view = column.as_list_view::<i32>();
1982 let (min_offset, _) = compute_list_view_bounds(list_view);
1983 list::encode_list_view(data, offsets, rows, opts, list_view, min_offset)
1984 }
1985 DataType::LargeListView(_) => {
1986 let list_view = column.as_list_view::<i64>();
1987 let (min_offset, _) = compute_list_view_bounds(list_view);
1988 list::encode_list_view(data, offsets, rows, opts, list_view, min_offset)
1989 }
1990 DataType::FixedSizeList(_, _) => {
1991 encode_fixed_size_list(data, offsets, rows, opts, as_fixed_size_list_array(column))
1992 }
1993 _ => unreachable!(),
1994 },
1995 Encoder::RunEndEncoded(rows) => match column.data_type() {
1996 DataType::RunEndEncoded(r, _) => match r.data_type() {
1997 DataType::Int16 => {
1998 run::encode(data, offsets, rows, opts, column.as_run::<Int16Type>())
1999 }
2000 DataType::Int32 => {
2001 run::encode(data, offsets, rows, opts, column.as_run::<Int32Type>())
2002 }
2003 DataType::Int64 => {
2004 run::encode(data, offsets, rows, opts, column.as_run::<Int64Type>())
2005 }
2006 _ => unreachable!("Unsupported run end index type: {r:?}"),
2007 },
2008 _ => unreachable!(),
2009 },
2010 Encoder::Union {
2011 child_rows,
2012 field_to_type_ids,
2013 type_ids,
2014 offsets: offsets_buf,
2015 } => {
2016 let mut type_id_to_field_idx = [0usize; 128];
2017 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
2018 type_id_to_field_idx[type_id as usize] = field_idx;
2019 }
2020
2021 offsets
2022 .iter_mut()
2023 .skip(1)
2024 .enumerate()
2025 .for_each(|(i, offset)| {
2026 let type_id = type_ids[i];
2027 let field_idx = type_id_to_field_idx[type_id as usize];
2028
2029 let child_row_idx = offsets_buf.as_ref().map(|o| o[i] as usize).unwrap_or(i);
2030 let child_row = child_rows[field_idx].row(child_row_idx);
2031 let child_bytes = child_row.as_ref();
2032
2033 let type_id_byte = if opts.descending {
2034 !(type_id as u8)
2035 } else {
2036 type_id as u8
2037 };
2038 data[*offset] = type_id_byte;
2039
2040 let child_start = *offset + 1;
2041 let child_end = child_start + child_bytes.len();
2042 data[child_start..child_end].copy_from_slice(child_bytes);
2043
2044 *offset = child_end;
2045 });
2046 }
2047 }
2048}
2049
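/// Encodes a dictionary array by splicing in the pre-converted row for each
/// key, or the pre-encoded null row for null keys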
2050pub fn encode_dictionary_values<K: ArrowDictionaryKeyType>(
2052 data: &mut [u8],
2053 offsets: &mut [usize],
2054 column: &DictionaryArray<K>,
2055 values: &Rows,
2056 null: &Row<'_>,
2057) {
2058 for (offset, k) in offsets.iter_mut().skip(1).zip(column.keys()) {
2059 let row = match k {
2060 Some(k) => values.row(k.as_usize()).data,
2061 None => null.data,
2062 };
2063 let end_offset = *offset + row.len();
2064 data[*offset..end_offset].copy_from_slice(row);
2065 *offset = end_offset;
2066 }
2067}
2068
2069macro_rules! decode_primitive_helper {
2070 ($t:ty, $rows:ident, $data_type:ident, $options:ident) => {
2071 Arc::new(decode_primitive::<$t>($rows, $data_type, $options))
2072 };
2073}
2074
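/// Decodes a single column from `rows`, advancing each row slice past the
/// bytes it consumes
///
/// # Safety
///
/// `rows` must contain valid data for this codec's [`SortField`], produced by
/// [`RowConverter::convert_columns`] with matching fields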
2075unsafe fn decode_column(
2081 field: &SortField,
2082 rows: &mut [&[u8]],
2083 codec: &Codec,
2084 validate_utf8: bool,
2085) -> Result<ArrayRef, ArrowError> {
2086 let options = field.options;
2087
2088 let array: ArrayRef = match codec {
2089 Codec::Stateless => {
2090 let data_type = field.data_type.clone();
2091 downcast_primitive! {
2092 data_type => (decode_primitive_helper, rows, data_type, options),
2093 DataType::Null => {
2094 variable::decode_null_value(rows, options);
2095 Arc::new(NullArray::new(rows.len()))
2096 }
2097 DataType::Boolean => Arc::new(decode_bool(rows, options)),
2098 DataType::Binary => Arc::new(decode_binary::<i32>(rows, options)),
2099 DataType::LargeBinary => Arc::new(decode_binary::<i64>(rows, options)),
2100 DataType::BinaryView => Arc::new(decode_binary_view(rows, options)),
2101 DataType::FixedSizeBinary(size) => Arc::new(decode_fixed_size_binary(rows, size, options)),
2102 DataType::Utf8 => Arc::new(unsafe{ decode_string::<i32>(rows, options, validate_utf8) }),
2103 DataType::LargeUtf8 => Arc::new(unsafe { decode_string::<i64>(rows, options, validate_utf8) }),
2104 DataType::Utf8View => Arc::new(unsafe { decode_string_view(rows, options, validate_utf8) }),
2105 _ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {data_type}" )))
2106 }
2107 }
2108 Codec::Dictionary(converter, _) => {
2109 let cols = unsafe { converter.convert_raw(rows, validate_utf8) }?;
2110 cols.into_iter().next().unwrap()
2111 }
2112 Codec::Struct(converter, _) => {
2113 let (null_count, nulls) = fixed::decode_nulls(rows);
2114 rows.iter_mut().for_each(|row| *row = &row[1..]);
2115 let children = unsafe { converter.convert_raw(rows, validate_utf8) }?;
2116
2117 let child_data: Vec<ArrayData> = children.iter().map(|c| c.to_data()).collect();
2118 let corrected_fields: Vec<Field> = match &field.data_type {
2121 DataType::Struct(struct_fields) => struct_fields
2122 .iter()
2123 .zip(child_data.iter())
2124 .map(|(orig_field, child_array)| {
2125 orig_field
2126 .as_ref()
2127 .clone()
2128 .with_data_type(child_array.data_type().clone())
2129 })
2130 .collect(),
2131 _ => unreachable!("Only Struct types should be corrected here"),
2132 };
2133 let corrected_struct_type = DataType::Struct(corrected_fields.into());
2134 let builder = ArrayDataBuilder::new(corrected_struct_type)
2135 .len(rows.len())
2136 .null_count(null_count)
2137 .null_bit_buffer(Some(nulls))
2138 .child_data(child_data);
2139
2140 Arc::new(StructArray::from(unsafe { builder.build_unchecked() }))
2141 }
2142 Codec::List(converter) => match &field.data_type {
2143 DataType::List(_) => {
2144 Arc::new(unsafe { list::decode::<i32>(converter, rows, field, validate_utf8) }?)
2145 }
2146 DataType::LargeList(_) => {
2147 Arc::new(unsafe { list::decode::<i64>(converter, rows, field, validate_utf8) }?)
2148 }
2149 DataType::ListView(_) => Arc::new(unsafe {
2150 list::decode_list_view::<i32>(converter, rows, field, validate_utf8)
2151 }?),
2152 DataType::LargeListView(_) => Arc::new(unsafe {
2153 list::decode_list_view::<i64>(converter, rows, field, validate_utf8)
2154 }?),
2155 DataType::FixedSizeList(_, value_length) => Arc::new(unsafe {
2156 list::decode_fixed_size_list(
2157 converter,
2158 rows,
2159 field,
2160 validate_utf8,
2161 value_length.as_usize(),
2162 )
2163 }?),
2164 _ => unreachable!(),
2165 },
2166 Codec::RunEndEncoded(converter) => match &field.data_type {
2167 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
2168 DataType::Int16 => Arc::new(unsafe {
2169 run::decode::<Int16Type>(converter, rows, field, validate_utf8)
2170 }?),
2171 DataType::Int32 => Arc::new(unsafe {
2172 run::decode::<Int32Type>(converter, rows, field, validate_utf8)
2173 }?),
2174 DataType::Int64 => Arc::new(unsafe {
2175 run::decode::<Int64Type>(converter, rows, field, validate_utf8)
2176 }?),
2177 _ => unreachable!(),
2178 },
2179 _ => unreachable!(),
2180 },
2181 Codec::Union(converters, field_to_type_ids, null_rows) => {
2182 let len = rows.len();
2183
2184 let DataType::Union(union_fields, mode) = &field.data_type else {
2185 unreachable!()
2186 };
2187
2188 let mut type_id_to_field_idx = [0usize; 128];
2189 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
2190 type_id_to_field_idx[type_id as usize] = field_idx;
2191 }
2192
2193 let mut type_ids = Vec::with_capacity(len);
2194 let mut rows_by_field: Vec<Vec<(usize, &[u8])>> = vec![Vec::new(); converters.len()];
2195
2196 for (idx, row) in rows.iter_mut().enumerate() {
2197 let type_id_byte = {
2198 let id = row[0];
2199 if options.descending { !id } else { id }
2200 };
2201
2202 let type_id = type_id_byte as i8;
2203 type_ids.push(type_id);
2204
2205 let field_idx = type_id_to_field_idx[type_id as usize];
2206
2207 let child_row = &row[1..];
2208 rows_by_field[field_idx].push((idx, child_row));
2209 }
2210
2211 let mut child_arrays: Vec<ArrayRef> = Vec::with_capacity(converters.len());
2212 let mut offsets = (*mode == UnionMode::Dense).then(|| Vec::with_capacity(len));
2213
2214 for (field_idx, converter) in converters.iter().enumerate() {
2215 let field_rows = &rows_by_field[field_idx];
2216
2217 match &mode {
2218 UnionMode::Dense => {
2219 if field_rows.is_empty() {
2220 let (_, field) = union_fields.iter().nth(field_idx).unwrap();
2221 child_arrays.push(arrow_array::new_empty_array(field.data_type()));
2222 continue;
2223 }
2224
2225 let mut child_data = field_rows
2226 .iter()
2227 .map(|(_, bytes)| *bytes)
2228 .collect::<Vec<_>>();
2229
2230 let child_array =
2231 unsafe { converter.convert_raw(&mut child_data, validate_utf8) }?;
2232
2233 for ((row_idx, original_bytes), remaining_bytes) in
2235 field_rows.iter().zip(child_data)
2236 {
2237 let consumed_length = 1 + original_bytes.len() - remaining_bytes.len();
2238 rows[*row_idx] = &rows[*row_idx][consumed_length..];
2239 }
2240
2241 child_arrays.push(child_array.into_iter().next().unwrap());
2242 }
2243 UnionMode::Sparse => {
2244 let mut sparse_data: Vec<&[u8]> = Vec::with_capacity(len);
2245 let mut field_row_iter = field_rows.iter().peekable();
2246 let null_row_bytes: &[u8] = &null_rows[field_idx].data;
2247
2248 for idx in 0..len {
2249 if let Some((next_idx, bytes)) = field_row_iter.peek() {
2250 if *next_idx == idx {
2251 sparse_data.push(*bytes);
2252
2253 field_row_iter.next();
2254 continue;
2255 }
2256 }
2257 sparse_data.push(null_row_bytes);
2258 }
2259
2260 let child_array =
2261 unsafe { converter.convert_raw(&mut sparse_data, validate_utf8) }?;
2262
2263 for (row_idx, child_row) in field_rows.iter() {
2265 let remaining_len = sparse_data[*row_idx].len();
2266 let consumed_length = 1 + child_row.len() - remaining_len;
2267 rows[*row_idx] = &rows[*row_idx][consumed_length..];
2268 }
2269
2270 child_arrays.push(child_array.into_iter().next().unwrap());
2271 }
2272 }
2273 }
2274
            if let Some(ref mut offsets_vec) = offsets {
                // Dense union offsets index into the matching child array, so the
                // running count must be tracked per field, not per raw type id
                let mut count = vec![0i32; converters.len()];
                for type_id in &type_ids {
                    let field_idx = type_id_to_field_idx[*type_id as usize];
                    offsets_vec.push(count[field_idx]);
                    count[field_idx] += 1;
                }
            }
2284 }
2285
2286 let type_ids_buffer = ScalarBuffer::from(type_ids);
2287 let offsets_buffer = offsets.map(ScalarBuffer::from);
2288
2289 let union_array = UnionArray::try_new(
2290 union_fields.clone(),
2291 type_ids_buffer,
2292 offsets_buffer,
2293 child_arrays,
2294 )?;
2295
2296 Arc::new(union_array)
2299 }
2300 };
2301 Ok(array)
2302}
2303
2304#[cfg(test)]
2305mod tests {
2306 use arrow_array::builder::*;
2307 use arrow_array::types::*;
2308 use arrow_array::*;
2309 use arrow_buffer::{Buffer, OffsetBuffer};
2310 use arrow_buffer::{NullBuffer, i256};
2311 use arrow_cast::display::{ArrayFormatter, FormatOptions};
2312 use arrow_ord::sort::{LexicographicalComparator, SortColumn};
2313 use rand::distr::uniform::SampleUniform;
2314 use rand::distr::{Distribution, StandardUniform};
2315 use rand::prelude::StdRng;
2316 use rand::{Rng, RngCore, SeedableRng};
2317
2318 use super::*;
2319
2320 #[test]
2321 fn test_fixed_width() {
2322 let cols = [
2323 Arc::new(Int16Array::from_iter([
2324 Some(1),
2325 Some(2),
2326 None,
2327 Some(-5),
2328 Some(2),
2329 Some(2),
2330 Some(0),
2331 ])) as ArrayRef,
2332 Arc::new(Float32Array::from_iter([
2333 Some(1.3),
2334 Some(2.5),
2335 None,
2336 Some(4.),
2337 Some(0.1),
2338 Some(-4.),
2339 Some(-0.),
2340 ])) as ArrayRef,
2341 ];
2342
2343 let converter = RowConverter::new(vec![
2344 SortField::new(DataType::Int16),
2345 SortField::new(DataType::Float32),
2346 ])
2347 .unwrap();
2348 let rows = converter.convert_columns(&cols).unwrap();
2349
2350 assert_eq!(rows.offsets, &[0, 8, 16, 24, 32, 40, 48, 56]);
        assert_eq!(
            rows.buffer,
            &[
                1, 128, 1, 1, 191, 166, 102, 102,
                1, 128, 2, 1, 192, 32, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
                1, 127, 251, 1, 192, 128, 0, 0,
                1, 128, 2, 1, 189, 204, 204, 205,
                1, 128, 2, 1, 63, 127, 255, 255,
                1, 128, 0, 1, 127, 255, 255, 255,
            ]
        );
2370
2371 assert!(rows.row(3) < rows.row(6));
2372 assert!(rows.row(0) < rows.row(1));
2373 assert!(rows.row(3) < rows.row(0));
2374 assert!(rows.row(4) < rows.row(1));
2375 assert!(rows.row(5) < rows.row(4));
2376
2377 let back = converter.convert_rows(&rows).unwrap();
2378 for (expected, actual) in cols.iter().zip(&back) {
2379 assert_eq!(expected, actual);
2380 }
2381 }
2382
2383 #[test]
2384 fn test_decimal32() {
2385 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal32(
2386 DECIMAL32_MAX_PRECISION,
2387 7,
2388 ))])
2389 .unwrap();
2390 let col = Arc::new(
2391 Decimal32Array::from_iter([
2392 None,
2393 Some(i32::MIN),
2394 Some(-13),
2395 Some(46_i32),
2396 Some(5456_i32),
2397 Some(i32::MAX),
2398 ])
2399 .with_precision_and_scale(9, 7)
2400 .unwrap(),
2401 ) as ArrayRef;
2402
2403 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2404 for i in 0..rows.num_rows() - 1 {
2405 assert!(rows.row(i) < rows.row(i + 1));
2406 }
2407
2408 let back = converter.convert_rows(&rows).unwrap();
2409 assert_eq!(back.len(), 1);
2410 assert_eq!(col.as_ref(), back[0].as_ref())
2411 }
2412
2413 #[test]
2414 fn test_decimal64() {
2415 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal64(
2416 DECIMAL64_MAX_PRECISION,
2417 7,
2418 ))])
2419 .unwrap();
2420 let col = Arc::new(
2421 Decimal64Array::from_iter([
2422 None,
2423 Some(i64::MIN),
2424 Some(-13),
2425 Some(46_i64),
2426 Some(5456_i64),
2427 Some(i64::MAX),
2428 ])
2429 .with_precision_and_scale(18, 7)
2430 .unwrap(),
2431 ) as ArrayRef;
2432
2433 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2434 for i in 0..rows.num_rows() - 1 {
2435 assert!(rows.row(i) < rows.row(i + 1));
2436 }
2437
2438 let back = converter.convert_rows(&rows).unwrap();
2439 assert_eq!(back.len(), 1);
2440 assert_eq!(col.as_ref(), back[0].as_ref())
2441 }
2442
2443 #[test]
2444 fn test_decimal128() {
2445 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal128(
2446 DECIMAL128_MAX_PRECISION,
2447 7,
2448 ))])
2449 .unwrap();
2450 let col = Arc::new(
2451 Decimal128Array::from_iter([
2452 None,
2453 Some(i128::MIN),
2454 Some(-13),
2455 Some(46_i128),
2456 Some(5456_i128),
2457 Some(i128::MAX),
2458 ])
2459 .with_precision_and_scale(38, 7)
2460 .unwrap(),
2461 ) as ArrayRef;
2462
2463 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2464 for i in 0..rows.num_rows() - 1 {
2465 assert!(rows.row(i) < rows.row(i + 1));
2466 }
2467
2468 let back = converter.convert_rows(&rows).unwrap();
2469 assert_eq!(back.len(), 1);
2470 assert_eq!(col.as_ref(), back[0].as_ref())
2471 }
2472
2473 #[test]
2474 fn test_decimal256() {
2475 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal256(
2476 DECIMAL256_MAX_PRECISION,
2477 7,
2478 ))])
2479 .unwrap();
2480 let col = Arc::new(
2481 Decimal256Array::from_iter([
2482 None,
2483 Some(i256::MIN),
2484 Some(i256::from_parts(0, -1)),
2485 Some(i256::from_parts(u128::MAX, -1)),
2486 Some(i256::from_parts(u128::MAX, 0)),
2487 Some(i256::from_parts(0, 46_i128)),
2488 Some(i256::from_parts(5, 46_i128)),
2489 Some(i256::MAX),
2490 ])
2491 .with_precision_and_scale(DECIMAL256_MAX_PRECISION, 7)
2492 .unwrap(),
2493 ) as ArrayRef;
2494
2495 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2496 for i in 0..rows.num_rows() - 1 {
2497 assert!(rows.row(i) < rows.row(i + 1));
2498 }
2499
2500 let back = converter.convert_rows(&rows).unwrap();
2501 assert_eq!(back.len(), 1);
2502 assert_eq!(col.as_ref(), back[0].as_ref())
2503 }
2504
2505 #[test]
2506 fn test_bool() {
2507 let converter = RowConverter::new(vec![SortField::new(DataType::Boolean)]).unwrap();
2508
2509 let col = Arc::new(BooleanArray::from_iter([None, Some(false), Some(true)])) as ArrayRef;
2510
2511 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2512 assert!(rows.row(2) > rows.row(1));
2513 assert!(rows.row(2) > rows.row(0));
2514 assert!(rows.row(1) > rows.row(0));
2515
2516 let cols = converter.convert_rows(&rows).unwrap();
2517 assert_eq!(&cols[0], &col);
2518
2519 let converter = RowConverter::new(vec![SortField::new_with_options(
2520 DataType::Boolean,
2521 SortOptions::default().desc().with_nulls_first(false),
2522 )])
2523 .unwrap();
2524
2525 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2526 assert!(rows.row(2) < rows.row(1));
2527 assert!(rows.row(2) < rows.row(0));
2528 assert!(rows.row(1) < rows.row(0));
2529 let cols = converter.convert_rows(&rows).unwrap();
2530 assert_eq!(&cols[0], &col);
2531 }
2532
2533 #[test]
2534 fn test_timezone() {
2535 let a =
2536 TimestampNanosecondArray::from(vec![1, 2, 3, 4, 5]).with_timezone("+01:00".to_string());
2537 let d = a.data_type().clone();
2538
2539 let converter = RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap();
2540 let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap();
2541 let back = converter.convert_rows(&rows).unwrap();
2542 assert_eq!(back.len(), 1);
2543 assert_eq!(back[0].data_type(), &d);
2544
2545 let mut a = PrimitiveDictionaryBuilder::<Int32Type, TimestampNanosecondType>::new();
2547 a.append(34).unwrap();
2548 a.append_null();
2549 a.append(345).unwrap();
2550
2551 let dict = a.finish();
2553 let values = TimestampNanosecondArray::from(dict.values().to_data());
2554 let dict_with_tz = dict.with_values(Arc::new(values.with_timezone("+02:00")));
2555 let v = DataType::Timestamp(TimeUnit::Nanosecond, Some("+02:00".into()));
2556 let d = DataType::Dictionary(Box::new(DataType::Int32), Box::new(v.clone()));
2557
2558 assert_eq!(dict_with_tz.data_type(), &d);
2559 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
2560 let rows = converter
2561 .convert_columns(&[Arc::new(dict_with_tz) as _])
2562 .unwrap();
2563 let back = converter.convert_rows(&rows).unwrap();
2564 assert_eq!(back.len(), 1);
2565 assert_eq!(back[0].data_type(), &v);
2566 }
2567
2568 #[test]
2569 fn test_null_encoding() {
2570 let col = Arc::new(NullArray::new(10));
2571 let converter = RowConverter::new(vec![SortField::new(DataType::Null)]).unwrap();
2572 let rows = converter.convert_columns(&[col]).unwrap();
2573 assert_eq!(rows.num_rows(), 10);
2574 assert_eq!(rows.row(1).data.len(), 2);
2576 }
2577
2578 #[test]
2579 fn test_variable_width() {
2580 let col = Arc::new(StringArray::from_iter([
2581 Some("hello"),
2582 Some("he"),
2583 None,
2584 Some("foo"),
2585 Some(""),
2586 ])) as ArrayRef;
2587
2588 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
2589 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2590
2591 assert!(rows.row(1) < rows.row(0));
2592 assert!(rows.row(2) < rows.row(4));
2593 assert!(rows.row(3) < rows.row(0));
2594 assert!(rows.row(3) < rows.row(1));
2595
2596 let cols = converter.convert_rows(&rows).unwrap();
2597 assert_eq!(&cols[0], &col);
2598
2599 let col = Arc::new(BinaryArray::from_iter([
2600 None,
2601 Some(vec![0_u8; 0]),
2602 Some(vec![0_u8; 6]),
2603 Some(vec![0_u8; variable::MINI_BLOCK_SIZE]),
2604 Some(vec![0_u8; variable::MINI_BLOCK_SIZE + 1]),
2605 Some(vec![0_u8; variable::BLOCK_SIZE]),
2606 Some(vec![0_u8; variable::BLOCK_SIZE + 1]),
2607 Some(vec![1_u8; 6]),
2608 Some(vec![1_u8; variable::MINI_BLOCK_SIZE]),
2609 Some(vec![1_u8; variable::MINI_BLOCK_SIZE + 1]),
2610 Some(vec![1_u8; variable::BLOCK_SIZE]),
2611 Some(vec![1_u8; variable::BLOCK_SIZE + 1]),
2612 Some(vec![0xFF_u8; 6]),
2613 Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE]),
2614 Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE + 1]),
2615 Some(vec![0xFF_u8; variable::BLOCK_SIZE]),
2616 Some(vec![0xFF_u8; variable::BLOCK_SIZE + 1]),
2617 ])) as ArrayRef;
2618
2619 let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
2620 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2621
2622 for i in 0..rows.num_rows() {
2623 for j in i + 1..rows.num_rows() {
2624 assert!(
2625 rows.row(i) < rows.row(j),
2626 "{} < {} - {:?} < {:?}",
2627 i,
2628 j,
2629 rows.row(i),
2630 rows.row(j)
2631 );
2632 }
2633 }
2634
2635 let cols = converter.convert_rows(&rows).unwrap();
2636 assert_eq!(&cols[0], &col);
2637
2638 let converter = RowConverter::new(vec![SortField::new_with_options(
2639 DataType::Binary,
2640 SortOptions::default().desc().with_nulls_first(false),
2641 )])
2642 .unwrap();
2643 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2644
2645 for i in 0..rows.num_rows() {
2646 for j in i + 1..rows.num_rows() {
2647 assert!(
2648 rows.row(i) > rows.row(j),
2649 "{} > {} - {:?} > {:?}",
2650 i,
2651 j,
2652 rows.row(i),
2653 rows.row(j)
2654 );
2655 }
2656 }
2657
2658 let cols = converter.convert_rows(&rows).unwrap();
2659 assert_eq!(&cols[0], &col);
2660 }
2661
2662 fn dictionary_eq(a: &dyn Array, b: &dyn Array) {
2664 match b.data_type() {
2665 DataType::Dictionary(_, v) => {
2666 assert_eq!(a.data_type(), v.as_ref());
2667 let b = arrow_cast::cast(b, v).unwrap();
2668 assert_eq!(a, b.as_ref())
2669 }
2670 _ => assert_eq!(a, b),
2671 }
2672 }
2673
2674 #[test]
2675 fn test_string_dictionary() {
2676 let a = Arc::new(DictionaryArray::<Int32Type>::from_iter([
2677 Some("foo"),
2678 Some("hello"),
2679 Some("he"),
2680 None,
2681 Some("hello"),
2682 Some(""),
2683 Some("hello"),
2684 Some("hello"),
2685 ])) as ArrayRef;
2686
2687 let field = SortField::new(a.data_type().clone());
2688 let converter = RowConverter::new(vec![field]).unwrap();
2689 let rows_a = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2690
2691 assert!(rows_a.row(3) < rows_a.row(5));
2692 assert!(rows_a.row(2) < rows_a.row(1));
2693 assert!(rows_a.row(0) < rows_a.row(1));
2694 assert!(rows_a.row(3) < rows_a.row(0));
2695
2696 assert_eq!(rows_a.row(1), rows_a.row(4));
2697 assert_eq!(rows_a.row(1), rows_a.row(6));
2698 assert_eq!(rows_a.row(1), rows_a.row(7));
2699
2700 let cols = converter.convert_rows(&rows_a).unwrap();
2701 dictionary_eq(&cols[0], &a);
2702
2703 let b = Arc::new(DictionaryArray::<Int32Type>::from_iter([
2704 Some("hello"),
2705 None,
2706 Some("cupcakes"),
2707 ])) as ArrayRef;
2708
2709 let rows_b = converter.convert_columns(&[Arc::clone(&b)]).unwrap();
2710 assert_eq!(rows_a.row(1), rows_b.row(0));
2711 assert_eq!(rows_a.row(3), rows_b.row(1));
2712 assert!(rows_b.row(2) < rows_a.row(0));
2713
2714 let cols = converter.convert_rows(&rows_b).unwrap();
2715 dictionary_eq(&cols[0], &b);
2716
2717 let converter = RowConverter::new(vec![SortField::new_with_options(
2718 a.data_type().clone(),
2719 SortOptions::default().desc().with_nulls_first(false),
2720 )])
2721 .unwrap();
2722
2723 let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2724 assert!(rows_c.row(3) > rows_c.row(5));
2725 assert!(rows_c.row(2) > rows_c.row(1));
2726 assert!(rows_c.row(0) > rows_c.row(1));
2727 assert!(rows_c.row(3) > rows_c.row(0));
2728
2729 let cols = converter.convert_rows(&rows_c).unwrap();
2730 dictionary_eq(&cols[0], &a);
2731
2732 let converter = RowConverter::new(vec![SortField::new_with_options(
2733 a.data_type().clone(),
2734 SortOptions::default().desc().with_nulls_first(true),
2735 )])
2736 .unwrap();
2737
2738 let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2739 assert!(rows_c.row(3) < rows_c.row(5));
2740 assert!(rows_c.row(2) > rows_c.row(1));
2741 assert!(rows_c.row(0) > rows_c.row(1));
2742 assert!(rows_c.row(3) < rows_c.row(0));
2743
2744 let cols = converter.convert_rows(&rows_c).unwrap();
2745 dictionary_eq(&cols[0], &a);
2746 }
2747
2748 #[test]
2749 fn test_struct() {
2750 let a = Arc::new(Int32Array::from(vec![1, 1, 2, 2])) as ArrayRef;
2752 let a_f = Arc::new(Field::new("int", DataType::Int32, false));
2753 let u = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])) as ArrayRef;
2754 let u_f = Arc::new(Field::new("s", DataType::Utf8, false));
2755 let s1 = Arc::new(StructArray::from(vec![(a_f, a), (u_f, u)])) as ArrayRef;
2756
2757 let sort_fields = vec![SortField::new(s1.data_type().clone())];
2758 let converter = RowConverter::new(sort_fields).unwrap();
2759 let r1 = converter.convert_columns(&[Arc::clone(&s1)]).unwrap();
2760
2761 for (a, b) in r1.iter().zip(r1.iter().skip(1)) {
2762 assert!(a < b);
2763 }
2764
2765 let back = converter.convert_rows(&r1).unwrap();
2766 assert_eq!(back.len(), 1);
2767 assert_eq!(&back[0], &s1);
2768
2769 let data = s1
2771 .to_data()
2772 .into_builder()
2773 .null_bit_buffer(Some(Buffer::from_slice_ref([0b00001010])))
2774 .null_count(2)
2775 .build()
2776 .unwrap();
2777
2778 let s2 = Arc::new(StructArray::from(data)) as ArrayRef;
2779 let r2 = converter.convert_columns(&[Arc::clone(&s2)]).unwrap();
2780         assert_eq!(r2.row(0), r2.row(2));
2781         assert!(r2.row(0) < r2.row(1));
2782         assert_ne!(r1.row(0), r2.row(0));
2783         assert_eq!(r1.row(1), r2.row(1));
2784
2785         let back = converter.convert_rows(&r2).unwrap();
2786 assert_eq!(back.len(), 1);
2787 assert_eq!(&back[0], &s2);
2788
2789 back[0].to_data().validate_full().unwrap();
2790 }
2791
2792 #[test]
2793 fn test_dictionary_in_struct() {
2794 let builder = StringDictionaryBuilder::<Int32Type>::new();
2795 let mut struct_builder = StructBuilder::new(
2796 vec![Field::new_dictionary(
2797 "foo",
2798 DataType::Int32,
2799 DataType::Utf8,
2800 true,
2801 )],
2802 vec![Box::new(builder)],
2803 );
2804
2805 let dict_builder = struct_builder
2806 .field_builder::<StringDictionaryBuilder<Int32Type>>(0)
2807 .unwrap();
2808
2809 dict_builder.append_value("a");
2811 dict_builder.append_null();
2812 dict_builder.append_value("a");
2813 dict_builder.append_value("b");
2814
2815 for _ in 0..4 {
2816 struct_builder.append(true);
2817 }
2818
2819 let s = Arc::new(struct_builder.finish()) as ArrayRef;
2820 let sort_fields = vec![SortField::new(s.data_type().clone())];
2821 let converter = RowConverter::new(sort_fields).unwrap();
2822 let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
2823
2824 let back = converter.convert_rows(&r).unwrap();
2825 let [s2] = back.try_into().unwrap();
2826
2827 assert_ne!(&s.data_type(), &s2.data_type());
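2828         // The dictionary field is decoded to its value type (Utf8) when converting
2829         // rows back, hence the differing struct data types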
2830 s2.to_data().validate_full().unwrap();
2831
2832 let s1_struct = s.as_struct();
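2833         // Compare the dictionary column of the original struct against the decoded
2834         // string column of the round-tripped struct, value by value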
2836 let s1_0 = s1_struct.column(0);
2837 let s1_idx_0 = s1_0.as_dictionary::<Int32Type>();
2838 let keys = s1_idx_0.keys();
2839 let values = s1_idx_0.values().as_string::<i32>();
2840 let s2_struct = s2.as_struct();
2842 let s2_0 = s2_struct.column(0);
2843 let s2_idx_0 = s2_0.as_string::<i32>();
2844
2845 for i in 0..keys.len() {
2846 if keys.is_null(i) {
2847 assert!(s2_idx_0.is_null(i));
2848 } else {
2849 let dict_index = keys.value(i) as usize;
2850 assert_eq!(values.value(dict_index), s2_idx_0.value(i));
2851 }
2852 }
2853 }
2854
2855 #[test]
2856 fn test_dictionary_in_struct_empty() {
2857 let ty = DataType::Struct(
2858 vec![Field::new_dictionary(
2859 "foo",
2860 DataType::Int32,
2861 DataType::Int32,
2862 false,
2863 )]
2864 .into(),
2865 );
2866 let s = arrow_array::new_empty_array(&ty);
2867
2868 let sort_fields = vec![SortField::new(s.data_type().clone())];
2869 let converter = RowConverter::new(sort_fields).unwrap();
2870 let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
2871
2872 let back = converter.convert_rows(&r).unwrap();
2873 let [s2] = back.try_into().unwrap();
2874
2875 assert_ne!(&s.data_type(), &s2.data_type());
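2876         // The dictionary field is decoded to its value type, so the data types
2877         // differ even though both arrays are empty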
2878 s2.to_data().validate_full().unwrap();
2879 assert_eq!(s.len(), 0);
2880 assert_eq!(s2.len(), 0);
2881 }
2882
2883 #[test]
2884 fn test_list_of_string_dictionary() {
2885 let mut builder = ListBuilder::<StringDictionaryBuilder<Int32Type>>::default();
2886 builder.values().append("a").unwrap();
2888 builder.values().append("b").unwrap();
2889 builder.values().append("zero").unwrap();
2890 builder.values().append_null();
2891 builder.values().append("c").unwrap();
2892 builder.values().append("b").unwrap();
2893 builder.values().append("d").unwrap();
2894 builder.append(true);
2895 builder.append(false);
2897 builder.values().append("e").unwrap();
2899 builder.values().append("zero").unwrap();
2900 builder.values().append("a").unwrap();
2901 builder.append(true);
2902
2903 let a = Arc::new(builder.finish()) as ArrayRef;
2904 let data_type = a.data_type().clone();
2905
2906 let field = SortField::new(data_type.clone());
2907 let converter = RowConverter::new(vec![field]).unwrap();
2908 let rows = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2909
2910 let back = converter.convert_rows(&rows).unwrap();
2911 assert_eq!(back.len(), 1);
2912 let [a2] = back.try_into().unwrap();
2913
2914 assert_ne!(&a.data_type(), &a2.data_type());
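2915         // The dictionary-encoded list values decode to plain Utf8, so the list
2916         // data types differ even though the logical values match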
2917
2918 a2.to_data().validate_full().unwrap();
2919
2920 let a2_list = a2.as_list::<i32>();
2921 let a1_list = a.as_list::<i32>();
2922
2923 let a1_0 = a1_list.value(0);
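2924         // Compare the first list entry element by element: dictionary keys/values
2925         // against the decoded strings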
2926 let a1_idx_0 = a1_0.as_dictionary::<Int32Type>();
2927 let keys = a1_idx_0.keys();
2928 let values = a1_idx_0.values().as_string::<i32>();
2929 let a2_0 = a2_list.value(0);
2930 let a2_idx_0 = a2_0.as_string::<i32>();
2931
2932 for i in 0..keys.len() {
2933 if keys.is_null(i) {
2934 assert!(a2_idx_0.is_null(i));
2935 } else {
2936 let dict_index = keys.value(i) as usize;
2937 assert_eq!(values.value(dict_index), a2_idx_0.value(i));
2938 }
2939 }
2940
2941 assert!(a1_list.is_null(1));
2943 assert!(a2_list.is_null(1));
2944
2945 let a1_2 = a1_list.value(2);
2947 let a1_idx_2 = a1_2.as_dictionary::<Int32Type>();
2948 let keys = a1_idx_2.keys();
2949 let values = a1_idx_2.values().as_string::<i32>();
2950 let a2_2 = a2_list.value(2);
2951 let a2_idx_2 = a2_2.as_string::<i32>();
2952
2953 for i in 0..keys.len() {
2954 if keys.is_null(i) {
2955 assert!(a2_idx_2.is_null(i));
2956 } else {
2957 let dict_index = keys.value(i) as usize;
2958 assert_eq!(values.value(dict_index), a2_idx_2.value(i));
2959 }
2960 }
2961 }
2962
2963 #[test]
2964 fn test_primitive_dictionary() {
2965 let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
2966 builder.append(2).unwrap();
2967 builder.append(3).unwrap();
2968 builder.append(0).unwrap();
2969 builder.append_null();
2970 builder.append(5).unwrap();
2971 builder.append(3).unwrap();
2972 builder.append(-1).unwrap();
2973
2974 let a = builder.finish();
2975 let data_type = a.data_type().clone();
2976 let columns = [Arc::new(a) as ArrayRef];
2977
2978 let field = SortField::new(data_type.clone());
2979 let converter = RowConverter::new(vec![field]).unwrap();
2980 let rows = converter.convert_columns(&columns).unwrap();
2981 assert!(rows.row(0) < rows.row(1));
2982 assert!(rows.row(2) < rows.row(0));
2983 assert!(rows.row(3) < rows.row(2));
2984 assert!(rows.row(6) < rows.row(2));
2985 assert!(rows.row(3) < rows.row(6));
2986
2987 let back = converter.convert_rows(&rows).unwrap();
2988 assert_eq!(back.len(), 1);
2989 back[0].to_data().validate_full().unwrap();
2990 }
2991
2992 #[test]
2993 fn test_dictionary_nulls() {
2994 let values = Int32Array::from_iter([Some(1), Some(-1), None, Some(4), None]).into_data();
2995 let keys =
2996 Int32Array::from_iter([Some(0), Some(0), Some(1), Some(2), Some(4), None]).into_data();
2997
2998 let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
2999 let data = keys
3000 .into_builder()
3001 .data_type(data_type.clone())
3002 .child_data(vec![values])
3003 .build()
3004 .unwrap();
3005
3006 let columns = [Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef];
3007 let field = SortField::new(data_type.clone());
3008 let converter = RowConverter::new(vec![field]).unwrap();
3009 let rows = converter.convert_columns(&columns).unwrap();
3010
3011 assert_eq!(rows.row(0), rows.row(1));
3012 assert_eq!(rows.row(3), rows.row(4));
3013 assert_eq!(rows.row(4), rows.row(5));
3014 assert!(rows.row(3) < rows.row(0));
3015 }
3016
3017 #[test]
3018 fn test_from_binary_shared_buffer() {
3019 let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
3020 let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
3021 let rows = converter.convert_columns(&[array]).unwrap();
3022 let binary_rows = rows.try_into_binary().expect("known-small rows");
3023 let _binary_rows_shared_buffer = binary_rows.clone();
3024
3025 let parsed = converter.from_binary(binary_rows);
3026
3027 converter.convert_rows(parsed.iter()).unwrap();
3028 }
3029
3030 #[test]
3031 #[should_panic(expected = "Encountered non UTF-8 data")]
3032 fn test_invalid_utf8() {
3033 let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
3034 let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
3035 let rows = converter.convert_columns(&[array]).unwrap();
3036 let binary_row = rows.row(0);
3037
3038 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3039 let parser = converter.parser();
3040 let utf8_row = parser.parse(binary_row.as_ref());
3041
3042 converter.convert_rows(std::iter::once(utf8_row)).unwrap();
3043 }
3044
3045 #[test]
3046 #[should_panic(expected = "Encountered non UTF-8 data")]
3047 fn test_invalid_utf8_array() {
3048 let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
3049 let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
3050 let rows = converter.convert_columns(&[array]).unwrap();
3051 let binary_rows = rows.try_into_binary().expect("known-small rows");
3052
3053 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3054 let parsed = converter.from_binary(binary_rows);
3055
3056 converter.convert_rows(parsed.iter()).unwrap();
3057 }
3058
3059 #[test]
3060 #[should_panic(expected = "index out of bounds")]
3061 fn test_invalid_empty() {
3062 let binary_row: &[u8] = &[];
3063
3064 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3065 let parser = converter.parser();
3066 let utf8_row = parser.parse(binary_row.as_ref());
3067
3068 converter.convert_rows(std::iter::once(utf8_row)).unwrap();
3069 }
3070
3071 #[test]
3072 #[should_panic(expected = "index out of bounds")]
3073 fn test_invalid_empty_array() {
3074 let row: &[u8] = &[];
3075 let binary_rows = BinaryArray::from(vec![row]);
3076
3077 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3078 let parsed = converter.from_binary(binary_rows);
3079
3080 converter.convert_rows(parsed.iter()).unwrap();
3081 }
3082
3083 #[test]
3084 #[should_panic(expected = "index out of bounds")]
3085 fn test_invalid_truncated() {
3086 let binary_row: &[u8] = &[0x02];
3087
3088 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3089 let parser = converter.parser();
3090 let utf8_row = parser.parse(binary_row.as_ref());
3091
3092 converter.convert_rows(std::iter::once(utf8_row)).unwrap();
3093 }
3094
3095 #[test]
3096 #[should_panic(expected = "index out of bounds")]
3097 fn test_invalid_truncated_array() {
3098 let row: &[u8] = &[0x02];
3099 let binary_rows = BinaryArray::from(vec![row]);
3100
3101 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
3102 let parsed = converter.from_binary(binary_rows);
3103
3104 converter.convert_rows(parsed.iter()).unwrap();
3105 }
3106
3107 #[test]
3108 #[should_panic(expected = "rows were not produced by this RowConverter")]
3109 fn test_different_converter() {
3110 let values = Arc::new(Int32Array::from_iter([Some(1), Some(-1)]));
3111 let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
3112 let rows = converter.convert_columns(&[values]).unwrap();
3113
3114 let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
3115 let _ = converter.convert_rows(&rows);
3116 }
3117
3118 fn test_single_list<O: OffsetSizeTrait>() {
3119 let mut builder = GenericListBuilder::<O, _>::new(Int32Builder::new());
3120 builder.values().append_value(32);
3121 builder.values().append_value(52);
3122 builder.values().append_value(32);
3123 builder.append(true);
3124 builder.values().append_value(32);
3125 builder.values().append_value(52);
3126 builder.values().append_value(12);
3127 builder.append(true);
3128 builder.values().append_value(32);
3129 builder.values().append_value(52);
3130 builder.append(true);
3131         builder.values().append_value(32);
3132         builder.values().append_value(52);
3133         builder.append(false);
3134 builder.values().append_value(32);
3135 builder.values().append_null();
3136 builder.append(true);
3137 builder.append(true);
3138         builder.values().append_value(17);
3139         builder.values().append_null();
3140         builder.append(false);
3141
3142 let list = Arc::new(builder.finish()) as ArrayRef;
3143 let d = list.data_type().clone();
3144
3145 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
3146
3147 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3148         assert!(rows.row(0) > rows.row(1));
3149         assert!(rows.row(2) < rows.row(1));
3150         assert!(rows.row(3) < rows.row(2));
3151         assert!(rows.row(4) < rows.row(2));
3152         assert!(rows.row(5) < rows.row(2));
3153         assert!(rows.row(3) < rows.row(5));
3154         assert_eq!(rows.row(3), rows.row(6));
3155
3156         let back = converter.convert_rows(&rows).unwrap();
3157 assert_eq!(back.len(), 1);
3158 back[0].to_data().validate_full().unwrap();
3159 assert_eq!(&back[0], &list);
3160
3161 let options = SortOptions::default().asc().with_nulls_first(false);
3162 let field = SortField::new_with_options(d.clone(), options);
3163 let converter = RowConverter::new(vec![field]).unwrap();
3164 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3165
3166         assert!(rows.row(0) > rows.row(1));
3167         assert!(rows.row(2) < rows.row(1));
3168         assert!(rows.row(3) > rows.row(2));
3169         assert!(rows.row(4) > rows.row(2));
3170         assert!(rows.row(5) < rows.row(2));
3171         assert!(rows.row(3) > rows.row(5));
3172         assert_eq!(rows.row(3), rows.row(6));
3173
3174         let back = converter.convert_rows(&rows).unwrap();
3175 assert_eq!(back.len(), 1);
3176 back[0].to_data().validate_full().unwrap();
3177 assert_eq!(&back[0], &list);
3178
3179 let options = SortOptions::default().desc().with_nulls_first(false);
3180 let field = SortField::new_with_options(d.clone(), options);
3181 let converter = RowConverter::new(vec![field]).unwrap();
3182 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3183
3184         assert!(rows.row(0) < rows.row(1));
3185         assert!(rows.row(2) > rows.row(1));
3186         assert!(rows.row(3) > rows.row(2));
3187         assert!(rows.row(4) > rows.row(2));
3188         assert!(rows.row(5) > rows.row(2));
3189         assert!(rows.row(3) > rows.row(5));
3190         assert_eq!(rows.row(3), rows.row(6));
3191
3192         let back = converter.convert_rows(&rows).unwrap();
3193 assert_eq!(back.len(), 1);
3194 back[0].to_data().validate_full().unwrap();
3195 assert_eq!(&back[0], &list);
3196
3197 let options = SortOptions::default().desc().with_nulls_first(true);
3198 let field = SortField::new_with_options(d, options);
3199 let converter = RowConverter::new(vec![field]).unwrap();
3200 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3201
3202         assert!(rows.row(0) < rows.row(1));
3203         assert!(rows.row(2) > rows.row(1));
3204         assert!(rows.row(3) < rows.row(2));
3205         assert!(rows.row(4) < rows.row(2));
3206         assert!(rows.row(5) > rows.row(2));
3207         assert!(rows.row(3) < rows.row(5));
3208         assert_eq!(rows.row(3), rows.row(6));
3209
3210         let back = converter.convert_rows(&rows).unwrap();
3211 assert_eq!(back.len(), 1);
3212 back[0].to_data().validate_full().unwrap();
3213 assert_eq!(&back[0], &list);
3214
3215 let sliced_list = list.slice(1, 5);
3216 let rows_on_sliced_list = converter
3217 .convert_columns(&[Arc::clone(&sliced_list)])
3218 .unwrap();
3219
3220         assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0));
3221         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1));
3222         assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1));
3223         assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1));
3224         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4));
3225
3226         let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
3227 assert_eq!(back.len(), 1);
3228 back[0].to_data().validate_full().unwrap();
3229 assert_eq!(&back[0], &sliced_list);
3230 }
3231
3232 fn test_nested_list<O: OffsetSizeTrait>() {
3233 let mut builder =
3234 GenericListBuilder::<O, _>::new(GenericListBuilder::<O, _>::new(Int32Builder::new()));
3235
3236 builder.values().values().append_value(1);
3237 builder.values().values().append_value(2);
3238 builder.values().append(true);
3239 builder.values().values().append_value(1);
3240 builder.values().values().append_null();
3241 builder.values().append(true);
3242 builder.append(true);
3243
3244 builder.values().values().append_value(1);
3245 builder.values().values().append_null();
3246 builder.values().append(true);
3247 builder.values().values().append_value(1);
3248 builder.values().values().append_null();
3249 builder.values().append(true);
3250 builder.append(true);
3251
3252 builder.values().values().append_value(1);
3253 builder.values().values().append_null();
3254 builder.values().append(true);
3255 builder.values().append(false);
3256 builder.append(true);
3257 builder.append(false);
3258
3259 builder.values().values().append_value(1);
3260 builder.values().values().append_value(2);
3261 builder.values().append(true);
3262 builder.append(true);
3263
3264 let list = Arc::new(builder.finish()) as ArrayRef;
3265 let d = list.data_type().clone();
3266
3267 let options = SortOptions::default().asc().with_nulls_first(true);
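3268         // Rows being encoded:
3269         //   0: [[1, 2], [1, null]]
3270         //   1: [[1, null], [1, null]]
3271         //   2: [[1, null], null]
3272         //   3: null
3273         //   4: [[1, 2]]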
3275 let field = SortField::new_with_options(d.clone(), options);
3276 let converter = RowConverter::new(vec![field]).unwrap();
3277 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3278
3279 assert!(rows.row(0) > rows.row(1));
3280 assert!(rows.row(1) > rows.row(2));
3281 assert!(rows.row(2) > rows.row(3));
3282 assert!(rows.row(4) < rows.row(0));
3283 assert!(rows.row(4) > rows.row(1));
3284
3285 let back = converter.convert_rows(&rows).unwrap();
3286 assert_eq!(back.len(), 1);
3287 back[0].to_data().validate_full().unwrap();
3288 assert_eq!(&back[0], &list);
3289
3290 let options = SortOptions::default().desc().with_nulls_first(true);
3291 let field = SortField::new_with_options(d.clone(), options);
3292 let converter = RowConverter::new(vec![field]).unwrap();
3293 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3294
3295 assert!(rows.row(0) > rows.row(1));
3296 assert!(rows.row(1) > rows.row(2));
3297 assert!(rows.row(2) > rows.row(3));
3298 assert!(rows.row(4) > rows.row(0));
3299 assert!(rows.row(4) > rows.row(1));
3300
3301 let back = converter.convert_rows(&rows).unwrap();
3302 assert_eq!(back.len(), 1);
3303 back[0].to_data().validate_full().unwrap();
3304 assert_eq!(&back[0], &list);
3305
3306 let options = SortOptions::default().desc().with_nulls_first(false);
3307 let field = SortField::new_with_options(d, options);
3308 let converter = RowConverter::new(vec![field]).unwrap();
3309 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3310
3311 assert!(rows.row(0) < rows.row(1));
3312 assert!(rows.row(1) < rows.row(2));
3313 assert!(rows.row(2) < rows.row(3));
3314 assert!(rows.row(4) > rows.row(0));
3315 assert!(rows.row(4) < rows.row(1));
3316
3317 let back = converter.convert_rows(&rows).unwrap();
3318 assert_eq!(back.len(), 1);
3319 back[0].to_data().validate_full().unwrap();
3320 assert_eq!(&back[0], &list);
3321
3322 let sliced_list = list.slice(1, 3);
3323 let rows = converter
3324 .convert_columns(&[Arc::clone(&sliced_list)])
3325 .unwrap();
3326
3327 assert!(rows.row(0) < rows.row(1));
3328 assert!(rows.row(1) < rows.row(2));
3329
3330 let back = converter.convert_rows(&rows).unwrap();
3331 assert_eq!(back.len(), 1);
3332 back[0].to_data().validate_full().unwrap();
3333 assert_eq!(&back[0], &sliced_list);
3334 }
3335
3336 #[test]
3337 fn test_list() {
3338 test_single_list::<i32>();
3339 test_nested_list::<i32>();
3340 }
3341
3342 #[test]
3343 fn test_large_list() {
3344 test_single_list::<i64>();
3345 test_nested_list::<i64>();
3346 }
3347
3348 fn test_single_list_view<O: OffsetSizeTrait>() {
3349 let mut builder = GenericListViewBuilder::<O, _>::new(Int32Builder::new());
3350 builder.values().append_value(32);
3351 builder.values().append_value(52);
3352 builder.values().append_value(32);
3353 builder.append(true);
3354 builder.values().append_value(32);
3355 builder.values().append_value(52);
3356 builder.values().append_value(12);
3357 builder.append(true);
3358 builder.values().append_value(32);
3359 builder.values().append_value(52);
3360 builder.append(true);
3361         builder.values().append_value(32);
3362         builder.values().append_value(52);
3363         builder.append(false);
3364 builder.values().append_value(32);
3365 builder.values().append_null();
3366 builder.append(true);
3367 builder.append(true);
3368         builder.values().append_value(17);
3369         builder.values().append_null();
3370         builder.append(false);
3371
3372 let list = Arc::new(builder.finish()) as ArrayRef;
3373 let d = list.data_type().clone();
3374
3375 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
3376
3377 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3378         assert!(rows.row(0) > rows.row(1));
3379         assert!(rows.row(2) < rows.row(1));
3380         assert!(rows.row(3) < rows.row(2));
3381         assert!(rows.row(4) < rows.row(2));
3382         assert!(rows.row(5) < rows.row(2));
3383         assert!(rows.row(3) < rows.row(5));
3384         assert_eq!(rows.row(3), rows.row(6));
3385
3386         let back = converter.convert_rows(&rows).unwrap();
3387 assert_eq!(back.len(), 1);
3388 back[0].to_data().validate_full().unwrap();
3389
3390 let back_list_view = back[0]
3392 .as_any()
3393 .downcast_ref::<GenericListViewArray<O>>()
3394 .unwrap();
3395 let orig_list_view = list
3396 .as_any()
3397 .downcast_ref::<GenericListViewArray<O>>()
3398 .unwrap();
3399
3400 assert_eq!(back_list_view.len(), orig_list_view.len());
3401 for i in 0..back_list_view.len() {
3402 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3403 if back_list_view.is_valid(i) {
3404 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3405 }
3406 }
3407
3408 let options = SortOptions::default().asc().with_nulls_first(false);
3409 let field = SortField::new_with_options(d.clone(), options);
3410 let converter = RowConverter::new(vec![field]).unwrap();
3411 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3412
3413         assert!(rows.row(0) > rows.row(1));
3414         assert!(rows.row(2) < rows.row(1));
3415         assert!(rows.row(3) > rows.row(2));
3416         assert!(rows.row(4) > rows.row(2));
3417         assert!(rows.row(5) < rows.row(2));
3418         assert!(rows.row(3) > rows.row(5));
3419         assert_eq!(rows.row(3), rows.row(6));
3420
3421         let back = converter.convert_rows(&rows).unwrap();
3422 assert_eq!(back.len(), 1);
3423 back[0].to_data().validate_full().unwrap();
3424
3425 let options = SortOptions::default().desc().with_nulls_first(false);
3426 let field = SortField::new_with_options(d.clone(), options);
3427 let converter = RowConverter::new(vec![field]).unwrap();
3428 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3429
3430         assert!(rows.row(0) < rows.row(1));
3431         assert!(rows.row(2) > rows.row(1));
3432         assert!(rows.row(3) > rows.row(2));
3433         assert!(rows.row(4) > rows.row(2));
3434         assert!(rows.row(5) > rows.row(2));
3435         assert!(rows.row(3) > rows.row(5));
3436         assert_eq!(rows.row(3), rows.row(6));
3437
3438         let back = converter.convert_rows(&rows).unwrap();
3439 assert_eq!(back.len(), 1);
3440 back[0].to_data().validate_full().unwrap();
3441
3442 let options = SortOptions::default().desc().with_nulls_first(true);
3443 let field = SortField::new_with_options(d, options);
3444 let converter = RowConverter::new(vec![field]).unwrap();
3445 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3446
3447         assert!(rows.row(0) < rows.row(1));
3448         assert!(rows.row(2) > rows.row(1));
3449         assert!(rows.row(3) < rows.row(2));
3450         assert!(rows.row(4) < rows.row(2));
3451         assert!(rows.row(5) > rows.row(2));
3452         assert!(rows.row(3) < rows.row(5));
3453         assert_eq!(rows.row(3), rows.row(6));
3454
3455         let back = converter.convert_rows(&rows).unwrap();
3456 assert_eq!(back.len(), 1);
3457 back[0].to_data().validate_full().unwrap();
3458
3459 let sliced_list = list.slice(1, 5);
3460 let rows_on_sliced_list = converter
3461 .convert_columns(&[Arc::clone(&sliced_list)])
3462 .unwrap();
3463
3464         assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0));
3465         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1));
3466         assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1));
3467         assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1));
3468         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4));
3469
3470         let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
3471 assert_eq!(back.len(), 1);
3472 back[0].to_data().validate_full().unwrap();
3473 }
3474
3475 fn test_nested_list_view<O: OffsetSizeTrait>() {
3476 let mut builder = GenericListViewBuilder::<O, _>::new(GenericListViewBuilder::<O, _>::new(
3477 Int32Builder::new(),
3478 ));
3479
3480 builder.values().values().append_value(1);
3482 builder.values().values().append_value(2);
3483 builder.values().append(true);
3484 builder.values().values().append_value(1);
3485 builder.values().values().append_null();
3486 builder.values().append(true);
3487 builder.append(true);
3488
3489 builder.values().values().append_value(1);
3491 builder.values().values().append_null();
3492 builder.values().append(true);
3493 builder.values().values().append_value(1);
3494 builder.values().values().append_null();
3495 builder.values().append(true);
3496 builder.append(true);
3497
3498 builder.values().values().append_value(1);
3500 builder.values().values().append_null();
3501 builder.values().append(true);
3502 builder.values().append(false);
3503 builder.append(true);
3504
3505 builder.append(false);
3507
3508 builder.values().values().append_value(1);
3510 builder.values().values().append_value(2);
3511 builder.values().append(true);
3512 builder.append(true);
3513
3514 let list = Arc::new(builder.finish()) as ArrayRef;
3515 let d = list.data_type().clone();
3516
3517 let options = SortOptions::default().asc().with_nulls_first(true);
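3518         // Rows being encoded:
3519         //   0: [[1, 2], [1, null]]
3520         //   1: [[1, null], [1, null]]
3521         //   2: [[1, null], null]
3522         //   3: null
3523         //   4: [[1, 2]]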
3525 let field = SortField::new_with_options(d.clone(), options);
3526 let converter = RowConverter::new(vec![field]).unwrap();
3527 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3528
3529 assert!(rows.row(0) > rows.row(1));
3530 assert!(rows.row(1) > rows.row(2));
3531 assert!(rows.row(2) > rows.row(3));
3532 assert!(rows.row(4) < rows.row(0));
3533 assert!(rows.row(4) > rows.row(1));
3534
3535 let back = converter.convert_rows(&rows).unwrap();
3536 assert_eq!(back.len(), 1);
3537 back[0].to_data().validate_full().unwrap();
3538
3539 let back_list_view = back[0]
3541 .as_any()
3542 .downcast_ref::<GenericListViewArray<O>>()
3543 .unwrap();
3544 let orig_list_view = list
3545 .as_any()
3546 .downcast_ref::<GenericListViewArray<O>>()
3547 .unwrap();
3548
3549 assert_eq!(back_list_view.len(), orig_list_view.len());
3550 for i in 0..back_list_view.len() {
3551 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3552 if back_list_view.is_valid(i) {
3553 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3554 }
3555 }
3556
3557 let options = SortOptions::default().desc().with_nulls_first(true);
3558 let field = SortField::new_with_options(d.clone(), options);
3559 let converter = RowConverter::new(vec![field]).unwrap();
3560 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3561
3562 assert!(rows.row(0) > rows.row(1));
3563 assert!(rows.row(1) > rows.row(2));
3564 assert!(rows.row(2) > rows.row(3));
3565 assert!(rows.row(4) > rows.row(0));
3566 assert!(rows.row(4) > rows.row(1));
3567
3568 let back = converter.convert_rows(&rows).unwrap();
3569 assert_eq!(back.len(), 1);
3570 back[0].to_data().validate_full().unwrap();
3571
3572 let back_list_view = back[0]
3574 .as_any()
3575 .downcast_ref::<GenericListViewArray<O>>()
3576 .unwrap();
3577
3578 assert_eq!(back_list_view.len(), orig_list_view.len());
3579 for i in 0..back_list_view.len() {
3580 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3581 if back_list_view.is_valid(i) {
3582 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3583 }
3584 }
3585
3586 let options = SortOptions::default().desc().with_nulls_first(false);
3587 let field = SortField::new_with_options(d.clone(), options);
3588 let converter = RowConverter::new(vec![field]).unwrap();
3589 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3590
3591 assert!(rows.row(0) < rows.row(1));
3592 assert!(rows.row(1) < rows.row(2));
3593 assert!(rows.row(2) < rows.row(3));
3594 assert!(rows.row(4) > rows.row(0));
3595 assert!(rows.row(4) < rows.row(1));
3596
3597 let back = converter.convert_rows(&rows).unwrap();
3598 assert_eq!(back.len(), 1);
3599 back[0].to_data().validate_full().unwrap();
3600
3601 let back_list_view = back[0]
3603 .as_any()
3604 .downcast_ref::<GenericListViewArray<O>>()
3605 .unwrap();
3606
3607 assert_eq!(back_list_view.len(), orig_list_view.len());
3608 for i in 0..back_list_view.len() {
3609 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3610 if back_list_view.is_valid(i) {
3611 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3612 }
3613 }
3614
3615 let sliced_list = list.slice(1, 3);
3616 let rows = converter
3617 .convert_columns(&[Arc::clone(&sliced_list)])
3618 .unwrap();
3619
3620 assert!(rows.row(0) < rows.row(1));
3621 assert!(rows.row(1) < rows.row(2));
3622
3623 let back = converter.convert_rows(&rows).unwrap();
3624 assert_eq!(back.len(), 1);
3625 back[0].to_data().validate_full().unwrap();
3626 }
3627
3628 #[test]
3629 fn test_list_view() {
3630 test_single_list_view::<i32>();
3631 test_nested_list_view::<i32>();
3632 }
3633
3634 #[test]
3635 fn test_large_list_view() {
3636 test_single_list_view::<i64>();
3637 test_nested_list_view::<i64>();
3638 }
3639
3640 fn test_list_view_with_shared_values<O: OffsetSizeTrait>() {
3641 let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
3643 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
3644
3645 let offsets = ScalarBuffer::<O>::from(vec![
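3646             // With values [1, 2, 3, 4, 5, 6, 7, 8], the (offset, size) pairs below select:
3647             //   row 0: (0, 3) => [1, 2, 3]
3648             //   row 1: (0, 3) => [1, 2, 3] (same slice as row 0)
3649             //   row 2: (5, 2) => [6, 7]
3650             //   row 3: (2, 2) => [3, 4]
3651             //   row 4: (1, 4) => [2, 3, 4, 5]
3652             //   row 5: (2, 1) => [3]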
3653 O::from_usize(0).unwrap(),
3654 O::from_usize(0).unwrap(),
3655 O::from_usize(5).unwrap(),
3656 O::from_usize(2).unwrap(),
3657 O::from_usize(1).unwrap(),
3658 O::from_usize(2).unwrap(),
3659 ]);
3660 let sizes = ScalarBuffer::<O>::from(vec![
3661 O::from_usize(3).unwrap(),
3662 O::from_usize(3).unwrap(),
3663 O::from_usize(2).unwrap(),
3664 O::from_usize(2).unwrap(),
3665 O::from_usize(4).unwrap(),
3666 O::from_usize(1).unwrap(),
3667 ]);
3668
3669 let list_view: GenericListViewArray<O> =
3670 GenericListViewArray::try_new(field, offsets, sizes, Arc::new(values), None).unwrap();
3671
3672 let d = list_view.data_type().clone();
3673 let list = Arc::new(list_view) as ArrayRef;
3674
3675 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
3676 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3677
3678 assert_eq!(rows.row(0), rows.row(1));
3680
3681 assert!(rows.row(0) < rows.row(2));
3683
3684 assert!(rows.row(3) > rows.row(0));
3686
3687 assert!(rows.row(4) > rows.row(0));
3689
3690 assert!(rows.row(5) < rows.row(3));
3692
3693 assert!(rows.row(5) > rows.row(4));
3695
3696 let back = converter.convert_rows(&rows).unwrap();
3698 assert_eq!(back.len(), 1);
3699 back[0].to_data().validate_full().unwrap();
3700
3701 let back_list_view = back[0]
3703 .as_any()
3704 .downcast_ref::<GenericListViewArray<O>>()
3705 .unwrap();
3706 let orig_list_view = list
3707 .as_any()
3708 .downcast_ref::<GenericListViewArray<O>>()
3709 .unwrap();
3710
3711 assert_eq!(back_list_view.len(), orig_list_view.len());
3712 for i in 0..back_list_view.len() {
3713 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3714 if back_list_view.is_valid(i) {
3715 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3716 }
3717 }
3718
3719 let options = SortOptions::default().desc();
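3720         // Re-encode in descending order; the ordering of distinct rows flips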
3721 let field = SortField::new_with_options(d, options);
3722 let converter = RowConverter::new(vec![field]).unwrap();
3723 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3724
3725         assert_eq!(rows.row(0), rows.row(1));
3726         assert!(rows.row(0) > rows.row(2));
3727         assert!(rows.row(3) < rows.row(0));
3728
3729         let back = converter.convert_rows(&rows).unwrap();
3731 assert_eq!(back.len(), 1);
3732 back[0].to_data().validate_full().unwrap();
3733 }
3734
3735 #[test]
3736 fn test_list_view_shared_values() {
3737 test_list_view_with_shared_values::<i32>();
3738 }
3739
3740 #[test]
3741 fn test_large_list_view_shared_values() {
3742 test_list_view_with_shared_values::<i64>();
3743 }
3744
3745 #[test]
3746 fn test_fixed_size_list() {
3747 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
3748 builder.values().append_value(32);
3749 builder.values().append_value(52);
3750 builder.values().append_value(32);
3751 builder.append(true);
3752 builder.values().append_value(32);
3753 builder.values().append_value(52);
3754 builder.values().append_value(12);
3755 builder.append(true);
3756 builder.values().append_value(32);
3757 builder.values().append_value(52);
3758 builder.values().append_null();
3759 builder.append(true);
3760         builder.values().append_value(32);
3761         builder.values().append_value(52);
3762         builder.values().append_value(13);
3763         builder.append(false);
3764 builder.values().append_value(32);
3765 builder.values().append_null();
3766 builder.values().append_null();
3767 builder.append(true);
3768 builder.values().append_null();
3769 builder.values().append_null();
3770 builder.values().append_null();
3771 builder.append(true);
3772         builder.values().append_value(17);
3773         builder.values().append_null();
3774         builder.values().append_value(77);
3775         builder.append(false);
3776
3777 let list = Arc::new(builder.finish()) as ArrayRef;
3778 let d = list.data_type().clone();
3779
3780 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
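3781         // Rows: [32, 52, 32], [32, 52, 12], [32, 52, null], null, [32, null, null], [null, null, null], null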
3782
3783 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3784         assert!(rows.row(0) > rows.row(1));
3785         assert!(rows.row(2) < rows.row(1));
3786         assert!(rows.row(3) < rows.row(2));
3787         assert!(rows.row(4) < rows.row(2));
3788         assert!(rows.row(5) < rows.row(2));
3789         assert!(rows.row(3) < rows.row(5));
3790         assert_eq!(rows.row(3), rows.row(6));
3791
3792         let back = converter.convert_rows(&rows).unwrap();
3793 assert_eq!(back.len(), 1);
3794 back[0].to_data().validate_full().unwrap();
3795 assert_eq!(&back[0], &list);
3796
3797 let options = SortOptions::default().asc().with_nulls_first(false);
3799 let field = SortField::new_with_options(d.clone(), options);
3800 let converter = RowConverter::new(vec![field]).unwrap();
3801 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3802         assert!(rows.row(0) > rows.row(1));
3803         assert!(rows.row(2) > rows.row(1));
3804         assert!(rows.row(3) > rows.row(2));
3805         assert!(rows.row(4) > rows.row(2));
3806         assert!(rows.row(5) > rows.row(2));
3807         assert!(rows.row(3) > rows.row(5));
3808         assert_eq!(rows.row(3), rows.row(6));
3809
3810         let back = converter.convert_rows(&rows).unwrap();
3811 assert_eq!(back.len(), 1);
3812 back[0].to_data().validate_full().unwrap();
3813 assert_eq!(&back[0], &list);
3814
3815 let options = SortOptions::default().desc().with_nulls_first(false);
3817 let field = SortField::new_with_options(d.clone(), options);
3818 let converter = RowConverter::new(vec![field]).unwrap();
3819 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3820         assert!(rows.row(0) < rows.row(1));
3821         assert!(rows.row(2) > rows.row(1));
3822         assert!(rows.row(3) > rows.row(2));
3823         assert!(rows.row(4) > rows.row(2));
3824         assert!(rows.row(5) > rows.row(2));
3825         assert!(rows.row(3) > rows.row(5));
3826         assert_eq!(rows.row(3), rows.row(6));
3827
3828         let back = converter.convert_rows(&rows).unwrap();
3829 assert_eq!(back.len(), 1);
3830 back[0].to_data().validate_full().unwrap();
3831 assert_eq!(&back[0], &list);
3832
3833 let options = SortOptions::default().desc().with_nulls_first(true);
3835 let field = SortField::new_with_options(d, options);
3836 let converter = RowConverter::new(vec![field]).unwrap();
3837 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3838
3839         assert!(rows.row(0) < rows.row(1));
3840         assert!(rows.row(2) < rows.row(1));
3841         assert!(rows.row(3) < rows.row(2));
3842         assert!(rows.row(4) < rows.row(2));
3843         assert!(rows.row(5) < rows.row(2));
3844         assert!(rows.row(3) < rows.row(5));
3845         assert_eq!(rows.row(3), rows.row(6));
3846
3847         let back = converter.convert_rows(&rows).unwrap();
3848 assert_eq!(back.len(), 1);
3849 back[0].to_data().validate_full().unwrap();
3850 assert_eq!(&back[0], &list);
3851
3852 let sliced_list = list.slice(1, 5);
3853 let rows_on_sliced_list = converter
3854 .convert_columns(&[Arc::clone(&sliced_list)])
3855 .unwrap();
3856
3857         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1));
3858         assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1));
3859         assert!(rows_on_sliced_list.row(4) < rows_on_sliced_list.row(1));
3860         assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4));
3861
3862         let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
3863 assert_eq!(back.len(), 1);
3864 back[0].to_data().validate_full().unwrap();
3865 assert_eq!(&back[0], &sliced_list);
3866 }
3867
3868 #[test]
3869 fn test_two_fixed_size_lists() {
3870 let mut first = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
3871 first.values().append_value(100);
3873 first.append(true);
3874 first.values().append_value(101);
3876 first.append(true);
3877 first.values().append_value(102);
3879 first.append(true);
3880 first.values().append_null();
3882 first.append(true);
3883         first.values().append_null();
3884         first.append(false);
3886 let first = Arc::new(first.finish()) as ArrayRef;
3887 let first_type = first.data_type().clone();
3888
3889 let mut second = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
3890 second.values().append_value(200);
3892 second.append(true);
3893 second.values().append_value(201);
3895 second.append(true);
3896 second.values().append_value(202);
3898 second.append(true);
3899 second.values().append_null();
3901 second.append(true);
3902         second.values().append_null();
3903         second.append(false);
3905 let second = Arc::new(second.finish()) as ArrayRef;
3906 let second_type = second.data_type().clone();
3907
3908 let converter = RowConverter::new(vec![
3909 SortField::new(first_type.clone()),
3910 SortField::new(second_type.clone()),
3911 ])
3912 .unwrap();
3913
3914 let rows = converter
3915 .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
3916 .unwrap();
3917
3918 let back = converter.convert_rows(&rows).unwrap();
3919 assert_eq!(back.len(), 2);
3920 back[0].to_data().validate_full().unwrap();
3921 assert_eq!(&back[0], &first);
3922 back[1].to_data().validate_full().unwrap();
3923 assert_eq!(&back[1], &second);
3924 }
3925
3926 #[test]
3927 fn test_fixed_size_list_with_variable_width_content() {
3928 let mut first = FixedSizeListBuilder::new(
3929 StructBuilder::from_fields(
3930 vec![
3931 Field::new(
3932 "timestamp",
3933 DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("UTC"))),
3934 false,
3935 ),
3936 Field::new("offset_minutes", DataType::Int16, false),
3937 Field::new("time_zone", DataType::Utf8, false),
3938 ],
3939 1,
3940 ),
3941 1,
3942 );
3943 first
3945 .values()
3946 .field_builder::<TimestampMicrosecondBuilder>(0)
3947 .unwrap()
3948 .append_null();
3949 first
3950 .values()
3951 .field_builder::<Int16Builder>(1)
3952 .unwrap()
3953 .append_null();
3954 first
3955 .values()
3956 .field_builder::<StringBuilder>(2)
3957 .unwrap()
3958 .append_null();
3959 first.values().append(false);
3960 first.append(false);
3961 first
3963 .values()
3964 .field_builder::<TimestampMicrosecondBuilder>(0)
3965 .unwrap()
3966 .append_null();
3967 first
3968 .values()
3969 .field_builder::<Int16Builder>(1)
3970 .unwrap()
3971 .append_null();
3972 first
3973 .values()
3974 .field_builder::<StringBuilder>(2)
3975 .unwrap()
3976 .append_null();
3977 first.values().append(false);
3978 first.append(true);
3979 first
3981 .values()
3982 .field_builder::<TimestampMicrosecondBuilder>(0)
3983 .unwrap()
3984 .append_value(0);
3985 first
3986 .values()
3987 .field_builder::<Int16Builder>(1)
3988 .unwrap()
3989 .append_value(0);
3990 first
3991 .values()
3992 .field_builder::<StringBuilder>(2)
3993 .unwrap()
3994 .append_value("UTC");
3995 first.values().append(true);
3996 first.append(true);
3997 first
3999 .values()
4000 .field_builder::<TimestampMicrosecondBuilder>(0)
4001 .unwrap()
4002 .append_value(1126351800123456);
4003 first
4004 .values()
4005 .field_builder::<Int16Builder>(1)
4006 .unwrap()
4007 .append_value(120);
4008 first
4009 .values()
4010 .field_builder::<StringBuilder>(2)
4011 .unwrap()
4012 .append_value("Europe/Warsaw");
4013 first.values().append(true);
4014 first.append(true);
4015 let first = Arc::new(first.finish()) as ArrayRef;
4016 let first_type = first.data_type().clone();
4017
4018 let mut second = StringBuilder::new();
4019 second.append_value("somewhere near");
4020 second.append_null();
4021 second.append_value("Greenwich");
4022 second.append_value("Warsaw");
4023 let second = Arc::new(second.finish()) as ArrayRef;
4024 let second_type = second.data_type().clone();
4025
4026 let converter = RowConverter::new(vec![
4027 SortField::new(first_type.clone()),
4028 SortField::new(second_type.clone()),
4029 ])
4030 .unwrap();
4031
4032 let rows = converter
4033 .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
4034 .unwrap();
4035
4036 let back = converter.convert_rows(&rows).unwrap();
4037 assert_eq!(back.len(), 2);
4038 back[0].to_data().validate_full().unwrap();
4039 assert_eq!(&back[0], &first);
4040 back[1].to_data().validate_full().unwrap();
4041 assert_eq!(&back[1], &second);
4042 }
4043
4044 fn generate_primitive_array<K>(
4045 rng: &mut impl RngCore,
4046 len: usize,
4047 valid_percent: f64,
4048 ) -> PrimitiveArray<K>
4049 where
4050 K: ArrowPrimitiveType,
4051 StandardUniform: Distribution<K::Native>,
4052 {
4053 (0..len)
4054 .map(|_| rng.random_bool(valid_percent).then(|| rng.random()))
4055 .collect()
4056 }
4057
4058 fn generate_boolean_array(
4059 rng: &mut impl RngCore,
4060 len: usize,
4061 valid_percent: f64,
4062 ) -> BooleanArray {
4063 (0..len)
4064 .map(|_| rng.random_bool(valid_percent).then(|| rng.random_bool(0.5)))
4065 .collect()
4066 }
4067
4068 fn generate_strings<O: OffsetSizeTrait>(
4069 rng: &mut impl RngCore,
4070 len: usize,
4071 valid_percent: f64,
4072 ) -> GenericStringArray<O> {
4073 (0..len)
4074 .map(|_| {
4075 rng.random_bool(valid_percent).then(|| {
4076 let len = rng.random_range(0..100);
4077 let bytes = (0..len).map(|_| rng.random_range(0..128)).collect();
4078 String::from_utf8(bytes).unwrap()
4079 })
4080 })
4081 .collect()
4082 }
4083
4084 fn generate_string_view(
4085 rng: &mut impl RngCore,
4086 len: usize,
4087 valid_percent: f64,
4088 ) -> StringViewArray {
4089 (0..len)
4090 .map(|_| {
4091 rng.random_bool(valid_percent).then(|| {
4092 let len = rng.random_range(0..100);
4093 let bytes = (0..len).map(|_| rng.random_range(0..128)).collect();
4094 String::from_utf8(bytes).unwrap()
4095 })
4096 })
4097 .collect()
4098 }
4099
4100 fn generate_byte_view(
4101 rng: &mut impl RngCore,
4102 len: usize,
4103 valid_percent: f64,
4104 ) -> BinaryViewArray {
4105 (0..len)
4106 .map(|_| {
4107 rng.random_bool(valid_percent).then(|| {
4108 let len = rng.random_range(0..100);
4109 let bytes: Vec<_> = (0..len).map(|_| rng.random_range(0..128)).collect();
4110 bytes
4111 })
4112 })
4113 .collect()
4114 }
4115
4116 fn generate_fixed_stringview_column(len: usize) -> StringViewArray {
4117 let edge_cases = vec![
4118 Some("bar".to_string()),
4119 Some("bar\0".to_string()),
4120 Some("LongerThan12Bytes".to_string()),
4121 Some("LongerThan12Bytez".to_string()),
4122 Some("LongerThan12Bytes\0".to_string()),
4123 Some("LongerThan12Byt".to_string()),
4124 Some("backend one".to_string()),
4125 Some("backend two".to_string()),
4126 Some("a".repeat(257)),
4127 Some("a".repeat(300)),
4128 ];
4129
4130 let mut values = Vec::with_capacity(len);
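4131         // Cycle through the edge cases until the requested length is reached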
4132 for i in 0..len {
4133 values.push(
4134 edge_cases
4135 .get(i % edge_cases.len())
4136 .cloned()
4137 .unwrap_or(None),
4138 );
4139 }
4140
4141 StringViewArray::from(values)
4142 }
4143
4144 fn generate_dictionary<K>(
4145 rng: &mut impl RngCore,
4146 values: ArrayRef,
4147 len: usize,
4148 valid_percent: f64,
4149 ) -> DictionaryArray<K>
4150 where
4151 K: ArrowDictionaryKeyType,
4152 K::Native: SampleUniform,
4153 {
4154 let min_key = K::Native::from_usize(0).unwrap();
4155 let max_key = K::Native::from_usize(values.len()).unwrap();
4156 let keys: PrimitiveArray<K> = (0..len)
4157 .map(|_| {
4158 rng.random_bool(valid_percent)
4159 .then(|| rng.random_range(min_key..max_key))
4160 })
4161 .collect();
4162
4163 let data_type =
4164 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
4165
4166 let data = keys
4167 .into_data()
4168 .into_builder()
4169 .data_type(data_type)
4170 .add_child_data(values.to_data())
4171 .build()
4172 .unwrap();
4173
4174 DictionaryArray::from(data)
4175 }
4176
4177 fn generate_fixed_size_binary(
4178 rng: &mut impl RngCore,
4179 len: usize,
4180 valid_percent: f64,
4181 ) -> FixedSizeBinaryArray {
4182 let width = rng.random_range(0..20);
4183 let mut builder = FixedSizeBinaryBuilder::new(width);
4184
4185 let mut b = vec![0; width as usize];
4186 for _ in 0..len {
4187 match rng.random_bool(valid_percent) {
4188 true => {
4189 b.iter_mut().for_each(|x| *x = rng.random());
4190 builder.append_value(&b).unwrap();
4191 }
4192 false => builder.append_null(),
4193 }
4194 }
4195
4196 builder.finish()
4197 }
4198
4199 fn generate_struct(rng: &mut impl RngCore, len: usize, valid_percent: f64) -> StructArray {
4200 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4201 let a = generate_primitive_array::<Int32Type>(rng, len, valid_percent);
4202 let b = generate_strings::<i32>(rng, len, valid_percent);
4203 let fields = Fields::from(vec![
4204 Field::new("a", DataType::Int32, true),
4205 Field::new("b", DataType::Utf8, true),
4206 ]);
4207 let values = vec![Arc::new(a) as _, Arc::new(b) as _];
4208 StructArray::new(fields, values, Some(nulls))
4209 }
4210
4211 fn generate_list<R: RngCore, F>(
4212 rng: &mut R,
4213 len: usize,
4214 valid_percent: f64,
4215 values: F,
4216 ) -> ListArray
4217 where
4218 F: FnOnce(&mut R, usize) -> ArrayRef,
4219 {
4220 let offsets = OffsetBuffer::<i32>::from_lengths((0..len).map(|_| rng.random_range(0..10)));
4221 let values_len = offsets.last().unwrap().to_usize().unwrap();
4222 let values = values(rng, values_len);
4223 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4224 let field = Arc::new(Field::new_list_field(values.data_type().clone(), true));
4225 ListArray::new(field, offsets, values, Some(nulls))
4226 }
4227
4228 fn generate_list_view<F>(
4229 rng: &mut impl RngCore,
4230 len: usize,
4231 valid_percent: f64,
4232 values: F,
4233 ) -> ListViewArray
4234 where
4235 F: FnOnce(usize) -> ArrayRef,
4236 {
4237 let sizes: Vec<i32> = (0..len).map(|_| rng.random_range(0..10)).collect();
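4238         // Keep at least one value so the generated offset ranges below stay valid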
4239 let values_len: usize = sizes.iter().map(|s| *s as usize).sum::<usize>().max(1);
4240 let values = values(values_len);
4241
4242 let offsets: Vec<i32> = sizes
4244 .iter()
4245 .map(|&size| {
4246 if size == 0 {
4247 0
4248 } else {
4249 rng.random_range(0..=(values_len as i32 - size))
4250 }
4251 })
4252 .collect();
4253
4254 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4255 let field = Arc::new(Field::new_list_field(values.data_type().clone(), true));
4256 ListViewArray::new(
4257 field,
4258 ScalarBuffer::from(offsets),
4259 ScalarBuffer::from(sizes),
4260 values,
4261 Some(nulls),
4262 )
4263 }
4264
4265 fn generate_nulls(rng: &mut impl RngCore, len: usize) -> Option<NullBuffer> {
4266 Some(NullBuffer::from_iter(
4267 (0..len).map(|_| rng.random_bool(0.8)),
4268 ))
4269 }
4270
4271 fn change_underlying_null_values_for_primitive<T: ArrowPrimitiveType>(
4272 array: &PrimitiveArray<T>,
4273 ) -> PrimitiveArray<T> {
4274 let (dt, values, nulls) = array.clone().into_parts();
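4275         // Add one to the value stored behind each null slot, leaving valid values untouched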
4275
4276 let new_values = ScalarBuffer::<T::Native>::from_iter(
4277 values
4278 .iter()
4279 .zip(nulls.as_ref().unwrap().iter())
4280 .map(|(val, is_valid)| {
4281 if is_valid {
4282 *val
4283 } else {
4284 val.add_wrapping(T::Native::usize_as(1))
4285 }
4286 }),
4287 );
4288
4289 PrimitiveArray::new(new_values, nulls).with_data_type(dt)
4290 }
4291
4292 fn change_underline_null_values_for_byte_array<T: ByteArrayType>(
4293 array: &GenericByteArray<T>,
4294 ) -> GenericByteArray<T> {
4295 let (offsets, values, nulls) = array.clone().into_parts();
4296
4297 let new_offsets = OffsetBuffer::<T::Offset>::from_lengths(
4298 offsets
4299 .lengths()
4300 .zip(nulls.as_ref().unwrap().iter())
4301 .map(|(len, is_valid)| if is_valid { len } else { len + 1 }),
4302 );
4303
4304 let mut new_bytes = Vec::<u8>::with_capacity(new_offsets[new_offsets.len() - 1].as_usize());
4305
4306 offsets
4307 .windows(2)
4308 .zip(nulls.as_ref().unwrap().iter())
4309 .for_each(|(start_and_end, is_valid)| {
4310 let start = start_and_end[0].as_usize();
4311 let end = start_and_end[1].as_usize();
4312 new_bytes.extend_from_slice(&values.as_slice()[start..end]);
4313
4314 if !is_valid {
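4315                     // Append an arbitrary extra byte behind the null slot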
4316 new_bytes.push(b'c');
4317 }
4318 });
4319
4320 GenericByteArray::<T>::new(new_offsets, Buffer::from_vec(new_bytes), nulls)
4321 }
4322
4323 fn change_underline_null_values_for_list_array<O: OffsetSizeTrait>(
4324 array: &GenericListArray<O>,
4325 ) -> GenericListArray<O> {
4326 let (field, offsets, values, nulls) = array.clone().into_parts();
4327
4328 let (new_values, new_offsets) = {
4329 let concat_values = offsets
4330 .windows(2)
4331 .zip(nulls.as_ref().unwrap().iter())
4332 .map(|(start_and_end, is_valid)| {
4333 let start = start_and_end[0].as_usize();
4334 let end = start_and_end[1].as_usize();
4335 if is_valid {
4336 return (start, end - start);
4337 }
4338
4339 if end == values.len() {
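4340                         // Cannot grow past the end of the values, so shrink the null slot instead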
4341 (start, (end - start).saturating_sub(1))
4342 } else {
4343 (start, end - start + 1)
4344 }
4345 })
4346 .map(|(start, length)| values.slice(start, length))
4347 .collect::<Vec<_>>();
4348
4349 let new_offsets =
4350 OffsetBuffer::<O>::from_lengths(concat_values.iter().map(|s| s.len()));
4351
4352 let new_values = {
4353 let values = concat_values.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
4354 arrow_select::concat::concat(&values).expect("should be able to concat")
4355 };
4356
4357 (new_values, new_offsets)
4358 };
4359
4360 GenericListArray::<O>::new(field, new_offsets, new_values, nulls)
4361 }
4362
4363 fn change_underline_null_values(array: &ArrayRef) -> ArrayRef {
4364 if array.null_count() == 0 {
4365 return Arc::clone(array);
4366 }
4367
4368 downcast_primitive_array!(
4369 array => {
4370 let output = change_underlying_null_values_for_primitive(array);
4371
4372 Arc::new(output)
4373 }
4374
4375 DataType::Utf8 => {
4376 Arc::new(change_underline_null_values_for_byte_array(array.as_string::<i32>()))
4377 }
4378 DataType::LargeUtf8 => {
4379 Arc::new(change_underline_null_values_for_byte_array(array.as_string::<i64>()))
4380 }
4381 DataType::Binary => {
4382 Arc::new(change_underline_null_values_for_byte_array(array.as_binary::<i32>()))
4383 }
4384 DataType::LargeBinary => {
4385 Arc::new(change_underline_null_values_for_byte_array(array.as_binary::<i64>()))
4386 }
4387 DataType::List(_) => {
4388 Arc::new(change_underline_null_values_for_list_array(array.as_list::<i32>()))
4389 }
4390 DataType::LargeList(_) => {
4391 Arc::new(change_underline_null_values_for_list_array(array.as_list::<i64>()))
4392 }
4393 _ => {
4394 Arc::clone(array)
4395 }
4396 )
4397 }
4398
4399 fn generate_column(rng: &mut (impl RngCore + Clone), len: usize) -> ArrayRef {
4400 match rng.random_range(0..23) {
4401 0 => Arc::new(generate_primitive_array::<Int32Type>(rng, len, 0.8)),
4402 1 => Arc::new(generate_primitive_array::<UInt32Type>(rng, len, 0.8)),
4403 2 => Arc::new(generate_primitive_array::<Int64Type>(rng, len, 0.8)),
4404 3 => Arc::new(generate_primitive_array::<UInt64Type>(rng, len, 0.8)),
4405 4 => Arc::new(generate_primitive_array::<Float32Type>(rng, len, 0.8)),
4406 5 => Arc::new(generate_primitive_array::<Float64Type>(rng, len, 0.8)),
4407 6 => Arc::new(generate_strings::<i32>(rng, len, 0.8)),
4408 7 => {
4409 let dict_values_len = rng.random_range(1..len);
4410 let strings = Arc::new(generate_strings::<i32>(rng, dict_values_len, 1.0));
4412 Arc::new(generate_dictionary::<Int64Type>(rng, strings, len, 0.8))
4413 }
4414 8 => {
4415 let dict_values_len = rng.random_range(1..len);
4416 let values = Arc::new(generate_primitive_array::<Int64Type>(
4418 rng,
4419 dict_values_len,
4420 1.0,
4421 ));
4422 Arc::new(generate_dictionary::<Int64Type>(rng, values, len, 0.8))
4423 }
4424 9 => Arc::new(generate_fixed_size_binary(rng, len, 0.8)),
4425 10 => Arc::new(generate_struct(rng, len, 0.8)),
4426 11 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4427 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4428 })),
4429 12 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4430 Arc::new(generate_strings::<i32>(rng, values_len, 0.8))
4431 })),
4432 13 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4433 Arc::new(generate_struct(rng, values_len, 0.8))
4434 })),
4435 14 => Arc::new(generate_string_view(rng, len, 0.8)),
4436 15 => Arc::new(generate_byte_view(rng, len, 0.8)),
4437 16 => Arc::new(generate_fixed_stringview_column(len)),
4438 17 => Arc::new(
4439 generate_list(&mut rng.clone(), len + 1000, 0.8, |rng, values_len| {
4440 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4441 })
4442 .slice(500, len),
4443 ),
4444 18 => Arc::new(generate_boolean_array(rng, len, 0.8)),
4445 19 => Arc::new(generate_list_view(
4446 &mut rng.clone(),
4447 len,
4448 0.8,
4449 |values_len| Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8)),
4450 )),
4451 20 => Arc::new(generate_list_view(
4452 &mut rng.clone(),
4453 len,
4454 0.8,
4455 |values_len| Arc::new(generate_strings::<i32>(rng, values_len, 0.8)),
4456 )),
4457 21 => Arc::new(generate_list_view(
4458 &mut rng.clone(),
4459 len,
4460 0.8,
4461 |values_len| Arc::new(generate_struct(rng, values_len, 0.8)),
4462 )),
4463 22 => Arc::new(
4464 generate_list_view(&mut rng.clone(), len + 1000, 0.8, |values_len| {
4465 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4466 })
4467 .slice(500, len),
4468 ),
4469 _ => unreachable!(),
4470 }
4471 }
4472
4473 fn print_row(cols: &[SortColumn], row: usize) -> String {
4474 let t: Vec<_> = cols
4475 .iter()
4476 .map(|x| match x.values.is_valid(row) {
4477 true => {
4478 let opts = FormatOptions::default().with_null("NULL");
4479 let formatter = ArrayFormatter::try_new(x.values.as_ref(), &opts).unwrap();
4480 formatter.value(row).to_string()
4481 }
4482 false => "NULL".to_string(),
4483 })
4484 .collect();
4485 t.join(",")
4486 }
4487
4488 fn print_col_types(cols: &[SortColumn]) -> String {
4489 let t: Vec<_> = cols
4490 .iter()
4491 .map(|x| x.values.data_type().to_string())
4492 .collect();
4493 t.join(",")
4494 }
4495
4496 #[derive(Debug, PartialEq)]
4497 enum Nulls {
4498         AsIs,      // keep whatever null buffers the generators produced
4500
4501         Different, // replace the null buffers with freshly generated ones
4503
4504         None,      // strip the null buffers entirely
4506 }
4507
4508 #[test]
4509 #[cfg_attr(miri, ignore)]
4510 fn fuzz_test() {
4511 let mut rng = StdRng::seed_from_u64(42);
4512 for _ in 0..100 {
4513 for null_behavior in [Nulls::AsIs, Nulls::Different, Nulls::None] {
4514 let num_columns = rng.random_range(1..5);
4515 let len = rng.random_range(5..100);
4516 let mut arrays: Vec<_> = (0..num_columns)
4517 .map(|_| generate_column(&mut rng, len))
4518 .collect();
4519
4520 match null_behavior {
4521 Nulls::AsIs => {
4522 }
4524 Nulls::Different => {
4525 arrays = arrays
4527 .into_iter()
4528 .map(|a| replace_array_nulls(a, generate_nulls(&mut rng, len)))
4529 .collect()
4530 }
4531 Nulls::None => {
4532 arrays = arrays
4534 .into_iter()
4535 .map(|a| replace_array_nulls(a, None))
4536 .collect()
4537 }
4538 }
4539
4540 let options: Vec<_> = (0..num_columns)
4541 .map(|_| SortOptions {
4542 descending: rng.random_bool(0.5),
4543 nulls_first: rng.random_bool(0.5),
4544 })
4545 .collect();
4546
4547 let sort_columns: Vec<_> = options
4548 .iter()
4549 .zip(&arrays)
4550 .map(|(o, c)| SortColumn {
4551 values: Arc::clone(c),
4552 options: Some(*o),
4553 })
4554 .collect();
4555
4556 let comparator = LexicographicalComparator::try_new(&sort_columns).unwrap();
4557
4558 let columns: Vec<SortField> = options
4559 .into_iter()
4560 .zip(&arrays)
4561 .map(|(o, a)| SortField::new_with_options(a.data_type().clone(), o))
4562 .collect();
4563
4564 let converter = RowConverter::new(columns).unwrap();
4565 let rows = converter.convert_columns(&arrays).unwrap();
4566
4567 if !matches!(null_behavior, Nulls::None) {
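                    // Changing the values hidden behind null slots must not change the encoded rows.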
4570 assert_same_rows_when_changing_input_underlying_null_values(
4571 &arrays, &converter, &rows,
4572 );
4573 }
4574
4575 for i in 0..len {
4576 for j in 0..len {
4577 let row_i = rows.row(i);
4578 let row_j = rows.row(j);
4579 let row_cmp = row_i.cmp(&row_j);
4580 let lex_cmp = comparator.compare(i, j);
4581 assert_eq!(
4582 row_cmp,
4583 lex_cmp,
4584 "({:?} vs {:?}) vs ({:?} vs {:?}) for types {}",
4585 print_row(&sort_columns, i),
4586 print_row(&sort_columns, j),
4587 row_i,
4588 row_j,
4589 print_col_types(&sort_columns)
4590 );
4591 }
4592 }
4593
4594 {
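                    // Check that `lengths()` yields one entry per row and agrees with both the
                    // encoded row bytes and `row_len()`.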
4596 let mut rows_iter = rows.iter();
4597 let mut rows_lengths_iter = rows.lengths();
4598 for (index, row) in rows_iter.by_ref().enumerate() {
4599 let len = rows_lengths_iter
4600 .next()
4601                             .expect("Reached end of length iterator while rows remain");
4602 assert_eq!(
4603 row.data.len(),
4604 len,
4605 "Row length mismatch: {} vs {}",
4606 row.data.len(),
4607 len
4608 );
4609 assert_eq!(
4610 len,
4611 rows.row_len(index),
4612 "Row length mismatch at index {}: {} vs {}",
4613 index,
4614 len,
4615 rows.row_len(index)
4616 );
4617 }
4618
4619 assert_eq!(
4620 rows_lengths_iter.next(),
4621 None,
4622 "Length iterator did not reach end"
4623 );
4624 }
4625
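                // Convert the rows back into arrays and verify the round trip column by column.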
4626 let back = converter.convert_rows(&rows).unwrap();
4629 for (actual, expected) in back.iter().zip(&arrays) {
4630 actual.to_data().validate_full().unwrap();
4631 dictionary_eq(actual, expected)
4632 }
4633
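                // Round trip again through the opaque binary representation, re-parsing each row.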
4634 let rows = rows.try_into_binary().expect("reasonable size");
4637 let parser = converter.parser();
4638 let back = converter
4639 .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
4640 .unwrap();
4641 for (actual, expected) in back.iter().zip(&arrays) {
4642 actual.to_data().validate_full().unwrap();
4643 dictionary_eq(actual, expected)
4644 }
4645
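                // Rebuild `Rows` from the binary form and round trip once more.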
4646 let rows = converter.from_binary(rows);
4647 let back = converter.convert_rows(&rows).unwrap();
4648 for (actual, expected) in back.iter().zip(&arrays) {
4649 actual.to_data().validate_full().unwrap();
4650 dictionary_eq(actual, expected)
4651 }
4652 }
4653 }
4654 }
4655
4656 fn replace_array_nulls(array: ArrayRef, new_nulls: Option<NullBuffer>) -> ArrayRef {
4657 make_array(
4658 array
4659 .into_data()
4660 .into_builder()
4661 .nulls(new_nulls)
4663 .build()
4664 .unwrap(),
4665 )
4666 }
4667
4668 fn assert_same_rows_when_changing_input_underlying_null_values(
4669 arrays: &[ArrayRef],
4670 converter: &RowConverter,
4671 rows: &Rows,
4672 ) {
4673 let arrays_with_different_data_behind_nulls = arrays
4674 .iter()
4675 .map(|arr| change_underline_null_values(arr))
4676 .collect::<Vec<_>>();
4677
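        // If no column was actually altered (e.g. there were no nulls to change), there is nothing to verify.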
4678 if arrays
4680 .iter()
4681 .zip(arrays_with_different_data_behind_nulls.iter())
4682 .all(|(a, b)| Arc::ptr_eq(a, b))
4683 {
4684 return;
4685 }
4686
4687 let rows_with_different_nulls = converter
4688 .convert_columns(&arrays_with_different_data_behind_nulls)
4689 .unwrap();
4690
4691 assert_eq!(
4692 rows.iter().collect::<Vec<_>>(),
4693 rows_with_different_nulls.iter().collect::<Vec<_>>(),
4694             "Different underlying null values should not produce different rows"
4695 )
4696 }
4697
4698 #[test]
4699 fn test_clear() {
4700 let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
4701 let mut rows = converter.empty_rows(3, 128);
4702
4703 let first = Int32Array::from(vec![None, Some(2), Some(4)]);
4704 let second = Int32Array::from(vec![Some(2), None, Some(4)]);
4705 let arrays = [Arc::new(first) as ArrayRef, Arc::new(second) as ArrayRef];
4706
4707 for array in arrays.iter() {
4708 rows.clear();
4709 converter
4710 .append(&mut rows, std::slice::from_ref(array))
4711 .unwrap();
4712 let back = converter.convert_rows(&rows).unwrap();
4713 assert_eq!(&back[0], array);
4714 }
4715
4716 let mut rows_expected = converter.empty_rows(3, 128);
4717 converter.append(&mut rows_expected, &arrays[1..]).unwrap();
4718
4719 for (i, (actual, expected)) in rows.iter().zip(rows_expected.iter()).enumerate() {
4720 assert_eq!(
4721 actual, expected,
4722 "For row {i}: expected {expected:?}, actual: {actual:?}",
4723 );
4724 }
4725 }
4726
4727 #[test]
4728 fn test_append_codec_dictionary_binary() {
4729 use DataType::*;
4730 let converter = RowConverter::new(vec![SortField::new(Dictionary(
4732 Box::new(Int32),
4733 Box::new(Binary),
4734 ))])
4735 .unwrap();
4736 let mut rows = converter.empty_rows(4, 128);
4737
4738 let keys = Int32Array::from_iter_values([0, 1, 2, 3]);
4739 let values = BinaryArray::from(vec![
4740 Some("a".as_bytes()),
4741 Some(b"b"),
4742 Some(b"c"),
4743 Some(b"d"),
4744 ]);
4745 let dict_array = DictionaryArray::new(keys, Arc::new(values));
4746
4747 rows.clear();
4748 let array = Arc::new(dict_array) as ArrayRef;
4749 converter
4750 .append(&mut rows, std::slice::from_ref(&array))
4751 .unwrap();
4752 let back = converter.convert_rows(&rows).unwrap();
4753
4754 dictionary_eq(&back[0], &array);
4755 }
4756
4757 #[test]
4758 fn test_list_prefix() {
4759 let mut a = ListBuilder::new(Int8Builder::new());
4760 a.append_value([None]);
4761 a.append_value([None, None]);
4762 let a = a.finish();
4763
4764 let converter = RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap();
4765 let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap();
4766 assert_eq!(rows.row(0).cmp(&rows.row(1)), Ordering::Less);
4767 }
4768
4769 #[test]
4770 fn map_should_be_marked_as_unsupported() {
4771 let map_data_type = Field::new_map(
4772 "map",
4773 "entries",
4774 Field::new("key", DataType::Utf8, false),
4775 Field::new("value", DataType::Utf8, true),
4776 false,
4777 true,
4778 )
4779 .data_type()
4780 .clone();
4781
4782 let is_supported = RowConverter::supports_fields(&[SortField::new(map_data_type)]);
4783
4784 assert!(!is_supported, "Map should not be supported");
4785 }
4786
4787 #[test]
4788 fn should_fail_to_create_row_converter_for_unsupported_map_type() {
4789 let map_data_type = Field::new_map(
4790 "map",
4791 "entries",
4792 Field::new("key", DataType::Utf8, false),
4793 Field::new("value", DataType::Utf8, true),
4794 false,
4795 true,
4796 )
4797 .data_type()
4798 .clone();
4799
4800 let converter = RowConverter::new(vec![SortField::new(map_data_type)]);
4801
4802 match converter {
4803 Err(ArrowError::NotYetImplemented(message)) => {
4804 assert!(
4805 message.contains("Row format support not yet implemented for"),
4806 "Expected NotYetImplemented error for map data type, got: {message}",
4807 );
4808 }
4809 Err(e) => panic!("Expected NotYetImplemented error, got: {e}"),
4810 Ok(_) => panic!("Expected NotYetImplemented error for map data type"),
4811 }
4812 }
4813
4814 #[test]
4815 fn test_values_buffer_smaller_when_utf8_validation_disabled() {
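        // Returns the length of the StringView data buffer after converting the rows back directly
        // (unchecked) and after re-parsing them from the binary representation (checked).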
4816 fn get_values_buffer_len(col: ArrayRef) -> (usize, usize) {
4817 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8View)]).unwrap();
4819
4820 let rows = converter.convert_columns(&[col]).unwrap();
4822 let converted = converter.convert_rows(&rows).unwrap();
4823 let unchecked_values_len = converted[0].as_string_view().data_buffers()[0].len();
4824
4825 let rows = rows.try_into_binary().expect("reasonable size");
4827 let parser = converter.parser();
4828 let converted = converter
4829 .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
4830 .unwrap();
4831 let checked_values_len = converted[0].as_string_view().data_buffers()[0].len();
4832 (unchecked_values_len, checked_values_len)
4833 }
4834
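        // Every string is 12 bytes or shorter, so all values can be inlined in the views.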
4835 let col = Arc::new(StringViewArray::from_iter([
4837             Some("hello"),
            None,
            Some("short"),
            Some("tiny"),
        ])) as ArrayRef;
4842
4843 let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
4844 assert_eq!(unchecked_values_len, 0);
4846 assert_eq!(checked_values_len, 14);
4848
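        // Every string is longer than 12 bytes, so both paths must copy the values into the data buffer.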
4849 let col = Arc::new(StringViewArray::from_iter([
4851 Some("this is a very long string over 12 bytes"),
4852 Some("another long string to test the buffer"),
4853 ])) as ArrayRef;
4854
4855 let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
4856 assert!(unchecked_values_len > 0);
4858 assert_eq!(unchecked_values_len, checked_values_len);
4859
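        // A mix of short and long strings; only "thisisexact13" (13 bytes) exceeds the inline limit.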
4860 let col = Arc::new(StringViewArray::from_iter([
4862             Some("tiny"),
            Some("thisisexact13"),
            None,
4865             Some("short"),
        ])) as ArrayRef;
4867
4868 let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
4869 assert_eq!(unchecked_values_len, 13);
4871 assert!(checked_values_len > unchecked_values_len);
4872 }
4873
4874 #[test]
4875 fn test_sparse_union() {
4876 let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
4878 let str_array = StringArray::from(vec![None, Some("b"), None, Some("d"), None]);
4879
4880 let type_ids = vec![0, 1, 0, 1, 0].into();
4882
4883 let union_fields = [
4884 (0, Arc::new(Field::new("int", DataType::Int32, false))),
4885 (1, Arc::new(Field::new("str", DataType::Utf8, false))),
4886 ]
4887 .into_iter()
4888 .collect();
4889
4890 let union_array = UnionArray::try_new(
4891 union_fields,
4892 type_ids,
4893 None,
4894 vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
4895 )
4896 .unwrap();
4897
4898 let union_type = union_array.data_type().clone();
4899 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
4900
4901 let rows = converter
4902 .convert_columns(&[Arc::new(union_array.clone())])
4903 .unwrap();
4904
4905 let back = converter.convert_rows(&rows).unwrap();
4907 let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
4908
4909 assert_eq!(union_array.len(), back_union.len());
4910 for i in 0..union_array.len() {
4911 assert_eq!(union_array.type_id(i), back_union.type_id(i));
4912 }
4913 }
4914
4915 #[test]
4916 fn test_sparse_union_with_nulls() {
4917 let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
4919 let str_array = StringArray::from(vec![None::<&str>; 5]);
4920
4921 let type_ids = vec![0, 1, 0, 1, 0].into();
4923
4924 let union_fields = [
4925 (0, Arc::new(Field::new("int", DataType::Int32, true))),
4926 (1, Arc::new(Field::new("str", DataType::Utf8, true))),
4927 ]
4928 .into_iter()
4929 .collect();
4930
4931 let union_array = UnionArray::try_new(
4932 union_fields,
4933 type_ids,
4934 None,
4935 vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
4936 )
4937 .unwrap();
4938
4939 let union_type = union_array.data_type().clone();
4940 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
4941
4942 let rows = converter
4943 .convert_columns(&[Arc::new(union_array.clone())])
4944 .unwrap();
4945
4946 let back = converter.convert_rows(&rows).unwrap();
4948 let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
4949
4950 assert_eq!(union_array.len(), back_union.len());
4951 for i in 0..union_array.len() {
4952 let expected_null = union_array.is_null(i);
4953 let actual_null = back_union.is_null(i);
4954 assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
4955 if !expected_null {
4956 assert_eq!(union_array.type_id(i), back_union.type_id(i));
4957 }
4958 }
4959 }
4960
4961 #[test]
4962 fn test_dense_union() {
4963 let int_array = Int32Array::from(vec![1, 3, 5]);
4965 let str_array = StringArray::from(vec!["a", "b"]);
4966
4967 let type_ids = vec![0, 1, 0, 1, 0].into();
4968
4969 let offsets = vec![0, 0, 1, 1, 2].into();
4971
4972 let union_fields = [
4973 (0, Arc::new(Field::new("int", DataType::Int32, false))),
4974 (1, Arc::new(Field::new("str", DataType::Utf8, false))),
4975 ]
4976 .into_iter()
4977 .collect();
4978
4979 let union_array = UnionArray::try_new(
4980 union_fields,
4981 type_ids,
4982             Some(offsets),
            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
4984 )
4985 .unwrap();
4986
4987 let union_type = union_array.data_type().clone();
4988 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
4989
4990 let rows = converter
4991 .convert_columns(&[Arc::new(union_array.clone())])
4992 .unwrap();
4993
4994 let back = converter.convert_rows(&rows).unwrap();
4996 let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
4997
4998 assert_eq!(union_array.len(), back_union.len());
4999 for i in 0..union_array.len() {
5000 assert_eq!(union_array.type_id(i), back_union.type_id(i));
5001 }
5002 }
5003
5004 #[test]
5005 fn test_dense_union_with_nulls() {
5006 let int_array = Int32Array::from(vec![Some(1), None, Some(5)]);
5008 let str_array = StringArray::from(vec![Some("a"), None]);
5009
5010 let type_ids = vec![0, 1, 0, 1, 0].into();
5012 let offsets = vec![0, 0, 1, 1, 2].into();
5013
5014 let union_fields = [
5015 (0, Arc::new(Field::new("int", DataType::Int32, true))),
5016 (1, Arc::new(Field::new("str", DataType::Utf8, true))),
5017 ]
5018 .into_iter()
5019 .collect();
5020
5021 let union_array = UnionArray::try_new(
5022 union_fields,
5023 type_ids,
5024 Some(offsets),
5025 vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
5026 )
5027 .unwrap();
5028
5029 let union_type = union_array.data_type().clone();
5030 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
5031
5032 let rows = converter
5033 .convert_columns(&[Arc::new(union_array.clone())])
5034 .unwrap();
5035
5036 let back = converter.convert_rows(&rows).unwrap();
5038 let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
5039
5040 assert_eq!(union_array.len(), back_union.len());
5041 for i in 0..union_array.len() {
5042 let expected_null = union_array.is_null(i);
5043 let actual_null = back_union.is_null(i);
5044 assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
5045 if !expected_null {
5046 assert_eq!(union_array.type_id(i), back_union.type_id(i));
5047 }
5048 }
5049 }
5050
5051 #[test]
5052 fn test_union_ordering() {
5053 let int_array = Int32Array::from(vec![100, 5, 20]);
5054 let str_array = StringArray::from(vec!["z", "a"]);
5055
5056 let type_ids = vec![0, 1, 0, 1, 0].into();
5058 let offsets = vec![0, 0, 1, 1, 2].into();
5059
5060 let union_fields = [
5061 (0, Arc::new(Field::new("int", DataType::Int32, false))),
5062 (1, Arc::new(Field::new("str", DataType::Utf8, false))),
5063 ]
5064 .into_iter()
5065 .collect();
5066
5067 let union_array = UnionArray::try_new(
5068 union_fields,
5069 type_ids,
5070 Some(offsets),
5071 vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
5072 )
5073 .unwrap();
5074
5075 let union_type = union_array.data_type().clone();
5076 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
5077
5078 let rows = converter.convert_columns(&[Arc::new(union_array)]).unwrap();
5079
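        // Logical values: row 0 = Int32 100, row 1 = Utf8 "z", row 2 = Int32 5, row 3 = Utf8 "a", row 4 = Int32 20.
        // The first two assertions compare across variants: for this data every Int32 value orders
        // before every Utf8 value, consistent with the Int32 field having the smaller type id.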
5080 assert!(rows.row(2) < rows.row(1));
5092
5093 assert!(rows.row(0) < rows.row(3));
5095
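        // Within the Int32 variant the values order numerically: 5 < 20 < 100.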
5096 assert!(rows.row(2) < rows.row(4));
5099 assert!(rows.row(4) < rows.row(0));
5101
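        // Within the Utf8 variant the values order lexicographically: "a" < "z".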
5102 assert!(rows.row(3) < rows.row(1));
5105 }
5106
5107 #[test]
5108 fn test_row_converter_roundtrip_with_many_union_columns() {
5109 let fields1 = UnionFields::try_new(
5111 vec![0, 1],
5112 vec![
5113 Field::new("int", DataType::Int32, true),
5114 Field::new("string", DataType::Utf8, true),
5115 ],
5116 )
5117 .unwrap();
5118
5119 let int_array1 = Int32Array::from(vec![Some(67), None]);
5120 let string_array1 = StringArray::from(vec![None::<&str>, Some("hello")]);
5121 let type_ids1 = vec![0i8, 1].into();
5122
5123 let union_array1 = UnionArray::try_new(
5124 fields1.clone(),
5125 type_ids1,
5126 None,
5127 vec![
5128 Arc::new(int_array1) as ArrayRef,
5129 Arc::new(string_array1) as ArrayRef,
5130 ],
5131 )
5132 .unwrap();
5133
5134 let fields2 = UnionFields::try_new(
5136 vec![0, 1],
5137 vec![
5138 Field::new("int", DataType::Int32, true),
5139 Field::new("string", DataType::Utf8, true),
5140 ],
5141 )
5142 .unwrap();
5143
5144 let int_array2 = Int32Array::from(vec![Some(100), None]);
5145 let string_array2 = StringArray::from(vec![None::<&str>, Some("world")]);
5146 let type_ids2 = vec![0i8, 1].into();
5147
5148 let union_array2 = UnionArray::try_new(
5149 fields2.clone(),
5150 type_ids2,
5151 None,
5152 vec![
5153 Arc::new(int_array2) as ArrayRef,
5154 Arc::new(string_array2) as ArrayRef,
5155 ],
5156 )
5157 .unwrap();
5158
5159 let field1 = Field::new("col1", DataType::Union(fields1, UnionMode::Sparse), true);
5161 let field2 = Field::new("col2", DataType::Union(fields2, UnionMode::Sparse), true);
5162
5163 let sort_field1 = SortField::new(field1.data_type().clone());
5164 let sort_field2 = SortField::new(field2.data_type().clone());
5165
5166 let converter = RowConverter::new(vec![sort_field1, sort_field2]).unwrap();
5167
5168 let rows = converter
5169 .convert_columns(&[
5170 Arc::new(union_array1.clone()) as ArrayRef,
5171 Arc::new(union_array2.clone()) as ArrayRef,
5172 ])
5173 .unwrap();
5174
5175 let out = converter.convert_rows(&rows).unwrap();
5177
5178 let [col1, col2] = out.as_slice() else {
5179 panic!("expected 2 columns")
5180 };
5181
5182 let col1 = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5183 let col2 = col2.as_any().downcast_ref::<UnionArray>().unwrap();
5184
5185 for (expected, got) in [union_array1, union_array2].iter().zip([col1, col2]) {
5186 assert_eq!(expected.len(), got.len());
5187 assert_eq!(expected.type_ids(), got.type_ids());
5188
5189 for i in 0..expected.len() {
5190 assert_eq!(expected.value(i).as_ref(), got.value(i).as_ref());
5191 }
5192 }
5193 }
5194
5195 #[test]
5196 fn test_row_converter_roundtrip_with_one_union_column() {
5197 let fields = UnionFields::try_new(
5198 vec![0, 1],
5199 vec![
5200 Field::new("int", DataType::Int32, true),
5201 Field::new("string", DataType::Utf8, true),
5202 ],
5203 )
5204 .unwrap();
5205
5206 let int_array = Int32Array::from(vec![Some(67), None]);
5207 let string_array = StringArray::from(vec![None::<&str>, Some("hello")]);
5208 let type_ids = vec![0i8, 1].into();
5209
5210 let union_array = UnionArray::try_new(
5211 fields.clone(),
5212 type_ids,
5213 None,
5214 vec![
5215 Arc::new(int_array) as ArrayRef,
5216 Arc::new(string_array) as ArrayRef,
5217 ],
5218 )
5219 .unwrap();
5220
5221 let field = Field::new("col", DataType::Union(fields, UnionMode::Sparse), true);
5222 let sort_field = SortField::new(field.data_type().clone());
5223 let converter = RowConverter::new(vec![sort_field]).unwrap();
5224
5225 let rows = converter
5226 .convert_columns(&[Arc::new(union_array.clone()) as ArrayRef])
5227 .unwrap();
5228
5229 let out = converter.convert_rows(&rows).unwrap();
5231
5232 let [col1] = out.as_slice() else {
5233 panic!("expected 1 column")
5234 };
5235
5236 let col = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5237 assert_eq!(col.len(), union_array.len());
5238 assert_eq!(col.type_ids(), union_array.type_ids());
5239
5240 for i in 0..col.len() {
5241 assert_eq!(col.value(i).as_ref(), union_array.value(i).as_ref());
5242 }
5243 }
5244
5245 #[test]
5246 fn test_row_converter_roundtrip_with_non_default_union_type_ids() {
5247 let fields = UnionFields::try_new(
5249 vec![70, 85],
5250 vec![
5251 Field::new("int", DataType::Int32, true),
5252 Field::new("string", DataType::Utf8, true),
5253 ],
5254 )
5255 .unwrap();
5256
5257 let int_array = Int32Array::from(vec![Some(67), None]);
5258 let string_array = StringArray::from(vec![None::<&str>, Some("hello")]);
5259 let type_ids = vec![70i8, 85].into();
5260
5261 let union_array = UnionArray::try_new(
5262 fields.clone(),
5263 type_ids,
5264 None,
5265 vec![
5266 Arc::new(int_array) as ArrayRef,
5267 Arc::new(string_array) as ArrayRef,
5268 ],
5269 )
5270 .unwrap();
5271
5272 let field = Field::new("col", DataType::Union(fields, UnionMode::Sparse), true);
5273 let sort_field = SortField::new(field.data_type().clone());
5274 let converter = RowConverter::new(vec![sort_field]).unwrap();
5275
5276 let rows = converter
5277 .convert_columns(&[Arc::new(union_array.clone()) as ArrayRef])
5278 .unwrap();
5279
5280 let out = converter.convert_rows(&rows).unwrap();
5282
5283 let [col1] = out.as_slice() else {
5284 panic!("expected 1 column")
5285 };
5286
5287 let col = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5288 assert_eq!(col.len(), union_array.len());
5289 assert_eq!(col.type_ids(), union_array.type_ids());
5290
5291 for i in 0..col.len() {
5292 assert_eq!(col.value(i).as_ref(), union_array.value(i).as_ref());
5293 }
5294 }
5295
5296 #[test]
5297 fn rows_size_should_count_for_capacity() {
5298 let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
5299
5300 let empty_rows_size_with_preallocate_rows_and_data = {
5301 let rows = row_converter.empty_rows(1000, 1000);
5302
5303 rows.size()
5304 };
5305 let empty_rows_size_with_preallocate_rows = {
5306 let rows = row_converter.empty_rows(1000, 0);
5307
5308 rows.size()
5309 };
5310 let empty_rows_size_with_preallocate_data = {
5311 let rows = row_converter.empty_rows(0, 1000);
5312
5313 rows.size()
5314 };
5315 let empty_rows_size_without_preallocate = {
5316 let rows = row_converter.empty_rows(0, 0);
5317
5318 rows.size()
5319 };
5320
5321 assert!(
5322 empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_rows,
5323 "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_rows}"
5324 );
5325 assert!(
5326 empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_data,
5327 "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_data}"
5328 );
5329 assert!(
5330 empty_rows_size_with_preallocate_rows > empty_rows_size_without_preallocate,
5331 "{empty_rows_size_with_preallocate_rows} should be larger than {empty_rows_size_without_preallocate}"
5332 );
5333 assert!(
5334 empty_rows_size_with_preallocate_data > empty_rows_size_without_preallocate,
5335 "{empty_rows_size_with_preallocate_data} should be larger than {empty_rows_size_without_preallocate}"
5336 );
5337 }
5338
5339 #[test]
5340 fn test_struct_no_child_fields() {
5341 fn run_test(array: ArrayRef) {
5342 let sort_fields = vec![SortField::new(array.data_type().clone())];
5343 let converter = RowConverter::new(sort_fields).unwrap();
5344 let r = converter.convert_columns(&[Arc::clone(&array)]).unwrap();
5345
5346 let back = converter.convert_rows(&r).unwrap();
5347 assert_eq!(back.len(), 1);
5348 assert_eq!(&back[0], &array);
5349 }
5350
5351 let s = Arc::new(StructArray::new_empty_fields(5, None)) as ArrayRef;
5352 run_test(s);
5353
5354 let s = Arc::new(StructArray::new_empty_fields(
5355 5,
5356 Some(vec![true, false, true, false, false].into()),
5357 )) as ArrayRef;
5358 run_test(s);
5359 }
5360
5361 #[test]
5362 fn reserve_should_increase_capacity_to_the_requested_size() {
5363 let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
5364 let mut empty_rows = row_converter.empty_rows(0, 0);
5365 empty_rows.reserve(50, 50);
5366 let before_size = empty_rows.size();
5367 empty_rows.reserve(50, 50);
5368 assert_eq!(
5369 empty_rows.size(),
5370 before_size,
5371 "Size should not change when reserving already reserved space"
5372 );
5373 empty_rows.reserve(10, 20);
5374 assert_eq!(
5375 empty_rows.size(),
5376 before_size,
5377 "Size should not change when already have space for the expected reserved data"
5378 );
5379
5380 empty_rows.reserve(100, 20);
5381 assert!(
5382 empty_rows.size() > before_size,
5383 "Size should increase when reserving more space than previously reserved"
5384 );
5385
5386 let before_size = empty_rows.size();
5387
5388 empty_rows.reserve(20, 100);
5389 assert!(
5390 empty_rows.size() > before_size,
5391 "Size should increase when reserving more space than previously reserved"
5392 );
5393 }
5394
5395 #[test]
5396 fn empty_rows_should_return_empty_lengths_iterator() {
5397 let rows = RowConverter::new(vec![SortField::new(DataType::UInt8)])
5398 .unwrap()
5399 .empty_rows(0, 0);
5400 let mut lengths_iter = rows.lengths();
5401 assert_eq!(lengths_iter.next(), None);
5402 }
5403
5404 #[test]
5405 fn test_nested_null_list() {
5406 let null_array = Arc::new(NullArray::new(3));
5407 let list: ArrayRef = Arc::new(ListArray::new(
5409 Field::new_list_field(DataType::Null, true).into(),
5410 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5411 null_array,
5412 None,
5413 ));
5414
5415 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5416 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5417 let back = converter.convert_rows(&rows).unwrap();
5418
5419 assert_eq!(&list, &back[0]);
5420 }
5421
5422 #[test]
5424 fn test_double_nested_null_list() {
5425 let null_array = Arc::new(NullArray::new(1));
5426 let nested_field = Arc::new(Field::new_list_field(DataType::Null, true));
5428 let nested_list = Arc::new(ListArray::new(
5429 nested_field.clone(),
5430 OffsetBuffer::from_lengths(vec![1]),
5431 null_array,
5432 None,
5433 ));
5434 let list = Arc::new(ListArray::new(
5436 Field::new_list_field(DataType::List(nested_field), true).into(),
5437 OffsetBuffer::from_lengths(vec![1]),
5438 nested_list,
5439 None,
5440 )) as ArrayRef;
5441
5442 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5443 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5444 let back = converter.convert_rows(&rows).unwrap();
5445
5446 assert_eq!(&list, &back[0]);
5447 }
5448
5449 #[test]
5451 fn test_large_list_null() {
5452 let null_array = Arc::new(NullArray::new(3));
5453 let list: ArrayRef = Arc::new(LargeListArray::new(
5455 Field::new_list_field(DataType::Null, true).into(),
5456 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5457 null_array,
5458 None,
5459 ));
5460
5461 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5462 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5463 let back = converter.convert_rows(&rows).unwrap();
5464
5465 assert_eq!(&list, &back[0]);
5466 }
5467
5468 #[test]
5470 fn test_fixed_size_list_null() {
5471 let null_array = Arc::new(NullArray::new(6));
5472 let list: ArrayRef = Arc::new(FixedSizeListArray::new(
5474 Arc::new(Field::new_list_field(DataType::Null, true)),
5475 2,
5476 null_array,
5477 None,
5478 ));
5479
5480 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5481 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5482 let back = converter.convert_rows(&rows).unwrap();
5483
5484 assert_eq!(&list, &back[0]);
5485 }
5486
5487 #[test]
5489 fn test_list_null_variations() {
5490 let null_array = Arc::new(NullArray::new(3));
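        // A list of nulls with lengths [1, 0, 2] and no outer validity mask.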
5492 let list: ArrayRef = Arc::new(ListArray::new(
5493 Field::new_list_field(DataType::Null, true).into(),
5494 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5495 null_array,
5496 None,
5497 ));
5498
5499 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5500 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5501 let back = converter.convert_rows(&rows).unwrap();
5502 assert_eq!(&list, &back[0]);
5503
5504 let null_array = Arc::new(NullArray::new(3));
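        // The same lengths, now with an outer validity mask marking the middle list as null.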
5506 let list: ArrayRef = Arc::new(ListArray::new(
5507 Field::new_list_field(DataType::Null, true).into(),
5508 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5509 null_array,
5510 Some(vec![true, false, true].into()),
5511 ));
5512
5513 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5514 let back = converter.convert_rows(&rows).unwrap();
5515 assert_eq!(&list, &back[0]);
5516
5517 let null_array = Arc::new(NullArray::new(0));
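        // An empty list array: no rows and a zero-length values array.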
5519 let list: ArrayRef = Arc::new(ListArray::new(
5520 Field::new_list_field(DataType::Null, true).into(),
5521 OffsetBuffer::from_lengths(vec![]),
5522 null_array,
5523 None,
5524 ));
5525
5526 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5527 let back = converter.convert_rows(&rows).unwrap();
5528 assert_eq!(&list, &back[0]);
5529
5530 let null_array = Arc::new(NullArray::new(0));
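        // Three lists that are all empty, backed by a zero-length values array.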
5532 let list: ArrayRef = Arc::new(ListArray::new(
5533 Field::new_list_field(DataType::Null, true).into(),
5534 OffsetBuffer::from_lengths(vec![0, 0, 0]),
5535 null_array,
5536 None,
5537 ));
5538
5539 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5540 let back = converter.convert_rows(&rows).unwrap();
5541 assert_eq!(&list, &back[0]);
5542 }
5543
5544 #[test]
5546 fn test_list_null_descending() {
5547 let null_array = Arc::new(NullArray::new(3));
5548 let list: ArrayRef = Arc::new(ListArray::new(
5550 Field::new_list_field(DataType::Null, true).into(),
5551 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5552 null_array,
5553 None,
5554 ));
5555
5556 let options = SortOptions::default().with_descending(true);
5557 let field = SortField::new_with_options(list.data_type().clone(), options);
5558 let converter = RowConverter::new(vec![field]).unwrap();
5559 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5560 let back = converter.convert_rows(&rows).unwrap();
5561
5562 assert_eq!(&list, &back[0]);
5563 }
5564
5565 #[test]
5567 fn test_struct_with_null_field() {
5568 let null_array = Arc::new(NullArray::new(3));
5570 let int_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
5571
5572 let struct_array: ArrayRef = Arc::new(StructArray::new(
5573 vec![
5574 Arc::new(Field::new("a", DataType::Null, true)),
5575 Arc::new(Field::new("b", DataType::Int32, true)),
5576 ]
5577 .into(),
5578 vec![null_array, int_array],
5579             Some(vec![true, true, false].into()),
        ));
5581
5582 let converter =
5583 RowConverter::new(vec![SortField::new(struct_array.data_type().clone())]).unwrap();
5584 let rows = converter
5585 .convert_columns(&[Arc::clone(&struct_array)])
5586 .unwrap();
5587 let back = converter.convert_rows(&rows).unwrap();
5588
5589 assert_eq!(&struct_array, &back[0]);
5590 }
5591
5592 #[test]
5594 fn test_nested_struct_with_null() {
5595 let inner_null = Arc::new(NullArray::new(2));
5597 let inner_struct = Arc::new(StructArray::new(
5598 vec![Arc::new(Field::new("x", DataType::Null, true))].into(),
5599 vec![inner_null],
5600 None,
5601 ));
5602
5603 let y_array = Arc::new(Int32Array::from(vec![10, 20]));
5605 let outer_struct: ArrayRef = Arc::new(StructArray::new(
5606 vec![
5607 Arc::new(Field::new("inner", inner_struct.data_type().clone(), true)),
5608 Arc::new(Field::new("y", DataType::Int32, true)),
5609 ]
5610 .into(),
5611 vec![inner_struct, y_array],
5612 None,
5613 ));
5614
5615 let converter =
5616 RowConverter::new(vec![SortField::new(outer_struct.data_type().clone())]).unwrap();
5617 let rows = converter
5618 .convert_columns(&[Arc::clone(&outer_struct)])
5619 .unwrap();
5620 let back = converter.convert_rows(&rows).unwrap();
5621
5622 assert_eq!(&outer_struct, &back[0]);
5623 }
5624
5625 #[test]
5628 fn test_map_null_not_supported() {
5629 let map_data_type = Field::new_map(
5631 "map",
5632 "entries",
5633 Field::new("key", DataType::Utf8, false),
5634 Field::new("value", DataType::Null, true),
5635 false,
5636 true,
5637 )
5638 .data_type()
5639 .clone();
5640
5641 let result = RowConverter::new(vec![SortField::new(map_data_type)]);
5643 assert!(
5644 result.is_err(),
5645 "Map should not be supported by RowConverter"
5646 );
5647 assert!(
5648 result
5649 .unwrap_err()
5650 .to_string()
5651 .contains("not yet implemented")
5652 );
5653 }
5654}