1#![doc(
157 html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
158 html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
159)]
160#![cfg_attr(docsrs, feature(doc_cfg))]
161#![warn(missing_docs)]
162use std::cmp::Ordering;
163use std::hash::{Hash, Hasher};
164use std::iter::Map;
165use std::slice::Windows;
166use std::sync::Arc;
167
168use arrow_array::cast::*;
169use arrow_array::types::{ArrowDictionaryKeyType, ByteArrayType, ByteViewType};
170use arrow_array::*;
171use arrow_buffer::{ArrowNativeType, Buffer, OffsetBuffer, ScalarBuffer};
172use arrow_data::{ArrayData, ArrayDataBuilder};
173use arrow_schema::*;
174use variable::{decode_binary_view, decode_string_view};
175
176use crate::fixed::{decode_bool, decode_fixed_size_binary, decode_primitive};
177use crate::list::{compute_lengths_fixed_size_list, encode_fixed_size_list};
178use crate::variable::{decode_binary, decode_string};
179use arrow_array::types::{Int16Type, Int32Type, Int64Type};
180
181mod fixed;
182mod list;
183mod run;
184mod variable;
185
186#[derive(Debug)]
532pub struct RowConverter {
533 fields: Arc<[SortField]>,
534 codecs: Vec<Codec>,
536}
537
538#[derive(Debug)]
539enum Codec {
540 Stateless,
542 Dictionary(RowConverter, OwnedRow),
545 Struct(RowConverter, OwnedRow),
548 List(RowConverter),
550 RunEndEncoded(RowConverter),
552 Union(Vec<RowConverter>, Vec<i8>, Vec<OwnedRow>),
555}
556
557fn compute_list_view_bounds<O: OffsetSizeTrait>(array: &GenericListViewArray<O>) -> (usize, usize) {
560 if array.is_empty() {
561 return (0, 0);
562 }
563
564 let offsets = array.value_offsets();
565 let sizes = array.value_sizes();
566 let values_len = array.values().len();
567
568 let mut min_offset = usize::MAX;
569 let mut max_end = 0usize;
570
571 for i in 0..array.len() {
572 let offset = offsets[i].as_usize();
573 let size = sizes[i].as_usize();
574 let end = offset + size;
575
576 if size > 0 {
577 min_offset = min_offset.min(offset);
578 max_end = max_end.max(end);
579 }
580
581 if min_offset == 0 && max_end == values_len {
585 break;
586 }
587 }
588
589 if min_offset == usize::MAX {
590 (0, 0)
592 } else {
593 (min_offset, max_end)
594 }
595}
596
597impl Codec {
598 fn new(sort_field: &SortField) -> Result<Self, ArrowError> {
599 match &sort_field.data_type {
600 DataType::Dictionary(_, values) => {
601 let sort_field =
602 SortField::new_with_options(values.as_ref().clone(), sort_field.options);
603
604 let converter = RowConverter::new(vec![sort_field])?;
605 let null_array = new_null_array(values.as_ref(), 1);
606 let nulls = converter.convert_columns(&[null_array])?;
607
608 let owned = OwnedRow {
609 data: nulls.buffer.into(),
610 config: nulls.config,
611 };
612 Ok(Self::Dictionary(converter, owned))
613 }
614 DataType::RunEndEncoded(_, values) => {
615 let options = SortOptions {
617 descending: false,
618 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
619 };
620
621 let field = SortField::new_with_options(values.data_type().clone(), options);
622 let converter = RowConverter::new(vec![field])?;
623 Ok(Self::RunEndEncoded(converter))
624 }
625 d if !d.is_nested() => Ok(Self::Stateless),
626 DataType::List(f)
627 | DataType::LargeList(f)
628 | DataType::ListView(f)
629 | DataType::LargeListView(f) => {
630 let options = SortOptions {
634 descending: false,
635 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
636 };
637
638 let field = SortField::new_with_options(f.data_type().clone(), options);
639 let converter = RowConverter::new(vec![field])?;
640 Ok(Self::List(converter))
641 }
642 DataType::FixedSizeList(f, _) => {
643 let field = SortField::new_with_options(f.data_type().clone(), sort_field.options);
644 let converter = RowConverter::new(vec![field])?;
645 Ok(Self::List(converter))
646 }
647 DataType::Struct(f) => {
648 let sort_fields = f
649 .iter()
650 .map(|x| SortField::new_with_options(x.data_type().clone(), sort_field.options))
651 .collect();
652
653 let converter = RowConverter::new(sort_fields)?;
654 let nulls: Vec<_> = f.iter().map(|x| new_null_array(x.data_type(), 1)).collect();
655
656 let nulls = converter.convert_columns(&nulls)?;
657 let owned = OwnedRow {
658 data: nulls.buffer.into(),
659 config: nulls.config,
660 };
661
662 Ok(Self::Struct(converter, owned))
663 }
664 DataType::Union(fields, _mode) => {
665 let options = SortOptions {
668 descending: false,
669 nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
670 };
671
672 let mut converters = Vec::with_capacity(fields.len());
673 let mut type_ids = Vec::with_capacity(fields.len());
674 let mut null_rows = Vec::with_capacity(fields.len());
675
676 for (type_id, field) in fields.iter() {
677 let sort_field =
678 SortField::new_with_options(field.data_type().clone(), options);
679 let converter = RowConverter::new(vec![sort_field])?;
680
681 let null_array = new_null_array(field.data_type(), 1);
682 let nulls = converter.convert_columns(&[null_array])?;
683 let owned = OwnedRow {
684 data: nulls.buffer.into(),
685 config: nulls.config,
686 };
687
688 converters.push(converter);
689 type_ids.push(type_id);
690 null_rows.push(owned);
691 }
692
693 Ok(Self::Union(converters, type_ids, null_rows))
694 }
695 _ => Err(ArrowError::NotYetImplemented(format!(
696 "not yet implemented: {:?}",
697 sort_field.data_type
698 ))),
699 }
700 }
701
702 fn encoder(&self, array: &dyn Array) -> Result<Encoder<'_>, ArrowError> {
703 match self {
704 Codec::Stateless => Ok(Encoder::Stateless),
705 Codec::Dictionary(converter, nulls) => {
706 let values = array.as_any_dictionary().values().clone();
707 let rows = converter.convert_columns(&[values])?;
708 Ok(Encoder::Dictionary(rows, nulls.row()))
709 }
710 Codec::Struct(converter, null) => {
711 let v = as_struct_array(array);
712 let rows = converter.convert_columns(v.columns())?;
713 Ok(Encoder::Struct(rows, null.row()))
714 }
715 Codec::List(converter) => {
716 let values = match array.data_type() {
717 DataType::List(_) => {
718 let list_array = as_list_array(array);
719 let first_offset = list_array.offsets()[0] as usize;
720 let last_offset =
721 list_array.offsets()[list_array.offsets().len() - 1] as usize;
722
723 list_array
726 .values()
727 .slice(first_offset, last_offset - first_offset)
728 }
729 DataType::LargeList(_) => {
730 let list_array = as_large_list_array(array);
731
732 let first_offset = list_array.offsets()[0] as usize;
733 let last_offset =
734 list_array.offsets()[list_array.offsets().len() - 1] as usize;
735
736 list_array
739 .values()
740 .slice(first_offset, last_offset - first_offset)
741 }
742 DataType::ListView(_) => {
743 let list_view_array = array.as_list_view::<i32>();
744 let (min_offset, max_end) = compute_list_view_bounds(list_view_array);
745 list_view_array
746 .values()
747 .slice(min_offset, max_end - min_offset)
748 }
749 DataType::LargeListView(_) => {
750 let list_view_array = array.as_list_view::<i64>();
751 let (min_offset, max_end) = compute_list_view_bounds(list_view_array);
752 list_view_array
753 .values()
754 .slice(min_offset, max_end - min_offset)
755 }
756 DataType::FixedSizeList(_, _) => {
757 as_fixed_size_list_array(array).values().clone()
758 }
759 _ => unreachable!(),
760 };
761 let rows = converter.convert_columns(&[values])?;
762 Ok(Encoder::List(rows))
763 }
764 Codec::RunEndEncoded(converter) => {
765 let values = match array.data_type() {
766 DataType::RunEndEncoded(r, _) => match r.data_type() {
767 DataType::Int16 => array.as_run::<Int16Type>().values_slice(),
768 DataType::Int32 => array.as_run::<Int32Type>().values_slice(),
769 DataType::Int64 => array.as_run::<Int64Type>().values_slice(),
770 _ => unreachable!("Unsupported run end index type: {r:?}"),
771 },
772 _ => unreachable!(),
773 };
774 let rows = converter.convert_columns(std::slice::from_ref(&values))?;
775 Ok(Encoder::RunEndEncoded(rows))
776 }
777 Codec::Union(converters, field_to_type_ids, _) => {
778 let union_array = array
779 .as_any()
780 .downcast_ref::<UnionArray>()
781 .expect("expected Union array");
782
783 let type_ids = union_array.type_ids().clone();
784 let offsets = union_array.offsets().cloned();
785
786 let mut child_rows = Vec::with_capacity(converters.len());
787 for (field_idx, converter) in converters.iter().enumerate() {
788 let type_id = field_to_type_ids[field_idx];
789 let child_array = union_array.child(type_id);
790 let rows = converter.convert_columns(std::slice::from_ref(child_array))?;
791 child_rows.push(rows);
792 }
793
794 Ok(Encoder::Union {
795 child_rows,
796 field_to_type_ids: field_to_type_ids.clone(),
797 type_ids,
798 offsets,
799 })
800 }
801 }
802 }
803
804 fn size(&self) -> usize {
805 match self {
806 Codec::Stateless => 0,
807 Codec::Dictionary(converter, nulls) => converter.size() + nulls.data.len(),
808 Codec::Struct(converter, nulls) => converter.size() + nulls.data.len(),
809 Codec::List(converter) => converter.size(),
810 Codec::RunEndEncoded(converter) => converter.size(),
811 Codec::Union(converters, _, null_rows) => {
812 converters.iter().map(|c| c.size()).sum::<usize>()
813 + null_rows.iter().map(|n| n.data.len()).sum::<usize>()
814 }
815 }
816 }
817}
818
819#[derive(Debug)]
820enum Encoder<'a> {
821 Stateless,
823 Dictionary(Rows, Row<'a>),
825 Struct(Rows, Row<'a>),
831 List(Rows),
833 RunEndEncoded(Rows),
835 Union {
837 child_rows: Vec<Rows>,
838 field_to_type_ids: Vec<i8>,
839 type_ids: ScalarBuffer<i8>,
840 offsets: Option<ScalarBuffer<i32>>,
841 },
842}
843
844#[derive(Debug, Clone, PartialEq, Eq)]
846pub struct SortField {
847 options: SortOptions,
849 data_type: DataType,
851}
852
853impl SortField {
854 pub fn new(data_type: DataType) -> Self {
856 Self::new_with_options(data_type, Default::default())
857 }
858
859 pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
861 Self { options, data_type }
862 }
863
864 pub fn size(&self) -> usize {
868 self.data_type.size() + std::mem::size_of::<Self>() - std::mem::size_of::<DataType>()
869 }
870}
871
872impl RowConverter {
873 pub fn new(fields: Vec<SortField>) -> Result<Self, ArrowError> {
875 if !Self::supports_fields(&fields) {
876 return Err(ArrowError::NotYetImplemented(format!(
877 "Row format support not yet implemented for: {fields:?}"
878 )));
879 }
880
881 let codecs = fields.iter().map(Codec::new).collect::<Result<_, _>>()?;
882 Ok(Self {
883 fields: fields.into(),
884 codecs,
885 })
886 }
887
888 pub fn supports_fields(fields: &[SortField]) -> bool {
890 fields.iter().all(|x| Self::supports_datatype(&x.data_type))
891 }
892
893 fn supports_datatype(d: &DataType) -> bool {
894 match d {
895 _ if !d.is_nested() => true,
896 DataType::List(f)
897 | DataType::LargeList(f)
898 | DataType::ListView(f)
899 | DataType::LargeListView(f)
900 | DataType::FixedSizeList(f, _) => Self::supports_datatype(f.data_type()),
901 DataType::Struct(f) => f.iter().all(|x| Self::supports_datatype(x.data_type())),
902 DataType::RunEndEncoded(_, values) => Self::supports_datatype(values.data_type()),
903 DataType::Union(fs, _mode) => fs
904 .iter()
905 .all(|(_, f)| Self::supports_datatype(f.data_type())),
906 _ => false,
907 }
908 }
909
910 pub fn convert_columns(&self, columns: &[ArrayRef]) -> Result<Rows, ArrowError> {
920 let num_rows = columns.first().map(|x| x.len()).unwrap_or(0);
921 let mut rows = self.empty_rows(num_rows, 0);
922 self.append(&mut rows, columns)?;
923 Ok(rows)
924 }
925
926 pub fn append(&self, rows: &mut Rows, columns: &[ArrayRef]) -> Result<(), ArrowError> {
957 assert!(
958 Arc::ptr_eq(&rows.config.fields, &self.fields),
959 "rows were not produced by this RowConverter"
960 );
961
962 if columns.len() != self.fields.len() {
963 return Err(ArrowError::InvalidArgumentError(format!(
964 "Incorrect number of arrays provided to RowConverter, expected {} got {}",
965 self.fields.len(),
966 columns.len()
967 )));
968 }
969 for colum in columns.iter().skip(1) {
970 if colum.len() != columns[0].len() {
971 return Err(ArrowError::InvalidArgumentError(format!(
972 "RowConverter columns must all have the same length, expected {} got {}",
973 columns[0].len(),
974 colum.len()
975 )));
976 }
977 }
978
979 let encoders = columns
980 .iter()
981 .zip(&self.codecs)
982 .zip(self.fields.iter())
983 .map(|((column, codec), field)| {
984 if !column.data_type().equals_datatype(&field.data_type) {
985 return Err(ArrowError::InvalidArgumentError(format!(
986 "RowConverter column schema mismatch, expected {} got {}",
987 field.data_type,
988 column.data_type()
989 )));
990 }
991 codec.encoder(column.as_ref())
992 })
993 .collect::<Result<Vec<_>, _>>()?;
994
995 let write_offset = rows.num_rows();
996 let lengths = row_lengths(columns, &encoders);
997 let total = lengths.extend_offsets(rows.offsets[write_offset], &mut rows.offsets);
998 rows.buffer.resize(total, 0);
999
1000 for ((column, field), encoder) in columns.iter().zip(self.fields.iter()).zip(encoders) {
1001 encode_column(
1003 &mut rows.buffer,
1004 &mut rows.offsets[write_offset..],
1005 column.as_ref(),
1006 field.options,
1007 &encoder,
1008 )
1009 }
1010
1011 if cfg!(debug_assertions) {
1012 assert_eq!(*rows.offsets.last().unwrap(), rows.buffer.len());
1013 rows.offsets
1014 .windows(2)
1015 .for_each(|w| assert!(w[0] <= w[1], "offsets should be monotonic"));
1016 }
1017
1018 Ok(())
1019 }
1020
1021 pub fn convert_rows<'a, I>(&self, rows: I) -> Result<Vec<ArrayRef>, ArrowError>
1029 where
1030 I: IntoIterator<Item = Row<'a>>,
1031 {
1032 let mut validate_utf8 = false;
1033 let mut rows: Vec<_> = rows
1034 .into_iter()
1035 .map(|row| {
1036 assert!(
1037 Arc::ptr_eq(&row.config.fields, &self.fields),
1038 "rows were not produced by this RowConverter"
1039 );
1040 validate_utf8 |= row.config.validate_utf8;
1041 row.data
1042 })
1043 .collect();
1044
1045 let result = unsafe { self.convert_raw(&mut rows, validate_utf8) }?;
1049
1050 if cfg!(debug_assertions) {
1051 for (i, row) in rows.iter().enumerate() {
1052 if !row.is_empty() {
1053 return Err(ArrowError::InvalidArgumentError(format!(
1054 "Codecs {codecs:?} did not consume all bytes for row {i}, remaining bytes: {row:?}",
1055 codecs = &self.codecs
1056 )));
1057 }
1058 }
1059 }
1060
1061 Ok(result)
1062 }
1063
1064 pub fn empty_rows(&self, row_capacity: usize, data_capacity: usize) -> Rows {
1093 let mut offsets = Vec::with_capacity(row_capacity.saturating_add(1));
1094 offsets.push(0);
1095
1096 Rows {
1097 offsets,
1098 buffer: Vec::with_capacity(data_capacity),
1099 config: RowConfig {
1100 fields: self.fields.clone(),
1101 validate_utf8: false,
1102 },
1103 }
1104 }
1105
1106 pub fn from_binary(&self, array: BinaryArray) -> Rows {
1133 assert_eq!(
1134 array.null_count(),
1135 0,
1136 "can't construct Rows instance from array with nulls"
1137 );
1138 let (offsets, values, _) = array.into_parts();
1139 let offsets = offsets.iter().map(|&i| i.as_usize()).collect();
1140 let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec());
1142 Rows {
1143 buffer,
1144 offsets,
1145 config: RowConfig {
1146 fields: Arc::clone(&self.fields),
1147 validate_utf8: true,
1148 },
1149 }
1150 }
1151
1152 unsafe fn convert_raw(
1158 &self,
1159 rows: &mut [&[u8]],
1160 validate_utf8: bool,
1161 ) -> Result<Vec<ArrayRef>, ArrowError> {
1162 self.fields
1163 .iter()
1164 .zip(&self.codecs)
1165 .map(|(field, codec)| unsafe { decode_column(field, rows, codec, validate_utf8) })
1166 .collect()
1167 }
1168
1169 pub fn parser(&self) -> RowParser {
1171 RowParser::new(Arc::clone(&self.fields))
1172 }
1173
1174 pub fn size(&self) -> usize {
1178 std::mem::size_of::<Self>()
1179 + self.fields.iter().map(|x| x.size()).sum::<usize>()
1180 + self.codecs.capacity() * std::mem::size_of::<Codec>()
1181 + self.codecs.iter().map(Codec::size).sum::<usize>()
1182 }
1183}
1184
1185#[derive(Debug)]
1187pub struct RowParser {
1188 config: RowConfig,
1189}
1190
1191impl RowParser {
1192 fn new(fields: Arc<[SortField]>) -> Self {
1193 Self {
1194 config: RowConfig {
1195 fields,
1196 validate_utf8: true,
1197 },
1198 }
1199 }
1200
1201 pub fn parse<'a>(&'a self, bytes: &'a [u8]) -> Row<'a> {
1206 Row {
1207 data: bytes,
1208 config: &self.config,
1209 }
1210 }
1211}
1212
1213#[derive(Debug, Clone)]
1215struct RowConfig {
1216 fields: Arc<[SortField]>,
1218 validate_utf8: bool,
1220}
1221
1222#[derive(Debug)]
1226pub struct Rows {
1227 buffer: Vec<u8>,
1229 offsets: Vec<usize>,
1231 config: RowConfig,
1233}
1234
/// Iterator over the byte length of each row, computed from consecutive pairs
/// of row offsets.
pub type RowLengthIter<'a> = Map<Windows<'a, usize>, fn(&'a [usize]) -> usize>;
1237
1238impl Rows {
1239 pub fn push(&mut self, row: Row<'_>) {
1241 assert!(
1242 Arc::ptr_eq(&row.config.fields, &self.config.fields),
1243 "row was not produced by this RowConverter"
1244 );
1245 self.config.validate_utf8 |= row.config.validate_utf8;
1246 self.buffer.extend_from_slice(row.data);
1247 self.offsets.push(self.buffer.len())
1248 }
1249
1250 pub fn reserve(&mut self, row_capacity: usize, data_capacity: usize) {
1252 self.buffer.reserve(data_capacity);
1253 self.offsets.reserve(row_capacity);
1254 }
1255
1256 pub fn row(&self, row: usize) -> Row<'_> {
1258 assert!(row + 1 < self.offsets.len());
1259 unsafe { self.row_unchecked(row) }
1260 }
1261
1262 pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
1267 let end = unsafe { self.offsets.get_unchecked(index + 1) };
1268 let start = unsafe { self.offsets.get_unchecked(index) };
1269 let data = unsafe { self.buffer.get_unchecked(*start..*end) };
1270 Row {
1271 data,
1272 config: &self.config,
1273 }
1274 }
1275
1276 pub fn row_len(&self, row: usize) -> usize {
1279 assert!(row + 1 < self.offsets.len());
1280
1281 self.offsets[row + 1] - self.offsets[row]
1282 }
1283
1284 pub fn lengths(&self) -> RowLengthIter<'_> {
1286 self.offsets.windows(2).map(|w| w[1] - w[0])
1287 }
1288
1289 pub fn clear(&mut self) {
1291 self.offsets.truncate(1);
1292 self.buffer.clear();
1293 }
1294
1295 pub fn num_rows(&self) -> usize {
1297 self.offsets.len() - 1
1298 }
1299
1300 pub fn iter(&self) -> RowsIter<'_> {
1302 self.into_iter()
1303 }
1304
1305 pub fn size(&self) -> usize {
1309 std::mem::size_of::<Self>()
1311 + self.buffer.capacity()
1312 + self.offsets.capacity() * std::mem::size_of::<usize>()
1313 }
1314
1315 pub fn try_into_binary(self) -> Result<BinaryArray, ArrowError> {
1345 if self.buffer.len() > i32::MAX as usize {
1346 return Err(ArrowError::InvalidArgumentError(format!(
1347 "{}-byte rows buffer too long to convert into a i32-indexed BinaryArray",
1348 self.buffer.len()
1349 )));
1350 }
1351 let offsets_scalar = ScalarBuffer::from_iter(self.offsets.into_iter().map(i32::usize_as));
1353 let array = unsafe {
1355 BinaryArray::new_unchecked(
1356 OffsetBuffer::new_unchecked(offsets_scalar),
1357 Buffer::from_vec(self.buffer),
1358 None,
1359 )
1360 };
1361 Ok(array)
1362 }
1363}
1364
1365impl<'a> IntoIterator for &'a Rows {
1366 type Item = Row<'a>;
1367 type IntoIter = RowsIter<'a>;
1368
1369 fn into_iter(self) -> Self::IntoIter {
1370 RowsIter {
1371 rows: self,
1372 start: 0,
1373 end: self.num_rows(),
1374 }
1375 }
1376}
1377
1378#[derive(Debug)]
1380pub struct RowsIter<'a> {
1381 rows: &'a Rows,
1382 start: usize,
1383 end: usize,
1384}
1385
1386impl<'a> Iterator for RowsIter<'a> {
1387 type Item = Row<'a>;
1388
1389 fn next(&mut self) -> Option<Self::Item> {
1390 if self.end == self.start {
1391 return None;
1392 }
1393
1394 let row = unsafe { self.rows.row_unchecked(self.start) };
1396 self.start += 1;
1397 Some(row)
1398 }
1399
1400 fn size_hint(&self) -> (usize, Option<usize>) {
1401 let len = self.len();
1402 (len, Some(len))
1403 }
1404}
1405
1406impl ExactSizeIterator for RowsIter<'_> {
1407 fn len(&self) -> usize {
1408 self.end - self.start
1409 }
1410}
1411
1412impl DoubleEndedIterator for RowsIter<'_> {
1413 fn next_back(&mut self) -> Option<Self::Item> {
1414 if self.end == self.start {
1415 return None;
1416 }
1417
1418 self.end -= 1;
1419
1420 let row = unsafe { self.rows.row_unchecked(self.end) };
1423 Some(row)
1424 }
1425}
1426
1427#[derive(Debug, Copy, Clone)]
1436pub struct Row<'a> {
1437 data: &'a [u8],
1438 config: &'a RowConfig,
1439}
1440
1441impl<'a> Row<'a> {
1442 pub fn owned(&self) -> OwnedRow {
1444 OwnedRow {
1445 data: self.data.into(),
1446 config: self.config.clone(),
1447 }
1448 }
1449
1450 pub fn data(&self) -> &'a [u8] {
1452 self.data
1453 }
1454}
1455
1456impl PartialEq for Row<'_> {
1459 #[inline]
1460 fn eq(&self, other: &Self) -> bool {
1461 self.data.eq(other.data)
1462 }
1463}
1464
1465impl Eq for Row<'_> {}
1466
1467impl PartialOrd for Row<'_> {
1468 #[inline]
1469 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1470 Some(self.cmp(other))
1471 }
1472}
1473
1474impl Ord for Row<'_> {
1475 #[inline]
1476 fn cmp(&self, other: &Self) -> Ordering {
1477 self.data.cmp(other.data)
1478 }
1479}
1480
1481impl Hash for Row<'_> {
1482 #[inline]
1483 fn hash<H: Hasher>(&self, state: &mut H) {
1484 self.data.hash(state)
1485 }
1486}
1487
1488impl AsRef<[u8]> for Row<'_> {
1489 #[inline]
1490 fn as_ref(&self) -> &[u8] {
1491 self.data
1492 }
1493}
1494
1495#[derive(Debug, Clone)]
1499pub struct OwnedRow {
1500 data: Box<[u8]>,
1501 config: RowConfig,
1502}
1503
1504impl OwnedRow {
1505 pub fn row(&self) -> Row<'_> {
1509 Row {
1510 data: &self.data,
1511 config: &self.config,
1512 }
1513 }
1514}
1515
1516impl PartialEq for OwnedRow {
1519 #[inline]
1520 fn eq(&self, other: &Self) -> bool {
1521 self.row().eq(&other.row())
1522 }
1523}
1524
1525impl Eq for OwnedRow {}
1526
1527impl PartialOrd for OwnedRow {
1528 #[inline]
1529 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1530 Some(self.cmp(other))
1531 }
1532}
1533
1534impl Ord for OwnedRow {
1535 #[inline]
1536 fn cmp(&self, other: &Self) -> Ordering {
1537 self.row().cmp(&other.row())
1538 }
1539}
1540
1541impl Hash for OwnedRow {
1542 #[inline]
1543 fn hash<H: Hasher>(&self, state: &mut H) {
1544 self.row().hash(state)
1545 }
1546}
1547
1548impl AsRef<[u8]> for OwnedRow {
1549 #[inline]
1550 fn as_ref(&self) -> &[u8] {
1551 &self.data
1552 }
1553}
1554
1555#[inline]
1557fn null_sentinel(options: SortOptions) -> u8 {
1558 match options.nulls_first {
1559 true => 0,
1560 false => 0xFF,
1561 }
1562}
1563
/// Accumulates the encoded length of every row while columns are sized.
///
/// Starts out as `Fixed` and only allocates per-row storage once a
/// variable-length contribution is pushed or requested.
enum LengthTracker {
    /// Every row has the same `length`.
    Fixed { length: usize, num_rows: usize },
    /// Row `i` has length `fixed_length + lengths[i]`.
    Variable {
        /// Length shared by all rows.
        fixed_length: usize,
        /// Additional length of each individual row.
        lengths: Vec<usize>,
    },
}

impl LengthTracker {
    /// Creates a tracker for `num_rows` rows, each of length zero.
    fn new(num_rows: usize) -> Self {
        let length = 0;
        Self::Fixed { length, num_rows }
    }

    /// Adds `new_length` bytes to every row.
    fn push_fixed(&mut self, new_length: usize) {
        let shared = match self {
            Self::Fixed { length, .. } => length,
            Self::Variable { fixed_length, .. } => fixed_length,
        };
        *shared += new_length;
    }

    /// Adds a per-row number of bytes, one item of `new_lengths` per row.
    ///
    /// # Panics
    /// Panics if per-row lengths are already tracked and `new_lengths` yields
    /// a different number of items.
    fn push_variable(&mut self, new_lengths: impl ExactSizeIterator<Item = usize>) {
        match self {
            Self::Variable { lengths, .. } => {
                assert_eq!(lengths.len(), new_lengths.len());
                for (total, extra) in lengths.iter_mut().zip(new_lengths) {
                    *total += extra;
                }
            }
            Self::Fixed { length, .. } => {
                // First variable contribution: switch to per-row tracking.
                let fixed_length = *length;
                *self = Self::Variable {
                    fixed_length,
                    lengths: new_lengths.collect(),
                };
            }
        }
    }

    /// Returns the per-row lengths for in-place updates, switching to
    /// per-row tracking (all zeros) first if necessary.
    fn materialized(&mut self) -> &mut [usize] {
        if let Self::Fixed { length, num_rows } = self {
            let (fixed_length, rows) = (*length, *num_rows);
            *self = Self::Variable {
                fixed_length,
                lengths: vec![0; rows],
            };
        }

        match self {
            Self::Variable { lengths, .. } => lengths,
            Self::Fixed { .. } => unreachable!(),
        }
    }

    /// Appends the start offset of every tracked row to `offsets`, the first
    /// row starting at `initial_offset`, and returns the offset just past the
    /// last row.
    fn extend_offsets(&self, initial_offset: usize, offsets: &mut Vec<usize>) -> usize {
        let mut next = initial_offset;

        match self {
            Self::Fixed { length, num_rows } => {
                offsets.reserve(*num_rows);
                for _ in 0..*num_rows {
                    offsets.push(next);
                    next += length;
                }
            }
            Self::Variable {
                fixed_length,
                lengths,
            } => {
                offsets.reserve(lengths.len());
                for length in lengths {
                    offsets.push(next);
                    next += length + fixed_length;
                }
            }
        }

        next
    }
}
1666
1667fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
1669 use fixed::FixedLengthEncoding;
1670
1671 let num_rows = cols.first().map(|x| x.len()).unwrap_or(0);
1672 let mut tracker = LengthTracker::new(num_rows);
1673
1674 for (array, encoder) in cols.iter().zip(encoders) {
1675 match encoder {
1676 Encoder::Stateless => {
1677 downcast_primitive_array! {
1678 array => tracker.push_fixed(fixed::encoded_len(array)),
1679 DataType::Null => tracker.push_fixed(2)
1680 DataType::Boolean => tracker.push_fixed(bool::ENCODED_LEN),
1681 DataType::Binary => push_generic_byte_array_lengths(&mut tracker, as_generic_binary_array::<i32>(array)),
1682 DataType::LargeBinary => push_generic_byte_array_lengths(&mut tracker, as_generic_binary_array::<i64>(array)),
1683 DataType::BinaryView => push_byte_view_array_lengths(&mut tracker, array.as_binary_view()),
1684 DataType::Utf8 => push_generic_byte_array_lengths(&mut tracker, array.as_string::<i32>()),
1685 DataType::LargeUtf8 => push_generic_byte_array_lengths(&mut tracker, array.as_string::<i64>()),
1686 DataType::Utf8View => push_byte_view_array_lengths(&mut tracker, array.as_string_view()),
1687 DataType::FixedSizeBinary(len) => {
1688 let len = len.to_usize().unwrap();
1689 tracker.push_fixed(1 + len)
1690 }
1691 _ => unimplemented!("unsupported data type: {}", array.data_type()),
1692 }
1693 }
1694 Encoder::Dictionary(values, null) => {
1695 downcast_dictionary_array! {
1696 array => {
1697 tracker.push_variable(
1698 array.keys().iter().map(|v| match v {
1699 Some(k) => values.row_len(k.as_usize()),
1700 None => null.data.len(),
1701 })
1702 )
1703 }
1704 _ => unreachable!(),
1705 }
1706 }
1707 Encoder::Struct(rows, null) => {
1708 let array = as_struct_array(array);
1709 if rows.num_rows() > 0 {
1710 tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
1712 true => 1 + rows.row_len(idx),
1713 false => 1 + null.data.len(),
1714 }));
1715 } else {
1716 tracker.push_variable((0..array.len()).map(|idx| match array.is_valid(idx) {
1718 true => 1,
1719 false => 1 + null.data.len(),
1720 }));
1721 }
1722 }
1723 Encoder::List(rows) => match array.data_type() {
1724 DataType::List(_) => {
1725 list::compute_lengths(tracker.materialized(), rows, as_list_array(array))
1726 }
1727 DataType::LargeList(_) => {
1728 list::compute_lengths(tracker.materialized(), rows, as_large_list_array(array))
1729 }
1730 DataType::ListView(_) => {
1731 let list_view = array.as_list_view::<i32>();
1732 let (min_offset, _) = compute_list_view_bounds(list_view);
1733 list::compute_lengths_list_view(
1734 tracker.materialized(),
1735 rows,
1736 list_view,
1737 min_offset,
1738 )
1739 }
1740 DataType::LargeListView(_) => {
1741 let list_view = array.as_list_view::<i64>();
1742 let (min_offset, _) = compute_list_view_bounds(list_view);
1743 list::compute_lengths_list_view(
1744 tracker.materialized(),
1745 rows,
1746 list_view,
1747 min_offset,
1748 )
1749 }
1750 DataType::FixedSizeList(_, _) => compute_lengths_fixed_size_list(
1751 &mut tracker,
1752 rows,
1753 as_fixed_size_list_array(array),
1754 ),
1755 _ => unreachable!(),
1756 },
1757 Encoder::RunEndEncoded(rows) => match array.data_type() {
1758 DataType::RunEndEncoded(r, _) => match r.data_type() {
1759 DataType::Int16 => run::compute_lengths(
1760 tracker.materialized(),
1761 rows,
1762 array.as_run::<Int16Type>(),
1763 ),
1764 DataType::Int32 => run::compute_lengths(
1765 tracker.materialized(),
1766 rows,
1767 array.as_run::<Int32Type>(),
1768 ),
1769 DataType::Int64 => run::compute_lengths(
1770 tracker.materialized(),
1771 rows,
1772 array.as_run::<Int64Type>(),
1773 ),
1774 _ => unreachable!("Unsupported run end index type: {r:?}"),
1775 },
1776 _ => unreachable!(),
1777 },
1778 Encoder::Union {
1779 child_rows,
1780 field_to_type_ids,
1781 type_ids,
1782 offsets,
1783 } => {
1784 let union_array = array
1785 .as_any()
1786 .downcast_ref::<UnionArray>()
1787 .expect("expected UnionArray");
1788
1789 let mut type_id_to_field_idx = [0usize; 128];
1790 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
1791 type_id_to_field_idx[type_id as usize] = field_idx;
1792 }
1793
1794 let lengths = (0..union_array.len()).map(|i| {
1795 let type_id = type_ids[i];
1796 let field_idx = type_id_to_field_idx[type_id as usize];
1797 let child_row_i = offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
1798 let child_row_len = child_rows[field_idx].row_len(child_row_i);
1799
1800 1 + child_row_len
1802 });
1803
1804 tracker.push_variable(lengths);
1805 }
1806 }
1807 }
1808
1809 tracker
1810}
1811
1812fn push_generic_byte_array_lengths<T: ByteArrayType>(
1814 tracker: &mut LengthTracker,
1815 array: &GenericByteArray<T>,
1816) {
1817 if let Some(nulls) = array.nulls().filter(|n| n.null_count() > 0) {
1818 tracker.push_variable(
1819 array
1820 .offsets()
1821 .lengths()
1822 .zip(nulls.iter())
1823 .map(|(length, is_valid)| if is_valid { Some(length) } else { None })
1824 .map(variable::padded_length),
1825 )
1826 } else {
1827 tracker.push_variable(
1828 array
1829 .offsets()
1830 .lengths()
1831 .map(variable::non_null_padded_length),
1832 )
1833 }
1834}
1835
1836fn push_byte_view_array_lengths<T: ByteViewType>(
1838 tracker: &mut LengthTracker,
1839 array: &GenericByteViewArray<T>,
1840) {
1841 if let Some(nulls) = array.nulls().filter(|n| n.null_count() > 0) {
1842 tracker.push_variable(
1843 array
1844 .lengths()
1845 .zip(nulls.iter())
1846 .map(|(length, is_valid)| {
1847 if is_valid {
1848 Some(length as usize)
1849 } else {
1850 None
1851 }
1852 })
1853 .map(variable::padded_length),
1854 )
1855 } else {
1856 tracker.push_variable(
1857 array
1858 .lengths()
1859 .map(|len| variable::padded_length(Some(len as usize))),
1860 )
1861 }
1862}
1863
1864fn encode_column(
1866 data: &mut [u8],
1867 offsets: &mut [usize],
1868 column: &dyn Array,
1869 opts: SortOptions,
1870 encoder: &Encoder<'_>,
1871) {
1872 match encoder {
1873 Encoder::Stateless => {
1874 downcast_primitive_array! {
1875 column => {
1876 if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){
1877 fixed::encode(data, offsets, column.values(), nulls, opts)
1878 } else {
1879 fixed::encode_not_null(data, offsets, column.values(), opts)
1880 }
1881 }
1882 DataType::Null => {
1883 for offset in offsets.iter_mut().skip(1) {
1884 variable::encode_null_value(&mut data[*offset..], opts);
1885 *offset += 2;
1886 }
1887 }
1888 DataType::Boolean => {
1889 if let Some(nulls) = column.nulls().filter(|n| n.null_count() > 0){
1890 fixed::encode_boolean(data, offsets, column.as_boolean().values(), nulls, opts)
1891 } else {
1892 fixed::encode_boolean_not_null(data, offsets, column.as_boolean().values(), opts)
1893 }
1894 }
1895 DataType::Binary => {
1896 variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i32>(column), opts)
1897 }
1898 DataType::BinaryView => {
1899 variable::encode(data, offsets, column.as_binary_view().iter(), opts)
1900 }
1901 DataType::LargeBinary => {
1902 variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i64>(column), opts)
1903 }
1904 DataType::Utf8 => variable::encode_generic_byte_array(
1905 data, offsets,
1906 column.as_string::<i32>(),
1907 opts,
1908 ),
1909 DataType::LargeUtf8 => variable::encode_generic_byte_array(
1910 data, offsets,
1911 column.as_string::<i64>(),
1912 opts,
1913 ),
1914 DataType::Utf8View => variable::encode(
1915 data, offsets,
1916 column.as_string_view().iter().map(|x| x.map(|x| x.as_bytes())),
1917 opts,
1918 ),
1919 DataType::FixedSizeBinary(_) => {
1920 let array = column.as_any().downcast_ref().unwrap();
1921 fixed::encode_fixed_size_binary(data, offsets, array, opts)
1922 }
1923 _ => unimplemented!("unsupported data type: {}", column.data_type()),
1924 }
1925 }
1926 Encoder::Dictionary(values, nulls) => {
1927 downcast_dictionary_array! {
1928 column => encode_dictionary_values(data, offsets, column, values, nulls),
1929 _ => unreachable!()
1930 }
1931 }
1932 Encoder::Struct(rows, null) => {
1933 fn struct_encode_helper<const NO_CHILD_FIELDS: bool>(
1934 array: &StructArray,
1935 offsets: &mut [usize],
1936 null_sentinel: u8,
1937 rows: &Rows,
1938 null: &Row<'_>,
1939 data: &mut [u8],
1940 ) {
1941 let empty_row = Row {
1942 data: &[],
1943 config: &rows.config,
1944 };
1945
1946 offsets
1947 .iter_mut()
1948 .skip(1)
1949 .enumerate()
1950 .for_each(|(idx, offset)| {
1951 let (row, sentinel) = match array.is_valid(idx) {
1952 true => (
1953 if NO_CHILD_FIELDS {
1954 empty_row
1955 } else {
1956 rows.row(idx)
1957 },
1958 0x01,
1959 ),
1960 false => (*null, null_sentinel),
1961 };
1962 let end_offset = *offset + 1 + row.as_ref().len();
1963 data[*offset] = sentinel;
1964 data[*offset + 1..end_offset].copy_from_slice(row.as_ref());
1965 *offset = end_offset;
1966 })
1967 }
1968
1969 let array = as_struct_array(column);
1970 let null_sentinel = null_sentinel(opts);
1971 if rows.num_rows() == 0 {
1972 struct_encode_helper::<true>(array, offsets, null_sentinel, rows, null, data);
1974 } else {
1975 struct_encode_helper::<false>(array, offsets, null_sentinel, rows, null, data);
1976 }
1977 }
1978 Encoder::List(rows) => match column.data_type() {
1979 DataType::List(_) => list::encode(data, offsets, rows, opts, as_list_array(column)),
1980 DataType::LargeList(_) => {
1981 list::encode(data, offsets, rows, opts, as_large_list_array(column))
1982 }
1983 DataType::ListView(_) => {
1984 let list_view = column.as_list_view::<i32>();
1985 let (min_offset, _) = compute_list_view_bounds(list_view);
1986 list::encode_list_view(data, offsets, rows, opts, list_view, min_offset)
1987 }
1988 DataType::LargeListView(_) => {
1989 let list_view = column.as_list_view::<i64>();
1990 let (min_offset, _) = compute_list_view_bounds(list_view);
1991 list::encode_list_view(data, offsets, rows, opts, list_view, min_offset)
1992 }
1993 DataType::FixedSizeList(_, _) => {
1994 encode_fixed_size_list(data, offsets, rows, opts, as_fixed_size_list_array(column))
1995 }
1996 _ => unreachable!(),
1997 },
1998 Encoder::RunEndEncoded(rows) => match column.data_type() {
1999 DataType::RunEndEncoded(r, _) => match r.data_type() {
2000 DataType::Int16 => {
2001 run::encode(data, offsets, rows, opts, column.as_run::<Int16Type>())
2002 }
2003 DataType::Int32 => {
2004 run::encode(data, offsets, rows, opts, column.as_run::<Int32Type>())
2005 }
2006 DataType::Int64 => {
2007 run::encode(data, offsets, rows, opts, column.as_run::<Int64Type>())
2008 }
2009 _ => unreachable!("Unsupported run end index type: {r:?}"),
2010 },
2011 _ => unreachable!(),
2012 },
2013 Encoder::Union {
2014 child_rows,
2015 field_to_type_ids,
2016 type_ids,
2017 offsets: offsets_buf,
2018 } => {
2019 let mut type_id_to_field_idx = [0usize; 128];
2020 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
2021 type_id_to_field_idx[type_id as usize] = field_idx;
2022 }
2023
2024 offsets
2025 .iter_mut()
2026 .skip(1)
2027 .enumerate()
2028 .for_each(|(i, offset)| {
2029 let type_id = type_ids[i];
2030 let field_idx = type_id_to_field_idx[type_id as usize];
2031
2032 let child_row_idx = offsets_buf.as_ref().map(|o| o[i] as usize).unwrap_or(i);
2033 let child_row = child_rows[field_idx].row(child_row_idx);
2034 let child_bytes = child_row.as_ref();
2035
2036 let type_id_byte = if opts.descending {
2037 !(type_id as u8)
2038 } else {
2039 type_id as u8
2040 };
2041 data[*offset] = type_id_byte;
2042
2043 let child_start = *offset + 1;
2044 let child_end = child_start + child_bytes.len();
2045 data[child_start..child_end].copy_from_slice(child_bytes);
2046
2047 *offset = child_end;
2048 });
2049 }
2050 }
2051}
2052
2053pub fn encode_dictionary_values<K: ArrowDictionaryKeyType>(
2055 data: &mut [u8],
2056 offsets: &mut [usize],
2057 column: &DictionaryArray<K>,
2058 values: &Rows,
2059 null: &Row<'_>,
2060) {
2061 for (offset, k) in offsets.iter_mut().skip(1).zip(column.keys()) {
2062 let row = match k {
2063 Some(k) => values.row(k.as_usize()).data,
2064 None => null.data,
2065 };
2066 let end_offset = *offset + row.len();
2067 data[*offset..end_offset].copy_from_slice(row);
2068 *offset = end_offset;
2069 }
2070}
2071
/// Expands to an `Arc` wrapping `decode_primitive::<$native>($rows, $data_type, $options)`.
///
/// Passed by name to `downcast_primitive!` in `decode_column`.
macro_rules! decode_primitive_helper {
    ($native:ty, $rows:ident, $data_type:ident, $options:ident) => {
        Arc::new(decode_primitive::<$native>($rows, $data_type, $options))
    };
}
2077
2078unsafe fn decode_column(
2084 field: &SortField,
2085 rows: &mut [&[u8]],
2086 codec: &Codec,
2087 validate_utf8: bool,
2088) -> Result<ArrayRef, ArrowError> {
2089 let options = field.options;
2090
2091 let array: ArrayRef = match codec {
2092 Codec::Stateless => {
2093 let data_type = field.data_type.clone();
2094 downcast_primitive! {
2095 data_type => (decode_primitive_helper, rows, data_type, options),
2096 DataType::Null => {
2097 variable::decode_null_value(rows, options);
2098 Arc::new(NullArray::new(rows.len()))
2099 }
2100 DataType::Boolean => Arc::new(decode_bool(rows, options)),
2101 DataType::Binary => Arc::new(decode_binary::<i32>(rows, options)),
2102 DataType::LargeBinary => Arc::new(decode_binary::<i64>(rows, options)),
2103 DataType::BinaryView => Arc::new(decode_binary_view(rows, options)),
2104 DataType::FixedSizeBinary(size) => Arc::new(decode_fixed_size_binary(rows, size, options)),
2105 DataType::Utf8 => Arc::new(unsafe{ decode_string::<i32>(rows, options, validate_utf8) }),
2106 DataType::LargeUtf8 => Arc::new(unsafe { decode_string::<i64>(rows, options, validate_utf8) }),
2107 DataType::Utf8View => Arc::new(unsafe { decode_string_view(rows, options, validate_utf8) }),
2108 _ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {data_type}" )))
2109 }
2110 }
2111 Codec::Dictionary(converter, _) => {
2112 let cols = unsafe { converter.convert_raw(rows, validate_utf8) }?;
2113 cols.into_iter().next().unwrap()
2114 }
2115 Codec::Struct(converter, _) => {
2116 let (null_count, nulls) = fixed::decode_nulls(rows);
2117 rows.iter_mut().for_each(|row| *row = &row[1..]);
2118 let children = unsafe { converter.convert_raw(rows, validate_utf8) }?;
2119
2120 let child_data: Vec<ArrayData> = children.iter().map(|c| c.to_data()).collect();
2121 let corrected_fields: Vec<Field> = match &field.data_type {
2124 DataType::Struct(struct_fields) => struct_fields
2125 .iter()
2126 .zip(child_data.iter())
2127 .map(|(orig_field, child_array)| {
2128 orig_field
2129 .as_ref()
2130 .clone()
2131 .with_data_type(child_array.data_type().clone())
2132 })
2133 .collect(),
2134 _ => unreachable!("Only Struct types should be corrected here"),
2135 };
2136 let corrected_struct_type = DataType::Struct(corrected_fields.into());
2137 let builder = ArrayDataBuilder::new(corrected_struct_type)
2138 .len(rows.len())
2139 .null_count(null_count)
2140 .null_bit_buffer(Some(nulls))
2141 .child_data(child_data);
2142
2143 Arc::new(StructArray::from(unsafe { builder.build_unchecked() }))
2144 }
2145 Codec::List(converter) => match &field.data_type {
2146 DataType::List(_) => {
2147 Arc::new(unsafe { list::decode::<i32>(converter, rows, field, validate_utf8) }?)
2148 }
2149 DataType::LargeList(_) => {
2150 Arc::new(unsafe { list::decode::<i64>(converter, rows, field, validate_utf8) }?)
2151 }
2152 DataType::ListView(_) => Arc::new(unsafe {
2153 list::decode_list_view::<i32>(converter, rows, field, validate_utf8)
2154 }?),
2155 DataType::LargeListView(_) => Arc::new(unsafe {
2156 list::decode_list_view::<i64>(converter, rows, field, validate_utf8)
2157 }?),
2158 DataType::FixedSizeList(_, value_length) => Arc::new(unsafe {
2159 list::decode_fixed_size_list(
2160 converter,
2161 rows,
2162 field,
2163 validate_utf8,
2164 value_length.as_usize(),
2165 )
2166 }?),
2167 _ => unreachable!(),
2168 },
2169 Codec::RunEndEncoded(converter) => match &field.data_type {
2170 DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
2171 DataType::Int16 => Arc::new(unsafe {
2172 run::decode::<Int16Type>(converter, rows, field, validate_utf8)
2173 }?),
2174 DataType::Int32 => Arc::new(unsafe {
2175 run::decode::<Int32Type>(converter, rows, field, validate_utf8)
2176 }?),
2177 DataType::Int64 => Arc::new(unsafe {
2178 run::decode::<Int64Type>(converter, rows, field, validate_utf8)
2179 }?),
2180 _ => unreachable!(),
2181 },
2182 _ => unreachable!(),
2183 },
2184 Codec::Union(converters, field_to_type_ids, null_rows) => {
2185 let len = rows.len();
2186
2187 let DataType::Union(union_fields, mode) = &field.data_type else {
2188 unreachable!()
2189 };
2190
2191 let mut type_id_to_field_idx = [0usize; 128];
2192 for (field_idx, &type_id) in field_to_type_ids.iter().enumerate() {
2193 type_id_to_field_idx[type_id as usize] = field_idx;
2194 }
2195
2196 let mut type_ids = Vec::with_capacity(len);
2197 let mut rows_by_field: Vec<Vec<(usize, &[u8])>> = vec![Vec::new(); converters.len()];
2198
2199 for (idx, row) in rows.iter_mut().enumerate() {
2200 let type_id_byte = {
2201 let id = row[0];
2202 if options.descending { !id } else { id }
2203 };
2204
2205 let type_id = type_id_byte as i8;
2206 type_ids.push(type_id);
2207
2208 let field_idx = type_id_to_field_idx[type_id as usize];
2209
2210 let child_row = &row[1..];
2211 rows_by_field[field_idx].push((idx, child_row));
2212 }
2213
2214 let mut child_arrays: Vec<ArrayRef> = Vec::with_capacity(converters.len());
2215 let mut offsets = (*mode == UnionMode::Dense).then(|| Vec::with_capacity(len));
2216
2217 for (field_idx, converter) in converters.iter().enumerate() {
2218 let field_rows = &rows_by_field[field_idx];
2219
2220 match &mode {
2221 UnionMode::Dense => {
2222 if field_rows.is_empty() {
2223 let (_, field) = union_fields.iter().nth(field_idx).unwrap();
2224 child_arrays.push(arrow_array::new_empty_array(field.data_type()));
2225 continue;
2226 }
2227
2228 let mut child_data = field_rows
2229 .iter()
2230 .map(|(_, bytes)| *bytes)
2231 .collect::<Vec<_>>();
2232
2233 let child_array =
2234 unsafe { converter.convert_raw(&mut child_data, validate_utf8) }?;
2235
2236 for ((row_idx, original_bytes), remaining_bytes) in
2238 field_rows.iter().zip(child_data)
2239 {
2240 let consumed_length = 1 + original_bytes.len() - remaining_bytes.len();
2241 rows[*row_idx] = &rows[*row_idx][consumed_length..];
2242 }
2243
2244 child_arrays.push(child_array.into_iter().next().unwrap());
2245 }
2246 UnionMode::Sparse => {
2247 let mut sparse_data: Vec<&[u8]> = Vec::with_capacity(len);
2248 let mut field_row_iter = field_rows.iter().peekable();
2249 let null_row_bytes: &[u8] = &null_rows[field_idx].data;
2250
2251 for idx in 0..len {
2252 if let Some((next_idx, bytes)) = field_row_iter.peek() {
2253 if *next_idx == idx {
2254 sparse_data.push(*bytes);
2255
2256 field_row_iter.next();
2257 continue;
2258 }
2259 }
2260 sparse_data.push(null_row_bytes);
2261 }
2262
2263 let child_array =
2264 unsafe { converter.convert_raw(&mut sparse_data, validate_utf8) }?;
2265
2266 for (row_idx, child_row) in field_rows.iter() {
2268 let remaining_len = sparse_data[*row_idx].len();
2269 let consumed_length = 1 + child_row.len() - remaining_len;
2270 rows[*row_idx] = &rows[*row_idx][consumed_length..];
2271 }
2272
2273 child_arrays.push(child_array.into_iter().next().unwrap());
2274 }
2275 }
2276 }
2277
2278 if let Some(ref mut offsets_vec) = offsets {
2280 let mut count = vec![0i32; converters.len()];
2281 for type_id in &type_ids {
2282 let field_idx = *type_id as usize;
2283 offsets_vec.push(count[field_idx]);
2284
2285 count[field_idx] += 1;
2286 }
2287 }
2288
2289 let type_ids_buffer = ScalarBuffer::from(type_ids);
2290 let offsets_buffer = offsets.map(ScalarBuffer::from);
2291
2292 let union_array = UnionArray::try_new(
2293 union_fields.clone(),
2294 type_ids_buffer,
2295 offsets_buffer,
2296 child_arrays,
2297 )?;
2298
2299 Arc::new(union_array)
2302 }
2303 };
2304 Ok(array)
2305}
2306
2307#[cfg(test)]
2308mod tests {
2309 use arrow_array::builder::*;
2310 use arrow_array::types::*;
2311 use arrow_array::*;
2312 use arrow_buffer::{Buffer, OffsetBuffer};
2313 use arrow_buffer::{NullBuffer, i256};
2314 use arrow_cast::display::{ArrayFormatter, FormatOptions};
2315 use arrow_ord::sort::{LexicographicalComparator, SortColumn};
2316 use rand::distr::uniform::SampleUniform;
2317 use rand::distr::{Distribution, StandardUniform};
2318 use rand::prelude::StdRng;
2319 use rand::{Rng, RngCore, SeedableRng};
2320
2321 use super::*;
2322
2323 #[test]
2324 fn test_fixed_width() {
2325 let cols = [
2326 Arc::new(Int16Array::from_iter([
2327 Some(1),
2328 Some(2),
2329 None,
2330 Some(-5),
2331 Some(2),
2332 Some(2),
2333 Some(0),
2334 ])) as ArrayRef,
2335 Arc::new(Float32Array::from_iter([
2336 Some(1.3),
2337 Some(2.5),
2338 None,
2339 Some(4.),
2340 Some(0.1),
2341 Some(-4.),
2342 Some(-0.),
2343 ])) as ArrayRef,
2344 ];
2345
2346 let converter = RowConverter::new(vec![
2347 SortField::new(DataType::Int16),
2348 SortField::new(DataType::Float32),
2349 ])
2350 .unwrap();
2351 let rows = converter.convert_columns(&cols).unwrap();
2352
2353 assert_eq!(rows.offsets, &[0, 8, 16, 24, 32, 40, 48, 56]);
2354 assert_eq!(
2355 rows.buffer,
2356 &[
2357 1, 128, 1, 1, 191, 166, 102, 102, 1, 128, 2, 1, 192, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 127, 251, 1, 192, 128, 0, 0, 1, 128, 2, 1, 189, 204, 204, 205, 1, 128, 2, 1, 63, 127, 255, 255, 1, 128, 0, 1, 127, 255, 255, 255 ]
2372 );
2373
2374 assert!(rows.row(3) < rows.row(6));
2375 assert!(rows.row(0) < rows.row(1));
2376 assert!(rows.row(3) < rows.row(0));
2377 assert!(rows.row(4) < rows.row(1));
2378 assert!(rows.row(5) < rows.row(4));
2379
2380 let back = converter.convert_rows(&rows).unwrap();
2381 for (expected, actual) in cols.iter().zip(&back) {
2382 assert_eq!(expected, actual);
2383 }
2384 }
2385
2386 #[test]
2387 fn test_decimal32() {
2388 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal32(
2389 DECIMAL32_MAX_PRECISION,
2390 7,
2391 ))])
2392 .unwrap();
2393 let col = Arc::new(
2394 Decimal32Array::from_iter([
2395 None,
2396 Some(i32::MIN),
2397 Some(-13),
2398 Some(46_i32),
2399 Some(5456_i32),
2400 Some(i32::MAX),
2401 ])
2402 .with_precision_and_scale(9, 7)
2403 .unwrap(),
2404 ) as ArrayRef;
2405
2406 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2407 for i in 0..rows.num_rows() - 1 {
2408 assert!(rows.row(i) < rows.row(i + 1));
2409 }
2410
2411 let back = converter.convert_rows(&rows).unwrap();
2412 assert_eq!(back.len(), 1);
2413 assert_eq!(col.as_ref(), back[0].as_ref())
2414 }
2415
2416 #[test]
2417 fn test_decimal64() {
2418 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal64(
2419 DECIMAL64_MAX_PRECISION,
2420 7,
2421 ))])
2422 .unwrap();
2423 let col = Arc::new(
2424 Decimal64Array::from_iter([
2425 None,
2426 Some(i64::MIN),
2427 Some(-13),
2428 Some(46_i64),
2429 Some(5456_i64),
2430 Some(i64::MAX),
2431 ])
2432 .with_precision_and_scale(18, 7)
2433 .unwrap(),
2434 ) as ArrayRef;
2435
2436 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2437 for i in 0..rows.num_rows() - 1 {
2438 assert!(rows.row(i) < rows.row(i + 1));
2439 }
2440
2441 let back = converter.convert_rows(&rows).unwrap();
2442 assert_eq!(back.len(), 1);
2443 assert_eq!(col.as_ref(), back[0].as_ref())
2444 }
2445
2446 #[test]
2447 fn test_decimal128() {
2448 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal128(
2449 DECIMAL128_MAX_PRECISION,
2450 7,
2451 ))])
2452 .unwrap();
2453 let col = Arc::new(
2454 Decimal128Array::from_iter([
2455 None,
2456 Some(i128::MIN),
2457 Some(-13),
2458 Some(46_i128),
2459 Some(5456_i128),
2460 Some(i128::MAX),
2461 ])
2462 .with_precision_and_scale(38, 7)
2463 .unwrap(),
2464 ) as ArrayRef;
2465
2466 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2467 for i in 0..rows.num_rows() - 1 {
2468 assert!(rows.row(i) < rows.row(i + 1));
2469 }
2470
2471 let back = converter.convert_rows(&rows).unwrap();
2472 assert_eq!(back.len(), 1);
2473 assert_eq!(col.as_ref(), back[0].as_ref())
2474 }
2475
2476 #[test]
2477 fn test_decimal256() {
2478 let converter = RowConverter::new(vec![SortField::new(DataType::Decimal256(
2479 DECIMAL256_MAX_PRECISION,
2480 7,
2481 ))])
2482 .unwrap();
2483 let col = Arc::new(
2484 Decimal256Array::from_iter([
2485 None,
2486 Some(i256::MIN),
2487 Some(i256::from_parts(0, -1)),
2488 Some(i256::from_parts(u128::MAX, -1)),
2489 Some(i256::from_parts(u128::MAX, 0)),
2490 Some(i256::from_parts(0, 46_i128)),
2491 Some(i256::from_parts(5, 46_i128)),
2492 Some(i256::MAX),
2493 ])
2494 .with_precision_and_scale(DECIMAL256_MAX_PRECISION, 7)
2495 .unwrap(),
2496 ) as ArrayRef;
2497
2498 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2499 for i in 0..rows.num_rows() - 1 {
2500 assert!(rows.row(i) < rows.row(i + 1));
2501 }
2502
2503 let back = converter.convert_rows(&rows).unwrap();
2504 assert_eq!(back.len(), 1);
2505 assert_eq!(col.as_ref(), back[0].as_ref())
2506 }
2507
2508 #[test]
2509 fn test_bool() {
2510 let converter = RowConverter::new(vec![SortField::new(DataType::Boolean)]).unwrap();
2511
2512 let col = Arc::new(BooleanArray::from_iter([None, Some(false), Some(true)])) as ArrayRef;
2513
2514 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2515 assert!(rows.row(2) > rows.row(1));
2516 assert!(rows.row(2) > rows.row(0));
2517 assert!(rows.row(1) > rows.row(0));
2518
2519 let cols = converter.convert_rows(&rows).unwrap();
2520 assert_eq!(&cols[0], &col);
2521
2522 let converter = RowConverter::new(vec![SortField::new_with_options(
2523 DataType::Boolean,
2524 SortOptions::default().desc().with_nulls_first(false),
2525 )])
2526 .unwrap();
2527
2528 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2529 assert!(rows.row(2) < rows.row(1));
2530 assert!(rows.row(2) < rows.row(0));
2531 assert!(rows.row(1) < rows.row(0));
2532 let cols = converter.convert_rows(&rows).unwrap();
2533 assert_eq!(&cols[0], &col);
2534 }
2535
2536 #[test]
2537 fn test_timezone() {
2538 let a =
2539 TimestampNanosecondArray::from(vec![1, 2, 3, 4, 5]).with_timezone("+01:00".to_string());
2540 let d = a.data_type().clone();
2541
2542 let converter = RowConverter::new(vec![SortField::new(a.data_type().clone())]).unwrap();
2543 let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap();
2544 let back = converter.convert_rows(&rows).unwrap();
2545 assert_eq!(back.len(), 1);
2546 assert_eq!(back[0].data_type(), &d);
2547
2548 let mut a = PrimitiveDictionaryBuilder::<Int32Type, TimestampNanosecondType>::new();
2550 a.append(34).unwrap();
2551 a.append_null();
2552 a.append(345).unwrap();
2553
2554 let dict = a.finish();
2556 let values = TimestampNanosecondArray::from(dict.values().to_data());
2557 let dict_with_tz = dict.with_values(Arc::new(values.with_timezone("+02:00")));
2558 let v = DataType::Timestamp(TimeUnit::Nanosecond, Some("+02:00".into()));
2559 let d = DataType::Dictionary(Box::new(DataType::Int32), Box::new(v.clone()));
2560
2561 assert_eq!(dict_with_tz.data_type(), &d);
2562 let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
2563 let rows = converter
2564 .convert_columns(&[Arc::new(dict_with_tz) as _])
2565 .unwrap();
2566 let back = converter.convert_rows(&rows).unwrap();
2567 assert_eq!(back.len(), 1);
2568 assert_eq!(back[0].data_type(), &v);
2569 }
2570
2571 #[test]
2572 fn test_null_encoding() {
2573 let col = Arc::new(NullArray::new(10));
2574 let converter = RowConverter::new(vec![SortField::new(DataType::Null)]).unwrap();
2575 let rows = converter.convert_columns(&[col]).unwrap();
2576 assert_eq!(rows.num_rows(), 10);
2577 assert_eq!(rows.row(1).data.len(), 2);
2579 }
2580
2581 #[test]
2582 fn test_variable_width() {
2583 let col = Arc::new(StringArray::from_iter([
2584 Some("hello"),
2585 Some("he"),
2586 None,
2587 Some("foo"),
2588 Some(""),
2589 ])) as ArrayRef;
2590
2591 let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
2592 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2593
2594 assert!(rows.row(1) < rows.row(0));
2595 assert!(rows.row(2) < rows.row(4));
2596 assert!(rows.row(3) < rows.row(0));
2597 assert!(rows.row(3) < rows.row(1));
2598
2599 let cols = converter.convert_rows(&rows).unwrap();
2600 assert_eq!(&cols[0], &col);
2601
2602 let col = Arc::new(BinaryArray::from_iter([
2603 None,
2604 Some(vec![0_u8; 0]),
2605 Some(vec![0_u8; 6]),
2606 Some(vec![0_u8; variable::MINI_BLOCK_SIZE]),
2607 Some(vec![0_u8; variable::MINI_BLOCK_SIZE + 1]),
2608 Some(vec![0_u8; variable::BLOCK_SIZE]),
2609 Some(vec![0_u8; variable::BLOCK_SIZE + 1]),
2610 Some(vec![1_u8; 6]),
2611 Some(vec![1_u8; variable::MINI_BLOCK_SIZE]),
2612 Some(vec![1_u8; variable::MINI_BLOCK_SIZE + 1]),
2613 Some(vec![1_u8; variable::BLOCK_SIZE]),
2614 Some(vec![1_u8; variable::BLOCK_SIZE + 1]),
2615 Some(vec![0xFF_u8; 6]),
2616 Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE]),
2617 Some(vec![0xFF_u8; variable::MINI_BLOCK_SIZE + 1]),
2618 Some(vec![0xFF_u8; variable::BLOCK_SIZE]),
2619 Some(vec![0xFF_u8; variable::BLOCK_SIZE + 1]),
2620 ])) as ArrayRef;
2621
2622 let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
2623 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2624
2625 for i in 0..rows.num_rows() {
2626 for j in i + 1..rows.num_rows() {
2627 assert!(
2628 rows.row(i) < rows.row(j),
2629 "{} < {} - {:?} < {:?}",
2630 i,
2631 j,
2632 rows.row(i),
2633 rows.row(j)
2634 );
2635 }
2636 }
2637
2638 let cols = converter.convert_rows(&rows).unwrap();
2639 assert_eq!(&cols[0], &col);
2640
2641 let converter = RowConverter::new(vec![SortField::new_with_options(
2642 DataType::Binary,
2643 SortOptions::default().desc().with_nulls_first(false),
2644 )])
2645 .unwrap();
2646 let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
2647
2648 for i in 0..rows.num_rows() {
2649 for j in i + 1..rows.num_rows() {
2650 assert!(
2651 rows.row(i) > rows.row(j),
2652 "{} > {} - {:?} > {:?}",
2653 i,
2654 j,
2655 rows.row(i),
2656 rows.row(j)
2657 );
2658 }
2659 }
2660
2661 let cols = converter.convert_rows(&rows).unwrap();
2662 assert_eq!(&cols[0], &col);
2663 }
2664
2665 fn dictionary_eq(a: &dyn Array, b: &dyn Array) {
2667 match b.data_type() {
2668 DataType::Dictionary(_, v) => {
2669 assert_eq!(a.data_type(), v.as_ref());
2670 let b = arrow_cast::cast(b, v).unwrap();
2671 assert_eq!(a, b.as_ref())
2672 }
2673 _ => assert_eq!(a, b),
2674 }
2675 }
2676
2677 #[test]
2678 fn test_string_dictionary() {
2679 let a = Arc::new(DictionaryArray::<Int32Type>::from_iter([
2680 Some("foo"),
2681 Some("hello"),
2682 Some("he"),
2683 None,
2684 Some("hello"),
2685 Some(""),
2686 Some("hello"),
2687 Some("hello"),
2688 ])) as ArrayRef;
2689
2690 let field = SortField::new(a.data_type().clone());
2691 let converter = RowConverter::new(vec![field]).unwrap();
2692 let rows_a = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2693
2694 assert!(rows_a.row(3) < rows_a.row(5));
2695 assert!(rows_a.row(2) < rows_a.row(1));
2696 assert!(rows_a.row(0) < rows_a.row(1));
2697 assert!(rows_a.row(3) < rows_a.row(0));
2698
2699 assert_eq!(rows_a.row(1), rows_a.row(4));
2700 assert_eq!(rows_a.row(1), rows_a.row(6));
2701 assert_eq!(rows_a.row(1), rows_a.row(7));
2702
2703 let cols = converter.convert_rows(&rows_a).unwrap();
2704 dictionary_eq(&cols[0], &a);
2705
2706 let b = Arc::new(DictionaryArray::<Int32Type>::from_iter([
2707 Some("hello"),
2708 None,
2709 Some("cupcakes"),
2710 ])) as ArrayRef;
2711
2712 let rows_b = converter.convert_columns(&[Arc::clone(&b)]).unwrap();
2713 assert_eq!(rows_a.row(1), rows_b.row(0));
2714 assert_eq!(rows_a.row(3), rows_b.row(1));
2715 assert!(rows_b.row(2) < rows_a.row(0));
2716
2717 let cols = converter.convert_rows(&rows_b).unwrap();
2718 dictionary_eq(&cols[0], &b);
2719
2720 let converter = RowConverter::new(vec![SortField::new_with_options(
2721 a.data_type().clone(),
2722 SortOptions::default().desc().with_nulls_first(false),
2723 )])
2724 .unwrap();
2725
2726 let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2727 assert!(rows_c.row(3) > rows_c.row(5));
2728 assert!(rows_c.row(2) > rows_c.row(1));
2729 assert!(rows_c.row(0) > rows_c.row(1));
2730 assert!(rows_c.row(3) > rows_c.row(0));
2731
2732 let cols = converter.convert_rows(&rows_c).unwrap();
2733 dictionary_eq(&cols[0], &a);
2734
2735 let converter = RowConverter::new(vec![SortField::new_with_options(
2736 a.data_type().clone(),
2737 SortOptions::default().desc().with_nulls_first(true),
2738 )])
2739 .unwrap();
2740
2741 let rows_c = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2742 assert!(rows_c.row(3) < rows_c.row(5));
2743 assert!(rows_c.row(2) > rows_c.row(1));
2744 assert!(rows_c.row(0) > rows_c.row(1));
2745 assert!(rows_c.row(3) < rows_c.row(0));
2746
2747 let cols = converter.convert_rows(&rows_c).unwrap();
2748 dictionary_eq(&cols[0], &a);
2749 }
2750
2751 #[test]
2752 fn test_struct() {
2753 let a = Arc::new(Int32Array::from(vec![1, 1, 2, 2])) as ArrayRef;
2755 let a_f = Arc::new(Field::new("int", DataType::Int32, false));
2756 let u = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])) as ArrayRef;
2757 let u_f = Arc::new(Field::new("s", DataType::Utf8, false));
2758 let s1 = Arc::new(StructArray::from(vec![(a_f, a), (u_f, u)])) as ArrayRef;
2759
2760 let sort_fields = vec![SortField::new(s1.data_type().clone())];
2761 let converter = RowConverter::new(sort_fields).unwrap();
2762 let r1 = converter.convert_columns(&[Arc::clone(&s1)]).unwrap();
2763
2764 for (a, b) in r1.iter().zip(r1.iter().skip(1)) {
2765 assert!(a < b);
2766 }
2767
2768 let back = converter.convert_rows(&r1).unwrap();
2769 assert_eq!(back.len(), 1);
2770 assert_eq!(&back[0], &s1);
2771
2772 let data = s1
2774 .to_data()
2775 .into_builder()
2776 .null_bit_buffer(Some(Buffer::from_slice_ref([0b00001010])))
2777 .null_count(2)
2778 .build()
2779 .unwrap();
2780
2781 let s2 = Arc::new(StructArray::from(data)) as ArrayRef;
2782 let r2 = converter.convert_columns(&[Arc::clone(&s2)]).unwrap();
2783 assert_eq!(r2.row(0), r2.row(2)); assert!(r2.row(0) < r2.row(1)); assert_ne!(r1.row(0), r2.row(0)); assert_eq!(r1.row(1), r2.row(1)); let back = converter.convert_rows(&r2).unwrap();
2789 assert_eq!(back.len(), 1);
2790 assert_eq!(&back[0], &s2);
2791
2792 back[0].to_data().validate_full().unwrap();
2793 }
2794
2795 #[test]
2796 fn test_dictionary_in_struct() {
2797 let builder = StringDictionaryBuilder::<Int32Type>::new();
2798 let mut struct_builder = StructBuilder::new(
2799 vec![Field::new_dictionary(
2800 "foo",
2801 DataType::Int32,
2802 DataType::Utf8,
2803 true,
2804 )],
2805 vec![Box::new(builder)],
2806 );
2807
2808 let dict_builder = struct_builder
2809 .field_builder::<StringDictionaryBuilder<Int32Type>>(0)
2810 .unwrap();
2811
2812 dict_builder.append_value("a");
2814 dict_builder.append_null();
2815 dict_builder.append_value("a");
2816 dict_builder.append_value("b");
2817
2818 for _ in 0..4 {
2819 struct_builder.append(true);
2820 }
2821
2822 let s = Arc::new(struct_builder.finish()) as ArrayRef;
2823 let sort_fields = vec![SortField::new(s.data_type().clone())];
2824 let converter = RowConverter::new(sort_fields).unwrap();
2825 let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
2826
2827 let back = converter.convert_rows(&r).unwrap();
2828 let [s2] = back.try_into().unwrap();
2829
2830 assert_ne!(&s.data_type(), &s2.data_type());
2833 s2.to_data().validate_full().unwrap();
2834
2835 let s1_struct = s.as_struct();
2839 let s1_0 = s1_struct.column(0);
2840 let s1_idx_0 = s1_0.as_dictionary::<Int32Type>();
2841 let keys = s1_idx_0.keys();
2842 let values = s1_idx_0.values().as_string::<i32>();
2843 let s2_struct = s2.as_struct();
2845 let s2_0 = s2_struct.column(0);
2846 let s2_idx_0 = s2_0.as_string::<i32>();
2847
2848 for i in 0..keys.len() {
2849 if keys.is_null(i) {
2850 assert!(s2_idx_0.is_null(i));
2851 } else {
2852 let dict_index = keys.value(i) as usize;
2853 assert_eq!(values.value(dict_index), s2_idx_0.value(i));
2854 }
2855 }
2856 }
2857
2858 #[test]
2859 fn test_dictionary_in_struct_empty() {
2860 let ty = DataType::Struct(
2861 vec![Field::new_dictionary(
2862 "foo",
2863 DataType::Int32,
2864 DataType::Int32,
2865 false,
2866 )]
2867 .into(),
2868 );
2869 let s = arrow_array::new_empty_array(&ty);
2870
2871 let sort_fields = vec![SortField::new(s.data_type().clone())];
2872 let converter = RowConverter::new(sort_fields).unwrap();
2873 let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
2874
2875 let back = converter.convert_rows(&r).unwrap();
2876 let [s2] = back.try_into().unwrap();
2877
2878 assert_ne!(&s.data_type(), &s2.data_type());
2881 s2.to_data().validate_full().unwrap();
2882 assert_eq!(s.len(), 0);
2883 assert_eq!(s2.len(), 0);
2884 }
2885
2886 #[test]
2887 fn test_list_of_string_dictionary() {
2888 let mut builder = ListBuilder::<StringDictionaryBuilder<Int32Type>>::default();
2889 builder.values().append("a").unwrap();
2891 builder.values().append("b").unwrap();
2892 builder.values().append("zero").unwrap();
2893 builder.values().append_null();
2894 builder.values().append("c").unwrap();
2895 builder.values().append("b").unwrap();
2896 builder.values().append("d").unwrap();
2897 builder.append(true);
2898 builder.append(false);
2900 builder.values().append("e").unwrap();
2902 builder.values().append("zero").unwrap();
2903 builder.values().append("a").unwrap();
2904 builder.append(true);
2905
2906 let a = Arc::new(builder.finish()) as ArrayRef;
2907 let data_type = a.data_type().clone();
2908
2909 let field = SortField::new(data_type.clone());
2910 let converter = RowConverter::new(vec![field]).unwrap();
2911 let rows = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
2912
2913 let back = converter.convert_rows(&rows).unwrap();
2914 assert_eq!(back.len(), 1);
2915 let [a2] = back.try_into().unwrap();
2916
2917 assert_ne!(&a.data_type(), &a2.data_type());
2920
2921 a2.to_data().validate_full().unwrap();
2922
2923 let a2_list = a2.as_list::<i32>();
2924 let a1_list = a.as_list::<i32>();
2925
2926 let a1_0 = a1_list.value(0);
2929 let a1_idx_0 = a1_0.as_dictionary::<Int32Type>();
2930 let keys = a1_idx_0.keys();
2931 let values = a1_idx_0.values().as_string::<i32>();
2932 let a2_0 = a2_list.value(0);
2933 let a2_idx_0 = a2_0.as_string::<i32>();
2934
2935 for i in 0..keys.len() {
2936 if keys.is_null(i) {
2937 assert!(a2_idx_0.is_null(i));
2938 } else {
2939 let dict_index = keys.value(i) as usize;
2940 assert_eq!(values.value(dict_index), a2_idx_0.value(i));
2941 }
2942 }
2943
2944 assert!(a1_list.is_null(1));
2946 assert!(a2_list.is_null(1));
2947
2948 let a1_2 = a1_list.value(2);
2950 let a1_idx_2 = a1_2.as_dictionary::<Int32Type>();
2951 let keys = a1_idx_2.keys();
2952 let values = a1_idx_2.values().as_string::<i32>();
2953 let a2_2 = a2_list.value(2);
2954 let a2_idx_2 = a2_2.as_string::<i32>();
2955
2956 for i in 0..keys.len() {
2957 if keys.is_null(i) {
2958 assert!(a2_idx_2.is_null(i));
2959 } else {
2960 let dict_index = keys.value(i) as usize;
2961 assert_eq!(values.value(dict_index), a2_idx_2.value(i));
2962 }
2963 }
2964 }
2965
/// Verifies that a dictionary array with primitive values produces rows that
/// order according to the dictionary's logical values, and that the rows
/// decode back into a single, fully valid column.
#[test]
fn test_primitive_dictionary() {
    // Logical values, in row order: [2, 3, 0, null, 5, 3, -1]
    let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
    builder.append(2).unwrap();
    builder.append(3).unwrap();
    builder.append(0).unwrap();
    builder.append_null();
    builder.append(5).unwrap();
    builder.append(3).unwrap();
    builder.append(-1).unwrap();

    let a = builder.finish();
    let data_type = a.data_type().clone();
    let columns = [Arc::new(a) as ArrayRef];

    // `data_type` has no later use, so it is moved into the field rather than cloned.
    let field = SortField::new(data_type);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&columns).unwrap();
    assert!(rows.row(0) < rows.row(1)); // 2 vs 3
    assert!(rows.row(2) < rows.row(0)); // 0 vs 2
    assert!(rows.row(3) < rows.row(2)); // null vs 0
    assert!(rows.row(6) < rows.row(2)); // -1 vs 0
    assert!(rows.row(3) < rows.row(6)); // null vs -1

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
}
2994
/// Verifies that a null dictionary key and a key that points at a null
/// dictionary value encode to identical rows, and that those rows compare
/// lower than a row holding a non-null value.
#[test]
fn test_dictionary_nulls() {
    // Dictionary values: index 2 and index 4 are null.
    let values = Int32Array::from_iter([Some(1), Some(-1), None, Some(4), None]).into_data();
    // Keys resolve to: 1, 1, -1, null (value 2), null (value 4), null (null key).
    let keys =
        Int32Array::from_iter([Some(0), Some(0), Some(1), Some(2), Some(4), None]).into_data();

    let data_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
    let data = keys
        .into_builder()
        .data_type(data_type.clone())
        .child_data(vec![values])
        .build()
        .unwrap();

    let columns = [Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef];
    // Last use of `data_type`: move it instead of cloning.
    let field = SortField::new(data_type);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&columns).unwrap();

    assert_eq!(rows.row(0), rows.row(1)); // same key
    assert_eq!(rows.row(3), rows.row(4)); // two different keys, both pointing at null values
    assert_eq!(rows.row(4), rows.row(5)); // null value vs null key
    assert!(rows.row(3) < rows.row(0)); // null vs 1
}
3019
/// Feeds `from_binary` a `BinaryArray` that has a second live clone, then
/// decodes the parsed rows.
#[test]
fn test_from_binary_shared_buffer() {
    let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
    let input = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
    let encoded = converter.convert_columns(&[input]).unwrap();
    let as_binary = encoded.try_into_binary().expect("known-small rows");

    // Keep a second handle to the binary rows alive until the end of the test.
    let _second_handle = as_binary.clone();

    let reparsed = converter.from_binary(as_binary);

    converter.convert_rows(reparsed.iter()).unwrap();
}
3032
/// A row encoded from the byte `0xFF` as `Binary` is re-parsed and decoded
/// with a `Utf8` converter; the test expects the "non UTF-8" panic.
#[test]
#[should_panic(expected = "Encountered non UTF-8 data")]
fn test_invalid_utf8() {
    // Encode the single byte 0xFF with a Binary converter.
    let binary_converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
    let input = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
    let encoded = binary_converter.convert_columns(&[input]).unwrap();
    let raw_row = encoded.row(0);

    // Hand the same bytes to a Utf8 converter through its parser.
    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let utf8_parser = utf8_converter.parser();
    let reparsed = utf8_parser.parse(raw_row.as_ref());

    utf8_converter
        .convert_rows(std::iter::once(reparsed))
        .unwrap();
}
3047
/// Same scenario as the single-row non-UTF-8 case, but the rows travel
/// through `try_into_binary` / `from_binary` instead of a parser.
#[test]
#[should_panic(expected = "Encountered non UTF-8 data")]
fn test_invalid_utf8_array() {
    // Encode the single byte 0xFF with a Binary converter and export the rows.
    let binary_converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
    let input = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
    let encoded = binary_converter.convert_columns(&[input]).unwrap();
    let exported = encoded.try_into_binary().expect("known-small rows");

    // Import the exported rows into a Utf8 converter and decode them.
    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let imported = utf8_converter.from_binary(exported);

    utf8_converter.convert_rows(imported.iter()).unwrap();
}
3061
/// Decoding a zero-length row as `Utf8` is expected to panic with an
/// out-of-bounds index.
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_invalid_empty() {
    let empty_bytes: &[u8] = &[];

    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let utf8_parser = utf8_converter.parser();
    let parsed_row = utf8_parser.parse(empty_bytes.as_ref());

    utf8_converter
        .convert_rows(std::iter::once(parsed_row))
        .unwrap();
}
3073
/// Decoding a `BinaryArray` holding one zero-length row as `Utf8` is expected
/// to panic with an out-of-bounds index.
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_invalid_empty_array() {
    let empty_bytes: &[u8] = &[];
    let raw_rows = BinaryArray::from(vec![empty_bytes]);

    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let imported = utf8_converter.from_binary(raw_rows);

    utf8_converter.convert_rows(imported.iter()).unwrap();
}
3085
/// Decoding the single-byte row `[0x02]` as `Utf8` is expected to panic with
/// an out-of-bounds index.
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_invalid_truncated() {
    let short_bytes: &[u8] = &[0x02];

    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let utf8_parser = utf8_converter.parser();
    let parsed_row = utf8_parser.parse(short_bytes.as_ref());

    utf8_converter
        .convert_rows(std::iter::once(parsed_row))
        .unwrap();
}
3097
/// Decoding a `BinaryArray` holding the single-byte row `[0x02]` as `Utf8` is
/// expected to panic with an out-of-bounds index.
#[test]
#[should_panic(expected = "index out of bounds")]
fn test_invalid_truncated_array() {
    let short_bytes: &[u8] = &[0x02];
    let raw_rows = BinaryArray::from(vec![short_bytes]);

    let utf8_converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
    let imported = utf8_converter.from_binary(raw_rows);

    utf8_converter.convert_rows(imported.iter()).unwrap();
}
3109
/// Rows produced by one `RowConverter` are handed to a second, separately
/// constructed converter with the same field; the test expects a panic.
#[test]
#[should_panic(expected = "rows were not produced by this RowConverter")]
fn test_different_converter() {
    let ints = Arc::new(Int32Array::from_iter([Some(1), Some(-1)]));
    let producer = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
    let encoded = producer.convert_columns(&[ints]).unwrap();

    // Same schema, but a distinct converter instance.
    let stranger = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
    let _ = stranger.convert_rows(&encoded);
}
3120
/// Builds a seven-row `GenericListArray<O>` of nullable `Int32` values and,
/// for several sort options, checks the relative order of the encoded rows
/// and that decoding reproduces the input. The last section repeats the
/// round trip on a slice of the input.
fn test_single_list<O: OffsetSizeTrait>() {
    let mut builder = GenericListBuilder::<O, _>::new(Int32Builder::new());
    // Row 0: [32, 52, 32]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(32);
    builder.append(true);
    // Row 1: [32, 52, 12]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(12);
    builder.append(true);
    // Row 2: [32, 52]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.append(true);
    // Row 3: null list; child values 32 and 52 are appended underneath the null slot
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.append(false);
    // Row 4: [32, null]
    builder.values().append_value(32);
    builder.values().append_null();
    builder.append(true);
    // Row 5: []
    builder.append(true);
    // Row 6: null list; child values 17 and null are appended underneath the null slot
    builder.values().append_value(17);
    builder.values().append_null();
    builder.append(false);

    let list = Arc::new(builder.finish()) as ArrayRef;
    let d = list.data_type().clone();

    // Section 1: default sort options.
    let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();

    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) < rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) < rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) < rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 2: ascending, nulls last.
    let options = SortOptions::default().asc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) > rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) < rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) > rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 3: descending, nulls last.
    let options = SortOptions::default().desc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) > rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) > rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) > rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 4: descending, nulls first.
    let options = SortOptions::default().desc().with_nulls_first(true);
    let field = SortField::new_with_options(d, options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) < rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) > rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) < rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 5: same converter (descending, nulls first) on a slice holding
    // original rows 1..=5: [32, 52, 12], [32, 52], null, [32, null], [].
    let sliced_list = list.slice(1, 5);
    let rows_on_sliced_list = converter
        .convert_columns(&[Arc::clone(&sliced_list)])
        .unwrap();

    assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0)); // [32, 52] vs [32, 52, 12]
    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null vs [32, 52]
    assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null] vs [32, 52]
    assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1)); // [] vs [32, 52]
    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null vs []

    let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &sliced_list);
}
3234
/// Builds a five-row list-of-lists of nullable `Int32` values and, for three
/// sort options, checks the relative order of the encoded rows and that
/// decoding reproduces the input. The last section repeats the round trip on
/// a slice of the input.
fn test_nested_list<O: OffsetSizeTrait>() {
    let mut builder =
        GenericListBuilder::<O, _>::new(GenericListBuilder::<O, _>::new(Int32Builder::new()));

    // Row 0: [[1, 2], [1, null]]
    builder.values().values().append_value(1);
    builder.values().values().append_value(2);
    builder.values().append(true);
    builder.values().values().append_value(1);
    builder.values().values().append_null();
    builder.values().append(true);
    builder.append(true);

    // Row 1: [[1, null], [1, null]]
    builder.values().values().append_value(1);
    builder.values().values().append_null();
    builder.values().append(true);
    builder.values().values().append_value(1);
    builder.values().values().append_null();
    builder.values().append(true);
    builder.append(true);

    // Row 2: [[1, null], null]
    builder.values().values().append_value(1);
    builder.values().values().append_null();
    builder.values().append(true);
    builder.values().append(false);
    builder.append(true);
    // Row 3: null
    builder.append(false);

    // Row 4: [[1, 2]]
    builder.values().values().append_value(1);
    builder.values().values().append_value(2);
    builder.values().append(true);
    builder.append(true);

    let list = Arc::new(builder.finish()) as ArrayRef;
    let d = list.data_type().clone();

    // Section 1: ascending, nulls first.
    let options = SortOptions::default().asc().with_nulls_first(true);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) > rows.row(1)); // [[1, 2], ..] vs [[1, null], ..]
    assert!(rows.row(1) > rows.row(2)); // [.., [1, null]] vs [.., null]
    assert!(rows.row(2) > rows.row(3)); // [[1, null], null] vs null
    assert!(rows.row(4) < rows.row(0)); // [[1, 2]] vs [[1, 2], [1, null]]
    assert!(rows.row(4) > rows.row(1)); // [[1, 2]] vs [[1, null], [1, null]]

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 2: descending, nulls first.
    let options = SortOptions::default().desc().with_nulls_first(true);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) > rows.row(1)); // [[1, 2], ..] vs [[1, null], ..]
    assert!(rows.row(1) > rows.row(2)); // [.., [1, null]] vs [.., null]
    assert!(rows.row(2) > rows.row(3)); // [[1, null], null] vs null
    assert!(rows.row(4) > rows.row(0)); // [[1, 2]] vs [[1, 2], [1, null]]
    assert!(rows.row(4) > rows.row(1)); // [[1, 2]] vs [[1, null], [1, null]]

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 3: descending, nulls last.
    let options = SortOptions::default().desc().with_nulls_first(false);
    let field = SortField::new_with_options(d, options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [[1, 2], ..] vs [[1, null], ..]
    assert!(rows.row(1) < rows.row(2)); // [.., [1, null]] vs [.., null]
    assert!(rows.row(2) < rows.row(3)); // [[1, null], null] vs null
    assert!(rows.row(4) > rows.row(0)); // [[1, 2]] vs [[1, 2], [1, null]]
    assert!(rows.row(4) < rows.row(1)); // [[1, 2]] vs [[1, null], [1, null]]

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 4: same converter (descending, nulls last) on a slice holding
    // original rows 1..=3.
    let sliced_list = list.slice(1, 3);
    let rows = converter
        .convert_columns(&[Arc::clone(&sliced_list)])
        .unwrap();

    assert!(rows.row(0) < rows.row(1)); // [[1, null], [1, null]] vs [[1, null], null]
    assert!(rows.row(1) < rows.row(2)); // [[1, null], null] vs null

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &sliced_list);
}
3338
/// Runs the single-level and nested list round-trip checks with `O = i32`.
#[test]
fn test_list() {
    test_single_list::<i32>();
    test_nested_list::<i32>();
}
3344
/// Runs the single-level and nested list round-trip checks with `O = i64`.
#[test]
fn test_large_list() {
    test_single_list::<i64>();
    test_nested_list::<i64>();
}
3350
/// Builds a seven-row `GenericListViewArray<O>` of nullable `Int32` values
/// and, for several sort options, checks the relative order of the encoded
/// rows and that the decoded column passes full validation. Only the first
/// section additionally compares the decoded rows with the input, slot by slot.
fn test_single_list_view<O: OffsetSizeTrait>() {
    let mut builder = GenericListViewBuilder::<O, _>::new(Int32Builder::new());
    // Row 0: [32, 52, 32]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(32);
    builder.append(true);
    // Row 1: [32, 52, 12]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(12);
    builder.append(true);
    // Row 2: [32, 52]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.append(true);
    // Row 3: null list; child values 32 and 52 are appended underneath the null slot
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.append(false);
    // Row 4: [32, null]
    builder.values().append_value(32);
    builder.values().append_null();
    builder.append(true);
    // Row 5: []
    builder.append(true);
    // Row 6: null list; child values 17 and null are appended underneath the null slot
    builder.values().append_value(17);
    builder.values().append_null();
    builder.append(false);

    let list = Arc::new(builder.finish()) as ArrayRef;
    let d = list.data_type().clone();

    // Section 1: default sort options.
    let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();

    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) < rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) < rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) < rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();

    // Compare slot by slot (validity, then values of valid slots) instead of
    // comparing the two arrays as a whole.
    let back_list_view = back[0]
        .as_any()
        .downcast_ref::<GenericListViewArray<O>>()
        .unwrap();
    let orig_list_view = list
        .as_any()
        .downcast_ref::<GenericListViewArray<O>>()
        .unwrap();

    assert_eq!(back_list_view.len(), orig_list_view.len());
    for i in 0..back_list_view.len() {
        assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
        if back_list_view.is_valid(i) {
            assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
        }
    }

    // Section 2: ascending, nulls last.
    let options = SortOptions::default().asc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) > rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) < rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) > rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();

    // Section 3: descending, nulls last.
    let options = SortOptions::default().desc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) > rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) > rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) > rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();

    // Section 4: descending, nulls first.
    let options = SortOptions::default().desc().with_nulls_first(true);
    let field = SortField::new_with_options(d, options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52]
    assert!(rows.row(4) < rows.row(2)); // [32, null] vs [32, 52]
    assert!(rows.row(5) > rows.row(2)); // [] vs [32, 52]
    assert!(rows.row(3) < rows.row(5)); // null vs []
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();

    // Section 5: same converter (descending, nulls first) on a slice holding
    // original rows 1..=5: [32, 52, 12], [32, 52], null, [32, null], [].
    let sliced_list = list.slice(1, 5);
    let rows_on_sliced_list = converter
        .convert_columns(&[Arc::clone(&sliced_list)])
        .unwrap();

    assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0)); // [32, 52] vs [32, 52, 12]
    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null vs [32, 52]
    assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null] vs [32, 52]
    assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1)); // [] vs [32, 52]
    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null vs []

    let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
}
3477
3478 fn test_nested_list_view<O: OffsetSizeTrait>() {
3479 let mut builder = GenericListViewBuilder::<O, _>::new(GenericListViewBuilder::<O, _>::new(
3480 Int32Builder::new(),
3481 ));
3482
3483 builder.values().values().append_value(1);
3485 builder.values().values().append_value(2);
3486 builder.values().append(true);
3487 builder.values().values().append_value(1);
3488 builder.values().values().append_null();
3489 builder.values().append(true);
3490 builder.append(true);
3491
3492 builder.values().values().append_value(1);
3494 builder.values().values().append_null();
3495 builder.values().append(true);
3496 builder.values().values().append_value(1);
3497 builder.values().values().append_null();
3498 builder.values().append(true);
3499 builder.append(true);
3500
3501 builder.values().values().append_value(1);
3503 builder.values().values().append_null();
3504 builder.values().append(true);
3505 builder.values().append(false);
3506 builder.append(true);
3507
3508 builder.append(false);
3510
3511 builder.values().values().append_value(1);
3513 builder.values().values().append_value(2);
3514 builder.values().append(true);
3515 builder.append(true);
3516
3517 let list = Arc::new(builder.finish()) as ArrayRef;
3518 let d = list.data_type().clone();
3519
3520 let options = SortOptions::default().asc().with_nulls_first(true);
3528 let field = SortField::new_with_options(d.clone(), options);
3529 let converter = RowConverter::new(vec![field]).unwrap();
3530 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3531
3532 assert!(rows.row(0) > rows.row(1));
3533 assert!(rows.row(1) > rows.row(2));
3534 assert!(rows.row(2) > rows.row(3));
3535 assert!(rows.row(4) < rows.row(0));
3536 assert!(rows.row(4) > rows.row(1));
3537
3538 let back = converter.convert_rows(&rows).unwrap();
3539 assert_eq!(back.len(), 1);
3540 back[0].to_data().validate_full().unwrap();
3541
3542 let back_list_view = back[0]
3544 .as_any()
3545 .downcast_ref::<GenericListViewArray<O>>()
3546 .unwrap();
3547 let orig_list_view = list
3548 .as_any()
3549 .downcast_ref::<GenericListViewArray<O>>()
3550 .unwrap();
3551
3552 assert_eq!(back_list_view.len(), orig_list_view.len());
3553 for i in 0..back_list_view.len() {
3554 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3555 if back_list_view.is_valid(i) {
3556 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3557 }
3558 }
3559
3560 let options = SortOptions::default().desc().with_nulls_first(true);
3561 let field = SortField::new_with_options(d.clone(), options);
3562 let converter = RowConverter::new(vec![field]).unwrap();
3563 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3564
3565 assert!(rows.row(0) > rows.row(1));
3566 assert!(rows.row(1) > rows.row(2));
3567 assert!(rows.row(2) > rows.row(3));
3568 assert!(rows.row(4) > rows.row(0));
3569 assert!(rows.row(4) > rows.row(1));
3570
3571 let back = converter.convert_rows(&rows).unwrap();
3572 assert_eq!(back.len(), 1);
3573 back[0].to_data().validate_full().unwrap();
3574
3575 let back_list_view = back[0]
3577 .as_any()
3578 .downcast_ref::<GenericListViewArray<O>>()
3579 .unwrap();
3580
3581 assert_eq!(back_list_view.len(), orig_list_view.len());
3582 for i in 0..back_list_view.len() {
3583 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3584 if back_list_view.is_valid(i) {
3585 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3586 }
3587 }
3588
3589 let options = SortOptions::default().desc().with_nulls_first(false);
3590 let field = SortField::new_with_options(d.clone(), options);
3591 let converter = RowConverter::new(vec![field]).unwrap();
3592 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
3593
3594 assert!(rows.row(0) < rows.row(1));
3595 assert!(rows.row(1) < rows.row(2));
3596 assert!(rows.row(2) < rows.row(3));
3597 assert!(rows.row(4) > rows.row(0));
3598 assert!(rows.row(4) < rows.row(1));
3599
3600 let back = converter.convert_rows(&rows).unwrap();
3601 assert_eq!(back.len(), 1);
3602 back[0].to_data().validate_full().unwrap();
3603
3604 let back_list_view = back[0]
3606 .as_any()
3607 .downcast_ref::<GenericListViewArray<O>>()
3608 .unwrap();
3609
3610 assert_eq!(back_list_view.len(), orig_list_view.len());
3611 for i in 0..back_list_view.len() {
3612 assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
3613 if back_list_view.is_valid(i) {
3614 assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
3615 }
3616 }
3617
3618 let sliced_list = list.slice(1, 3);
3619 let rows = converter
3620 .convert_columns(&[Arc::clone(&sliced_list)])
3621 .unwrap();
3622
3623 assert!(rows.row(0) < rows.row(1));
3624 assert!(rows.row(1) < rows.row(2));
3625
3626 let back = converter.convert_rows(&rows).unwrap();
3627 assert_eq!(back.len(), 1);
3628 back[0].to_data().validate_full().unwrap();
3629 }
3630
/// Runs the single-level and nested list-view round-trip checks with `O = i32`.
#[test]
fn test_list_view() {
    test_single_list_view::<i32>();
    test_nested_list_view::<i32>();
}
3636
/// Runs the single-level and nested list-view round-trip checks with `O = i64`.
#[test]
fn test_large_list_view() {
    test_single_list_view::<i64>();
    test_nested_list_view::<i64>();
}
3642
/// Builds a list view whose rows overlap, repeat and appear out of order
/// within one shared values buffer, then checks row ordering and the round
/// trip for default options and for descending options.
fn test_list_view_with_shared_values<O: OffsetSizeTrait>() {
    // Shared child values; every row below is a window (offset, size) into this buffer.
    let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
    let field = Arc::new(Field::new_list_field(DataType::Int32, true));

    // Offsets and sizes, read pairwise, select:
    //   Row 0: offset 0, size 3 -> [1, 2, 3]
    //   Row 1: offset 0, size 3 -> [1, 2, 3]  (same window as row 0)
    //   Row 2: offset 5, size 2 -> [6, 7]
    //   Row 3: offset 2, size 2 -> [3, 4]     (overlaps rows 0 and 1)
    //   Row 4: offset 1, size 4 -> [2, 3, 4, 5]
    //   Row 5: offset 2, size 1 -> [3]
    let offsets = ScalarBuffer::<O>::from(vec![
        O::from_usize(0).unwrap(),
        O::from_usize(0).unwrap(),
        O::from_usize(5).unwrap(),
        O::from_usize(2).unwrap(),
        O::from_usize(1).unwrap(),
        O::from_usize(2).unwrap(),
    ]);
    let sizes = ScalarBuffer::<O>::from(vec![
        O::from_usize(3).unwrap(),
        O::from_usize(3).unwrap(),
        O::from_usize(2).unwrap(),
        O::from_usize(2).unwrap(),
        O::from_usize(4).unwrap(),
        O::from_usize(1).unwrap(),
    ]);

    // No null buffer is supplied, so every row is valid.
    let list_view: GenericListViewArray<O> =
        GenericListViewArray::try_new(field, offsets, sizes, Arc::new(values), None).unwrap();

    let d = list_view.data_type().clone();
    let list = Arc::new(list_view) as ArrayRef;

    // Section 1: default sort options.
    let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    // [1, 2, 3] vs [1, 2, 3]
    assert_eq!(rows.row(0), rows.row(1));

    // [1, 2, 3] vs [6, 7]
    assert!(rows.row(0) < rows.row(2));

    // [3, 4] vs [1, 2, 3]
    assert!(rows.row(3) > rows.row(0));

    // [2, 3, 4, 5] vs [1, 2, 3]
    assert!(rows.row(4) > rows.row(0));

    // [3] vs [3, 4]
    assert!(rows.row(5) < rows.row(3));

    // [3] vs [2, 3, 4, 5]
    assert!(rows.row(5) > rows.row(4));

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();

    // Compare slot by slot (validity, then values of valid slots) instead of
    // comparing the two arrays as a whole.
    let back_list_view = back[0]
        .as_any()
        .downcast_ref::<GenericListViewArray<O>>()
        .unwrap();
    let orig_list_view = list
        .as_any()
        .downcast_ref::<GenericListViewArray<O>>()
        .unwrap();

    assert_eq!(back_list_view.len(), orig_list_view.len());
    for i in 0..back_list_view.len() {
        assert_eq!(back_list_view.is_valid(i), orig_list_view.is_valid(i));
        if back_list_view.is_valid(i) {
            assert_eq!(&back_list_view.value(i), &orig_list_view.value(i));
        }
    }

    // Section 2: descending order; only ordering and validity are checked.
    let options = SortOptions::default().desc();
    let field = SortField::new_with_options(d, options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert_eq!(rows.row(0), rows.row(1)); // [1, 2, 3] vs [1, 2, 3]
    assert!(rows.row(0) > rows.row(2)); // [1, 2, 3] vs [6, 7]
    assert!(rows.row(3) < rows.row(0)); // [3, 4] vs [1, 2, 3]

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
}
3737
/// Runs the shared-values list-view check with `O = i32`.
#[test]
fn test_list_view_shared_values() {
    test_list_view_with_shared_values::<i32>();
}
3742
/// Runs the shared-values list-view check with `O = i64`.
#[test]
fn test_large_list_view_shared_values() {
    test_list_view_with_shared_values::<i64>();
}
3747
/// Builds a seven-row fixed-size list (3 nullable `Int32` values per row)
/// and, for several sort options, checks the relative order of the encoded
/// rows and that decoding reproduces the input. The last section repeats the
/// round trip on a slice of the input.
#[test]
fn test_fixed_size_list() {
    let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
    // Row 0: [32, 52, 32]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(32);
    builder.append(true);
    // Row 1: [32, 52, 12]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(12);
    builder.append(true);
    // Row 2: [32, 52, null]
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_null();
    builder.append(true);
    // Row 3: null list; child values 32, 52, 13 are appended underneath the null slot
    builder.values().append_value(32);
    builder.values().append_value(52);
    builder.values().append_value(13);
    builder.append(false);
    // Row 4: [32, null, null]
    builder.values().append_value(32);
    builder.values().append_null();
    builder.values().append_null();
    builder.append(true);
    // Row 5: [null, null, null]
    builder.values().append_null();
    builder.values().append_null();
    builder.values().append_null();
    builder.append(true);
    // Row 6: null list; child values 17, null, 77 are appended underneath the null slot
    builder.values().append_value(17);
    builder.values().append_null();
    builder.values().append_value(77);
    builder.append(false);

    let list = Arc::new(builder.finish()) as ArrayRef;
    let d = list.data_type().clone();

    // Section 1: default sort options.
    let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();

    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52, null] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52, null]
    assert!(rows.row(4) < rows.row(2)); // [32, null, null] vs [32, 52, null]
    assert!(rows.row(5) < rows.row(2)); // [null, null, null] vs [32, 52, null]
    assert!(rows.row(3) < rows.row(5)); // null vs [null, null, null]
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 2: ascending, nulls last.
    let options = SortOptions::default().asc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
    assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52, null] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52, null]
    assert!(rows.row(4) > rows.row(2)); // [32, null, null] vs [32, 52, null]
    assert!(rows.row(5) > rows.row(2)); // [null, null, null] vs [32, 52, null]
    assert!(rows.row(3) > rows.row(5)); // null vs [null, null, null]
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 3: descending, nulls last.
    let options = SortOptions::default().desc().with_nulls_first(false);
    let field = SortField::new_with_options(d.clone(), options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) > rows.row(1)); // [32, 52, null] vs [32, 52, 12]
    assert!(rows.row(3) > rows.row(2)); // null vs [32, 52, null]
    assert!(rows.row(4) > rows.row(2)); // [32, null, null] vs [32, 52, null]
    assert!(rows.row(5) > rows.row(2)); // [null, null, null] vs [32, 52, null]
    assert!(rows.row(3) > rows.row(5)); // null vs [null, null, null]
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 4: descending, nulls first.
    let options = SortOptions::default().desc().with_nulls_first(true);
    let field = SortField::new_with_options(d, options);
    let converter = RowConverter::new(vec![field]).unwrap();
    let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();

    assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] vs [32, 52, 12]
    assert!(rows.row(2) < rows.row(1)); // [32, 52, null] vs [32, 52, 12]
    assert!(rows.row(3) < rows.row(2)); // null vs [32, 52, null]
    assert!(rows.row(4) < rows.row(2)); // [32, null, null] vs [32, 52, null]
    assert!(rows.row(5) < rows.row(2)); // [null, null, null] vs [32, 52, null]
    assert!(rows.row(3) < rows.row(5)); // null vs [null, null, null]
    assert_eq!(rows.row(3), rows.row(6)); // both null lists encode identically

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &list);

    // Section 5: same converter (descending, nulls first) on a slice holding
    // original rows 1..=5:
    // [32, 52, 12], [32, 52, null], null, [32, null, null], [null, null, null].
    let sliced_list = list.slice(1, 5);
    let rows_on_sliced_list = converter
        .convert_columns(&[Arc::clone(&sliced_list)])
        .unwrap();

    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null vs [32, 52, null]
    assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null, null] vs [32, 52, null]
    assert!(rows_on_sliced_list.row(4) < rows_on_sliced_list.row(1)); // [null, null, null] vs [32, 52, null]
    assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null vs [null, null, null]

    let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
    assert_eq!(back.len(), 1);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &sliced_list);
}
3870
/// Round-trips two fixed-size-list columns (one `UInt8` per row each) through
/// a single two-field converter and checks that both columns are recovered
/// unchanged and pass full validation.
#[test]
fn test_two_fixed_size_lists() {
    let mut first = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
    // Row 0: [100]
    first.values().append_value(100);
    first.append(true);
    // Row 1: [101]
    first.values().append_value(101);
    first.append(true);
    // Row 2: [102]
    first.values().append_value(102);
    first.append(true);
    // Row 3: [null]
    first.values().append_null();
    first.append(true);
    // Row 4: null list; a null child value is appended underneath the null slot
    first.values().append_null();
    first.append(false);
    let first = Arc::new(first.finish()) as ArrayRef;
    let first_type = first.data_type().clone();

    let mut second = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
    // Row 0: [200]
    second.values().append_value(200);
    second.append(true);
    // Row 1: [201]
    second.values().append_value(201);
    second.append(true);
    // Row 2: [202]
    second.values().append_value(202);
    second.append(true);
    // Row 3: [null]
    second.values().append_null();
    second.append(true);
    // Row 4: null list; a null child value is appended underneath the null slot
    second.values().append_null();
    second.append(false);
    let second = Arc::new(second.finish()) as ArrayRef;
    let second_type = second.data_type().clone();

    // Neither type is used again, so both are moved into their fields
    // instead of being cloned a second time.
    let converter = RowConverter::new(vec![
        SortField::new(first_type),
        SortField::new(second_type),
    ])
    .unwrap();

    let rows = converter
        .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
        .unwrap();

    let back = converter.convert_rows(&rows).unwrap();
    assert_eq!(back.len(), 2);
    back[0].to_data().validate_full().unwrap();
    assert_eq!(&back[0], &first);
    back[1].to_data().validate_full().unwrap();
    assert_eq!(&back[1], &second);
}
3928
/// Round-trips a `FixedSizeList` of structs (containing a variable-width
/// `Utf8` field) together with a plain `Utf8` column, and checks that both
/// columns decode unchanged.
#[test]
fn test_fixed_size_list_with_variable_width_content() {
    /// Appends one list row holding a single struct.
    ///
    /// `content` of `None` appends nulls to all three struct fields and marks
    /// the struct itself null; `Some` appends the given values and marks the
    /// struct valid. `list_valid` is the validity of the enclosing list row.
    fn push_row(
        builder: &mut FixedSizeListBuilder<StructBuilder>,
        content: Option<(i64, i16, &str)>,
        list_valid: bool,
    ) {
        let entry = builder.values();
        match content {
            Some((timestamp, offset_minutes, time_zone)) => {
                entry
                    .field_builder::<TimestampMicrosecondBuilder>(0)
                    .unwrap()
                    .append_value(timestamp);
                entry
                    .field_builder::<Int16Builder>(1)
                    .unwrap()
                    .append_value(offset_minutes);
                entry
                    .field_builder::<StringBuilder>(2)
                    .unwrap()
                    .append_value(time_zone);
            }
            None => {
                entry
                    .field_builder::<TimestampMicrosecondBuilder>(0)
                    .unwrap()
                    .append_null();
                entry
                    .field_builder::<Int16Builder>(1)
                    .unwrap()
                    .append_null();
                entry
                    .field_builder::<StringBuilder>(2)
                    .unwrap()
                    .append_null();
            }
        }
        entry.append(content.is_some());
        builder.append(list_valid);
    }

    let struct_fields = vec![
        Field::new(
            "timestamp",
            DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("UTC"))),
            false,
        ),
        Field::new("offset_minutes", DataType::Int16, false),
        Field::new("time_zone", DataType::Utf8, false),
    ];
    let mut list_builder =
        FixedSizeListBuilder::new(StructBuilder::from_fields(struct_fields, 1), 1);

    // Null list containing a null struct.
    push_row(&mut list_builder, None, false);
    // Valid list containing a null struct.
    push_row(&mut list_builder, None, true);
    // Valid lists containing populated structs.
    push_row(&mut list_builder, Some((0, 0, "UTC")), true);
    push_row(
        &mut list_builder,
        Some((1126351800123456, 120, "Europe/Warsaw")),
        true,
    );
    let first = Arc::new(list_builder.finish()) as ArrayRef;

    let mut text_builder = StringBuilder::new();
    for value in [Some("somewhere near"), None, Some("Greenwich"), Some("Warsaw")] {
        text_builder.append_option(value);
    }
    let second = Arc::new(text_builder.finish()) as ArrayRef;

    let converter = RowConverter::new(vec![
        SortField::new(first.data_type().clone()),
        SortField::new(second.data_type().clone()),
    ])
    .unwrap();

    let columns = [Arc::clone(&first), Arc::clone(&second)];
    let rows = converter.convert_columns(&columns).unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();

    assert_eq!(decoded.len(), 2);
    for (actual, expected) in decoded.iter().zip(&columns) {
        actual.to_data().validate_full().unwrap();
        assert_eq!(actual, expected);
    }
}
4046
4047 fn generate_primitive_array<K>(
4048 rng: &mut impl RngCore,
4049 len: usize,
4050 valid_percent: f64,
4051 ) -> PrimitiveArray<K>
4052 where
4053 K: ArrowPrimitiveType,
4054 StandardUniform: Distribution<K::Native>,
4055 {
4056 (0..len)
4057 .map(|_| rng.random_bool(valid_percent).then(|| rng.random()))
4058 .collect()
4059 }
4060
4061 fn generate_boolean_array(
4062 rng: &mut impl RngCore,
4063 len: usize,
4064 valid_percent: f64,
4065 ) -> BooleanArray {
4066 (0..len)
4067 .map(|_| rng.random_bool(valid_percent).then(|| rng.random_bool(0.5)))
4068 .collect()
4069 }
4070
4071 fn generate_strings<O: OffsetSizeTrait>(
4072 rng: &mut impl RngCore,
4073 len: usize,
4074 valid_percent: f64,
4075 ) -> GenericStringArray<O> {
4076 (0..len)
4077 .map(|_| {
4078 rng.random_bool(valid_percent).then(|| {
4079 let len = rng.random_range(0..100);
4080 let bytes = (0..len).map(|_| rng.random_range(0..128)).collect();
4081 String::from_utf8(bytes).unwrap()
4082 })
4083 })
4084 .collect()
4085 }
4086
4087 fn generate_string_view(
4088 rng: &mut impl RngCore,
4089 len: usize,
4090 valid_percent: f64,
4091 ) -> StringViewArray {
4092 (0..len)
4093 .map(|_| {
4094 rng.random_bool(valid_percent).then(|| {
4095 let len = rng.random_range(0..100);
4096 let bytes = (0..len).map(|_| rng.random_range(0..128)).collect();
4097 String::from_utf8(bytes).unwrap()
4098 })
4099 })
4100 .collect()
4101 }
4102
4103 fn generate_byte_view(
4104 rng: &mut impl RngCore,
4105 len: usize,
4106 valid_percent: f64,
4107 ) -> BinaryViewArray {
4108 (0..len)
4109 .map(|_| {
4110 rng.random_bool(valid_percent).then(|| {
4111 let len = rng.random_range(0..100);
4112 let bytes: Vec<_> = (0..len).map(|_| rng.random_range(0..128)).collect();
4113 bytes
4114 })
4115 })
4116 .collect()
4117 }
4118
4119 fn generate_fixed_stringview_column(len: usize) -> StringViewArray {
4120 let edge_cases = vec![
4121 Some("bar".to_string()),
4122 Some("bar\0".to_string()),
4123 Some("LongerThan12Bytes".to_string()),
4124 Some("LongerThan12Bytez".to_string()),
4125 Some("LongerThan12Bytes\0".to_string()),
4126 Some("LongerThan12Byt".to_string()),
4127 Some("backend one".to_string()),
4128 Some("backend two".to_string()),
4129 Some("a".repeat(257)),
4130 Some("a".repeat(300)),
4131 ];
4132
4133 let mut values = Vec::with_capacity(len);
4135 for i in 0..len {
4136 values.push(
4137 edge_cases
4138 .get(i % edge_cases.len())
4139 .cloned()
4140 .unwrap_or(None),
4141 );
4142 }
4143
4144 StringViewArray::from(values)
4145 }
4146
4147 fn generate_dictionary<K>(
4148 rng: &mut impl RngCore,
4149 values: ArrayRef,
4150 len: usize,
4151 valid_percent: f64,
4152 ) -> DictionaryArray<K>
4153 where
4154 K: ArrowDictionaryKeyType,
4155 K::Native: SampleUniform,
4156 {
4157 let min_key = K::Native::from_usize(0).unwrap();
4158 let max_key = K::Native::from_usize(values.len()).unwrap();
4159 let keys: PrimitiveArray<K> = (0..len)
4160 .map(|_| {
4161 rng.random_bool(valid_percent)
4162 .then(|| rng.random_range(min_key..max_key))
4163 })
4164 .collect();
4165
4166 let data_type =
4167 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
4168
4169 let data = keys
4170 .into_data()
4171 .into_builder()
4172 .data_type(data_type)
4173 .add_child_data(values.to_data())
4174 .build()
4175 .unwrap();
4176
4177 DictionaryArray::from(data)
4178 }
4179
4180 fn generate_fixed_size_binary(
4181 rng: &mut impl RngCore,
4182 len: usize,
4183 valid_percent: f64,
4184 ) -> FixedSizeBinaryArray {
4185 let width = rng.random_range(0..20);
4186 let mut builder = FixedSizeBinaryBuilder::new(width);
4187
4188 let mut b = vec![0; width as usize];
4189 for _ in 0..len {
4190 match rng.random_bool(valid_percent) {
4191 true => {
4192 b.iter_mut().for_each(|x| *x = rng.random());
4193 builder.append_value(&b).unwrap();
4194 }
4195 false => builder.append_null(),
4196 }
4197 }
4198
4199 builder.finish()
4200 }
4201
4202 fn generate_struct(rng: &mut impl RngCore, len: usize, valid_percent: f64) -> StructArray {
4203 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4204 let a = generate_primitive_array::<Int32Type>(rng, len, valid_percent);
4205 let b = generate_strings::<i32>(rng, len, valid_percent);
4206 let fields = Fields::from(vec![
4207 Field::new("a", DataType::Int32, true),
4208 Field::new("b", DataType::Utf8, true),
4209 ]);
4210 let values = vec![Arc::new(a) as _, Arc::new(b) as _];
4211 StructArray::new(fields, values, Some(nulls))
4212 }
4213
4214 fn generate_list<R: RngCore, F>(
4215 rng: &mut R,
4216 len: usize,
4217 valid_percent: f64,
4218 values: F,
4219 ) -> ListArray
4220 where
4221 F: FnOnce(&mut R, usize) -> ArrayRef,
4222 {
4223 let offsets = OffsetBuffer::<i32>::from_lengths((0..len).map(|_| rng.random_range(0..10)));
4224 let values_len = offsets.last().unwrap().to_usize().unwrap();
4225 let values = values(rng, values_len);
4226 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4227 let field = Arc::new(Field::new_list_field(values.data_type().clone(), true));
4228 ListArray::new(field, offsets, values, Some(nulls))
4229 }
4230
4231 fn generate_list_view<F>(
4232 rng: &mut impl RngCore,
4233 len: usize,
4234 valid_percent: f64,
4235 values: F,
4236 ) -> ListViewArray
4237 where
4238 F: FnOnce(usize) -> ArrayRef,
4239 {
4240 let sizes: Vec<i32> = (0..len).map(|_| rng.random_range(0..10)).collect();
4242 let values_len: usize = sizes.iter().map(|s| *s as usize).sum::<usize>().max(1);
4243 let values = values(values_len);
4244
4245 let offsets: Vec<i32> = sizes
4247 .iter()
4248 .map(|&size| {
4249 if size == 0 {
4250 0
4251 } else {
4252 rng.random_range(0..=(values_len as i32 - size))
4253 }
4254 })
4255 .collect();
4256
4257 let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent)));
4258 let field = Arc::new(Field::new_list_field(values.data_type().clone(), true));
4259 ListViewArray::new(
4260 field,
4261 ScalarBuffer::from(offsets),
4262 ScalarBuffer::from(sizes),
4263 values,
4264 Some(nulls),
4265 )
4266 }
4267
4268 fn generate_nulls(rng: &mut impl RngCore, len: usize) -> Option<NullBuffer> {
4269 Some(NullBuffer::from_iter(
4270 (0..len).map(|_| rng.random_bool(0.8)),
4271 ))
4272 }
4273
4274 fn change_underlying_null_values_for_primitive<T: ArrowPrimitiveType>(
4275 array: &PrimitiveArray<T>,
4276 ) -> PrimitiveArray<T> {
4277 let (dt, values, nulls) = array.clone().into_parts();
4278
4279 let new_values = ScalarBuffer::<T::Native>::from_iter(
4280 values
4281 .iter()
4282 .zip(nulls.as_ref().unwrap().iter())
4283 .map(|(val, is_valid)| {
4284 if is_valid {
4285 *val
4286 } else {
4287 val.add_wrapping(T::Native::usize_as(1))
4288 }
4289 }),
4290 );
4291
4292 PrimitiveArray::new(new_values, nulls).with_data_type(dt)
4293 }
4294
4295 fn change_underline_null_values_for_byte_array<T: ByteArrayType>(
4296 array: &GenericByteArray<T>,
4297 ) -> GenericByteArray<T> {
4298 let (offsets, values, nulls) = array.clone().into_parts();
4299
4300 let new_offsets = OffsetBuffer::<T::Offset>::from_lengths(
4301 offsets
4302 .lengths()
4303 .zip(nulls.as_ref().unwrap().iter())
4304 .map(|(len, is_valid)| if is_valid { len } else { len + 1 }),
4305 );
4306
4307 let mut new_bytes = Vec::<u8>::with_capacity(new_offsets[new_offsets.len() - 1].as_usize());
4308
4309 offsets
4310 .windows(2)
4311 .zip(nulls.as_ref().unwrap().iter())
4312 .for_each(|(start_and_end, is_valid)| {
4313 let start = start_and_end[0].as_usize();
4314 let end = start_and_end[1].as_usize();
4315 new_bytes.extend_from_slice(&values.as_slice()[start..end]);
4316
4317 if !is_valid {
4319 new_bytes.push(b'c');
4320 }
4321 });
4322
4323 GenericByteArray::<T>::new(new_offsets, Buffer::from_vec(new_bytes), nulls)
4324 }
4325
4326 fn change_underline_null_values_for_list_array<O: OffsetSizeTrait>(
4327 array: &GenericListArray<O>,
4328 ) -> GenericListArray<O> {
4329 let (field, offsets, values, nulls) = array.clone().into_parts();
4330
4331 let (new_values, new_offsets) = {
4332 let concat_values = offsets
4333 .windows(2)
4334 .zip(nulls.as_ref().unwrap().iter())
4335 .map(|(start_and_end, is_valid)| {
4336 let start = start_and_end[0].as_usize();
4337 let end = start_and_end[1].as_usize();
4338 if is_valid {
4339 return (start, end - start);
4340 }
4341
4342 if end == values.len() {
4344 (start, (end - start).saturating_sub(1))
4345 } else {
4346 (start, end - start + 1)
4347 }
4348 })
4349 .map(|(start, length)| values.slice(start, length))
4350 .collect::<Vec<_>>();
4351
4352 let new_offsets =
4353 OffsetBuffer::<O>::from_lengths(concat_values.iter().map(|s| s.len()));
4354
4355 let new_values = {
4356 let values = concat_values.iter().map(|a| a.as_ref()).collect::<Vec<_>>();
4357 arrow_select::concat::concat(&values).expect("should be able to concat")
4358 };
4359
4360 (new_values, new_offsets)
4361 };
4362
4363 GenericListArray::<O>::new(field, new_offsets, new_values, nulls)
4364 }
4365
4366 fn change_underline_null_values(array: &ArrayRef) -> ArrayRef {
4367 if array.null_count() == 0 {
4368 return Arc::clone(array);
4369 }
4370
4371 downcast_primitive_array!(
4372 array => {
4373 let output = change_underlying_null_values_for_primitive(array);
4374
4375 Arc::new(output)
4376 }
4377
4378 DataType::Utf8 => {
4379 Arc::new(change_underline_null_values_for_byte_array(array.as_string::<i32>()))
4380 }
4381 DataType::LargeUtf8 => {
4382 Arc::new(change_underline_null_values_for_byte_array(array.as_string::<i64>()))
4383 }
4384 DataType::Binary => {
4385 Arc::new(change_underline_null_values_for_byte_array(array.as_binary::<i32>()))
4386 }
4387 DataType::LargeBinary => {
4388 Arc::new(change_underline_null_values_for_byte_array(array.as_binary::<i64>()))
4389 }
4390 DataType::List(_) => {
4391 Arc::new(change_underline_null_values_for_list_array(array.as_list::<i32>()))
4392 }
4393 DataType::LargeList(_) => {
4394 Arc::new(change_underline_null_values_for_list_array(array.as_list::<i64>()))
4395 }
4396 _ => {
4397 Arc::clone(array)
4398 }
4399 )
4400 }
4401
4402 fn generate_column(rng: &mut (impl RngCore + Clone), len: usize) -> ArrayRef {
4403 match rng.random_range(0..23) {
4404 0 => Arc::new(generate_primitive_array::<Int32Type>(rng, len, 0.8)),
4405 1 => Arc::new(generate_primitive_array::<UInt32Type>(rng, len, 0.8)),
4406 2 => Arc::new(generate_primitive_array::<Int64Type>(rng, len, 0.8)),
4407 3 => Arc::new(generate_primitive_array::<UInt64Type>(rng, len, 0.8)),
4408 4 => Arc::new(generate_primitive_array::<Float32Type>(rng, len, 0.8)),
4409 5 => Arc::new(generate_primitive_array::<Float64Type>(rng, len, 0.8)),
4410 6 => Arc::new(generate_strings::<i32>(rng, len, 0.8)),
4411 7 => {
4412 let dict_values_len = rng.random_range(1..len);
4413 let strings = Arc::new(generate_strings::<i32>(rng, dict_values_len, 1.0));
4415 Arc::new(generate_dictionary::<Int64Type>(rng, strings, len, 0.8))
4416 }
4417 8 => {
4418 let dict_values_len = rng.random_range(1..len);
4419 let values = Arc::new(generate_primitive_array::<Int64Type>(
4421 rng,
4422 dict_values_len,
4423 1.0,
4424 ));
4425 Arc::new(generate_dictionary::<Int64Type>(rng, values, len, 0.8))
4426 }
4427 9 => Arc::new(generate_fixed_size_binary(rng, len, 0.8)),
4428 10 => Arc::new(generate_struct(rng, len, 0.8)),
4429 11 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4430 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4431 })),
4432 12 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4433 Arc::new(generate_strings::<i32>(rng, values_len, 0.8))
4434 })),
4435 13 => Arc::new(generate_list(rng, len, 0.8, |rng, values_len| {
4436 Arc::new(generate_struct(rng, values_len, 0.8))
4437 })),
4438 14 => Arc::new(generate_string_view(rng, len, 0.8)),
4439 15 => Arc::new(generate_byte_view(rng, len, 0.8)),
4440 16 => Arc::new(generate_fixed_stringview_column(len)),
4441 17 => Arc::new(
4442 generate_list(&mut rng.clone(), len + 1000, 0.8, |rng, values_len| {
4443 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4444 })
4445 .slice(500, len),
4446 ),
4447 18 => Arc::new(generate_boolean_array(rng, len, 0.8)),
4448 19 => Arc::new(generate_list_view(
4449 &mut rng.clone(),
4450 len,
4451 0.8,
4452 |values_len| Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8)),
4453 )),
4454 20 => Arc::new(generate_list_view(
4455 &mut rng.clone(),
4456 len,
4457 0.8,
4458 |values_len| Arc::new(generate_strings::<i32>(rng, values_len, 0.8)),
4459 )),
4460 21 => Arc::new(generate_list_view(
4461 &mut rng.clone(),
4462 len,
4463 0.8,
4464 |values_len| Arc::new(generate_struct(rng, values_len, 0.8)),
4465 )),
4466 22 => Arc::new(
4467 generate_list_view(&mut rng.clone(), len + 1000, 0.8, |values_len| {
4468 Arc::new(generate_primitive_array::<Int64Type>(rng, values_len, 0.8))
4469 })
4470 .slice(500, len),
4471 ),
4472 _ => unreachable!(),
4473 }
4474 }
4475
4476 fn print_row(cols: &[SortColumn], row: usize) -> String {
4477 let t: Vec<_> = cols
4478 .iter()
4479 .map(|x| match x.values.is_valid(row) {
4480 true => {
4481 let opts = FormatOptions::default().with_null("NULL");
4482 let formatter = ArrayFormatter::try_new(x.values.as_ref(), &opts).unwrap();
4483 formatter.value(row).to_string()
4484 }
4485 false => "NULL".to_string(),
4486 })
4487 .collect();
4488 t.join(",")
4489 }
4490
4491 fn print_col_types(cols: &[SortColumn]) -> String {
4492 let t: Vec<_> = cols
4493 .iter()
4494 .map(|x| x.values.data_type().to_string())
4495 .collect();
4496 t.join(",")
4497 }
4498
/// How `fuzz_test` treats the null buffers of the generated columns.
#[derive(Debug, PartialEq)]
enum Nulls {
    /// Keep each generated column exactly as produced.
    AsIs,

    /// Replace each column's null buffer with a freshly generated one.
    Different,

    /// Remove each column's null buffer.
    None,
}
4510
/// Fuzzes the row format with randomly generated columns and sort options.
///
/// For each of 100 iterations and each `Nulls` behavior it checks that:
/// * changing the data hidden behind nulls does not change the encoded rows
///   (skipped when null buffers were removed),
/// * comparing encoded rows agrees with `LexicographicalComparator` for every
///   pair of rows,
/// * `Rows::lengths`, `Row::data` and `Rows::row_len` agree on row lengths,
/// * the rows decode back to the input columns, both directly and after a
///   trip through the binary representation.
#[test]
#[cfg_attr(miri, ignore)]
fn fuzz_test() {
    let mut rng = StdRng::seed_from_u64(42);
    for _ in 0..100 {
        for null_behavior in [Nulls::AsIs, Nulls::Different, Nulls::None] {
            let num_columns = rng.random_range(1..5);
            let len = rng.random_range(5..100);
            let generated: Vec<_> = (0..num_columns)
                .map(|_| generate_column(&mut rng, len))
                .collect();

            let arrays: Vec<ArrayRef> = match null_behavior {
                Nulls::AsIs => generated,
                Nulls::Different => generated
                    .into_iter()
                    .map(|a| replace_array_nulls(a, generate_nulls(&mut rng, len)))
                    .collect(),
                Nulls::None => generated
                    .into_iter()
                    .map(|a| replace_array_nulls(a, None))
                    .collect(),
            };

            let mut options = Vec::new();
            for _ in 0..num_columns {
                options.push(SortOptions {
                    descending: rng.random_bool(0.5),
                    nulls_first: rng.random_bool(0.5),
                });
            }

            let sort_columns: Vec<_> = arrays
                .iter()
                .zip(&options)
                .map(|(column, column_options)| SortColumn {
                    values: Arc::clone(column),
                    options: Some(*column_options),
                })
                .collect();

            let comparator = LexicographicalComparator::try_new(&sort_columns).unwrap();

            let sort_fields: Vec<SortField> = arrays
                .iter()
                .zip(options)
                .map(|(column, column_options)| {
                    SortField::new_with_options(column.data_type().clone(), column_options)
                })
                .collect();

            let converter = RowConverter::new(sort_fields).unwrap();
            let rows = converter.convert_columns(&arrays).unwrap();

            // Without a null buffer there is nothing hidden behind nulls.
            if null_behavior != Nulls::None {
                assert_same_rows_when_changing_input_underlying_null_values(
                    &arrays, &converter, &rows,
                );
            }

            // Row ordering must match the lexicographical comparator.
            for i in 0..len {
                let row_i = rows.row(i);
                for j in 0..len {
                    let row_j = rows.row(j);
                    assert_eq!(
                        row_i.cmp(&row_j),
                        comparator.compare(i, j),
                        "({:?} vs {:?}) vs ({:?} vs {:?}) for types {}",
                        print_row(&sort_columns, i),
                        print_row(&sort_columns, j),
                        row_i,
                        row_j,
                        print_col_types(&sort_columns)
                    );
                }
            }

            // The three ways of obtaining a row's length must agree.
            let mut reported_lengths = rows.lengths();
            for (index, row) in rows.iter().enumerate() {
                let row_len = reported_lengths
                    .next()
                    .expect("Reached end of length iterator while still have rows");
                assert_eq!(
                    row.data.len(),
                    row_len,
                    "Row length mismatch: {} vs {}",
                    row.data.len(),
                    row_len
                );
                assert_eq!(
                    row_len,
                    rows.row_len(index),
                    "Row length mismatch at index {}: {} vs {}",
                    index,
                    row_len,
                    rows.row_len(index)
                );
            }
            assert_eq!(
                reported_lengths.next(),
                None,
                "Length iterator did not reach end"
            );

            // Every decode path must reproduce the input columns.
            let assert_round_trip = |decoded: &[ArrayRef]| {
                for (actual, expected) in decoded.iter().zip(&arrays) {
                    actual.to_data().validate_full().unwrap();
                    dictionary_eq(actual, expected);
                }
            };

            let decoded = converter.convert_rows(&rows).unwrap();
            assert_round_trip(&decoded);

            // Decode rows parsed back from the binary representation.
            let rows = rows.try_into_binary().expect("reasonable size");
            let parser = converter.parser();
            let decoded = converter
                .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
                .unwrap();
            assert_round_trip(&decoded);

            // Decode rows rebuilt from the binary array.
            let rows = converter.from_binary(rows);
            let decoded = converter.convert_rows(&rows).unwrap();
            assert_round_trip(&decoded);
        }
    }
}
4658
4659 fn replace_array_nulls(array: ArrayRef, new_nulls: Option<NullBuffer>) -> ArrayRef {
4660 make_array(
4661 array
4662 .into_data()
4663 .into_builder()
4664 .nulls(new_nulls)
4666 .build()
4667 .unwrap(),
4668 )
4669 }
4670
4671 fn assert_same_rows_when_changing_input_underlying_null_values(
4672 arrays: &[ArrayRef],
4673 converter: &RowConverter,
4674 rows: &Rows,
4675 ) {
4676 let arrays_with_different_data_behind_nulls = arrays
4677 .iter()
4678 .map(|arr| change_underline_null_values(arr))
4679 .collect::<Vec<_>>();
4680
4681 if arrays
4683 .iter()
4684 .zip(arrays_with_different_data_behind_nulls.iter())
4685 .all(|(a, b)| Arc::ptr_eq(a, b))
4686 {
4687 return;
4688 }
4689
4690 let rows_with_different_nulls = converter
4691 .convert_columns(&arrays_with_different_data_behind_nulls)
4692 .unwrap();
4693
4694 assert_eq!(
4695 rows.iter().collect::<Vec<_>>(),
4696 rows_with_different_nulls.iter().collect::<Vec<_>>(),
4697 "Different underlying nulls should not output different rows"
4698 )
4699 }
4700
/// Checks that `Rows::clear` allows a `Rows` buffer to be reused: after
/// clearing and appending, the rows decode to the appended array and match
/// rows appended to a fresh buffer.
#[test]
fn test_clear() {
    let converter = RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
    let mut rows = converter.empty_rows(3, 128);

    let arrays: [ArrayRef; 2] = [
        Arc::new(Int32Array::from(vec![None, Some(2), Some(4)])),
        Arc::new(Int32Array::from(vec![Some(2), None, Some(4)])),
    ];

    for array in &arrays {
        rows.clear();
        converter
            .append(&mut rows, std::slice::from_ref(array))
            .unwrap();
        let decoded = converter.convert_rows(&rows).unwrap();
        assert_eq!(&decoded[0], array);
    }

    // `rows` now holds only the second array; compare with a fresh buffer.
    let mut rows_expected = converter.empty_rows(3, 128);
    converter.append(&mut rows_expected, &arrays[1..]).unwrap();

    rows.iter()
        .zip(rows_expected.iter())
        .enumerate()
        .for_each(|(i, (actual, expected))| {
            assert_eq!(
                actual, expected,
                "For row {i}: expected {expected:?}, actual: {actual:?}",
            );
        });
}
4729
/// Appends a `Dictionary(Int32, Binary)` column to an existing `Rows` buffer
/// and checks that it decodes to logically equal values.
#[test]
fn test_append_codec_dictionary_binary() {
    let dictionary_type =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary));
    let converter = RowConverter::new(vec![SortField::new(dictionary_type)]).unwrap();
    let mut rows = converter.empty_rows(4, 128);

    let keys = Int32Array::from_iter_values([0, 1, 2, 3]);
    let values = BinaryArray::from(vec![
        Some("a".as_bytes()),
        Some("b".as_bytes()),
        Some("c".as_bytes()),
        Some("d".as_bytes()),
    ]);
    let array = Arc::new(DictionaryArray::new(keys, Arc::new(values))) as ArrayRef;

    rows.clear();
    converter
        .append(&mut rows, std::slice::from_ref(&array))
        .unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();

    dictionary_eq(&decoded[0], &array);
}
4759
/// Checks that a list which is a strict prefix of another list (`[null]` vs
/// `[null, null]`) sorts before it.
#[test]
fn test_list_prefix() {
    let mut builder = ListBuilder::new(Int8Builder::new());
    builder.append_value([None]);
    builder.append_value([None, None]);
    let lists = builder.finish();

    let sort_field = SortField::new(lists.data_type().clone());
    let converter = RowConverter::new(vec![sort_field]).unwrap();
    let rows = converter.convert_columns(&[Arc::new(lists) as _]).unwrap();

    let ordering = rows.row(0).cmp(&rows.row(1));
    assert_eq!(ordering, Ordering::Less);
}
4771
/// Checks that `RowConverter::supports_fields` reports `Map` columns as
/// unsupported.
#[test]
fn map_should_be_marked_as_unsupported() {
    let map_field = Field::new_map(
        "map",
        "entries",
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Utf8, true),
        false,
        true,
    );
    let sort_fields = [SortField::new(map_field.data_type().clone())];

    assert!(
        !RowConverter::supports_fields(&sort_fields),
        "Map should not be supported"
    );
}
4789
/// Checks that constructing a `RowConverter` for a `Map` column fails with
/// `ArrowError::NotYetImplemented` and a descriptive message.
#[test]
fn should_fail_to_create_row_converter_for_unsupported_map_type() {
    let map_field = Field::new_map(
        "map",
        "entries",
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Utf8, true),
        false,
        true,
    );
    let map_data_type = map_field.data_type().clone();

    let message = match RowConverter::new(vec![SortField::new(map_data_type)]) {
        Ok(_) => panic!("Expected NotYetImplemented error for map data type"),
        Err(ArrowError::NotYetImplemented(message)) => message,
        Err(e) => panic!("Expected NotYetImplemented error, got: {e}"),
    };

    assert!(
        message.contains("Row format support not yet implemented for"),
        "Expected NotYetImplemented error for map data type, got: {message}",
    );
}
4816
/// Compares the size of the first data buffer of a decoded `Utf8View` column
/// on two decode paths: rows produced directly by the converter, and rows
/// parsed back from their binary representation.
///
/// NOTE(review): going by the test and variable names, the first path skips
/// UTF-8 validation while the second performs it; confirm against the decoder.
#[test]
fn test_values_buffer_smaller_when_utf8_validation_disabled() {
    /// Returns `(unchecked_len, checked_len)`: the length of the first data
    /// buffer after decoding converter-produced rows, and after decoding rows
    /// parsed from binary.
    fn get_values_buffer_len(col: ArrayRef) -> (usize, usize) {
        let first_buffer_len =
            |decoded: &[ArrayRef]| decoded[0].as_string_view().data_buffers()[0].len();

        let converter = RowConverter::new(vec![SortField::new(DataType::Utf8View)]).unwrap();

        let rows = converter.convert_columns(&[col]).unwrap();
        let unchecked_values_len = first_buffer_len(&converter.convert_rows(&rows).unwrap());

        let binary_rows = rows.try_into_binary().expect("reasonable size");
        let parser = converter.parser();
        let reparsed = binary_rows
            .iter()
            .map(|b| parser.parse(b.expect("valid bytes")));
        let checked_values_len = first_buffer_len(&converter.convert_rows(reparsed).unwrap());

        (unchecked_values_len, checked_values_len)
    }

    // Only short strings (at most 12 bytes each, 14 bytes in total).
    let short_only = Arc::new(StringViewArray::from_iter([
        Some("hello"),
        None,
        Some("short"),
        Some("tiny"),
    ])) as ArrayRef;
    let (unchecked, checked) = get_values_buffer_len(short_only);
    assert_eq!(unchecked, 0);
    assert_eq!(checked, 14);

    // Only strings longer than 12 bytes.
    let long_only = Arc::new(StringViewArray::from_iter([
        Some("this is a very long string over 12 bytes"),
        Some("another long string to test the buffer"),
    ])) as ArrayRef;
    let (unchecked, checked) = get_values_buffer_len(long_only);
    assert!(unchecked > 0);
    assert_eq!(unchecked, checked);

    // A mix: one 13-byte string among short strings.
    let mixed = Arc::new(StringViewArray::from_iter([
        Some("tiny"),
        Some("thisisexact13"),
        None,
        Some("short"),
    ])) as ArrayRef;
    let (unchecked, checked) = get_values_buffer_len(mixed);
    assert_eq!(unchecked, 13);
    assert!(checked > unchecked);
}
4876
/// Round-trips a sparse union of `Int32` and `Utf8` through the row format
/// and checks that the length and every type id are preserved.
#[test]
fn test_sparse_union() {
    // Sparse union: every child has one slot per union row.
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(1),
        None,
        Some(3),
        None,
        Some(5),
    ]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec![
        None,
        Some("b"),
        None,
        Some("d"),
        None,
    ]));

    let fields: UnionFields = [
        (0, Arc::new(Field::new("int", DataType::Int32, false))),
        (1, Arc::new(Field::new("str", DataType::Utf8, false))),
    ]
    .into_iter()
    .collect();
    let type_ids = ScalarBuffer::<i8>::from(vec![0, 1, 0, 1, 0]);

    let union_array = UnionArray::try_new(fields, type_ids, None, vec![ints, strings]).unwrap();

    let sort_field = SortField::new(union_array.data_type().clone());
    let converter = RowConverter::new(vec![sort_field]).unwrap();

    let rows = converter
        .convert_columns(&[Arc::new(union_array.clone())])
        .unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();
    let decoded_union = decoded[0].as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(union_array.len(), decoded_union.len());
    for row in 0..union_array.len() {
        assert_eq!(union_array.type_id(row), decoded_union.type_id(row));
    }
}
4917
/// Round-trips a sparse union with nullable children through the row format
/// and checks that nullness is preserved for every row and that the type id
/// is preserved for every non-null row.
#[test]
fn test_sparse_union_with_nulls() {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(1),
        None,
        Some(3),
        None,
        Some(5),
    ]));
    // The string child is entirely null.
    let strings: ArrayRef = Arc::new(StringArray::from(vec![None::<&str>; 5]));

    let fields: UnionFields = [
        (0, Arc::new(Field::new("int", DataType::Int32, true))),
        (1, Arc::new(Field::new("str", DataType::Utf8, true))),
    ]
    .into_iter()
    .collect();
    let type_ids = ScalarBuffer::<i8>::from(vec![0, 1, 0, 1, 0]);

    let union_array = UnionArray::try_new(fields, type_ids, None, vec![ints, strings]).unwrap();

    let sort_field = SortField::new(union_array.data_type().clone());
    let converter = RowConverter::new(vec![sort_field]).unwrap();

    let rows = converter
        .convert_columns(&[Arc::new(union_array.clone())])
        .unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();
    let decoded_union = decoded[0].as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(union_array.len(), decoded_union.len());
    for i in 0..union_array.len() {
        let expected_null = union_array.is_null(i);
        let actual_null = decoded_union.is_null(i);
        assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
        if expected_null {
            continue;
        }
        assert_eq!(union_array.type_id(i), decoded_union.type_id(i));
    }
}
4963
/// Round-trips a dense union of `Int32` and `Utf8` through the row format and
/// checks that the length and every type id are preserved.
#[test]
fn test_dense_union() {
    // Dense union: children hold only their own values, addressed by offsets.
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 3, 5]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));

    let fields: UnionFields = [
        (0, Arc::new(Field::new("int", DataType::Int32, false))),
        (1, Arc::new(Field::new("str", DataType::Utf8, false))),
    ]
    .into_iter()
    .collect();
    let type_ids = ScalarBuffer::<i8>::from(vec![0, 1, 0, 1, 0]);
    let offsets = ScalarBuffer::<i32>::from(vec![0, 0, 1, 1, 2]);

    let union_array =
        UnionArray::try_new(fields, type_ids, Some(offsets), vec![ints, strings]).unwrap();

    let sort_field = SortField::new(union_array.data_type().clone());
    let converter = RowConverter::new(vec![sort_field]).unwrap();

    let rows = converter
        .convert_columns(&[Arc::new(union_array.clone())])
        .unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();
    let decoded_union = decoded[0].as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(union_array.len(), decoded_union.len());
    for row in 0..union_array.len() {
        assert_eq!(union_array.type_id(row), decoded_union.type_id(row));
    }
}
5006
/// Round-trips a dense union with nullable children through the row format
/// and checks that nullness is preserved for every row and that the type id
/// is preserved for every non-null row.
#[test]
fn test_dense_union_with_nulls() {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None, Some(5)]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec![Some("a"), None]));

    let fields: UnionFields = [
        (0, Arc::new(Field::new("int", DataType::Int32, true))),
        (1, Arc::new(Field::new("str", DataType::Utf8, true))),
    ]
    .into_iter()
    .collect();
    let type_ids = ScalarBuffer::<i8>::from(vec![0, 1, 0, 1, 0]);
    let offsets = ScalarBuffer::<i32>::from(vec![0, 0, 1, 1, 2]);

    let union_array =
        UnionArray::try_new(fields, type_ids, Some(offsets), vec![ints, strings]).unwrap();

    let sort_field = SortField::new(union_array.data_type().clone());
    let converter = RowConverter::new(vec![sort_field]).unwrap();

    let rows = converter
        .convert_columns(&[Arc::new(union_array.clone())])
        .unwrap();
    let decoded = converter.convert_rows(&rows).unwrap();
    let decoded_union = decoded[0].as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(union_array.len(), decoded_union.len());
    for i in 0..union_array.len() {
        let expected_null = union_array.is_null(i);
        let actual_null = decoded_union.is_null(i);
        assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
        if expected_null {
            continue;
        }
        assert_eq!(union_array.type_id(i), decoded_union.type_id(i));
    }
}
5053
5054 #[test]
5055 fn test_union_ordering() {
5056 let int_array = Int32Array::from(vec![100, 5, 20]);
5057 let str_array = StringArray::from(vec!["z", "a"]);
5058
5059 let type_ids = vec![0, 1, 0, 1, 0].into();
5061 let offsets = vec![0, 0, 1, 1, 2].into();
5062
5063 let union_fields = [
5064 (0, Arc::new(Field::new("int", DataType::Int32, false))),
5065 (1, Arc::new(Field::new("str", DataType::Utf8, false))),
5066 ]
5067 .into_iter()
5068 .collect();
5069
5070 let union_array = UnionArray::try_new(
5071 union_fields,
5072 type_ids,
5073 Some(offsets),
5074 vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
5075 )
5076 .unwrap();
5077
5078 let union_type = union_array.data_type().clone();
5079 let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
5080
5081 let rows = converter.convert_columns(&[Arc::new(union_array)]).unwrap();
5082
5083 assert!(rows.row(2) < rows.row(1));
5095
5096 assert!(rows.row(0) < rows.row(3));
5098
5099 assert!(rows.row(2) < rows.row(4));
5102 assert!(rows.row(4) < rows.row(0));
5104
5105 assert!(rows.row(3) < rows.row(1));
5108 }
5109
5110 #[test]
5111 fn test_row_converter_roundtrip_with_many_union_columns() {
5112 let fields1 = UnionFields::try_new(
5114 vec![0, 1],
5115 vec![
5116 Field::new("int", DataType::Int32, true),
5117 Field::new("string", DataType::Utf8, true),
5118 ],
5119 )
5120 .unwrap();
5121
5122 let int_array1 = Int32Array::from(vec![Some(67), None]);
5123 let string_array1 = StringArray::from(vec![None::<&str>, Some("hello")]);
5124 let type_ids1 = vec![0i8, 1].into();
5125
5126 let union_array1 = UnionArray::try_new(
5127 fields1.clone(),
5128 type_ids1,
5129 None,
5130 vec![
5131 Arc::new(int_array1) as ArrayRef,
5132 Arc::new(string_array1) as ArrayRef,
5133 ],
5134 )
5135 .unwrap();
5136
5137 let fields2 = UnionFields::try_new(
5139 vec![0, 1],
5140 vec![
5141 Field::new("int", DataType::Int32, true),
5142 Field::new("string", DataType::Utf8, true),
5143 ],
5144 )
5145 .unwrap();
5146
5147 let int_array2 = Int32Array::from(vec![Some(100), None]);
5148 let string_array2 = StringArray::from(vec![None::<&str>, Some("world")]);
5149 let type_ids2 = vec![0i8, 1].into();
5150
5151 let union_array2 = UnionArray::try_new(
5152 fields2.clone(),
5153 type_ids2,
5154 None,
5155 vec![
5156 Arc::new(int_array2) as ArrayRef,
5157 Arc::new(string_array2) as ArrayRef,
5158 ],
5159 )
5160 .unwrap();
5161
5162 let field1 = Field::new("col1", DataType::Union(fields1, UnionMode::Sparse), true);
5164 let field2 = Field::new("col2", DataType::Union(fields2, UnionMode::Sparse), true);
5165
5166 let sort_field1 = SortField::new(field1.data_type().clone());
5167 let sort_field2 = SortField::new(field2.data_type().clone());
5168
5169 let converter = RowConverter::new(vec![sort_field1, sort_field2]).unwrap();
5170
5171 let rows = converter
5172 .convert_columns(&[
5173 Arc::new(union_array1.clone()) as ArrayRef,
5174 Arc::new(union_array2.clone()) as ArrayRef,
5175 ])
5176 .unwrap();
5177
5178 let out = converter.convert_rows(&rows).unwrap();
5180
5181 let [col1, col2] = out.as_slice() else {
5182 panic!("expected 2 columns")
5183 };
5184
5185 let col1 = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5186 let col2 = col2.as_any().downcast_ref::<UnionArray>().unwrap();
5187
5188 for (expected, got) in [union_array1, union_array2].iter().zip([col1, col2]) {
5189 assert_eq!(expected.len(), got.len());
5190 assert_eq!(expected.type_ids(), got.type_ids());
5191
5192 for i in 0..expected.len() {
5193 assert_eq!(expected.value(i).as_ref(), got.value(i).as_ref());
5194 }
5195 }
5196 }
5197
5198 #[test]
5199 fn test_row_converter_roundtrip_with_one_union_column() {
5200 let fields = UnionFields::try_new(
5201 vec![0, 1],
5202 vec![
5203 Field::new("int", DataType::Int32, true),
5204 Field::new("string", DataType::Utf8, true),
5205 ],
5206 )
5207 .unwrap();
5208
5209 let int_array = Int32Array::from(vec![Some(67), None]);
5210 let string_array = StringArray::from(vec![None::<&str>, Some("hello")]);
5211 let type_ids = vec![0i8, 1].into();
5212
5213 let union_array = UnionArray::try_new(
5214 fields.clone(),
5215 type_ids,
5216 None,
5217 vec![
5218 Arc::new(int_array) as ArrayRef,
5219 Arc::new(string_array) as ArrayRef,
5220 ],
5221 )
5222 .unwrap();
5223
5224 let field = Field::new("col", DataType::Union(fields, UnionMode::Sparse), true);
5225 let sort_field = SortField::new(field.data_type().clone());
5226 let converter = RowConverter::new(vec![sort_field]).unwrap();
5227
5228 let rows = converter
5229 .convert_columns(&[Arc::new(union_array.clone()) as ArrayRef])
5230 .unwrap();
5231
5232 let out = converter.convert_rows(&rows).unwrap();
5234
5235 let [col1] = out.as_slice() else {
5236 panic!("expected 1 column")
5237 };
5238
5239 let col = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5240 assert_eq!(col.len(), union_array.len());
5241 assert_eq!(col.type_ids(), union_array.type_ids());
5242
5243 for i in 0..col.len() {
5244 assert_eq!(col.value(i).as_ref(), union_array.value(i).as_ref());
5245 }
5246 }
5247
5248 #[test]
5249 fn test_row_converter_roundtrip_with_non_default_union_type_ids() {
5250 let fields = UnionFields::try_new(
5252 vec![70, 85],
5253 vec![
5254 Field::new("int", DataType::Int32, true),
5255 Field::new("string", DataType::Utf8, true),
5256 ],
5257 )
5258 .unwrap();
5259
5260 let int_array = Int32Array::from(vec![Some(67), None]);
5261 let string_array = StringArray::from(vec![None::<&str>, Some("hello")]);
5262 let type_ids = vec![70i8, 85].into();
5263
5264 let union_array = UnionArray::try_new(
5265 fields.clone(),
5266 type_ids,
5267 None,
5268 vec![
5269 Arc::new(int_array) as ArrayRef,
5270 Arc::new(string_array) as ArrayRef,
5271 ],
5272 )
5273 .unwrap();
5274
5275 let field = Field::new("col", DataType::Union(fields, UnionMode::Sparse), true);
5276 let sort_field = SortField::new(field.data_type().clone());
5277 let converter = RowConverter::new(vec![sort_field]).unwrap();
5278
5279 let rows = converter
5280 .convert_columns(&[Arc::new(union_array.clone()) as ArrayRef])
5281 .unwrap();
5282
5283 let out = converter.convert_rows(&rows).unwrap();
5285
5286 let [col1] = out.as_slice() else {
5287 panic!("expected 1 column")
5288 };
5289
5290 let col = col1.as_any().downcast_ref::<UnionArray>().unwrap();
5291 assert_eq!(col.len(), union_array.len());
5292 assert_eq!(col.type_ids(), union_array.type_ids());
5293
5294 for i in 0..col.len() {
5295 assert_eq!(col.value(i).as_ref(), union_array.value(i).as_ref());
5296 }
5297 }
5298
5299 #[test]
5300 fn rows_size_should_count_for_capacity() {
5301 let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
5302
5303 let empty_rows_size_with_preallocate_rows_and_data = {
5304 let rows = row_converter.empty_rows(1000, 1000);
5305
5306 rows.size()
5307 };
5308 let empty_rows_size_with_preallocate_rows = {
5309 let rows = row_converter.empty_rows(1000, 0);
5310
5311 rows.size()
5312 };
5313 let empty_rows_size_with_preallocate_data = {
5314 let rows = row_converter.empty_rows(0, 1000);
5315
5316 rows.size()
5317 };
5318 let empty_rows_size_without_preallocate = {
5319 let rows = row_converter.empty_rows(0, 0);
5320
5321 rows.size()
5322 };
5323
5324 assert!(
5325 empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_rows,
5326 "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_rows}"
5327 );
5328 assert!(
5329 empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_data,
5330 "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_data}"
5331 );
5332 assert!(
5333 empty_rows_size_with_preallocate_rows > empty_rows_size_without_preallocate,
5334 "{empty_rows_size_with_preallocate_rows} should be larger than {empty_rows_size_without_preallocate}"
5335 );
5336 assert!(
5337 empty_rows_size_with_preallocate_data > empty_rows_size_without_preallocate,
5338 "{empty_rows_size_with_preallocate_data} should be larger than {empty_rows_size_without_preallocate}"
5339 );
5340 }
5341
5342 #[test]
5343 fn test_struct_no_child_fields() {
5344 fn run_test(array: ArrayRef) {
5345 let sort_fields = vec![SortField::new(array.data_type().clone())];
5346 let converter = RowConverter::new(sort_fields).unwrap();
5347 let r = converter.convert_columns(&[Arc::clone(&array)]).unwrap();
5348
5349 let back = converter.convert_rows(&r).unwrap();
5350 assert_eq!(back.len(), 1);
5351 assert_eq!(&back[0], &array);
5352 }
5353
5354 let s = Arc::new(StructArray::new_empty_fields(5, None)) as ArrayRef;
5355 run_test(s);
5356
5357 let s = Arc::new(StructArray::new_empty_fields(
5358 5,
5359 Some(vec![true, false, true, false, false].into()),
5360 )) as ArrayRef;
5361 run_test(s);
5362 }
5363
5364 #[test]
5365 fn reserve_should_increase_capacity_to_the_requested_size() {
5366 let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
5367 let mut empty_rows = row_converter.empty_rows(0, 0);
5368 empty_rows.reserve(50, 50);
5369 let before_size = empty_rows.size();
5370 empty_rows.reserve(50, 50);
5371 assert_eq!(
5372 empty_rows.size(),
5373 before_size,
5374 "Size should not change when reserving already reserved space"
5375 );
5376 empty_rows.reserve(10, 20);
5377 assert_eq!(
5378 empty_rows.size(),
5379 before_size,
5380 "Size should not change when already have space for the expected reserved data"
5381 );
5382
5383 empty_rows.reserve(100, 20);
5384 assert!(
5385 empty_rows.size() > before_size,
5386 "Size should increase when reserving more space than previously reserved"
5387 );
5388
5389 let before_size = empty_rows.size();
5390
5391 empty_rows.reserve(20, 100);
5392 assert!(
5393 empty_rows.size() > before_size,
5394 "Size should increase when reserving more space than previously reserved"
5395 );
5396 }
5397
5398 #[test]
5399 fn empty_rows_should_return_empty_lengths_iterator() {
5400 let rows = RowConverter::new(vec![SortField::new(DataType::UInt8)])
5401 .unwrap()
5402 .empty_rows(0, 0);
5403 let mut lengths_iter = rows.lengths();
5404 assert_eq!(lengths_iter.next(), None);
5405 }
5406
5407 #[test]
5408 fn test_nested_null_list() {
5409 let null_array = Arc::new(NullArray::new(3));
5410 let list: ArrayRef = Arc::new(ListArray::new(
5412 Field::new_list_field(DataType::Null, true).into(),
5413 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5414 null_array,
5415 None,
5416 ));
5417
5418 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5419 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5420 let back = converter.convert_rows(&rows).unwrap();
5421
5422 assert_eq!(&list, &back[0]);
5423 }
5424
5425 #[test]
5427 fn test_double_nested_null_list() {
5428 let null_array = Arc::new(NullArray::new(1));
5429 let nested_field = Arc::new(Field::new_list_field(DataType::Null, true));
5431 let nested_list = Arc::new(ListArray::new(
5432 nested_field.clone(),
5433 OffsetBuffer::from_lengths(vec![1]),
5434 null_array,
5435 None,
5436 ));
5437 let list = Arc::new(ListArray::new(
5439 Field::new_list_field(DataType::List(nested_field), true).into(),
5440 OffsetBuffer::from_lengths(vec![1]),
5441 nested_list,
5442 None,
5443 )) as ArrayRef;
5444
5445 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5446 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5447 let back = converter.convert_rows(&rows).unwrap();
5448
5449 assert_eq!(&list, &back[0]);
5450 }
5451
5452 #[test]
5454 fn test_large_list_null() {
5455 let null_array = Arc::new(NullArray::new(3));
5456 let list: ArrayRef = Arc::new(LargeListArray::new(
5458 Field::new_list_field(DataType::Null, true).into(),
5459 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5460 null_array,
5461 None,
5462 ));
5463
5464 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5465 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5466 let back = converter.convert_rows(&rows).unwrap();
5467
5468 assert_eq!(&list, &back[0]);
5469 }
5470
5471 #[test]
5473 fn test_fixed_size_list_null() {
5474 let null_array = Arc::new(NullArray::new(6));
5475 let list: ArrayRef = Arc::new(FixedSizeListArray::new(
5477 Arc::new(Field::new_list_field(DataType::Null, true)),
5478 2,
5479 null_array,
5480 None,
5481 ));
5482
5483 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5484 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5485 let back = converter.convert_rows(&rows).unwrap();
5486
5487 assert_eq!(&list, &back[0]);
5488 }
5489
5490 #[test]
5492 fn test_list_null_variations() {
5493 let null_array = Arc::new(NullArray::new(3));
5495 let list: ArrayRef = Arc::new(ListArray::new(
5496 Field::new_list_field(DataType::Null, true).into(),
5497 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5498 null_array,
5499 None,
5500 ));
5501
5502 let converter = RowConverter::new(vec![SortField::new(list.data_type().clone())]).unwrap();
5503 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5504 let back = converter.convert_rows(&rows).unwrap();
5505 assert_eq!(&list, &back[0]);
5506
5507 let null_array = Arc::new(NullArray::new(3));
5509 let list: ArrayRef = Arc::new(ListArray::new(
5510 Field::new_list_field(DataType::Null, true).into(),
5511 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5512 null_array,
5513 Some(vec![true, false, true].into()),
5514 ));
5515
5516 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5517 let back = converter.convert_rows(&rows).unwrap();
5518 assert_eq!(&list, &back[0]);
5519
5520 let null_array = Arc::new(NullArray::new(0));
5522 let list: ArrayRef = Arc::new(ListArray::new(
5523 Field::new_list_field(DataType::Null, true).into(),
5524 OffsetBuffer::from_lengths(vec![]),
5525 null_array,
5526 None,
5527 ));
5528
5529 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5530 let back = converter.convert_rows(&rows).unwrap();
5531 assert_eq!(&list, &back[0]);
5532
5533 let null_array = Arc::new(NullArray::new(0));
5535 let list: ArrayRef = Arc::new(ListArray::new(
5536 Field::new_list_field(DataType::Null, true).into(),
5537 OffsetBuffer::from_lengths(vec![0, 0, 0]),
5538 null_array,
5539 None,
5540 ));
5541
5542 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5543 let back = converter.convert_rows(&rows).unwrap();
5544 assert_eq!(&list, &back[0]);
5545 }
5546
5547 #[test]
5549 fn test_list_null_descending() {
5550 let null_array = Arc::new(NullArray::new(3));
5551 let list: ArrayRef = Arc::new(ListArray::new(
5553 Field::new_list_field(DataType::Null, true).into(),
5554 OffsetBuffer::from_lengths(vec![1, 0, 2]),
5555 null_array,
5556 None,
5557 ));
5558
5559 let options = SortOptions::default().with_descending(true);
5560 let field = SortField::new_with_options(list.data_type().clone(), options);
5561 let converter = RowConverter::new(vec![field]).unwrap();
5562 let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
5563 let back = converter.convert_rows(&rows).unwrap();
5564
5565 assert_eq!(&list, &back[0]);
5566 }
5567
5568 #[test]
5570 fn test_struct_with_null_field() {
5571 let null_array = Arc::new(NullArray::new(3));
5573 let int_array = Arc::new(Int32Array::from(vec![1, 2, 3]));
5574
5575 let struct_array: ArrayRef = Arc::new(StructArray::new(
5576 vec![
5577 Arc::new(Field::new("a", DataType::Null, true)),
5578 Arc::new(Field::new("b", DataType::Int32, true)),
5579 ]
5580 .into(),
5581 vec![null_array, int_array],
5582 Some(vec![true, true, false].into()), ));
5584
5585 let converter =
5586 RowConverter::new(vec![SortField::new(struct_array.data_type().clone())]).unwrap();
5587 let rows = converter
5588 .convert_columns(&[Arc::clone(&struct_array)])
5589 .unwrap();
5590 let back = converter.convert_rows(&rows).unwrap();
5591
5592 assert_eq!(&struct_array, &back[0]);
5593 }
5594
5595 #[test]
5597 fn test_nested_struct_with_null() {
5598 let inner_null = Arc::new(NullArray::new(2));
5600 let inner_struct = Arc::new(StructArray::new(
5601 vec![Arc::new(Field::new("x", DataType::Null, true))].into(),
5602 vec![inner_null],
5603 None,
5604 ));
5605
5606 let y_array = Arc::new(Int32Array::from(vec![10, 20]));
5608 let outer_struct: ArrayRef = Arc::new(StructArray::new(
5609 vec![
5610 Arc::new(Field::new("inner", inner_struct.data_type().clone(), true)),
5611 Arc::new(Field::new("y", DataType::Int32, true)),
5612 ]
5613 .into(),
5614 vec![inner_struct, y_array],
5615 None,
5616 ));
5617
5618 let converter =
5619 RowConverter::new(vec![SortField::new(outer_struct.data_type().clone())]).unwrap();
5620 let rows = converter
5621 .convert_columns(&[Arc::clone(&outer_struct)])
5622 .unwrap();
5623 let back = converter.convert_rows(&rows).unwrap();
5624
5625 assert_eq!(&outer_struct, &back[0]);
5626 }
5627
5628 #[test]
5631 fn test_map_null_not_supported() {
5632 let map_data_type = Field::new_map(
5634 "map",
5635 "entries",
5636 Field::new("key", DataType::Utf8, false),
5637 Field::new("value", DataType::Null, true),
5638 false,
5639 true,
5640 )
5641 .data_type()
5642 .clone();
5643
5644 let result = RowConverter::new(vec![SortField::new(map_data_type)]);
5646 assert!(
5647 result.is_err(),
5648 "Map should not be supported by RowConverter"
5649 );
5650 assert!(
5651 result
5652 .unwrap_err()
5653 .to_string()
5654 .contains("not yet implemented")
5655 );
5656 }
5657
5658 #[test]
5659 fn empty_row_iter_next_back() {
5660 let rows = RowConverter::new(vec![SortField::new(DataType::UInt8)])
5661 .unwrap()
5662 .empty_rows(0, 0);
5663 let mut rows_iter = rows.iter();
5664 assert_eq!(rows_iter.next_back(), None);
5665 assert_eq!(rows_iter.next_back(), None);
5666 assert_eq!(rows_iter.next_back(), None);
5667 }
5668
5669 #[test]
5670 fn row_iter_next_back() {
5671 let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
5672 let mut rng = StdRng::seed_from_u64(42);
5673 let array = generate_primitive_array::<UInt8Type>(&mut rng, 100, 0.8);
5674 let rows = row_converter.convert_columns(&[Arc::new(array)]).unwrap();
5675
5676 let mut rows_iter = rows.iter();
5677 let mut bytes: Vec<u8> = vec![];
5678
5679 while let Some(row) = rows_iter.next_back() {
5680 bytes.extend(row.data.iter().rev());
5681 }
5682
5683 bytes.reverse();
5684
5685 assert_eq!(
5686 bytes,
5687 &rows.buffer.as_slice()[..*rows.offsets.last().unwrap()]
5688 );
5689
5690 assert_eq!(rows_iter.next_back(), None);
5691 assert_eq!(rows_iter.next(), None);
5692 }
5693}