1use crate::types::GenericStringType;
19use crate::{GenericBinaryArray, GenericByteArray, GenericListArray, OffsetSizeTrait};
20use arrow_schema::ArrowError;
21
/// A [`GenericByteArray`] specialised to [`GenericStringType`]: an array of
/// string values whose offsets are stored as `OffsetSize`.
///
/// [`StringArray`] and [`LargeStringArray`] are the `i32` and `i64` offset
/// instantiations of this alias.
pub type GenericStringArray<OffsetSize> = GenericByteArray<GenericStringType<OffsetSize>>;
24
25impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
26 pub fn num_chars(&self, i: usize) -> usize {
32 self.value(i).chars().count()
33 }
34
35 pub fn take_iter<'a>(
37 &'a self,
38 indexes: impl Iterator<Item = Option<usize>> + 'a,
39 ) -> impl Iterator<Item = Option<&'a str>> {
40 indexes.map(|opt_index| opt_index.map(|index| self.value(index)))
41 }
42
43 pub unsafe fn take_iter_unchecked<'a>(
48 &'a self,
49 indexes: impl Iterator<Item = Option<usize>> + 'a,
50 ) -> impl Iterator<Item = Option<&'a str>> {
51 indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
52 }
53
54 pub fn try_from_binary(v: GenericBinaryArray<OffsetSize>) -> Result<Self, ArrowError> {
57 let (offsets, values, nulls) = v.into_parts();
58 Self::try_new(offsets, values, nulls)
59 }
60}
61
62impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
63 for GenericStringArray<OffsetSize>
64{
65 fn from(v: GenericListArray<OffsetSize>) -> Self {
66 GenericBinaryArray::<OffsetSize>::from(v).into()
67 }
68}
69
70impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
71 for GenericStringArray<OffsetSize>
72{
73 fn from(v: GenericBinaryArray<OffsetSize>) -> Self {
74 Self::try_from_binary(v).unwrap()
75 }
76}
77
78impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>> for GenericStringArray<OffsetSize> {
79 fn from(v: Vec<Option<&str>>) -> Self {
80 v.into_iter().collect()
81 }
82}
83
84impl<OffsetSize: OffsetSizeTrait> From<Vec<&str>> for GenericStringArray<OffsetSize> {
85 fn from(v: Vec<&str>) -> Self {
86 Self::from_iter_values(v)
87 }
88}
89
90impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>> for GenericStringArray<OffsetSize> {
91 fn from(v: Vec<Option<String>>) -> Self {
92 v.into_iter().collect()
93 }
94}
95
96impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for GenericStringArray<OffsetSize> {
97 fn from(v: Vec<String>) -> Self {
98 Self::from_iter_values(v)
99 }
100}
101
/// A [`GenericStringArray`] whose offsets are stored as `i32`.
///
/// See [`LargeStringArray`] for the `i64`-offset counterpart.
pub type StringArray = GenericStringArray<i32>;
128
/// A [`GenericStringArray`] whose offsets are stored as `i64`.
///
/// See [`StringArray`] for the `i32`-offset counterpart.
pub type LargeStringArray = GenericStringArray<i64>;
155
156#[cfg(test)]
157mod tests {
158 use super::*;
159 use crate::Array;
160 use crate::builder::{ListBuilder, PrimitiveBuilder, StringBuilder};
161 use crate::types::UInt8Type;
162 use arrow_buffer::Buffer;
163 use arrow_data::ArrayData;
164 use arrow_schema::{DataType, Field};
165 use std::sync::Arc;
166
167 #[test]
168 fn test_string_array_from_u8_slice() {
169 let values: Vec<&str> = vec!["hello", "", "A£ऀ𖼚𝌆৩ƐZ"];
170
171 let string_array = StringArray::from(values);
173
174 assert_eq!(3, string_array.len());
175 assert_eq!(0, string_array.null_count());
176 assert_eq!("hello", string_array.value(0));
177 assert_eq!("hello", unsafe { string_array.value_unchecked(0) });
178 assert_eq!("", string_array.value(1));
179 assert_eq!("", unsafe { string_array.value_unchecked(1) });
180 assert_eq!("A£ऀ𖼚𝌆৩ƐZ", string_array.value(2));
181 assert_eq!("A£ऀ𖼚𝌆৩ƐZ", unsafe {
182 string_array.value_unchecked(2)
183 });
184 assert_eq!(20, string_array.value_length(2)); assert_eq!(8, string_array.num_chars(2));
186 for i in 0..3 {
187 assert!(string_array.is_valid(i));
188 assert!(!string_array.is_null(i));
189 }
190 }
191
192 #[test]
193 #[should_panic(expected = "StringArray expects DataType::Utf8")]
194 fn test_string_array_from_int() {
195 let array = LargeStringArray::from(vec!["a", "b"]);
196 drop(StringArray::from(array.into_data()));
197 }
198
199 #[test]
200 fn test_large_string_array_from_u8_slice() {
201 let values: Vec<&str> = vec!["hello", "", "A£ऀ𖼚𝌆৩ƐZ"];
202
203 let string_array = LargeStringArray::from(values);
205
206 assert_eq!(3, string_array.len());
207 assert_eq!(0, string_array.null_count());
208 assert_eq!("hello", string_array.value(0));
209 assert_eq!("hello", unsafe { string_array.value_unchecked(0) });
210 assert_eq!("", string_array.value(1));
211 assert_eq!("", unsafe { string_array.value_unchecked(1) });
212 assert_eq!("A£ऀ𖼚𝌆৩ƐZ", string_array.value(2));
213 assert_eq!("A£ऀ𖼚𝌆৩ƐZ", unsafe {
214 string_array.value_unchecked(2)
215 });
216 assert_eq!(5, string_array.value_offsets()[2]);
217 assert_eq!(20, string_array.value_length(2)); assert_eq!(8, string_array.num_chars(2));
219 for i in 0..3 {
220 assert!(string_array.is_valid(i));
221 assert!(!string_array.is_null(i));
222 }
223 }
224
225 #[test]
226 fn test_nested_string_array() {
227 let string_builder = StringBuilder::with_capacity(3, 10);
228 let mut list_of_string_builder = ListBuilder::new(string_builder);
229
230 list_of_string_builder.values().append_value("foo");
231 list_of_string_builder.values().append_value("bar");
232 list_of_string_builder.append(true);
233
234 list_of_string_builder.values().append_value("foobar");
235 list_of_string_builder.append(true);
236 let list_of_strings = list_of_string_builder.finish();
237
238 assert_eq!(list_of_strings.len(), 2);
239
240 let first_slot = list_of_strings.value(0);
241 let first_list = first_slot.as_any().downcast_ref::<StringArray>().unwrap();
242 assert_eq!(first_list.len(), 2);
243 assert_eq!(first_list.value(0), "foo");
244 assert_eq!(unsafe { first_list.value_unchecked(0) }, "foo");
245 assert_eq!(first_list.value(1), "bar");
246 assert_eq!(unsafe { first_list.value_unchecked(1) }, "bar");
247
248 let second_slot = list_of_strings.value(1);
249 let second_list = second_slot.as_any().downcast_ref::<StringArray>().unwrap();
250 assert_eq!(second_list.len(), 1);
251 assert_eq!(second_list.value(0), "foobar");
252 assert_eq!(unsafe { second_list.value_unchecked(0) }, "foobar");
253 }
254
255 #[test]
256 #[should_panic(
257 expected = "Trying to access an element at index 4 from a StringArray of length 3"
258 )]
259 fn test_string_array_get_value_index_out_of_bound() {
260 let values = b"helloparquet";
261 let offsets: [i32; 4] = [0, 5, 5, 12];
262 let array_data = ArrayData::builder(DataType::Utf8)
263 .len(3)
264 .add_buffer(Buffer::from_slice_ref(offsets))
265 .add_buffer(Buffer::from_slice_ref(values))
266 .build()
267 .unwrap();
268 let string_array = StringArray::from(array_data);
269 string_array.value(4);
270 }
271
272 #[test]
273 fn test_string_array_fmt_debug() {
274 let arr: StringArray = vec!["hello", "arrow"].into();
275 assert_eq!(
276 "StringArray\n[\n \"hello\",\n \"arrow\",\n]",
277 format!("{arr:?}")
278 );
279 }
280
281 #[test]
282 fn test_large_string_array_fmt_debug() {
283 let arr: LargeStringArray = vec!["hello", "arrow"].into();
284 assert_eq!(
285 "LargeStringArray\n[\n \"hello\",\n \"arrow\",\n]",
286 format!("{arr:?}")
287 );
288 }
289
290 #[test]
291 fn test_string_array_from_iter() {
292 let data = [Some("hello"), None, Some("arrow")];
293 let data_vec = data.to_vec();
294 let array1 = StringArray::from(data_vec.clone());
296 let array2: StringArray = data_vec.clone().into_iter().collect();
298 let array3: StringArray = data_vec
300 .into_iter()
301 .map(|x| x.map(|s| s.to_string()))
302 .collect();
303 let array4: StringArray = data.iter().collect::<StringArray>();
305
306 assert_eq!(array1, array2);
307 assert_eq!(array2, array3);
308 assert_eq!(array3, array4);
309 }
310
311 #[test]
312 fn test_string_array_from_iter_values() {
313 let data = ["hello", "hello2"];
314 let array1 = StringArray::from_iter_values(data.iter());
315
316 assert_eq!(array1.value(0), "hello");
317 assert_eq!(array1.value(1), "hello2");
318
319 let data2 = ["goodbye".to_string(), "goodbye2".to_string()];
321 let array2 = StringArray::from_iter_values(data2.iter());
322
323 assert_eq!(array2.value(0), "goodbye");
324 assert_eq!(array2.value(1), "goodbye2");
325 }
326
327 #[test]
328 fn test_string_array_from_unbound_iter() {
329 let string_iter = (0..)
331 .scan(0usize, |pos, i| {
332 if *pos < 10 {
333 *pos += 1;
334 Some(Some(format!("value {i}")))
335 } else {
336 None
338 }
339 })
340 .take(100);
342
343 let (_, upper_size_bound) = string_iter.size_hint();
344 assert_eq!(upper_size_bound, Some(100));
346 let string_array: StringArray = string_iter.collect();
347 assert_eq!(string_array.len(), 10);
349 }
350
351 #[test]
352 fn test_string_array_all_null() {
353 let data: Vec<Option<&str>> = vec![None];
354 let array = StringArray::from(data);
355 array
356 .into_data()
357 .validate_full()
358 .expect("All null array has valid array data");
359 }
360
361 #[test]
362 fn test_large_string_array_all_null() {
363 let data: Vec<Option<&str>> = vec![None];
364 let array = LargeStringArray::from(data);
365 array
366 .into_data()
367 .validate_full()
368 .expect("All null array has valid array data");
369 }
370
371 fn _test_generic_string_array_from_list_array<O: OffsetSizeTrait>() {
372 let values = b"HelloArrowAndParquet";
373 let child_data = ArrayData::builder(DataType::UInt8)
375 .len(15)
376 .offset(5)
377 .add_buffer(Buffer::from(values))
378 .build()
379 .unwrap();
380
381 let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
382 let null_buffer = Buffer::from_slice_ref([0b101]);
383 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
384 Field::new_list_field(DataType::UInt8, false),
385 ));
386
387 let array_data = ArrayData::builder(data_type)
389 .len(2)
390 .offset(1)
391 .add_buffer(Buffer::from_slice_ref(offsets))
392 .null_bit_buffer(Some(null_buffer))
393 .add_child_data(child_data)
394 .build()
395 .unwrap();
396 let list_array = GenericListArray::<O>::from(array_data);
397 let string_array = GenericStringArray::<O>::from(list_array);
398
399 assert_eq!(2, string_array.len());
400 assert_eq!(1, string_array.null_count());
401 assert!(string_array.is_null(0));
402 assert!(string_array.is_valid(1));
403 assert_eq!("Parquet", string_array.value(1));
404 }
405
406 #[test]
407 fn test_string_array_from_list_array() {
408 _test_generic_string_array_from_list_array::<i32>();
409 }
410
411 #[test]
412 fn test_large_string_array_from_list_array() {
413 _test_generic_string_array_from_list_array::<i64>();
414 }
415
416 fn _test_generic_string_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
417 let values = b"HelloArrow";
418 let child_data = ArrayData::builder(DataType::UInt8)
419 .len(10)
420 .add_buffer(Buffer::from(values))
421 .null_bit_buffer(Some(Buffer::from_slice_ref([0b1010101010])))
422 .build()
423 .unwrap();
424
425 let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
426
427 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
430 Field::new_list_field(DataType::UInt8, true),
431 ));
432
433 let array_data = ArrayData::builder(data_type)
435 .len(2)
436 .add_buffer(Buffer::from_slice_ref(offsets))
437 .add_child_data(child_data)
438 .build()
439 .unwrap();
440 let list_array = GenericListArray::<O>::from(array_data);
441 drop(GenericStringArray::<O>::from(list_array));
442 }
443
444 #[test]
445 #[should_panic(expected = "The child array cannot contain null values.")]
446 fn test_string_array_from_list_array_with_child_nulls_failed() {
447 _test_generic_string_array_from_list_array_with_child_nulls_failed::<i32>();
448 }
449
450 #[test]
451 #[should_panic(expected = "The child array cannot contain null values.")]
452 fn test_large_string_array_from_list_array_with_child_nulls_failed() {
453 _test_generic_string_array_from_list_array_with_child_nulls_failed::<i64>();
454 }
455
456 fn _test_generic_string_array_from_list_array_wrong_type<O: OffsetSizeTrait>() {
457 let values = b"HelloArrow";
458 let child_data = ArrayData::builder(DataType::UInt16)
459 .len(5)
460 .add_buffer(Buffer::from(values))
461 .build()
462 .unwrap();
463
464 let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap());
465 let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
466 Field::new_list_field(DataType::UInt16, false),
467 ));
468
469 let array_data = ArrayData::builder(data_type)
470 .len(2)
471 .add_buffer(Buffer::from_slice_ref(offsets))
472 .add_child_data(child_data)
473 .build()
474 .unwrap();
475 let list_array = GenericListArray::<O>::from(array_data);
476 drop(GenericStringArray::<O>::from(list_array));
477 }
478
479 #[test]
480 #[should_panic(
481 expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types."
482 )]
483 fn test_string_array_from_list_array_wrong_type() {
484 _test_generic_string_array_from_list_array_wrong_type::<i32>();
485 }
486
487 #[test]
488 #[should_panic(
489 expected = "BinaryArray can only be created from List<u8> arrays, mismatched data types."
490 )]
491 fn test_large_string_array_from_list_array_wrong_type() {
492 _test_generic_string_array_from_list_array_wrong_type::<i64>();
493 }
494
495 #[test]
496 #[should_panic(
497 expected = "Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 0"
498 )]
499 fn test_list_array_utf8_validation() {
500 let mut builder = ListBuilder::new(PrimitiveBuilder::<UInt8Type>::new());
501 builder.values().append_value(0xFF);
502 builder.append(true);
503 let list = builder.finish();
504 let _ = StringArray::from(list);
505 }
506
507 #[test]
508 fn test_empty_offsets() {
509 let string = StringArray::from(
510 ArrayData::builder(DataType::Utf8)
511 .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
512 .build()
513 .unwrap(),
514 );
515 assert_eq!(string.len(), 0);
516 assert_eq!(string.value_offsets(), &[0]);
517
518 let string = LargeStringArray::from(
519 ArrayData::builder(DataType::LargeUtf8)
520 .buffers(vec![Buffer::from(&[]), Buffer::from(&[])])
521 .build()
522 .unwrap(),
523 );
524 assert_eq!(string.len(), 0);
525 assert_eq!(string.value_offsets(), &[0]);
526 }
527
528 #[test]
529 fn test_into_builder() {
530 let array: StringArray = vec!["hello", "arrow"].into();
531
532 let mut builder = array.into_builder().unwrap();
534
535 builder.append_value("rust");
536
537 let expected: StringArray = vec!["hello", "arrow", "rust"].into();
538 let array = builder.finish();
539 assert_eq!(expected, array);
540 }
541
542 #[test]
543 fn test_into_builder_err() {
544 let array: StringArray = vec!["hello", "arrow"].into();
545
546 let shared_array = array.clone();
548
549 let err_return = array.into_builder().unwrap_err();
550 assert_eq!(&err_return, &shared_array);
551 }
552
553 #[test]
554 fn test_non_null_string_array_equal() {
555 let a = StringArray::from(vec![Some("ab"), Some("c")]);
556 let b = StringArray::from(vec![Some("a"), Some("bc")]);
557
558 assert_ne!(a, b);
559 }
560}