1use std::any::Any;
19use std::marker::PhantomData;
20use std::sync::Arc;
21
22use arrow_buffer::{Buffer, NullBufferBuilder, ScalarBuffer};
23use arrow_data::{ByteView, MAX_INLINE_VIEW_LEN};
24use arrow_schema::ArrowError;
25use hashbrown::HashTable;
26use hashbrown::hash_table::Entry;
27
28use crate::builder::{ArrayBuilder, BinaryLikeArrayBuilder, StringLikeArrayBuilder};
29use crate::types::bytes::ByteArrayNativeType;
30use crate::types::{BinaryViewType, ByteViewType, StringViewType};
31use crate::{Array, ArrayRef, GenericByteViewArray};
32
33const STARTING_BLOCK_SIZE: u32 = 8 * 1024; const MAX_BLOCK_SIZE: u32 = 2 * 1024 * 1024; enum BlockSizeGrowthStrategy {
37 Fixed { size: u32 },
38 Exponential { current_size: u32 },
39}
40
41impl BlockSizeGrowthStrategy {
42 fn next_size(&mut self) -> u32 {
43 match self {
44 Self::Fixed { size } => *size,
45 Self::Exponential { current_size } => {
46 if *current_size < MAX_BLOCK_SIZE {
47 *current_size = current_size.saturating_mul(2);
49 *current_size
50 } else {
51 MAX_BLOCK_SIZE
52 }
53 }
54 }
55 }
56}
57
58pub struct GenericByteViewBuilder<T: ByteViewType + ?Sized> {
82 views_buffer: Vec<u128>,
83 null_buffer_builder: NullBufferBuilder,
84 completed: Vec<Buffer>,
85 in_progress: Vec<u8>,
86 block_size: BlockSizeGrowthStrategy,
87 string_tracker: Option<(HashTable<usize>, ahash::RandomState)>,
90 max_deduplication_len: Option<u32>,
91 phantom: PhantomData<T>,
92}
93
94impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
95 pub fn new() -> Self {
97 Self::with_capacity(1024)
98 }
99
100 pub fn with_capacity(capacity: usize) -> Self {
102 Self {
103 views_buffer: Vec::with_capacity(capacity),
104 null_buffer_builder: NullBufferBuilder::new(capacity),
105 completed: vec![],
106 in_progress: vec![],
107 block_size: BlockSizeGrowthStrategy::Exponential {
108 current_size: STARTING_BLOCK_SIZE,
109 },
110 string_tracker: None,
111 max_deduplication_len: None,
112 phantom: Default::default(),
113 }
114 }
115
116 pub fn with_max_deduplication_len(self, max_deduplication_len: u32) -> Self {
123 debug_assert!(
124 max_deduplication_len > 0,
125 "max_deduplication_len must be greater than 0"
126 );
127 Self {
128 max_deduplication_len: Some(max_deduplication_len),
129 ..self
130 }
131 }
132
133 pub fn with_fixed_block_size(self, block_size: u32) -> Self {
149 debug_assert!(block_size > 0, "Block size must be greater than 0");
150 Self {
151 block_size: BlockSizeGrowthStrategy::Fixed { size: block_size },
152 ..self
153 }
154 }
155
156 pub fn with_deduplicate_strings(self) -> Self {
161 Self {
162 string_tracker: Some((
163 HashTable::with_capacity(self.views_buffer.capacity()),
164 Default::default(),
165 )),
166 ..self
167 }
168 }
169
170 pub fn append_block(&mut self, buffer: Buffer) -> u32 {
195 assert!(buffer.len() < u32::MAX as usize);
196
197 self.flush_in_progress();
198 let offset = self.completed.len();
199 self.push_completed(buffer);
200 offset as u32
201 }
202
203 pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) {
210 let b = unsafe { self.completed.get_unchecked(block as usize) };
211 let start = offset as usize;
212 let end = start.saturating_add(len as usize);
213 let b = unsafe { b.get_unchecked(start..end) };
214
215 let view = make_view(b, block, offset);
216 self.views_buffer.push(view);
217 self.null_buffer_builder.append_non_null();
218 }
219
220 pub fn append_array(&mut self, array: &GenericByteViewArray<T>) {
224 self.flush_in_progress();
225 let keep_views = self.completed.is_empty() || array.data_buffers().is_empty();
227 let starting_buffer = self.completed.len() as u32;
228
229 self.completed.extend(array.data_buffers().iter().cloned());
230
231 if keep_views {
232 self.views_buffer.extend_from_slice(array.views());
233 } else {
234 self.views_buffer.extend(array.views().iter().map(|v| {
235 let mut byte_view = ByteView::from(*v);
236 if byte_view.length > MAX_INLINE_VIEW_LEN {
237 byte_view.buffer_index += starting_buffer;
239 };
240
241 byte_view.as_u128()
242 }));
243 }
244
245 if let Some(null_buffer) = array.nulls() {
246 self.null_buffer_builder.append_buffer(null_buffer);
247 } else {
248 self.null_buffer_builder.append_n_non_nulls(array.len());
249 }
250 }
251
252 pub fn try_append_view(&mut self, block: u32, offset: u32, len: u32) -> Result<(), ArrowError> {
256 let b = self.completed.get(block as usize).ok_or_else(|| {
257 ArrowError::InvalidArgumentError(format!("No block found with index {block}"))
258 })?;
259 let start = offset as usize;
260 let end = start.saturating_add(len as usize);
261
262 let b = b.get(start..end).ok_or_else(|| {
263 ArrowError::InvalidArgumentError(format!(
264 "Range {start}..{end} out of bounds for block of length {}",
265 b.len()
266 ))
267 })?;
268
269 if T::Native::from_bytes_checked(b).is_none() {
270 return Err(ArrowError::InvalidArgumentError(
271 "Invalid view data".to_string(),
272 ));
273 }
274
275 unsafe {
276 self.append_view_unchecked(block, offset, len);
277 }
278 Ok(())
279 }
280
281 #[inline]
283 fn flush_in_progress(&mut self) {
284 if !self.in_progress.is_empty() {
285 let f = Buffer::from_vec(std::mem::take(&mut self.in_progress));
286 self.push_completed(f)
287 }
288 }
289
290 #[inline]
292 fn push_completed(&mut self, block: Buffer) {
293 assert!(block.len() < u32::MAX as usize, "Block too large");
294 assert!(self.completed.len() < u32::MAX as usize, "Too many blocks");
295 self.completed.push(block);
296 }
297
298 pub fn get_value(&self, index: usize) -> &[u8] {
302 let view = self.views_buffer.as_slice().get(index).unwrap();
303 let len = *view as u32;
304 if len <= MAX_INLINE_VIEW_LEN {
305 unsafe { GenericByteViewArray::<T>::inline_value(view, len as usize) }
308 } else {
309 let view = ByteView::from(*view);
310 if view.buffer_index < self.completed.len() as u32 {
311 let block = &self.completed[view.buffer_index as usize];
312 &block[view.offset as usize..view.offset as usize + view.length as usize]
313 } else {
314 &self.in_progress[view.offset as usize..view.offset as usize + view.length as usize]
315 }
316 }
317 }
318
319 #[inline]
327 pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
328 self.try_append_value(value).unwrap()
329 }
330
331 #[inline]
339 pub fn try_append_value(&mut self, value: impl AsRef<T::Native>) -> Result<(), ArrowError> {
340 let v: &[u8] = value.as_ref().as_ref();
341 let length: u32 = v.len().try_into().map_err(|_| {
342 ArrowError::InvalidArgumentError(format!("String length {} exceeds u32::MAX", v.len()))
343 })?;
344
345 if length <= MAX_INLINE_VIEW_LEN {
346 let mut view_buffer = [0; 16];
347 view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
348 view_buffer[4..4 + v.len()].copy_from_slice(v);
349 self.views_buffer.push(u128::from_le_bytes(view_buffer));
350 self.null_buffer_builder.append_non_null();
351 return Ok(());
352 }
353
354 let can_deduplicate = self.string_tracker.is_some()
358 && self
359 .max_deduplication_len
360 .map(|max_length| length <= max_length)
361 .unwrap_or(true);
362 if can_deduplicate {
363 if let Some((mut ht, hasher)) = self.string_tracker.take() {
364 let hash_val = hasher.hash_one(v);
365 let hasher_fn = |v: &_| hasher.hash_one(v);
366
367 let entry = ht.entry(
368 hash_val,
369 |idx| {
370 let stored_value = self.get_value(*idx);
371 v == stored_value
372 },
373 hasher_fn,
374 );
375 match entry {
376 Entry::Occupied(occupied) => {
377 let idx = occupied.get();
379 self.views_buffer.push(self.views_buffer[*idx]);
380 self.null_buffer_builder.append_non_null();
381 self.string_tracker = Some((ht, hasher));
382 return Ok(());
383 }
384 Entry::Vacant(vacant) => {
385 vacant.insert(self.views_buffer.len());
388 }
389 }
390 self.string_tracker = Some((ht, hasher));
391 }
392 }
393
394 let required_cap = self.in_progress.len() + v.len();
395 if self.in_progress.capacity() < required_cap {
396 self.flush_in_progress();
397 let to_reserve = v.len().max(self.block_size.next_size() as usize);
398 self.in_progress.reserve(to_reserve);
399 };
400
401 let offset = self.in_progress.len() as u32;
402 self.in_progress.extend_from_slice(v);
403
404 let buffer_index: u32 = self.completed.len().try_into().map_err(|_| {
405 ArrowError::InvalidArgumentError(format!(
406 "Buffer count {} exceeds u32::MAX",
407 self.completed.len()
408 ))
409 })?;
410
411 let view = ByteView {
412 length,
413 prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
415 buffer_index,
416 offset,
417 };
418 self.views_buffer.push(view.into());
419 self.null_buffer_builder.append_non_null();
420
421 Ok(())
422 }
423
424 #[inline]
426 pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
427 match value {
428 None => self.append_null(),
429 Some(v) => self.append_value(v),
430 };
431 }
432
433 #[inline]
463 pub fn try_append_value_n(
464 &mut self,
465 value: impl AsRef<T::Native>,
466 n: usize,
467 ) -> Result<(), ArrowError> {
468 if n == 0 {
469 return Ok(());
470 }
471 self.try_append_value(value)?;
473 let view = *self.views_buffer.last().unwrap();
475 self.views_buffer.extend(std::iter::repeat_n(view, n - 1));
476 self.null_buffer_builder.append_n_non_nulls(n - 1);
477 Ok(())
478 }
479
480 #[inline]
482 pub fn append_null(&mut self) {
483 self.null_buffer_builder.append_null();
484 self.views_buffer.push(0);
485 }
486
487 pub fn finish(&mut self) -> GenericByteViewArray<T> {
489 self.flush_in_progress();
490 let completed = std::mem::take(&mut self.completed);
491 let nulls = self.null_buffer_builder.finish();
492 if let Some((ht, _)) = self.string_tracker.as_mut() {
493 ht.clear();
494 }
495 let views = std::mem::take(&mut self.views_buffer);
496 unsafe { GenericByteViewArray::new_unchecked(views.into(), completed, nulls) }
498 }
499
500 pub fn finish_cloned(&self) -> GenericByteViewArray<T> {
502 let mut completed = self.completed.clone();
503 if !self.in_progress.is_empty() {
504 completed.push(Buffer::from_slice_ref(&self.in_progress));
505 }
506 let len = self.views_buffer.len();
507 let views = Buffer::from_slice_ref(self.views_buffer.as_slice());
508 let views = ScalarBuffer::new(views, 0, len);
509 let nulls = self.null_buffer_builder.finish_cloned();
510 unsafe { GenericByteViewArray::new_unchecked(views, completed, nulls) }
512 }
513
514 pub fn validity_slice(&self) -> Option<&[u8]> {
516 self.null_buffer_builder.as_slice()
517 }
518
519 pub fn allocated_size(&self) -> usize {
521 let views = self.views_buffer.capacity() * std::mem::size_of::<u128>();
522 let null = self.null_buffer_builder.allocated_size();
523 let buffer_size = self.completed.iter().map(|b| b.capacity()).sum::<usize>();
524 let in_progress = self.in_progress.capacity();
525 let tracker = match &self.string_tracker {
526 Some((ht, _)) => ht.capacity() * std::mem::size_of::<usize>(),
527 None => 0,
528 };
529 buffer_size + in_progress + tracker + views + null
530 }
531}
532
533impl<T: ByteViewType + ?Sized> Default for GenericByteViewBuilder<T> {
534 fn default() -> Self {
535 Self::new()
536 }
537}
538
539impl<T: ByteViewType + ?Sized> std::fmt::Debug for GenericByteViewBuilder<T> {
540 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
541 write!(f, "{}ViewBuilder", T::PREFIX)?;
542 f.debug_struct("")
543 .field("views_buffer", &self.views_buffer)
544 .field("in_progress", &self.in_progress)
545 .field("completed", &self.completed)
546 .field("null_buffer_builder", &self.null_buffer_builder)
547 .finish()
548 }
549}
550
551impl<T: ByteViewType + ?Sized> ArrayBuilder for GenericByteViewBuilder<T> {
552 fn len(&self) -> usize {
553 self.null_buffer_builder.len()
554 }
555
556 fn finish(&mut self) -> ArrayRef {
557 Arc::new(self.finish())
558 }
559
560 fn finish_cloned(&self) -> ArrayRef {
561 Arc::new(self.finish_cloned())
562 }
563
564 fn as_any(&self) -> &dyn Any {
565 self
566 }
567
568 fn as_any_mut(&mut self) -> &mut dyn Any {
569 self
570 }
571
572 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
573 self
574 }
575}
576
577impl<T: ByteViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
578 for GenericByteViewBuilder<T>
579{
580 #[inline]
581 fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
582 for v in iter {
583 self.append_option(v)
584 }
585 }
586}
587
588pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
608
609impl StringLikeArrayBuilder for StringViewBuilder {
610 fn type_name() -> &'static str {
611 std::any::type_name::<StringViewBuilder>()
612 }
613 fn with_capacity(capacity: usize) -> Self {
614 Self::with_capacity(capacity)
615 }
616 fn append_value(&mut self, value: &str) {
617 Self::append_value(self, value);
618 }
619 fn append_null(&mut self) {
620 Self::append_null(self);
621 }
622}
623
624pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
645
646impl BinaryLikeArrayBuilder for BinaryViewBuilder {
647 fn type_name() -> &'static str {
648 std::any::type_name::<BinaryViewBuilder>()
649 }
650 fn with_capacity(capacity: usize) -> Self {
651 Self::with_capacity(capacity)
652 }
653 fn append_value(&mut self, value: &[u8]) {
654 Self::append_value(self, value);
655 }
656 fn append_null(&mut self) {
657 Self::append_null(self);
658 }
659}
660
661fn make_inlined_view<const LEN: usize>(data: &[u8]) -> u128 {
664 let mut view_buffer = [0; 16];
665 view_buffer[0..4].copy_from_slice(&(LEN as u32).to_le_bytes());
666 view_buffer[4..4 + LEN].copy_from_slice(&data[..LEN]);
667 u128::from_le_bytes(view_buffer)
668}
669
670#[inline(never)]
676pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
677 let len = data.len();
678
679 match len {
682 0 => make_inlined_view::<0>(data),
683 1 => make_inlined_view::<1>(data),
684 2 => make_inlined_view::<2>(data),
685 3 => make_inlined_view::<3>(data),
686 4 => make_inlined_view::<4>(data),
687 5 => make_inlined_view::<5>(data),
688 6 => make_inlined_view::<6>(data),
689 7 => make_inlined_view::<7>(data),
690 8 => make_inlined_view::<8>(data),
691 9 => make_inlined_view::<9>(data),
692 10 => make_inlined_view::<10>(data),
693 11 => make_inlined_view::<11>(data),
694 12 => make_inlined_view::<12>(data),
695 _ => {
697 let view = ByteView {
698 length: len as u32,
699 prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
700 buffer_index: block_id,
701 offset,
702 };
703 view.as_u128()
704 }
705 }
706}
707
708#[cfg(test)]
709mod tests {
710 use core::str;
711
712 use arrow_buffer::ArrowNativeType;
713
714 use super::*;
715
716 #[test]
717 fn test_string_max_deduplication_len() {
718 let value_1 = "short";
719 let value_2 = "not so similar string but long";
720 let value_3 = "1234567890123";
721
722 let max_deduplication_len = MAX_INLINE_VIEW_LEN * 2;
723
724 let mut builder = StringViewBuilder::new()
725 .with_deduplicate_strings()
726 .with_max_deduplication_len(max_deduplication_len);
727
728 assert!(value_1.len() < MAX_INLINE_VIEW_LEN.as_usize());
729 assert!(value_2.len() > max_deduplication_len.as_usize());
730 assert!(
731 value_3.len() > MAX_INLINE_VIEW_LEN.as_usize()
732 && value_3.len() < max_deduplication_len.as_usize()
733 );
734
735 builder.append_value(value_1); builder.append_value(value_1); builder.append_value(value_2); builder.append_value(value_2); builder.append_value(value_3); builder.append_value(value_3); let array = builder.finish();
746
747 let v2 = ByteView::from(array.views()[2]);
749 let v3 = ByteView::from(array.views()[3]);
750 assert_eq!(v2.buffer_index, v3.buffer_index); assert_ne!(v2.offset, v3.offset); let v4 = ByteView::from(array.views()[4]);
754 let v5 = ByteView::from(array.views()[5]);
755 assert_eq!(v4.buffer_index, v5.buffer_index); assert_eq!(v4.offset, v5.offset); }
758
759 #[test]
760 fn test_string_view_deduplicate() {
761 let value_1 = "long string to test string view";
762 let value_2 = "not so similar string but long";
763
764 let mut builder = StringViewBuilder::new()
765 .with_deduplicate_strings()
766 .with_fixed_block_size(value_1.len() as u32 * 2); let values = vec![
769 Some(value_1),
770 Some(value_2),
771 Some("short"),
772 Some(value_1),
773 None,
774 Some(value_2),
775 Some(value_1),
776 ];
777 builder.extend(values.clone());
778
779 let array = builder.finish_cloned();
780 array.to_data().validate_full().unwrap();
781 assert_eq!(array.data_buffers().len(), 1); let actual: Vec<_> = array.iter().collect();
783 assert_eq!(actual, values);
784
785 let view0 = array.views().first().unwrap();
786 let view3 = array.views().get(3).unwrap();
787 let view6 = array.views().get(6).unwrap();
788
789 assert_eq!(view0, view3);
790 assert_eq!(view0, view6);
791
792 assert_eq!(array.views().get(1), array.views().get(5));
793 }
794
795 #[test]
796 fn test_string_view_deduplicate_after_finish() {
797 let mut builder = StringViewBuilder::new().with_deduplicate_strings();
798
799 let value_1 = "long string to test string view";
800 let value_2 = "not so similar string but long";
801 builder.append_value(value_1);
802 let _array = builder.finish();
803 builder.append_value(value_2);
804 let _array = builder.finish();
805 builder.append_value(value_1);
806 let _array = builder.finish();
807 }
808
809 #[test]
810 fn test_string_view() {
811 let b1 = Buffer::from(b"world\xFFbananas\xF0\x9F\x98\x81");
812 let b2 = Buffer::from(b"cupcakes");
813 let b3 = Buffer::from(b"Many strings are here contained of great length and verbosity");
814
815 let mut v = StringViewBuilder::new();
816 assert_eq!(v.append_block(b1), 0);
817
818 v.append_value("This is a very long string that exceeds the inline length");
819 v.append_value("This is another very long string that exceeds the inline length");
820
821 assert_eq!(v.append_block(b2), 2);
822 assert_eq!(v.append_block(b3), 3);
823
824 v.try_append_view(0, 0, 5).unwrap(); v.try_append_view(0, 6, 7).unwrap(); v.try_append_view(2, 3, 5).unwrap(); v.try_append_view(2, 0, 3).unwrap(); v.try_append_view(2, 0, 8).unwrap(); v.try_append_view(0, 13, 4).unwrap(); v.try_append_view(0, 13, 0).unwrap(); v.try_append_view(3, 0, 16).unwrap(); v.try_append_view(1, 0, 19).unwrap(); v.try_append_view(3, 13, 27).unwrap(); v.append_value("I do so like long strings");
839
840 let array = v.finish_cloned();
841 array.to_data().validate_full().unwrap();
842 assert_eq!(array.data_buffers().len(), 5);
843 let actual: Vec<_> = array.iter().flatten().collect();
844 assert_eq!(
845 actual,
846 &[
847 "This is a very long string that exceeds the inline length",
848 "This is another very long string that exceeds the inline length",
849 "world",
850 "bananas",
851 "cakes",
852 "cup",
853 "cupcakes",
854 "😁",
855 "",
856 "Many strings are",
857 "This is a very long",
858 "are here contained of great",
859 "I do so like long strings"
860 ]
861 );
862
863 let err = v.try_append_view(0, u32::MAX, 1).unwrap_err();
864 assert_eq!(
865 err.to_string(),
866 "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17"
867 );
868
869 let err = v.try_append_view(0, 1, u32::MAX).unwrap_err();
870 assert_eq!(
871 err.to_string(),
872 "Invalid argument error: Range 1..4294967296 out of bounds for block of length 17"
873 );
874
875 let err = v.try_append_view(0, 13, 2).unwrap_err();
876 assert_eq!(err.to_string(), "Invalid argument error: Invalid view data");
877
878 let err = v.try_append_view(0, 40, 0).unwrap_err();
879 assert_eq!(
880 err.to_string(),
881 "Invalid argument error: Range 40..40 out of bounds for block of length 17"
882 );
883
884 let err = v.try_append_view(5, 0, 0).unwrap_err();
885 assert_eq!(
886 err.to_string(),
887 "Invalid argument error: No block found with index 5"
888 );
889 }
890
891 #[test]
892 fn test_string_view_with_block_size_growth() {
893 let mut exp_builder = StringViewBuilder::new();
894 let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE);
895
896 let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();
897
898 for i in 0..9 {
899 for _ in 0..(2_u32.pow(i)) {
901 exp_builder.append_value(long_string);
902 fixed_builder.append_value(long_string);
903 }
904 exp_builder.flush_in_progress();
905 fixed_builder.flush_in_progress();
906
907 assert_eq!(exp_builder.completed.len(), i as usize + 1);
909 assert_eq!(
910 exp_builder.completed[i as usize].len(),
911 STARTING_BLOCK_SIZE as usize * 2_usize.pow(i)
912 );
913
914 assert_eq!(fixed_builder.completed.len(), 2_usize.pow(i + 1) - 1);
916
917 assert!(
919 fixed_builder
920 .completed
921 .iter()
922 .all(|b| b.len() == STARTING_BLOCK_SIZE as usize)
923 );
924 }
925
926 exp_builder.append_value(long_string);
928 exp_builder.flush_in_progress();
929 assert_eq!(
930 exp_builder.completed.last().unwrap().capacity(),
931 MAX_BLOCK_SIZE as usize
932 );
933 }
934
935 #[test]
936 fn test_append_value_n() {
937 let mut builder = StringViewBuilder::new();
939
940 builder.try_append_value_n("hello", 100).unwrap();
941 builder.append_value("world");
942 builder.try_append_value_n("foo", 50).unwrap();
943
944 let array = builder.finish();
945 assert_eq!(array.len(), 151);
946 assert_eq!(array.null_count(), 0);
947
948 for i in 0..100 {
950 assert_eq!(array.value(i), "hello");
951 }
952 assert_eq!(array.value(100), "world");
953 for i in 101..151 {
954 assert_eq!(array.value(i), "foo");
955 }
956
957 assert_eq!(array.data_buffers().len(), 0);
959 }
960
961 #[test]
962 fn test_append_value_n_with_deduplication() {
963 let long_string = "This is a very long string that exceeds the inline length";
964
965 let mut builder = StringViewBuilder::new().with_deduplicate_strings();
967
968 builder.append_value(long_string);
970
971 builder.try_append_value_n(long_string, 999).unwrap();
973
974 let array = builder.finish();
975 assert_eq!(array.len(), 1000);
976 assert_eq!(array.null_count(), 0);
977
978 for i in 0..1000 {
980 assert_eq!(array.value(i), long_string);
981 }
982
983 assert_eq!(array.data_buffers().len(), 1);
985
986 let first_view = array.views()[0];
988 for view in array.views().iter() {
989 assert_eq!(*view, first_view);
990 }
991 }
992
993 #[test]
994 fn test_append_value_n_zero() {
995 let mut builder = StringViewBuilder::new();
996
997 builder.append_value("first");
998 builder.try_append_value_n("should not appear", 0).unwrap();
999 builder.append_value("second");
1000
1001 let array = builder.finish();
1002 assert_eq!(array.len(), 2);
1003 assert_eq!(array.value(0), "first");
1004 assert_eq!(array.value(1), "second");
1005 }
1006}