1use crate::{
38 ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode,
39};
40use bitflags::bitflags;
41use std::borrow::Cow;
42use std::sync::Arc;
43use std::{
44 collections::HashMap,
45 ffi::{CStr, CString, c_char, c_void},
46};
47
bitflags! {
    /// Bit flags stored in the `flags` field of an [`FFI_ArrowSchema`].
    pub struct Flags: i64 {
        /// Set on exported fields whose dictionary is reported as ordered.
        const DICTIONARY_ORDERED = 0b00000001;
        /// Set on exported fields that are nullable.
        const NULLABLE = 0b00000010;
        /// Set on exported map types whose keys are sorted.
        const MAP_KEYS_SORTED = 0b00000100;
    }
}
62
/// C-ABI (`#[repr(C)]`) description of a data type, field or schema.
///
/// NOTE(review): the layout presumably mirrors `struct ArrowSchema` of the
/// Arrow C Data Interface; confirm against the specification before changing
/// field order or types.
///
/// Dropping a value calls its `release` callback, if one is set.
#[repr(C)]
#[derive(Debug)]
#[allow(non_camel_case_types)]
pub struct FFI_ArrowSchema {
    /// NUL-terminated format string describing the data type.
    pub format: *const c_char,
    /// NUL-terminated name, or null when the schema has no name.
    pub name: *const c_char,
    /// Length-prefixed binary encoding of key/value metadata, or null when
    /// there is none (see [`FFI_ArrowSchema::metadata`] for the layout).
    pub metadata: *const c_char,
    /// Bit field; see [`Flags`] for the known bits.
    pub flags: i64,
    /// Number of entries in `children`.
    pub n_children: i64,
    /// Array of `n_children` pointers to child schemas.
    pub children: *mut *mut FFI_ArrowSchema,
    /// Schema of the dictionary values, or null for non-dictionary types.
    pub dictionary: *mut FFI_ArrowSchema,
    /// Callback that frees the resources of this schema; `None` marks an
    /// empty or already released schema.
    pub release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowSchema)>,
    /// Opaque pointer for whoever produced the schema. Schemas built by
    /// [`FFI_ArrowSchema::try_new`] store a boxed `SchemaPrivateData` here.
    pub private_data: *mut c_void,
}
98
/// Heap data owned by a schema created through [`FFI_ArrowSchema::try_new`].
///
/// A boxed instance is stored behind `FFI_ArrowSchema::private_data` and is
/// reclaimed by `release_schema`.
struct SchemaPrivateData {
    /// Pointers to the boxed child schemas; `FFI_ArrowSchema::children`
    /// points into this allocation.
    children: Box<[*mut FFI_ArrowSchema]>,
    /// Pointer to the boxed dictionary schema, or null when there is none.
    dictionary: *mut FFI_ArrowSchema,
    /// Backing storage for `FFI_ArrowSchema::metadata`, if metadata was set.
    metadata: Option<Vec<u8>>,
}
104
105unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
107 if schema.is_null() {
108 return;
109 }
110 let schema = unsafe { &mut *schema };
111
112 drop(unsafe { CString::from_raw(schema.format as *mut c_char) });
114 if !schema.name.is_null() {
115 drop(unsafe { CString::from_raw(schema.name as *mut c_char) });
116 }
117 if !schema.private_data.is_null() {
118 let private_data = unsafe { Box::from_raw(schema.private_data as *mut SchemaPrivateData) };
119 for child in private_data.children.iter() {
120 drop(unsafe { Box::from_raw(*child) })
121 }
122 if !private_data.dictionary.is_null() {
123 drop(unsafe { Box::from_raw(private_data.dictionary) });
124 }
125
126 drop(private_data);
127 }
128
129 schema.release = None;
130}
131
132impl FFI_ArrowSchema {
133 pub fn try_new(
136 format: &str,
137 children: Vec<FFI_ArrowSchema>,
138 dictionary: Option<FFI_ArrowSchema>,
139 ) -> Result<Self, ArrowError> {
140 let mut this = Self::empty();
141
142 let children_ptr = children
143 .into_iter()
144 .map(Box::new)
145 .map(Box::into_raw)
146 .collect::<Box<_>>();
147
148 this.format = CString::new(format).unwrap().into_raw();
149 this.release = Some(release_schema);
150 this.n_children = children_ptr.len() as i64;
151
152 let dictionary_ptr = dictionary
153 .map(|d| Box::into_raw(Box::new(d)))
154 .unwrap_or(std::ptr::null_mut());
155
156 let mut private_data = Box::new(SchemaPrivateData {
157 children: children_ptr,
158 dictionary: dictionary_ptr,
159 metadata: None,
160 });
161
162 this.children = private_data.children.as_mut_ptr();
164
165 this.dictionary = dictionary_ptr;
166
167 this.private_data = Box::into_raw(private_data) as *mut c_void;
168
169 Ok(this)
170 }
171
172 pub fn with_name(mut self, name: &str) -> Result<Self, ArrowError> {
174 self.name = CString::new(name).unwrap().into_raw();
175 Ok(self)
176 }
177
178 pub fn with_flags(mut self, flags: Flags) -> Result<Self, ArrowError> {
180 self.flags = flags.bits();
181 Ok(self)
182 }
183
184 pub fn with_metadata<I, S>(mut self, metadata: I) -> Result<Self, ArrowError>
186 where
187 I: IntoIterator<Item = (S, S)>,
188 S: AsRef<str>,
189 {
190 let metadata: Vec<(S, S)> = metadata.into_iter().collect();
191 let new_metadata = if !metadata.is_empty() {
193 let mut metadata_serialized: Vec<u8> = Vec::new();
194 let num_entries: i32 = metadata.len().try_into().map_err(|_| {
195 ArrowError::CDataInterface(format!(
196 "metadata can only have {} entries, but {} were provided",
197 i32::MAX,
198 metadata.len()
199 ))
200 })?;
201 metadata_serialized.extend(num_entries.to_ne_bytes());
202
203 for (key, value) in metadata.into_iter() {
204 let key_len: i32 = key.as_ref().len().try_into().map_err(|_| {
205 ArrowError::CDataInterface(format!(
206 "metadata key can only have {} bytes, but {} were provided",
207 i32::MAX,
208 key.as_ref().len()
209 ))
210 })?;
211 let value_len: i32 = value.as_ref().len().try_into().map_err(|_| {
212 ArrowError::CDataInterface(format!(
213 "metadata value can only have {} bytes, but {} were provided",
214 i32::MAX,
215 value.as_ref().len()
216 ))
217 })?;
218
219 metadata_serialized.extend(key_len.to_ne_bytes());
220 metadata_serialized.extend_from_slice(key.as_ref().as_bytes());
221 metadata_serialized.extend(value_len.to_ne_bytes());
222 metadata_serialized.extend_from_slice(value.as_ref().as_bytes());
223 }
224
225 self.metadata = metadata_serialized.as_ptr() as *const c_char;
226 Some(metadata_serialized)
227 } else {
228 self.metadata = std::ptr::null_mut();
229 None
230 };
231
232 unsafe {
233 let mut private_data = Box::from_raw(self.private_data as *mut SchemaPrivateData);
234 private_data.metadata = new_metadata;
235 self.private_data = Box::into_raw(private_data) as *mut c_void;
236 }
237
238 Ok(self)
239 }
240
241 pub unsafe fn from_raw(schema: *mut FFI_ArrowSchema) -> Self {
254 unsafe { std::ptr::replace(schema, Self::empty()) }
255 }
256
257 pub fn empty() -> Self {
259 Self {
260 format: std::ptr::null_mut(),
261 name: std::ptr::null_mut(),
262 metadata: std::ptr::null_mut(),
263 flags: 0,
264 n_children: 0,
265 children: std::ptr::null_mut(),
266 dictionary: std::ptr::null_mut(),
267 release: None,
268 private_data: std::ptr::null_mut(),
269 }
270 }
271
272 pub fn format(&self) -> &str {
274 assert!(!self.format.is_null());
275 unsafe { CStr::from_ptr(self.format) }
277 .to_str()
278 .expect("The external API has a non-utf8 as format")
279 }
280
281 pub fn name(&self) -> Option<&str> {
283 if self.name.is_null() {
284 None
285 } else {
286 Some(
288 unsafe { CStr::from_ptr(self.name) }
289 .to_str()
290 .expect("The external API has a non-utf8 as name"),
291 )
292 }
293 }
294
295 pub fn flags(&self) -> Option<Flags> {
297 Flags::from_bits(self.flags)
298 }
299
300 pub fn child(&self, index: usize) -> &Self {
308 assert!(index < self.n_children as usize);
309 unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
310 }
311
312 pub fn children(&self) -> impl Iterator<Item = &Self> {
314 (0..self.n_children as usize).map(move |i| self.child(i))
315 }
316
317 pub fn nullable(&self) -> bool {
320 (self.flags / 2) & 1 == 1
321 }
322
323 pub fn dictionary(&self) -> Option<&Self> {
328 unsafe { self.dictionary.as_ref() }
329 }
330
331 pub fn map_keys_sorted(&self) -> bool {
335 self.flags & 0b00000100 != 0
336 }
337
338 pub fn dictionary_ordered(&self) -> bool {
340 self.flags & 0b00000001 != 0
341 }
342
343 pub fn metadata(&self) -> Result<HashMap<String, String>, ArrowError> {
345 if self.metadata.is_null() {
346 Ok(HashMap::new())
347 } else {
348 let mut pos = 0;
349
350 #[allow(clippy::unnecessary_cast)]
354 let buffer: *const u8 = self.metadata as *const u8;
355
356 fn next_four_bytes(buffer: *const u8, pos: &mut isize) -> [u8; 4] {
357 let out = unsafe {
358 [
359 *buffer.offset(*pos),
360 *buffer.offset(*pos + 1),
361 *buffer.offset(*pos + 2),
362 *buffer.offset(*pos + 3),
363 ]
364 };
365 *pos += 4;
366 out
367 }
368
369 fn next_n_bytes(buffer: *const u8, pos: &mut isize, n: i32) -> &[u8] {
370 let out = unsafe {
371 std::slice::from_raw_parts(buffer.offset(*pos), n.try_into().unwrap())
372 };
373 *pos += isize::try_from(n).unwrap();
374 out
375 }
376
377 let num_entries = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
378 if num_entries < 0 {
379 return Err(ArrowError::CDataInterface(
380 "Negative number of metadata entries".to_string(),
381 ));
382 }
383
384 let mut metadata =
385 HashMap::with_capacity(num_entries.try_into().expect("Too many metadata entries"));
386
387 for _ in 0..num_entries {
388 let key_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
389 if key_length < 0 {
390 return Err(ArrowError::CDataInterface(
391 "Negative key length in metadata".to_string(),
392 ));
393 }
394 let key = String::from_utf8(next_n_bytes(buffer, &mut pos, key_length).to_vec())?;
395 let value_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
396 if value_length < 0 {
397 return Err(ArrowError::CDataInterface(
398 "Negative value length in metadata".to_string(),
399 ));
400 }
401 let value =
402 String::from_utf8(next_n_bytes(buffer, &mut pos, value_length).to_vec())?;
403 metadata.insert(key, value);
404 }
405
406 Ok(metadata)
407 }
408 }
409}
410
411impl Drop for FFI_ArrowSchema {
412 fn drop(&mut self) {
413 match self.release {
414 None => (),
415 Some(release) => unsafe { release(self) },
416 };
417 }
418}
419
// The struct holds raw pointers, so `Send` is not derived automatically.
// NOTE(review): this assumes the pointed-to data and the `release` callback
// may be used from another thread; confirm for schemas from foreign producers.
unsafe impl Send for FFI_ArrowSchema {}
421
422impl TryFrom<&FFI_ArrowSchema> for DataType {
423 type Error = ArrowError;
424
425 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
427 let mut dtype = match c_schema.format() {
428 "n" => DataType::Null,
429 "b" => DataType::Boolean,
430 "c" => DataType::Int8,
431 "C" => DataType::UInt8,
432 "s" => DataType::Int16,
433 "S" => DataType::UInt16,
434 "i" => DataType::Int32,
435 "I" => DataType::UInt32,
436 "l" => DataType::Int64,
437 "L" => DataType::UInt64,
438 "e" => DataType::Float16,
439 "f" => DataType::Float32,
440 "g" => DataType::Float64,
441 "vz" => DataType::BinaryView,
442 "z" => DataType::Binary,
443 "Z" => DataType::LargeBinary,
444 "vu" => DataType::Utf8View,
445 "u" => DataType::Utf8,
446 "U" => DataType::LargeUtf8,
447 "tdD" => DataType::Date32,
448 "tdm" => DataType::Date64,
449 "tts" => DataType::Time32(TimeUnit::Second),
450 "ttm" => DataType::Time32(TimeUnit::Millisecond),
451 "ttu" => DataType::Time64(TimeUnit::Microsecond),
452 "ttn" => DataType::Time64(TimeUnit::Nanosecond),
453 "tDs" => DataType::Duration(TimeUnit::Second),
454 "tDm" => DataType::Duration(TimeUnit::Millisecond),
455 "tDu" => DataType::Duration(TimeUnit::Microsecond),
456 "tDn" => DataType::Duration(TimeUnit::Nanosecond),
457 "tiM" => DataType::Interval(IntervalUnit::YearMonth),
458 "tiD" => DataType::Interval(IntervalUnit::DayTime),
459 "tin" => DataType::Interval(IntervalUnit::MonthDayNano),
460 "+l" => {
461 let c_child = c_schema.child(0);
462 DataType::List(Arc::new(Field::try_from(c_child)?))
463 }
464 "+L" => {
465 let c_child = c_schema.child(0);
466 DataType::LargeList(Arc::new(Field::try_from(c_child)?))
467 }
468 "+vl" => {
469 let c_child = c_schema.child(0);
470 DataType::ListView(Arc::new(Field::try_from(c_child)?))
471 }
472 "+vL" => {
473 let c_child = c_schema.child(0);
474 DataType::LargeListView(Arc::new(Field::try_from(c_child)?))
475 }
476 "+s" => {
477 let fields = c_schema.children().map(Field::try_from);
478 DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
479 }
480 "+m" => {
481 let c_child = c_schema.child(0);
482 let map_keys_sorted = c_schema.map_keys_sorted();
483 DataType::Map(Arc::new(Field::try_from(c_child)?), map_keys_sorted)
484 }
485 "+r" => {
486 let c_run_ends = c_schema.child(0);
487 let c_values = c_schema.child(1);
488 DataType::RunEndEncoded(
489 Arc::new(Field::try_from(c_run_ends)?),
490 Arc::new(Field::try_from(c_values)?),
491 )
492 }
493 other => {
495 match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
496 ["w", num_bytes] => {
498 let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
499 ArrowError::CDataInterface(
500 "FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
501 })?;
502 DataType::FixedSizeBinary(parsed_num_bytes)
503 }
504 ["+w", num_elems] => {
506 let c_child = c_schema.child(0);
507 let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
508 ArrowError::CDataInterface(
509 "The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
510 })?;
511 DataType::FixedSizeList(
512 Arc::new(Field::try_from(c_child)?),
513 parsed_num_elems,
514 )
515 }
516 ["d", extra] => match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
518 [precision, scale] => {
519 let parsed_precision = precision.parse::<u8>().map_err(|_| {
520 ArrowError::CDataInterface(
521 "The decimal type requires an integer precision".to_string(),
522 )
523 })?;
524 let parsed_scale = scale.parse::<i8>().map_err(|_| {
525 ArrowError::CDataInterface(
526 "The decimal type requires an integer scale".to_string(),
527 )
528 })?;
529 DataType::Decimal128(parsed_precision, parsed_scale)
530 }
531 [precision, scale, bits] => {
532 let parsed_precision = precision.parse::<u8>().map_err(|_| {
533 ArrowError::CDataInterface(
534 "The decimal type requires an integer precision".to_string(),
535 )
536 })?;
537 let parsed_scale = scale.parse::<i8>().map_err(|_| {
538 ArrowError::CDataInterface(
539 "The decimal type requires an integer scale".to_string(),
540 )
541 })?;
542 match *bits {
543 "32" => DataType::Decimal32(parsed_precision, parsed_scale),
544 "64" => DataType::Decimal64(parsed_precision, parsed_scale),
545 "128" => DataType::Decimal128(parsed_precision, parsed_scale),
546 "256" => DataType::Decimal256(parsed_precision, parsed_scale),
547 _ => return Err(ArrowError::CDataInterface("Only 32/64/128/256 bit wide decimals are supported in the Rust implementation".to_string())),
548 }
549 }
550 _ => {
551 return Err(ArrowError::CDataInterface(format!(
552 "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
553 )));
554 }
555 },
556 ["+ud", extra] => {
558 let type_ids = extra
559 .split(',')
560 .map(|t| {
561 t.parse::<i8>().map_err(|_| {
562 ArrowError::CDataInterface(
563 "The Union type requires an integer type id".to_string(),
564 )
565 })
566 })
567 .collect::<Result<Vec<_>, ArrowError>>()?;
568 let mut fields = Vec::with_capacity(type_ids.len());
569 for idx in 0..c_schema.n_children {
570 let c_child = c_schema.child(idx as usize);
571 let field = Field::try_from(c_child)?;
572 fields.push(field);
573 }
574
575 if fields.len() != type_ids.len() {
576 return Err(ArrowError::CDataInterface(
577 "The Union type requires same number of fields and type ids"
578 .to_string(),
579 ));
580 }
581
582 DataType::Union(UnionFields::try_new(type_ids, fields)?, UnionMode::Dense)
583 }
584 ["+us", extra] => {
586 let type_ids = extra
587 .split(',')
588 .map(|t| {
589 t.parse::<i8>().map_err(|_| {
590 ArrowError::CDataInterface(
591 "The Union type requires an integer type id".to_string(),
592 )
593 })
594 })
595 .collect::<Result<Vec<_>, ArrowError>>()?;
596 let mut fields = Vec::with_capacity(type_ids.len());
597 for idx in 0..c_schema.n_children {
598 let c_child = c_schema.child(idx as usize);
599 let field = Field::try_from(c_child)?;
600 fields.push(field);
601 }
602
603 if fields.len() != type_ids.len() {
604 return Err(ArrowError::CDataInterface(
605 "The Union type requires same number of fields and type ids"
606 .to_string(),
607 ));
608 }
609
610 DataType::Union(UnionFields::try_new(type_ids, fields)?, UnionMode::Sparse)
611 }
612
613 ["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
615 ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
616 ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
617 ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
618 ["tss", tz] => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz))),
619 ["tsm", tz] => DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz))),
620 ["tsu", tz] => DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz))),
621 ["tsn", tz] => DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz))),
622 _ => {
623 return Err(ArrowError::CDataInterface(format!(
624 "The datatype \"{other:?}\" is still not supported in Rust implementation"
625 )));
626 }
627 }
628 }
629 };
630
631 if let Some(dict_schema) = c_schema.dictionary() {
632 let value_type = Self::try_from(dict_schema)?;
633 dtype = DataType::Dictionary(Box::new(dtype), Box::new(value_type));
634 }
635
636 Ok(dtype)
637 }
638}
639
640impl TryFrom<&FFI_ArrowSchema> for Field {
641 type Error = ArrowError;
642
643 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
644 let dtype = DataType::try_from(c_schema)?;
645 let mut field = Field::new(c_schema.name().unwrap_or(""), dtype, c_schema.nullable());
646 field.set_metadata(c_schema.metadata()?);
647 Ok(field)
648 }
649}
650
651impl TryFrom<&FFI_ArrowSchema> for Schema {
652 type Error = ArrowError;
653
654 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
655 let dtype = DataType::try_from(c_schema)?;
657 if let DataType::Struct(fields) = dtype {
658 Ok(Schema::new(fields).with_metadata(c_schema.metadata()?))
659 } else {
660 Err(ArrowError::CDataInterface(
661 "Unable to interpret C data struct as a Schema".to_string(),
662 ))
663 }
664 }
665}
666
667impl TryFrom<&DataType> for FFI_ArrowSchema {
668 type Error = ArrowError;
669
670 fn try_from(dtype: &DataType) -> Result<Self, ArrowError> {
672 let format = get_format_string(dtype)?;
673 let children = match dtype {
675 DataType::List(child)
676 | DataType::LargeList(child)
677 | DataType::ListView(child)
678 | DataType::LargeListView(child)
679 | DataType::FixedSizeList(child, _)
680 | DataType::Map(child, _) => {
681 vec![FFI_ArrowSchema::try_from(child.as_ref())?]
682 }
683 DataType::Union(fields, _) => fields
684 .iter()
685 .map(|(_, f)| f.as_ref().try_into())
686 .collect::<Result<Vec<_>, ArrowError>>()?,
687 DataType::Struct(fields) => fields
688 .iter()
689 .map(FFI_ArrowSchema::try_from)
690 .collect::<Result<Vec<_>, ArrowError>>()?,
691 DataType::RunEndEncoded(run_ends, values) => vec![
692 FFI_ArrowSchema::try_from(run_ends.as_ref())?,
693 FFI_ArrowSchema::try_from(values.as_ref())?,
694 ],
695 _ => vec![],
696 };
697 let dictionary = if let DataType::Dictionary(_, value_data_type) = dtype {
698 Some(Self::try_from(value_data_type.as_ref())?)
699 } else {
700 None
701 };
702
703 let flags = match dtype {
704 DataType::Map(_, true) => Flags::MAP_KEYS_SORTED,
705 _ => Flags::empty(),
706 };
707
708 FFI_ArrowSchema::try_new(&format, children, dictionary)?.with_flags(flags)
709 }
710}
711
712fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError> {
713 match dtype {
714 DataType::Null => Ok("n".into()),
715 DataType::Boolean => Ok("b".into()),
716 DataType::Int8 => Ok("c".into()),
717 DataType::UInt8 => Ok("C".into()),
718 DataType::Int16 => Ok("s".into()),
719 DataType::UInt16 => Ok("S".into()),
720 DataType::Int32 => Ok("i".into()),
721 DataType::UInt32 => Ok("I".into()),
722 DataType::Int64 => Ok("l".into()),
723 DataType::UInt64 => Ok("L".into()),
724 DataType::Float16 => Ok("e".into()),
725 DataType::Float32 => Ok("f".into()),
726 DataType::Float64 => Ok("g".into()),
727 DataType::BinaryView => Ok("vz".into()),
728 DataType::Binary => Ok("z".into()),
729 DataType::LargeBinary => Ok("Z".into()),
730 DataType::Utf8View => Ok("vu".into()),
731 DataType::Utf8 => Ok("u".into()),
732 DataType::LargeUtf8 => Ok("U".into()),
733 DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))),
734 DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))),
735 DataType::Decimal32(precision, scale) => {
736 Ok(Cow::Owned(format!("d:{precision},{scale},32")))
737 }
738 DataType::Decimal64(precision, scale) => {
739 Ok(Cow::Owned(format!("d:{precision},{scale},64")))
740 }
741 DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))),
742 DataType::Decimal256(precision, scale) => {
743 Ok(Cow::Owned(format!("d:{precision},{scale},256")))
744 }
745 DataType::Date32 => Ok("tdD".into()),
746 DataType::Date64 => Ok("tdm".into()),
747 DataType::Time32(TimeUnit::Second) => Ok("tts".into()),
748 DataType::Time32(TimeUnit::Millisecond) => Ok("ttm".into()),
749 DataType::Time64(TimeUnit::Microsecond) => Ok("ttu".into()),
750 DataType::Time64(TimeUnit::Nanosecond) => Ok("ttn".into()),
751 DataType::Timestamp(TimeUnit::Second, None) => Ok("tss:".into()),
752 DataType::Timestamp(TimeUnit::Millisecond, None) => Ok("tsm:".into()),
753 DataType::Timestamp(TimeUnit::Microsecond, None) => Ok("tsu:".into()),
754 DataType::Timestamp(TimeUnit::Nanosecond, None) => Ok("tsn:".into()),
755 DataType::Timestamp(TimeUnit::Second, Some(tz)) => Ok(Cow::Owned(format!("tss:{tz}"))),
756 DataType::Timestamp(TimeUnit::Millisecond, Some(tz)) => Ok(Cow::Owned(format!("tsm:{tz}"))),
757 DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) => Ok(Cow::Owned(format!("tsu:{tz}"))),
758 DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Ok(Cow::Owned(format!("tsn:{tz}"))),
759 DataType::Duration(TimeUnit::Second) => Ok("tDs".into()),
760 DataType::Duration(TimeUnit::Millisecond) => Ok("tDm".into()),
761 DataType::Duration(TimeUnit::Microsecond) => Ok("tDu".into()),
762 DataType::Duration(TimeUnit::Nanosecond) => Ok("tDn".into()),
763 DataType::Interval(IntervalUnit::YearMonth) => Ok("tiM".into()),
764 DataType::Interval(IntervalUnit::DayTime) => Ok("tiD".into()),
765 DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()),
766 DataType::List(_) => Ok("+l".into()),
767 DataType::LargeList(_) => Ok("+L".into()),
768 DataType::ListView(_) => Ok("+vl".into()),
769 DataType::LargeListView(_) => Ok("+vL".into()),
770 DataType::Struct(_) => Ok("+s".into()),
771 DataType::Map(_, _) => Ok("+m".into()),
772 DataType::RunEndEncoded(_, _) => Ok("+r".into()),
773 DataType::Dictionary(key_data_type, _) => get_format_string(key_data_type),
774 DataType::Union(fields, mode) => {
775 let formats = fields
776 .iter()
777 .map(|(t, _)| t.to_string())
778 .collect::<Vec<_>>();
779 match mode {
780 UnionMode::Dense => Ok(Cow::Owned(format!("{}:{}", "+ud", formats.join(",")))),
781 UnionMode::Sparse => Ok(Cow::Owned(format!("{}:{}", "+us", formats.join(",")))),
782 }
783 }
784 other => Err(ArrowError::CDataInterface(format!(
785 "The datatype \"{other:?}\" is still not supported in Rust implementation"
786 ))),
787 }
788}
789
790impl TryFrom<&FieldRef> for FFI_ArrowSchema {
791 type Error = ArrowError;
792
793 fn try_from(value: &FieldRef) -> Result<Self, Self::Error> {
794 value.as_ref().try_into()
795 }
796}
797
798impl TryFrom<&Field> for FFI_ArrowSchema {
799 type Error = ArrowError;
800
801 fn try_from(field: &Field) -> Result<Self, ArrowError> {
802 let mut flags = if field.is_nullable() {
803 Flags::NULLABLE
804 } else {
805 Flags::empty()
806 };
807
808 if let Some(true) = field.dict_is_ordered() {
809 flags |= Flags::DICTIONARY_ORDERED;
810 }
811
812 FFI_ArrowSchema::try_from(field.data_type())?
813 .with_name(field.name())?
814 .with_flags(flags)?
815 .with_metadata(field.metadata())
816 }
817}
818
819impl TryFrom<&Schema> for FFI_ArrowSchema {
820 type Error = ArrowError;
821
822 fn try_from(schema: &Schema) -> Result<Self, ArrowError> {
823 let dtype = DataType::Struct(schema.fields().clone());
824 let c_schema = FFI_ArrowSchema::try_from(&dtype)?.with_metadata(&schema.metadata)?;
825 Ok(c_schema)
826 }
827}
828
829impl TryFrom<DataType> for FFI_ArrowSchema {
830 type Error = ArrowError;
831
832 fn try_from(dtype: DataType) -> Result<Self, ArrowError> {
833 FFI_ArrowSchema::try_from(&dtype)
834 }
835}
836
837impl TryFrom<Field> for FFI_ArrowSchema {
838 type Error = ArrowError;
839
840 fn try_from(field: Field) -> Result<Self, ArrowError> {
841 FFI_ArrowSchema::try_from(&field)
842 }
843}
844
845impl TryFrom<Schema> for FFI_ArrowSchema {
846 type Error = ArrowError;
847
848 fn try_from(schema: Schema) -> Result<Self, ArrowError> {
849 FFI_ArrowSchema::try_from(&schema)
850 }
851}
852
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Fields;

    /// Exports `dtype`, imports it again and checks that nothing changed.
    fn round_trip_type(dtype: DataType) {
        let exported = FFI_ArrowSchema::try_from(&dtype).unwrap();
        let imported = DataType::try_from(&exported).unwrap();
        assert_eq!(imported, dtype);
    }

    /// Exports `field`, imports it again and checks that nothing changed.
    fn round_trip_field(field: Field) {
        let exported = FFI_ArrowSchema::try_from(&field).unwrap();
        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported, field);
    }

    /// Exports `schema`, imports it again and checks that nothing changed.
    fn round_trip_schema(schema: Schema) {
        let exported = FFI_ArrowSchema::try_from(&schema).unwrap();
        let imported = Schema::try_from(&exported).unwrap();
        assert_eq!(imported, schema);
    }

    #[test]
    fn test_type() {
        // Item field shared by the list-like cases.
        let int16_item = || Arc::new(Field::new("a", DataType::Int16, false));

        let cases = vec![
            DataType::Int64,
            DataType::UInt64,
            DataType::Float64,
            DataType::Date64,
            DataType::Time64(TimeUnit::Nanosecond),
            DataType::FixedSizeBinary(12),
            DataType::FixedSizeList(Arc::new(Field::new("a", DataType::Int64, false)), 5),
            DataType::Utf8,
            DataType::Utf8View,
            DataType::BinaryView,
            DataType::Binary,
            DataType::LargeBinary,
            DataType::List(int16_item()),
            DataType::ListView(int16_item()),
            DataType::LargeListView(int16_item()),
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Utf8, true)])),
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::Int32, false)),
                Arc::new(Field::new("values", DataType::Binary, true)),
            ),
        ];

        for dtype in cases {
            round_trip_type(dtype);
        }
    }

    #[test]
    fn test_field() {
        let struct_type = DataType::Struct(vec![Field::new("a", DataType::Utf8, true)].into());
        let field = Field::new("test", struct_type, true);
        round_trip_field(field);
    }

    #[test]
    fn test_schema() {
        // A schema with metadata survives the round trip.
        let fields = vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ];
        let with_metadata =
            Schema::new(fields).with_metadata([("hello".to_string(), "world".to_string())].into());
        round_trip_schema(with_metadata);

        // A struct type can be imported as a schema.
        let struct_type = DataType::Struct(Fields::from(vec![
            Field::new("a", DataType::Utf8, true),
            Field::new("b", DataType::Int16, false),
        ]));
        let exported = FFI_ArrowSchema::try_from(&struct_type).unwrap();
        let imported = Schema::try_from(&exported).unwrap();
        assert_eq!(imported.fields().len(), 2);

        // Anything that is not a struct cannot be imported as a schema.
        let exported = FFI_ArrowSchema::try_from(&DataType::Float64).unwrap();
        assert!(Schema::try_from(&exported).is_err());
    }

    #[test]
    fn test_map_keys_sorted() {
        let entry_fields = vec![
            Field::new("keys", DataType::Int32, false),
            Field::new("values", DataType::UInt32, false),
        ];
        let entries = Field::new("entries", DataType::Struct(entry_fields.into()), false);
        let sorted_map = DataType::Map(Arc::new(entries), true);

        let exported = FFI_ArrowSchema::try_from(sorted_map).unwrap();
        assert!(exported.map_keys_sorted());
    }

    #[test]
    fn test_dictionary_ordered() {
        #[allow(deprecated)]
        let schema = Schema::new(vec![Field::new_dict(
            "dict",
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            false,
            0,
            true,
        )]);

        let exported = FFI_ArrowSchema::try_from(schema).unwrap();
        let dict_child = exported.child(0);
        assert!(dict_child.dictionary_ordered());
    }

    #[test]
    fn test_set_field_metadata() {
        let empty: HashMap<String, String> = [].into();
        let single: HashMap<String, String> = [("key".to_string(), "value".to_string())].into();
        let edge_cases: HashMap<String, String> = [
            ("key".to_string(), "".to_string()),
            ("ascii123".to_string(), "你好".to_string()),
            ("".to_string(), "value".to_string()),
        ]
        .into();

        let mut exported = FFI_ArrowSchema::try_new("b", vec![], None)
            .unwrap()
            .with_name("test")
            .unwrap();

        // Each case replaces the metadata set by the previous one.
        for expected in [empty, single, edge_cases] {
            exported = exported.with_metadata(&expected).unwrap();
            let imported = Field::try_from(&exported).unwrap();
            assert_eq!(imported.metadata(), &expected);
        }
    }

    #[test]
    fn test_import_field_with_null_name() {
        let exported = FFI_ArrowSchema::try_from(&DataType::Int16).unwrap();
        assert!(exported.name().is_none());

        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported.name(), "");
    }
}