1use crate::{
38 ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode,
39};
40use bitflags::bitflags;
41use std::borrow::Cow;
42use std::sync::Arc;
43use std::{
44 collections::HashMap,
45 ffi::{CStr, CString, c_char, c_void},
46};
47
48bitflags! {
49 pub struct Flags: i64 {
54 const DICTIONARY_ORDERED = 0b00000001;
56 const NULLABLE = 0b00000010;
58 const MAP_KEYS_SORTED = 0b00000100;
60 }
61}
62
/// C-layout (`#[repr(C)]`) description of a schema, exchanged through raw pointers.
///
/// The field order is part of the binary layout and must not be changed.
#[repr(C)]
#[derive(Debug)]
#[allow(non_camel_case_types)]
pub struct FFI_ArrowSchema {
    /// NUL-terminated format string; decoded by `format()`.
    format: *const c_char,
    /// NUL-terminated name, or null when the schema has no name.
    name: *const c_char,
    /// Serialized key/value metadata (decoded by `metadata()`), or null when absent.
    metadata: *const c_char,
    /// Bit set of [`Flags`] values.
    flags: i64,
    /// Number of entries reachable through `children`.
    n_children: i64,
    /// Array of `n_children` pointers to child schemas.
    children: *mut *mut FFI_ArrowSchema,
    /// Dictionary value schema, or null when there is none.
    dictionary: *mut FFI_ArrowSchema,
    /// Release callback; `None` marks a schema that has been released (or was never initialized).
    release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowSchema)>,
    /// Opaque producer data; schemas built by `try_new` store a boxed `SchemaPrivateData` here.
    private_data: *mut c_void,
}
89
/// Allocations owned by a schema built with `FFI_ArrowSchema::try_new`.
///
/// A boxed instance is stored behind `FFI_ArrowSchema::private_data` and is
/// reclaimed by `release_schema`.
struct SchemaPrivateData {
    /// Backing storage for the `children` pointer array exposed by the schema.
    children: Box<[*mut FFI_ArrowSchema]>,
    /// Boxed dictionary schema, or null when there is none.
    dictionary: *mut FFI_ArrowSchema,
    /// Buffer the schema's `metadata` pointer refers to, when metadata was set.
    metadata: Option<Vec<u8>>,
}
95
96unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
98 if schema.is_null() {
99 return;
100 }
101 let schema = unsafe { &mut *schema };
102
103 drop(unsafe { CString::from_raw(schema.format as *mut c_char) });
105 if !schema.name.is_null() {
106 drop(unsafe { CString::from_raw(schema.name as *mut c_char) });
107 }
108 if !schema.private_data.is_null() {
109 let private_data = unsafe { Box::from_raw(schema.private_data as *mut SchemaPrivateData) };
110 for child in private_data.children.iter() {
111 drop(unsafe { Box::from_raw(*child) })
112 }
113 if !private_data.dictionary.is_null() {
114 drop(unsafe { Box::from_raw(private_data.dictionary) });
115 }
116
117 drop(private_data);
118 }
119
120 schema.release = None;
121}
122
123impl FFI_ArrowSchema {
124 pub fn try_new(
127 format: &str,
128 children: Vec<FFI_ArrowSchema>,
129 dictionary: Option<FFI_ArrowSchema>,
130 ) -> Result<Self, ArrowError> {
131 let mut this = Self::empty();
132
133 let children_ptr = children
134 .into_iter()
135 .map(Box::new)
136 .map(Box::into_raw)
137 .collect::<Box<_>>();
138
139 this.format = CString::new(format).unwrap().into_raw();
140 this.release = Some(release_schema);
141 this.n_children = children_ptr.len() as i64;
142
143 let dictionary_ptr = dictionary
144 .map(|d| Box::into_raw(Box::new(d)))
145 .unwrap_or(std::ptr::null_mut());
146
147 let mut private_data = Box::new(SchemaPrivateData {
148 children: children_ptr,
149 dictionary: dictionary_ptr,
150 metadata: None,
151 });
152
153 this.children = private_data.children.as_mut_ptr();
155
156 this.dictionary = dictionary_ptr;
157
158 this.private_data = Box::into_raw(private_data) as *mut c_void;
159
160 Ok(this)
161 }
162
163 pub fn with_name(mut self, name: &str) -> Result<Self, ArrowError> {
165 self.name = CString::new(name).unwrap().into_raw();
166 Ok(self)
167 }
168
169 pub fn with_flags(mut self, flags: Flags) -> Result<Self, ArrowError> {
171 self.flags = flags.bits();
172 Ok(self)
173 }
174
175 pub fn with_metadata<I, S>(mut self, metadata: I) -> Result<Self, ArrowError>
177 where
178 I: IntoIterator<Item = (S, S)>,
179 S: AsRef<str>,
180 {
181 let metadata: Vec<(S, S)> = metadata.into_iter().collect();
182 let new_metadata = if !metadata.is_empty() {
184 let mut metadata_serialized: Vec<u8> = Vec::new();
185 let num_entries: i32 = metadata.len().try_into().map_err(|_| {
186 ArrowError::CDataInterface(format!(
187 "metadata can only have {} entries, but {} were provided",
188 i32::MAX,
189 metadata.len()
190 ))
191 })?;
192 metadata_serialized.extend(num_entries.to_ne_bytes());
193
194 for (key, value) in metadata.into_iter() {
195 let key_len: i32 = key.as_ref().len().try_into().map_err(|_| {
196 ArrowError::CDataInterface(format!(
197 "metadata key can only have {} bytes, but {} were provided",
198 i32::MAX,
199 key.as_ref().len()
200 ))
201 })?;
202 let value_len: i32 = value.as_ref().len().try_into().map_err(|_| {
203 ArrowError::CDataInterface(format!(
204 "metadata value can only have {} bytes, but {} were provided",
205 i32::MAX,
206 value.as_ref().len()
207 ))
208 })?;
209
210 metadata_serialized.extend(key_len.to_ne_bytes());
211 metadata_serialized.extend_from_slice(key.as_ref().as_bytes());
212 metadata_serialized.extend(value_len.to_ne_bytes());
213 metadata_serialized.extend_from_slice(value.as_ref().as_bytes());
214 }
215
216 self.metadata = metadata_serialized.as_ptr() as *const c_char;
217 Some(metadata_serialized)
218 } else {
219 self.metadata = std::ptr::null_mut();
220 None
221 };
222
223 unsafe {
224 let mut private_data = Box::from_raw(self.private_data as *mut SchemaPrivateData);
225 private_data.metadata = new_metadata;
226 self.private_data = Box::into_raw(private_data) as *mut c_void;
227 }
228
229 Ok(self)
230 }
231
232 pub unsafe fn from_raw(schema: *mut FFI_ArrowSchema) -> Self {
245 unsafe { std::ptr::replace(schema, Self::empty()) }
246 }
247
248 pub fn empty() -> Self {
250 Self {
251 format: std::ptr::null_mut(),
252 name: std::ptr::null_mut(),
253 metadata: std::ptr::null_mut(),
254 flags: 0,
255 n_children: 0,
256 children: std::ptr::null_mut(),
257 dictionary: std::ptr::null_mut(),
258 release: None,
259 private_data: std::ptr::null_mut(),
260 }
261 }
262
263 pub fn format(&self) -> &str {
265 assert!(!self.format.is_null());
266 unsafe { CStr::from_ptr(self.format) }
268 .to_str()
269 .expect("The external API has a non-utf8 as format")
270 }
271
272 pub fn name(&self) -> Option<&str> {
274 if self.name.is_null() {
275 None
276 } else {
277 Some(
279 unsafe { CStr::from_ptr(self.name) }
280 .to_str()
281 .expect("The external API has a non-utf8 as name"),
282 )
283 }
284 }
285
286 pub fn flags(&self) -> Option<Flags> {
288 Flags::from_bits(self.flags)
289 }
290
291 pub fn child(&self, index: usize) -> &Self {
299 assert!(index < self.n_children as usize);
300 unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
301 }
302
303 pub fn children(&self) -> impl Iterator<Item = &Self> {
305 (0..self.n_children as usize).map(move |i| self.child(i))
306 }
307
308 pub fn nullable(&self) -> bool {
311 (self.flags / 2) & 1 == 1
312 }
313
314 pub fn dictionary(&self) -> Option<&Self> {
319 unsafe { self.dictionary.as_ref() }
320 }
321
322 pub fn map_keys_sorted(&self) -> bool {
326 self.flags & 0b00000100 != 0
327 }
328
329 pub fn dictionary_ordered(&self) -> bool {
331 self.flags & 0b00000001 != 0
332 }
333
334 pub fn metadata(&self) -> Result<HashMap<String, String>, ArrowError> {
336 if self.metadata.is_null() {
337 Ok(HashMap::new())
338 } else {
339 let mut pos = 0;
340
341 #[allow(clippy::unnecessary_cast)]
345 let buffer: *const u8 = self.metadata as *const u8;
346
347 fn next_four_bytes(buffer: *const u8, pos: &mut isize) -> [u8; 4] {
348 let out = unsafe {
349 [
350 *buffer.offset(*pos),
351 *buffer.offset(*pos + 1),
352 *buffer.offset(*pos + 2),
353 *buffer.offset(*pos + 3),
354 ]
355 };
356 *pos += 4;
357 out
358 }
359
360 fn next_n_bytes(buffer: *const u8, pos: &mut isize, n: i32) -> &[u8] {
361 let out = unsafe {
362 std::slice::from_raw_parts(buffer.offset(*pos), n.try_into().unwrap())
363 };
364 *pos += isize::try_from(n).unwrap();
365 out
366 }
367
368 let num_entries = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
369 if num_entries < 0 {
370 return Err(ArrowError::CDataInterface(
371 "Negative number of metadata entries".to_string(),
372 ));
373 }
374
375 let mut metadata =
376 HashMap::with_capacity(num_entries.try_into().expect("Too many metadata entries"));
377
378 for _ in 0..num_entries {
379 let key_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
380 if key_length < 0 {
381 return Err(ArrowError::CDataInterface(
382 "Negative key length in metadata".to_string(),
383 ));
384 }
385 let key = String::from_utf8(next_n_bytes(buffer, &mut pos, key_length).to_vec())?;
386 let value_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
387 if value_length < 0 {
388 return Err(ArrowError::CDataInterface(
389 "Negative value length in metadata".to_string(),
390 ));
391 }
392 let value =
393 String::from_utf8(next_n_bytes(buffer, &mut pos, value_length).to_vec())?;
394 metadata.insert(key, value);
395 }
396
397 Ok(metadata)
398 }
399 }
400}
401
402impl Drop for FFI_ArrowSchema {
403 fn drop(&mut self) {
404 match self.release {
405 None => (),
406 Some(release) => unsafe { release(self) },
407 };
408 }
409}
410
// NOTE(review): the struct holds raw pointers, so `Send` is asserted manually here;
// presumably sound because a schema exclusively owns what it points to — confirm.
unsafe impl Send for FFI_ArrowSchema {}
412
413impl TryFrom<&FFI_ArrowSchema> for DataType {
414 type Error = ArrowError;
415
416 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
418 let mut dtype = match c_schema.format() {
419 "n" => DataType::Null,
420 "b" => DataType::Boolean,
421 "c" => DataType::Int8,
422 "C" => DataType::UInt8,
423 "s" => DataType::Int16,
424 "S" => DataType::UInt16,
425 "i" => DataType::Int32,
426 "I" => DataType::UInt32,
427 "l" => DataType::Int64,
428 "L" => DataType::UInt64,
429 "e" => DataType::Float16,
430 "f" => DataType::Float32,
431 "g" => DataType::Float64,
432 "vz" => DataType::BinaryView,
433 "z" => DataType::Binary,
434 "Z" => DataType::LargeBinary,
435 "vu" => DataType::Utf8View,
436 "u" => DataType::Utf8,
437 "U" => DataType::LargeUtf8,
438 "tdD" => DataType::Date32,
439 "tdm" => DataType::Date64,
440 "tts" => DataType::Time32(TimeUnit::Second),
441 "ttm" => DataType::Time32(TimeUnit::Millisecond),
442 "ttu" => DataType::Time64(TimeUnit::Microsecond),
443 "ttn" => DataType::Time64(TimeUnit::Nanosecond),
444 "tDs" => DataType::Duration(TimeUnit::Second),
445 "tDm" => DataType::Duration(TimeUnit::Millisecond),
446 "tDu" => DataType::Duration(TimeUnit::Microsecond),
447 "tDn" => DataType::Duration(TimeUnit::Nanosecond),
448 "tiM" => DataType::Interval(IntervalUnit::YearMonth),
449 "tiD" => DataType::Interval(IntervalUnit::DayTime),
450 "tin" => DataType::Interval(IntervalUnit::MonthDayNano),
451 "+l" => {
452 let c_child = c_schema.child(0);
453 DataType::List(Arc::new(Field::try_from(c_child)?))
454 }
455 "+L" => {
456 let c_child = c_schema.child(0);
457 DataType::LargeList(Arc::new(Field::try_from(c_child)?))
458 }
459 "+vl" => {
460 let c_child = c_schema.child(0);
461 DataType::ListView(Arc::new(Field::try_from(c_child)?))
462 }
463 "+vL" => {
464 let c_child = c_schema.child(0);
465 DataType::LargeListView(Arc::new(Field::try_from(c_child)?))
466 }
467 "+s" => {
468 let fields = c_schema.children().map(Field::try_from);
469 DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
470 }
471 "+m" => {
472 let c_child = c_schema.child(0);
473 let map_keys_sorted = c_schema.map_keys_sorted();
474 DataType::Map(Arc::new(Field::try_from(c_child)?), map_keys_sorted)
475 }
476 "+r" => {
477 let c_run_ends = c_schema.child(0);
478 let c_values = c_schema.child(1);
479 DataType::RunEndEncoded(
480 Arc::new(Field::try_from(c_run_ends)?),
481 Arc::new(Field::try_from(c_values)?),
482 )
483 }
484 other => {
486 match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
487 ["w", num_bytes] => {
489 let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
490 ArrowError::CDataInterface(
491 "FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
492 })?;
493 DataType::FixedSizeBinary(parsed_num_bytes)
494 }
495 ["+w", num_elems] => {
497 let c_child = c_schema.child(0);
498 let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
499 ArrowError::CDataInterface(
500 "The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
501 })?;
502 DataType::FixedSizeList(
503 Arc::new(Field::try_from(c_child)?),
504 parsed_num_elems,
505 )
506 }
507 ["d", extra] => match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
509 [precision, scale] => {
510 let parsed_precision = precision.parse::<u8>().map_err(|_| {
511 ArrowError::CDataInterface(
512 "The decimal type requires an integer precision".to_string(),
513 )
514 })?;
515 let parsed_scale = scale.parse::<i8>().map_err(|_| {
516 ArrowError::CDataInterface(
517 "The decimal type requires an integer scale".to_string(),
518 )
519 })?;
520 DataType::Decimal128(parsed_precision, parsed_scale)
521 }
522 [precision, scale, bits] => {
523 let parsed_precision = precision.parse::<u8>().map_err(|_| {
524 ArrowError::CDataInterface(
525 "The decimal type requires an integer precision".to_string(),
526 )
527 })?;
528 let parsed_scale = scale.parse::<i8>().map_err(|_| {
529 ArrowError::CDataInterface(
530 "The decimal type requires an integer scale".to_string(),
531 )
532 })?;
533 match *bits {
534 "32" => DataType::Decimal32(parsed_precision, parsed_scale),
535 "64" => DataType::Decimal64(parsed_precision, parsed_scale),
536 "128" => DataType::Decimal128(parsed_precision, parsed_scale),
537 "256" => DataType::Decimal256(parsed_precision, parsed_scale),
538 _ => return Err(ArrowError::CDataInterface("Only 32/64/128/256 bit wide decimals are supported in the Rust implementation".to_string())),
539 }
540 }
541 _ => {
542 return Err(ArrowError::CDataInterface(format!(
543 "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
544 )));
545 }
546 },
547 ["+ud", extra] => {
549 let type_ids = extra
550 .split(',')
551 .map(|t| {
552 t.parse::<i8>().map_err(|_| {
553 ArrowError::CDataInterface(
554 "The Union type requires an integer type id".to_string(),
555 )
556 })
557 })
558 .collect::<Result<Vec<_>, ArrowError>>()?;
559 let mut fields = Vec::with_capacity(type_ids.len());
560 for idx in 0..c_schema.n_children {
561 let c_child = c_schema.child(idx as usize);
562 let field = Field::try_from(c_child)?;
563 fields.push(field);
564 }
565
566 if fields.len() != type_ids.len() {
567 return Err(ArrowError::CDataInterface(
568 "The Union type requires same number of fields and type ids"
569 .to_string(),
570 ));
571 }
572
573 DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
574 }
575 ["+us", extra] => {
577 let type_ids = extra
578 .split(',')
579 .map(|t| {
580 t.parse::<i8>().map_err(|_| {
581 ArrowError::CDataInterface(
582 "The Union type requires an integer type id".to_string(),
583 )
584 })
585 })
586 .collect::<Result<Vec<_>, ArrowError>>()?;
587 let mut fields = Vec::with_capacity(type_ids.len());
588 for idx in 0..c_schema.n_children {
589 let c_child = c_schema.child(idx as usize);
590 let field = Field::try_from(c_child)?;
591 fields.push(field);
592 }
593
594 if fields.len() != type_ids.len() {
595 return Err(ArrowError::CDataInterface(
596 "The Union type requires same number of fields and type ids"
597 .to_string(),
598 ));
599 }
600
601 DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Sparse)
602 }
603
604 ["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
606 ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
607 ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
608 ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
609 ["tss", tz] => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz))),
610 ["tsm", tz] => DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz))),
611 ["tsu", tz] => DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz))),
612 ["tsn", tz] => DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz))),
613 _ => {
614 return Err(ArrowError::CDataInterface(format!(
615 "The datatype \"{other:?}\" is still not supported in Rust implementation"
616 )));
617 }
618 }
619 }
620 };
621
622 if let Some(dict_schema) = c_schema.dictionary() {
623 let value_type = Self::try_from(dict_schema)?;
624 dtype = DataType::Dictionary(Box::new(dtype), Box::new(value_type));
625 }
626
627 Ok(dtype)
628 }
629}
630
631impl TryFrom<&FFI_ArrowSchema> for Field {
632 type Error = ArrowError;
633
634 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
635 let dtype = DataType::try_from(c_schema)?;
636 let mut field = Field::new(c_schema.name().unwrap_or(""), dtype, c_schema.nullable());
637 field.set_metadata(c_schema.metadata()?);
638 Ok(field)
639 }
640}
641
642impl TryFrom<&FFI_ArrowSchema> for Schema {
643 type Error = ArrowError;
644
645 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
646 let dtype = DataType::try_from(c_schema)?;
648 if let DataType::Struct(fields) = dtype {
649 Ok(Schema::new(fields).with_metadata(c_schema.metadata()?))
650 } else {
651 Err(ArrowError::CDataInterface(
652 "Unable to interpret C data struct as a Schema".to_string(),
653 ))
654 }
655 }
656}
657
658impl TryFrom<&DataType> for FFI_ArrowSchema {
659 type Error = ArrowError;
660
661 fn try_from(dtype: &DataType) -> Result<Self, ArrowError> {
663 let format = get_format_string(dtype)?;
664 let children = match dtype {
666 DataType::List(child)
667 | DataType::LargeList(child)
668 | DataType::ListView(child)
669 | DataType::LargeListView(child)
670 | DataType::FixedSizeList(child, _)
671 | DataType::Map(child, _) => {
672 vec![FFI_ArrowSchema::try_from(child.as_ref())?]
673 }
674 DataType::Union(fields, _) => fields
675 .iter()
676 .map(|(_, f)| f.as_ref().try_into())
677 .collect::<Result<Vec<_>, ArrowError>>()?,
678 DataType::Struct(fields) => fields
679 .iter()
680 .map(FFI_ArrowSchema::try_from)
681 .collect::<Result<Vec<_>, ArrowError>>()?,
682 DataType::RunEndEncoded(run_ends, values) => vec![
683 FFI_ArrowSchema::try_from(run_ends.as_ref())?,
684 FFI_ArrowSchema::try_from(values.as_ref())?,
685 ],
686 _ => vec![],
687 };
688 let dictionary = if let DataType::Dictionary(_, value_data_type) = dtype {
689 Some(Self::try_from(value_data_type.as_ref())?)
690 } else {
691 None
692 };
693
694 let flags = match dtype {
695 DataType::Map(_, true) => Flags::MAP_KEYS_SORTED,
696 _ => Flags::empty(),
697 };
698
699 FFI_ArrowSchema::try_new(&format, children, dictionary)?.with_flags(flags)
700 }
701}
702
703fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError> {
704 match dtype {
705 DataType::Null => Ok("n".into()),
706 DataType::Boolean => Ok("b".into()),
707 DataType::Int8 => Ok("c".into()),
708 DataType::UInt8 => Ok("C".into()),
709 DataType::Int16 => Ok("s".into()),
710 DataType::UInt16 => Ok("S".into()),
711 DataType::Int32 => Ok("i".into()),
712 DataType::UInt32 => Ok("I".into()),
713 DataType::Int64 => Ok("l".into()),
714 DataType::UInt64 => Ok("L".into()),
715 DataType::Float16 => Ok("e".into()),
716 DataType::Float32 => Ok("f".into()),
717 DataType::Float64 => Ok("g".into()),
718 DataType::BinaryView => Ok("vz".into()),
719 DataType::Binary => Ok("z".into()),
720 DataType::LargeBinary => Ok("Z".into()),
721 DataType::Utf8View => Ok("vu".into()),
722 DataType::Utf8 => Ok("u".into()),
723 DataType::LargeUtf8 => Ok("U".into()),
724 DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))),
725 DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))),
726 DataType::Decimal32(precision, scale) => {
727 Ok(Cow::Owned(format!("d:{precision},{scale},32")))
728 }
729 DataType::Decimal64(precision, scale) => {
730 Ok(Cow::Owned(format!("d:{precision},{scale},64")))
731 }
732 DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))),
733 DataType::Decimal256(precision, scale) => {
734 Ok(Cow::Owned(format!("d:{precision},{scale},256")))
735 }
736 DataType::Date32 => Ok("tdD".into()),
737 DataType::Date64 => Ok("tdm".into()),
738 DataType::Time32(TimeUnit::Second) => Ok("tts".into()),
739 DataType::Time32(TimeUnit::Millisecond) => Ok("ttm".into()),
740 DataType::Time64(TimeUnit::Microsecond) => Ok("ttu".into()),
741 DataType::Time64(TimeUnit::Nanosecond) => Ok("ttn".into()),
742 DataType::Timestamp(TimeUnit::Second, None) => Ok("tss:".into()),
743 DataType::Timestamp(TimeUnit::Millisecond, None) => Ok("tsm:".into()),
744 DataType::Timestamp(TimeUnit::Microsecond, None) => Ok("tsu:".into()),
745 DataType::Timestamp(TimeUnit::Nanosecond, None) => Ok("tsn:".into()),
746 DataType::Timestamp(TimeUnit::Second, Some(tz)) => Ok(Cow::Owned(format!("tss:{tz}"))),
747 DataType::Timestamp(TimeUnit::Millisecond, Some(tz)) => Ok(Cow::Owned(format!("tsm:{tz}"))),
748 DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) => Ok(Cow::Owned(format!("tsu:{tz}"))),
749 DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Ok(Cow::Owned(format!("tsn:{tz}"))),
750 DataType::Duration(TimeUnit::Second) => Ok("tDs".into()),
751 DataType::Duration(TimeUnit::Millisecond) => Ok("tDm".into()),
752 DataType::Duration(TimeUnit::Microsecond) => Ok("tDu".into()),
753 DataType::Duration(TimeUnit::Nanosecond) => Ok("tDn".into()),
754 DataType::Interval(IntervalUnit::YearMonth) => Ok("tiM".into()),
755 DataType::Interval(IntervalUnit::DayTime) => Ok("tiD".into()),
756 DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()),
757 DataType::List(_) => Ok("+l".into()),
758 DataType::LargeList(_) => Ok("+L".into()),
759 DataType::ListView(_) => Ok("+vl".into()),
760 DataType::LargeListView(_) => Ok("+vL".into()),
761 DataType::Struct(_) => Ok("+s".into()),
762 DataType::Map(_, _) => Ok("+m".into()),
763 DataType::RunEndEncoded(_, _) => Ok("+r".into()),
764 DataType::Dictionary(key_data_type, _) => get_format_string(key_data_type),
765 DataType::Union(fields, mode) => {
766 let formats = fields
767 .iter()
768 .map(|(t, _)| t.to_string())
769 .collect::<Vec<_>>();
770 match mode {
771 UnionMode::Dense => Ok(Cow::Owned(format!("{}:{}", "+ud", formats.join(",")))),
772 UnionMode::Sparse => Ok(Cow::Owned(format!("{}:{}", "+us", formats.join(",")))),
773 }
774 }
775 other => Err(ArrowError::CDataInterface(format!(
776 "The datatype \"{other:?}\" is still not supported in Rust implementation"
777 ))),
778 }
779}
780
781impl TryFrom<&FieldRef> for FFI_ArrowSchema {
782 type Error = ArrowError;
783
784 fn try_from(value: &FieldRef) -> Result<Self, Self::Error> {
785 value.as_ref().try_into()
786 }
787}
788
789impl TryFrom<&Field> for FFI_ArrowSchema {
790 type Error = ArrowError;
791
792 fn try_from(field: &Field) -> Result<Self, ArrowError> {
793 let mut flags = if field.is_nullable() {
794 Flags::NULLABLE
795 } else {
796 Flags::empty()
797 };
798
799 if let Some(true) = field.dict_is_ordered() {
800 flags |= Flags::DICTIONARY_ORDERED;
801 }
802
803 FFI_ArrowSchema::try_from(field.data_type())?
804 .with_name(field.name())?
805 .with_flags(flags)?
806 .with_metadata(field.metadata())
807 }
808}
809
810impl TryFrom<&Schema> for FFI_ArrowSchema {
811 type Error = ArrowError;
812
813 fn try_from(schema: &Schema) -> Result<Self, ArrowError> {
814 let dtype = DataType::Struct(schema.fields().clone());
815 let c_schema = FFI_ArrowSchema::try_from(&dtype)?.with_metadata(&schema.metadata)?;
816 Ok(c_schema)
817 }
818}
819
820impl TryFrom<DataType> for FFI_ArrowSchema {
821 type Error = ArrowError;
822
823 fn try_from(dtype: DataType) -> Result<Self, ArrowError> {
824 FFI_ArrowSchema::try_from(&dtype)
825 }
826}
827
828impl TryFrom<Field> for FFI_ArrowSchema {
829 type Error = ArrowError;
830
831 fn try_from(field: Field) -> Result<Self, ArrowError> {
832 FFI_ArrowSchema::try_from(&field)
833 }
834}
835
836impl TryFrom<Schema> for FFI_ArrowSchema {
837 type Error = ArrowError;
838
839 fn try_from(schema: Schema) -> Result<Self, ArrowError> {
840 FFI_ArrowSchema::try_from(&schema)
841 }
842}
843
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Fields;

    /// Exports `dtype`, imports it again and checks that nothing changed.
    fn round_trip_type(dtype: DataType) {
        let exported = FFI_ArrowSchema::try_from(&dtype).unwrap();
        let imported = DataType::try_from(&exported).unwrap();
        assert_eq!(imported, dtype);
    }

    /// Exports `field`, imports it again and checks that nothing changed.
    fn round_trip_field(field: Field) {
        let exported = FFI_ArrowSchema::try_from(&field).unwrap();
        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported, field);
    }

    /// Exports `schema`, imports it again and checks that nothing changed.
    fn round_trip_schema(schema: Schema) {
        let exported = FFI_ArrowSchema::try_from(&schema).unwrap();
        let imported = Schema::try_from(&exported).unwrap();
        assert_eq!(imported, schema);
    }

    #[test]
    fn test_type() {
        let cases = vec![
            DataType::Int64,
            DataType::UInt64,
            DataType::Float64,
            DataType::Date64,
            DataType::Time64(TimeUnit::Nanosecond),
            DataType::FixedSizeBinary(12),
            DataType::FixedSizeList(Arc::new(Field::new("a", DataType::Int64, false)), 5),
            DataType::Utf8,
            DataType::Utf8View,
            DataType::BinaryView,
            DataType::Binary,
            DataType::LargeBinary,
            DataType::List(Arc::new(Field::new("a", DataType::Int16, false))),
            DataType::ListView(Arc::new(Field::new("a", DataType::Int16, false))),
            DataType::LargeListView(Arc::new(Field::new("a", DataType::Int16, false))),
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Utf8, true)])),
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::Int32, false)),
                Arc::new(Field::new("values", DataType::Binary, true)),
            ),
        ];

        for dtype in cases {
            round_trip_type(dtype);
        }
    }

    #[test]
    fn test_field() {
        let members = vec![Field::new("a", DataType::Utf8, true)];
        let struct_type = DataType::Struct(members.into());
        round_trip_field(Field::new("test", struct_type, true));
    }

    #[test]
    fn test_schema() {
        // A schema with metadata survives a round trip.
        let columns = vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ];
        let with_metadata =
            Schema::new(columns).with_metadata([("hello".to_string(), "world".to_string())].into());
        round_trip_schema(with_metadata);

        // A struct data type can be imported as a schema.
        let struct_type = DataType::Struct(Fields::from(vec![
            Field::new("a", DataType::Utf8, true),
            Field::new("b", DataType::Int16, false),
        ]));
        let exported_struct = FFI_ArrowSchema::try_from(&struct_type).unwrap();
        let imported = Schema::try_from(&exported_struct).unwrap();
        assert_eq!(imported.fields().len(), 2);

        // A non-struct data type cannot.
        let exported_float = FFI_ArrowSchema::try_from(&DataType::Float64).unwrap();
        assert!(Schema::try_from(&exported_float).is_err());
    }

    #[test]
    fn test_map_keys_sorted() {
        let entry_fields = vec![
            Field::new("keys", DataType::Int32, false),
            Field::new("values", DataType::UInt32, false),
        ];
        let entries = Field::new("entries", DataType::Struct(entry_fields.into()), false);
        let sorted_map = DataType::Map(Arc::new(entries), true);

        let exported = FFI_ArrowSchema::try_from(sorted_map).unwrap();
        assert!(exported.map_keys_sorted());
    }

    #[test]
    fn test_dictionary_ordered() {
        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        #[allow(deprecated)]
        let schema = Schema::new(vec![Field::new_dict("dict", dict_type, false, 0, true)]);

        let exported = FFI_ArrowSchema::try_from(schema).unwrap();
        assert!(exported.child(0).dictionary_ordered());
    }

    #[test]
    fn test_set_field_metadata() {
        let metadata_cases: Vec<HashMap<String, String>> = vec![
            HashMap::new(),
            HashMap::from([("key".to_string(), "value".to_string())]),
            HashMap::from([
                ("key".to_string(), "".to_string()),
                ("ascii123".to_string(), "你好".to_string()),
                ("".to_string(), "value".to_string()),
            ]),
        ];

        let mut exported = FFI_ArrowSchema::try_new("b", vec![], None)
            .unwrap()
            .with_name("test")
            .unwrap();

        // Each case replaces the metadata set by the previous one.
        for expected in metadata_cases {
            exported = exported.with_metadata(&expected).unwrap();
            let imported = Field::try_from(&exported).unwrap();
            assert_eq!(imported.metadata(), &expected);
        }
    }

    #[test]
    fn test_import_field_with_null_name() {
        let exported = FFI_ArrowSchema::try_from(&DataType::Int16).unwrap();
        assert!(exported.name().is_none());

        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported.name(), "");
    }
}