1use crate::cast::*;
19use arrow_buffer::NullBuffer;
20
21pub(crate) fn value_to_string<O: OffsetSizeTrait>(
22 array: &dyn Array,
23 options: &CastOptions,
24) -> Result<ArrayRef, ArrowError> {
25 let mut builder = GenericStringBuilder::<O>::new();
26 let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
27 let nulls = array.nulls();
28 for i in 0..array.len() {
29 match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
30 true => builder.append_null(),
31 false => {
32 formatter.value(i).write(&mut builder)?;
33 builder.append_value("");
35 }
36 }
37 }
38 Ok(Arc::new(builder.finish()))
39}
40
41pub(crate) fn value_to_string_view(
42 array: &dyn Array,
43 options: &CastOptions,
44) -> Result<ArrayRef, ArrowError> {
45 let mut builder = StringViewBuilder::with_capacity(array.len());
46 let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
47 let nulls = array.nulls();
48 let mut buffer = String::new();
51 for i in 0..array.len() {
52 match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
53 true => builder.append_null(),
54 false => {
55 buffer.clear();
57 formatter.value(i).write(&mut buffer)?;
58 builder.append_value(&buffer)
59 }
60 }
61 }
62 Ok(Arc::new(builder.finish()))
63}
64
65pub(crate) fn parse_string<P: Parser, O: OffsetSizeTrait>(
67 array: &dyn Array,
68 cast_options: &CastOptions,
69) -> Result<ArrayRef, ArrowError> {
70 let string_array = array.as_string::<O>();
71 parse_string_iter::<P, _, _>(string_array.iter(), cast_options, || {
72 string_array.nulls().cloned()
73 })
74}
75
76pub(crate) fn parse_string_view<P: Parser>(
78 array: &dyn Array,
79 cast_options: &CastOptions,
80) -> Result<ArrayRef, ArrowError> {
81 let string_view_array = array.as_string_view();
82 parse_string_iter::<P, _, _>(string_view_array.iter(), cast_options, || {
83 string_view_array.nulls().cloned()
84 })
85}
86
87fn parse_string_iter<
88 'a,
89 P: Parser,
90 I: Iterator<Item = Option<&'a str>>,
91 F: FnOnce() -> Option<NullBuffer>,
92>(
93 iter: I,
94 cast_options: &CastOptions,
95 nulls: F,
96) -> Result<ArrayRef, ArrowError> {
97 let array = if cast_options.safe {
98 let iter = iter.map(|x| x.and_then(P::parse));
99
100 unsafe { PrimitiveArray::<P>::from_trusted_len_iter(iter) }
105 } else {
106 let v = iter
107 .map(|x| match x {
108 Some(v) => P::parse(v).ok_or_else(|| {
109 ArrowError::CastError(format!(
110 "Cannot cast string '{v}' to value of {} type",
111 P::DATA_TYPE
112 ))
113 }),
114 None => Ok(P::Native::default()),
115 })
116 .collect::<Result<Vec<_>, ArrowError>>()?;
117 PrimitiveArray::try_new(v.into(), nulls())?
118 };
119
120 Ok(Arc::new(array) as ArrayRef)
121}
122
123pub(crate) fn cast_string_to_timestamp<O: OffsetSizeTrait, T: ArrowTimestampType>(
125 array: &dyn Array,
126 to_tz: &Option<Arc<str>>,
127 cast_options: &CastOptions,
128) -> Result<ArrayRef, ArrowError> {
129 let array = array.as_string::<O>();
130 let out: PrimitiveArray<T> = match to_tz {
131 Some(tz) => {
132 let tz: Tz = tz.as_ref().parse()?;
133 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
134 }
135 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
136 };
137 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
138}
139
140pub(crate) fn cast_view_to_timestamp<T: ArrowTimestampType>(
142 array: &dyn Array,
143 to_tz: &Option<Arc<str>>,
144 cast_options: &CastOptions,
145) -> Result<ArrayRef, ArrowError> {
146 let array = array.as_string_view();
147 let out: PrimitiveArray<T> = match to_tz {
148 Some(tz) => {
149 let tz: Tz = tz.as_ref().parse()?;
150 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
151 }
152 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
153 };
154 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
155}
156
157fn cast_string_to_timestamp_impl<
158 'a,
159 I: Iterator<Item = Option<&'a str>>,
160 T: ArrowTimestampType,
161 Tz: TimeZone,
162>(
163 iter: I,
164 tz: &Tz,
165 cast_options: &CastOptions,
166) -> Result<PrimitiveArray<T>, ArrowError> {
167 if cast_options.safe {
168 let iter = iter.map(|v| {
169 v.and_then(|v| {
170 let naive = string_to_datetime(tz, v).ok()?.naive_utc();
171 T::make_value(naive)
172 })
173 });
174 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(iter) })
180 } else {
181 let vec = iter
182 .map(|v| {
183 v.map(|v| {
184 let naive = string_to_datetime(tz, v)?.naive_utc();
185 T::make_value(naive).ok_or_else(|| match T::UNIT {
186 TimeUnit::Nanosecond => ArrowError::CastError(format!(
187 "Overflow converting {naive} to Nanosecond. The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"
188 )),
189 _ => ArrowError::CastError(format!(
190 "Overflow converting {naive} to {:?}",
191 T::UNIT
192 ))
193 })
194 })
195 .transpose()
196 })
197 .collect::<Result<Vec<Option<i64>>, _>>()?;
198
199 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(vec.iter()) })
204 }
205}
206
207pub(crate) fn cast_string_to_interval<Offset, F, ArrowType>(
208 array: &dyn Array,
209 cast_options: &CastOptions,
210 parse_function: F,
211) -> Result<ArrayRef, ArrowError>
212where
213 Offset: OffsetSizeTrait,
214 ArrowType: ArrowPrimitiveType,
215 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
216{
217 let string_array = array
218 .as_any()
219 .downcast_ref::<GenericStringArray<Offset>>()
220 .unwrap();
221 cast_string_to_interval_impl::<_, ArrowType, F>(
222 string_array.iter(),
223 cast_options,
224 parse_function,
225 )
226}
227
228pub(crate) fn cast_string_to_year_month_interval<Offset: OffsetSizeTrait>(
229 array: &dyn Array,
230 cast_options: &CastOptions,
231) -> Result<ArrayRef, ArrowError> {
232 cast_string_to_interval::<Offset, _, IntervalYearMonthType>(
233 array,
234 cast_options,
235 parse_interval_year_month,
236 )
237}
238
239pub(crate) fn cast_string_to_day_time_interval<Offset: OffsetSizeTrait>(
240 array: &dyn Array,
241 cast_options: &CastOptions,
242) -> Result<ArrayRef, ArrowError> {
243 cast_string_to_interval::<Offset, _, IntervalDayTimeType>(
244 array,
245 cast_options,
246 parse_interval_day_time,
247 )
248}
249
250pub(crate) fn cast_string_to_month_day_nano_interval<Offset: OffsetSizeTrait>(
251 array: &dyn Array,
252 cast_options: &CastOptions,
253) -> Result<ArrayRef, ArrowError> {
254 cast_string_to_interval::<Offset, _, IntervalMonthDayNanoType>(
255 array,
256 cast_options,
257 parse_interval_month_day_nano,
258 )
259}
260
261pub(crate) fn cast_view_to_interval<F, ArrowType>(
262 array: &dyn Array,
263 cast_options: &CastOptions,
264 parse_function: F,
265) -> Result<ArrayRef, ArrowError>
266where
267 ArrowType: ArrowPrimitiveType,
268 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
269{
270 let string_view_array = array.as_any().downcast_ref::<StringViewArray>().unwrap();
271 cast_string_to_interval_impl::<_, ArrowType, F>(
272 string_view_array.iter(),
273 cast_options,
274 parse_function,
275 )
276}
277
278pub(crate) fn cast_view_to_year_month_interval(
279 array: &dyn Array,
280 cast_options: &CastOptions,
281) -> Result<ArrayRef, ArrowError> {
282 cast_view_to_interval::<_, IntervalYearMonthType>(
283 array,
284 cast_options,
285 parse_interval_year_month,
286 )
287}
288
289pub(crate) fn cast_view_to_day_time_interval(
290 array: &dyn Array,
291 cast_options: &CastOptions,
292) -> Result<ArrayRef, ArrowError> {
293 cast_view_to_interval::<_, IntervalDayTimeType>(array, cast_options, parse_interval_day_time)
294}
295
296pub(crate) fn cast_view_to_month_day_nano_interval(
297 array: &dyn Array,
298 cast_options: &CastOptions,
299) -> Result<ArrayRef, ArrowError> {
300 cast_view_to_interval::<_, IntervalMonthDayNanoType>(
301 array,
302 cast_options,
303 parse_interval_month_day_nano,
304 )
305}
306
307fn cast_string_to_interval_impl<'a, I, ArrowType, F>(
308 iter: I,
309 cast_options: &CastOptions,
310 parse_function: F,
311) -> Result<ArrayRef, ArrowError>
312where
313 I: Iterator<Item = Option<&'a str>>,
314 ArrowType: ArrowPrimitiveType,
315 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
316{
317 let interval_array = if cast_options.safe {
318 let iter = iter.map(|v| v.and_then(|v| parse_function(v).ok()));
319
320 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(iter) }
325 } else {
326 let vec = iter
327 .map(|v| v.map(parse_function).transpose())
328 .collect::<Result<Vec<_>, ArrowError>>()?;
329
330 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(vec) }
335 };
336 Ok(Arc::new(interval_array) as ArrayRef)
337}
338
/// Feeds `builder` with the UTF-8 interpretation of each byte slice in `iter`.
///
/// `None` inputs are passed through as `None`, and byte slices that are not
/// valid UTF-8 are also appended as `None`.
fn extend_valid_utf8<'a, B, I>(builder: &mut B, iter: I)
where
    B: Extend<Option<&'a str>>,
    I: Iterator<Item = Option<&'a [u8]>>,
{
    let decoded = iter.map(|maybe_bytes| match maybe_bytes {
        Some(bytes) => std::str::from_utf8(bytes).ok(),
        None => None,
    });
    builder.extend(decoded);
}
348
349pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
350 array: &dyn Array,
351 cast_options: &CastOptions,
352) -> Result<ArrayRef, ArrowError> {
353 let array = array
354 .as_any()
355 .downcast_ref::<GenericByteArray<GenericBinaryType<O>>>()
356 .unwrap();
357
358 match GenericStringArray::<O>::try_from_binary(array.clone()) {
359 Ok(a) => Ok(Arc::new(a)),
360 Err(e) => match cast_options.safe {
361 true => {
362 let mut builder =
364 GenericStringBuilder::<O>::with_capacity(array.len(), array.value_data().len());
365
366 extend_valid_utf8(&mut builder, array.iter());
367 Ok(Arc::new(builder.finish()))
368 }
369 false => Err(e),
370 },
371 }
372}
373
374pub(crate) fn cast_binary_view_to_string_view(
375 array: &dyn Array,
376 cast_options: &CastOptions,
377) -> Result<ArrayRef, ArrowError> {
378 let array = array.as_binary_view();
379
380 match array.clone().to_string_view() {
381 Ok(result) => Ok(Arc::new(result)),
382 Err(error) => match cast_options.safe {
383 true => {
384 let mut builder = StringViewBuilder::with_capacity(array.len());
385 extend_valid_utf8(&mut builder, array.iter());
386 Ok(Arc::new(builder.finish()))
387 }
388 false => Err(error),
389 },
390 }
391}
392
393fn cast_string_to_boolean<'a, StrArray>(
395 array: &StrArray,
396 cast_options: &CastOptions,
397) -> Result<ArrayRef, ArrowError>
398where
399 StrArray: StringArrayType<'a>,
400{
401 let output_array = array
402 .iter()
403 .map(|value| match value {
404 Some(value) => match value.to_ascii_lowercase().trim() {
405 "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)),
406 "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
407 Ok(Some(false))
408 }
409 invalid_value => match cast_options.safe {
410 true => Ok(None),
411 false => Err(ArrowError::CastError(format!(
412 "Cannot cast value '{invalid_value}' to value of Boolean type",
413 ))),
414 },
415 },
416 None => Ok(None),
417 })
418 .collect::<Result<BooleanArray, _>>()?;
419
420 Ok(Arc::new(output_array))
421}
422
423pub(crate) fn cast_utf8_to_boolean<OffsetSize>(
424 from: &dyn Array,
425 cast_options: &CastOptions,
426) -> Result<ArrayRef, ArrowError>
427where
428 OffsetSize: OffsetSizeTrait,
429{
430 let array = from
431 .as_any()
432 .downcast_ref::<GenericStringArray<OffsetSize>>()
433 .unwrap();
434
435 cast_string_to_boolean(&array, cast_options)
436}
437
438pub(crate) fn cast_utf8view_to_boolean(
439 from: &dyn Array,
440 cast_options: &CastOptions,
441) -> Result<ArrayRef, ArrowError> {
442 let array = from.as_any().downcast_ref::<StringViewArray>().unwrap();
443
444 cast_string_to_boolean(&array, cast_options)
445}