1use crate::cast::*;
19use arrow_buffer::NullBuffer;
20
21pub(crate) fn value_to_string<O: OffsetSizeTrait>(
22 array: &dyn Array,
23 options: &CastOptions,
24) -> Result<ArrayRef, ArrowError> {
25 let mut builder = GenericStringBuilder::<O>::new();
26 let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
27 let nulls = array.nulls();
28 for i in 0..array.len() {
29 match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
30 true => builder.append_null(),
31 false => {
32 formatter.value(i).write(&mut builder)?;
33 builder.append_value("");
35 }
36 }
37 }
38 Ok(Arc::new(builder.finish()))
39}
40
41pub(crate) fn value_to_string_view(
42 array: &dyn Array,
43 options: &CastOptions,
44) -> Result<ArrayRef, ArrowError> {
45 let mut builder = StringViewBuilder::with_capacity(array.len());
46 let formatter = ArrayFormatter::try_new(array, &options.format_options)?;
47 let nulls = array.nulls();
48 let mut buffer = String::new();
51 for i in 0..array.len() {
52 match nulls.map(|x| x.is_null(i)).unwrap_or_default() {
53 true => builder.append_null(),
54 false => {
55 buffer.clear();
57 formatter.value(i).write(&mut buffer)?;
58 builder.append_value(&buffer)
59 }
60 }
61 }
62 Ok(Arc::new(builder.finish()))
63}
64
65pub(crate) fn parse_string<P: Parser, O: OffsetSizeTrait>(
67 array: &dyn Array,
68 cast_options: &CastOptions,
69) -> Result<ArrayRef, ArrowError> {
70 let string_array = array.as_string::<O>();
71 parse_string_iter::<P, _, _>(string_array.iter(), cast_options, || {
72 string_array.nulls().cloned()
73 })
74}
75
76pub(crate) fn parse_string_view<P: Parser>(
78 array: &dyn Array,
79 cast_options: &CastOptions,
80) -> Result<ArrayRef, ArrowError> {
81 let string_view_array = array.as_string_view();
82 parse_string_iter::<P, _, _>(string_view_array.iter(), cast_options, || {
83 string_view_array.nulls().cloned()
84 })
85}
86
87fn parse_string_iter<
88 'a,
89 P: Parser,
90 I: Iterator<Item = Option<&'a str>>,
91 F: FnOnce() -> Option<NullBuffer>,
92>(
93 iter: I,
94 cast_options: &CastOptions,
95 nulls: F,
96) -> Result<ArrayRef, ArrowError> {
97 let array = if cast_options.safe {
98 let iter = iter.map(|x| x.and_then(P::parse));
99
100 unsafe { PrimitiveArray::<P>::from_trusted_len_iter(iter) }
105 } else {
106 let v = iter
107 .map(|x| match x {
108 Some(v) => P::parse(v).ok_or_else(|| {
109 ArrowError::CastError(format!(
110 "Cannot cast string '{}' to value of {:?} type",
111 v,
112 P::DATA_TYPE
113 ))
114 }),
115 None => Ok(P::Native::default()),
116 })
117 .collect::<Result<Vec<_>, ArrowError>>()?;
118 PrimitiveArray::new(v.into(), nulls())
119 };
120
121 Ok(Arc::new(array) as ArrayRef)
122}
123
124pub(crate) fn cast_string_to_timestamp<O: OffsetSizeTrait, T: ArrowTimestampType>(
126 array: &dyn Array,
127 to_tz: &Option<Arc<str>>,
128 cast_options: &CastOptions,
129) -> Result<ArrayRef, ArrowError> {
130 let array = array.as_string::<O>();
131 let out: PrimitiveArray<T> = match to_tz {
132 Some(tz) => {
133 let tz: Tz = tz.as_ref().parse()?;
134 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
135 }
136 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
137 };
138 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
139}
140
141pub(crate) fn cast_view_to_timestamp<T: ArrowTimestampType>(
143 array: &dyn Array,
144 to_tz: &Option<Arc<str>>,
145 cast_options: &CastOptions,
146) -> Result<ArrayRef, ArrowError> {
147 let array = array.as_string_view();
148 let out: PrimitiveArray<T> = match to_tz {
149 Some(tz) => {
150 let tz: Tz = tz.as_ref().parse()?;
151 cast_string_to_timestamp_impl(array.iter(), &tz, cast_options)?
152 }
153 None => cast_string_to_timestamp_impl(array.iter(), &Utc, cast_options)?,
154 };
155 Ok(Arc::new(out.with_timezone_opt(to_tz.clone())))
156}
157
158fn cast_string_to_timestamp_impl<
159 'a,
160 I: Iterator<Item = Option<&'a str>>,
161 T: ArrowTimestampType,
162 Tz: TimeZone,
163>(
164 iter: I,
165 tz: &Tz,
166 cast_options: &CastOptions,
167) -> Result<PrimitiveArray<T>, ArrowError> {
168 if cast_options.safe {
169 let iter = iter.map(|v| {
170 v.and_then(|v| {
171 let naive = string_to_datetime(tz, v).ok()?.naive_utc();
172 T::make_value(naive)
173 })
174 });
175 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(iter) })
181 } else {
182 let vec = iter
183 .map(|v| {
184 v.map(|v| {
185 let naive = string_to_datetime(tz, v)?.naive_utc();
186 T::make_value(naive).ok_or_else(|| match T::UNIT {
187 TimeUnit::Nanosecond => ArrowError::CastError(format!(
188 "Overflow converting {naive} to Nanosecond. The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"
189 )),
190 _ => ArrowError::CastError(format!(
191 "Overflow converting {naive} to {:?}",
192 T::UNIT
193 ))
194 })
195 })
196 .transpose()
197 })
198 .collect::<Result<Vec<Option<i64>>, _>>()?;
199
200 Ok(unsafe { PrimitiveArray::from_trusted_len_iter(vec.iter()) })
205 }
206}
207
208pub(crate) fn cast_string_to_interval<Offset, F, ArrowType>(
209 array: &dyn Array,
210 cast_options: &CastOptions,
211 parse_function: F,
212) -> Result<ArrayRef, ArrowError>
213where
214 Offset: OffsetSizeTrait,
215 ArrowType: ArrowPrimitiveType,
216 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
217{
218 let string_array = array
219 .as_any()
220 .downcast_ref::<GenericStringArray<Offset>>()
221 .unwrap();
222 cast_string_to_interval_impl::<_, ArrowType, F>(
223 string_array.iter(),
224 cast_options,
225 parse_function,
226 )
227}
228
229pub(crate) fn cast_string_to_year_month_interval<Offset: OffsetSizeTrait>(
230 array: &dyn Array,
231 cast_options: &CastOptions,
232) -> Result<ArrayRef, ArrowError> {
233 cast_string_to_interval::<Offset, _, IntervalYearMonthType>(
234 array,
235 cast_options,
236 parse_interval_year_month,
237 )
238}
239
240pub(crate) fn cast_string_to_day_time_interval<Offset: OffsetSizeTrait>(
241 array: &dyn Array,
242 cast_options: &CastOptions,
243) -> Result<ArrayRef, ArrowError> {
244 cast_string_to_interval::<Offset, _, IntervalDayTimeType>(
245 array,
246 cast_options,
247 parse_interval_day_time,
248 )
249}
250
251pub(crate) fn cast_string_to_month_day_nano_interval<Offset: OffsetSizeTrait>(
252 array: &dyn Array,
253 cast_options: &CastOptions,
254) -> Result<ArrayRef, ArrowError> {
255 cast_string_to_interval::<Offset, _, IntervalMonthDayNanoType>(
256 array,
257 cast_options,
258 parse_interval_month_day_nano,
259 )
260}
261
262pub(crate) fn cast_view_to_interval<F, ArrowType>(
263 array: &dyn Array,
264 cast_options: &CastOptions,
265 parse_function: F,
266) -> Result<ArrayRef, ArrowError>
267where
268 ArrowType: ArrowPrimitiveType,
269 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
270{
271 let string_view_array = array.as_any().downcast_ref::<StringViewArray>().unwrap();
272 cast_string_to_interval_impl::<_, ArrowType, F>(
273 string_view_array.iter(),
274 cast_options,
275 parse_function,
276 )
277}
278
279pub(crate) fn cast_view_to_year_month_interval(
280 array: &dyn Array,
281 cast_options: &CastOptions,
282) -> Result<ArrayRef, ArrowError> {
283 cast_view_to_interval::<_, IntervalYearMonthType>(
284 array,
285 cast_options,
286 parse_interval_year_month,
287 )
288}
289
290pub(crate) fn cast_view_to_day_time_interval(
291 array: &dyn Array,
292 cast_options: &CastOptions,
293) -> Result<ArrayRef, ArrowError> {
294 cast_view_to_interval::<_, IntervalDayTimeType>(array, cast_options, parse_interval_day_time)
295}
296
297pub(crate) fn cast_view_to_month_day_nano_interval(
298 array: &dyn Array,
299 cast_options: &CastOptions,
300) -> Result<ArrayRef, ArrowError> {
301 cast_view_to_interval::<_, IntervalMonthDayNanoType>(
302 array,
303 cast_options,
304 parse_interval_month_day_nano,
305 )
306}
307
308fn cast_string_to_interval_impl<'a, I, ArrowType, F>(
309 iter: I,
310 cast_options: &CastOptions,
311 parse_function: F,
312) -> Result<ArrayRef, ArrowError>
313where
314 I: Iterator<Item = Option<&'a str>>,
315 ArrowType: ArrowPrimitiveType,
316 F: Fn(&str) -> Result<ArrowType::Native, ArrowError> + Copy,
317{
318 let interval_array = if cast_options.safe {
319 let iter = iter.map(|v| v.and_then(|v| parse_function(v).ok()));
320
321 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(iter) }
326 } else {
327 let vec = iter
328 .map(|v| v.map(parse_function).transpose())
329 .collect::<Result<Vec<_>, ArrowError>>()?;
330
331 unsafe { PrimitiveArray::<ArrowType>::from_trusted_len_iter(vec) }
336 };
337 Ok(Arc::new(interval_array) as ArrayRef)
338}
339
340pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
343 array: &dyn Array,
344 cast_options: &CastOptions,
345) -> Result<ArrayRef, ArrowError> {
346 let array = array
347 .as_any()
348 .downcast_ref::<GenericByteArray<GenericBinaryType<O>>>()
349 .unwrap();
350
351 match GenericStringArray::<O>::try_from_binary(array.clone()) {
352 Ok(a) => Ok(Arc::new(a)),
353 Err(e) => match cast_options.safe {
354 true => {
355 let mut builder =
357 GenericStringBuilder::<O>::with_capacity(array.len(), array.value_data().len());
358
359 let iter = array
360 .iter()
361 .map(|v| v.and_then(|v| std::str::from_utf8(v).ok()));
362
363 builder.extend(iter);
364 Ok(Arc::new(builder.finish()))
365 }
366 false => Err(e),
367 },
368 }
369}
370
371fn cast_string_to_boolean<'a, StrArray>(
373 array: &StrArray,
374 cast_options: &CastOptions,
375) -> Result<ArrayRef, ArrowError>
376where
377 StrArray: StringArrayType<'a>,
378{
379 let output_array = array
380 .iter()
381 .map(|value| match value {
382 Some(value) => match value.to_ascii_lowercase().trim() {
383 "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)),
384 "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
385 Ok(Some(false))
386 }
387 invalid_value => match cast_options.safe {
388 true => Ok(None),
389 false => Err(ArrowError::CastError(format!(
390 "Cannot cast value '{invalid_value}' to value of Boolean type",
391 ))),
392 },
393 },
394 None => Ok(None),
395 })
396 .collect::<Result<BooleanArray, _>>()?;
397
398 Ok(Arc::new(output_array))
399}
400
401pub(crate) fn cast_utf8_to_boolean<OffsetSize>(
402 from: &dyn Array,
403 cast_options: &CastOptions,
404) -> Result<ArrayRef, ArrowError>
405where
406 OffsetSize: OffsetSizeTrait,
407{
408 let array = from
409 .as_any()
410 .downcast_ref::<GenericStringArray<OffsetSize>>()
411 .unwrap();
412
413 cast_string_to_boolean(&array, cast_options)
414}
415
416pub(crate) fn cast_utf8view_to_boolean(
417 from: &dyn Array,
418 cast_options: &CastOptions,
419) -> Result<ArrayRef, ArrowError> {
420 let array = from.as_any().downcast_ref::<StringViewArray>().unwrap();
421
422 cast_string_to_boolean(&array, cast_options)
423}