1use crate::{ArrowError, DataType, extension::ExtensionType};
23
/// Marker for the `arrow.timestamp_with_offset` extension type.
///
/// The type carries no state and no extension metadata. Its [`ExtensionType`]
/// implementation in this file accepts a two-field struct as storage:
///
/// * `timestamp`: a non-nullable `Timestamp` of any time unit whose time zone
///   is exactly `"UTC"`;
/// * `offset_minutes`: a non-nullable `Int16`, either plain or wrapped in a
///   dictionary or run-end encoding whose values are `Int16`.
///
/// NOTE(review): the field name suggests the offset is a UTC offset expressed
/// in minutes; nothing in this file checks or interprets the stored values.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct TimestampWithOffset;
58
/// Required name of the first struct child (the UTC timestamp).
const TIMESTAMP_FIELD_NAME: &'static str = "timestamp";
/// Required name of the second struct child (the `Int16` offset).
const OFFSET_FIELD_NAME: &'static str = "offset_minutes";
61
62impl ExtensionType for TimestampWithOffset {
63 const NAME: &'static str = "arrow.timestamp_with_offset";
64
65 type Metadata = ();
66
67 fn metadata(&self) -> &Self::Metadata {
68 &()
69 }
70
71 fn serialize_metadata(&self) -> Option<String> {
72 None
73 }
74
75 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
76 metadata.map_or_else(
77 || Ok(()),
78 |v| {
79 if !v.is_empty() {
80 Err(ArrowError::InvalidArgumentError(
81 "TimestampWithOffset extension type expects no metadata".to_owned(),
82 ))
83 } else {
84 Ok(())
85 }
86 },
87 )
88 }
89
90 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
91 let ok = match data_type {
92 DataType::Struct(fields) => match fields.len() {
93 2 => {
94 let maybe_timestamp = fields.first().unwrap();
95 let maybe_offset = fields.get(1).unwrap();
96
97 let timestamp_type_ok = matches!(maybe_timestamp.data_type(), DataType::Timestamp(_, tz) if {
98 match tz {
99 Some(tz) => {
100 tz.as_ref() == "UTC"
101 },
102 None => false
103 }
104 });
105
106 let offset_type_ok = match maybe_offset.data_type() {
107 DataType::Int16 => true,
108 DataType::Dictionary(key_type, value_type) => {
109 key_type.is_dictionary_key_type()
110 && matches!(value_type.as_ref(), DataType::Int16)
111 }
112 DataType::RunEndEncoded(run_ends, values) => {
113 run_ends.data_type().is_run_ends_type()
114 && matches!(values.data_type(), DataType::Int16)
115 }
116 _ => false,
117 };
118
119 maybe_timestamp.name() == TIMESTAMP_FIELD_NAME
120 && timestamp_type_ok
121 && !maybe_timestamp.is_nullable()
122 && maybe_offset.name() == OFFSET_FIELD_NAME
123 && offset_type_ok
124 && !maybe_offset.is_nullable()
125 }
126 _ => false,
127 },
128 _ => false,
129 };
130
131 match ok {
132 true => Ok(()),
133 false => Err(ArrowError::InvalidArgumentError(format!(
134 "TimestampWithOffset data type mismatch, expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found {data_type}"
135 ))),
136 }
137 }
138
139 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
140 Self.supports_data_type(data_type).map(|_| Self)
141 }
142}
143
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    #[cfg(feature = "canonical_extension_types")]
    use crate::extension::CanonicalExtensionType;
    use crate::{
        Field, Fields, TimeUnit,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Every timestamp resolution; the valid-layout tests run against all of them.
    const TIME_UNITS: [TimeUnit; 4] = [
        TimeUnit::Second,
        TimeUnit::Millisecond,
        TimeUnit::Microsecond,
        TimeUnit::Nanosecond,
    ];

    /// A valid `timestamp` child: non-nullable, time zone `"UTC"`.
    fn utc_timestamp_field(time_unit: TimeUnit) -> Field {
        Field::new(
            TIMESTAMP_FIELD_NAME,
            DataType::Timestamp(time_unit, Some("UTC".into())),
            false,
        )
    }

    /// A non-nullable `offset_minutes` child with the given storage type.
    fn offset_field(data_type: DataType) -> Field {
        Field::new(OFFSET_FIELD_NAME, data_type, false)
    }

    /// Run-end encoded type with non-nullable `run_ends` / `values` children.
    fn run_end_encoded(run_ends_type: DataType, values_type: DataType) -> DataType {
        DataType::RunEndEncoded(
            Arc::new(Field::new("run_ends", run_ends_type, false)),
            Arc::new(Field::new("values", values_type, false)),
        )
    }

    /// Struct storage type made of the two given children, in order.
    fn storage_type(timestamp: Field, offset: Field) -> DataType {
        DataType::Struct(Fields::from_iter([timestamp, offset]))
    }

    /// Unnamed, non-nullable field holding the struct storage type.
    fn storage_field(timestamp: Field, offset: Field) -> Field {
        Field::new("", storage_type(timestamp, offset), false)
    }

    /// Tries to annotate a field of `data_type`; panics when the type is rejected.
    fn annotate(data_type: DataType) {
        let _ = Field::new("", data_type, false).with_extension_type(TimestampWithOffset);
    }

    /// Attaches the extension type to `field` and reads it back.
    fn assert_round_trips(mut field: Field) -> Result<(), ArrowError> {
        field.try_with_extension_type(TimestampWithOffset)?;
        field.try_extension_type::<TimestampWithOffset>()?;
        #[cfg(feature = "canonical_extension_types")]
        assert_eq!(
            field.try_canonical_extension_type()?,
            CanonicalExtensionType::TimestampWithOffset(TimestampWithOffset)
        );
        Ok(())
    }

    fn make_valid_field_primitive(time_unit: TimeUnit) -> Field {
        storage_field(
            utc_timestamp_field(time_unit),
            offset_field(DataType::Int16),
        )
    }

    fn make_valid_field_dict_encoded(time_unit: TimeUnit, key_type: DataType) -> Field {
        assert!(key_type.is_dictionary_key_type());

        storage_field(
            utc_timestamp_field(time_unit),
            offset_field(DataType::Dictionary(
                Box::new(key_type),
                Box::new(DataType::Int16),
            )),
        )
    }

    fn make_valid_field_run_end_encoded(time_unit: TimeUnit, run_ends_type: DataType) -> Field {
        assert!(run_ends_type.is_run_ends_type());

        storage_field(
            utc_timestamp_field(time_unit),
            offset_field(run_end_encoded(run_ends_type, DataType::Int16)),
        )
    }

    #[test]
    fn valid_primitive_offsets() -> Result<(), ArrowError> {
        for time_unit in TIME_UNITS {
            assert_round_trips(make_valid_field_primitive(time_unit))?;
        }

        Ok(())
    }

    #[test]
    fn valid_dict_encoded_offsets() -> Result<(), ArrowError> {
        let key_types = [
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
        ];

        for time_unit in TIME_UNITS {
            for key_type in &key_types {
                assert_round_trips(make_valid_field_dict_encoded(time_unit, key_type.clone()))?;
            }
        }

        Ok(())
    }

    #[test]
    fn valid_run_end_encoded_offsets() -> Result<(), ArrowError> {
        let run_ends_types = [DataType::Int16, DataType::Int32, DataType::Int64];

        for time_unit in TIME_UNITS {
            for run_ends_type in &run_ends_types {
                assert_round_trips(make_valid_field_run_end_encoded(
                    time_unit,
                    run_ends_type.clone(),
                ))?;
            }
        }

        Ok(())
    }

    #[test]
    #[should_panic(expected = "Field extension type name missing")]
    fn missing_name() {
        let field = make_valid_field_primitive(TimeUnit::Second)
            .with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())].into());
        field.extension_type::<TimestampWithOffset>();
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Boolean"
    )]
    fn invalid_type_top_level() {
        annotate(DataType::Boolean);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_struct_field_count() {
        annotate(DataType::Struct(Fields::from_iter([Field::new(
            "",
            DataType::Int16,
            false,
        )])));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_timestamp_type() {
        annotate(storage_type(
            Field::new(TIMESTAMP_FIELD_NAME, DataType::Int16, false),
            offset_field(DataType::Int16),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_type() {
        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            offset_field(DataType::UInt64),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_key_dict_encoded() {
        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            offset_field(DataType::Dictionary(
                Box::new(DataType::Boolean),
                Box::new(DataType::Int16),
            )),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_value_dict_encoded() {
        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            offset_field(DataType::Dictionary(
                Box::new(DataType::UInt8),
                Box::new(DataType::Int32),
            )),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_run_ends_run_end_encoded() {
        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            offset_field(run_end_encoded(DataType::Boolean, DataType::Int16)),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_values_run_end_encoded() {
        // The run-ends type must itself be valid (Int16/Int32/Int64) so that
        // the rejection can only come from the `Int32` values type.
        let run_ends_type = DataType::Int16;
        assert!(run_ends_type.is_run_ends_type());

        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            offset_field(run_end_encoded(run_ends_type, DataType::Int32)),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_nullable_timestamp() {
        annotate(storage_type(
            Field::new(
                TIMESTAMP_FIELD_NAME,
                DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
                true,
            ),
            offset_field(DataType::Int16),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_nullable_offset() {
        annotate(storage_type(
            utc_timestamp_field(TimeUnit::Second),
            Field::new(OFFSET_FIELD_NAME, DataType::Int16, true),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_no_timezone() {
        annotate(storage_type(
            Field::new(
                TIMESTAMP_FIELD_NAME,
                DataType::Timestamp(TimeUnit::Second, None),
                false,
            ),
            offset_field(DataType::Int16),
        ));
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_timezone() {
        // A real IANA zone that is not UTC.
        annotate(storage_type(
            Field::new(
                TIMESTAMP_FIELD_NAME,
                DataType::Timestamp(TimeUnit::Second, Some("America/Sao_Paulo".into())),
                false,
            ),
            offset_field(DataType::Int16),
        ));
    }

    #[test]
    fn no_metadata() {
        let field = make_valid_field_primitive(TimeUnit::Second).with_metadata(
            [(
                EXTENSION_TYPE_NAME_KEY.to_owned(),
                TimestampWithOffset::NAME.to_owned(),
            )]
            .into(),
        );
        field.extension_type::<TimestampWithOffset>();
    }

    #[test]
    fn empty_metadata() {
        let field = make_valid_field_primitive(TimeUnit::Second).with_metadata(
            [
                (
                    EXTENSION_TYPE_NAME_KEY.to_owned(),
                    TimestampWithOffset::NAME.to_owned(),
                ),
                (EXTENSION_TYPE_METADATA_KEY.to_owned(), String::new()),
            ]
            .into(),
        );
        field.extension_type::<TimestampWithOffset>();
    }
}