1use crate::{ArrowError, DataType, extension::ExtensionType};
23
/// Marker type for the `arrow.timestamp_with_offset` extension type.
///
/// The extension has no parameters, so this is a field-less unit struct. Its
/// storage type, as enforced by [`ExtensionType::supports_data_type`], is a
/// `Struct` with exactly two non-nullable children, in this order:
///
/// 1. `timestamp`: a `Timestamp` of any time unit whose time zone is exactly
///    `"UTC"`.
/// 2. `offset_minutes`: an `Int16`, optionally dictionary-encoded (any valid
///    dictionary key type) or run-end-encoded (any valid run-ends type).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TimestampWithOffset;

/// Required name of the first struct child (the UTC timestamp).
const TIMESTAMP_FIELD_NAME: &str = "timestamp";
/// Required name of the second struct child (the `Int16` offset).
const OFFSET_FIELD_NAME: &str = "offset_minutes";
61
62impl ExtensionType for TimestampWithOffset {
63 const NAME: &'static str = "arrow.timestamp_with_offset";
64
65 type Metadata = ();
66
67 fn metadata(&self) -> &Self::Metadata {
68 &()
69 }
70
71 fn serialize_metadata(&self) -> Option<String> {
72 None
73 }
74
75 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
76 metadata.map_or_else(
77 || Ok(()),
78 |v| {
79 if !v.is_empty() {
80 Err(ArrowError::InvalidArgumentError(
81 "TimestampWithOffset extension type expects no metadata".to_owned(),
82 ))
83 } else {
84 Ok(())
85 }
86 },
87 )
88 }
89
90 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
91 let ok = match data_type {
92 DataType::Struct(fields) => match fields.len() {
93 2 => {
94 let maybe_timestamp = fields.first().unwrap();
95 let maybe_offset = fields.get(1).unwrap();
96
97 let timestamp_type_ok = matches!(maybe_timestamp.data_type(), DataType::Timestamp(_, tz) if {
98 match tz {
99 Some(tz) => {
100 tz.as_ref() == "UTC"
101 },
102 None => false
103 }
104 });
105
106 let offset_type_ok = match maybe_offset.data_type() {
107 DataType::Int16 => true,
108 DataType::Dictionary(key_type, value_type) => {
109 key_type.is_dictionary_key_type()
110 && matches!(value_type.as_ref(), DataType::Int16)
111 }
112 DataType::RunEndEncoded(run_ends, values) => {
113 run_ends.data_type().is_run_ends_type()
114 && matches!(values.data_type(), DataType::Int16)
115 }
116 _ => false,
117 };
118
119 maybe_timestamp.name() == TIMESTAMP_FIELD_NAME
120 && timestamp_type_ok
121 && !maybe_timestamp.is_nullable()
122 && maybe_offset.name() == OFFSET_FIELD_NAME
123 && offset_type_ok
124 && !maybe_offset.is_nullable()
125 }
126 _ => false,
127 },
128 _ => false,
129 };
130
131 match ok {
132 true => Ok(()),
133 false => Err(ArrowError::InvalidArgumentError(format!(
134 "TimestampWithOffset data type mismatch, expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found {data_type}"
135 ))),
136 }
137 }
138
139 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
140 Self.supports_data_type(data_type).map(|_| Self)
141 }
142
143 fn validate(data_type: &DataType, _metadata: Self::Metadata) -> Result<(), ArrowError> {
144 Self.supports_data_type(data_type)
145 }
146}
147
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    #[cfg(feature = "canonical_extension_types")]
    use crate::extension::CanonicalExtensionType;
    use crate::{
        Field, Fields, TimeUnit,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Every time unit is exercised for the timestamp child in the positive tests.
    const TIME_UNITS: [TimeUnit; 4] = [
        TimeUnit::Second,
        TimeUnit::Millisecond,
        TimeUnit::Microsecond,
        TimeUnit::Nanosecond,
    ];

    /// Builds the `timestamp` child with the given time zone and nullability.
    fn timestamp_field(time_unit: TimeUnit, timezone: Option<&str>, nullable: bool) -> Field {
        Field::new(
            TIMESTAMP_FIELD_NAME,
            DataType::Timestamp(time_unit, timezone.map(Arc::from)),
            nullable,
        )
    }

    /// Builds a valid (UTC, non-nullable) `timestamp` child.
    fn utc_timestamp_field(time_unit: TimeUnit) -> Field {
        timestamp_field(time_unit, Some("UTC"), false)
    }

    /// Builds the `offset_minutes` child with the given type and nullability.
    fn offset_field(data_type: DataType, nullable: bool) -> Field {
        Field::new(OFFSET_FIELD_NAME, data_type, nullable)
    }

    /// Builds a dictionary type from a key type and a value type.
    fn dictionary_type(key_type: DataType, value_type: DataType) -> DataType {
        DataType::Dictionary(Box::new(key_type), Box::new(value_type))
    }

    /// Builds a run-end-encoded type from a run-ends type and a values type.
    fn run_end_encoded_type(run_ends_type: DataType, values_type: DataType) -> DataType {
        DataType::RunEndEncoded(
            Arc::new(Field::new("run_ends", run_ends_type, false)),
            Arc::new(Field::new("values", values_type, false)),
        )
    }

    /// Wraps `children` in an unnamed, non-nullable struct field.
    fn struct_field(children: impl IntoIterator<Item = Field>) -> Field {
        Field::new("", DataType::Struct(Fields::from_iter(children)), false)
    }

    fn make_valid_field_primitive(time_unit: TimeUnit) -> Field {
        struct_field([
            utc_timestamp_field(time_unit),
            offset_field(DataType::Int16, false),
        ])
    }

    fn make_valid_field_dict_encoded(time_unit: TimeUnit, key_type: DataType) -> Field {
        assert!(key_type.is_dictionary_key_type());

        struct_field([
            utc_timestamp_field(time_unit),
            offset_field(dictionary_type(key_type, DataType::Int16), false),
        ])
    }

    fn make_valid_field_run_end_encoded(time_unit: TimeUnit, run_ends_type: DataType) -> Field {
        assert!(run_ends_type.is_run_ends_type());

        struct_field([
            utc_timestamp_field(time_unit),
            offset_field(
                run_end_encoded_type(run_ends_type, DataType::Int16),
                false,
            ),
        ])
    }

    /// Attaches the extension to `field` and reads it back again.
    fn assert_round_trips(mut field: Field) -> Result<(), ArrowError> {
        field.try_with_extension_type(TimestampWithOffset)?;
        field.try_extension_type::<TimestampWithOffset>()?;
        #[cfg(feature = "canonical_extension_types")]
        assert_eq!(
            field.try_canonical_extension_type()?,
            CanonicalExtensionType::TimestampWithOffset(TimestampWithOffset)
        );
        Ok(())
    }

    #[test]
    fn valid_primitive_offsets() -> Result<(), ArrowError> {
        for time_unit in TIME_UNITS {
            assert_round_trips(make_valid_field_primitive(time_unit))?;
        }

        Ok(())
    }

    #[test]
    fn valid_dict_encoded_offsets() -> Result<(), ArrowError> {
        let key_types = [
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
        ];

        for time_unit in TIME_UNITS {
            for key_type in &key_types {
                assert_round_trips(make_valid_field_dict_encoded(time_unit, key_type.clone()))?;
            }
        }

        Ok(())
    }

    #[test]
    fn valid_run_end_encoded_offsets() -> Result<(), ArrowError> {
        let run_ends_types = [DataType::Int16, DataType::Int32, DataType::Int64];

        for time_unit in TIME_UNITS {
            for run_ends_type in &run_ends_types {
                assert_round_trips(make_valid_field_run_end_encoded(
                    time_unit,
                    run_ends_type.clone(),
                ))?;
            }
        }

        Ok(())
    }

    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn missing_name() {
        let field = make_valid_field_primitive(TimeUnit::Second)
            .with_metadata([(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())].into());
        field.extension_type::<TimestampWithOffset>();
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Boolean"
    )]
    fn invalid_type_top_level() {
        Field::new("", DataType::Boolean, false).with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_struct_field_count() {
        struct_field([Field::new("", DataType::Int16, false)])
            .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_timestamp_type() {
        struct_field([
            Field::new(TIMESTAMP_FIELD_NAME, DataType::Int16, false),
            offset_field(DataType::Int16, false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_type() {
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(DataType::UInt64, false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_key_dict_encoded() {
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(
                dictionary_type(DataType::Boolean, DataType::Int16),
                false,
            ),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_offset_value_dict_encoded() {
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(dictionary_type(DataType::UInt8, DataType::Int32), false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_run_ends_run_end_encoded() {
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(
                run_end_encoded_type(DataType::Boolean, DataType::Int16),
                false,
            ),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_values_run_end_encoded() {
        // The run-ends type is deliberately valid (`Int16`) so that the `Int32`
        // values type is the only reason this storage type is rejected.
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(
                run_end_encoded_type(DataType::Int16, DataType::Int32),
                false,
            ),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_nullable_timestamp() {
        struct_field([
            timestamp_field(TimeUnit::Second, Some("UTC"), true),
            offset_field(DataType::Int16, false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_nullable_offset() {
        struct_field([
            utc_timestamp_field(TimeUnit::Second),
            offset_field(DataType::Int16, true),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_no_timezone() {
        struct_field([
            timestamp_field(TimeUnit::Second, None, false),
            offset_field(DataType::Int16, false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    #[test]
    #[should_panic(
        expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
    )]
    fn invalid_type_wrong_timezone() {
        struct_field([
            timestamp_field(TimeUnit::Second, Some("America/Sao_Paulo"), false),
            offset_field(DataType::Int16, false),
        ])
        .with_extension_type(TimestampWithOffset);
    }

    /// A field that carries only the extension name (no metadata key) is accepted.
    #[test]
    fn no_metadata() {
        let field = make_valid_field_primitive(TimeUnit::Second).with_metadata(
            [(
                EXTENSION_TYPE_NAME_KEY.to_owned(),
                TimestampWithOffset::NAME.to_owned(),
            )]
            .into(),
        );
        field.extension_type::<TimestampWithOffset>();
    }

    /// A field whose metadata key is present but empty is accepted.
    #[test]
    fn empty_metadata() {
        let field = make_valid_field_primitive(TimeUnit::Second).with_metadata(
            [
                (
                    EXTENSION_TYPE_NAME_KEY.to_owned(),
                    TimestampWithOffset::NAME.to_owned(),
                ),
                (EXTENSION_TYPE_METADATA_KEY.to_owned(), String::new()),
            ]
            .into(),
        );
        field.extension_type::<TimestampWithOffset>();
    }
}