Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
datetime.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef PYARROW_UTIL_DATETIME_H
19 #define PYARROW_UTIL_DATETIME_H
20 
21 #include <algorithm>
22 #include <sstream>
23 
24 #include <datetime.h>
25 #include "arrow/python/platform.h"
26 #include "arrow/status.h"
27 #include "arrow/util/logging.h"
28 
29 namespace arrow {
30 namespace py {
31 
32 // The following code is adapted from
33 // https://github.com/numpy/numpy/blob/master/numpy/core/src/multiarray/datetime.c
34 
// Length in days of each calendar month, January first.
// Row 0 holds the lengths for a regular year and row 1 those for a leap
// year, so a row can be picked directly with the result of is_leapyear().
static int64_t _days_per_month_table[2][12] = {
    // Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},  // regular year
    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},  // leap year
};
39 
// Returns true when `year` is a leap year under the Gregorian rules:
// divisible by 4, except century years, which must also be divisible by 400.
static bool is_leapyear(int64_t year) {
  // The low two bits are zero exactly when the year is a multiple of 4.
  if ((year & 0x3) != 0) {
    return false;
  }
  // A multiple of 4 that is not a century year is always a leap year.
  if ((year % 100) != 0) {
    return true;
  }
  // Century years are leap years only when divisible by 400.
  return (year % 400) == 0;
}
44 
45 // Calculates the days offset from the 1970 epoch.
46 static int64_t get_days_from_date(int64_t date_year, int64_t date_month,
47  int64_t date_day) {
48  int64_t i, month;
49  int64_t year, days = 0;
50  int64_t* month_lengths;
51 
52  year = date_year - 1970;
53  days = year * 365;
54 
55  // Adjust for leap years
56  if (days >= 0) {
57  // 1968 is the closest leap year before 1970.
58  // Exclude the current year, so add 1.
59  year += 1;
60  // Add one day for each 4 years
61  days += year / 4;
62  // 1900 is the closest previous year divisible by 100
63  year += 68;
64  // Subtract one day for each 100 years
65  days -= year / 100;
66  // 1600 is the closest previous year divisible by 400
67  year += 300;
68  // Add one day for each 400 years
69  days += year / 400;
70  } else {
71  // 1972 is the closest later year after 1970.
72  // Include the current year, so subtract 2.
73  year -= 2;
74  // Subtract one day for each 4 years
75  days += year / 4;
76  // 2000 is the closest later year divisible by 100
77  year -= 28;
78  // Add one day for each 100 years
79  days -= year / 100;
80  // 2000 is also the closest later year divisible by 400
81  // Subtract one day for each 400 years
82  days += year / 400;
83  }
84 
85  month_lengths = _days_per_month_table[is_leapyear(date_year)];
86  month = date_month - 1;
87 
88  // Add the months
89  for (i = 0; i < month; ++i) {
90  days += month_lengths[i];
91  }
92 
93  // Add the days
94  days += date_day - 1;
95 
96  return days;
97 }
98 
// Splits a day count relative to 1970-01-01 into a year and a day offset.
// On return '*days_' holds the zero-based day offset within the year, and
// the function result is that year.
static int64_t days_to_yearsdays(int64_t* days_) {
  const int64_t kDaysPer400Years = 400 * 365 + 100 - 4 + 1;
  const int64_t kDaysPer100Years = 100 * 365 + 25 - 1;
  const int64_t kDaysPer4Years = 4 * 365 + 1;
  // Days from 1970-01-01 to 2000-01-01 (30 years containing 7 leap days).
  const int64_t kEpochTo2000 = 365 * 30 + 7;

  // Re-base on the year 2000, which starts a 400 year cycle.
  int64_t rem = *days_ - kEpochTo2000;

  // Floor-divide by the 400 year cycle length: division truncates toward
  // zero, so step one cycle back whenever the remainder comes out negative.
  int64_t cycles = rem / kDaysPer400Years;
  rem %= kDaysPer400Years;
  if (rem < 0) {
    rem += kDaysPer400Years;
    --cycles;
  }
  int64_t year = 400 * cycles;

  // Locate the year inside the cycle. The first year of the cycle is a leap
  // year (366 days); anything past it is narrowed down by century, then by
  // 4 year group, then by single year.
  if (rem >= 366) {
    year += 100 * ((rem - 1) / kDaysPer100Years);
    rem = (rem - 1) % kDaysPer100Years;
    if (rem >= 365) {
      year += 4 * ((rem + 1) / kDaysPer4Years);
      rem = (rem + 1) % kDaysPer4Years;
      if (rem >= 366) {
        year += (rem - 1) / 365;
        rem = (rem - 1) % 365;
      }
    }
  }

  *days_ = rem;
  return year + 2000;
}
136 
137 // Extracts the month and year and day number from a number of days
138 static void get_date_from_days(int64_t days, int64_t* date_year, int64_t* date_month,
139  int64_t* date_day) {
140  int64_t *month_lengths, i;
141 
142  *date_year = days_to_yearsdays(&days);
143  month_lengths = _days_per_month_table[is_leapyear(*date_year)];
144 
145  for (i = 0; i < 12; ++i) {
146  if (days < month_lengths[i]) {
147  *date_month = i + 1;
148  *date_day = days + 1;
149  return;
150  } else {
151  days -= month_lengths[i];
152  }
153  }
154 
155  // Should never get here
156  return;
157 }
158 
159 static inline int64_t PyTime_to_us(PyObject* pytime) {
160  return (static_cast<int64_t>(PyDateTime_TIME_GET_HOUR(pytime)) * 3600000000LL +
161  static_cast<int64_t>(PyDateTime_TIME_GET_MINUTE(pytime)) * 60000000LL +
162  static_cast<int64_t>(PyDateTime_TIME_GET_SECOND(pytime)) * 1000000LL +
163  PyDateTime_TIME_GET_MICROSECOND(pytime));
164 }
165 
// Splits a time quantity into a larger unit and a remainder, for example
// total seconds into minutes and remaining seconds. After
//   int64_t remaining = split_time(total, quotient, &next)
// the values satisfy
//   total == next * quotient + remaining.
// A negative total is pushed into `next`: `next` becomes negative and, for
// the positive divisors used in this file, the returned remainder is never
// negative.
static inline int64_t split_time(int64_t total, int64_t quotient, int64_t* next) {
  int64_t whole = total / quotient;
  int64_t rest = total % quotient;
  // Division truncates toward zero; borrow one unit from `whole` so that a
  // negative remainder becomes non-negative.
  if (rest < 0) {
    --whole;
    rest += quotient;
  }
  *next = whole;
  return rest;
}
183 
184 static inline Status PyTime_convert_int(int64_t val, const TimeUnit::type unit,
185  int64_t* hour, int64_t* minute, int64_t* second,
186  int64_t* microsecond) {
187  switch (unit) {
188  case TimeUnit::NANO:
189  if (val % 1000 != 0) {
190  std::stringstream ss;
191  ss << "Value " << val << " has non-zero nanoseconds";
192  return Status::Invalid(ss.str());
193  }
194  val /= 1000;
195  // fall through
196  case TimeUnit::MICRO:
197  *microsecond = split_time(val, 1000000LL, &val);
198  *second = split_time(val, 60, &val);
199  *minute = split_time(val, 60, hour);
200  break;
201  case TimeUnit::MILLI:
202  *microsecond = split_time(val, 1000, &val) * 1000;
203  // fall through
204  case TimeUnit::SECOND:
205  *second = split_time(val, 60, &val);
206  *minute = split_time(val, 60, hour);
207  break;
208  default:
209  break;
210  }
211  return Status::OK();
212 }
213 
214 static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit,
215  PyObject** out) {
216  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
217  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
218  *out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute),
219  static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
220  return Status::OK();
221 }
222 
223 static inline Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit,
224  PyObject** out) {
225  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
226  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
227  int64_t total_days = 0;
228  hour = split_time(hour, 24, &total_days);
229  int64_t year = 0, month = 0, day = 0;
230  get_date_from_days(total_days, &year, &month, &day);
231  *out = PyDateTime_FromDateAndTime(
232  static_cast<int32_t>(year), static_cast<int32_t>(month), static_cast<int32_t>(day),
233  static_cast<int32_t>(hour), static_cast<int32_t>(minute),
234  static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
235  return Status::OK();
236 }
237 
238 static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
239  int64_t total_seconds = 0;
240  total_seconds += PyDateTime_DATE_GET_SECOND(pydate);
241  total_seconds += PyDateTime_DATE_GET_MINUTE(pydate) * 60;
242  total_seconds += PyDateTime_DATE_GET_HOUR(pydate) * 3600;
243  int64_t days =
244  get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
245  PyDateTime_GET_DAY(pydate));
246  total_seconds += days * 24 * 3600;
247  return total_seconds * 1000;
248 }
249 
250 static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
251  int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
252  int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
253  return ms * 1000 + us;
254 }
255 
256 static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {
257  return static_cast<int32_t>(PyDate_to_ms(pydate) / 86400000LL);
258 }
259 
260 } // namespace py
261 } // namespace arrow
262 
263 #endif // PYARROW_UTIL_DATETIME_H
Definition: type.h:597
type
Definition: type.h:597
Definition: type.h:597
#define RETURN_NOT_OK(s)
Definition: status.h:66
static Status OK()
Definition: status.h:119
static Status Invalid(const std::string &msg)
Definition: status.h:142
Definition: type.h:597
Definition: type.h:597
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29