Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
sse-util.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // From Apache Impala as of 2016-01-29. Pared down to a minimal set of
19 // functions needed for parquet-cpp
20 
21 #ifndef ARROW_UTIL_SSE_UTIL_H
22 #define ARROW_UTIL_SSE_UTIL_H
23 
24 #ifdef ARROW_USE_SSE
25 #include <emmintrin.h>
26 #endif
27 
28 namespace arrow {
29 
/// Constants describing SSE register capacity and the immediate control byte
/// of the PCMPESTRI/PCMPESTRM string-comparison instructions.
namespace SSEUtil {
/// Number of 8-bit characters that fit in a 64-bit and a 128-bit register.
static const int CHARS_PER_64_BIT_REGISTER = 8;
static const int CHARS_PER_128_BIT_REGISTER = 16;

/// Individual fields of the PCMPxSTRx control byte (see Intel SDM chapter 4.1).
static const int PCMPSTR_EQUAL_ANY = 0x00;     // strchr
static const int PCMPSTR_EQUAL_EACH = 0x08;    // strcmp
static const int PCMPSTR_UBYTE_OPS = 0x00;     // unsigned char (8-bits, rather than 16)
static const int PCMPSTR_NEG_POLARITY = 0x10;  // see Intel SDM chapter 4.1.4.

/// Control byte for a strchr-style search: unsigned bytes, "equal any".
static const int STRCHR_MODE = PCMPSTR_UBYTE_OPS | PCMPSTR_EQUAL_ANY;

/// Control byte for a strcmp-style comparison: unsigned bytes, "equal each",
/// with the result negated so that set bits mark differing positions.
static const int STRCMP_MODE =
    PCMPSTR_UBYTE_OPS | PCMPSTR_EQUAL_EACH | PCMPSTR_NEG_POLARITY;

/// SSE_BITMASK[i] has only bit i set, one entry per character of a 128-bit register.
static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER] = {
    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
};
}  // namespace SSEUtil
60 
61 #ifdef ARROW_USE_SSE
62 
66 #ifndef IR_COMPILE
67 
// Function qualifier that combines `inline` with the GCC/Clang always_inline
// attribute.
// NOTE(review): nothing between this #define and the matching #undef further
// down uses the macro in this view of the file -- confirm whether it is still
// needed.
76 #define SSE_ALWAYS_INLINE inline __attribute__((__always_inline__))
77 
/// Executes PCMPESTRM on two explicit-length strings through inline assembly
/// and returns the mask the instruction leaves in xmm0.
///
/// \tparam MODE immediate control byte encoded into the instruction
/// \param str1 first string, passed in an SSE register
/// \param len1 length of str1, passed in EAX
/// \param str2 second string, passed in an SSE register or memory
/// \param len2 length of str2, passed in EDX
/// \return the contents of xmm0 after the instruction
template <int MODE>
static inline __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int len2) {
#ifdef __clang__
  // Bind the output variable to xmm0 explicitly, since PCMPESTRM always writes
  // its result there.
  register volatile __m128i mask asm("xmm0");
  __asm__ volatile("pcmpestrm %[mode], %[rhs], %[lhs]"
                   : [mask] "=x"(mask)
                   : [lhs] "x"(str1), [rhs] "xm"(str2), "a"(len1), "d"(len2),
                     [mode] "i"(MODE)
                   : "cc");
#else
  // The "Yz" constraint names the first SSE register (xmm0) directly.
  __m128i mask;
  __asm__ volatile("pcmpestrm %[mode], %[rhs], %[lhs]"
                   : [mask] "=Yz"(mask)
                   : [lhs] "x"(str1), [rhs] "xm"(str2), "a"(len1), "d"(len2),
                     [mode] "i"(MODE)
                   : "cc");
#endif
  return mask;
}
97 
/// Executes PCMPESTRI on two explicit-length strings through inline assembly
/// and returns the index the instruction leaves in ECX.
///
/// \tparam MODE immediate control byte encoded into the instruction
/// \param str1 first string, passed in an SSE register
/// \param len1 length of str1, passed in EAX
/// \param str2 second string, passed in an SSE register or memory
/// \param len2 length of str2, passed in EDX
/// \return the value of ECX after the instruction
template <int MODE>
static inline int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2) {
  int index;
  __asm__("pcmpestri %[mode], %[rhs], %[lhs]"
          : [index] "=c"(index)
          : [lhs] "x"(str1), [rhs] "xm"(str2), "a"(len1), "d"(len2),
            [mode] "i"(MODE)
          : "cc");
  return index;
}
107 
/// Folds one byte into a running CRC using the CRC32 instruction (byte form),
/// issued through inline assembly.
///
/// \param crc running CRC value; updated in place by the instruction
/// \param v byte to accumulate
/// \return the updated CRC
static inline uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v) {
  __asm__("crc32b %[val], %[crc]" : [crc] "+r"(crc) : [val] "rm"(v));
  return crc;
}
112 
/// Folds one 16-bit word into a running CRC using the CRC32 instruction
/// (word form), issued through inline assembly.
///
/// \param crc running CRC value; updated in place by the instruction
/// \param v 16-bit value to accumulate
/// \return the updated CRC
static inline uint32_t SSE4_crc32_u16(uint32_t crc, uint16_t v) {
  __asm__("crc32w %[val], %[crc]" : [crc] "+r"(crc) : [val] "rm"(v));
  return crc;
}
117 
/// Folds one 32-bit word into a running CRC using the CRC32 instruction
/// (doubleword form), issued through inline assembly.
///
/// \param crc running CRC value; updated in place by the instruction
/// \param v 32-bit value to accumulate
/// \return the updated CRC
static inline uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v) {
  __asm__("crc32l %[val], %[crc]" : [crc] "+r"(crc) : [val] "rm"(v));
  return crc;
}
122 
/// Folds one 64-bit word into a running CRC using the CRC32 instruction
/// (quadword form), issued through inline assembly.
///
/// \param crc running CRC value
/// \param v 64-bit value to accumulate
/// \return the updated CRC, converted back to 32 bits
static inline uint32_t SSE4_crc32_u64(uint32_t crc, uint64_t v) {
  // The quadword form takes a 64-bit destination, so widen the running CRC
  // before handing it to the instruction.
  uint64_t wide_crc = crc;
  __asm__("crc32q %[val], %[crc]" : [crc] "+r"(wide_crc) : [val] "rm"(v));
  return wide_crc;
}
128 
/// Counts the set bits of a 64-bit value using the POPCNT instruction, issued
/// through inline assembly.
///
/// \param a value whose set bits are counted
/// \return the number of set bits in a
static inline int64_t POPCNT_popcnt_u64(uint64_t a) {
  int64_t bit_count;
  __asm__("popcntq %[src], %[dst]" : [dst] "=r"(bit_count) : [src] "mr"(a) : "cc");
  return bit_count;
}
134 
135 #undef SSE_ALWAYS_INLINE
136 
137 #elif defined(__SSE4_2__) // IR_COMPILE for SSE 4.2.
138 
143 #include <smmintrin.h>
144 
/// IR_COMPILE variant for targets that define __SSE4_2__: forwards directly to
/// the _mm_cmpestrm compiler intrinsic.
///
/// \tparam MODE immediate control value passed as the intrinsic's last argument
/// \return whatever _mm_cmpestrm returns for the given strings and lengths
template <int MODE>
static inline __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int len2) {
  const __m128i mask = _mm_cmpestrm(str1, len1, str2, len2, MODE);
  return mask;
}
149 
/// IR_COMPILE variant for targets that define __SSE4_2__: forwards directly to
/// the _mm_cmpestri compiler intrinsic.
///
/// \tparam MODE immediate control value passed as the intrinsic's last argument
/// \return whatever _mm_cmpestri returns for the given strings and lengths
template <int MODE>
static inline int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2) {
  const int index = _mm_cmpestri(str1, len1, str2, len2, MODE);
  return index;
}
154 
// In this branch (IR_COMPILE with __SSE4_2__ defined) the SSE4_*/POPCNT_* names
// are plain macro aliases, so every use expands to the corresponding _mm_*
// compiler intrinsic.
// NOTE(review): the intrinsics are expected to be declared by the <smmintrin.h>
// include earlier in this branch -- confirm that holds for every supported
// compiler.
// NOTE(review): the intrinsics' return types may differ from the
// uint32_t/int64_t signatures used by the other branches -- verify callers do
// not depend on the exact type.
155 #define SSE4_crc32_u8 _mm_crc32_u8
156 #define SSE4_crc32_u16 _mm_crc32_u16
157 #define SSE4_crc32_u32 _mm_crc32_u32
158 #define SSE4_crc32_u64 _mm_crc32_u64
159 #define POPCNT_popcnt_u64 _mm_popcnt_u64
160 
161 #else // IR_COMPILE without SSE 4.2.
162 
167 template <int MODE>
168 static inline __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int len2) {
169  DCHECK(false) << "CPU doesn't support SSE 4.2";
170  return (__m128i){0}; // NOLINT
171 }
172 
173 template <int MODE>
174 static inline int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2) {
175  DCHECK(false) << "CPU doesn't support SSE 4.2";
176  return 0;
177 }
178 
179 static inline uint32_t SSE4_crc32_u8(uint32_t, uint8_t) {
180  DCHECK(false) << "CPU doesn't support SSE 4.2";
181  return 0;
182 }
183 
184 static inline uint32_t SSE4_crc32_u16(uint32_t, uint16_t) {
185  DCHECK(false) << "CPU doesn't support SSE 4.2";
186  return 0;
187 }
188 
189 static inline uint32_t SSE4_crc32_u32(uint32_t, uint32_t) {
190  DCHECK(false) << "CPU doesn't support SSE 4.2";
191  return 0;
192 }
193 
194 static inline uint32_t SSE4_crc32_u64(uint32_t, uint64_t) {
195  DCHECK(false) << "CPU doesn't support SSE 4.2";
196  return 0;
197 }
198 
199 static inline int64_t POPCNT_popcnt_u64(uint64_t) {
200  DCHECK(false) << "CPU doesn't support SSE 4.2";
201  return 0;
202 }
203 
204 #endif // IR_COMPILE
205 
206 #else
207 
208 static inline uint32_t SSE4_crc32_u8(uint32_t, uint8_t) {
209  DCHECK(false) << "SSE support is not enabled";
210  return 0;
211 }
212 
213 static inline uint32_t SSE4_crc32_u16(uint32_t, uint16_t) {
214  DCHECK(false) << "SSE support is not enabled";
215  return 0;
216 }
217 
218 static inline uint32_t SSE4_crc32_u32(uint32_t, uint32_t) {
219  DCHECK(false) << "SSE support is not enabled";
220  return 0;
221 }
222 
223 static inline uint32_t SSE4_crc32_u64(uint32_t, uint64_t) {
224  DCHECK(false) << "SSE support is not enabled";
225  return 0;
226 }
227 
228 static inline int64_t POPCNT_popcnt_u64(uint64_t) {
229  DCHECK(false) << "SSE support is not enabled";
230  return 0;
231 }
232 
233 #endif // ARROW_USE_SSE
234 
235 } // namespace arrow
236 
237 #endif // ARROW_UTIL_SSE_UTIL_H
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
#define DCHECK(condition)
Definition: logging.h:78