Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
memory.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_UTIL_MEMORY_H
19 #define ARROW_UTIL_MEMORY_H
20 
21 #include <thread>
22 #include <vector>
23 
24 #include "arrow/util/thread-pool.h"
25 
26 namespace arrow {
27 namespace internal {
28 
// Returns `address` with its numeric value bitwise-ANDed with `bits`.
//
// Used to round a pointer down to an alignment boundary by passing a mask
// such as `~(alignment - 1)`.
//
// \param address the pointer whose address is masked
// \param bits the mask applied to the integer value of `address`
// \return the masked address as a mutable byte pointer
//
// Declared `inline` because this function is defined in a header: without it
// every translation unit including this file would emit its own external
// definition and the program would fail to link.
inline uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
  uintptr_t value = reinterpret_cast<uintptr_t>(address);
  return reinterpret_cast<uint8_t*>(value & bits);
}
33 
34 // A helper function for doing memcpy with multiple threads. This is required
35 // to saturate the memory bandwidth of modern cpus.
36 void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
37  uintptr_t block_size, int num_threads) {
38  // XXX This function is really using `num_threads + 1` threads.
39  auto pool = GetCpuThreadPool();
40 
41  uint8_t* left = pointer_logical_and(src + block_size - 1, ~(block_size - 1));
42  uint8_t* right = pointer_logical_and(src + nbytes, ~(block_size - 1));
43  int64_t num_blocks = (right - left) / block_size;
44 
45  // Update right address
46  right = right - (num_blocks % num_threads) * block_size;
47 
48  // Now we divide these blocks between available threads. The remainder is
49  // handled separately.
50  int64_t chunk_size = (right - left) / num_threads;
51  int64_t prefix = left - src;
52  int64_t suffix = src + nbytes - right;
53  // Now the data layout is | prefix | k * num_threads * block_size | suffix |.
54  // We have chunk_size = k * block_size, therefore the data layout is
55  // | prefix | num_threads * chunk_size | suffix |.
56  // Each thread gets a "chunk" of k blocks.
57 
58  // Start all parallel memcpy tasks and handle leftovers while threads run.
59  std::vector<std::future<void*>> futures;
60 
61  for (int i = 0; i < num_threads; i++) {
62  futures.push_back(pool->Submit(memcpy, dst + prefix + i * chunk_size,
63  left + i * chunk_size, chunk_size));
64  }
65  memcpy(dst, src, prefix);
66  memcpy(dst + prefix + num_threads * chunk_size, right, suffix);
67 
68  for (auto& fut : futures) {
69  fut.get();
70  }
71 }
72 
73 } // namespace internal
74 } // namespace arrow
75 
76 #endif // ARROW_UTIL_MEMORY_H
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32