arrow_buffer/alloc/
alignment.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// NOTE: The code below is written with spatial/temporal prefetcher optimizations in mind. Memory
// allocation should align well with the cache access patterns and the block sizes of each storage
// layer, from registers to non-volatile memory. These alignments are all cache-aware alignments
// incorporated from the [cuneiform](https://crates.io/crates/cuneiform) crate. This approach mimics
// Intel TBB's cache_aligned_allocator, which exploits cache locality and minimizes prefetch
// signals, resulting in less round-trip time between the layers of storage.
// For further info: https://software.intel.com/en-us/node/506094
25
// 32-bit x86: everything other than the NetBurst microarchitecture uses 64 bytes.
/// Cache and allocation multiple alignment size, in bytes (64 on 32-bit x86).
#[cfg(target_arch = "x86")]
pub const ALIGNMENT: usize = 64;
30
// Intel x86_64 -- L2D streamer fed from L1:
// The streamer loads data or instructions from memory into the second-level cache. To make
// use of it, organize the data or instructions in 128-byte blocks aligned on 128 bytes.
// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
/// Cache and allocation multiple alignment size, in bytes (128 on x86_64).
#[cfg(target_arch = "x86_64")]
pub const ALIGNMENT: usize = 128;
39
// MIPS 24Kc, data line size:
// - https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
// - https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
// Both the 32-bit and the 64-bit MIPS targets share the same value, so they share one definition.
/// Cache and allocation multiple alignment size, in bytes (32 on mips and mips64).
#[cfg(any(target_arch = "mips", target_arch = "mips64"))]
pub const ALIGNMENT: usize = 32;
50
// Default value for 32-bit PowerPC.
/// Cache and allocation multiple alignment size, in bytes (32 on powerpc).
#[cfg(target_arch = "powerpc")]
pub const ALIGNMENT: usize = 32;
55
// Default value for 64-bit PowerPC.
/// Cache and allocation multiple alignment size, in bytes (64 on powerpc64).
#[cfg(target_arch = "powerpc64")]
pub const ALIGNMENT: usize = 64;
60
// Taken from, for example, the SiFive L2 cache binding:
// - https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
// In general all riscv64 implementations use the same value.
/// Cache and allocation multiple alignment size, in bytes (64 on riscv64).
#[cfg(target_arch = "riscv64")]
pub const ALIGNMENT: usize = 64;
67
// s390x: the size is the same across all hardware of this architecture.
// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
/// Cache and allocation multiple alignment size, in bytes (256 on s390x).
#[cfg(target_arch = "s390x")]
pub const ALIGNMENT: usize = 256;
73
// SPARC: the size is the same across all hardware of each of these architectures.
// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
/// Cache and allocation multiple alignment size, in bytes (32 on sparc).
#[cfg(target_arch = "sparc")]
pub const ALIGNMENT: usize = 32;
/// Cache and allocation multiple alignment size, in bytes (64 on sparc64).
#[cfg(target_arch = "sparc64")]
pub const ALIGNMENT: usize = 64;
82
// wasm32: the operating system's cache size determines the right value, and there is
// currently no way to find it out without runtime inference.
/// Cache and allocation multiple alignment size, in bytes (64 on wasm32).
#[cfg(target_arch = "wasm32")]
pub const ALIGNMENT: usize = 64;
88
// 32-bit ARM: same as v6 and v7.
// The following all use this size:
// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN.
/// Cache and allocation multiple alignment size, in bytes (32 on arm).
#[cfg(target_arch = "arm")]
pub const ALIGNMENT: usize = 32;
95
// nvptx64: the line is combined from 4 sectors; Volta documents 128.
// To avoid chunk optimizations it is better to go with the default size.
// For smaller data with less padded functionality, 32 can be used with the force option.
// - https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
/// Cache and allocation multiple alignment size, in bytes (128 on nvptx64).
#[cfg(target_arch = "nvptx64")]
pub const ALIGNMENT: usize = 128;
103
// aarch64: the size is the same across all hardware of this architecture.
/// Cache and allocation multiple alignment size, in bytes (64 on aarch64).
#[cfg(target_arch = "aarch64")]
pub const ALIGNMENT: usize = 64;
108
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "loongarch64")]
pub const ALIGNMENT: usize = 1 << 6;

// Fallback for every architecture that has no dedicated entry in this file (for example
// riscv32, arm64ec or wasm64). Without it `ALIGNMENT` would be undefined on those targets
// and the crate would fail to compile. 64 bytes is the most common value among the
// entries in this file.
// NOTE: when adding a dedicated entry for a new architecture, also add it to the list below.
/// Cache and allocation multiple alignment size
#[cfg(not(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "riscv64",
    target_arch = "s390x",
    target_arch = "sparc",
    target_arch = "sparc64",
    target_arch = "wasm32",
    target_arch = "arm",
    target_arch = "nvptx64",
    target_arch = "aarch64",
    target_arch = "loongarch64",
)))]
pub const ALIGNMENT: usize = 1 << 6;