parquet/file/metadata/
memory.rs1use crate::basic::{BoundaryOrder, ColumnOrder, Compression, Encoding, PageType};
22use crate::data_type::private::ParquetValueType;
23use crate::file::metadata::{
24 ColumnChunkMetaData, FileMetaData, KeyValue, PageEncodingStats, RowGroupMetaData, SortingColumn,
25};
26use crate::file::page_index::column_index::{
27 ByteArrayColumnIndex, ColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
28};
29use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation};
30use crate::file::statistics::{Statistics, ValueStatistics};
31use std::sync::Arc;
32
/// Reports how much heap memory a value owns.
///
/// Implementations in this module follow one convention: the inline size of
/// the value itself (`size_of::<Self>()`) is *not* part of the result. A
/// container that stores the value on the heap (for example the `Vec` and
/// `Box` impls) adds that inline size on the value's behalf.
pub trait HeapSize {
    /// Returns the number of heap bytes owned by `self`, including heap
    /// memory owned by nested values, but excluding the inline size of
    /// `self`.
    fn heap_size(&self) -> usize;
}
42
43impl<T: HeapSize> HeapSize for Vec<T> {
44 fn heap_size(&self) -> usize {
45 let item_size = std::mem::size_of::<T>();
46 (self.capacity() * item_size) +
48 self.iter().map(|t| t.heap_size()).sum::<usize>()
50 }
51}
52
53impl<T: HeapSize> HeapSize for Arc<T> {
54 fn heap_size(&self) -> usize {
55 self.as_ref().heap_size()
56 }
57}
58
59impl<T: HeapSize> HeapSize for Box<T> {
60 fn heap_size(&self) -> usize {
61 std::mem::size_of::<T>() + self.as_ref().heap_size()
62 }
63}
64
65impl<T: HeapSize> HeapSize for Option<T> {
66 fn heap_size(&self) -> usize {
67 self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0)
68 }
69}
70
71impl HeapSize for String {
72 fn heap_size(&self) -> usize {
73 self.capacity()
74 }
75}
76
77impl HeapSize for FileMetaData {
78 fn heap_size(&self) -> usize {
79 #[cfg(feature = "encryption")]
80 let encryption_heap_size =
81 self.encryption_algorithm.heap_size() + self.footer_signing_key_metadata.heap_size();
82 #[cfg(not(feature = "encryption"))]
83 let encryption_heap_size = 0;
84
85 self.created_by.heap_size()
86 + self.key_value_metadata.heap_size()
87 + self.schema_descr.heap_size()
88 + self.column_orders.heap_size()
89 + encryption_heap_size
90 }
91}
92
93impl HeapSize for KeyValue {
94 fn heap_size(&self) -> usize {
95 self.key.heap_size() + self.value.heap_size()
96 }
97}
98
99impl HeapSize for RowGroupMetaData {
100 fn heap_size(&self) -> usize {
101 self.columns.heap_size() + self.sorting_columns.heap_size()
104 }
105}
106
107impl HeapSize for ColumnChunkMetaData {
108 fn heap_size(&self) -> usize {
109 #[cfg(feature = "encryption")]
110 let encryption_heap_size =
111 self.column_crypto_metadata.heap_size() + self.encrypted_column_metadata.heap_size();
112 #[cfg(not(feature = "encryption"))]
113 let encryption_heap_size = 0;
114
115 self.encodings.heap_size()
118 + self.file_path.heap_size()
119 + self.compression.heap_size()
120 + self.statistics.heap_size()
121 + self.encoding_stats.heap_size()
122 + self.unencoded_byte_array_data_bytes.heap_size()
123 + self.repetition_level_histogram.heap_size()
124 + self.definition_level_histogram.heap_size()
125 + self.geo_statistics.heap_size()
126 + encryption_heap_size
127 }
128}
129
130impl HeapSize for Encoding {
131 fn heap_size(&self) -> usize {
132 0 }
134}
135
136impl HeapSize for PageEncodingStats {
137 fn heap_size(&self) -> usize {
138 self.page_type.heap_size() + self.encoding.heap_size()
139 }
140}
141
142impl HeapSize for SortingColumn {
143 fn heap_size(&self) -> usize {
144 0 }
146}
147impl HeapSize for Compression {
148 fn heap_size(&self) -> usize {
149 0 }
151}
152
153impl HeapSize for PageType {
154 fn heap_size(&self) -> usize {
155 0 }
157}
158
159impl HeapSize for Statistics {
160 fn heap_size(&self) -> usize {
161 match self {
162 Statistics::Boolean(value_statistics) => value_statistics.heap_size(),
163 Statistics::Int32(value_statistics) => value_statistics.heap_size(),
164 Statistics::Int64(value_statistics) => value_statistics.heap_size(),
165 Statistics::Int96(value_statistics) => value_statistics.heap_size(),
166 Statistics::Float(value_statistics) => value_statistics.heap_size(),
167 Statistics::Double(value_statistics) => value_statistics.heap_size(),
168 Statistics::ByteArray(value_statistics) => value_statistics.heap_size(),
169 Statistics::FixedLenByteArray(value_statistics) => value_statistics.heap_size(),
170 }
171 }
172}
173
174impl HeapSize for OffsetIndexMetaData {
175 fn heap_size(&self) -> usize {
176 self.page_locations.heap_size() + self.unencoded_byte_array_data_bytes.heap_size()
177 }
178}
179
180impl HeapSize for ColumnIndexMetaData {
181 fn heap_size(&self) -> usize {
182 match self {
183 Self::NONE => 0,
184 Self::BOOLEAN(native_index) => native_index.heap_size(),
185 Self::INT32(native_index) => native_index.heap_size(),
186 Self::INT64(native_index) => native_index.heap_size(),
187 Self::INT96(native_index) => native_index.heap_size(),
188 Self::FLOAT(native_index) => native_index.heap_size(),
189 Self::DOUBLE(native_index) => native_index.heap_size(),
190 Self::BYTE_ARRAY(native_index) => native_index.heap_size(),
191 Self::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(),
192 }
193 }
194}
195
196impl HeapSize for ColumnIndex {
197 fn heap_size(&self) -> usize {
198 self.null_pages.heap_size()
199 + self.boundary_order.heap_size()
200 + self.null_counts.heap_size()
201 + self.definition_level_histograms.heap_size()
202 + self.repetition_level_histograms.heap_size()
203 }
204}
205
206impl<T: ParquetValueType> HeapSize for PrimitiveColumnIndex<T> {
207 fn heap_size(&self) -> usize {
208 self.column_index.heap_size() + self.min_values.heap_size() + self.max_values.heap_size()
209 }
210}
211
212impl HeapSize for ByteArrayColumnIndex {
213 fn heap_size(&self) -> usize {
214 self.column_index.heap_size()
215 + self.min_bytes.heap_size()
216 + self.min_offsets.heap_size()
217 + self.max_bytes.heap_size()
218 + self.max_offsets.heap_size()
219 }
220}
221
222impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
223 fn heap_size(&self) -> usize {
224 self.min_opt().map(T::heap_size).unwrap_or(0)
225 + self.max_opt().map(T::heap_size).unwrap_or(0)
226 }
227}
228impl HeapSize for bool {
229 fn heap_size(&self) -> usize {
230 0 }
232}
233impl HeapSize for u8 {
234 fn heap_size(&self) -> usize {
235 0 }
237}
238impl HeapSize for i32 {
239 fn heap_size(&self) -> usize {
240 0 }
242}
243impl HeapSize for i64 {
244 fn heap_size(&self) -> usize {
245 0 }
247}
248
249impl HeapSize for f32 {
250 fn heap_size(&self) -> usize {
251 0 }
253}
254impl HeapSize for f64 {
255 fn heap_size(&self) -> usize {
256 0 }
258}
259
260impl HeapSize for usize {
261 fn heap_size(&self) -> usize {
262 0 }
264}
265
266impl HeapSize for BoundaryOrder {
267 fn heap_size(&self) -> usize {
268 0 }
270}
271
272impl HeapSize for PageLocation {
273 fn heap_size(&self) -> usize {
274 0 }
276}
277
278impl HeapSize for ColumnOrder {
279 fn heap_size(&self) -> usize {
280 0 }
282}