parquet/file/metadata/
memory.rs1use crate::basic::{ColumnOrder, Compression, Encoding, PageType};
22use crate::data_type::private::ParquetValueType;
23use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData};
24use crate::file::page_encoding_stats::PageEncodingStats;
25use crate::file::page_index::index::{Index, NativeIndex, PageIndex};
26use crate::file::page_index::offset_index::OffsetIndexMetaData;
27use crate::file::statistics::{Statistics, ValueStatistics};
28use crate::format::{BoundaryOrder, PageLocation, SortingColumn};
29use std::sync::Arc;
30
/// Reports how much heap memory a value owns.
///
/// Implementations in this file return only the bytes reachable on the heap
/// from the value. The inline size of the value itself is accounted for by
/// whatever holds it: for example, the `Vec<T>` impl adds
/// `capacity * size_of::<T>()` on top of each element's own `heap_size`.
pub trait HeapSize {
    /// Returns the number of heap-allocated bytes owned by `self`,
    /// not counting `size_of::<Self>()`.
    fn heap_size(&self) -> usize;
}
40
41impl<T: HeapSize> HeapSize for Vec<T> {
42 fn heap_size(&self) -> usize {
43 let item_size = std::mem::size_of::<T>();
44 (self.capacity() * item_size) +
46 self.iter().map(|t| t.heap_size()).sum::<usize>()
48 }
49}
50
51impl<T: HeapSize> HeapSize for Arc<T> {
52 fn heap_size(&self) -> usize {
53 self.as_ref().heap_size()
54 }
55}
56
57impl<T: HeapSize> HeapSize for Option<T> {
58 fn heap_size(&self) -> usize {
59 self.as_ref().map(|inner| inner.heap_size()).unwrap_or(0)
60 }
61}
62
63impl HeapSize for String {
64 fn heap_size(&self) -> usize {
65 self.capacity()
66 }
67}
68
69impl HeapSize for FileMetaData {
70 fn heap_size(&self) -> usize {
71 self.created_by.heap_size()
72 + self.key_value_metadata.heap_size()
73 + self.schema_descr.heap_size()
74 + self.column_orders.heap_size()
75 }
76}
77
78impl HeapSize for KeyValue {
79 fn heap_size(&self) -> usize {
80 self.key.heap_size() + self.value.heap_size()
81 }
82}
83
84impl HeapSize for RowGroupMetaData {
85 fn heap_size(&self) -> usize {
86 self.columns.heap_size() + self.sorting_columns.heap_size()
89 }
90}
91
92impl HeapSize for ColumnChunkMetaData {
93 fn heap_size(&self) -> usize {
94 self.encodings.heap_size()
97 + self.file_path.heap_size()
98 + self.compression.heap_size()
99 + self.statistics.heap_size()
100 + self.encoding_stats.heap_size()
101 + self.unencoded_byte_array_data_bytes.heap_size()
102 + self.repetition_level_histogram.heap_size()
103 + self.definition_level_histogram.heap_size()
104 }
105}
106
107impl HeapSize for Encoding {
108 fn heap_size(&self) -> usize {
109 0 }
111}
112
113impl HeapSize for PageEncodingStats {
114 fn heap_size(&self) -> usize {
115 self.page_type.heap_size() + self.encoding.heap_size()
116 }
117}
118
119impl HeapSize for SortingColumn {
120 fn heap_size(&self) -> usize {
121 0 }
123}
124impl HeapSize for Compression {
125 fn heap_size(&self) -> usize {
126 0 }
128}
129
130impl HeapSize for PageType {
131 fn heap_size(&self) -> usize {
132 0 }
134}
135impl HeapSize for Statistics {
136 fn heap_size(&self) -> usize {
137 match self {
138 Statistics::Boolean(value_statistics) => value_statistics.heap_size(),
139 Statistics::Int32(value_statistics) => value_statistics.heap_size(),
140 Statistics::Int64(value_statistics) => value_statistics.heap_size(),
141 Statistics::Int96(value_statistics) => value_statistics.heap_size(),
142 Statistics::Float(value_statistics) => value_statistics.heap_size(),
143 Statistics::Double(value_statistics) => value_statistics.heap_size(),
144 Statistics::ByteArray(value_statistics) => value_statistics.heap_size(),
145 Statistics::FixedLenByteArray(value_statistics) => value_statistics.heap_size(),
146 }
147 }
148}
149
150impl HeapSize for OffsetIndexMetaData {
151 fn heap_size(&self) -> usize {
152 self.page_locations.heap_size() + self.unencoded_byte_array_data_bytes.heap_size()
153 }
154}
155
156impl HeapSize for Index {
157 fn heap_size(&self) -> usize {
158 match self {
159 Index::NONE => 0,
160 Index::BOOLEAN(native_index) => native_index.heap_size(),
161 Index::INT32(native_index) => native_index.heap_size(),
162 Index::INT64(native_index) => native_index.heap_size(),
163 Index::INT96(native_index) => native_index.heap_size(),
164 Index::FLOAT(native_index) => native_index.heap_size(),
165 Index::DOUBLE(native_index) => native_index.heap_size(),
166 Index::BYTE_ARRAY(native_index) => native_index.heap_size(),
167 Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(),
168 }
169 }
170}
171
172impl<T: ParquetValueType> HeapSize for NativeIndex<T> {
173 fn heap_size(&self) -> usize {
174 self.indexes.heap_size() + self.boundary_order.heap_size()
175 }
176}
177
178impl<T: ParquetValueType> HeapSize for PageIndex<T> {
179 fn heap_size(&self) -> usize {
180 self.min.heap_size() + self.max.heap_size() + self.null_count.heap_size()
181 }
182}
183
184impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
185 fn heap_size(&self) -> usize {
186 self.min_opt().map(T::heap_size).unwrap_or(0)
187 + self.max_opt().map(T::heap_size).unwrap_or(0)
188 }
189}
190impl HeapSize for bool {
191 fn heap_size(&self) -> usize {
192 0 }
194}
195impl HeapSize for i32 {
196 fn heap_size(&self) -> usize {
197 0 }
199}
200impl HeapSize for i64 {
201 fn heap_size(&self) -> usize {
202 0 }
204}
205
206impl HeapSize for f32 {
207 fn heap_size(&self) -> usize {
208 0 }
210}
211impl HeapSize for f64 {
212 fn heap_size(&self) -> usize {
213 0 }
215}
216
217impl HeapSize for usize {
218 fn heap_size(&self) -> usize {
219 0 }
221}
222
223impl HeapSize for BoundaryOrder {
224 fn heap_size(&self) -> usize {
225 0 }
227}
228
229impl HeapSize for PageLocation {
230 fn heap_size(&self) -> usize {
231 0 }
233}
234
235impl HeapSize for ColumnOrder {
236 fn heap_size(&self) -> usize {
237 0 }
239}