1use std::collections::HashMap;
19
20use arrow_schema::ArrowError;
21use indexmap::IndexSet;
22
23use crate::{VariantMetadata, int_size};
24
/// Appends the `nbytes` least-significant bytes of `value` to `buf`,
/// in little-endian order.
///
/// # Panics
///
/// Panics if `nbytes` is larger than the byte width of `usize`.
fn write_offset(buf: &mut Vec<u8>, value: usize, nbytes: u8) {
    let width = usize::from(nbytes);
    let little_endian = value.to_le_bytes();
    // Little-endian puts the low-order bytes first, so a prefix of the
    // array is exactly the truncated encoding.
    let low_bytes = &little_endian[..width];
    buf.extend(low_bytes.iter().copied());
}
30
/// Common interface over builders that manage a dictionary of field names
/// and hand out numeric field ids for them.
///
/// Two implementations live in this file: [`WritableMetadataBuilder`], which
/// grows its dictionary on demand, and [`ReadOnlyMetadataBuilder`], which only
/// resolves names against an existing [`VariantMetadata`].
pub trait MetadataBuilder: std::fmt::Debug {
    /// Returns the field id for `field_name`.
    ///
    /// The writable implementation inserts unknown names and always succeeds;
    /// the read-only implementation returns an error when the name is not in
    /// its dictionary.
    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError>;

    /// Returns the field name stored under `field_id`.
    ///
    /// Both visible implementations use indexing here, so an out-of-range id
    /// presumably panics (verify against the `Index` impls involved).
    fn field_name(&self, field_id: usize) -> &str;

    /// Returns the number of field names currently in the dictionary.
    fn num_field_names(&self) -> usize;

    /// Shrinks the dictionary to `new_size` field names.
    ///
    /// The writable implementation truncates its name set; the read-only
    /// implementation cannot change its dictionary and only debug-asserts
    /// that `new_size` equals the current dictionary length.
    fn truncate_field_names(&mut self, new_size: usize);

    /// Finalizes the metadata and returns a length in bytes.
    ///
    /// The writable implementation returns the length of its output buffer
    /// after serialization; the read-only implementation returns the length
    /// of the existing metadata bytes.
    fn finish(&mut self) -> usize;
}
57
58impl MetadataBuilder for WritableMetadataBuilder {
59 fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError> {
60 Ok(self.upsert_field_name(field_name))
61 }
62 fn field_name(&self, field_id: usize) -> &str {
63 self.field_name(field_id)
64 }
65 fn num_field_names(&self) -> usize {
66 self.num_field_names()
67 }
68 fn truncate_field_names(&mut self, new_size: usize) {
69 self.field_names.truncate(new_size)
70 }
71 fn finish(&mut self) -> usize {
72 self.finish()
73 }
74}
75
/// A [`MetadataBuilder`] that resolves field names against an existing
/// [`VariantMetadata`] dictionary instead of building a new one.
///
/// Looking up a name that is not in the dictionary is an error.
#[derive(Debug)]
pub struct ReadOnlyMetadataBuilder<'m> {
    /// The existing metadata dictionary that lookups are resolved against.
    metadata: &'m VariantMetadata<'m>,
    /// Cache of names that were already resolved, mapped to their field ids,
    /// so repeated lookups skip the dictionary search.
    known_field_names: HashMap<&'m str, u32>,
}
91
92impl<'m> ReadOnlyMetadataBuilder<'m> {
93 pub fn new(metadata: &'m VariantMetadata<'m>) -> Self {
95 Self {
96 metadata,
97 known_field_names: HashMap::new(),
98 }
99 }
100}
101
102impl MetadataBuilder for ReadOnlyMetadataBuilder<'_> {
103 fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError> {
104 if let Some(field_id) = self.known_field_names.get(field_name) {
105 return Ok(*field_id);
106 }
107
108 let Some((field_id, field_name)) = self.metadata.get_entry(field_name) else {
109 return Err(ArrowError::InvalidArgumentError(format!(
110 "Field name '{field_name}' not found in metadata dictionary"
111 )));
112 };
113
114 self.known_field_names.insert(field_name, field_id);
115 Ok(field_id)
116 }
117 fn field_name(&self, field_id: usize) -> &str {
118 &self.metadata[field_id]
119 }
120 fn num_field_names(&self) -> usize {
121 self.metadata.len()
122 }
123 fn truncate_field_names(&mut self, new_size: usize) {
124 debug_assert_eq!(self.metadata.len(), new_size);
125 }
126 fn finish(&mut self) -> usize {
127 self.metadata.bytes.len()
128 }
129}
130
/// A [`MetadataBuilder`] that collects field names into a new dictionary and
/// serializes it into an owned byte buffer on `finish`.
#[derive(Default, Debug)]
pub struct WritableMetadataBuilder {
    /// Distinct field names in insertion order; a name's position is its
    /// field id.
    pub(crate) field_names: IndexSet<String>,

    /// Whether the names inserted so far are in strictly ascending order.
    /// Maintained on every new insertion and written into the header byte by
    /// `finish`. `false` for a freshly defaulted (empty) builder.
    pub(crate) is_sorted: bool,

    /// Output buffer that `finish` appends the serialized metadata to.
    metadata_buffer: Vec<u8>,
}
148
149impl WritableMetadataBuilder {
150 pub fn upsert_field_name(&mut self, field_name: &str) -> u32 {
152 let (id, new_entry) = self.field_names.insert_full(field_name.to_string());
153
154 if new_entry {
155 let n = self.num_field_names();
156
157 self.is_sorted =
162 n == 1 || self.is_sorted && (self.field_names[n - 2] < self.field_names[n - 1]);
163 }
164
165 id as u32
166 }
167
168 pub fn offset(&self) -> usize {
170 self.metadata_buffer.len()
171 }
172
173 fn num_field_names(&self) -> usize {
180 let n = self.field_names.len();
181 assert!(n <= u32::MAX as usize);
182
183 n
184 }
185
186 fn field_name(&self, i: usize) -> &str {
187 &self.field_names[i]
188 }
189
190 fn metadata_size(&self) -> usize {
191 self.field_names.iter().map(|k| k.len()).sum()
192 }
193
194 pub fn finish(&mut self) -> usize {
198 let nkeys = self.num_field_names();
199
200 let total_dict_size: usize = self.metadata_size();
202
203 let metadata_buffer = &mut self.metadata_buffer;
204 let is_sorted = std::mem::take(&mut self.is_sorted);
205 let field_names = std::mem::take(&mut self.field_names);
206
207 let max_offset = std::cmp::max(total_dict_size, nkeys);
209 let offset_size = int_size(max_offset);
210
211 let offset_start = 1 + offset_size as usize;
212 let string_start = offset_start + (nkeys + 1) * offset_size as usize;
213 let metadata_size = string_start + total_dict_size;
214
215 metadata_buffer.reserve(metadata_size);
216
217 metadata_buffer.push(0x01 | (is_sorted as u8) << 4 | ((offset_size - 1) << 6));
219
220 write_offset(metadata_buffer, nkeys, offset_size);
222
223 let mut cur_offset = 0;
225 for key in field_names.iter() {
226 write_offset(metadata_buffer, cur_offset, offset_size);
227 cur_offset += key.len();
228 }
229 write_offset(metadata_buffer, cur_offset, offset_size);
231
232 for key in field_names {
234 metadata_buffer.extend_from_slice(key.as_bytes());
235 }
236
237 metadata_buffer.len()
238 }
239
240 pub fn into_inner(self) -> Vec<u8> {
242 self.metadata_buffer
243 }
244}
245
246impl<S: AsRef<str>> FromIterator<S> for WritableMetadataBuilder {
247 fn from_iter<T: IntoIterator<Item = S>>(iter: T) -> Self {
248 let mut this = Self::default();
249 this.extend(iter);
250
251 this
252 }
253}
254
255impl<S: AsRef<str>> Extend<S> for WritableMetadataBuilder {
256 fn extend<T: IntoIterator<Item = S>>(&mut self, iter: T) {
257 let iter = iter.into_iter();
258 let (min, _) = iter.size_hint();
259
260 self.field_names.reserve(min);
261
262 for field_name in iter {
263 self.upsert_field_name(field_name.as_ref());
264 }
265 }
266}
267
#[cfg(test)]
mod test {
    use crate::{
        ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
        builder::{
            metadata::{ReadOnlyMetadataBuilder, WritableMetadataBuilder},
            object::ObjectBuilder,
        },
    };

    /// Asserts that `builder` stores `expected` at consecutive field ids
    /// starting at `first_id`.
    fn assert_field_names(builder: &WritableMetadataBuilder, first_id: usize, expected: &[&str]) {
        for (offset, name) in expected.iter().enumerate() {
            assert_eq!(builder.field_name(first_id + offset), *name);
        }
    }

    /// `from_iter` keeps insertion order and tracks whether input was sorted.
    #[test]
    fn test_metadata_builder_from_iter() {
        // Ascending input is reported as sorted.
        let ascending = WritableMetadataBuilder::from_iter(vec!["apple", "banana", "cherry"]);
        assert_eq!(ascending.num_field_names(), 3);
        assert_field_names(&ascending, 0, &["apple", "banana", "cherry"]);
        assert!(ascending.is_sorted);

        // Out-of-order input keeps its order and is reported as unsorted.
        let shuffled = WritableMetadataBuilder::from_iter(["zebra", "apple", "banana"]);
        assert_eq!(shuffled.num_field_names(), 3);
        assert_field_names(&shuffled, 0, &["zebra", "apple", "banana"]);
        assert!(!shuffled.is_sorted);

        // An empty builder is not considered sorted.
        let empty = WritableMetadataBuilder::from_iter(Vec::<&str>::new());
        assert_eq!(empty.num_field_names(), 0);
        assert!(!empty.is_sorted);
    }

    /// `extend` appends new names and ignores ones already present.
    #[test]
    fn test_metadata_builder_extend() {
        let mut builder = WritableMetadataBuilder::default();
        assert_eq!(builder.num_field_names(), 0);
        assert!(!builder.is_sorted);

        builder.extend(["apple", "cherry"]);
        assert_eq!(builder.num_field_names(), 2);
        assert_field_names(&builder, 0, &["apple", "cherry"]);
        assert!(builder.is_sorted);

        // A second batch that continues the ascending order.
        builder.extend(vec!["dinosaur", "monkey"]);
        assert_eq!(builder.num_field_names(), 4);
        assert_field_names(&builder, 2, &["dinosaur", "monkey"]);
        assert!(builder.is_sorted);

        // Re-adding existing names must not grow the dictionary.
        let count_before = builder.num_field_names();
        builder.extend(["apple", "monkey"]);
        assert_eq!(builder.num_field_names(), count_before);
    }

    /// The sorted flag stays set while names ascend and clears otherwise.
    #[test]
    fn test_metadata_builder_extend_sort_order() {
        let mut builder = WritableMetadataBuilder::default();

        builder.extend(["middle"]);
        assert!(builder.is_sorted);

        builder.extend(["zebra"]);
        assert!(builder.is_sorted);

        // "apple" sorts before "zebra", so the order is broken.
        builder.extend(["apple"]);
        assert!(!builder.is_sorted);
    }

    /// `from_iter` accepts any item type implementing `AsRef<str>`.
    #[test]
    fn test_metadata_builder_from_iter_with_string_types() {
        // &str items
        let from_strs = WritableMetadataBuilder::from_iter(["a", "b", "c"]);
        assert_eq!(from_strs.num_field_names(), 3);

        // String items
        let owned_names = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let from_strings = WritableMetadataBuilder::from_iter(owned_names);
        assert_eq!(from_strings.num_field_names(), 3);

        // Box<str> items
        let boxed_names: Vec<Box<str>> = vec!["a".into(), "b".into(), "c".into()];
        let from_boxed = WritableMetadataBuilder::from_iter(boxed_names);
        assert_eq!(from_boxed.num_field_names(), 3);
    }

    /// An object can be built against an existing dictionary through the
    /// read-only builder and read back with the original metadata bytes.
    #[test]
    fn test_read_only_metadata_builder() {
        // Produce metadata bytes that already contain the field names.
        let mut dictionary_builder = VariantBuilder::new();
        dictionary_builder.add_field_name("name");
        dictionary_builder.add_field_name("age");
        dictionary_builder.add_field_name("active");
        let (dictionary_bytes, _) = dictionary_builder.finish();

        let dictionary = VariantMetadata::try_new(&dictionary_bytes).unwrap();
        let mut read_only = ReadOnlyMetadataBuilder::new(&dictionary);
        let mut values = ValueBuilder::new();

        // Scoped so the borrows of both builders end before `values` is consumed.
        {
            let parent = ParentState::variant(&mut values, &mut read_only);
            let mut object = ObjectBuilder::new(parent, false);

            object.insert("name", "Alice");
            object.insert("age", 30i8);
            object.insert("active", true);
            object.finish();
        }

        let value_bytes = values.into_inner();

        // Decode with the original metadata and check every field.
        let decoded = Variant::try_new(&dictionary_bytes, &value_bytes).unwrap();
        let fields = decoded.as_object().unwrap();
        assert_eq!(fields.get("name"), Some(Variant::from("Alice")));
        assert_eq!(fields.get("age"), Some(Variant::Int8(30)));
        assert_eq!(fields.get("active"), Some(Variant::from(true)));
    }

    /// Inserting a field that is missing from the dictionary reports an error.
    #[test]
    fn test_read_only_metadata_builder_fails_on_unknown_field() {
        let mut dictionary_builder = VariantBuilder::new();
        dictionary_builder.add_field_name("known_field");
        let (dictionary_bytes, _) = dictionary_builder.finish();

        let dictionary = VariantMetadata::try_new(&dictionary_bytes).unwrap();
        let mut read_only = ReadOnlyMetadataBuilder::new(&dictionary);
        let mut values = ValueBuilder::new();

        {
            let parent = ParentState::variant(&mut values, &mut read_only);
            let mut object = ObjectBuilder::new(parent, false);

            // A name from the dictionary is accepted.
            object.insert("known_field", "value");

            // A name outside the dictionary is rejected.
            let outcome = object.try_insert("unknown_field", "value");
            assert!(outcome.is_err());

            let message = outcome.unwrap_err().to_string();
            assert!(message.contains("Field name 'unknown_field' not found"));
        }
    }
}