Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
dictionary.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Tools for dictionaries in IPC context
19 
20 #ifndef ARROW_IPC_DICTIONARY_H
21 #define ARROW_IPC_DICTIONARY_H
22 
23 #include <cstdint>
24 #include <memory>
25 #include <unordered_map>
26 
27 #include "arrow/status.h"
28 #include "arrow/util/macros.h"
29 #include "arrow/util/visibility.h"
30 
31 namespace arrow {
32 
33 class Array;
34 class Field;
35 
36 namespace ipc {
37 
// Hash map from an int64_t key to a shared Array. DictionaryMemo uses this
// type for its id -> dictionary array member (id_to_dictionary_).
38 using DictionaryMap = std::unordered_map<int64_t, std::shared_ptr<Array>>;
// Hash map from an int64_t key to a shared Field.
// NOTE(review): presumably keyed by dictionary id like DictionaryMap; no use
// of this alias is visible in this listing -- confirm against callers.
39 using DictionaryTypeMap = std::unordered_map<int64_t, std::shared_ptr<Field>>;
40 
// Memoization data structure for handling shared dictionaries. It keeps a map
// from int64_t dictionary id to dictionary array (id_to_dictionary_) and a map
// from dictionary memory address to id (dictionary_to_id_).
//
// NOTE(review): this text is a rendered source listing whose embedded line
// numbers skip several values (e.g. 44 and 75), so some declarations or
// comments of the real header may be absent here -- confirm against the
// actual dictionary.h before relying on this view being complete.
42 class ARROW_EXPORT DictionaryMemo {
43  public:
45 
    // Takes a dictionary id and an output pointer for the resulting array.
    // NOTE(review): presumably looks `id` up in id_to_dictionary_ and reports
    // an unknown id through the returned Status -- the implementation is not
    // visible here; confirm.
47  Status GetDictionary(int64_t id, std::shared_ptr<Array>* dictionary) const;
48 
    // Returns the int64_t id associated with `dictionary`.
    // NOTE(review): this method is non-const, so it may register a new id for
    // a dictionary that has not been seen before -- confirm in the .cc file.
50  int64_t GetId(const std::shared_ptr<Array>& dictionary);
51 
    // Reports whether `dictionary` is known to the memo.
    // NOTE(review): presumably compared by memory address, given that
    // dictionary_to_id_ is keyed by intptr_t -- confirm.
53  bool HasDictionary(const std::shared_ptr<Array>& dictionary) const;
54 
    // Reports whether a dictionary is known under `id`.
    // NOTE(review): presumably a lookup in id_to_dictionary_ -- confirm.
56  bool HasDictionaryId(int64_t id) const;
57 
    // Registers `dictionary` under the caller-chosen `id`.
    // NOTE(review): how an already-used id is handled (error Status vs.
    // overwrite) cannot be determined from this header -- confirm.
60  Status AddDictionary(int64_t id, const std::shared_ptr<Array>& dictionary);
61 
    // Read-only access to the id -> dictionary array map.
62  const DictionaryMap& id_to_dictionary() const { return id_to_dictionary_; }
63 
    // The number of dictionaries stored in the memo, i.e. the entry count of
    // id_to_dictionary_, converted to int with static_cast.
65  int size() const { return static_cast<int>(id_to_dictionary_.size()); }
66 
67  private:
68  // Dictionary memory addresses, to track whether a dictionary has been seen
69  // before
70  std::unordered_map<intptr_t, int64_t> dictionary_to_id_;
71 
72  // Map of dictionary id to dictionary array
73  DictionaryMap id_to_dictionary_;
74 
76 };
77 
78 } // namespace ipc
79 } // namespace arrow
80 
81 #endif // ARROW_IPC_DICTIONARY_H
std::unordered_map< int64_t, std::shared_ptr< Field > > DictionaryTypeMap
Definition: dictionary.h:39
arrow::Status
Definition: status.h:106
std::unordered_map< int64_t, std::shared_ptr< Array > > DictionaryMap
Definition: dictionary.h:38
std::shared_ptr< DataType > dictionary(const std::shared_ptr< DataType > &index_type, const std::shared_ptr< Array > &values, bool ordered=false)
Create an instance of Dictionary type.
int size() const
The number of dictionaries stored in the memo.
Definition: dictionary.h:65
const DictionaryMap & id_to_dictionary() const
Definition: dictionary.h:62
arrow
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
arrow::ipc::DictionaryMemo
Memoization data structure for handling shared dictionaries.
Definition: dictionary.h:42
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23