Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
json-internal.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_IPC_JSON_INTERNAL_H
19 #define ARROW_IPC_JSON_INTERNAL_H
20 
21 #define RAPIDJSON_HAS_STDSTRING 1
22 #define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
23 #define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
24 
25 #include <memory>
26 #include <sstream>
27 #include <string>
28 
29 #include "rapidjson/document.h"
30 #include "rapidjson/stringbuffer.h"
31 #include "rapidjson/writer.h"
32 
33 #include "arrow/type_fwd.h" // IWYU pragma: export
34 #include "arrow/util/visibility.h"
35 
36 namespace rj = rapidjson;
37 using RjWriter = rj::Writer<rj::StringBuffer>;
38 using RjArray = rj::Value::ConstArray;
39 using RjObject = rj::Value::ConstObject;
40 
// Returns Status::Invalid("field <TOK> not found") from the enclosing function
// when the member iterator NAME equals (PARENT).MemberEnd(); otherwise does
// nothing.
//
//   TOK    - value streamed into the error message to identify the field.
//   NAME   - member iterator that is compared against the end iterator.
//   PARENT - object providing MemberEnd().
//
// `Status` is referenced unqualified, so the macro must be expanded where that
// name resolves and inside a function whose return type accepts it.
//
// The body is wrapped in do { } while (0) so that an invocation followed by
// ';' is exactly one statement. This keeps it safe under an unbraced
// `if`/`else`; the previous bare `if { }` form either failed to compile or
// captured the caller's `else`.
#define RETURN_NOT_FOUND(TOK, NAME, PARENT)    \
  do {                                         \
    if ((NAME) == (PARENT).MemberEnd()) {      \
      std::stringstream ss;                    \
      ss << "field " << (TOK) << " not found"; \
      return Status::Invalid(ss.str());        \
    }                                          \
  } while (0)
47 
// Validates that member iterator NAME exists in PARENT and holds a string.
//
// First delegates to RETURN_NOT_FOUND(TOK, NAME, PARENT); then, if
// NAME->value.IsString() is false, returns
// Status::Invalid("field was not a string line <N>") from the enclosing
// function, where <N> is __LINE__ at the point of expansion.
//
// TOK is only used by the not-found check; the type-mismatch message does not
// name the field.
//
// Wrapped in do { } while (0): the macro used to expand to two separate
// statements, so under an unbraced `if` the IsString() check ran
// unconditionally (and could dereference an end iterator). An invocation
// followed by ';' is now a single statement.
#define RETURN_NOT_STRING(TOK, NAME, PARENT) \
  do {                                       \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);     \
    if (!(NAME)->value.IsString()) {         \
      std::stringstream ss;                  \
      ss << "field was not a string"         \
         << " line " << __LINE__;            \
      return Status::Invalid(ss.str());      \
    }                                        \
  } while (0)
56 
// Validates that member iterator NAME exists in PARENT and holds a boolean.
//
// First delegates to RETURN_NOT_FOUND(TOK, NAME, PARENT); then, if
// NAME->value.IsBool() is false, returns
// Status::Invalid("field was not a boolean line <N>") from the enclosing
// function, where <N> is __LINE__ at the point of expansion.
//
// TOK is only used by the not-found check; the type-mismatch message does not
// name the field.
//
// Wrapped in do { } while (0): the macro used to expand to two separate
// statements, so under an unbraced `if` the IsBool() check ran
// unconditionally. An invocation followed by ';' is now a single statement.
#define RETURN_NOT_BOOL(TOK, NAME, PARENT) \
  do {                                     \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);   \
    if (!(NAME)->value.IsBool()) {         \
      std::stringstream ss;                \
      ss << "field was not a boolean"      \
         << " line " << __LINE__;          \
      return Status::Invalid(ss.str());    \
    }                                      \
  } while (0)
65 
// Validates that member iterator NAME exists in PARENT and holds an int.
//
// First delegates to RETURN_NOT_FOUND(TOK, NAME, PARENT); then, if
// NAME->value.IsInt() is false, returns
// Status::Invalid("field was not an int line <N>") from the enclosing
// function, where <N> is __LINE__ at the point of expansion.
//
// TOK is only used by the not-found check; the type-mismatch message does not
// name the field.
//
// NOTE(review): in RapidJSON, IsInt() is the 32-bit signed check; confirm no
// caller expects 64-bit values to pass this macro.
//
// Wrapped in do { } while (0): the macro used to expand to two separate
// statements, so under an unbraced `if` the IsInt() check ran
// unconditionally. An invocation followed by ';' is now a single statement.
#define RETURN_NOT_INT(TOK, NAME, PARENT) \
  do {                                    \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);  \
    if (!(NAME)->value.IsInt()) {         \
      std::stringstream ss;               \
      ss << "field was not an int"        \
         << " line " << __LINE__;         \
      return Status::Invalid(ss.str());   \
    }                                     \
  } while (0)
74 
// Validates that member iterator NAME exists in PARENT and holds an array.
//
// First delegates to RETURN_NOT_FOUND(TOK, NAME, PARENT); then, if
// NAME->value.IsArray() is false, returns
// Status::Invalid("field was not an array line <N>") from the enclosing
// function, where <N> is __LINE__ at the point of expansion.
//
// TOK is only used by the not-found check; the type-mismatch message does not
// name the field.
//
// Wrapped in do { } while (0): the macro used to expand to two separate
// statements, so under an unbraced `if` the IsArray() check ran
// unconditionally. An invocation followed by ';' is now a single statement.
#define RETURN_NOT_ARRAY(TOK, NAME, PARENT) \
  do {                                      \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);    \
    if (!(NAME)->value.IsArray()) {         \
      std::stringstream ss;                 \
      ss << "field was not an array"        \
         << " line " << __LINE__;           \
      return Status::Invalid(ss.str());     \
    }                                       \
  } while (0)
83 
// Validates that member iterator NAME exists in PARENT and holds an object.
//
// First delegates to RETURN_NOT_FOUND(TOK, NAME, PARENT); then, if
// NAME->value.IsObject() is false, returns
// Status::Invalid("field was not an object line <N>") from the enclosing
// function, where <N> is __LINE__ at the point of expansion.
//
// TOK is only used by the not-found check; the type-mismatch message does not
// name the field.
//
// Wrapped in do { } while (0): the macro used to expand to two separate
// statements, so under an unbraced `if` the IsObject() check ran
// unconditionally. An invocation followed by ';' is now a single statement.
#define RETURN_NOT_OBJECT(TOK, NAME, PARENT) \
  do {                                       \
    RETURN_NOT_FOUND(TOK, NAME, PARENT);     \
    if (!(NAME)->value.IsObject()) {         \
      std::stringstream ss;                  \
      ss << "field was not an object"        \
         << " line " << __LINE__;            \
      return Status::Invalid(ss.str());      \
    }                                        \
  } while (0)
92 
93 namespace arrow {
94 namespace ipc {
95 namespace json {
96 namespace internal {
97 
98 Status WriteSchema(const Schema& schema, RjWriter* writer);
99 Status WriteRecordBatch(const RecordBatch& batch, RjWriter* writer);
100 Status WriteArray(const std::string& name, const Array& array, RjWriter* writer);
101 
102 Status ReadSchema(const rj::Value& json_obj, MemoryPool* pool,
103  std::shared_ptr<Schema>* schema);
104 
105 Status ReadRecordBatch(const rj::Value& json_obj, const std::shared_ptr<Schema>& schema,
106  MemoryPool* pool, std::shared_ptr<RecordBatch>* batch);
107 
108 Status ReadArray(MemoryPool* pool, const rj::Value& json_obj,
109  const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array);
110 
111 Status ReadArray(MemoryPool* pool, const rj::Value& json_obj, const Schema& schema,
112  std::shared_ptr<Array>* array);
113 
114 } // namespace internal
115 } // namespace json
116 } // namespace ipc
117 } // namespace arrow
118 
119 #endif // ARROW_IPC_JSON_INTERNAL_H
Status WriteRecordBatch(const RecordBatch &batch, int64_t buffer_start_offset, io::OutputStream *dst, int32_t *metadata_length, int64_t *body_length, MemoryPool *pool, int max_recursion_depth=kMaxNestingDepth, bool allow_64bit=false)
Low-level API for writing a record batch (without schema) to an OutputStream.
rj::Writer< rj::StringBuffer > RjWriter
Definition: json-internal.h:37
Collection of equal-length arrays matching a particular Schema.
Definition: table.h:118
Status ReadRecordBatch(const std::shared_ptr< Schema > &schema, io::InputStream *stream, std::shared_ptr< RecordBatch > *out)
Read record batch as encapsulated IPC message with metadata size prefix and header.
Definition: status.h:106
rj::Value::ConstObject RjObject
Definition: json-internal.h:39
rj::Value::ConstArray RjArray
Definition: json-internal.h:38
Sequence of arrow::Field objects describing the columns of a record batch or table data structure.
Definition: type.h:741
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
Array base type. Immutable data array with some logical type and some length.
Definition: array.h:180
std::shared_ptr< Schema > schema(const std::vector< std::shared_ptr< Field >> &fields, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Create a Schema instance.
Base class for memory allocation.
Definition: memory_pool.h:34
Status ReadSchema(io::InputStream *stream, std::shared_ptr< Schema > *out)
Read Schema from stream serialized as a sequence of one or more IPC messages.