1use crate::reader::serializer::TapeSerializer;
19use arrow_schema::ArrowError;
20use memchr::memchr2;
21use serde::Serialize;
22use std::fmt::Write;
23
/// A single fixed-size entry of the decoded tape.
///
/// Containers are stored as a start/end pair that point at each other, scalars
/// either carry their payload inline or reference a slot in the string buffer.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TapeElement {
    /// Start of an object; holds the tape index of the matching [`Self::EndObject`]
    StartObject(u32),
    /// End of an object; holds the tape index of the matching [`Self::StartObject`]
    EndObject(u32),
    /// Start of a list; holds the tape index of the matching [`Self::EndList`]
    StartList(u32),
    /// End of a list; holds the tape index of the matching [`Self::StartList`]
    EndList(u32),
    /// A string; holds the index of its slot in the string offsets
    String(u32),
    /// A number kept in textual form; holds the index of its slot in the string offsets
    Number(u32),

    /// The high 32 bits of a 64-bit signed integer.
    ///
    /// The element that follows is expected to be an [`Self::I32`] holding the low bits.
    I64(i32),

    /// A 32-bit signed integer, or the low 32 bits following an [`Self::I64`]
    I32(i32),

    /// The high 32 bits of a 64-bit float.
    ///
    /// The element that follows is expected to be an [`Self::F32`] holding the low bits.
    F64(u32),

    /// The bit pattern of a 32-bit float, or the low 32 bits following an [`Self::F64`]
    F32(u32),

    /// The JSON literal `true`
    True,
    /// The JSON literal `false`
    False,
    /// The JSON literal `null`
    Null,
}
85
/// A borrowed, read-only view over decoded tape data.
///
/// Within this file it is only constructed by `TapeDecoder::finish`.
#[derive(Debug)]
pub struct Tape<'a> {
    /// The tape elements
    elements: &'a [TapeElement],
    /// The concatenated text of every string and number on the tape
    strings: &'a str,
    /// Slot `i` spans `string_offsets[i]..string_offsets[i + 1]` of `strings`
    string_offsets: &'a [usize],
    /// Number of rows recorded on the tape
    num_rows: usize,
}
102
103impl<'a> Tape<'a> {
104 #[inline]
106 pub fn get_string(&self, idx: u32) -> &'a str {
107 let end_offset = self.string_offsets[idx as usize + 1];
108 let start_offset = self.string_offsets[idx as usize];
109 unsafe { self.strings.get_unchecked(start_offset..end_offset) }
112 }
113
114 pub fn get(&self, idx: u32) -> TapeElement {
116 self.elements[idx as usize]
117 }
118
119 pub fn next(&self, cur_idx: u32, expected: &str) -> Result<u32, ArrowError> {
123 match self.get(cur_idx) {
124 TapeElement::String(_)
125 | TapeElement::Number(_)
126 | TapeElement::True
127 | TapeElement::False
128 | TapeElement::Null
129 | TapeElement::I32(_)
130 | TapeElement::F32(_) => Ok(cur_idx + 1),
131 TapeElement::I64(_) | TapeElement::F64(_) => Ok(cur_idx + 2),
132 TapeElement::StartList(end_idx) => Ok(end_idx + 1),
133 TapeElement::StartObject(end_idx) => Ok(end_idx + 1),
134 TapeElement::EndObject(_) | TapeElement::EndList(_) => {
135 Err(self.error(cur_idx, expected))
136 }
137 }
138 }
139
/// Returns the number of rows recorded on this tape
pub fn num_rows(&self) -> usize {
    self.num_rows
}
144
145 fn serialize(&self, out: &mut String, idx: u32) -> u32 {
147 match self.get(idx) {
148 TapeElement::StartObject(end) => {
149 out.push('{');
150 let mut cur_idx = idx + 1;
151 while cur_idx < end {
152 cur_idx = self.serialize(out, cur_idx);
153 out.push_str(": ");
154 cur_idx = self.serialize(out, cur_idx);
155 }
156 out.push('}');
157 return end + 1;
158 }
159 TapeElement::EndObject(_) => out.push('}'),
160 TapeElement::StartList(end) => {
161 out.push('[');
162 let mut cur_idx = idx + 1;
163 while cur_idx < end {
164 cur_idx = self.serialize(out, cur_idx);
165 if cur_idx < end {
166 out.push_str(", ");
167 }
168 }
169 out.push(']');
170 return end + 1;
171 }
172 TapeElement::EndList(_) => out.push(']'),
173 TapeElement::String(s) => {
174 out.push('"');
175 out.push_str(self.get_string(s));
176 out.push('"')
177 }
178 TapeElement::Number(n) => out.push_str(self.get_string(n)),
179 TapeElement::True => out.push_str("true"),
180 TapeElement::False => out.push_str("false"),
181 TapeElement::Null => out.push_str("null"),
182 TapeElement::I64(high) => match self.get(idx + 1) {
183 TapeElement::I32(low) => {
184 let val = ((high as i64) << 32) | (low as u32) as i64;
185 let _ = write!(out, "{val}");
186 return idx + 2;
187 }
188 _ => unreachable!(),
189 },
190 TapeElement::I32(val) => {
191 let _ = write!(out, "{val}");
192 }
193 TapeElement::F64(high) => match self.get(idx + 1) {
194 TapeElement::F32(low) => {
195 let val = f64::from_bits(((high as u64) << 32) | low as u64);
196 let _ = write!(out, "{val}");
197 return idx + 2;
198 }
199 _ => unreachable!(),
200 },
201 TapeElement::F32(val) => {
202 let _ = write!(out, "{}", f32::from_bits(val));
203 }
204 }
205 idx + 1
206 }
207
208 pub fn error(&self, idx: u32, expected: &str) -> ArrowError {
210 let mut out = String::with_capacity(64);
211 self.serialize(&mut out, idx);
212 ArrowError::JsonError(format!("expected {expected} got {out}"))
213 }
214}
215
/// An entry on the [`TapeDecoder`] stack, describing what is currently being parsed
#[derive(Debug, Copy, Clone)]
enum DecoderState {
    /// Inside an object; holds the tape index of its `StartObject` element
    Object(u32),
    /// Inside a list; holds the tape index of its `StartList` element
    List(u32),
    /// Inside a string, after the opening quote
    String,
    /// Expecting the start of any value
    Value,
    /// Inside a number
    Number,
    /// Expecting the `:` between an object key and its value
    Colon,
    /// Immediately after a `\` inside a string
    Escape,
    /// Inside a `\u` escape: `(high, low, idx)` where `high` and `low` accumulate
    /// the hex digits of the first and second escape and `idx` is the current
    /// position within the sequence
    Unicode(u16, u16, u8),
    /// Inside a literal; the `u8` is the number of its bytes matched so far
    Literal(Literal, u8),
}
241
242impl DecoderState {
243 fn as_str(&self) -> &'static str {
244 match self {
245 DecoderState::Object(_) => "object",
246 DecoderState::List(_) => "list",
247 DecoderState::String => "string",
248 DecoderState::Value => "value",
249 DecoderState::Number => "number",
250 DecoderState::Colon => "colon",
251 DecoderState::Escape => "escape",
252 DecoderState::Unicode(_, _, _) => "unicode literal",
253 DecoderState::Literal(d, _) => d.as_str(),
254 }
255 }
256}
257
/// The bare-word JSON literals
#[derive(Debug, Copy, Clone)]
enum Literal {
    /// `null`
    Null,
    /// `true`
    True,
    /// `false`
    False,
}
264
265impl Literal {
266 fn element(&self) -> TapeElement {
267 match self {
268 Literal::Null => TapeElement::Null,
269 Literal::True => TapeElement::True,
270 Literal::False => TapeElement::False,
271 }
272 }
273
274 fn as_str(&self) -> &'static str {
275 match self {
276 Literal::Null => "null",
277 Literal::True => "true",
278 Literal::False => "false",
279 }
280 }
281
282 fn bytes(&self) -> &'static [u8] {
283 self.as_str().as_bytes()
284 }
285}
286
/// Evaluates to the next item of the iterator `$next`, or `break`s out of the
/// innermost enclosing loop when the iterator is exhausted.
macro_rules! next {
    ($next:ident) => {
        if let Some(b) = $next.next() {
            b
        } else {
            break
        }
    };
}
296
/// An incremental JSON decoder that records what it reads as a flat tape of
/// [`TapeElement`]
pub struct TapeDecoder {
    /// The tape; index 0 always holds a placeholder `Null`
    elements: Vec<TapeElement>,

    /// Number of rows started or serialized since the last `clear`
    cur_row: usize,

    /// Maximum number of rows `decode` will start before stopping
    batch_size: usize,

    /// The concatenated bytes of every string and number.
    ///
    /// Checked to be UTF-8 only in `finish`.
    bytes: Vec<u8>,

    /// End offsets into `bytes`, preceded by a leading `0`
    offsets: Vec<usize>,

    /// The stack of states describing the position within a partially decoded row
    stack: Vec<DecoderState>,
}
319
320impl TapeDecoder {
321 pub fn new(batch_size: usize, num_fields: usize) -> Self {
324 let tokens_per_row = 2 + num_fields * 2;
325 let mut offsets = Vec::with_capacity(batch_size * (num_fields * 2) + 1);
326 offsets.push(0);
327
328 let mut elements = Vec::with_capacity(batch_size * tokens_per_row);
329 elements.push(TapeElement::Null);
330
331 Self {
332 offsets,
333 elements,
334 batch_size,
335 cur_row: 0,
336 bytes: Vec::with_capacity(num_fields * 2 * 8),
337 stack: Vec::with_capacity(10),
338 }
339 }
340
/// Decodes JSON from `buf` onto the tape and returns the number of bytes consumed.
///
/// Parsing state is kept on `self.stack`, so a value split across two buffers is
/// resumed by the next call. Decoding stops early once `batch_size` rows have
/// been started and no row is in progress.
///
/// # Errors
///
/// Returns an error on the first byte that is not valid at the current position.
pub fn decode(&mut self, buf: &[u8]) -> Result<usize, ArrowError> {
    let mut iter = BufIter::new(buf);

    while !iter.is_empty() {
        let state = match self.stack.last_mut() {
            Some(l) => l,
            None => {
                // Between rows: skip whitespace and stop if the input or the batch is exhausted
                iter.skip_whitespace();
                if iter.is_empty() || self.cur_row >= self.batch_size {
                    break;
                }

                // Start of a new row
                self.cur_row += 1;
                self.stack.push(DecoderState::Value);
                self.stack.last_mut().unwrap()
            }
        };

        match state {
            // Inside an object: expect a key or the closing brace
            DecoderState::Object(start_idx) => {
                // Whitespace and commas between entries are skipped without validation
                iter.advance_until(|b| !json_whitespace(b) && b != b',');
                match next!(iter) {
                    b'"' => {
                        // Pushed in reverse: key string, then colon, then value
                        self.stack.push(DecoderState::Value);
                        self.stack.push(DecoderState::Colon);
                        self.stack.push(DecoderState::String);
                    }
                    b'}' => {
                        // Back-patch the start element with the index of the end element
                        let start_idx = *start_idx;
                        let end_idx = self.elements.len() as u32;
                        self.elements[start_idx as usize] = TapeElement::StartObject(end_idx);
                        self.elements.push(TapeElement::EndObject(start_idx));
                        self.stack.pop();
                    }
                    b => return Err(err(b, "parsing object")),
                }
            }
            // Inside a list: expect a value or the closing bracket
            DecoderState::List(start_idx) => {
                // Whitespace and commas between items are skipped without validation
                iter.advance_until(|b| !json_whitespace(b) && b != b',');
                match iter.peek() {
                    Some(b']') => {
                        iter.next();
                        // Back-patch the start element with the index of the end element
                        let start_idx = *start_idx;
                        let end_idx = self.elements.len() as u32;
                        self.elements[start_idx as usize] = TapeElement::StartList(end_idx);
                        self.elements.push(TapeElement::EndList(start_idx));
                        self.stack.pop();
                    }
                    // Leave the byte in place for the value state to consume
                    Some(_) => self.stack.push(DecoderState::Value),
                    None => break,
                }
            }
            // Inside a string: copy bytes up to the next escape or closing quote
            DecoderState::String => {
                let s = iter.skip_chrs(b'\\', b'"');
                self.bytes.extend_from_slice(s);

                match next!(iter) {
                    b'\\' => self.stack.push(DecoderState::Escape),
                    b'"' => {
                        let idx = self.offsets.len() - 1;
                        self.elements.push(TapeElement::String(idx as _));
                        self.offsets.push(self.bytes.len());
                        self.stack.pop();
                    }
                    // skip_chrs only stops at one of the two bytes above
                    b => unreachable!("{}", b),
                }
            }
            // Expecting a value: the first byte selects the state that replaces this one
            state @ DecoderState::Value => {
                iter.skip_whitespace();
                *state = match next!(iter) {
                    b'"' => DecoderState::String,
                    b @ b'-' | b @ b'0'..=b'9' => {
                        self.bytes.push(b);
                        DecoderState::Number
                    }
                    // The first byte of the literal has already been consumed
                    b'n' => DecoderState::Literal(Literal::Null, 1),
                    b'f' => DecoderState::Literal(Literal::False, 1),
                    b't' => DecoderState::Literal(Literal::True, 1),
                    b'[' => {
                        // Placeholder end index, patched when the list is closed
                        let idx = self.elements.len() as u32;
                        self.elements.push(TapeElement::StartList(u32::MAX));
                        DecoderState::List(idx)
                    }
                    b'{' => {
                        // Placeholder end index, patched when the object is closed
                        let idx = self.elements.len() as u32;
                        self.elements.push(TapeElement::StartObject(u32::MAX));
                        DecoderState::Object(idx)
                    }
                    b => return Err(err(b, "parsing value")),
                };
            }
            // Inside a number: copy every byte that may appear in a number
            DecoderState::Number => {
                let s = iter.advance_until(|b| {
                    !matches!(b, b'0'..=b'9' | b'-' | b'+' | b'.' | b'e' | b'E')
                });
                self.bytes.extend_from_slice(s);

                // The number is only complete once a terminating byte has been seen;
                // otherwise it may continue in the next buffer
                if !iter.is_empty() {
                    self.stack.pop();
                    let idx = self.offsets.len() - 1;
                    self.elements.push(TapeElement::Number(idx as _));
                    self.offsets.push(self.bytes.len());
                }
            }
            // Expecting the colon between a key and its value
            DecoderState::Colon => {
                iter.skip_whitespace();
                match next!(iter) {
                    b':' => self.stack.pop(),
                    b => return Err(err(b, "parsing colon")),
                };
            }
            // Inside a literal: match the bytes that have not been seen yet
            DecoderState::Literal(literal, idx) => {
                let bytes = literal.bytes();
                let expected = bytes.iter().skip(*idx as usize).copied();
                for (expected, b) in expected.zip(&mut iter) {
                    match b == expected {
                        true => *idx += 1,
                        false => return Err(err(b, "parsing literal")),
                    }
                }
                // Only complete once every byte has matched; otherwise resume later
                if *idx == bytes.len() as u8 {
                    let element = literal.element();
                    self.stack.pop();
                    self.elements.push(element);
                }
            }
            // Immediately after a backslash inside a string
            DecoderState::Escape => {
                let v = match next!(iter) {
                    b'u' => {
                        self.stack.pop();
                        self.stack.push(DecoderState::Unicode(0, 0, 0));
                        continue;
                    }
                    b'"' => b'"',
                    b'\\' => b'\\',
                    b'/' => b'/',
                    b'b' => 8,  // backspace
                    b'f' => 12, // form feed
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b => return Err(err(b, "parsing escape sequence")),
                };

                self.stack.pop();
                self.bytes.push(v);
            }
            // Inside a `\u` escape; `idx` counts the position within `XXXX\uXXXX`
            DecoderState::Unicode(high, low, idx) => loop {
                match *idx {
                    // The four hex digits of the first escape
                    0..=3 => *high = (*high << 4) | parse_hex(next!(iter))? as u16,
                    4 => {
                        // If the first escape is a valid char on its own, it is complete
                        if let Some(c) = char::from_u32(*high as u32) {
                            write_char(c, &mut self.bytes);
                            self.stack.pop();
                            break;
                        }

                        // Otherwise a second `\uXXXX` escape must follow
                        match next!(iter) {
                            b'\\' => {}
                            b => return Err(err(b, "parsing surrogate pair escape")),
                        }
                    }
                    5 => match next!(iter) {
                        b'u' => {}
                        b => return Err(err(b, "parsing surrogate pair unicode")),
                    },
                    // The four hex digits of the second escape
                    6..=9 => *low = (*low << 4) | parse_hex(next!(iter))? as u16,
                    _ => {
                        let c = char_from_surrogate_pair(*low, *high)?;
                        write_char(c, &mut self.bytes);
                        self.stack.pop();
                        break;
                    }
                }
                *idx += 1;
            },
        }
    }

    Ok(buf.len() - iter.len())
}
527
528 pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
530 if let Some(b) = self.stack.last() {
531 return Err(ArrowError::JsonError(format!(
532 "Cannot serialize to tape containing partial decode state {}",
533 b.as_str()
534 )));
535 }
536
537 let mut serializer =
538 TapeSerializer::new(&mut self.elements, &mut self.bytes, &mut self.offsets);
539
540 rows.iter()
541 .try_for_each(|row| row.serialize(&mut serializer))
542 .map_err(|e| ArrowError::JsonError(e.to_string()))?;
543
544 self.cur_row += rows.len();
545
546 Ok(())
547 }
548
/// Returns the number of rows started or serialized since the last `clear`
pub fn num_buffered_rows(&self) -> usize {
    self.cur_row
}
553
/// Returns `true` if the decoder state stack is non-empty, i.e. a row is only
/// partially decoded
pub fn has_partial_row(&self) -> bool {
    !self.stack.is_empty()
}
559
560 pub fn finish(&self) -> Result<Tape<'_>, ArrowError> {
562 if let Some(b) = self.stack.last() {
563 return Err(ArrowError::JsonError(format!(
564 "Truncated record whilst reading {}",
565 b.as_str()
566 )));
567 }
568
569 if self.offsets.len() >= u32::MAX as usize {
570 return Err(ArrowError::JsonError(format!("Encountered more than {} bytes of string data, consider using a smaller batch size", u32::MAX)));
571 }
572
573 if self.offsets.len() >= u32::MAX as usize {
574 return Err(ArrowError::JsonError(format!(
575 "Encountered more than {} JSON elements, consider using a smaller batch size",
576 u32::MAX
577 )));
578 }
579
580 assert_eq!(
582 self.offsets.last().copied().unwrap_or_default(),
583 self.bytes.len()
584 );
585
586 let strings = simdutf8::basic::from_utf8(&self.bytes)
587 .map_err(|_| ArrowError::JsonError("Encountered non-UTF-8 data".to_string()))?;
588
589 for offset in self.offsets.iter().copied() {
590 if !strings.is_char_boundary(offset) {
591 return Err(ArrowError::JsonError(
592 "Encountered truncated UTF-8 sequence".to_string(),
593 ));
594 }
595 }
596
597 Ok(Tape {
598 strings,
599 elements: &self.elements,
600 string_offsets: &self.offsets,
601 num_rows: self.cur_row,
602 })
603 }
604
605 pub fn clear(&mut self) {
607 assert!(self.stack.is_empty());
608
609 self.cur_row = 0;
610 self.bytes.clear();
611 self.elements.clear();
612 self.elements.push(TapeElement::Null);
613 self.offsets.clear();
614 self.offsets.push(0);
615 }
616}
617
/// A byte cursor over a borrowed buffer
struct BufIter<'a> {
    /// The buffer being read
    buf: &'a [u8],
    /// Index of the next byte to read
    pos: usize,
}
623
624impl<'a> BufIter<'a> {
625 fn new(buf: &'a [u8]) -> Self {
626 Self { buf, pos: 0 }
627 }
628
629 #[inline]
630 fn as_slice(&self) -> &'a [u8] {
631 &self.buf[self.pos..]
632 }
633
634 #[inline]
635 fn is_empty(&self) -> bool {
636 self.pos >= self.buf.len()
637 }
638
639 fn peek(&self) -> Option<u8> {
640 self.buf.get(self.pos).copied()
641 }
642
643 #[inline]
644 fn advance(&mut self, skip: usize) {
645 self.pos += skip;
646 }
647
648 fn advance_until<F: FnMut(u8) -> bool>(&mut self, f: F) -> &[u8] {
649 let s = self.as_slice();
650 match s.iter().copied().position(f) {
651 Some(x) => {
652 self.advance(x);
653 &s[..x]
654 }
655 None => {
656 self.advance(s.len());
657 s
658 }
659 }
660 }
661
662 fn skip_chrs(&mut self, c1: u8, c2: u8) -> &[u8] {
663 let s = self.as_slice();
664 match memchr2(c1, c2, s) {
665 Some(p) => {
666 self.advance(p);
667 &s[..p]
668 }
669 None => {
670 self.advance(s.len());
671 s
672 }
673 }
674 }
675
676 fn skip_whitespace(&mut self) {
677 self.advance_until(|b| !json_whitespace(b));
678 }
679}
680
681impl Iterator for BufIter<'_> {
682 type Item = u8;
683
684 fn next(&mut self) -> Option<Self::Item> {
685 let b = self.peek();
686 self.pos += 1;
687 b
688 }
689
690 fn size_hint(&self) -> (usize, Option<usize>) {
691 let s = self.buf.len().checked_sub(self.pos).unwrap_or_default();
692 (s, Some(s))
693 }
694}
695
// `size_hint` reports the same value for its lower and upper bound, so `len()`
// is available; `TapeDecoder::decode` uses it to compute the bytes consumed
impl ExactSizeIterator for BufIter<'_> {}
697
698fn err(b: u8, ctx: &str) -> ArrowError {
700 ArrowError::JsonError(format!(
701 "Encountered unexpected '{}' whilst {ctx}",
702 b as char
703 ))
704}
705
706fn char_from_surrogate_pair(low: u16, high: u16) -> Result<char, ArrowError> {
708 match (low, high) {
709 (0xDC00..=0xDFFF, 0xD800..=0xDBFF) => {
710 let n = (((high - 0xD800) as u32) << 10) | ((low - 0xDC00) as u32 + 0x1_0000);
711 char::from_u32(n)
712 .ok_or_else(|| ArrowError::JsonError(format!("Invalid UTF-16 surrogate pair {n}")))
713 }
714 _ => Err(ArrowError::JsonError(format!(
715 "Invalid UTF-16 surrogate pair. High: {high:#02X}, Low: {low:#02X}"
716 ))),
717 }
718}
719
/// Appends the UTF-8 encoding of `c` to `out`
fn write_char(c: char, out: &mut Vec<u8>) {
    // Four bytes is the longest UTF-8 encoding of a single char
    let mut scratch = [0u8; 4];
    let encoded: &str = c.encode_utf8(&mut scratch);
    out.extend_from_slice(encoded.as_bytes());
}
725
/// Returns `true` if `b` is a space, line feed, carriage return or tab
#[inline]
fn json_whitespace(b: u8) -> bool {
    b" \n\r\t".contains(&b)
}
731
732fn parse_hex(b: u8) -> Result<u8, ArrowError> {
734 let digit = char::from(b)
735 .to_digit(16)
736 .ok_or_else(|| err(b, "unicode escape"))?;
737 Ok(digit as u8)
738}
739
740#[cfg(test)]
741mod tests {
742 use super::*;
743
/// Pins the in-memory size of the two types stored in bulk by the decoder
#[test]
fn test_sizes() {
    assert_eq!(std::mem::size_of::<DecoderState>(), 8);
    assert_eq!(std::mem::size_of::<TapeElement>(), 8);
}
749
/// Decodes seven whitespace-separated records and checks the exact tape,
/// string buffer and offsets that result
#[test]
fn test_basic() {
    let a = r#"
    {"hello": "world", "foo": 2, "bar": 45}

    {"foo": "bar"}

    {"fiz": null}

    {"a": true, "b": false, "c": null}

    {"a": "", "": "a"}

    {"a": "b", "object": {"nested": "hello", "foo": 23}, "b": {}, "c": {"foo": null }}

    {"a": ["", "foo", ["bar", "c"]], "b": {"1": []}, "c": {"2": [1, 2, 3]} }
    "#;
    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(a.as_bytes()).unwrap();
    assert!(!decoder.has_partial_row());
    assert_eq!(decoder.num_buffered_rows(), 7);

    let finished = decoder.finish().unwrap();
    assert!(!decoder.has_partial_row());
    assert_eq!(decoder.num_buffered_rows(), 7); // `finish` does not reset the decoder
    assert_eq!(
        finished.elements,
        &[
            // Placeholder at index 0
            TapeElement::Null,
            // Record 1: tape indices 1..=8
            TapeElement::StartObject(8), TapeElement::String(0), TapeElement::String(1), TapeElement::String(2), TapeElement::Number(3), TapeElement::String(4), TapeElement::Number(5), TapeElement::EndObject(1),
            // Record 2: tape indices 9..=12
            TapeElement::StartObject(12), TapeElement::String(6), TapeElement::String(7), TapeElement::EndObject(9),
            // Record 3: tape indices 13..=16
            TapeElement::StartObject(16), TapeElement::String(8), TapeElement::Null, TapeElement::EndObject(13),
            // Record 4: tape indices 17..=24
            TapeElement::StartObject(24), TapeElement::String(9), TapeElement::True, TapeElement::String(10), TapeElement::False, TapeElement::String(11), TapeElement::Null, TapeElement::EndObject(17),
            // Record 5: tape indices 25..=30
            TapeElement::StartObject(30), TapeElement::String(12), TapeElement::String(13), TapeElement::String(14), TapeElement::String(15), TapeElement::EndObject(25),
            // Record 6: tape indices 31..=49, with nested objects
            TapeElement::StartObject(49), TapeElement::String(16), TapeElement::String(17), TapeElement::String(18), TapeElement::StartObject(40), TapeElement::String(19), TapeElement::String(20), TapeElement::String(21), TapeElement::Number(22), TapeElement::EndObject(35),
            TapeElement::String(23), TapeElement::StartObject(43), TapeElement::EndObject(42),
            TapeElement::String(24), TapeElement::StartObject(48), TapeElement::String(25), TapeElement::Null, TapeElement::EndObject(45),
            TapeElement::EndObject(31),
            // Record 7: tape indices 50..=75, with nested lists
            TapeElement::StartObject(75), TapeElement::String(26), TapeElement::StartList(59), TapeElement::String(27), TapeElement::String(28), TapeElement::StartList(58), TapeElement::String(29), TapeElement::String(30), TapeElement::EndList(55),
            TapeElement::EndList(52),
            TapeElement::String(31), TapeElement::StartObject(65), TapeElement::String(32), TapeElement::StartList(64), TapeElement::EndList(63),
            TapeElement::EndObject(61),
            TapeElement::String(33), TapeElement::StartObject(74), TapeElement::String(34), TapeElement::StartList(73), TapeElement::Number(35), TapeElement::Number(36), TapeElement::Number(37), TapeElement::EndList(69),
            TapeElement::EndObject(67),
            TapeElement::EndObject(50)
        ]
    );

    // All keys, strings and numbers concatenated in order of appearance
    assert_eq!(
        finished.strings,
        "helloworldfoo2bar45foobarfizabcaaabobjectnestedhellofoo23bcfooafoobarcb1c2123"
    );
    assert_eq!(
        &finished.string_offsets,
        &[
            0, 5, 10, 13, 14, 17, 19, 22, 25, 28, 29, 30, 31, 32, 32, 32, 33, 34, 35, 41, 47,
            52, 55, 57, 58, 59, 62, 63, 63, 66, 69, 70, 71, 72, 73, 74, 75, 76, 77
        ]
    );

    decoder.clear();
    assert!(!decoder.has_partial_row());
    assert_eq!(decoder.num_buffered_rows(), 0);
}
873
/// Checks the error reported for malformed, truncated and non-UTF-8 input
#[test]
fn test_invalid() {
    // Input that does not start a value
    let mut decoder = TapeDecoder::new(16, 2);
    let err = decoder.decode(b"hello").unwrap_err().to_string();
    assert_eq!(
        err,
        "Json error: Encountered unexpected 'h' whilst parsing value"
    );

    // Missing value after a key
    let mut decoder = TapeDecoder::new(16, 2);
    let err = decoder.decode(b"{\"hello\": }").unwrap_err().to_string();
    assert_eq!(
        err,
        "Json error: Encountered unexpected '}' whilst parsing value"
    );

    // Misspelled literal
    let mut decoder = TapeDecoder::new(16, 2);
    let err = decoder
        .decode(b"{\"hello\": [ false, tru ]}")
        .unwrap_err()
        .to_string();
    assert_eq!(
        err,
        "Json error: Encountered unexpected ' ' whilst parsing literal"
    );

    // Unicode escape with too few hex digits
    let mut decoder = TapeDecoder::new(16, 2);
    let err = decoder
        .decode(b"{\"hello\": \"\\ud8\"}")
        .unwrap_err()
        .to_string();
    assert_eq!(
        err,
        "Json error: Encountered unexpected '\"' whilst unicode escape"
    );

    // High surrogate that is not followed by a second escape
    let mut decoder = TapeDecoder::new(16, 2);
    let err = decoder
        .decode(b"{\"hello\": \"\\ud83d\"}")
        .unwrap_err()
        .to_string();
    assert_eq!(
        err,
        "Json error: Encountered unexpected '\"' whilst parsing surrogate pair escape"
    );

    // Truncated input decodes without error but cannot be finished
    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"he").unwrap();
    assert!(decoder.has_partial_row());
    assert_eq!(decoder.num_buffered_rows(), 1);
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Truncated record whilst reading string");

    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"hello\" : ").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Truncated record whilst reading value");

    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"hello\" : [").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Truncated record whilst reading list");

    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"hello\" : tru").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Truncated record whilst reading true");

    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"hello\" : nu").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Truncated record whilst reading null");

    // Invalid UTF-8 is only detected by `finish`
    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"hello\" : \"world\xFF\"}").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Encountered non-UTF-8 data");

    // A multi-byte sequence split across two strings: the concatenated bytes are
    // valid UTF-8, but the offset between the strings falls inside a character
    let mut decoder = TapeDecoder::new(16, 2);
    decoder.decode(b"{\"\xe2\" : \"\x96\xa1\"}").unwrap();
    let err = decoder.finish().unwrap_err().to_string();
    assert_eq!(err, "Json error: Encountered truncated UTF-8 sequence");
}
961
/// Two escapes that do not form a high/low surrogate pair must be rejected
#[test]
fn test_invalid_surrogates() {
    let inputs: [&[u8]; 2] = [
        // High surrogate followed by another high surrogate
        b"{\"test\": \"\\ud800\\ud801\"}",
        // Low surrogate followed by another low surrogate
        b"{\"test\": \"\\udc00\\udc01\"}",
    ];

    for input in inputs {
        let mut decoder = TapeDecoder::new(16, 2);
        let res = decoder.decode(input);
        assert!(res.is_err());
    }
}
972}