1use crate::reader::serializer::TapeSerializer;
19use arrow_schema::ArrowError;
20use memchr::memchr2;
21use serde::Serialize;
22use std::fmt::Write;
23
/// A single entry of the flattened ("tape") representation of decoded JSON.
///
/// Index payloads are `u32` positions, either into the tape itself (for the
/// start/end markers) or into the string offsets (for `String` / `Number`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TapeElement {
    /// Start of an object; the payload is the tape index of the matching
    /// [`TapeElement::EndObject`].
    StartObject(u32),
    /// End of an object; the payload is the tape index of the matching
    /// [`TapeElement::StartObject`].
    EndObject(u32),
    /// Start of a list; the payload is the tape index of the matching
    /// [`TapeElement::EndList`].
    StartList(u32),
    /// End of a list; the payload is the tape index of the matching
    /// [`TapeElement::StartList`].
    EndList(u32),
    /// A string value; the payload is the string index to pass to
    /// `Tape::get_string`.
    String(u32),
    /// A number kept in its textual form; the payload is the string index to
    /// pass to `Tape::get_string`.
    Number(u32),

    /// The high 32 bits of a 64-bit signed integer.
    ///
    /// The next tape element is a [`TapeElement::I32`] holding the low bits.
    I64(i32),

    /// A 32-bit signed integer, or the low 32 bits of the integer started by
    /// a preceding [`TapeElement::I64`].
    I32(i32),

    /// The high 32 bits of the bit pattern of a 64-bit float.
    ///
    /// The next tape element is a [`TapeElement::F32`] holding the low bits.
    F64(u32),

    /// The bit pattern of a 32-bit float, or the low 32 bits of the float
    /// started by a preceding [`TapeElement::F64`].
    F32(u32),

    /// The JSON literal `true`.
    True,
    /// The JSON literal `false`.
    False,
    /// The JSON literal `null`.
    Null,
}
85
/// A borrowed, read-only view over decoded tape data.
///
/// Instances are produced by `TapeDecoder::finish`.
#[derive(Debug)]
pub struct Tape<'a> {
    /// The tape entries, addressed by `u32` index.
    elements: &'a [TapeElement],
    /// All string and number text, concatenated back to back.
    strings: &'a str,
    /// Byte offsets into `strings`; string `i` spans
    /// `string_offsets[i]..string_offsets[i + 1]`.
    string_offsets: &'a [usize],
    /// Number of rows reported by [`Tape::num_rows`].
    num_rows: usize,
}
102
103impl<'a> Tape<'a> {
104 #[inline]
106 pub fn get_string(&self, idx: u32) -> &'a str {
107 let end_offset = self.string_offsets[idx as usize + 1];
108 let start_offset = self.string_offsets[idx as usize];
109 unsafe { self.strings.get_unchecked(start_offset..end_offset) }
112 }
113
114 pub fn get(&self, idx: u32) -> TapeElement {
116 self.elements[idx as usize]
117 }
118
119 pub fn next(&self, cur_idx: u32, expected: &str) -> Result<u32, ArrowError> {
123 match self.get(cur_idx) {
124 TapeElement::String(_)
125 | TapeElement::Number(_)
126 | TapeElement::True
127 | TapeElement::False
128 | TapeElement::Null
129 | TapeElement::I32(_)
130 | TapeElement::F32(_) => Ok(cur_idx + 1),
131 TapeElement::I64(_) | TapeElement::F64(_) => Ok(cur_idx + 2),
132 TapeElement::StartList(end_idx) => Ok(end_idx + 1),
133 TapeElement::StartObject(end_idx) => Ok(end_idx + 1),
134 TapeElement::EndObject(_) | TapeElement::EndList(_) => {
135 Err(self.error(cur_idx, expected))
136 }
137 }
138 }
139
    /// Returns the number of rows recorded for this tape.
    pub fn num_rows(&self) -> usize {
        self.num_rows
    }
144
145 fn serialize(&self, out: &mut String, idx: u32) -> u32 {
147 match self.get(idx) {
148 TapeElement::StartObject(end) => {
149 out.push('{');
150 let mut cur_idx = idx + 1;
151 while cur_idx < end {
152 cur_idx = self.serialize(out, cur_idx);
153 out.push_str(": ");
154 cur_idx = self.serialize(out, cur_idx);
155 }
156 out.push('}');
157 return end + 1;
158 }
159 TapeElement::EndObject(_) => out.push('}'),
160 TapeElement::StartList(end) => {
161 out.push('[');
162 let mut cur_idx = idx + 1;
163 while cur_idx < end {
164 cur_idx = self.serialize(out, cur_idx);
165 if cur_idx < end {
166 out.push_str(", ");
167 }
168 }
169 out.push(']');
170 return end + 1;
171 }
172 TapeElement::EndList(_) => out.push(']'),
173 TapeElement::String(s) => {
174 out.push('"');
175 out.push_str(self.get_string(s));
176 out.push('"')
177 }
178 TapeElement::Number(n) => out.push_str(self.get_string(n)),
179 TapeElement::True => out.push_str("true"),
180 TapeElement::False => out.push_str("false"),
181 TapeElement::Null => out.push_str("null"),
182 TapeElement::I64(high) => match self.get(idx + 1) {
183 TapeElement::I32(low) => {
184 let val = ((high as i64) << 32) | (low as u32) as i64;
185 let _ = write!(out, "{val}");
186 return idx + 2;
187 }
188 _ => unreachable!(),
189 },
190 TapeElement::I32(val) => {
191 let _ = write!(out, "{val}");
192 }
193 TapeElement::F64(high) => match self.get(idx + 1) {
194 TapeElement::F32(low) => {
195 let val = f64::from_bits(((high as u64) << 32) | low as u64);
196 let _ = write!(out, "{val}");
197 return idx + 2;
198 }
199 _ => unreachable!(),
200 },
201 TapeElement::F32(val) => {
202 let _ = write!(out, "{}", f32::from_bits(val));
203 }
204 }
205 idx + 1
206 }
207
208 pub fn error(&self, idx: u32, expected: &str) -> ArrowError {
210 let mut out = String::with_capacity(64);
211 self.serialize(&mut out, idx);
212 ArrowError::JsonError(format!("expected {expected} got {out}"))
213 }
214}
215
216#[derive(Debug, Copy, Clone)]
218enum DecoderState {
219 Object(u32),
223 List(u32),
227 String,
228 Value,
229 Number,
230 Colon,
231 Escape,
232 Unicode(u16, u16, u8),
236 Literal(Literal, u8),
240}
241
242impl DecoderState {
243 fn as_str(&self) -> &'static str {
244 match self {
245 DecoderState::Object(_) => "object",
246 DecoderState::List(_) => "list",
247 DecoderState::String => "string",
248 DecoderState::Value => "value",
249 DecoderState::Number => "number",
250 DecoderState::Colon => "colon",
251 DecoderState::Escape => "escape",
252 DecoderState::Unicode(_, _, _) => "unicode literal",
253 DecoderState::Literal(d, _) => d.as_str(),
254 }
255 }
256}
257
258#[derive(Debug, Copy, Clone)]
259enum Literal {
260 Null,
261 True,
262 False,
263}
264
265impl Literal {
266 fn element(&self) -> TapeElement {
267 match self {
268 Literal::Null => TapeElement::Null,
269 Literal::True => TapeElement::True,
270 Literal::False => TapeElement::False,
271 }
272 }
273
274 fn as_str(&self) -> &'static str {
275 match self {
276 Literal::Null => "null",
277 Literal::True => "true",
278 Literal::False => "false",
279 }
280 }
281
282 fn bytes(&self) -> &'static [u8] {
283 self.as_str().as_bytes()
284 }
285}
286
/// Evaluates to the next item of the iterator `$next`, or `break`s out of the
/// innermost enclosing loop when the iterator is exhausted.
macro_rules! next {
    ($next:ident) => {
        if let Some(item) = $next.next() {
            item
        } else {
            break
        }
    };
}
296
/// Incrementally decodes JSON bytes into tape form.
///
/// Input may be supplied in arbitrary chunks; unfinished parse state is kept
/// on `stack` between calls to `decode`.
pub struct TapeDecoder {
    /// The tape entries produced so far.
    elements: Vec<TapeElement>,

    /// Number of rows started so far.
    cur_row: usize,

    /// Once this many rows have been started, `decode` stops before starting
    /// another.
    batch_size: usize,

    /// Bytes of all string and number values, concatenated.
    ///
    /// While a string or number is still being decoded, this also holds the
    /// part of it seen so far.
    bytes: Vec<u8>,

    /// Offsets into `bytes`; one is pushed each time a string or number
    /// completes.
    offsets: Vec<usize>,

    /// Stack of in-progress parse states; empty between rows.
    stack: Vec<DecoderState>,
}
319
320impl TapeDecoder {
321 pub fn new(batch_size: usize, num_fields: usize) -> Self {
324 let tokens_per_row = 2 + num_fields * 2;
325 let mut offsets = Vec::with_capacity(batch_size * (num_fields * 2) + 1);
326 offsets.push(0);
327
328 let mut elements = Vec::with_capacity(batch_size * tokens_per_row);
329 elements.push(TapeElement::Null);
330
331 Self {
332 offsets,
333 elements,
334 batch_size,
335 cur_row: 0,
336 bytes: Vec::with_capacity(num_fields * 2 * 8),
337 stack: Vec::with_capacity(10),
338 }
339 }
340
    /// Decodes JSON from `buf`, appending to the tape, and returns the number
    /// of bytes of `buf` that were consumed.
    ///
    /// Decoding stops when `buf` is exhausted, or when no value is in progress
    /// and `batch_size` rows have already been started. Any unfinished state
    /// stays on `stack`, so a later call can continue where this one stopped.
    ///
    /// # Errors
    ///
    /// Returns a JSON error when a byte is encountered that is not valid in
    /// the current parse state.
    pub fn decode(&mut self, buf: &[u8]) -> Result<usize, ArrowError> {
        let mut iter = BufIter::new(buf);

        // Note: `next!` breaks out of the innermost loop when `iter` runs dry,
        // leaving the current state on the stack for the next call.
        while !iter.is_empty() {
            let state = match self.stack.last_mut() {
                Some(l) => l,
                None => {
                    // Between rows: skip separators and stop if there is no
                    // more input or the batch is full.
                    iter.skip_whitespace();
                    if iter.is_empty() || self.cur_row >= self.batch_size {
                        break;
                    }

                    // Start of a new row
                    self.cur_row += 1;
                    self.stack.push(DecoderState::Value);
                    self.stack.last_mut().unwrap()
                }
            };

            match state {
                // Inside an object: expect a key or the closing brace
                DecoderState::Object(start_idx) => {
                    // Whitespace and commas are both skipped here
                    iter.advance_until(|b| !json_whitespace(b) && b != b',');
                    match next!(iter) {
                        b'"' => {
                            // Pushed in reverse: the key string is parsed first,
                            // then the colon, then the value
                            self.stack.push(DecoderState::Value);
                            self.stack.push(DecoderState::Colon);
                            self.stack.push(DecoderState::String);
                        }
                        b'}' => {
                            // Patch the start marker with the end position and
                            // record the end marker pointing back at the start
                            let start_idx = *start_idx;
                            let end_idx = self.elements.len() as u32;
                            self.elements[start_idx as usize] = TapeElement::StartObject(end_idx);
                            self.elements.push(TapeElement::EndObject(start_idx));
                            self.stack.pop();
                        }
                        b => return Err(err(b, "parsing object")),
                    }
                }
                // Inside a list: expect a value or the closing bracket
                DecoderState::List(start_idx) => {
                    // Whitespace and commas are both skipped here
                    iter.advance_until(|b| !json_whitespace(b) && b != b',');
                    match iter.peek() {
                        Some(b']') => {
                            iter.next();
                            let start_idx = *start_idx;
                            let end_idx = self.elements.len() as u32;
                            self.elements[start_idx as usize] = TapeElement::StartList(end_idx);
                            self.elements.push(TapeElement::EndList(start_idx));
                            self.stack.pop();
                        }
                        // The byte is only peeked so the value parser sees it
                        Some(_) => self.stack.push(DecoderState::Value),
                        None => break,
                    }
                }
                // Inside a string: copy bytes up to the next backslash or quote
                DecoderState::String => {
                    let s = iter.skip_chrs(b'\\', b'"');
                    self.bytes.extend_from_slice(s);

                    match next!(iter) {
                        b'\\' => self.stack.push(DecoderState::Escape),
                        b'"' => {
                            // String complete: record its element and end offset
                            let idx = self.offsets.len() - 1;
                            self.elements.push(TapeElement::String(idx as _));
                            self.offsets.push(self.bytes.len());
                            self.stack.pop();
                        }
                        // `skip_chrs` only stops at one of the two bytes above
                        b => unreachable!("{}", b),
                    }
                }
                // Start of a value: the first byte selects the state that
                // replaces this one on the stack
                state @ DecoderState::Value => {
                    iter.skip_whitespace();
                    *state = match next!(iter) {
                        b'"' => DecoderState::String,
                        b @ b'-' | b @ b'0'..=b'9' => {
                            self.bytes.push(b);
                            DecoderState::Number
                        }
                        // The first byte of the literal has already been matched
                        b'n' => DecoderState::Literal(Literal::Null, 1),
                        b'f' => DecoderState::Literal(Literal::False, 1),
                        b't' => DecoderState::Literal(Literal::True, 1),
                        b'[' => {
                            // Placeholder end index, patched when the list closes
                            let idx = self.elements.len() as u32;
                            self.elements.push(TapeElement::StartList(u32::MAX));
                            DecoderState::List(idx)
                        }
                        b'{' => {
                            // Placeholder end index, patched when the object closes
                            let idx = self.elements.len() as u32;
                            self.elements.push(TapeElement::StartObject(u32::MAX));
                            DecoderState::Object(idx)
                        }
                        b => return Err(err(b, "parsing value")),
                    };
                }
                // Inside a number: copy bytes that can be part of a number
                DecoderState::Number => {
                    let s = iter.advance_until(|b| {
                        !matches!(b, b'0'..=b'9' | b'-' | b'+' | b'.' | b'e' | b'E')
                    });
                    self.bytes.extend_from_slice(s);

                    // The number is only finished once a following byte has
                    // been seen; otherwise it may continue in the next buffer
                    if !iter.is_empty() {
                        self.stack.pop();
                        let idx = self.offsets.len() - 1;
                        self.elements.push(TapeElement::Number(idx as _));
                        self.offsets.push(self.bytes.len());
                    }
                }
                // Expect the colon between a key and its value
                DecoderState::Colon => {
                    iter.skip_whitespace();
                    match next!(iter) {
                        b':' => self.stack.pop(),
                        b => return Err(err(b, "parsing colon")),
                    };
                }
                // Inside `null` / `true` / `false`: match the remaining bytes
                DecoderState::Literal(literal, idx) => {
                    let bytes = literal.bytes();
                    let expected = bytes.iter().skip(*idx as usize).copied();
                    for (expected, b) in expected.zip(&mut iter) {
                        match b == expected {
                            true => *idx += 1,
                            false => return Err(err(b, "parsing literal")),
                        }
                    }
                    // Only complete once every byte of the keyword was matched
                    if *idx == bytes.len() as u8 {
                        let element = literal.element();
                        self.stack.pop();
                        self.elements.push(element);
                    }
                }
                // Just after a backslash inside a string
                DecoderState::Escape => {
                    let v = match next!(iter) {
                        b'u' => {
                            // Swap this state for the unicode escape parser
                            self.stack.pop();
                            self.stack.push(DecoderState::Unicode(0, 0, 0));
                            continue;
                        }
                        b'"' => b'"',
                        b'\\' => b'\\',
                        b'/' => b'/',
                        b'b' => 8,  // backspace
                        b'f' => 12, // form feed
                        b'n' => b'\n',
                        b'r' => b'\r',
                        b't' => b'\t',
                        b => return Err(err(b, "parsing escape sequence")),
                    };

                    self.stack.pop();
                    self.bytes.push(v);
                }
                // Inside a `\u` escape; `idx` counts the steps completed so far
                DecoderState::Unicode(high, low, idx) => loop {
                    match *idx {
                        // Four hex digits of the first code unit
                        0..=3 => *high = (*high << 4) | parse_hex(next!(iter))? as u16,
                        4 => {
                            // If the first unit is a character on its own, emit it
                            if let Some(c) = char::from_u32(*high as u32) {
                                write_char(c, &mut self.bytes);
                                self.stack.pop();
                                break;
                            }

                            // Otherwise a second `\uXXXX` escape must follow
                            match next!(iter) {
                                b'\\' => {}
                                b => return Err(err(b, "parsing surrogate pair escape")),
                            }
                        }
                        5 => match next!(iter) {
                            b'u' => {}
                            b => return Err(err(b, "parsing surrogate pair unicode")),
                        },
                        // Four hex digits of the second code unit
                        6..=9 => *low = (*low << 4) | parse_hex(next!(iter))? as u16,
                        _ => {
                            // Both units read: combine them into one character
                            let c = char_from_surrogate_pair(*low, *high)?;
                            write_char(c, &mut self.bytes);
                            self.stack.pop();
                            break;
                        }
                    }
                    // Not reached when `next!` breaks, so an interrupted step is
                    // retried on the next call
                    *idx += 1;
                },
            }
        }

        // Consumed bytes = total minus what is left in the iterator
        Ok(buf.len() - iter.len())
    }
527
528 pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
530 if let Some(b) = self.stack.last() {
531 return Err(ArrowError::JsonError(format!(
532 "Cannot serialize to tape containing partial decode state {}",
533 b.as_str()
534 )));
535 }
536
537 let mut serializer =
538 TapeSerializer::new(&mut self.elements, &mut self.bytes, &mut self.offsets);
539
540 rows.iter()
541 .try_for_each(|row| row.serialize(&mut serializer))
542 .map_err(|e| ArrowError::JsonError(e.to_string()))?;
543
544 self.cur_row += rows.len();
545
546 Ok(())
547 }
548
    /// Returns the number of rows buffered so far.
    ///
    /// A row is counted as soon as it starts, so this includes a row that is
    /// still only partially decoded.
    pub fn num_buffered_rows(&self) -> usize {
        self.cur_row
    }
553
554 pub fn has_partial_row(&self) -> bool {
557 !self.stack.is_empty()
558 }
559
560 pub fn finish(&self) -> Result<Tape<'_>, ArrowError> {
562 if let Some(b) = self.stack.last() {
563 return Err(ArrowError::JsonError(format!(
564 "Truncated record whilst reading {}",
565 b.as_str()
566 )));
567 }
568
569 if self.offsets.len() >= u32::MAX as usize {
570 return Err(ArrowError::JsonError(format!(
571 "Encountered more than {} bytes of string data, consider using a smaller batch size",
572 u32::MAX
573 )));
574 }
575
576 if self.offsets.len() >= u32::MAX as usize {
577 return Err(ArrowError::JsonError(format!(
578 "Encountered more than {} JSON elements, consider using a smaller batch size",
579 u32::MAX
580 )));
581 }
582
583 assert_eq!(
585 self.offsets.last().copied().unwrap_or_default(),
586 self.bytes.len()
587 );
588
589 let strings = simdutf8::basic::from_utf8(&self.bytes)
590 .map_err(|_| ArrowError::JsonError("Encountered non-UTF-8 data".to_string()))?;
591
592 for offset in self.offsets.iter().copied() {
593 if !strings.is_char_boundary(offset) {
594 return Err(ArrowError::JsonError(
595 "Encountered truncated UTF-8 sequence".to_string(),
596 ));
597 }
598 }
599
600 Ok(Tape {
601 strings,
602 elements: &self.elements,
603 string_offsets: &self.offsets,
604 num_rows: self.cur_row,
605 })
606 }
607
608 pub fn clear(&mut self) {
610 assert!(self.stack.is_empty());
611
612 self.cur_row = 0;
613 self.bytes.clear();
614 self.elements.clear();
615 self.elements.push(TapeElement::Null);
616 self.offsets.clear();
617 self.offsets.push(0);
618 }
619}
620
621struct BufIter<'a> {
623 buf: &'a [u8],
624 pos: usize,
625}
626
627impl<'a> BufIter<'a> {
628 fn new(buf: &'a [u8]) -> Self {
629 Self { buf, pos: 0 }
630 }
631
632 #[inline]
633 fn as_slice(&self) -> &'a [u8] {
634 &self.buf[self.pos..]
635 }
636
637 #[inline]
638 fn is_empty(&self) -> bool {
639 self.pos >= self.buf.len()
640 }
641
642 fn peek(&self) -> Option<u8> {
643 self.buf.get(self.pos).copied()
644 }
645
646 #[inline]
647 fn advance(&mut self, skip: usize) {
648 self.pos += skip;
649 }
650
651 fn advance_until<F: FnMut(u8) -> bool>(&mut self, f: F) -> &[u8] {
652 let s = self.as_slice();
653 match s.iter().copied().position(f) {
654 Some(x) => {
655 self.advance(x);
656 &s[..x]
657 }
658 None => {
659 self.advance(s.len());
660 s
661 }
662 }
663 }
664
665 fn skip_chrs(&mut self, c1: u8, c2: u8) -> &[u8] {
666 let s = self.as_slice();
667 match memchr2(c1, c2, s) {
668 Some(p) => {
669 self.advance(p);
670 &s[..p]
671 }
672 None => {
673 self.advance(s.len());
674 s
675 }
676 }
677 }
678
679 fn skip_whitespace(&mut self) {
680 self.advance_until(|b| !json_whitespace(b));
681 }
682}
683
684impl Iterator for BufIter<'_> {
685 type Item = u8;
686
687 fn next(&mut self) -> Option<Self::Item> {
688 let b = self.peek();
689 self.pos += 1;
690 b
691 }
692
693 fn size_hint(&self) -> (usize, Option<usize>) {
694 let s = self.buf.len().checked_sub(self.pos).unwrap_or_default();
695 (s, Some(s))
696 }
697}
698
699impl ExactSizeIterator for BufIter<'_> {}
700
701fn err(b: u8, ctx: &str) -> ArrowError {
703 ArrowError::JsonError(format!(
704 "Encountered unexpected '{}' whilst {ctx}",
705 b as char
706 ))
707}
708
709fn char_from_surrogate_pair(low: u16, high: u16) -> Result<char, ArrowError> {
711 match (low, high) {
712 (0xDC00..=0xDFFF, 0xD800..=0xDBFF) => {
713 let n = (((high - 0xD800) as u32) << 10) | ((low - 0xDC00) as u32 + 0x1_0000);
714 char::from_u32(n)
715 .ok_or_else(|| ArrowError::JsonError(format!("Invalid UTF-16 surrogate pair {n}")))
716 }
717 _ => Err(ArrowError::JsonError(format!(
718 "Invalid UTF-16 surrogate pair. High: {high:#02X}, Low: {low:#02X}"
719 ))),
720 }
721}
722
/// Appends the UTF-8 encoding of `c` to `out`.
fn write_char(c: char, out: &mut Vec<u8>) {
    // Four bytes is the longest UTF-8 encoding of a single character.
    let mut scratch = [0u8; 4];
    let encoded: &str = c.encode_utf8(&mut scratch);
    out.extend_from_slice(encoded.as_bytes());
}
728
/// Returns `true` if `b` is a space, line feed, carriage return or tab.
#[inline]
fn json_whitespace(b: u8) -> bool {
    b == b' ' || b == b'\n' || b == b'\r' || b == b'\t'
}
734
735fn parse_hex(b: u8) -> Result<u8, ArrowError> {
737 let digit = char::from(b)
738 .to_digit(16)
739 .ok_or_else(|| err(b, "unicode escape"))?;
740 Ok(digit as u8)
741}
742
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the in-memory size of the hot enums at 8 bytes each.
    #[test]
    fn test_sizes() {
        assert_eq!(std::mem::size_of::<DecoderState>(), 8);
        assert_eq!(std::mem::size_of::<TapeElement>(), 8);
    }

    /// Decodes seven whitespace-separated objects and checks the exact tape,
    /// string data and offsets that result.
    #[test]
    fn test_basic() {
        let a = r#"
 {"hello": "world", "foo": 2, "bar": 45}

 {"foo": "bar"}

 {"fiz": null}

 {"a": true, "b": false, "c": null}

 {"a": "", "": "a"}

 {"a": "b", "object": {"nested": "hello", "foo": 23}, "b": {}, "c": {"foo": null }}

 {"a": ["", "foo", ["bar", "c"]], "b": {"1": []}, "c": {"2": [1, 2, 3]} }
 "#;
        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(a.as_bytes()).unwrap();
        assert!(!decoder.has_partial_row());
        assert_eq!(decoder.num_buffered_rows(), 7);

        let finished = decoder.finish().unwrap();
        // `finish` must not change the decoder's state
        assert!(!decoder.has_partial_row());
        assert_eq!(decoder.num_buffered_rows(), 7);
        assert_eq!(
            finished.elements,
            &[
                // Placeholder element at position 0
                TapeElement::Null,
                // Row 1
                TapeElement::StartObject(8), TapeElement::String(0), TapeElement::String(1), TapeElement::String(2), TapeElement::Number(3), TapeElement::String(4), TapeElement::Number(5), TapeElement::EndObject(1),
                // Row 2
                TapeElement::StartObject(12), TapeElement::String(6), TapeElement::String(7), TapeElement::EndObject(9),
                // Row 3
                TapeElement::StartObject(16), TapeElement::String(8), TapeElement::Null, TapeElement::EndObject(13),
                // Row 4
                TapeElement::StartObject(24), TapeElement::String(9), TapeElement::True, TapeElement::String(10), TapeElement::False, TapeElement::String(11), TapeElement::Null, TapeElement::EndObject(17),
                // Row 5
                TapeElement::StartObject(30), TapeElement::String(12), TapeElement::String(13), TapeElement::String(14), TapeElement::String(15), TapeElement::EndObject(25),
                // Row 6 (nested objects)
                TapeElement::StartObject(49), TapeElement::String(16), TapeElement::String(17), TapeElement::String(18), TapeElement::StartObject(40), TapeElement::String(19), TapeElement::String(20), TapeElement::String(21), TapeElement::Number(22), TapeElement::EndObject(35),
                TapeElement::String(23), TapeElement::StartObject(43), TapeElement::EndObject(42),
                TapeElement::String(24), TapeElement::StartObject(48), TapeElement::String(25), TapeElement::Null, TapeElement::EndObject(45),
                TapeElement::EndObject(31),
                // Row 7 (nested lists)
                TapeElement::StartObject(75), TapeElement::String(26), TapeElement::StartList(59), TapeElement::String(27), TapeElement::String(28), TapeElement::StartList(58), TapeElement::String(29), TapeElement::String(30), TapeElement::EndList(55),
                TapeElement::EndList(52),
                TapeElement::String(31), TapeElement::StartObject(65), TapeElement::String(32), TapeElement::StartList(64), TapeElement::EndList(63),
                TapeElement::EndObject(61),
                TapeElement::String(33), TapeElement::StartObject(74), TapeElement::String(34), TapeElement::StartList(73), TapeElement::Number(35), TapeElement::Number(36), TapeElement::Number(37), TapeElement::EndList(69),
                TapeElement::EndObject(67),
                TapeElement::EndObject(50)
            ]
        );

        // All keys, strings and numbers, concatenated in encounter order
        assert_eq!(
            finished.strings,
            "helloworldfoo2bar45foobarfizabcaaabobjectnestedhellofoo23bcfooafoobarcb1c2123"
        );
        assert_eq!(
            &finished.string_offsets,
            &[
                0, 5, 10, 13, 14, 17, 19, 22, 25, 28, 29, 30, 31, 32, 32, 32, 33, 34, 35, 41, 47,
                52, 55, 57, 58, 59, 62, 63, 63, 66, 69, 70, 71, 72, 73, 74, 75, 76, 77
            ]
        );

        decoder.clear();
        assert!(!decoder.has_partial_row());
        assert_eq!(decoder.num_buffered_rows(), 0);
    }

    /// Checks the error reported for malformed, truncated and non-UTF-8 input.
    #[test]
    fn test_invalid() {
        // Not a JSON value
        let mut decoder = TapeDecoder::new(16, 2);
        let err = decoder.decode(b"hello").unwrap_err().to_string();
        assert_eq!(
            err,
            "Json error: Encountered unexpected 'h' whilst parsing value"
        );

        // Key without a value
        let mut decoder = TapeDecoder::new(16, 2);
        let err = decoder.decode(b"{\"hello\": }").unwrap_err().to_string();
        assert_eq!(
            err,
            "Json error: Encountered unexpected '}' whilst parsing value"
        );

        // Misspelled literal
        let mut decoder = TapeDecoder::new(16, 2);
        let err = decoder
            .decode(b"{\"hello\": [ false, tru ]}")
            .unwrap_err()
            .to_string();
        assert_eq!(
            err,
            "Json error: Encountered unexpected ' ' whilst parsing literal"
        );

        // Unicode escape with too few hex digits
        let mut decoder = TapeDecoder::new(16, 2);
        let err = decoder
            .decode(b"{\"hello\": \"\\ud8\"}")
            .unwrap_err()
            .to_string();
        assert_eq!(
            err,
            "Json error: Encountered unexpected '\"' whilst unicode escape"
        );

        // High surrogate with no second escape following it
        let mut decoder = TapeDecoder::new(16, 2);
        let err = decoder
            .decode(b"{\"hello\": \"\\ud83d\"}")
            .unwrap_err()
            .to_string();
        assert_eq!(
            err,
            "Json error: Encountered unexpected '\"' whilst parsing surrogate pair escape"
        );

        // Truncated input: decode succeeds, finish reports the pending state
        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"he").unwrap();
        assert!(decoder.has_partial_row());
        assert_eq!(decoder.num_buffered_rows(), 1);
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Truncated record whilst reading string");

        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"hello\" : ").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Truncated record whilst reading value");

        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"hello\" : [").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Truncated record whilst reading list");

        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"hello\" : tru").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Truncated record whilst reading true");

        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"hello\" : nu").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Truncated record whilst reading null");

        // Invalid UTF-8 byte inside a string
        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"hello\" : \"world\xFF\"}").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Encountered non-UTF-8 data");

        // One multi-byte sequence split across two adjacent strings: the
        // combined buffer is valid UTF-8 but the string boundary is not
        let mut decoder = TapeDecoder::new(16, 2);
        decoder.decode(b"{\"\xe2\" : \"\x96\xa1\"}").unwrap();
        let err = decoder.finish().unwrap_err().to_string();
        assert_eq!(err, "Json error: Encountered truncated UTF-8 sequence");
    }

    /// Surrogate escapes that do not form a high/low pair must be rejected.
    #[test]
    fn test_invalid_surrogates() {
        // Two high surrogates
        let mut decoder = TapeDecoder::new(16, 2);
        let res = decoder.decode(b"{\"test\": \"\\ud800\\ud801\"}");
        assert!(res.is_err());

        // Two low surrogates
        let mut decoder = TapeDecoder::new(16, 2);
        let res = decoder.decode(b"{\"test\": \"\\udc00\\udc01\"}");
        assert!(res.is_err());
    }
}