pub struct ReaderBuilder {
schema: SchemaRef,
format: Format,
batch_size: usize,
bounds: Option<(usize, usize)>,
projection: Option<Vec<usize>>,
}
Expand description
CSV file reader builder
Fields§
§schema: SchemaRef
Schema of the CSV file
format: Format
Format of the CSV file
batch_size: usize
Batch size (number of records to load each time).
The default batch size when using the `ReaderBuilder` is 1024 records.
bounds: Option<(usize, usize)>
The bounds over which to scan the reader. `None` starts from 0 and runs until EOF.
projection: Option<Vec<usize>>
Optional projection for which columns to load (zero-based column indices)
Implementations§
Source§impl ReaderBuilder
impl ReaderBuilder
Sourcepub fn new(schema: SchemaRef) -> ReaderBuilder
pub fn new(schema: SchemaRef) -> ReaderBuilder
Create a new builder for configuring CSV parsing options.
To convert a builder into a reader, call `ReaderBuilder::build`.
§Example
let mut file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
// Infer the schema with the first 100 records
let (schema, _) = Format::default().infer_schema(&mut file, Some(100)).unwrap();
file.rewind().unwrap();
// create a builder
ReaderBuilder::new(Arc::new(schema)).build(file).unwrap();
Sourcepub fn with_header(self, has_header: bool) -> Self
pub fn with_header(self, has_header: bool) -> Self
Set whether the CSV file has a header
Sourcepub fn with_format(self, format: Format) -> Self
pub fn with_format(self, format: Format) -> Self
Overrides the Format of this ReaderBuilder
Sourcepub fn with_delimiter(self, delimiter: u8) -> Self
pub fn with_delimiter(self, delimiter: u8) -> Self
Set the CSV file’s column delimiter as a byte character
Sourcepub fn with_escape(self, escape: u8) -> Self
pub fn with_escape(self, escape: u8) -> Self
Set the given character as the CSV file’s escape character
Sourcepub fn with_quote(self, quote: u8) -> Self
pub fn with_quote(self, quote: u8) -> Self
Set the given character as the CSV file’s quote character; by default it is a double quote.
Sourcepub fn with_terminator(self, terminator: u8) -> Self
pub fn with_terminator(self, terminator: u8) -> Self
Provide a custom terminator character, defaults to CRLF
Sourcepub fn with_comment(self, comment: u8) -> Self
pub fn with_comment(self, comment: u8) -> Self
Provide a comment character; lines starting with this character will be ignored.
Sourcepub fn with_null_regex(self, null_regex: Regex) -> Self
pub fn with_null_regex(self, null_regex: Regex) -> Self
Provide a regex to match null values, defaults to ^$
Sourcepub fn with_batch_size(self, batch_size: usize) -> Self
pub fn with_batch_size(self, batch_size: usize) -> Self
Set the batch size (number of records to load at one time)
Sourcepub fn with_bounds(self, start: usize, end: usize) -> Self
pub fn with_bounds(self, start: usize, end: usize) -> Self
Set the bounds over which to scan the reader. `start` and `end` are line numbers.
Sourcepub fn with_projection(self, projection: Vec<usize>) -> Self
pub fn with_projection(self, projection: Vec<usize>) -> Self
Set the reader’s column projection
Sourcepub fn with_truncated_rows(self, allow: bool) -> Self
pub fn with_truncated_rows(self, allow: bool) -> Self
Whether to allow truncated rows when parsing.
By default this is set to `false`, and parsing will error if the CSV rows have different lengths.
When set to `true`, records with fewer than the expected number of columns are allowed and the
missing columns are filled with nulls. If the record’s schema is not nullable, then it will
still return an error.
Sourcepub fn build<R: Read>(self, reader: R) -> Result<Reader<R>, ArrowError>
pub fn build<R: Read>(self, reader: R) -> Result<Reader<R>, ArrowError>
Create a new `Reader` from a non-buffered reader.
If `R: BufRead`, consider using `Self::build_buffered` to avoid unnecessary additional
buffering, as internally this method wraps `reader` in `std::io::BufReader`.
Sourcepub fn build_buffered<R: BufRead>(
self,
reader: R,
) -> Result<BufReader<R>, ArrowError>
pub fn build_buffered<R: BufRead>( self, reader: R, ) -> Result<BufReader<R>, ArrowError>
Create a new `BufReader` from a buffered reader.
Sourcepub fn build_decoder(self) -> Decoder
pub fn build_decoder(self) -> Decoder
Builds a decoder that can be used to decode CSV from an arbitrary byte stream