arrow_csv::reader

Struct ReaderBuilder

Source
pub struct ReaderBuilder {
    schema: SchemaRef,
    format: Format,
    batch_size: usize,
    bounds: Option<(usize, usize)>,
    projection: Option<Vec<usize>>,
}
Expand description

CSV file reader builder

Fields§

§schema: SchemaRef

Schema of the CSV file

§format: Format

Format of the CSV file

§batch_size: usize

Batch size (number of records to load each time)

The default batch size when using the ReaderBuilder is 1024 records

§bounds: Option<(usize, usize)>

The bounds over which to scan the reader. None starts from 0 and runs until EOF.

§projection: Option<Vec<usize>>

Optional projection for which columns to load (zero-based column indices)

Implementations§

Source§

impl ReaderBuilder

Source

pub fn new(schema: SchemaRef) -> ReaderBuilder

Create a new builder for configuring CSV parsing options.

To convert a builder into a reader, call ReaderBuilder::build

§Example
let mut file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
// Infer the schema with the first 100 records
let (schema, _) = Format::default().infer_schema(&mut file, Some(100)).unwrap();
file.rewind().unwrap();

// create a builder
ReaderBuilder::new(Arc::new(schema)).build(file).unwrap();
Source

pub fn with_header(self, has_header: bool) -> Self

Set whether the CSV file has a header

Source

pub fn with_format(self, format: Format) -> Self

Overrides the Format of this ReaderBuilder

Source

pub fn with_delimiter(self, delimiter: u8) -> Self

Set the CSV file’s column delimiter as a byte character

Source

pub fn with_escape(self, escape: u8) -> Self

Set the given character as the CSV file’s escape character

Source

pub fn with_quote(self, quote: u8) -> Self

Set the given character as the CSV file’s quote character. Defaults to a double quote (`"`).

Source

pub fn with_terminator(self, terminator: u8) -> Self

Provide a custom terminator character. Defaults to CRLF.

Source

pub fn with_comment(self, comment: u8) -> Self

Provide a comment character. Lines starting with this character will be ignored.

Source

pub fn with_null_regex(self, null_regex: Regex) -> Self

Provide a regex to match null values. Defaults to `^$`.

Source

pub fn with_batch_size(self, batch_size: usize) -> Self

Set the batch size (number of records to load at one time)

Source

pub fn with_bounds(self, start: usize, end: usize) -> Self

Set the bounds over which to scan the reader. The `start` and `end` arguments are line numbers.

Source

pub fn with_projection(self, projection: Vec<usize>) -> Self

Set the reader’s column projection

Source

pub fn with_truncated_rows(self, allow: bool) -> Self

Whether to allow truncated rows when parsing.

By default this is set to false, and parsing will error if the CSV rows have different lengths. When set to true, records with fewer than the expected number of columns are allowed, and the missing columns are filled with nulls. If the record’s schema is not nullable, an error is still returned.

Source

pub fn build<R: Read>(self, reader: R) -> Result<Reader<R>, ArrowError>

Create a new Reader from a non-buffered reader

If `R: BufRead`, consider using `Self::build_buffered` to avoid unnecessary additional buffering, as internally this method wraps `reader` in `std::io::BufReader`.

Source

pub fn build_buffered<R: BufRead>(self, reader: R) -> Result<BufReader<R>, ArrowError>

Create a new BufReader from a buffered reader

Source

pub fn build_decoder(self) -> Decoder

Builds a decoder that can be used to decode CSV from an arbitrary byte stream

Trait Implementations§

Source§

impl Debug for ReaderBuilder

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,