Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add text parser. #11

Merged
merged 15 commits into from
Mar 10, 2020
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ Cargo.lock

# Intellij Files
.idea/
serde_ion.iml
serde_ion.iml

# VSCode Files
.vscode/
9 changes: 6 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,25 @@ travis-ci = { repository = "PeytonT/serde_ion", branch = "master" }

[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_bytes = "0.10"
serde_bytes = "0.11"
serde_derive = "1.0"
num-bigint = "0.2.2"
num-traits = "0.2.8"
num-derive = "0.3"
bit-vec = "0.6"
base64 = "0.10.1"
base64 = "0.11"
nom = "5.0.0"
thiserror = "1.0.9"
lazy_static = "1.4.0"
itertools = "0.8.2"

log = "0.4.8"
time = "0.2.6"
lexical-core = "0.7.4"

[dev-dependencies]
hex = "0.4.0"
pretty_assertions = "0.6.1"
pretty_env_logger = "0.4.0"

[build-dependencies]

Expand Down
55 changes: 53 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use num_bigint::BigInt;
use thiserror::Error;

pub type Result<T> = std::result::Result<T, Error>;
Expand All @@ -23,6 +24,14 @@ pub enum SymbolError {
UnknownSymbolText(usize),
#[error("the text for SID `{0}` is undefined")]
UndefinedSymbolText(usize),
#[error("the provided symbol table is invalid")]
InvalidSymbolTable,
#[error("invalid max_id for import in symbol table: {0:?}")]
InvalidMaxId(String),
#[error("unsupported version for import in symbol table: {0:?}")]
UnsupportedVersion(String),
#[error("invalid SID (outside numeric range): {0:?}")]
SidTooLarge(String),
}

#[derive(Error, Debug, PartialEq)]
Expand Down Expand Up @@ -59,10 +68,52 @@ pub enum BinaryFormatError {
StructUnordered,
#[error("invalid local symbol table")]
LocalTable,
#[error("time component out of range: {0} - {1}")]
TimeComponentRange(TimeComponent, BigInt),
}

/// Names a single component of a time value.
///
/// Carried by `BinaryFormatError::TimeComponentRange` to identify which
/// component was out of range. The `#[error(...)]` attribute on each variant
/// supplies the lowercase name interpolated into that message.
#[derive(Error, Debug, PartialEq)]
pub enum TimeComponent {
/// Displayed as `offset`.
#[error("offset")]
Offset,
/// Displayed as `year`.
#[error("year")]
Year,
/// Displayed as `month`.
#[error("month")]
Month,
/// Displayed as `day`.
#[error("day")]
Day,
/// Displayed as `hour`.
#[error("hour")]
Hour,
/// Displayed as `minute`.
#[error("minute")]
Minute,
/// Displayed as `second`.
#[error("second")]
Second,
/// Displayed as `fraction`.
#[error("fraction")]
Fraction,
}

/// Errors specific to the text format.
///
/// Each variant's user-facing message is given by its `#[error(...)]`
/// attribute; `String` payloads are interpolated verbatim into that message.
#[derive(Error, Debug, PartialEq)]
pub enum TextFormatError {
/// Placeholder variant with the message `TODO`.
// NOTE(review): looks like a stand-in for not-yet-categorized errors — confirm and replace.
#[error("TODO")]
TODO,
/// A hex escape was invalid. The payload is shown in the message
/// (presumably the offending escape text — verify against the parser).
#[error("invalid hex escape: {0}")]
HexEscape(String),
/// A short quoted string was not terminated.
#[error("unterminated short quoted string")]
OpenShortString,
/// A long quoted string was not terminated.
#[error("unterminated long quoted string")]
OpenLongString,
/// A value could not be interpreted as a biguint; the payload is shown in the message.
#[error("invalid biguint: {0}")]
BigUint(String),
/// A value could not be interpreted as a bigint; the payload is shown in the message.
#[error("invalid bigint: {0}")]
BigInt(String),
/// A Base64 value could not be decoded.
#[error("unable to decode Base64 value")]
Base64Decode,
/// A float value could not be parsed; the payload is shown in the message.
#[error("unable to parse float value: {0}")]
FloatParse(String),
/// A date had an invalid day.
#[error("date out of range (invalid day)")]
DateOutOfRange,
/// The Ion Version Marker named an unsupported version; the two fields are
/// rendered as `{0}.{1}` (presumably major and minor — confirm).
#[error("Ion Version Marker indicates an unsupported version of Ion: {0}.{1}")]
UnsupportedVersion(u32, u32),
/// The Ion Version Marker could not be parsed because an integer component was too big.
#[error("Ion Version Marker could not be parsed (int component too big)")]
IvmParseError,
/// A date was too imprecise for a time value to be present.
#[error("Date is too imprecise for time value presence")]
ImpreciseDate,
}
68 changes: 65 additions & 3 deletions src/parser/combinators.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
use crate::parser::parse_error::{IonError, IonResult};
use nom::error::ParseError;
use nom::InputLength;
use nom::{error::ErrorKind, Err};
use nom::{
error::{ErrorKind, ParseError},
AsBytes, AsChar, Err, IResult, InputIter, InputLength, Slice,
};
use std::{
fmt::Debug,
ops::{RangeFrom, RangeTo},
};

/// A collection of parser combinators for building Ion parsers. Mostly forked from nom for various reasons.
/// FIXME: Modifying code from nom like this is unfortunate, and hopefully at some point will be unnecessary.
Expand Down Expand Up @@ -77,3 +82,60 @@ where
Ok((input, second(o1)))
}
}

/// Consumes end of input, or errors if there is more data.
pub fn eof(i: &str) -> IonResult<&str, &str> {
if i.is_empty() {
Ok((i, i))
} else {
Err(Err::Error(IonError::from_error_kind(i, ErrorKind::Eof)))
}
}

/// Takes one element from input if it matches predicate f
pub fn one_if<I, F, Error: ParseError<I>>(
f: F,
) -> impl Fn(I) -> IResult<I, <I as InputIter>::Item, Error>
where
I: Slice<RangeFrom<usize>> + InputIter,
<I as InputIter>::Item: AsChar + Copy,
F: Fn(<I as InputIter>::Item) -> bool,
{
move |i: I| match (i).iter_elements().next().filter(|c| f(*c)) {
Some(c) => Ok((i.slice(c.len()..), c)),
None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::OneOf))),
}
}

#[allow(dead_code)]
/// A helper method for debugging the text parser.
/// Displays parser input and output (whether the output is an error or a successfully created object)
pub(crate) fn dbg_dmp<Input, F, Output>(
context: &'static str,
f: F,
) -> impl Fn(Input) -> IonResult<Input, Output>
where
Input: Clone + Slice<RangeTo<usize>> + AsBytes + Debug + InputLength,
Output: Debug,
F: Fn(Input) -> IonResult<Input, Output>,
{
move |i: Input| {
log::debug!(" {}: -> {:?}", context, &i);
match f(i.clone()) {
Err(e) => {
match &e {
Err::Failure(e) => {
log::debug!("{}: Failure({:?}) at: {:?}", context, e.kind, &i)
}
Err::Error(e) => log::debug!("{}: Error({:?}) at: {:?}", context, e.kind, &i),
Err::Incomplete(n) => log::debug!("{}: Err::Incomplete({:?}) at:", context, n),
}
Err(e)
}
Ok((i, v)) => {
log::debug!(" {}: <- {:?}", context, &v);
Ok((i, v))
}
}
}
}
120 changes: 0 additions & 120 deletions src/parser/constants.rs

This file was deleted.

Loading