diff --git a/.gitignore b/.gitignore index eda66b5..a1c3480 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,7 @@ Cargo.lock # Intellij Files .idea/ -serde_ion.iml \ No newline at end of file +serde_ion.iml + +# VSCode Files +.vscode/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index ee80c83..ecdb9fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,22 +25,25 @@ travis-ci = { repository = "PeytonT/serde_ion", branch = "master" } [dependencies] serde = { version = "1.0", features = ["derive"] } -serde_bytes = "0.10" +serde_bytes = "0.11" serde_derive = "1.0" num-bigint = "0.2.2" num-traits = "0.2.8" num-derive = "0.3" bit-vec = "0.6" -base64 = "0.10.1" +base64 = "0.11" nom = "5.0.0" thiserror = "1.0.9" lazy_static = "1.4.0" itertools = "0.8.2" - +log = "0.4.8" +time = "0.2.6" +lexical-core = "0.7.4" [dev-dependencies] hex = "0.4.0" pretty_assertions = "0.6.1" +pretty_env_logger = "0.4.0" [build-dependencies] diff --git a/src/error.rs b/src/error.rs index 15f6dfe..303b7f1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,4 @@ +use num_bigint::BigInt; use thiserror::Error; pub type Result = std::result::Result; @@ -23,6 +24,14 @@ pub enum SymbolError { UnknownSymbolText(usize), #[error("the text for SID `{0}` is undefined")] UndefinedSymbolText(usize), + #[error("the provided symbol table is invalid")] + InvalidSymbolTable, + #[error("invalid max_id for import in symbol table: {0:?}")] + InvalidMaxId(String), + #[error("unsupported version for import in symbol table: {0:?}")] + UnsupportedVersion(String), + #[error("invalid SID (outside numeric range): {0:?}")] + SidTooLarge(String), } #[derive(Error, Debug, PartialEq)] @@ -59,10 +68,52 @@ pub enum BinaryFormatError { StructUnordered, #[error("invalid local symbol table")] LocalTable, + #[error("time component out of range: {0} - {1}")] + TimeComponentRange(TimeComponent, BigInt), +} + +#[derive(Error, Debug, PartialEq)] +pub enum TimeComponent { + #[error("offset")] + Offset, + 
#[error("year")] + Year, + #[error("month")] + Month, + #[error("day")] + Day, + #[error("hour")] + Hour, + #[error("minute")] + Minute, + #[error("second")] + Second, + #[error("fraction")] + Fraction, } #[derive(Error, Debug, PartialEq)] pub enum TextFormatError { - #[error("TODO")] - TODO, + #[error("invalid hex escape: {0}")] + HexEscape(String), + #[error("unterminated short quoted string")] + OpenShortString, + #[error("unterminated long quoted string")] + OpenLongString, + #[error("invalid biguint: {0}")] + BigUint(String), + #[error("invalid bigint: {0}")] + BigInt(String), + #[error("unable to decode Base64 value")] + Base64Decode, + #[error("unable to parse float value: {0}")] + FloatParse(String), + #[error("date out of range (invalid day)")] + DateOutOfRange, + #[error("Ion Version Marker indicates an unsupported version of Ion: {0}.{1}")] + UnsupportedVersion(u32, u32), + #[error("Ion Version Marker could not be parsed (int component too big)")] + IvmParseError, + #[error("Date is too imprecise for time value presence")] + ImpreciseDate, } diff --git a/src/parser/combinators.rs b/src/parser/combinators.rs index a056ff4..b5a56f4 100644 --- a/src/parser/combinators.rs +++ b/src/parser/combinators.rs @@ -1,7 +1,12 @@ use crate::parser::parse_error::{IonError, IonResult}; -use nom::error::ParseError; -use nom::InputLength; -use nom::{error::ErrorKind, Err}; +use nom::{ + error::{ErrorKind, ParseError}, + AsBytes, AsChar, Err, IResult, InputIter, InputLength, Slice, +}; +use std::{ + fmt::Debug, + ops::{RangeFrom, RangeTo}, +}; /// A collection of parser combinators for building Ion parsers. Mostly forked from nom for various reasons. /// FIXME: Modifying code from nom like this is unfortunate, and hopefully at some point will be unnecessary. @@ -77,3 +82,60 @@ where Ok((input, second(o1))) } } + +/// Consumes end of input, or errors if there is more data. 
+pub fn eof(i: &str) -> IonResult<&str, &str> { + if i.is_empty() { + Ok((i, i)) + } else { + Err(Err::Error(IonError::from_error_kind(i, ErrorKind::Eof))) + } +} + +/// Takes one element from input if it matches predicate f +pub fn one_if>( + f: F, +) -> impl Fn(I) -> IResult::Item, Error> +where + I: Slice> + InputIter, + ::Item: AsChar + Copy, + F: Fn(::Item) -> bool, +{ + move |i: I| match (i).iter_elements().next().filter(|c| f(*c)) { + Some(c) => Ok((i.slice(c.len()..), c)), + None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::OneOf))), + } +} + +#[allow(dead_code)] +/// A helper method for debugging the text parser. +/// Displays parser input and output (whether the output is an error or a successfully created object) +pub(crate) fn dbg_dmp( + context: &'static str, + f: F, +) -> impl Fn(Input) -> IonResult +where + Input: Clone + Slice> + AsBytes + Debug + InputLength, + Output: Debug, + F: Fn(Input) -> IonResult, +{ + move |i: Input| { + log::debug!(" {}: -> {:?}", context, &i); + match f(i.clone()) { + Err(e) => { + match &e { + Err::Failure(e) => { + log::debug!("{}: Failure({:?}) at: {:?}", context, e.kind, &i) + } + Err::Error(e) => log::debug!("{}: Error({:?}) at: {:?}", context, e.kind, &i), + Err::Incomplete(n) => log::debug!("{}: Err::Incomplete({:?}) at:", context, n), + } + Err(e) + } + Ok((i, v)) => { + log::debug!(" {}: <- {:?}", context, &v); + Ok((i, v)) + } + } + } +} diff --git a/src/parser/constants.rs b/src/parser/constants.rs deleted file mode 100644 index 79ecc8e..0000000 --- a/src/parser/constants.rs +++ /dev/null @@ -1,120 +0,0 @@ -// An operator is an unquoted sequence of one or more of: !#%&*+-./;<=>?@^`|~ -pub enum OperatorCharacter { - PlaceHolder, - // ! - // # - // % - // & - // * - // + - // - - // . - // / - // ; - // < - // = - // > - // ? - // @ - // ^ - // ` - // | - // ~ -} - -impl OperatorCharacter { - pub fn as_str(&self) -> String { - match self { - &OperatorCharacter::PlaceHolder => todo!(), - // ... 
- } - } -} - -// In the text notation, integer values must be followed by one of the fifteen numeric stop-characters: {}[](),\"\'\ \t\n\r\v\f. -// In the text notation, real values must be followed by one of the fifteen numeric stop-characters: {}[](),\"\'\ \t\n\r\v\f. -pub enum NumericStopCharacter { - LeftCurlyBracket, - // { - RightCurlyBracket, - // } - LeftSquareBracket, - // [ - RightSquareBracket, - // ] - LeftParenthesis, - // ( - RightParenthesis, - // ) - Comma, - // , - QuotationMark, - // " - Apostrophe, - // ' - Space, - // U+0020 - Tab, - // \t - LineFeed, - // \n - CarriageReturn, - // \r - VerticalTab, - // \v - FormFeed, // \f -} - -impl NumericStopCharacter { - pub fn as_str(&self) -> String { - match self { - &NumericStopCharacter::LeftCurlyBracket => todo!(), - &NumericStopCharacter::RightCurlyBracket => todo!(), - &NumericStopCharacter::LeftSquareBracket => todo!(), - &NumericStopCharacter::RightSquareBracket => todo!(), - &NumericStopCharacter::LeftParenthesis => todo!(), - &NumericStopCharacter::RightParenthesis => todo!(), - &NumericStopCharacter::Comma => todo!(), - &NumericStopCharacter::QuotationMark => todo!(), - &NumericStopCharacter::Apostrophe => todo!(), - &NumericStopCharacter::Space => todo!(), - &NumericStopCharacter::Tab => todo!(), - &NumericStopCharacter::LineFeed => todo!(), - &NumericStopCharacter::CarriageReturn => todo!(), - &NumericStopCharacter::VerticalTab => todo!(), - &NumericStopCharacter::FormFeed => todo!(), - } - } -} - -// The Ion text format supports escape sequences only within quoted strings and symbols. -// Ion supports most of the escape sequences defined by C++, Java, and JSON. 
-pub enum TextFormatEscapeCharacter { - PlaceHolder, - //U+0000 \0 NUL - //U+0007 \a alert BEL - //U+0008 \b backspace BS - //U+0009 \t horizontal tab HT - //U+000A \n linefeed LF - //U+000C \f form feed FF - //U+000D \r carriage return CR - //U+000B \v vertical tab VT - //U+0022 \" double quote - //U+0027 \' single quote - //U+003F \? question mark - //U+005C \\ backslash - //U+002F \/ forward slash - //nothing \NL escaped NL expands to nothing - //U+00HH \xHH 2-digit hexadecimal Unicode code point - //U+HHHH \uHHHH 4-digit hexadecimal Unicode code point - //U+HHHHHHHH \UHHHHHHHH 8-digit hexadecimal Unicode code point -} - -impl TextFormatEscapeCharacter { - pub fn as_str(&self) -> String { - match self { - &TextFormatEscapeCharacter::PlaceHolder => todo!(), - // ... - } - } -} diff --git a/src/parser/ion_1_0/binary.rs b/src/parser/ion_1_0/binary.rs index 6bbc10f..0e2a52b 100644 --- a/src/parser/ion_1_0/binary.rs +++ b/src/parser/ion_1_0/binary.rs @@ -1,11 +1,13 @@ -use super::current_symbol_table::*; -use super::subfield::*; -use super::typed_value::*; -use crate::error::{BinaryFormatError, FormatError}; -use crate::parser::ion_1_0::current_symbol_table::CurrentSymbolTable; -use crate::parser::parse_error::{IonError, IonResult}; -use crate::symbols::{SymbolToken, SYSTEM_SYMBOL_TABLE_V1}; -use crate::value::{Blob, Clob, Data, Decimal, List, Sexp, Struct, Timestamp, Value}; +use super::{current_symbol_table::*, subfield::*, typed_value::*}; +use crate::{ + error::{BinaryFormatError, FormatError, TimeComponent}, + parser::{ + ion_1_0::current_symbol_table::CurrentSymbolTable, + parse_error::{IonError, IonResult}, + }, + symbols::{SymbolToken, SYSTEM_SYMBOL_TABLE_V1}, + value::{Blob, Clob, Data, Decimal, List, Sexp, Struct, Timestamp, Value}, +}; use itertools::Itertools; use nom::{ combinator::{all_consuming, complete}, @@ -15,9 +17,8 @@ use nom::{ sequence::pair, Err, }; -use num_bigint::{BigInt, BigUint, Sign}; -use num_traits::identities::Zero; -use 
num_traits::ToPrimitive; +use num_bigint::{BigInt, BigUint, Sign, ToBigInt}; +use num_traits::{identities::Zero, ToPrimitive}; type ParseResult = Result>>; @@ -41,19 +42,17 @@ fn parse_top_level_value<'a, 'b>( Ok((rest, Some(value))) => { // If the value is a Struct... if let Data::Struct(ion_struct) = &value.value { - // And it has an annotations vector... - if let Some(annotations) = &value.annotations { - // And the annotations vector contains a first annotation (i.e. is non-empty)... - if let Some(symbol) = annotations.first() { - // And the first annotation is not a null symbol... - if let Some(token) = symbol { - // And the value of the annotation is "$ion_symbol_table"... - if *token == SYSTEM_SYMBOL_TABLE_V1.symbols[3] { - // Then it is an update to the local symbol table. Apply it. - update_current_symbol_table(symbol_table, ion_struct); - // And return no Value - return Ok((rest, None)); - } + // And the annotations vector contains a first annotation (i.e. is non-empty)... + if let Some(symbol) = value.annotations.first() { + // And the first annotation is not a null symbol... + if let Some(token) = symbol { + // And the value of the annotation is "$ion_symbol_table"... + if *token == SYSTEM_SYMBOL_TABLE_V1.symbols[3] { + // Then it is an update to the local symbol table. Apply it. 
+ update_current_symbol_table(symbol_table, ion_struct) + .map_err(|e| Err::Failure(IonError::from_symbol_error(i, e)))?; + // And return no Value + return Ok((rest, None)); } } } @@ -113,7 +112,7 @@ fn parse_typed_value<'a>( fn wrap_data<'a>(data: Data) -> ParseResult<&'a [u8], Option> { Ok(Some(Value { value: data, - annotations: None, + annotations: vec![], })) } @@ -428,7 +427,32 @@ fn parse_timestamp(typed_value: TypedValue) -> ParseResult<&[u8], Option Ok(None), _ => { let (rest, offset) = take_var_int(typed_value.rep)?; + let offset = match offset.to_i32() { + Some(offset) => offset, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Offset, + offset, + )), + ))) + } + }; + let (rest, year) = take_var_uint(rest)?; + let year = match year.to_u16() { + Some(year) => year, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Year, + year.to_bigint().unwrap(), + )), + ))) + } + }; // Parsing complete with precision of Year if rest.is_empty() { @@ -436,6 +460,18 @@ fn parse_timestamp(typed_value: TypedValue) -> ParseResult<&[u8], Option month, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Month, + month.to_bigint().unwrap(), + )), + ))) + } + }; // Parsing complete with precision of Month if rest.is_empty() { @@ -447,6 +483,18 @@ fn parse_timestamp(typed_value: TypedValue) -> ParseResult<&[u8], Option day, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Day, + day.to_bigint().unwrap(), + )), + ))) + } + }; // Parsing complete with precision of Day if rest.is_empty() { @@ -459,7 +507,31 @@ fn 
parse_timestamp(typed_value: TypedValue) -> ParseResult<&[u8], Option hour, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Hour, + hour.to_bigint().unwrap(), + )), + ))) + } + }; let (rest, minute) = take_var_uint(rest)?; + let minute = match minute.to_u8() { + Some(minute) => minute, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Minute, + minute.to_bigint().unwrap(), + )), + ))) + } + }; // Parsing complete with precision of Minute if rest.is_empty() { @@ -474,6 +546,18 @@ fn parse_timestamp(typed_value: TypedValue) -> ParseResult<&[u8], Option second, + None => { + return Err(Err::Failure(IonError::from_format_error( + typed_value.index, + FormatError::Binary(BinaryFormatError::TimeComponentRange( + TimeComponent::Second, + second.to_bigint().unwrap(), + )), + ))) + } + }; // Parsing complete with precision of Second if rest.is_empty() { @@ -970,7 +1054,7 @@ fn parse_annotation<'a>( .collect() { Ok(annotations) => { - value.annotations = Some(annotations); + value.annotations = annotations; Ok(value) } Result::Err(error) => Err(Err::Failure(IonError::from_symbol_error( diff --git a/src/parser/ion_1_0/current_symbol_table.rs b/src/parser/ion_1_0/current_symbol_table.rs index d7c6eb9..3ab4dee 100644 --- a/src/parser/ion_1_0/current_symbol_table.rs +++ b/src/parser/ion_1_0/current_symbol_table.rs @@ -1,8 +1,14 @@ -use crate::error::SymbolError; -use crate::symbols::{SymbolToken, SYSTEM_SYMBOL_TABLE_V1}; -use crate::value::{Data, List, Struct}; -use std::collections::hash_map::HashMap; +use crate::{ + error::SymbolError, + symbols::{ImportDescriptor, SymbolToken, SYSTEM_SYMBOL_TABLE_V1}, + value::{Data, List, Struct, Value}, +}; +use itertools::Itertools; +use num_bigint::BigInt; +use num_traits::{One, ToPrimitive, Zero}; +use 
std::collections::HashMap; +#[derive(Debug)] pub enum CurrentSymbolTable { Local { symbols: Vec }, SystemV1, @@ -30,6 +36,19 @@ impl CurrentSymbolTable { }, } } + + pub(crate) fn add_symbol(&mut self, token: &SymbolToken) { + if !self.contains(&token) { + append_symbols_to_current_table(self, vec![token.clone()]); + } + } + + pub(crate) fn contains(&self, token: &SymbolToken) -> bool { + match self { + CurrentSymbolTable::SystemV1 => SYSTEM_SYMBOL_TABLE_V1.symbols.contains(token), + CurrentSymbolTable::Local { symbols } => symbols.contains(token), + } + } } /// # Local Symbol Tables @@ -61,69 +80,100 @@ impl CurrentSymbolTable { /// /// Any other field (including, for example, name or version) is ignored. -// Modify the current symbol table according to the encountered local symbol table. +/// Modify the current symbol table according to the encountered local symbol table. +/// +/// http://amzn.github.io/ion-docs/docs/symbols.html#local-symbol-tables pub(crate) fn update_current_symbol_table( current: &mut CurrentSymbolTable, encountered: &Option, -) { +) -> Result<(), SymbolError> { let (imports, symbols): (TableImport, Vec) = match encountered { None => (TableImport::None, vec![]), - Some(Struct { fields: values }) => { - let keys: HashMap<&str, usize> = values - .iter() - .enumerate() - .filter_map(|(i, val)| match &val.0 { - SymbolToken::Known { text } => Some((text.as_str(), i)), - SymbolToken::Unknown { .. } => None, - SymbolToken::Zero => None, - }) - .collect(); + Some(Struct { fields }) => { + // When processing imports we currently use only the first value for present keys. + // See https://github.com/amzn/ion-docs/issues/101 + let index_map = make_index_map(fields); + // The imports field should be the symbol $ion_symbol_table or a list as specified. 
- let imports = match keys.get("imports") { + let imports = match index_map.get("imports") { None => TableImport::None, - Some(index) => match &values.get(*index).unwrap().1.value { - Data::List(list) => match list { - None => TableImport::None, - Some(List { values }) => TableImport::Imports( - values - .iter() - // each element of the list must be a struct; - // each element that is null or is not a struct is ignored. - .filter_map(|value| match &value.value { - Data::Struct(Some(val)) => Some(val.clone()), - _ => None, - }) - .collect(), - ), - }, - _ => TableImport::None, - }, + Some(imports_indices) => { + if imports_indices.is_empty() { + TableImport::None + } else if imports_indices.len() > 1 { + return Err(SymbolError::InvalidSymbolTable); + } else { + match &fields + .get(*imports_indices.get(0).unwrap()) + .unwrap() + .1 + .value + { + // This symbol table replaces the current table with a new set of symbols + // from the catalog and any symbols present within the import itself. + Data::List(list) => match list { + None => TableImport::None, + Some(List { values }) => { + TableImport::Imports( + values + .iter() + // each element of the list must be a struct; + // each element that is null or is not a struct is ignored. + .filter_map(|value| match &value.value { + Data::Struct(Some(val)) => Some(val.clone()), + _ => None, + }) + .collect(), + ) + } + }, + // This symbol table is an update to the current table + Data::Symbol(Some(SymbolToken::Known { text })) => { + if text == "$ion_symbol_table" { + TableImport::IonSymbolTable + } else { + // Should we throw an error here? + TableImport::None + } + } + _ => TableImport::None, + } + } + } }; // The symbols field should be a list of strings. If the field is missing or has any other type, // it is treated as if it were an empty list. // Null elements in the symbols list declare unknown symbol text (“gaps”) for its SID within the // sequence. 
Any element of the list that is not a string must be interpreted as if it were null. // Any SIDs that refer to null slots in a local symbol table are equivalent to symbol zero. - let symbols = match keys.get("symbols") { + let symbols = match index_map.get("symbols") { None => vec![], - Some(index) => match &values.get(*index).unwrap().1.value { - Data::List(list) => match list { - None => vec![], - Some(List { values }) => values - .iter() - .map(|value| match &value.value { - Data::String(string) => match string { - None => SymbolToken::Zero, - Some(string) => SymbolToken::Known { - text: string.clone(), - }, - }, - _ => SymbolToken::Zero, - }) - .collect(), - }, - _ => vec![], - }, + Some(indices) => { + if indices.is_empty() { + vec![] + } else if indices.len() > 1 { + return Err(SymbolError::InvalidSymbolTable); + } else { + match &fields.get(*indices.get(0).unwrap()).unwrap().1.value { + Data::List(list) => match list { + None => vec![], + Some(List { values }) => values + .iter() + .map(|value| match &value.value { + Data::String(string) => match string { + None => SymbolToken::Zero, + Some(string) => SymbolToken::Known { + text: string.clone(), + }, + }, + _ => SymbolToken::Zero, + }) + .collect(), + }, + _ => vec![], + } + } + } }; (imports, symbols) } @@ -132,7 +182,7 @@ pub(crate) fn update_current_symbol_table( TableImport::None => { if symbols.is_empty() { *current = CurrentSymbolTable::SystemV1; - return; + return Ok(()); } append_symbols_to_system_table(current, symbols); } @@ -146,12 +196,19 @@ pub(crate) fn update_current_symbol_table( append_symbols_to_system_table(current, symbols); } }, - TableImport::Imports(imports) => todo!(), + TableImport::Imports(imports) => { + *current = CurrentSymbolTable::SystemV1; + handle_imports(current, imports)?; + append_symbols_to_current_table(current, symbols); + } } + + Ok(()) } fn append_symbols_to_system_table(table: &mut CurrentSymbolTable, symbols: Vec) { - let mut symbol_vec: Vec = Vec::new(); + let mut 
symbol_vec: Vec = + Vec::with_capacity(SYSTEM_SYMBOL_TABLE_V1.symbols.len() + symbols.len()); symbol_vec.extend(SYSTEM_SYMBOL_TABLE_V1.symbols.iter().cloned()); symbol_vec.extend(symbols); *table = CurrentSymbolTable::Local { @@ -159,6 +216,23 @@ fn append_symbols_to_system_table(table: &mut CurrentSymbolTable, symbols: Vec) { + match table { + CurrentSymbolTable::SystemV1 => append_symbols_to_system_table(table, symbols), + CurrentSymbolTable::Local { + symbols: current_symbols, + } => current_symbols.extend(symbols.into_iter()), + } +} + /// Imports /// /// A local symbol table implicitly imports the system symbol table that is active at the point @@ -177,6 +251,7 @@ fn append_symbols_to_system_table(table: &mut CurrentSymbolTable, symbols: Vec, +) -> Result<(), SymbolError> { + for Struct { fields } in imports { + // When processing imports we currently use only the first value for present keys. + // See https://github.com/amzn/ion-docs/issues/101 + let index_map = make_index_map(&fields); + + // If no name field is defined, or if it is not a non-empty string, the import clause is ignored. + // If the name field is "$ion", the import clause is ignored. + let import_name = match index_map.get("name") { + Some(indices) => { + if indices.is_empty() { + continue; + } + + let data = &fields.get(*indices.get(0).unwrap()).unwrap().1.value; + match data { + Data::String(Some(value)) if value == "$ion" => continue, + Data::String(Some(value)) => value, + _ => continue, + } + } + None => continue, + }; + + // If no version field is defined, or if it is null, not an int, or less than 1, act as if it is 1. 
+ let version: u32 = match index_map.get("version") { + None => One::one(), + Some(indices) => { + if indices.is_empty() { + continue; + } + + let data = &fields.get(*indices.get(0).unwrap()).unwrap().1.value; + if let Data::Int(Some(value)) = data { + if value < &One::one() { + One::one() + } else if value > &BigInt::from(std::u32::MAX) { + return Err(SymbolError::UnsupportedVersion(data.to_text())); + } else { + value.to_u32().expect("verified above") + } + } else { + One::one() + } + } + }; + + // If a max_id field is defined but is null, not an int, or less than zero, act as if it is undefined. + // Select a shared symbol table instance as follows: + // - Query the catalog to retrieve the specified table by name and version. + // - If an exact match is not found: + // - If max_id is undefined, implementations MUST raise an error and halt processing. + // - Otherwise query the catalog to retrieve the table with the given name and the greatest version available. + // - If no table has been selected, substitute a dummy table containing max_id undefined symbols. + // - If max_id is undefined, set it to the largest symbol ID of the selected table (which will necessarily be an exact match). + + // Allocate the next max_id symbol IDs to this imported symbol table. + let max_id: Option = match index_map.get("max_id") { + None => None, + Some(indices) => { + if indices.is_empty() { + None + } else { + let data = &fields.get(*indices.get(0).unwrap()).unwrap().1.value; + if let Data::Int(Some(value)) = data { + if &BigInt::zero() > value || value > &BigInt::from(std::u32::MAX) { + return Err(SymbolError::InvalidMaxId(data.to_text())); + } else { + Some(value.to_u32().expect("confirmed max_id in range above")) + } + } else { + return Err(SymbolError::InvalidMaxId(data.to_text())); + } + } + } + }; + + // As there is no catalog currently, all undefined max_ids are an error. 
+ match max_id { + None => return Err(SymbolError::InvalidMaxId("None".to_string())), + Some(max_id) => { + // In lieu of looking up the symbol tables in a catalog we'll just add that many + // items to the list. + // TODO: do something better (track max_id, use a sparse vec, etc.) + let filler_symbols = std::iter::repeat_with(|| SymbolToken::Unknown { + import_location: ImportDescriptor::new(import_name.clone(), max_id, version), + }) + .take(max_id as usize) + .collect_vec(); + + append_symbols_to_current_table(current, filler_symbols) + } + } + } + + Ok(()) +} + +// The specification allows us to ignore all fields without specific names when working with symbol +// tables. The care-free use of continue within this function is not necessarily correct elsewhere. +fn make_index_map(map: &[(SymbolToken, Value)]) -> HashMap<&str, Vec> { + let mut key_map: HashMap<&str, Vec> = HashMap::new(); + for (i, value) in map.iter().enumerate() { + match &value.0 { + SymbolToken::Known { text } => { + key_map + .entry(text) + .and_modify(|e| e.push(i)) + .or_insert_with(|| vec![i]); + } + SymbolToken::Unknown { .. 
} => continue, + SymbolToken::Zero => continue, + } + } + key_map } diff --git a/src/parser/ion_1_0/mod.rs b/src/parser/ion_1_0/mod.rs index 10d13b7..233eee0 100644 --- a/src/parser/ion_1_0/mod.rs +++ b/src/parser/ion_1_0/mod.rs @@ -1,4 +1,5 @@ pub mod binary; pub mod current_symbol_table; mod subfield; +pub mod text; mod typed_value; diff --git a/src/parser/ion_1_0/subfield.rs b/src/parser/ion_1_0/subfield.rs index 247c97f..aec20e3 100644 --- a/src/parser/ion_1_0/subfield.rs +++ b/src/parser/ion_1_0/subfield.rs @@ -2,15 +2,13 @@ use std::iter; use crate::parser::parse_error::{IonError, IonResult}; use bit_vec::BitVec; -use nom::error::ParseError; use nom::{ bytes::complete::{take, take_while}, - error::ErrorKind, + error::{ErrorKind, ParseError}, Err, }; use num_bigint::{BigInt, BigUint, Sign}; -use num_traits::cast::ToPrimitive; -use num_traits::identities::Zero; +use num_traits::{cast::ToPrimitive, identities::Zero}; /// ## Basic Field Formats /// diff --git a/src/parser/ion_1_0/text/mod.rs b/src/parser/ion_1_0/text/mod.rs new file mode 100644 index 0000000..743da5b --- /dev/null +++ b/src/parser/ion_1_0/text/mod.rs @@ -0,0 +1,2597 @@ +#![warn(dead_code, unused_variables)] +#[cfg(test)] +mod tests; +mod time; + +use self::time::{TextDate, TextTime, TextTimestamp}; +use crate::{ + error::{FormatError, SymbolError, TextFormatError}, + parser::{ + combinators::{eof, one_if}, + ion_1_0::current_symbol_table::{update_current_symbol_table, CurrentSymbolTable}, + parse_error::{IonError, IonResult}, + }, + symbols::SymbolToken, + value::{self as ion}, +}; +use ::time::UtcOffset; +use log::warn; +use nom::{ + self, + branch::alt, + bytes::complete::{ + escaped_transform, tag, tag_no_case, take_till, take_until, take_while, take_while1, + take_while_m_n, + }, + character::complete::{char, crlf, one_of}, + combinator::{ + all_consuming, cut, map, map_parser, map_res, not, opt, peek, recognize, value, verify, + }, + error::{ErrorKind, ParseError}, + multi::{many0, many1, 
separated_list, separated_nonempty_list}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + AsBytes, AsChar, Compare, Err, ExtendInto, InputIter, InputLength, InputTake, + InputTakeAtPosition, Offset, Slice, +}; +use num_bigint::{BigInt, BigUint, Sign}; +use num_traits::{pow, Num, One, Zero}; +use std::{ + cell::RefCell, + convert::TryFrom, + f64::{INFINITY, NAN, NEG_INFINITY}, + fmt::Debug, + iter::Extend, + ops::{Range, RangeFrom, RangeTo}, + rc::Rc, + str::{self, from_utf8}, +}; + +/// Follows the following documents: +/// Ion Text Encoding: http://amzn.github.io/ion-docs/docs/text.html +/// Ion Specification: http://amzn.github.io/ion-docs/docs/spec.html + +/// TODO: Use FnMut combinators to get rid of the Rc> +type Table = Rc>; + +/// Parses the top level values of the Ion string. +/// +/// Some values are delimited by values other than whitespace. For example, (1+1) is four +/// distinct values. In this case the two values are parsed as a pair. If the value has +/// whitespace as a delimiter, only that value is parsed before continuing. +/// +/// The last parsed value allows the final value to be parsed without the same delimiting rules. 
+/// +/// Encoding: top_level +pub struct ValueIterator<'a> { + pub(crate) remaining: &'a str, + pub(crate) current_table: Table, + next: Option, +} + +impl<'a> ValueIterator<'a> { + pub(crate) fn new(ion: &'a str) -> Self { + Self { + remaining: ion, + next: None, + current_table: Rc::new(RefCell::new(CurrentSymbolTable::SystemV1)), + } + } + + fn handle_ivm(&mut self, ivm: IonVersionMarker) -> Result<(), TextFormatError> { + if ivm.0 == 1 && ivm.1 == 0 { + self.current_table.replace(CurrentSymbolTable::SystemV1); + Ok(()) + } else { + Err(TextFormatError::UnsupportedVersion(ivm.0, ivm.1)) + } + } + + fn handle_meta_values( + &mut self, + value: ion::Value, + next: Option, + ) -> Option<::Item> { + let value = if is_system_value(&value) { + None + } else if let Some(table) = as_local_symbol_table(&value) { + match update_current_symbol_table( + &mut self.current_table.borrow_mut(), + &Some(table.clone()), + ) { + Ok(_) => None, + Err(e) => Some(Err(Err::Failure(IonError::from_symbol_error( + self.remaining, + e, + )))), + } + } else if let Some(_table) = as_shared_symbol_table(&value) { + // TODO: get clarity on whether shared_symbol_tables should show up in a token stream. + + // Test testfile35.ion includes a struct that should be processed as a shared symbol + // table. As such this seems to fall into the following paragraph: + + // Certain top-level values such as IVMs and local symbol tables are referred to as + // system values; all other values are referred to as user values. An Ion implementation + // may give applications the ability to “skip over” the system values, since they are + // generally irrelevant to the semantics of the user data. + + // The current implementation treats shared symbol tables as system values. This is due + // to the following paragraph of the spec: + + // This section defines the serialized form of shared symbol tables. 
Unlike local symbol + // tables, the Ion parser does not intrinsically recognize or process this data; it is + // up to higher-level specifications or conventions to define how shared symbol tables + // are communicated. + + // This implies that even if a shared symbol table shows up in a token stream, there is + // no obligation to recognize it as anything other than user data. However, because it + // shows up in the test, it seems there may be some intent to test this use case. + None + } else { + self.next = next; + Some(Ok((self.remaining, value))) + }; + + match value { + Some(v) => Some(v), + None => self.next(), + } + } +} + +fn as_shared_symbol_table(value: &ion::Value) -> Option<&ion::Struct> { + match &value.value { + ion::Data::Struct(Some(table)) => match value.annotations.get(0) { + Some(Some(SymbolToken::Known { text })) if text == "$ion_shared_symbol_table" => { + Some(table) + } + _ => None, + }, + _ => None, + } +} + +fn as_local_symbol_table(value: &ion::Value) -> Option<&ion::Struct> { + match &value.value { + ion::Data::Struct(Some(table)) => match value.annotations.get(0) { + Some(Some(SymbolToken::Known { text })) if text == "$ion_symbol_table" => Some(table), + _ => None, + }, + _ => None, + } +} + +/// ValueIterator contains the logic for top_level within the next method. +/// +/// note that EOF is a concept for the grammar, technically Ion streams +/// are infinite +/// top_level +/// : (ws* top_level_value)* ws* value? 
EOF +/// ; +impl<'a> Iterator for ValueIterator<'a> { + type Item = IonResult<&'a str, ion::Value>; + + fn next(&mut self) -> Option { + if let Some(next) = self.next.take() { + return Some(Ok((self.remaining, next))); + } + + if self.remaining.is_empty() { + return None; + } + + let maybe_ivm = take_ivm(self.remaining); + match maybe_ivm { + Ok((remaining, ivm)) => { + self.remaining = remaining; + return match self.handle_ivm(ivm) { + Ok(_) => self.next(), + Err(e) => Some(Err(Err::Failure(IonError::from_format_error( + self.remaining, + FormatError::Text(e), + )))), + }; + } + Err(Err::Failure(e)) => return Some(Err(Err::Failure(e))), + Err(_) => (), // fall through if this parser cannot be applied (Err::Error/Incomplete) + } + + let maybe_tlv = + preceded(eat_opt_ws, take_top_level_value(self.current_table.clone()))(self.remaining); + match maybe_tlv { + Ok((remaining, (value, next))) => { + self.remaining = remaining; + return self.handle_meta_values(value, next); + } + Err(Err::Failure(e)) => return Some(Err(Err::Failure(e))), + Err(_) => (), // fall through if this parser cannot be applied (Err::Error/Incomplete) + }; + + let maybe_last = preceded( + eat_opt_ws, + terminated(opt(take_value(self.current_table.clone())), eof), + )(self.remaining); + let last_err = match maybe_last { + Ok((remaining, value)) => { + self.remaining = remaining; + return match value { + Some(value) => self.handle_meta_values(value, None), + None => self.next(), + }; + } + Err(Err::Failure(e)) => { + return Some(Err(Err::Failure(e))); + } + Err(e) => e, + }; + + // If there is any remaining data after attempting to apply all parsers we return + // the error from the least picky parser (take_value). + if self.remaining.is_empty() { + None + } else { + Some(Err(last_err)) + } + } +} + +#[derive(Clone, Copy, Debug)] +struct IonVersionMarker(u32, u32); + +/// System values are NOPs, currently triggered by a symbol which matches the IVM. 
+fn is_system_value(value: &ion::Value) -> bool {
+    if !value.annotations.is_empty() {
+        return false;
+    }
+    // Compare the symbol text in place so no `String` is allocated for every check.
+    match &value.value {
+        ion::Data::Symbol(Some(SymbolToken::Known { text })) => text == "$ion_1_0",
+        _ => false,
+    }
+}
+
+/// Parses an Ion Version Marker of the form `$ion_<major>_<minor>` terminated by `nl`.
+///
+/// Leading whitespace and comments are skipped. When the marker matches but either number does
+/// not fit in a `u32`, a `Failure` carrying `TextFormatError::IvmParseError` is returned.
+fn take_ivm(i: &str) -> IonResult<&str, IonVersionMarker> {
+    let (i, ivm) = preceded(
+        eat_opt_ws,
+        terminated(
+            preceded(
+                tag("$ion_"),
+                separated_pair(
+                    take_while1(is_dec_digit),
+                    char('_'),
+                    take_while1(is_dec_digit),
+                ),
+            ),
+            // TODO: confirm that valid IVMs end with a newline
+            nl,
+        ),
+    )(i)?;
+
+    // Both components were verified to be decimal digits, so parsing can only fail on overflow.
+    let (major, minor) = match (ivm.0.parse::<u32>(), ivm.1.parse::<u32>()) {
+        (Ok(major), Ok(minor)) => (major, minor),
+        (_, _) => {
+            return Err(Err::Failure(IonError::from_format_error(
+                i,
+                FormatError::Text(TextFormatError::IvmParseError),
+            )))
+        }
+    };
+
+    Ok((i, IonVersionMarker(major, minor)))
+}
+
+/// top_level_value
+///     : annotation+ top_level_value
+///     | delimiting_entity
+///     // numeric literals (if followed by something), need to be followed by
+///     // whitespace or a token that is either quoted (e.g. string) or
+///     // starts with punctuation (e.g. clob, struct, list)
+///     | numeric_entity ws
+///     | numeric_entity quoted_annotation value
+///     | numeric_entity delimiting_entity
+///     // literals that are unquoted symbols or keywords have a similar requirement
+///     // as the numerics above, they have different productions because the
+///     // rules for numerics are the same in s-expressions, but keywords
+///     // have different rules between top-level and s-expressions.
+///     | keyword_entity ws
+///     | keyword_entity quoted_annotation value
+///     | keyword_entity keyword_delimiting_entity
+///     ;
+///
+/// Returns the parsed value and, when a second value had to be parsed to delimit the first,
+/// that second value as well.
+fn take_top_level_value(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Value, Option<ion::Value>)> {
+    move |i: &str| {
+        let (i, (annotations, (value, next_value))) = pair(
+            many0(map(take_annotation(table.clone()), Some)),
+            take_top_level_data(table.clone()),
+        )(i)?;
+
+        Ok((i, (ion::Value { value, annotations }, next_value)))
+    }
+}
+
+/// Parses the un-annotated part of a top-level value: a delimiting entity, or a numeric or
+/// keyword entity together with whatever delimits it.
+fn take_top_level_data(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        alt((
+            map(take_delimiting_entity(table.clone()), |data| (data, None)),
+            alt((
+                // Pairs of (Value, Option<Value>) depending on if parsing the next value was required.
+                take_top_level_numeric_entity(table.clone()),
+                take_top_level_keyword_entity(table.clone()),
+            )),
+        ))(i)
+    }
+}
+
+/// A numeric entity at the top level; it may be delimited by a following delimiting entity.
+fn take_top_level_numeric_entity(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        take_delimited_value(
+            take_numeric_entity,
+            take_delimiting_entity(table.clone()),
+            None,
+            table.clone(),
+        )(i)
+    }
+}
+
+/// A keyword entity at the top level; it may be delimited by a keyword delimiting entity.
+fn take_top_level_keyword_entity(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        take_delimited_value(
+            take_keyword_entity(table.clone()),
+            take_keyword_delimiting_entity(table.clone()),
+            None,
+            table.clone(),
+        )(i)
+    }
+}
+
+/// Handles the awkward-to-implement top level value and sexp value delimiting rules.
+///
+/// Most values are delimited by whitespace. Some are not, particularly in s-expressions. Since
+/// delimiting rules within s-expressions are different there are even more combinations. This
+/// function encapsulates value delimiting and returns one or two values, depending on if parsing
+/// the next value was necessary for delimiting the first.
+///
+/// It would be an improvement to peek at the next character and just execute based on that,
+/// especially if we are dealing with any kind of token stream. That approach is more error prone,
+/// however, as we have to build the list of acceptable delimiting characters by hand.
+///
+/// `delimiter` is an optional character that ends the value without being consumed (callers in
+/// this file pass `R_PAREN` inside s-expressions and `None` at the top level).
+fn take_delimited_value<'a, F, D>(
+    value_parser: F,
+    delimiter_parser: D,
+    delimiter: Option<char>,
+    table: Table,
+) -> impl Fn(&'a str) -> IonResult<&'a str, (ion::Data, Option<ion::Value>)>
+where
+    F: Fn(&'a str) -> IonResult<&'a str, ion::Data>,
+    D: Fn(&'a str) -> IonResult<&'a str, ion::Data>,
+{
+    move |i: &str| {
+        // First check if we should continue by trying to parse a value.
+        let (i, data) = value_parser(i)?;
+
+        // Drop out if the next character is the optional delimiter
+        if let Some(delimiter) = delimiter {
+            if i.chars().next().map(|c| c == delimiter).unwrap_or(false) {
+                return Ok((i, (data, None)));
+            }
+        };
+
+        // Find the delimiter, which may be a quoted symbol for example
+        let (i, next) = alt((
+            // Whitespace is only peeked at, so it delimits without producing a second value.
+            value(None, peek(many1(ws))),
+            map(
+                pair(
+                    take_quoted_annotation(table.clone()),
+                    take_value_parts(table.clone()),
+                ),
+                |(head_annotation, (mut rest_annotation, value))| {
+                    // It is mandatory to maintain the order of annotations applied to an object.
+                    rest_annotation.insert(0, Some(head_annotation));
+                    Some(ion::Value {
+                        value,
+                        annotations: rest_annotation,
+                    })
+                },
+            ),
+            map(&delimiter_parser, |value| Some(value.into())),
+        ))(i)?;
+
+        Ok((i, (data, next)))
+    }
+}
+
+/// value
+///     : annotation* entity
+///     ;
+fn take_value(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Value> {
+    move |i: &str| {
+        map(take_value_parts(table.clone()), |(annotations, value)| {
+            ion::Value { value, annotations }
+        })(i)
+    }
+}
+
+/// Parses the same input as `take_value` but returns the annotations and the data separately.
+fn take_value_parts(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (Vec<Option<SymbolToken>>, ion::Data)> {
+    move |i: &str| {
+        pair(
+            many0(map(take_annotation(table.clone()), Some)),
+            take_entity(table.clone()),
+        )(i)
+    }
+}
+
+/// An entity is what the Ion Text Encoding refers to as a value partway through parsing which
+/// hasn't been combined with any annotations yet. In this library, it is referred to as "Data".
+///
+/// Encoding: entity
+/// entity
+///     : numeric_entity
+///     | delimiting_entity
+///     | keyword_entity
+///     ;
+fn take_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        alt((
+            take_numeric_entity,
+            take_delimiting_entity(table.clone()),
+            take_keyword_entity(table.clone()),
+        ))(i)
+    }
+}
+
+/// CLOBs and BLOBs are both surrounded with double curly braces, or LOB_START and
+/// LOB_END. Since these are allowed in all the same places, this method is a single
+/// entry-point into LOB parsing.
+///
+/// Both forms of LOBs consist of ASCII characters and are parsed as bytes. This is necessary
+/// in certain situations, such as expanding an escape for a character that takes an additional byte
+/// when encoded in UTF-8.
+fn take_lob(i: &str) -> IonResult<&str, ion::Data> {
+    // The LOB grammar is byte-oriented, so the parsers run over the raw bytes of the input.
+    let parsed = preceded(
+        terminated(tag(LOB_START), eat_opt_whitespace),
+        cut(terminated(
+            take_lob_body,
+            preceded(eat_opt_whitespace, tag(LOB_END)),
+        )),
+    )(i.as_bytes());
+
+    // Byte-level errors are converted back into errors over the original `&str` input.
+    let (rest_bytes, lob) = match parsed {
+        Ok(parsed) => parsed,
+        Err(Err::Error(e)) => return Err(Err::Error(e.into_str_err(i))),
+        Err(Err::Failure(e)) => return Err(Err::Failure(e.into_str_err(i))),
+        Err(Err::Incomplete(needed)) => return Err(Err::Incomplete(needed)),
+    };
+
+    // Translate the unparsed byte tail into an offset so the remainder is a slice of `i`.
+    let rest = from_utf8(rest_bytes)
+        .expect("parser should return a reference to the same UTF-8 slice it was given");
+    let consumed = i.offset(rest);
+    Ok((&i[consumed..], lob))
+}
+
+/// Parses what sits between LOB_START and LOB_END: a short-quoted clob, a long-quoted clob,
+/// or a blob body, tried in that order.
+fn take_lob_body(i: &[u8]) -> IonResult<&[u8], ion::Data> {
+    let short_clob = map(take_short_quoted_clob, |clob| ion::Data::Clob(Some(clob)));
+    let long_clob = map(take_long_quoted_clob, |clob| ion::Data::Clob(Some(clob)));
+    let blob = map(take_blob_body, |blob| ion::Data::Blob(Some(blob)));
+
+    alt((short_clob, long_clob, blob))(i)
+}
+
+/// Encoding: delimiting_entity
+/// delimiting_entity
+///     : quoted_text
+///     | SHORT_QUOTED_CLOB
+///     | LONG_QUOTED_CLOB
+///     | BLOB
+///     | list
+///     | sexp
+///     | struct
+///     ;
+fn take_delimiting_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        let list = map(take_list(table.clone()), |list| ion::Data::List(Some(list)));
+        let sexp = map(take_sexp(table.clone()), |sexp| ion::Data::Sexp(Some(sexp)));
+        let structure = map(take_struct(table.clone()), |fields| {
+            ion::Data::Struct(Some(fields))
+        });
+
+        alt((take_quoted_text, take_lob, list, sexp, structure))(i)
+    }
+}
+
+/// keyword_delimiting_entity
+///     : delimiting_entity
+///     | numeric_entity
+///     ;
+fn take_keyword_delimiting_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        let delimiting = take_delimiting_entity(table.clone());
+        alt((delimiting, take_numeric_entity))(i)
+    }
+}
+
+/// Note: take_identifier_symbol covers all cases provided by take_types, so types are omitted.
+///
+/// keyword_entity
+///     : any_null
+///     | BOOL
+///     | SPECIAL_FLOAT
+///     | IDENTIFIER_SYMBOL
+///     // note that this is because we recognize the type names for null
+///     // they are ordinary symbols on their own
+///     | TYPE
+///     ;
+fn take_keyword_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        // Identifier symbols are resolved by hand instead of through `map_res`: nom's `map_res`
+        // replaces whatever error its closure returns with a recoverable `ErrorKind::MapRes`,
+        // so a symbol table failure would be dropped and `alt` would move on to the keywords.
+        // NOTE(review): this assumes the `map_res` in scope is nom's - confirm.
+        match take_identifier_symbol(i) {
+            Ok((rest, s)) => {
+                return match get_or_make_symbol(table.clone(), s.to_owned()) {
+                    Ok(symbol) => Ok((rest, ion::Data::Symbol(Some(symbol)))),
+                    Err(e) => Err(Err::Failure(IonError::from_symbol_error(i, e))),
+                };
+            }
+            // Not an identifier symbol: try the keyword parsers, exactly as `alt` would.
+            Err(Err::Error(_)) => (),
+            Err(e) => return Err(e),
+        }
+
+        alt((
+            take_any_null,
+            map(take_bool, |b| ion::Data::Bool(Some(b))),
+            map(take_special_float, |f| ion::Data::Float(Some(f))),
+        ))(i)
+    }
+}
+
+/// Note: sole entry point to all numeric entity parsing (this includes timestamps)
+///
+/// numeric_entity
+///     : BIN_INTEGER
+///     | DEC_INTEGER
+///     | HEX_INTEGER
+///     | TIMESTAMP
+///     | FLOAT
+///     | DECIMAL
+///     ;
+fn take_numeric_entity(i: &str) -> IonResult<&str, ion::Data> {
+    alt((
+        map(take_timestamp, |t| ion::Data::Timestamp(Some(t))),
+        take_float_or_decimal,
+        map(
+            alt((
+                take_bin_integer,
+                take_hex_integer,
+                map_res(take_dec_integer, |s| str_to_bigint(s, 10)),
+            )),
+            |bigint| ion::Data::Int(Some(bigint)),
+        ),
+    ))(i)
+}
+
+/// annotation
+///     : symbol ws* COLON COLON ws*
+///     ;
+fn take_annotation(table: Table) -> impl Fn(&str) -> IonResult<&str, SymbolToken> {
+    move |i: &str| {
+        terminated(
+            terminated(take_symbol(table.clone()), eat_opt_ws),
+            terminated(tag("::"), eat_opt_ws),
+        )(i)
+    }
+}
+
+/// quoted_annotation
+///     : QUOTED_SYMBOL ws* COLON COLON ws*
+///     ;
+fn take_quoted_annotation(table: Table) -> impl Fn(&str) -> IonResult<&str, SymbolToken> {
+    move |i: &str| {
+        let (i, result) = map(
+            terminated(
+                terminated(take_quoted_symbol, eat_opt_ws),
+                terminated(tag("::"), eat_opt_ws),
+            ),
+            |s| make_symbol(table.clone(), s),
+        )(i)?;
+
+        match result {
+            Ok(token) => Ok((i, token)),
+            Err(e) => Err(Err::Failure(IonError::from_symbol_error(i, e))),
+        }
+    }
+}
+
+/// list
+///     : L_BRACKET ws* value ws* (COMMA ws* value)* ws* (COMMA ws*)? R_BRACKET
+///     | L_BRACKET ws* R_BRACKET
+///     ;
+fn take_list(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::List> {
+    move |i: &str| {
+        map(
+            preceded(
+                terminated(char(L_BRACKET), eat_opt_ws),
+                cut(terminated(
+                    separated_list(
+                        pair(char(COMMA), eat_opt_ws),
+                        terminated(take_value(table.clone()), eat_opt_ws),
+                    ),
+                    preceded(opt(pair(char(COMMA), eat_opt_ws)), char(R_BRACKET)),
+                )),
+            ),
+            |values| ion::List { values },
+        )(i)
+    }
+}
+
+/// sexp
+///     : L_PAREN (ws* sexp_value)* ws* value? R_PAREN
+///     ;
+fn take_sexp(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Sexp> {
+    move |i: &str| {
+        let (i, grouped_values) = preceded(
+            terminated(char(L_PAREN), eat_opt_ws),
+            cut(terminated(
+                many0(preceded(eat_opt_ws, take_sexp_value(table.clone()))),
+                preceded(eat_opt_ws, char(R_PAREN)),
+            )),
+        )(i)?;
+
+        // Each parsed group holds one value, or two when a second one delimited the first.
+        let count = grouped_values.iter().fold(
+            0,
+            |sum, (_, next)| if next.is_some() { sum + 2 } else { sum + 1 },
+        );
+
+        let mut values = Vec::with_capacity(count);
+
+        // Flatten the groups while keeping the order in which the values were written.
+        for (first, next) in grouped_values {
+            values.push(first);
+            if let Some(second) = next {
+                values.push(second);
+            }
+        }
+        Ok((i, ion::Sexp { values }))
+    }
+}
+
+/// A numeric entity inside an s-expression; a following `)` ends it without being consumed.
+fn take_sexp_numeric_entity_data(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        take_delimited_value(
+            take_numeric_entity,
+            take_delimiting_entity(table.clone()),
+            Some(R_PAREN),
+            table.clone(),
+        )(i)
+    }
+}
+
+/// A keyword entity inside an s-expression; a following `)` ends it without being consumed.
+fn take_sexp_keyword_data(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        take_delimited_value(
+            take_sexp_keyword_entity(table.clone()),
+            take_sexp_keyword_delimiting_entity(table.clone()),
+            Some(R_PAREN),
+            table.clone(),
+        )(i)
+    }
+}
+
+/// An untyped `null` inside an s-expression; a following `)` ends it without being consumed.
+fn take_sexp_null_data(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        take_delimited_value(
+            value(ion::Data::Null, take_null),
+            take_sexp_null_delimiting_entity(table.clone()),
+            Some(R_PAREN),
+            table.clone(),
+        )(i)
+    }
+}
+
+/// Parses the un-annotated part of an s-expression value (see `sexp_value` below).
+fn take_sexp_data(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Data, Option<ion::Value>)> {
+    move |i: &str| {
+        alt((
+            map(take_delimiting_entity(table.clone()), |data| (data, None)),
+            alt((
+                // Pairs of (Value, Option<Value>) depending on if parsing the next value was required.
+                take_sexp_numeric_entity_data(table.clone()),
+                take_sexp_keyword_data(table.clone()),
+                take_sexp_null_data(table.clone()),
+            )),
+            map(
+                map(take_operator(table.clone()), |s| ion::Data::Symbol(Some(s))),
+                |data| (data, None),
+            ),
+        ))(i)
+    }
+}
+
+/// Note: the body of logic is in take_sexp_data and take_delimited_value
+///
+/// sexp_value
+///     : annotation+ sexp_value
+///     | sexp_delimiting_entity
+///     | operator
+///     // much like at the top level, numeric/identifiers/keywords
+///     // have similar delimiting rules
+///     | numeric_entity ws
+///     | numeric_entity quoted_annotation value
+///     | numeric_entity sexp_delimiting_entity
+///     | sexp_keyword_entity ws
+///     | sexp_keyword_entity quoted_annotation value
+///     | sexp_keyword_entity sexp_keyword_delimiting_entity
+///     | NULL ws
+///     | NULL quoted_annotation value
+///     | NULL sexp_null_delimiting_entity
+///     ;
+fn take_sexp_value(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, (ion::Value, Option<ion::Value>)> {
+    move |i: &str| {
+        map(
+            pair(
+                many0(map(take_annotation(table.clone()), Some)),
+                take_sexp_data(table.clone()),
+            ),
+            |(annotations, (value, next))| (ion::Value { value, annotations }, next),
+        )(i)
+    }
+}
+
+/// Omitted - same as delimiting_entity.
+///
+/// sexp_delimiting_entity
+///     : delimiting_entity
+///     ;
+
+/// sexp_keyword_delimiting_entity
+///     : sexp_delimiting_entity
+///     | numeric_entity
+///     | operator
+///     ;
+fn take_sexp_keyword_delimiting_entity(
+    table: Table,
+) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        alt((
+            take_delimiting_entity(table.clone()),
+            take_numeric_entity,
+            map(take_operator(table.clone()), |s| ion::Data::Symbol(Some(s))),
+        ))(i)
+    }
+}
+
+/// sexp_null_delimiting_entity
+///     : delimiting_entity
+///     | NON_DOT_OPERATOR+
+///     ;
+fn take_sexp_null_delimiting_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        alt((
+            take_delimiting_entity(table.clone()),
+            map(take_while1(is_non_dot_operator), |s: &str| {
+                ion::Data::Symbol(Some(SymbolToken::Known {
+                    text: s.to_string(),
+                }))
+            }),
+        ))(i)
+    }
+}
+
+/// Parses the keywords that have a special meaning inside an s-expression: typed nulls, an
+/// untyped `null` that is not followed by a dot, booleans, special floats and type names.
+fn take_sexp_keyword(i: &str) -> IonResult<&str, ion::Data> {
+    alt((
+        take_typed_null,
+        value(ion::Data::Null, terminated(take_null, peek(not(char('.'))))),
+        map(take_bool, |b| ion::Data::Bool(Some(b))),
+        map(take_special_float, |f| ion::Data::Float(Some(f))),
+        take_sexp_type,
+    ))(i)
+}
+
+/// Parses a type name as an ordinary symbol; the name `null` must not be followed by a dot.
+fn take_sexp_type(i: &str) -> IonResult<&str, ion::Data> {
+    let (i, null_type) = take_type(i)?;
+    if null_type == NullType::Null {
+        peek(not(char('.')))(i)?;
+    }
+    Ok((
+        i,
+        ion::Data::Symbol(Some(SymbolToken::Known {
+            text: null_type.as_str().to_string(),
+        })),
+    ))
+}
+
+/// sexp_keyword_entity
+///     : typed_null
+///     | BOOL
+///     | SPECIAL_FLOAT
+///     | IDENTIFIER_SYMBOL
+///     // note that this is because we recognize the type names for null
+///     // they are ordinary symbols on their own
+///     | TYPE
+///     ;
+fn take_sexp_keyword_entity(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Data> {
+    move |i: &str| {
+        // An identifier symbol only counts when the whole identifier is not itself a keyword.
+        let result = map(
+            verify(take_identifier_symbol, |s| {
+                not(all_consuming(take_sexp_keyword))(s).is_ok()
+            }),
+            |s| get_or_make_symbol(table.clone(), s.to_owned()),
+        )(i);
+
+        match result {
+            Err(_) => (),
+            Ok((i, Ok(token))) => return Ok((i, ion::Data::Symbol(Some(token)))),
+            Ok((i, Err(e))) => return Err(Err::Failure(IonError::from_symbol_error(i, e))),
+        }
+
+        take_sexp_keyword(i)
+    }
+}
+
+/// operator
+///     : (DOT | NON_DOT_OPERATOR)+
+///     ;
+fn take_operator(_table: Table) -> impl Fn(&str) -> IonResult<&str, SymbolToken> {
+    move |i: &str| {
+        map(
+            map(
+                take_while1(|c| is_non_dot_operator(c) || c == DOT),
+                String::from,
+            ),
+            |text| SymbolToken::Known { text },
+        )(i)
+    }
+}
+
+/// Encoding: struct
+/// struct
+///     : L_CURLY ws* field (ws* COMMA ws* field)* ws* (COMMA ws*)? R_CURLY
+///     | L_CURLY ws* R_CURLY
+///     ;
+fn take_struct(table: Table) -> impl Fn(&str) -> IonResult<&str, ion::Struct> {
+    move |i: &str| {
+        map(
+            preceded(
+                terminated(char(L_CURLY), eat_opt_ws),
+                cut(alt((
+                    value(vec![], char(R_CURLY)),
+                    terminated(
+                        // A non-empty struct needs at least one field before the closing brace.
+                        verify(
+                            separated_list(
+                                pair(char(COMMA), eat_opt_ws),
+                                terminated(take_field(table.clone()), eat_opt_ws),
+                            ),
+                            |list: &[_]| !list.is_empty(),
+                        ),
+                        preceded(opt(pair(char(COMMA), eat_opt_ws)), char(R_CURLY)),
+                    ),
+                ))),
+            ),
+            |fields| ion::Struct { fields },
+        )(i)
+    }
+}
+
+/// field
+///     : field_name ws* COLON ws* annotation* entity
+///     ;
+fn take_field(table: Table) -> impl Fn(&str) -> IonResult<&str, (SymbolToken, ion::Value)> {
+    move |i: &str| {
+        map(
+            separated_pair(
+                take_field_name(table.clone()),
+                tuple((eat_opt_ws, char(COLON), eat_opt_ws)),
+                pair(
+                    many0(map(take_annotation(table.clone()), Some)),
+                    take_entity(table.clone()),
+                ),
+            ),
+            |(field, (annotations, value))| (field, ion::Value { value, annotations }),
+        )(i)
+    }
+}
+
+/// any_null
+///     : NULL
+///     | typed_null
+///     ;
+fn take_any_null(i: &str) -> IonResult<&str, ion::Data> {
+    alt((take_typed_null, value(ion::Data::Null, take_null)))(i)
+}
+
+/// typed_null
+///     : NULL DOT NULL
+///     | NULL DOT TYPE
+///     ;
+fn take_typed_null(i: &str) -> IonResult<&str, ion::Data> {
+    let (i, null_type) = preceded(take_null, preceded(char(DOT), take_type))(i)?;
+
+    // A typed null is represented as the matching `Data` variant holding `None`.
+    let data = {
+        match null_type {
+            NullType::Null => ion::Data::Null,
+            NullType::Bool => ion::Data::Bool(None),
+            NullType::Int => ion::Data::Int(None),
+            NullType::Float => ion::Data::Float(None),
+            NullType::Decimal => ion::Data::Decimal(None),
+            NullType::Timestamp => ion::Data::Timestamp(None),
+            NullType::String => ion::Data::String(None),
+            NullType::Symbol => ion::Data::Symbol(None),
+            NullType::Blob => ion::Data::Blob(None),
+            NullType::Clob => ion::Data::Clob(None),
+            NullType::Struct => ion::Data::Struct(None),
+            NullType::List => ion::Data::List(None),
+            NullType::Sexp => ion::Data::Sexp(None),
+        }
+    };
+
+    Ok((i, data))
+}
+
+/// field_name
+///     : symbol
+///     | SHORT_QUOTED_STRING
+///     | (ws* LONG_QUOTED_STRING)+
+///     ;
+fn take_field_name(table: Table) -> impl Fn(&str) -> IonResult<&str, SymbolToken> {
+    move |i: &str| {
+        alt((
+            take_symbol(table.clone()),
+            map(take_short_quoted_string, |text| SymbolToken::Known { text }),
+            map(many1(preceded(eat_opt_ws, take_long_quoted_string)), |v| {
+                SymbolToken::Known { text: v.concat() }
+            }),
+        ))(i)
+    }
+}
+
+/// quoted_text
+///     : QUOTED_SYMBOL
+///     | SHORT_QUOTED_STRING
+///     | (ws* LONG_QUOTED_STRING)+
+///     ;
+fn take_quoted_text(i: &str) -> IonResult<&str, ion::Data> {
+    alt((
+        map(take_quoted_symbol, |text| {
+            ion::Data::Symbol(Some(SymbolToken::Known { text }))
+        }),
+        map(take_short_quoted_string, |s| ion::Data::String(Some(s))),
+        map(
+            map(many1(preceded(eat_opt_ws, take_long_quoted_string)), |v| {
+                v.concat()
+            }),
+            |s| ion::Data::String(Some(s)),
+        ),
+    ))(i)
+}
+
+/// Resolves the text of an identifier symbol to a symbol token.
+///
+/// Text that consists solely of `$` followed by decimal digits is a symbol ID and is looked up
+/// in `table`; any other text is registered in `table` as a new known symbol.
+///
+/// Returns `SymbolError::SidTooLarge` when the digits do not fit in a `usize`, or the error
+/// reported by the table lookup.
+fn get_or_make_symbol(table: Table, text: String) -> Result<SymbolToken, SymbolError> {
+    // Only a complete match is a symbol ID: an identifier such as `$10abc` merely starts with
+    // `$<digits>` and is ordinary symbol text.
+    if let Ok(("", s)) = take_sid(&text) {
+        let sid = s
+            .parse::<usize>()
+            .map_err(|_| SymbolError::SidTooLarge(s.to_string()))?;
+        table.borrow().lookup_sid(sid)
+    } else {
+        make_symbol(table, text)
+    }
+}
+
+/// Builds a known symbol token from `text` and adds it to `table`.
+fn make_symbol(table: Table, text: String) -> Result<SymbolToken, SymbolError> {
+    let token = SymbolToken::Known { text };
+    table.borrow_mut().add_symbol(&token);
+    Ok(token)
+}
+
+/// Note: IDENTIFIER_SYMBOL covers all cases provided by TYPE.
+///
+/// symbol
+///     : IDENTIFIER_SYMBOL
+///     // note that this is because we recognize the type names for null
+///     // they are ordinary symbols on their own
+///     | TYPE
+///     | QUOTED_SYMBOL
+///     ;
+fn take_symbol(table: Table) -> impl Fn(&str) -> IonResult<&str, SymbolToken> {
+    move |i: &str| {
+        let (i, result) = alt((
+            map(take_identifier_symbol, |s| {
+                get_or_make_symbol(table.clone(), s.to_string())
+            }),
+            map(take_quoted_symbol, |s| make_symbol(table.clone(), s)),
+        ))(i)?;
+
+        match result {
+            Ok(token) => Ok((i, token)),
+            Err(e) => Err(Err::Failure(IonError::from_symbol_error(i, e))),
+        }
+    }
+}
+
+/// Matches a leading `$` followed by one or more decimal digits and returns the digits.
+fn take_sid(i: &str) -> IonResult<&str, &str> {
+    preceded(char('$'), take_while1(is_dec_digit))(i)
+}
+
+/// Consumes any amount of whitespace and comments, including none at all.
+fn eat_opt_ws(i: &str) -> IonResult<&str, &str> {
+    recognize(many0(ws))(i)
+}
+
+/// ws
+///     : WHITESPACE
+///     | INLINE_COMMENT
+///     | BLOCK_COMMENT
+///     ;
+fn ws(i: &str) -> IonResult<&str, &str> {
+    alt((whitespace, take_inline_comment, take_block_comment))(i)
+}
+
+///
+/// Encoding Section: Ion Punctuation
+///
+
+/// L_BRACKET : '[';
+const L_BRACKET: char = '[';
+
+/// R_BRACKET : ']';
+const R_BRACKET: char = ']';
+
+/// L_PAREN : '(';
+const L_PAREN: char = '(';
+
+/// R_PAREN : ')';
+const R_PAREN: char = ')';
+
+/// L_CURLY : '{';
+const L_CURLY: char = '{';
+
+/// R_CURLY : '}';
+const R_CURLY: char = '}';
+
+/// COMMA : ',';
+const COMMA: char = ',';
+
+/// COLON : ':';
+const COLON: char = ':';
+
+/// DOT : '.';
+const DOT: char = '.';
+
+/// Note: in an ANTLR v4 char set (such as the one found below) the - is a special character and
+/// requires the preceding \ as an escape. The \ itself is not a valid operator character.
+///
+/// NON_DOT_OPERATOR
+///     : [!#%&*+\-/;<=>?@^`|~]
+///     ;
+fn is_non_dot_operator(c: char) -> bool {
+    match c {
+        '!' | '#' | '%' | '&' | '*' | '+' | '-' | '/' | ';' | '<' | '=' | '>' | '?' | '@' | '^'
+        | '`' | '|' | '~' => true,
+        _ => false,
+    }
+}
+
+///
+/// Encoding Section: Ion Whitespace / Comments
+///
+
+/// Consumes whitespace (not comments). Used in LOBs.
+///
+/// Generic over the input type so that it can run over both `&str` and byte slices.
+// NOTE(review): the generic parameter list and the `Slice<..>` / `::Item` bounds were lost in
+// extraction and are restored here from the bounds nom 5 places on `recognize`, `many0` and
+// `take_while1` - confirm against the original source.
+fn eat_opt_whitespace<Input>(i: Input) -> IonResult<Input, Input>
+where
+    Input: Clone
+        + PartialEq
+        + InputIter
+        + InputTake
+        + InputLength
+        + InputTakeAtPosition
+        + Slice<RangeTo<usize>>
+        + Offset,
+    <Input as InputTakeAtPosition>::Item: AsChar,
+{
+    recognize(many0(whitespace))(i)
+}
+
+/// WHITESPACE
+///     : WS+
+///     ;
+// NOTE(review): generic parameters restored as for `eat_opt_whitespace` - confirm.
+fn whitespace<Input>(i: Input) -> IonResult<Input, Input>
+where
+    Input: Clone + InputIter + InputTake + InputLength + InputTakeAtPosition,
+    <Input as InputTakeAtPosition>::Item: AsChar,
+{
+    take_while1(is_ws)(i)
+}
+
+/// INLINE_COMMENT
+///     : '//' .*? (NL | EOF)
+///     ;
+fn take_inline_comment(i: &str) -> IonResult<&str, &str> {
+    preceded(
+        tag("//"),
+        // The comment text runs up to, but does not include, the line terminator.
+        terminated(take_till(|c| c == '\r' || c == '\n'), alt((nl, eof))),
+    )(i)
+}
+
+/// BLOCK_COMMENT
+///     : '/*' .*?
'*/'
+///     ;
+fn take_block_comment(i: &str) -> IonResult<&str, &str> {
+    // Everything up to the first "*/" is the comment text; the terminator is then consumed.
+    let body_and_close = terminated(take_until("*/"), tag("*/"));
+    preceded(tag("/*"), cut(body_and_close))(i)
+}
+
+///
+/// Encoding Section: Ion Null
+///
+
+/// The type names that may follow `null.` in a typed null.
+#[derive(Copy, Clone, Debug, PartialEq)]
+enum NullType {
+    Null,
+    Bool,
+    Int,
+    Float,
+    Decimal,
+    Timestamp,
+    String,
+    Symbol,
+    Blob,
+    Clob,
+    Struct,
+    List,
+    Sexp,
+}
+
+impl NullType {
+    /// Returns the keyword text for this type name.
+    fn as_str(&self) -> &str {
+        match *self {
+            NullType::Null => "null",
+            NullType::Bool => "bool",
+            NullType::Int => "int",
+            NullType::Float => "float",
+            NullType::Decimal => "decimal",
+            NullType::Timestamp => "timestamp",
+            NullType::String => "string",
+            NullType::Symbol => "symbol",
+            NullType::Blob => "blob",
+            NullType::Clob => "clob",
+            NullType::Struct => "struct",
+            NullType::List => "list",
+            NullType::Sexp => "sexp",
+        }
+    }
+}
+
+/// NULL
+///     : 'null'
+///     ;
+fn take_null(i: &str) -> IonResult<&str, &str> {
+    let null_keyword = tag("null");
+    null_keyword(i)
+}
+
+/// Note: the encoding spec doesn't include null here, it was simpler to do so.
+/// TYPE
+///     : 'bool'
+///     | 'int'
+///     | 'float'
+///     | 'decimal'
+///     | 'timestamp'
+///     | 'symbol'
+///     | 'string'
+///     | 'clob'
+///     | 'blob'
+///     | 'list'
+///     | 'sexp'
+///     | 'struct'
+///     ;
+fn take_type(i: &str) -> IonResult<&str, NullType> {
+    alt((
+        value(NullType::Null, tag("null")),
+        value(NullType::Bool, tag("bool")),
+        value(NullType::Int, tag("int")),
+        value(NullType::Float, tag("float")),
+        value(NullType::Decimal, tag("decimal")),
+        value(NullType::Timestamp, tag("timestamp")),
+        value(NullType::String, tag("string")),
+        value(NullType::Symbol, tag("symbol")),
+        value(NullType::Blob, tag("blob")),
+        value(NullType::Clob, tag("clob")),
+        value(NullType::Struct, tag("struct")),
+        value(NullType::List, tag("list")),
+        value(NullType::Sexp, tag("sexp")),
+    ))(i)
+}
+
+///
+/// Encoding Section: Ion Bool
+///
+
+/// Encoding: BOOL
+/// BOOL
+///     : 'true'
+///     | 'false'
+///     ;
+fn take_bool(i: &str) -> IonResult<&str, bool> {
+    alt((value(true, tag("true")), value(false, tag("false"))))(i)
+}
+
+///
+/// Encoding Section: Ion TextTimestamp
+///
+
+/// Encoding: fragment TIMESTAMP
+/// TIMESTAMP
+///     : DATE ('T' TIME?)?
+///     | YEAR '-' MONTH 'T'
+///     | YEAR 'T'
+///     ;
+fn take_timestamp(i: &str) -> IonResult<&str, ion::Timestamp> {
+    let (i, timestamp) = map_res(
+        alt((
+            map_res(
+                pair(take_date, opt(preceded(one_of("tT"), opt(take_time)))),
+                |((year, month, day), maybe_time)| match TextDate::day(year as u16, month, day) {
+                    Ok(date) => match maybe_time {
+                        Some(Some((time, offset))) => {
+                            Ok(TextTimestamp::new(date, Some(time), offset))
+                        }
+                        // A bare date, or a date followed only by `T`, carries no time of day.
+                        _ => Ok(TextTimestamp::new(date, None, UtcOffset::UTC)),
+                    },
+                    Err(e) => Err(e),
+                },
+            ),
+            // Timestamps which consist of only a year or year and month must be terminated by t or T.
+            map(
+                terminated(
+                    separated_pair(take_year, char('-'), take_month),
+                    one_of("tT"),
+                ),
+                |(year, month)| {
+                    TextTimestamp::new(TextDate::month(year as u16, month), None, UtcOffset::UTC)
+                },
+            ),
+            map(terminated(take_year, one_of("tT")), |year| {
+                TextTimestamp::new(TextDate::year(year as u16), None, UtcOffset::UTC)
+            }),
+        )),
+        ion::Timestamp::try_from,
+    )(i)?;
+
+    Ok((i, timestamp))
+}
+
+/// fragment
+/// DATE
+///     : YEAR '-' MONTH '-' DAY
+///     ;
+fn take_date(i: &str) -> IonResult<&str, (i32, u8, u8)> {
+    let (i, (y, (m, d))) = pair(
+        take_year,
+        pair(
+            preceded(char('-'), take_month),
+            preceded(char('-'), take_day),
+        ),
+    )(i)?;
+
+    Ok((i, (y, m, d)))
+}
+
+/// fragment
+/// YEAR
+///     : '000'                     [1-9]
+///     | '00'            [1-9]     DEC_DIGIT
+///     | '0'   [1-9]     DEC_DIGIT DEC_DIGIT
+///     | [1-9] DEC_DIGIT DEC_DIGIT DEC_DIGIT
+///     ;
+fn take_year(i: &str) -> IonResult<&str, i32> {
+    map(
+        alt((
+            recognize(pair(tag("000"), one_of("123456789"))),
+            recognize(tuple((
+                tag("00"),
+                one_of("123456789"),
+                one_if(is_dec_digit),
+            ))),
+            recognize(tuple((
+                tag("0"),
+                one_of("123456789"),
+                take_while_m_n(2, 2, is_dec_digit),
+            ))),
+            recognize(tuple((
+                one_of("123456789"),
+                take_while_m_n(3, 3, is_dec_digit),
+            ))),
+        )),
+        |s: &str| {
+            s.parse::<i32>()
+                .expect("the parser ensures it will be within range")
+        },
+    )(i)
+}
+
+/// fragment
+/// MONTH
+///     : '0' [1-9]
+///     | '1' [0-2]
+///     ;
+fn take_month(i: &str) -> IonResult<&str, u8> {
+    map(
+        alt((
+            recognize(pair(char('0'), one_of("123456789"))),
+            recognize(pair(char('1'), one_of("012"))),
+        )),
+        |s: &str| {
+            s.parse::<u8>()
+                .expect("the parser ensures it will be within range")
+        },
+    )(i)
+}
+
+/// fragment
+/// DAY
+///     : '0'   [1-9]
+///     | [1-2] DEC_DIGIT
+///     | '3'   [0-1]
+///     ;
+fn take_day(i: &str) -> IonResult<&str, u8> {
+    map(
+        alt((
+            recognize(pair(char('0'), one_of("123456789"))),
+            recognize(pair(one_of("12"), one_if(is_dec_digit))),
+            recognize(pair(char('3'), one_of("01"))),
+        )),
+        |s: &str| {
+            s.parse::<u8>()
+                .expect("the parser ensures it will be within range")
+        },
+    )(i)
+}
+
+/// Parses `HOUR ':' MINUTE`, the part shared by times and offsets.
+fn take_hour_and_minute(i: &str) -> IonResult<&str, (u8, u8)> {
+    separated_pair(take_hour, char(COLON), take_minute)(i)
+}
+
+/// Builds a time with minute precision.
+fn assemble_time_hm(hour: u8, minute: u8) -> TextTime {
+    TextTime::Minute { hour, minute }
+}
+
+/// Builds a time with second precision, or fractional-second precision when a fraction
+/// (coefficient and exponent) is supplied.
+fn assemble_time_hms(
+    hour: u8,
+    minute: u8,
+    second: u8,
+    maybe_fraction: Option<(BigUint, i32)>,
+) -> TextTime {
+    match maybe_fraction {
+        Some((fraction_coefficient, fraction_exponent)) => TextTime::FractionalSecond {
+            hour,
+            minute,
+            second,
+            fraction_coefficient,
+            fraction_exponent,
+        },
+        None => TextTime::Second {
+            hour,
+            minute,
+            second,
+        },
+    }
+}
+
+/// fragment
+/// TIME
+///     : HOUR ':' MINUTE (':' SECOND)? OFFSET
+///     ;
+fn take_time(i: &str) -> IonResult<&str, (TextTime, UtcOffset)> {
+    let (i, ((hour, minute), second, offset)) = tuple((
+        take_hour_and_minute,
+        opt(preceded(char(COLON), take_second)),
+        take_offset,
+    ))(i)?;
+
+    let time = match second {
+        Some((second, fraction)) => assemble_time_hms(hour, minute, second, fraction),
+        None => assemble_time_hm(hour, minute),
+    };
+
+    Ok((i, (time, offset)))
+}
+
+/// fragment
+/// OFFSET
+///     : 'Z'
+///     | PLUS_OR_MINUS HOUR ':' MINUTE
+///     ;
+fn take_offset(i: &str) -> IonResult<&str, UtcOffset> {
+    alt((
+        map(char('Z'), |_| UtcOffset::UTC),
+        map(
+            pair(take_plus_or_minus, take_hour_and_minute),
+            |(sign, (hour, minutes))| {
+                // The offset is expressed as a signed number of minutes.
+                let minutes: i16 = ((hour as i16) * 60) + (minutes as i16);
+                let signed_minutes = if sign == '-' { -minutes } else { minutes };
+                UtcOffset::minutes(signed_minutes)
+            },
+        ),
+    ))(i)
+}
+
+/// fragment
+/// HOUR
+///     : [01] DEC_DIGIT
+///     | '2' [0-3]
+///     ;
+fn take_hour(i: &str) -> IonResult<&str, u8> {
+    map(
+        alt((
+            recognize(pair(one_of("01"), one_if(is_dec_digit))),
+            recognize(pair(char('2'), one_of("0123"))),
+        )),
+        |s: &str| {
+            s.parse::<u8>()
+                .expect("parser verified hour should be valid u8")
+        },
+    )(i)
+}
+
+/// fragment
+/// MINUTE
+///     :
[0-5] DEC_DIGIT
+///     ;
+fn take_minute(i: &str) -> IonResult<&str, u8> {
+    map(
+        recognize(pair(one_of("012345"), one_if(is_dec_digit))),
+        |s: &str| {
+            s.parse::<u8>()
+                .expect("parser verified minute should be valid u8")
+        },
+    )(i)
+}
+
+/// The fractional part of a second as a `(coefficient, exponent)` pair.
+type FractionalSecond = (BigUint, i32);
+
+/// note that W3C spec requires a digit after the '.'
+/// fragment
+/// SECOND
+///     : [0-5] DEC_DIGIT ('.' DEC_DIGIT+)?
+///     ;
+fn take_second(i: &str) -> IonResult<&str, (u8, Option<FractionalSecond>)> {
+    let (i, seconds) = recognize(pair(one_of("012345"), one_if(is_dec_digit)))(i)?;
+    let (i, seconds_decimal) = opt(preceded(char('.'), take_while1(is_dec_digit)))(i)?;
+    let seconds = seconds
+        .parse::<u8>()
+        .expect("parser verified seconds should be valid u8");
+    if let Some(decimal) = seconds_decimal {
+        // The digits after the '.' are kept as an integer coefficient; the exponent is the
+        // negated digit count, so ".250" becomes (250, -3).
+        let fraction_exponent = -(decimal.len() as i32);
+        let fraction_coefficient = str_to_biguint(decimal, 10)
+            .map_err(|e| Err::Failure(IonError::from_format_error(i, e)))?;
+        Ok((
+            i,
+            (seconds, Some((fraction_coefficient, fraction_exponent))),
+        ))
+    } else {
+        Ok((i, (seconds, None)))
+    }
+}
+
+///
+/// Encoding Section: Ion Int
+///
+
+/// Helper for turning &str-ish values into BigUints.
+///
+/// Returns `TextFormatError::BigUint` carrying the offending text when it is not a valid
+/// number in the given radix.
+fn str_to_biguint<T: AsRef<str>>(digits: T, radix: u32) -> Result<BigUint, FormatError> {
+    match BigUint::from_str_radix(digits.as_ref(), radix) {
+        Ok(biguint) => Ok(biguint),
+        Err(_) => Err(FormatError::Text(TextFormatError::BigUint(
+            digits.as_ref().to_string(),
+        ))),
+    }
+}
+
+/// Helper for turning &str-ish values into BigInts.
+///
+/// Returns `TextFormatError::BigInt` carrying the offending text when it is not a valid
+/// number in the given radix.
+fn str_to_bigint<T: AsRef<str>>(digits: T, radix: u32) -> Result<BigInt, FormatError> {
+    match BigInt::from_str_radix(digits.as_ref(), radix) {
+        Ok(bigint) => Ok(bigint),
+        Err(_) => Err(FormatError::Text(TextFormatError::BigInt(
+            digits.as_ref().to_string(),
+        ))),
+    }
+}
+
+/// Helper for turning Vec<&str>s into BigInts.
+///
+/// The segments are concatenated in order before being parsed.
+fn str_vec_to_bigint(vec: Vec<&str>, radix: u32) -> Result<BigInt, FormatError> {
+    let digits: String = vec.concat();
+    str_to_bigint(digits, radix)
+}
+
+/// BIN_INTEGER
+///     : '-'? '0' [bB] BINARY_DIGIT (UNDERSCORE?
BINARY_DIGIT)*
+///     ;
+fn take_bin_integer(i: &str) -> IonResult<&str, BigInt> {
+    let (i, sign) = opt(char('-'))(i)?;
+    let (i, digit_groups) = preceded(
+        tag_no_case("0b"),
+        separated_nonempty_list(char(UNDERSCORE), take_while1(is_binary_digit)),
+    )(i)?;
+
+    // Reserve room for every digit plus a possible sign.
+    let digit_count = digit_groups
+        .iter()
+        .fold(0, |total, group| total + group.chars().count());
+    let mut literal = String::with_capacity(digit_count + 1);
+
+    if let Some(sign) = sign {
+        literal.push(sign);
+    }
+
+    // The underscores were consumed as separators, so only the digits are joined.
+    for group in digit_groups {
+        literal.push_str(group);
+    }
+
+    match str_to_bigint(literal, 2) {
+        Ok(integer) => Ok((i, integer)),
+        Err(e) => Err(Err::Failure(IonError::from_format_error(i, e))),
+    }
+}
+
+/// Encoding: DEC_INTEGER
+/// DEC_INTEGER
+///     : '-'? DEC_UNSIGNED_INTEGER
+///     ;
+fn take_dec_integer(i: &str) -> IonResult<&str, String> {
+    let (i, (sign, digit_groups)) = pair(opt(char('-')), take_dec_unsigned_integer)(i)?;
+
+    // Reserve room for every digit plus the sign, if there is one.
+    let sign_len = if sign.is_some() { 1 } else { 0 };
+    let digit_count = digit_groups
+        .iter()
+        .fold(0, |total, group| total + group.chars().count());
+    let mut literal = String::with_capacity(digit_count + sign_len);
+
+    if let Some(sign) = sign {
+        literal.push(sign);
+    }
+
+    for group in digit_groups.iter() {
+        literal.push_str(group);
+    }
+
+    Ok((i, literal))
+}
+
+/// HEX_INTEGER
+///     : '-'? '0' [xX] HEX_DIGIT (UNDERSCORE?
HEX_DIGIT)* +/// ; +fn take_hex_integer(i: &str) -> IonResult<&str, BigInt> { + let (i, negate) = opt(char('-'))(i)?; + let (i, segments) = preceded( + tag_no_case("0x"), + separated_nonempty_list(char(UNDERSCORE), take_while1(is_hex_digit)), + )(i)?; + + let mut number = + String::with_capacity(segments.iter().fold(0, |sum, s| sum + s.chars().count()) + 1); + + if let Some(negate) = negate { + number.push(negate) + } + + segments.iter().for_each(|s| number.push_str(s)); + let integer = + str_to_bigint(number, 16).map_err(|e| Err::Failure(IonError::from_format_error(i, e)))?; + + Ok((i, integer)) +} + +/// +/// Encoding Section: Ion Float +/// + +/// SPECIAL_FLOAT +/// : PLUS_OR_MINUS 'inf' +/// | 'nan' +/// ; +fn take_special_float(i: &str) -> IonResult<&str, f64> { + alt(( + value(INFINITY, tag("+inf")), + value(NEG_INFINITY, tag("-inf")), + value(NAN, tag("nan")), + ))(i) +} + +enum FloatOrDecimal { + Float(String), + Decimal(String), +} + +fn take_float_or_decimal(i: &str) -> IonResult<&str, ion::Data> { + let (i, integer) = take_dec_integer(i)?; + let (i, fractional) = opt(take_dec_frac)(i)?; + let (i, exponent) = opt(alt(( + map(take_float_exp, FloatOrDecimal::Float), + map(take_decimal_exp, FloatOrDecimal::Decimal), + )))(i)?; + + // If there is no fractional and no exponent part then this is an integer. + if fractional.is_none() && exponent.is_none() { + return Err(Err::Error(IonError::from_error_kind(i, ErrorKind::Digit))); + } + + let numeric = match exponent { + Some(FloatOrDecimal::Float(exponent)) => { + ion::Data::Float(Some(assemble_float(i, integer, fractional, exponent)?.1)) + } + Some(FloatOrDecimal::Decimal(exponent)) => ion::Data::Decimal(Some( + assemble_decimal(i, integer, fractional, Some(exponent))?.1, + )), + None => ion::Data::Decimal(Some(assemble_decimal(i, integer, fractional, None)?.1)), + }; + + Ok((i, numeric)) +} + +// Floats are tricky. Ion uses IEEE-754 floats. 
Additionally: +// +// When encoding a decimal real number that is irrational in base-2 or has more precision than can +// be stored in binary64, the exact binary64 value is determined by using the IEEE-754 +// round-to-nearest mode with a round-half-to-even (sic: ties-to-even) as the tie-break. +// +// When attempting to parse the final float in good/floatDblMin.ion with the stdlib Rust f64, we +// fail. This may be due to https://github.com/rust-lang/rust/issues/31407 +// +// The lexical crate solves this problem for us. It defaults to the same documented rounding mode +// and tie breaker. Additionally, the default parse format appears to be a near-perfect match, +// though that can be configured if necessary. + +/// Note: number parsing consolidated to avoid parsing DEC_INTEGER multiple times when deciding +/// the numeric type to apply. +/// +/// FLOAT +/// : DEC_INTEGER DEC_FRAC? FLOAT_EXP +/// ; +fn assemble_float<'a>( + i: &'a str, + integer: String, + fractional: Option>, + exponent: String, +) -> IonResult<&'a str, f64> { + let mut float = integer.as_bytes().to_vec(); + + if let Some(fractional) = fractional { + float.push(b'.'); + fractional.iter().for_each(|s| float.extend(s.as_bytes())); + } + + float.push(b'e'); + float.extend(exponent.as_bytes()); + + match lexical_core::parse(&float) { + Ok(f) => Ok((i, f)), + Err(_) => Err(Err::Failure(IonError::from_format_error( + i, + FormatError::Text(TextFormatError::FloatParse( + String::from_utf8(float).expect("it was already a string."), + )), + ))), + } +} + +/// fragment +/// FLOAT_EXP +/// : [Ee] PLUS_OR_MINUS? 
DEC_DIGIT+ +/// ; +fn take_float_exp(i: &str) -> IonResult<&str, String> { + take_exp("eE")(i) +} + +fn take_exp(exponent_delimiters: &'static str) -> impl Fn(&str) -> IonResult<&str, String> { + move |i: &str| { + let (i, _) = one_of(exponent_delimiters)(i)?; + let (i, sign) = opt(take_plus_or_minus)(i)?; + let (i, digits) = take_while1(|c: char| c.is_ascii_digit())(i)?; + + let sign_char = if sign.is_some() { 1 } else { 0 }; + + let mut exponent = String::with_capacity(digits.len() + sign_char); + if let Some(sign) = sign { + exponent.push(sign); + } + exponent.push_str(digits); + + Ok((i, exponent)) + } +} + +/// +/// Encoding Section: Ion Decimal +/// + +/// Note: number parsing consolidated to avoid parsing DEC_INTEGER multiple times when deciding +/// the numeric type to apply. +/// +/// DECIMAL +/// : DEC_INTEGER DEC_FRAC? DECIMAL_EXP? +/// ; +fn assemble_decimal<'a>( + i: &'a str, + integer: String, + fractional: Option>, + exponent: Option, +) -> IonResult<&'a str, ion::Decimal> { + // coefficient drops -0 + let sign = if integer.starts_with('-') { + Sign::Minus + } else { + Sign::Plus + }; + let mut coefficient = + str_to_bigint(integer, 10).map_err(|e| Err::Failure(IonError::from_format_error(i, e)))?; + + // If we have a fractional value we have to normalize it so the value is an integer so + // the values can be represented with the BigInt library via two integers like so: + // coefficient * pow(10, exponent) + // This involves shifting the decimal some number of digits to the right and keeping + // track of the shift for incorporation into any exponent value. 
+ let exponent_shift: usize = if let Some(fractional) = fractional { + let shift_digits = fractional.iter().fold(0, |sum, s| sum + s.chars().count()); + if shift_digits > 0 { + let fractional = str_vec_to_bigint(fractional, 10) + .map_err(|e| Err::Failure(IonError::from_format_error(i, e)))?; + + coefficient *= pow(BigInt::one() * 10, shift_digits); + match sign { + Sign::Plus | Sign::NoSign => coefficient += fractional, + Sign::Minus => coefficient -= fractional, + } + } + + shift_digits + } else { + 0 + }; + + let mut exponent = if let Some(exp_str) = exponent { + str_to_bigint(&exp_str, 10).map_err(|e| Err::Failure(IonError::from_format_error(i, e)))? + } else { + BigInt::zero() + }; + + if exponent_shift > 0 { + exponent -= exponent_shift + } + + Ok(( + i, + ion::Decimal { + coefficient, + exponent, + }, + )) +} + +/// fragment +/// DECIMAL_EXP +/// : [Dd] PLUS_OR_MINUS? DEC_DIGIT+ +/// ; +fn take_decimal_exp(i: &str) -> IonResult<&str, String> { + take_exp("dD")(i) +} + +/// +/// Encoding Section: Ion Symbol +/// + +/// QUOTED_SYMBOL +/// : SYMBOL_QUOTE SYMBOL_TEXT SYMBOL_QUOTE +/// ; +fn take_quoted_symbol(i: &str) -> IonResult<&str, String> { + preceded( + not(tag(LONG_QUOTE)), + take_delimited_input(SYMBOL_QUOTE, |i| { + escaped_transform( + take_symbol_text, + COMMON_ESCAPE, + map(take_text_escape, |esc| esc.into_utf_8_escape()), + )(i) + }), + )(i) +} + +/// Note: escape parsing is handled in take_quoted_symbol. 
+/// fragment +/// SYMBOL_TEXT +/// : (TEXT_ESCAPE | SYMBOL_TEXT_ALLOWED)* +/// ; +fn take_symbol_text(i: &str) -> IonResult<&str, &str> { + take_while1(is_symbol_text_allowed)(i) +} + +/// non-control Unicode and not single quote or backslash +/// fragment +/// SYMBOL_TEXT_ALLOWED +/// : '\u0020'..'\u0026' // no C1 control characters and no U+0027 single quote +/// | '\u0028'..'\u005B' // no U+005C backslash +/// | '\u005D'..'\uFFFF' // should be up to U+10FFFF +/// | WS_NOT_NL +/// ; +fn is_symbol_text_allowed(c: char) -> bool { + let scalar_value: u32 = c.into(); + + is_ws_not_nl(c) || (scalar_value >= 0x20 && c != '\'' && c != '\\') +} + +/// Identifies Ion values which should not be allowed to be keywords. +fn not_keyword(i: &str) -> bool { + let result: IonResult<&str, ()> = not(all_consuming(alt(( + tag("null"), + tag("nan"), + tag("false"), + tag("true"), + ))))(i); + + result.is_ok() +} + +/// Certain keywords are excluded from being identifier symbols as they need to be translated +/// into other Ion values. +/// +/// IDENTIFIER_SYMBOL +/// : [$_a-zA-Z] ([$_a-zA-Z] | DEC_DIGIT)* +/// ; +fn take_identifier_symbol(i: &str) -> IonResult<&str, &str> { + verify( + recognize(pair( + one_if(|c: char| c.is_ascii_alphabetic() || c == '_' || c == '$'), + take_while(|c: char| { + c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_' || c == '$' + }), + )), + not_keyword, + )(i) +} + +/// Parses a delimiter, then the input body, then another delimiter. +/// Any error past the first delimiter is considered a failure. 
+fn take_delimited_input( + delimiter: &'static str, + body_parser: F, +) -> impl Fn(Input) -> IonResult +where + Input: Clone + Compare<&'static str> + InputTake + Slice> + InputLength, + Input: ExtendInto, + Output: Extend<::Item>, + F: Fn(Input) -> IonResult, +{ + move |i: Input| { + map( + preceded( + tag(delimiter), + cut(terminated(opt(&body_parser), tag(delimiter))), + ), + |s| s.unwrap_or_else(|| i.new_builder()), + )(i.clone()) + } +} + +/// Checks for a long quote at each char while consuming allowed text. +/// Helper for STRING_LONG_TEXT_ALLOWED and CLOB_LONG_TEXT +fn take_long_quoted_string_text_allowed( + is_allowed_char: A, +) -> impl Fn(Input) -> IonResult +where + Input: Clone + + Compare<&'static str> + + InputTake + + Slice> + + Slice> + + InputLength + + InputIter + + ExtendInto, + ::Item: Clone + AsChar, + Output: Extend<::Item>, + A: Fn(::Item) -> bool, +{ + move |i: Input| { + for (idx, c) in i.iter_indices() { + if let (_, Some(_)) = opt(peek(tag(LONG_QUOTE)))(i.slice(idx..))? 
{ + return if idx == 0 { + Err(Err::Error(IonError::from_error_kind( + i, + ErrorKind::EscapedTransform, + ))) + } else { + Ok((i.slice(idx..), i.slice(..idx))) + }; + }; + + if !is_allowed_char(c.clone()) && c.as_char() != '\'' { + return if idx == 0 { + Err(Err::Error(IonError::from_error_kind( + i, + ErrorKind::EscapedTransform, + ))) + } else { + Ok((i.slice(idx..), i.slice(..idx))) + }; + } + } + Err(Err::Failure(IonError::from_format_error( + i, + FormatError::Text(TextFormatError::OpenLongString), + ))) + } +} + +/// +/// Encoding Section: Ion String +/// + +/// SHORT_QUOTED_STRING +/// : SHORT_QUOTE STRING_SHORT_TEXT SHORT_QUOTE +/// ; +fn take_short_quoted_string(i: &str) -> IonResult<&str, String> { + take_delimited_input("\"", |i| { + escaped_transform( + take_short_quoted_string_segment, + COMMON_ESCAPE, + map(take_text_escape, |esc| esc.into_utf_8_escape()), + )(i) + })(i) +} + +/// LONG_QUOTED_STRING +/// : LONG_QUOTE STRING_LONG_TEXT LONG_QUOTE +/// ; +fn take_long_quoted_string(i: &str) -> IonResult<&str, String> { + take_delimited_input(LONG_QUOTE, |i| { + escaped_transform( + take_long_quoted_string_segment, + COMMON_ESCAPE, + map(take_text_escape, |esc| esc.into_utf_8_escape()), + )(i) + })(i) +} + +/// fragment +/// STRING_SHORT_TEXT +/// : (TEXT_ESCAPE | STRING_SHORT_TEXT_ALLOWED)* +/// ; +fn take_short_quoted_string_segment(i: &str) -> IonResult<&str, &str> { + take_while1(is_string_short_text_allowed)(i) +} + +/// Encoding: STRING_LONG_TEXT +/// fragment +/// STRING_LONG_TEXT +/// : (TEXT_ESCAPE | STRING_LONG_TEXT_ALLOWED)*? 
+/// ; +fn take_long_quoted_string_segment(i: &str) -> IonResult<&str, &str> { + take_long_quoted_string_text_allowed(is_string_long_text_allowed)(i) +} + +/// non-control Unicode and not double quote or backslash +/// fragment +/// STRING_SHORT_TEXT_ALLOWED +/// : '\u0020'..'\u0021' // no C1 control characters and no U+0022 double quote +/// | '\u0023'..'\u005B' // no U+005C backslash +/// | '\u005D'..'\uFFFF' // should be up to U+10FFFF +/// | WS_NOT_NL +/// ; +fn is_string_short_text_allowed(c: char) -> bool { + let scalar_value: u32 = c.into(); + + is_ws_not_nl(c) || (scalar_value >= 0x20 && c != '"' && c != '\\') +} + +/// non-control Unicode (newlines are OK) +/// fragment +/// STRING_LONG_TEXT_ALLOWED +/// : '\u0020'..'\u005B' // no C1 control characters and no U+005C blackslash +/// | '\u005D'..'\uFFFF' // should be up to U+10FFFF +/// | WS +/// ; +fn is_string_long_text_allowed(c: char) -> bool { + let scalar_value: u32 = c.into(); + + is_ws(c) || (scalar_value >= 0x20 && c != '\\') +} + +/// fragment +/// TEXT_ESCAPE +/// : COMMON_ESCAPE | HEX_ESCAPE | UNICODE_ESCAPE +/// ; +fn take_text_escape(i: &str) -> IonResult<&str, Escape> { + cut(alt(( + take_common_escape_code, + take_hex_escape, + take_unicode_escape, + )))(i) +} + +/// +/// Encoding Section: Ion CLOB +/// + +/// Note: quoting lowered to take_clob_short_text via take_delimited_input. +/// SHORT_QUOTED_CLOB +/// : LOB_START WS* SHORT_QUOTE CLOB_SHORT_TEXT SHORT_QUOTE WS* LOB_END +/// ; +fn take_short_quoted_clob(i: &[u8]) -> IonResult<&[u8], ion::Clob> { + map(take_clob_short_text, |data| ion::Clob { data })(i) +} + +/// Note: quoting lowered to take_clob_short_text via take_delimited_input. +/// LONG_QUOTED_CLOB +/// : LOB_START (WS* LONG_QUOTE CLOB_LONG_TEXT*? 
LONG_QUOTE)+ WS* LOB_END +/// ; +fn take_long_quoted_clob(i: &[u8]) -> IonResult<&[u8], ion::Clob> { + let (i, vec) = many1(preceded(eat_opt_whitespace, take_clob_long_text))(i)?; + let data = vec.concat(); + Ok((i, ion::Clob { data })) +} + +/// fragment +/// CLOB_SHORT_TEXT +/// : (CLOB_ESCAPE | CLOB_SHORT_TEXT_ALLOWED)* +/// ; +fn take_clob_short_text(i: &[u8]) -> IonResult<&[u8], Vec> { + take_delimited_input(SHORT_QUOTE, |i| { + escaped_transform( + take_clob_short_text_allowed, + COMMON_ESCAPE, + map(take_clob_escape, |esc| esc.into_ascii_escape()), + )(i) + })(i) +} + +/// The quoting strategy is taken care of by a long quoted string helper. +/// +/// Encoding: CLOB_LONG_TEXT +/// fragment +/// CLOB_LONG_TEXT +/// : CLOB_LONG_TEXT_NO_QUOTE +/// | '\'' CLOB_LONG_TEXT_NO_QUOTE +/// | '\'\'' CLOB_LONG_TEXT_NO_QUOTE +/// ; +fn take_clob_long_text(i: &[u8]) -> IonResult<&[u8], Vec> { + take_delimited_input(LONG_QUOTE, |i| { + escaped_transform( + take_clob_long_text_allowed, + COMMON_ESCAPE, + map(take_clob_escape, |esc| esc.into_ascii_escape()), + )(i) + })(i) +} + +/// Omitted - using a shared long quoted string parser which can be specialized for bodies. 
+/// See take_long_quoted_string_text_allowed +/// +/// fragment +/// CLOB_LONG_TEXT_NO_QUOTE +/// : (CLOB_ESCAPE | CLOB_LONG_TEXT_ALLOWED) +/// ; + +/// Parser of CLOB_SHORT_TEXT_ALLOWED +fn take_clob_short_text_allowed(i: &[u8]) -> IonResult<&[u8], &[u8]> { + take_while1(is_clob_short_text_allowed)(i) +} + +/// non-control ASCII and not double quote or backslash +/// fragment +/// CLOB_SHORT_TEXT_ALLOWED +/// : '\u0020'..'\u0021' // no U+0022 double quote +/// | '\u0023'..'\u005B' // no U+005C backslash +/// | '\u005D'..'\u007F' +/// | WS_NOT_NL +/// ; +fn is_clob_short_text_allowed(b: u8) -> bool { + is_ws_not_nl(b as char) || (b >= 0x20 && b.as_char() != '"' && b.as_char() != '\\' && b <= 0x7f) +} + +/// Parser of CLOB_LONG_TEXT_ALLOWED +fn take_clob_long_text_allowed(i: &[u8]) -> IonResult<&[u8], &[u8]> { + take_long_quoted_string_text_allowed(is_clob_long_text_allowed)(i) +} + +/// non-control ASCII (newlines are OK) +/// fragment +/// CLOB_LONG_TEXT_ALLOWED +/// : '\u0020'..'\u0026' // no U+0027 single quote +/// | '\u0028'..'\u005B' // no U+005C blackslash +/// | '\u005D'..'\u007F' +/// | WS +/// ; +fn is_clob_long_text_allowed(b: u8) -> bool { + // Note: we allow the single quote and explicitly lookahead for the triple quote while iterating. + is_ws(b.as_char()) || (b >= 0x20 && b.as_char() != '\\' && b.as_char() != '\'' && b <= 0x7f) +} + +/// fragment +/// CLOB_ESCAPE +/// : COMMON_ESCAPE | HEX_ESCAPE +/// ; +fn take_clob_escape(i: Input) -> IonResult +where + Input: Clone + + AsBytes + + InputIter + + InputLength + + InputTake + + Compare<&'static str> + + Slice> + + Slice> + + Slice>, + ::Item: AsChar, +{ + cut(alt((take_common_escape_code, take_hex_escape)))(i) +} + +/// +/// Encoding Section: Ion BLOB +/// + +/// LOB_START * LOB_END is handled by take_lob above which can be +/// specialized with a body parser. +/// +/// BLOB +/// : LOB_START (BASE_64_QUARTET | WS)* BASE_64_PAD? 
WS* LOB_END +/// ; +fn take_blob_body(i: &[u8]) -> IonResult<&[u8], ion::Blob> { + let (i, vec) = take_base_64(i)?; + match base64::decode(&vec) { + Ok(data) => Ok((i, ion::Blob { data })), + Err(_) => Err(Err::Failure(IonError::from_format_error( + i, + FormatError::Text(TextFormatError::Base64Decode), + ))), + } +} + +// Note: it is allowed for this body to be empty. This is the default "lob" type. +fn take_base_64(i: &[u8]) -> IonResult<&[u8], Vec> { + let (i, (quartets, pad)) = pair( + many0(delimited( + eat_opt_whitespace, + take_base_64_quartet, + eat_opt_whitespace, + )), + terminated(opt(take_base_64_pad), eat_opt_whitespace), + )(i)?; + + let mut bytes = quartets.iter().fold(0, |acc, _| acc + 4); + + if pad.is_some() { + bytes += 4; + } + + let mut base_64_bytes = + quartets + .iter() + .fold(Vec::with_capacity(bytes), |mut s, (s1, s2, s3, s4)| { + s.extend_from_slice(&[*s1, *s2, *s3, *s4]); + s + }); + + if let Some((s1, s2, s3)) = pad { + base_64_bytes.extend_from_slice(&[s1, s2]); + if let Some(s3) = s3 { + base_64_bytes.push(s3); + } + } + + Ok((i, base_64_bytes)) +} + +/// fragment +/// BASE_64_PAD +/// : BASE_64_PAD1 +/// | BASE_64_PAD2 +/// ; +fn take_base_64_pad(i: &[u8]) -> IonResult<&[u8], (u8, u8, Option)> { + if let Ok((i, (s1, s2, s3))) = take_base_64_pad1(i) { + return Ok((i, (s1, s2, Some(s3)))); + } + let (i, (s1, s2)) = take_base_64_pad2(i)?; + Ok((i, (s1, s2, None))) +} + +/// fragment +/// BASE_64_QUARTET +/// : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR +/// ; +fn take_base_64_quartet(i: &[u8]) -> IonResult<&[u8], (u8, u8, u8, u8)> { + let (i, ((s1, s2), (s3, s4))) = separated_pair( + separated_pair(take_base_64_char, eat_opt_whitespace, take_base_64_char), + eat_opt_whitespace, + separated_pair(take_base_64_char, eat_opt_whitespace, take_base_64_char), + )(i)?; + Ok((i, (s1, s2, s3, s4))) +} + +/// fragment +/// BASE_64_PAD1 +/// : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* '=' +/// ; +fn 
take_base_64_pad1(i: &[u8]) -> IonResult<&[u8], (u8, u8, u8)> { + let (i, ((s1, s2), (s3, _))) = separated_pair( + separated_pair(take_base_64_char, eat_opt_whitespace, take_base_64_char), + eat_opt_whitespace, + separated_pair(take_base_64_char, eat_opt_whitespace, tag(b"=")), + )(i)?; + Ok((i, (s1, s2, s3))) +} + +/// fragment +/// BASE_64_PAD2 +/// : BASE_64_CHAR WS* BASE_64_CHAR WS* '=' WS* '=' +/// ; +fn take_base_64_pad2(i: &[u8]) -> IonResult<&[u8], (u8, u8)> { + let (i, ((s1, s2), _)) = separated_pair( + separated_pair(take_base_64_char, eat_opt_whitespace, take_base_64_char), + eat_opt_whitespace, + separated_pair(tag(b"="), eat_opt_whitespace, tag(b"=")), + )(i)?; + Ok((i, (s1, s2))) +} + +/// fragment +/// BASE_64_CHAR +/// : [0-9a-zA-Z+/] +/// ; +fn take_base_64_char(i: &[u8]) -> IonResult<&[u8], u8> { + one_if(is_base_64_char)(i) +} + +fn is_base_64_char(b: u8) -> bool { + (b as char).is_ascii_alphanumeric() || b == b'+' || b == b'/' +} + +/// +/// Encoding Section: Common Lexer Primitives +/// + +/// fragment LOB_START : '{{'; +const LOB_START: &str = "{{"; + +/// fragment LOB_END : '}}'; +const LOB_END: &str = "}}"; + +/// fragment SYMBOL_QUOTE : '\''; +const SYMBOL_QUOTE: &str = "\'"; + +/// fragment SHORT_QUOTE : '"'; +const SHORT_QUOTE: &str = "\""; + +/// fragment LONG_QUOTE : '\'\'\''; +const LONG_QUOTE: &str = "'''"; + +/// fragment +/// DEC_UNSIGNED_INTEGER +/// : '0' +/// | [1-9] (UNDERSCORE? DEC_DIGIT)* +/// ; +fn take_dec_unsigned_integer(i: &str) -> IonResult<&str, Vec<&str>> { + // Ion does not allow leading zeroes, hence the split here. + let (i, first) = take_while_m_n(1, 1, |c: char| c.is_ascii_digit())(i)?; + let mut digits = vec![first]; + + if first == "0" { + return Ok((i, digits)); + } + + let (i, rest) = many0(preceded( + opt(char(UNDERSCORE)), + take_while1(|c: char| c.is_ascii_digit()), + ))(i)?; + + digits.extend_from_slice(&rest); + + Ok((i, digits)) +} + +/// fragment +/// DEC_FRAC +/// : '.' +/// | '.' 
DEC_DIGIT (UNDERSCORE? DEC_DIGIT)* +/// ; +fn take_dec_frac(i: &str) -> IonResult<&str, Vec<&str>> { + preceded( + char('.'), + separated_list(char(UNDERSCORE), take_while1(|c: char| c.is_ascii_digit())), + )(i) +} + +/// fragment +/// DEC_DIGIT +/// : [0-9] +/// ; +fn is_dec_digit(c: char) -> bool { + c.is_ascii_digit() +} + +/// fragment +/// HEX_DIGIT +/// : [0-9a-fA-F] +/// ; +fn is_hex_digit(c: Input) -> bool +where + Input: AsChar, +{ + c.as_char().is_ascii_hexdigit() +} + +/// fragment +/// BINARY_DIGIT +/// : [01] +/// ; +fn is_binary_digit(c: char) -> bool { + c == '0' || c == '1' +} + +/// fragment +/// PLUS_OR_MINUS +/// : [+\-] +/// ; +fn take_plus_or_minus(i: &str) -> IonResult<&str, char> { + one_of("+-")(i) +} + +enum Escape { + None, + Char(char), + Digits(String), +} + +impl Escape { + // Converts an escape + fn into_ascii_escape(self) -> ByteEscape { + match self { + Escape::None => ByteEscape(None), + Escape::Char(c) => { + assert!(c as u32 <= u8::max_value() as u32); + ByteEscape(Some(c as u8)) + } + Escape::Digits(digits) => { + assert!(digits.len() <= 2); + if let Ok(b) = u8::from_str_radix(&digits, 16) { + return ByteEscape(Some(b)); + } + unreachable!("ASCII character range confirmed by parser"); + } + } + } + + fn into_utf_8_escape(self) -> CharEscape { + match self { + Escape::None => CharEscape(None), + Escape::Char(c) => CharEscape(Some(c)), + Escape::Digits(digits) => { + if let Ok(int) = u32::from_str_radix(&digits, 16) { + if let Ok(c) = char::try_from(int) { + return CharEscape(Some(c)); + } + } + unreachable!("UTF-8 range confirmed by parser"); + } + } + } +} + +struct CharEscape(Option); +struct ByteEscape(Option); + +impl ExtendInto for CharEscape { + type Item = char; + type Extender = String; + + #[inline] + fn new_builder(&self) -> String { + String::new() + } + #[inline] + fn extend_into(&self, acc: &mut String) { + if let Some(v) = self.0 { + acc.push(v); + } + } +} + +impl ExtendInto for ByteEscape { + type Item = u8; + type 
Extender = Vec; + + #[inline] + fn new_builder(&self) -> Vec { + vec![] + } + #[inline] + fn extend_into(&self, acc: &mut Vec) { + if let Some(v) = self.0 { + acc.push(v); + } + } +} + +/// fragment +/// COMMON_ESCAPE +/// : '\\' COMMON_ESCAPE_CODE +/// ; +const COMMON_ESCAPE: char = '\\'; + +/// fragment +/// COMMON_ESCAPE_CODE +/// : 'a' +/// | 'b' +/// | 't' +/// | 'n' +/// | 'f' +/// | 'r' +/// | 'v' +/// | '?' +/// | '0' +/// | '\'' +/// | '"' +/// | '/' +/// | '\\' +/// | NL +/// ; +fn take_common_escape_code(i: Input) -> IonResult +where + Input: Clone + + InputIter + + Compare<&'static str> + + Slice> + + Slice> + + InputTake + + Slice>, + ::Item: AsChar, +{ + alt(( + map( + alt(( + value('\x07', char('a')), // alarm (BEL) + value('\x08', char('b')), // backspace (BS) + value('\t', char('t')), // horizontal tab (HT) + value('\n', char('n')), // linefeed (LF) + value('\x0c', char('f')), // form feed (FF) + value('\r', char('r')), // carriage return (CR) + value('\x0b', char('v')), // vertical tab (VT) + value('?', char('?')), // question mark + value('\x00', char('0')), // null (NUL) + value('\'', char('\'')), // single quote + value('"', char('"')), // double quote + value('/', char('/')), // forward slash + value('\\', char('\\')), // backslash + )), + Escape::Char, + ), + // escaped NL expands to nothing + map(nl, |_| Escape::None), + ))(i) +} + +/// Helper for mapping a successfully parsed hex string to an Escape +/// +/// Note: only for use via map_parser. 
+fn hex_digits_to_escape(i: Input) -> IonResult +where + Input: Clone + AsBytes, +{ + let i2 = i.clone(); + let b = i2.as_bytes(); + if let Ok(int) = u32::from_str_radix(from_utf8(b).unwrap(), 16) { + if char::try_from(int).is_ok() { + return Ok(( + i.clone(), + Escape::Digits(from_utf8(i.as_bytes()).unwrap().to_string()), + )); + } + }; + Err(Err::Failure(IonError::from_format_error( + i, + FormatError::Text(TextFormatError::HexEscape( + from_utf8(b).unwrap().to_string(), + )), + ))) +} + +/// Note: escape character detected by Nom in escaped_transform +/// +/// fragment +/// HEX_ESCAPE +/// : '\\x' HEX_DIGIT HEX_DIGIT +/// ; +fn take_hex_escape(i: Input) -> IonResult +where + Input: Clone + + AsBytes + + InputLength + + InputTake + + InputIter + + Compare<&'static str> + + Slice> + + Slice> + + Slice>, + ::Item: AsChar, +{ + map_parser( + preceded(char('x'), take_while_m_n(2, 2, is_hex_digit)), + hex_digits_to_escape, + )(i) +} + +/// Note: escape character detected by Nom in escaped_transform +/// +/// fragment +/// UNICODE_ESCAPE +/// : '\\u' HEX_DIGIT_QUARTET +/// | '\\U000' HEX_DIGIT_QUARTET HEX_DIGIT +/// | '\\U0010' HEX_DIGIT_QUARTET +/// ; +fn take_unicode_escape(i: &str) -> IonResult<&str, Escape> { + map_parser( + alt(( + preceded(char('u'), take_while_m_n(4, 4, is_hex_digit)), + preceded(tag("U000"), take_while_m_n(5, 5, is_hex_digit)), + preceded( + tag("U00"), + recognize(pair(tag("10"), take_while_m_n(4, 4, is_hex_digit))), + ), + )), + hex_digits_to_escape, + )(i) +} + +/// Omitted - see above. 
+/// fragment +/// HEX_DIGIT_QUARTET +/// : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT +/// ; + +/// fragment +/// WS +/// : WS_NOT_NL +/// | '\u000A' // line feed +/// | '\u000D' // carriage return +/// ; +pub fn is_ws(c: Input) -> bool +where + Input: AsChar, +{ + let c = c.as_char(); + + is_ws_not_nl(c) + || c == '\x0d' // Carriage Return + || c == '\x0a' // Line Feed +} + +/// fragment +/// NL +/// : '\u000D\u000A' // carriage return + line feed +/// | '\u000D' // carriage return +/// | '\u000A' // line feed +/// ; +fn nl(i: Input) -> IonResult +where + Input: Clone + + Compare<&'static str> + + Slice> + + InputIter + + Slice> + + Slice> + + InputTake, +{ + alt((crlf, tag("\r"), tag("\n")))(i) +} + +/// fragment +/// WS_NOT_NL +/// : '\u0009' // tab +/// | '\u000B' // vertical tab +/// | '\u000C' // form feed +/// | '\u0020' // space +/// ; +pub fn is_ws_not_nl(c: char) -> bool { + c == '\x09' // Tab + || c == '\x0b' // Vertical Tab + || c == '\x0c' // Form Feed + || c == '\x20' // Space +} + +/// fragment +/// UNDERSCORE +/// : '_' +/// ; +const UNDERSCORE: char = '_'; diff --git a/src/parser/ion_1_0/text/tests/bad.rs b/src/parser/ion_1_0/text/tests/bad.rs new file mode 100644 index 0000000..8122147 --- /dev/null +++ b/src/parser/ion_1_0/text/tests/bad.rs @@ -0,0 +1,36 @@ +use crate::parser::ion_1_0::text::tests::{find_ion_text, parse_file, test_path}; +use itertools::Itertools; + +// TODO: find a way to guarantee that all bad test files are checked + +#[test] +fn test_bad() { + let bad_ion_files = find_ion_text(&test_path("bad")) + .expect("bad tests directory not found. git submodule update --init ?"); + + let results = bad_ion_files + .into_iter() + .map(|path| (path.clone(), parse_file(&path))) + .collect_vec(); + let failed = results.iter().filter(|(_, r)| r.is_ok()).collect_vec(); + let succeeded = results.iter().filter(|(_, r)| r.is_err()).collect_vec(); + + if !failed.is_empty() { + pretty_env_logger::try_init().ok(); + + log::info!( + "Good news first. 
Correctly failed to read {} files.", + succeeded.len() + ); + log::info!("Read {} invalid .ion files:", failed.len()); + for (path, _) in &failed { + log::info!(" - {:?}", path.file_name()); + } + } + + assert!( + failed.is_empty(), + "Accidentally parsed {} bad Ion files. Set RUST_LOG=info for a list.", + failed.len() + ); +} diff --git a/src/parser/ion_1_0/text/tests/equivalencies.rs b/src/parser/ion_1_0/text/tests/equivalencies.rs new file mode 100644 index 0000000..3fb84a0 --- /dev/null +++ b/src/parser/ion_1_0/text/tests/equivalencies.rs @@ -0,0 +1,165 @@ +use crate::{ + parser::{ + ion_1_0::text::tests::{find_ion_text, parse_file, test_path}, + parse::parse_ion_text_1_0, + }, + value::{self as ion, Value}, +}; +use itertools::Itertools; +use std::{ + ffi::OsStr, + path::{Path, PathBuf}, +}; + +#[test] +fn test_equivs() { + fn equivalent(values: &[Vec]) -> Result<(), String> { + for mut vec in values.iter().combinations(2) { + let a = vec.pop().unwrap(); + let b = vec.pop().unwrap(); + if a != b { + return Err(format!( + "Failed equivalency:\n{:?} should equal \n{:?}", + a, b + )); + } + } + + Ok(()) + } + + // TODO: assert all equivalency tests were executed (glob filename count/test count?) + comparison_test(&test_path("good/equivs"), equivalent); +} + +#[test] +fn test_non_equivs() { + fn non_equivalent(values: &[Vec]) -> Result<(), String> { + for mut vec in values.iter().combinations(2) { + let a = vec.pop().unwrap(); + let b = vec.pop().unwrap(); + if a == b { + return Err(format!( + "Failed equivalency:\n{:?} should not equal \n{:?}", + a, b + )); + } + } + + Ok(()) + } + + // TODO: assert all equivalency tests were executed (glob filename count/test count?) + comparison_test(&test_path("good/non-equivs"), non_equivalent); +} + +fn comparison_test

(path: &Path, equivalence_predicate: P) +where + P: Fn(&[Vec]) -> Result<(), String>, +{ + pretty_env_logger::try_init().ok(); + + let paths = find_ion_text(&path).unwrap_or_else(|_| { + panic!( + "Test path {:?} not found. git submodule update --init ?", + path + ) + }); + + let parsed_test_data = paths + .into_iter() + // TODO(amzn/ion-tests#65): remove this filter when the test is valid + // + // Despite being called stringUtf8.ion, this file contains invalid UTF-8 characters. + // + // On page 5 of RFC3629: + // + // The definition of UTF-8 prohibits encoding character numbers between + // U+D800 and U+DFFF, which are reserved for use with the UTF-16 + // encoding form (as surrogate pairs) and do not directly represent + // characters. + // + .filter(|path| path.file_name() != Some(OsStr::new("stringUtf8.ion"))) + .map(|path| (path.clone(), parse_file(&path))) + .collect_vec(); + + let mut failed: Vec<(PathBuf, String)> = vec![]; + let mut succeeded: Vec = vec![]; + + for (path, result) in parsed_test_data { + match result { + Ok(tlvs) => { + for (tlv_idx, tlv) in tlvs.into_iter().enumerate() { + // This means that the equivalency has embedded strings which should be parsed prior + // to comparison. + let embedded = tlv.has_annotation("embedded_documents"); + + match tlv { + ion::Value { + value: ion::Data::Sexp(Some(ion::Sexp { values })), + .. + } + | ion::Value { + value: ion::Data::List(Some(ion::List { values })), + .. + } => { + let values = if embedded { + values + .into_iter() + .enumerate() + .filter_map(|(idx, v)| { + if let ion::Value { + value: ion::Data::String(Some(value)), + .. 
+ } = v + { + match parse_ion_text_1_0(value.as_str()) { + Ok((_, values)) => Some(values), + Err(e) => panic!( + "{:?}: embedded document {}:{} should be parseable: {:?}, {:?}", + path, tlv_idx, idx, value.as_str(), e + ) + } + } else { + panic!( + "{:?}: embedded document {}:{} contains non-string values", + path, tlv_idx, idx + ) + } + }) + .collect_vec() + } else { + vec![values] + }; + + match equivalence_predicate(&values) { + Ok(_) => succeeded.push(path.clone()), + Err(e) => failed.push((path.clone(), e)), + } + } + value => panic!("Top level value {:?} is not a list or sexp.", value), + }; + } + } + Err(e) => failed.push((path.clone(), format!("failed to parse test file: {}", e))), + } + + if !failed.is_empty() { + log::debug!( + "Good news first. Correctly processed equivalencies for {} files.", + succeeded.len() + ); + log::debug!("Failed the following equivalencies:"); + for (path, error) in &failed { + log::debug!(" - {:?}: {}", path.file_name(), error); + } + } + + assert!( + failed.is_empty(), + "Failed {} of {} equivalencies", + failed.len(), + succeeded.len() + failed.len() + ); + } +} diff --git a/src/parser/ion_1_0/text/tests/good.rs b/src/parser/ion_1_0/text/tests/good.rs new file mode 100644 index 0000000..855bfbb --- /dev/null +++ b/src/parser/ion_1_0/text/tests/good.rs @@ -0,0 +1,2388 @@ +use super::{ + annot, blob_decoded, blob_encoded, blob_encoded_data, boolean, clob, clob_data, decimal, float, + int_i64, int_i64_data, int_s, list, map, map_data, parse_file, sexp, sexp_data, string, symbol, + symbol_data, test_path, time::TextDate, timestamp, value, verify_tlvs, +}; +use crate::{ + parser::ion_1_0::text::tests::{fractional_second, minute}, + symbols::SymbolToken, + value::{self as ion}, +}; +use core::iter; +use num_bigint::BigUint; +use time::UtcOffset; + +// TODO: find a way to guarantee that all good test files are checked + +#[test] +fn test_all_nulls() { + let result = parse_file(&test_path("good/allNulls.ion")); + + let expected: 
ion::Value = list(vec![ + ion::Data::Null.into(), + ion::Data::Null.into(), + ion::Data::Bool(None).into(), + ion::Data::Int(None).into(), + ion::Data::Float(None).into(), + ion::Data::Decimal(None).into(), + ion::Data::Timestamp(None).into(), + ion::Data::String(None).into(), + ion::Data::Symbol(None).into(), + ion::Data::Blob(None).into(), + ion::Data::Clob(None).into(), + ion::Data::Struct(None).into(), + ion::Data::List(None).into(), + ion::Data::Sexp(None).into(), + ]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_false() { + let result = parse_file(&test_path("good/annotationQuotedFalse.ion")); + + let expected = value(int_i64_data(23), vec![annot("false")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_nan() { + let result = parse_file(&test_path("good/annotationQuotedNan.ion")); + + let expected = value(int_i64_data(23), vec![annot("nan")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_neg_inf() { + let result = parse_file(&test_path("good/annotationQuotedNegInf.ion")); + + let expected = value(int_i64_data(23), vec![annot("-inf")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_null() { + let result = parse_file(&test_path("good/annotationQuotedNull.ion")); + + let expected = value(int_i64_data(23), vec![annot("null")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_null_int() { + let result = parse_file(&test_path("good/annotationQuotedNullInt.ion")); + + let expected = value(int_i64_data(23), vec![annot("null.int")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_operator() { + let result = parse_file(&test_path("good/annotationQuotedOperator.ion")); + + let expected = value(int_i64_data(23), vec![annot("@")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_pos_inf() { + let result = 
parse_file(&test_path("good/annotationQuotedPosInf.ion")); + + let expected = value(int_i64_data(23), vec![annot("+inf")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_annotation_quoted_true() { + let result = parse_file(&test_path("good/annotationQuotedTrue.ion")); + + let expected = value(int_i64_data(23), vec![annot("true")]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_blank() { + let result = parse_file(&test_path("good/blank.ion")); + + verify_tlvs(vec![], result); +} + +#[test] +fn test_blobs() { + let result = parse_file(&test_path("good/blobs.ion")); + + let expected = vec![ + blob_decoded(br"a b c d e f g h i j k l m n o p q r s t u v w x y z"), + blob_decoded(br"A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"), + blob_decoded(br"1 2 3 4 5 6 7 8 9 0"), + blob_decoded(br", . ; / [ ' ] \ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?"), + blob_decoded(b"\x3a\x20\x53\x20\xa5\x20\x4f\x20\x00\x49\xbf"), + blob_decoded(b"\xff\xfe\xfd\xfc\xfb\x00\x01\x02\x03\x04\x05"), + blob_decoded(b"\x01\x11\x19\x1e\x2c\x37\x3c\x48\x51\x63\x67\x75\x7d\x8b\x8e\x9c\xa5\xb1\xb5\xc6\xcc\xd3\xdf\xef\xf6\xff\x00"), + blob_decoded(br"A Very Very Very Very Large Test Blob"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_booleans() { + let result = parse_file(&test_path("good/booleans.ion")); + + let expected = vec![ + ion::Data::Bool(Some(true)).into(), + ion::Data::Bool(Some(false)).into(), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_clobs() { + let result = parse_file(&test_path("good/clobs.ion")); + + let expected = vec![ + clob(b"a b c d e f g h i j k l m n o p q r s t u v w x y z"), + clob(b"A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"), + clob(b"1 2 3 4 5 6 7 8 9 0"), + clob(b", . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?"), + clob(b"\x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? 
\\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/"), + clob(b"\x7f \x66 \x00 \x5a\x5b\x00\x1c\x2d\x3f\xFf"), + clob(b"\x7F \x66 \x00 \x5A\x5B\x00\x1C\x2D\x3F\xfF"), + clob(b"Stuff to write on multiple lines if you want to"), + clob(b""), + clob(b""), + clob(b""), + clob(b"concatenated from a single line"), + clob(b""), + clob(b"a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z , . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ? \x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? \\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/\x7f \x66 \x00 \x5a\x5b\x00\x1c\x2d\x3f\x7F \x66 \x00 \x5A\x5B\x00\x1C\x2D\x3F"), + clob(b"multi-line string\nwith embedded\nnew line\ncharacters"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_clobs_with_quotes() { + let result = parse_file(&test_path("good/clobsWithQuotes.ion")); + + let expected = vec![ + clob(b"'''"), + clob(b"''''''"), + clob(b"\""), + clob(b"\"\""), + clob(b"'''\n12345678901234567890123456789012345678901234567890123456789012345678901234567890\n'''\n12345678901234567890123456789012345678901234567890123456789012345678901234567890\n'''\n"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_clobs_with_whitespace() { + let result = parse_file(&test_path("good/clobsWithWhitespace.ion")); + + let expected = vec![ + clob(b" "), + clob(b" "), + clob(b" "), + clob(b" "), + clob(b" "), + clob(b" "), + clob(b" "), + clob(b" "), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_clobs_with_del() { + let result = parse_file(&test_path("good/clobWithDel.ion")); + + let expected = vec![clob(b""), clob(b"")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_comment_multi_line_then_eof() { + let result = parse_file(&test_path("good/commentMultiLineThenEof.ion")); + + let expected = symbol("abc"); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_comment_single_line_then_eof() { + let result = 
parse_file(&test_path("good/commentSingleLineThenEof.ion")); + + let expected = symbol("abc"); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_decimal_64_bit_boundary() { + let result = parse_file(&test_path("good/decimal64BitBoundary.ion")); + + let expected = vec![ + decimal("18446744073709551615", "0"), + decimal("-18446744073709551615", "0"), + decimal("18446744073709551616", "0"), + decimal("-18446744073709551616", "0"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_decimal_e_values() { + let result = parse_file(&test_path("good/decimal_e_values.ion")); + + let pos = iter::repeat_with(|| decimal("2718281828459045", "-15")); + let pos_prec = iter::repeat_with(|| decimal("27182818284590450000000000", "-25")); + let neg = iter::repeat_with(|| decimal("-2718281828459045", "-15")); + let neg_prec = iter::repeat_with(|| decimal("-27182818284590450000000000", "-25")); + + let mut expected = vec![]; + + expected.extend(pos.take(5)); + expected.extend(pos_prec.take(3)); + expected.extend(pos.take(2)); + + expected.extend(neg.take(5)); + expected.extend(neg_prec.take(3)); + expected.extend(neg.take(2)); + + expected.extend(pos.take(4)); + expected.extend(pos_prec.take(3)); + expected.extend(pos.take(2)); + + expected.extend(neg.take(4)); + expected.extend(neg_prec.take(3)); + expected.extend(neg.take(2)); + + verify_tlvs(expected, result); +} + +#[test] +fn test_decimal_values() { + let result = parse_file(&test_path("good/decimal_values.ion")); + + let expected = vec![ + decimal("1234560", "-1"), + decimal("123456", "0"), + decimal("123456", "1"), + decimal("123456", "2"), + decimal("123456", "3"), + decimal("123456", "42"), + decimal("123456", "-0"), + decimal("123456", "-1"), + decimal("123456", "-2"), + decimal("123456", "-42"), + decimal("123456", "-6"), + decimal("123456", "-5"), + decimal("123456", "-4"), + decimal("123456", "-3"), + decimal("123456", "-2"), + decimal("123456", "-1"), + decimal("1234560", "-2"), + 
decimal("12345600", "-3"), + decimal("123004560", "-1"), + decimal("12300456", "-5"), + decimal("12300456", "-4"), + decimal("12300456", "-3"), + decimal("123456", "39"), + decimal("123456", "39"), + decimal("123456", "-45"), + decimal("777777", "6"), + decimal("777777", "-8"), + decimal("777777", "6"), + decimal("777777", "699"), + decimal("777777", "-701"), + decimal("777777", "699"), + decimal("-1234560", "-1"), + decimal("-123456", "0"), + decimal("-123456", "1"), + decimal("-123456", "2"), + decimal("-123456", "3"), + decimal("-123456", "42"), + decimal("-123456", "-0"), + decimal("-123456", "-1"), + decimal("-123456", "-2"), + decimal("-123456", "-42"), + decimal("-123456", "-6"), + decimal("-123456", "-5"), + decimal("-123456", "-4"), + decimal("-123456", "-3"), + decimal("-123456", "-2"), + decimal("-123456", "-1"), + decimal("-1234560", "-2"), + decimal("-12345600", "-3"), + decimal("-123004560", "-1"), + decimal("-12300456", "-5"), + decimal("-12300456", "-4"), + decimal("-12300456", "-3"), + decimal("-123456", "39"), + decimal("-123456", "39"), + decimal("-123456", "-45"), + decimal("-777777", "6"), + decimal("-777777", "-8"), + decimal("-777777", "6"), + decimal("-777777", "699"), + decimal("-777777", "-701"), + decimal("-777777", "699"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_decimal_zeroes() { + let result = parse_file(&test_path("good/decimal_zeros.ion")); + + let expected = vec![ + decimal("0", "0"), + decimal("0", "0"), + decimal("0", "0"), + decimal("0", "0"), + decimal("0", "0"), + decimal("0", "-1"), + decimal("0", "-0"), + decimal("0", "-0"), + decimal("0", "-42"), + decimal("0", "-313"), + decimal("0", "+103"), + decimal("0", "99"), + decimal("0", "666"), + decimal("0", "98"), + decimal("0", "-90"), + decimal("0", "-4"), + decimal("-0", "0"), + decimal("-0", "0"), + decimal("-0", "0"), + decimal("-0", "-1"), + decimal("-0", "-0"), + decimal("-0", "-0"), + decimal("-0", "-42"), + decimal("-0", "-313"), + decimal("-0", 
"103"), + decimal("-0", "99"), + decimal("-0", "666"), + decimal("-0", "98"), + decimal("-0", "-90"), + decimal("-0", "-4"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_negative_one_dot_two_eight() { + let result = parse_file(&test_path("good/decimalNegativeOneDotTwoEight.ion")); + + let expected = vec![decimal("-128", "-2")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_decimals_with_underscores() { + let result = parse_file(&test_path("good/decimalsWithUnderscores.ion")); + + let expected = vec![ + decimal("12345678", "-4"), + decimal("1234", "0"), + decimal("12345678", "-4"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_decimals_with_terminating_eof() { + pretty_env_logger::try_init().ok(); + let result = parse_file(&test_path("good/decimalWithTerminatingEof.ion")); + + let expected = vec![decimal("123", "-2")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_empty() { + let result = parse_file(&test_path("good/empty.ion")); + + verify_tlvs(vec![], result); +} + +#[test] +fn test_eol_comment_cr() { + let result = parse_file(&test_path("good/eolCommentCr.ion")); + + let expected = list(vec![]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_eol_comment_crlf() { + let result = parse_file(&test_path("good/eolCommentCrLf.ion")); + + let expected = list(vec![]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_inf() { + let result = parse_file(&test_path("good/fieldNameInf.ion")); + + let expected = map(vec![("inf".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_false() { + let result = parse_file(&test_path("good/fieldNameQuotedFalse.ion")); + + let expected = map(vec![("false".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_nan() { + let result = parse_file(&test_path("good/fieldNameQuotedNan.ion")); + + let expected = map(vec![("nan".into(), 
boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_neg_inf() { + let result = parse_file(&test_path("good/fieldNameQuotedNegInf.ion")); + + // TODO(amzn/ion-tests#64): when this is fixed the test will fail. s/+/-/. + let expected = map(vec![("+inf".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_null() { + let result = parse_file(&test_path("good/fieldNameQuotedNull.ion")); + + let expected = map(vec![("null".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_null_int() { + let result = parse_file(&test_path("good/fieldNameQuotedNullInt.ion")); + + let expected = map(vec![("null.int".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_pos_inf() { + let result = parse_file(&test_path("good/fieldNameQuotedPosInf.ion")); + + let expected = map(vec![("+inf".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_field_name_quoted_true() { + let result = parse_file(&test_path("good/fieldNameQuotedTrue.ion")); + + let expected = map(vec![("true".into(), boolean(false))]); + + verify_tlvs(vec![expected], result); +} + +#[test] +fn test_float_values() { + let result = parse_file(&test_path("good/float_values.ion")); + + let expected = vec![ + float("123456.0e0"), + float("123456e0"), + float("123456e1"), + float("123456e2"), + float("123456e3"), + float("123456e42"), + float("123456e-0"), + float("123456e-1"), + float("123456e-2"), + float("123456e-42"), + float("0.123456e0"), + float("1.23456e0"), + float("12.3456e0"), + float("123.456e0"), + float("1234.56e0"), + float("12345.6e0"), + float("12345.60e0"), + float("12345.600e0"), + float("12300456.0e0"), + float("123.00456e0"), + float("1230.0456e0"), + float("12300.456e0"), + float("123.456e42"), + float("123.456e+42"), + float("123.456e-42"), + 
float("77777.7e0007"),
+        float("77777.7e-0007"),
+        float("77777.7e+0007"),
+    ];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/float_zeros.ion` and hands the result to `verify_tlvs` together with the
+/// expected positive and negative float zeroes, spelled with assorted exponents.
+#[test]
+fn test_float_zeroes() {
+    let result = parse_file(&test_path("good/float_zeros.ion"));
+
+    let expected = vec![
+        float("0e0"),
+        float("0E0"),
+        float("0.0e0"),
+        float("0e-0"),
+        float("0E-0"),
+        float("0e-42"),
+        float("0E-313"),
+        float("0e+103"),
+        float("0E+99"),
+        float("0E666"),
+        float("0.0e99"),
+        float("0.000e-87"),
+        float("0.0000E45"),
+        float("-0e0"),
+        float("-0E0"),
+        float("-0.0e0"),
+        float("-0e-0"),
+        float("-0E-0"),
+        float("-0e-42"),
+        float("-0E-313"),
+        float("-0e+103"),
+        float("-0E+99"),
+        float("-0E666"),
+        float("-0.0e99"),
+        float("-0.000e-87"),
+        float("-0.0000E45"),
+    ];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/floatDblMin.ion` and hands the result to `verify_tlvs` together with five
+/// spellings of a value around 2.225e-308.
+#[test]
+fn test_float_dbl_min() {
+    let result = parse_file(&test_path("good/floatDblMin.ion"));
+
+    let expected = vec![
+        float("2.2250738585072012e-308"),
+        float("0.00022250738585072012e-304"),
+        float("2.225073858507201200000e-308"),
+        float("2.2250738585072012e-00308"),
+        float("2.2250738585072012997800001e-308"),
+    ];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/floatSpecials.ion` and checks that the first top-level value is a list whose
+/// first three elements are, in order, NaN, positive infinity and negative infinity.
+///
+/// The values are inspected by hand instead of going through `verify_tlvs`: NaN never
+/// compares equal to itself, so an equality-based comparison could not succeed.
+///
+/// Every failure path reports the value that was actually found, so a failing run can be
+/// diagnosed without installing a logger.
+#[test]
+fn test_float_specials() {
+    let result = parse_file(&test_path("good/floatSpecials.ion"));
+
+    let tlvs = result.expect("good/floatSpecials.ion should parse");
+
+    match tlvs.iter().next() {
+        Some(ion::Value {
+            value: ion::Data::List(Some(ion::List { values: list })),
+            ..
+        }) => {
+            let mut values = list.iter();
+
+            // Pulls the next list element, insisting that it is a non-null float.
+            let mut next_float = || match values.next() {
+                Some(ion::Value {
+                    value: ion::Data::Float(Some(value)),
+                    ..
+                }) => value,
+                other => panic!(
+                    "expected a non-null float in good/floatSpecials.ion, found {:?}",
+                    other
+                ),
+            };
+
+            let first = next_float();
+            assert!(first.is_nan(), "first float should be nan, found {:?}", first);
+            let second = next_float();
+            assert!(
+                second.is_infinite() && second.is_sign_positive(),
+                "second float should be +inf, found {:?}",
+                second
+            );
+            let third = next_float();
+            assert!(
+                third.is_infinite() && third.is_sign_negative(),
+                "third float should be -inf, found {:?}",
+                third
+            );
+        }
+        other => panic!(
+            "expected the first top-level value of good/floatSpecials.ion to be a non-null list, found {:?}",
+            other
+        ),
+    }
+}
+
+/// Parses `good/floatsWithUnderscores.ion` and hands the result to `verify_tlvs` together
+/// with the expected floats written without digit separators.
+#[test]
+fn test_floats_with_underscores() {
+    let result = parse_file(&test_path("good/floatsWithUnderscores.ion"));
+
+    let expected = vec![
+        float("1234.5678e0"),
+        float("1234e56"),
+        float("1234.5678e90"),
+    ];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/floatWithTerminatingEof.ion` and expects the single float `1.23e1`.
+#[test]
+fn test_float_with_terminating_eof() {
+    let result = parse_file(&test_path("good/floatWithTerminatingEof.ion"));
+
+    let expected = vec![float("1.23e1")];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/hexWithTerminatingEof.ion` and expects the single integer `3`, built here
+/// from the radix-16 string "3".
+#[test]
+fn test_hex_with_terminating_eof() {
+    let result = parse_file(&test_path("good/hexWithTerminatingEof.ion"));
+
+    let expected = vec![int_s("3", 16)];
+
+    verify_tlvs(expected, result);
+}
+
+/// Parses `good/innerVersionIdentifiers.ion` and expects `$ion_1_0`-style text nested inside
+/// a sexp, a list and a struct (including as an annotation) to come back as ordinary symbols.
+#[test]
+fn test_inner_version_identifiers() {
+    let result = parse_file(&test_path("good/innerVersionIdentifiers.ion"));
+
+    let expected = vec![
+        sexp(vec![
+            symbol("$ion_1_0"),
+            symbol("$ion_2300_34"),
+            value(symbol_data("$ion_1_0"), vec![annot("foo")]),
+            value(symbol_data("$ion_1_0"), vec![annot("$ion_1_0")]),
+            sexp(vec![symbol("$ion_1_0")]),
+        ]),
+        list(vec![
+            symbol("$ion_1_0"),
+            symbol("$ion_2300_34"),
+            value(symbol_data("$ion_1_0"), vec![annot("foo")]),
+            value(symbol_data("$ion_1_0"), vec![annot("$ion_1_0")]),
+            list(vec![symbol("$ion_1_0")]),
+        ]),
+        map(vec![
+            ("a".into(), symbol("$ion_1_0")),
+            ("b".into(), symbol("$ion_2300_34")),
+            (
+                "c".into(),
+                value(symbol_data("$ion_1_0"), vec![annot("foo")]),
+            ),
+            (
+                "d".into(),
+                value(symbol_data("$ion_1_0"), vec![annot("$ion_1_0")]),
+            ),
+            ("e".into(), map(vec![("f".into(), symbol("$ion_1_0"))])),
+        ]),
+    ];
+
+    verify_tlvs(expected, result);
+}
+
+#[test] +fn test_int_big_size_256() { + let result = parse_file(&test_path("good/intBigSize256.ion")); + + let expected = vec![int_s("18173238162219679736857031944447898744767430095109316084451026048678348094928854458274167288816962557611640075817315237016025726423548207924331642028847993938530524659112028449811515920726159569583847554301932799584192974700038250645135419704389244690214111003505621818033044965879076306690914532152840279256440975668846810694285470204245958782248405612488959069641454132691581386219910938587286910894148564397155066367399697230287047229035630842240888106685623631032505806388903066971508775182055551847210338095961815021030725796281642316166745051164958432783938535334657296749823645911331793861360616240344479015948", 10)]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_int_big_size_512() { + let result = parse_file(&test_path("good/intBigSize512.ion")); + + let expected = vec![int_s("-FE95F4CFF19A8EE2EDBBEE30C7C0ACBB83BFC4C0A58E8B94BB6250AEEAF3DB8F41B0ACDBB94B990C518D96C5EE3C02E276E06E07570A2B6E5DEA9FE4FAC8475A84EFCA8A8432D6D463BF0CEB470B4AD9B3B0C80730492E5EE660BCA86932D933C471F178140C5256AFFE4EF5C0404D74B4B7776E77178B3281E1C5B65AD8866BCBAA6225C4E1C5B9624B19DCC6001AFC3535A3769C8E937B7E3F9073AB0053CC0FFEB34124D5D570749D0181F4D4DEDCED7D28F038247BEFA18CE02A3D1293DA637BB1AB6598BB6617A6A5CE0512C390236DBCA283ADF0291E6903FBD6819D4C5A8216C69E2083DA5B3FEFB0928B208345A39207C8461E38F793036146107559ADF2F40612D25F14D45D7E2780B45E2CF9B5790D91AAAF327AF3323E20242C2632A64725844F1D9E218AAB0D56EE99AE486034D7B3FBFC4DCE8C9CC2A793CE93AFFE81DEE7158DAD7F0623CE692C8ED0975DBEEF9A717A0B63F90AF4FEBC96785A6FF4E06B090A65D33C98932DF39F7C5B807956A19897E0C3463046DF2EB4DF624C7C43BEF48FAB381A857B9F5B6C1BDBD6B3270C107CD3BC1C41FE04E1DDAC69F14119DE961AF773285544F819F3951542F704B501FF0364BF54D14A86E19BEC39394C85A6B256C6233DA801A44F5DB98CCDD8D9BB6788C014216DD57CB64573333CEED5B5C72A4EE296E75B3E32ED69083675A6A8F6B8AC85DEAED88AD0A7", 16)]; + + verify_tlvs(expected, 
result); +} + +#[test] +fn test_integer_values() { + let result = parse_file(&test_path("good/integer_values.ion")); + + let expected = vec![ + int_i64(0), + int_i64(42), + int_i64(2112), + int_i64(-999), + int_i64(-0), + int_i64(987_654_321), + int_i64(-123_456_789), + int_s("10", 16), + int_s("ff", 16), + int_s("FF", 16), + int_s("A", 16), + int_s("AbCdEf", 16), + int_s("123456789", 16), + int_s("1234567890abcdef", 16), + int_s("-1234567890ABCDEF", 16), + int_s("0", 16), + int_s("-0", 16), + int_s("-FFFF", 16), + int_s("00FF", 16), + int_s("-00FF", 16), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_int_negative_one_two_eight() { + let result = parse_file(&test_path("good/intNegativeOneTwoEight.ion")); + + let expected = vec![int_i64(-128)]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_int_neg_zero() { + let result = parse_file(&test_path("good/intNegZero.ion")); + + let expected = vec![int_i64(-0)]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_ints_with_underscores() { + let result = parse_file(&test_path("good/intsWithUnderscores.ion")); + + let expected = vec![ + int_i64(123), + int_s("abcd", 16), + int_s("11110000", 2), + int_s("100000", 10), + int_s("-123", 10), + int_s("-abcd", 16), + int_s("-11110000", 2), + int_s("-100000", 10), + sexp(vec![int_i64(123)]), + sexp(vec![int_s("abcd", 16)]), + sexp(vec![int_s("11110000", 2)]), + sexp(vec![int_s("100000", 10)]), + sexp(vec![int_s("-123", 10)]), + sexp(vec![int_s("-abcd", 16)]), + sexp(vec![int_s("-11110000", 2)]), + sexp(vec![int_s("-100000", 10)]), + sexp(vec![int_i64(123), int_i64(123)]), + sexp(vec![int_s("abcd", 16), int_s("abcd", 16)]), + sexp(vec![int_s("11110000", 2), int_s("11110000", 2)]), + sexp(vec![int_s("100000", 10), int_s("100000", 10)]), + sexp(vec![int_s("-123", 10), int_s("-123", 10)]), + sexp(vec![int_s("-abcd", 16), int_s("-abcd", 16)]), + sexp(vec![int_s("-11110000", 2), int_s("-11110000", 2)]), + sexp(vec![int_s("-100000", 10), int_s("-100000", 
10)]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_int_with_terminating_eof() { + let result = parse_file(&test_path("good/intWithTerminatingEof.ion")); + + let expected = vec![int_i64(1247)]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_lists() { + pretty_env_logger::try_init().ok(); + let result = parse_file(&test_path("good/lists.ion")); + + let expected = vec![ + list(vec![ + int_i64(1), + int_i64(2), + int_i64(3), + int_i64(4), + int_i64(5), + ]), + list(vec![ + int_i64(1), + int_i64(2), + int_i64(3), + int_i64(4), + int_i64(5), + ]), + list(vec![ + int_i64(1), + list(vec![int_i64(2), int_i64(3)]), + list(vec![list(vec![list(vec![int_i64(5)])])]), + ]), + list(vec![ + int_i64(1), + sexp(vec![int_i64(2), int_i64(3)]), + list(vec![int_i64(4), sexp(vec![int_i64(5)])]), + ]), + list(vec![ + boolean(true), + decimal("34", "-1"), + decimal("3", "6"), + float("2.3e8"), + string("string"), + string("multi-string"), + symbol("Symbol"), + symbol("qSymbol"), + clob(b"clob data"), + blob_encoded(b"YmxvYiBkYXRh"), + timestamp(TextDate::day(1970, 6, 6).unwrap(), None, None), + ion::Data::Struct(None).into(), + ]), + list(vec![ + map(vec![("one".into(), int_i64(1))]), + int_i64(2), + int_i64(3), + ]), + list(vec![int_s("ab", 16)]), + list(vec![symbol("symbol")]), + list(vec![string("string")]), + list(vec![symbol("symbol")]), + list(vec![float("+inf")]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_local_symbol_table_import_zero_max_id() { + let result = parse_file(&test_path("good/localSymbolTableImportZeroMaxId.ion")); + + let expected = vec![symbol("a")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_message_2() { + let result = parse_file(&test_path("good/message2.ion")); + + let expected = vec![value( + map_data(vec![ + ("submission_id".into(), int_i64(99999)), + ("customer_id".into(), int_i64(1234)), + ("sku".into(), string("XXX")), + ("version".into(), int_i64(1)), + ("marketplace_ids".into(), 
list(vec![int_i64(1)])), + ( + "offer_listings".into(), + list(vec![map(vec![("marketplace_id".into(), int_i64(1))])]), + ), + ( + "product".into(), + map(vec![ + ( + "one".into(), + list(vec![map(vec![("value".into(), string("A"))])]), + ), + ( + "two".into(), + list(vec![ + map(vec![("value".into(), string("A"))]), + map(vec![("value".into(), string("B"))]), + ]), + ), + ( + "three".into(), + list(vec![ + map(vec![("value".into(), string("A"))]), + map(vec![("value".into(), string("B"))]), + map(vec![("value".into(), string("C"))]), + ]), + ), + ]), + ), + ]), + vec![annot("contribution")], + )]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_multiple_annotations() { + let result = parse_file(&test_path("good/multipleAnnotations.ion")); + + let expected = vec![value( + symbol_data("value"), + vec![annot("annot1"), annot("annot2")], + )]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_non_nulls() { + let result = parse_file(&test_path("good/nonNulls.ion")); + + let expected = vec![ + int_i64(0), + decimal("0", "-1"), + decimal("0", "0"), + float("0e0"), + string(""), + string(""), + blob_decoded(b""), + clob(b""), + list(vec![]), + sexp(vec![]), + map(vec![]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_not_version_marker() { + let result = parse_file(&test_path("good/notVersionMarkers.ion")); + + let expected = vec![ + value(symbol_data("$ion_1_0"), vec![annot("a1")]), + value(symbol_data("$ion_1234_1"), vec![annot("a2")]), + value(symbol_data("$ion_1_0"), vec![annot("$ion_1_0")]), + value( + symbol_data("$ion_1_0"), + vec![annot("a3"), annot("$ion_1234_2")], + ), + value(symbol_data("$ion_1_0"), vec![annot("$ion_symbol_table")]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_nulls() { + let result = parse_file(&test_path("good/nulls.ion")); + + let expected = vec![ + ion::Data::Null.into(), + ion::Data::Null.into(), + ion::Data::Int(None).into(), + ion::Data::Float(None).into(), + 
ion::Data::Decimal(None).into(), + ion::Data::Symbol(None).into(), + ion::Data::String(None).into(), + ion::Data::Timestamp(None).into(), + ion::Data::Blob(None).into(), + ion::Data::Clob(None).into(), + ion::Data::Bool(None).into(), + ion::Data::List(None).into(), + ion::Data::Sexp(None).into(), + ion::Data::Struct(None).into(), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_octal_000() { + let result = parse_file(&test_path("good/octal000.ion")); + + let expected = vec![string("0\x00000")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_one() { + let result = parse_file(&test_path("good/one.ion")); + + let expected = vec![int_i64(1)]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_operators() { + let result = parse_file(&test_path("good/operators.ion")); + + let expected = vec![sexp(vec![ + symbol("!"), + symbol("#"), + symbol("%"), + symbol("&"), + symbol("*"), + symbol("+"), + symbol("-"), + symbol("."), + symbol("/"), + symbol(";"), + symbol("<"), + symbol("="), + symbol(">"), + symbol("?"), + symbol("@"), + symbol("^"), + symbol("`"), + symbol("|"), + symbol("~"), + ])]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_sexp_annotation_quoted_operator() { + let result = parse_file(&test_path("good/sexpAnnotationQuotedOperator.ion")); + + let expected = vec![sexp(vec![value(int_i64_data(23), vec![annot("@")])])]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_sexps() { + let result = parse_file(&test_path("good/sexps.ion")); + + let expected = vec![ + sexp(vec![ + symbol("this"), + symbol("is"), + symbol("a"), + symbol("sexp"), + symbol("list"), + ]), + sexp(vec![ + symbol("`~!@/%^&*-+=|;<>?."), + int_i64(3), + symbol("--"), + symbol("-"), + int_i64(4), + ]), + sexp(vec![ + symbol("+"), + symbol("++"), + symbol("+-+"), + symbol("-++"), + symbol("-"), + symbol("--"), + symbol("---"), + int_i64(-3), + symbol("-"), + int_i64(3), + symbol("--"), + int_i64(3), + symbol("--"), + int_i64(3), + ]), + sexp(vec![ 
+ symbol("+"), + symbol("++"), + symbol("+-+"), + symbol("-++"), + symbol("-"), + symbol("--"), + symbol("---"), + int_i64(-3), + symbol("-"), + int_i64(3), + symbol("--"), + int_i64(3), + symbol("--"), + int_i64(3), + ]), + sexp(vec![ + symbol("&"), + sexp(vec![ + symbol("%"), + symbol("-"), + list(vec![int_i64(42), int_i64(3)]), + symbol("+"), + sexp(vec![int_i64(2)]), + symbol("-"), + ]), + ]), + sexp(vec![sexp(vec![sexp(vec![])])]), + sexp(vec![list(vec![])]), + sexp(vec![ + ion::Data::Null.into(), + symbol("."), + symbol("timestamps"), + ]), + sexp(vec![symbol("op1"), symbol("."), symbol("op2")]), + sexp(vec![ + value(symbol_data("+++"), vec![annot("a_plus_plus_plus_operator")]), + value(int_i64_data(3), vec![annot("a_3")]), + ]), + sexp(vec![ + value(symbol_data("+++"), vec![annot("a_plus_plus_plus_operator")]), + value(int_i64_data(3), vec![annot("a_3")]), + ]), + sexp(vec![value( + symbol_data("+++"), + vec![annot("a_plus_plus_plus_operator")], + )]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_strings() { + let result = parse_file(&test_path("good/strings.ion")); + + let expected = vec![ + string("a b c d e f g h i j k l m n o p q r s t u v w x y z"), + string("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"), + string("1 2 3 4 5 6 7 8 9 0"), + string(", . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?"), + string( + "\x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? 
\\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/", + ), + string("\u{aa5f}"), + string("\u{abcd} \u{d7ff} \u{ffff} \u{1234} \u{4e6a} \u{d37b}\u{f4c2}\u{0000}\x00\u{ff}"), + string("\u{ABCD} \u{D7FF} \u{FFFF} \u{1234} \u{4E6A} \u{D37B}\u{F4C2}\u{0000}\x00\u{ff}"), + string("\u{aBcD} \u{D7ff} \u{FffF} \u{1234} \u{4E6a} \u{d37B}\u{F4c2}\u{0000}\x00\u{ff}"), + string("Stuff to write on multiple lines if you want to"), + string(""), + string(""), + string(""), + string("concatenated from a single line"), + string(""), + string("a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z , . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ? \x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? \\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/\u{abcd} \u{d7ff} \u{ffff} \u{1234} \u{4e6a} \u{d37b}\u{f4c2}\u{0000}\x00\u{ff}\u{ABCD} \u{D7FF} \u{FFFF} \u{1234} \u{4E6A} \u{D37B}\u{F4C2}\u{0000}\x00\u{ff}\u{aBcD} \u{D7ff} \u{FffF} \u{1234} \u{4E6a} \u{d37B}\u{F4c2}\u{0000}\x00\u{ff}"), + string(""), + string("multi-line string\nwith embedded\nnew line\ncharacters\u{1234}"), + string("\x7f"), + string("\x7f"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_strings2() { + let result = parse_file(&test_path("good/strings2.ion")); + + let expected = vec![ + string("a b c d e f g h i j k l m n o p q r s t u v w x y z"), + string("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"), + string("1 2 3 4 5 6 7 8 9 0"), + string(", . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?"), + string( + "\x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? 
\\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/", + ), + string("\u{abcd} \u{ffff} \u{1234} \u{4e6a} \u{d37b}\u{f4c2}\u{0000}\x00\u{ff}"), + string("\u{ABCD} \u{cFFF} \u{1234} \u{4E6A} \u{D37B}\u{F4C2}\u{0000}\x00\u{ff}"), + string("\u{aBcD} \u{cffF} \u{1234} \u{4E6a} \u{d37B}\u{F4c2}\u{0000}\x00\u{ff}"), + string("\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}"), + string(".\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}"), + string("..\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}"), + string("...\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}\u{F987}"), + string("Stuff to write on multiple lines if you want to"), + string(""), + string(""), + string(""), + string("concatenated from a single line"), + string(""), + string("a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z , . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ? \x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? 
\\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/\u{abcd} \u{cfff} \u{1234} \u{4e6a} \u{d37b}\u{f4c2}\u{0000}\x00\u{ff}\u{ABCD} \u{CFFF} \u{1234} \u{4E6A} \u{D37B}\u{F4C2}\u{0000}\x00\u{FF}\u{aBcD} \u{CffF} \u{1234} \u{4E6a} \u{d37B}\u{F4c2}\u{0000}\x00\u{fF}"), + string(""), + string("multi-line string\nwith embedded\nnew line\ncharacters"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_strings_cr_nl() { + let result = parse_file(&test_path("good/strings_cr_nl.ion")); + + let expected = vec![string( + "short1multi-line string\r\nwith embedded\nnew line\r\ncharacters", + )]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_strings_nl() { + let result = parse_file(&test_path("good/strings_nl.ion")); + + let expected = vec![string( + "short1multi-line string\nwith embedded\nnew line\ncharacters", + )]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_strings_with_whitespace() { + let result = parse_file(&test_path("good/stringsWithWhitespace.ion")); + + let expected = vec![string(" "), string(" "), string(" "), string(" "), string(" ")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_struct_field_annotations_unquoted_then_quoted() { + let result = parse_file(&test_path( + "good/structFieldAnnotationsUnquotedThenQuoted.ion", + )); + + let expected = vec![map(vec![( + "f".into(), + value(ion::Data::Null, vec![annot("a"), annot("b")]), + )])]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_structs() { + let result = parse_file(&test_path("good/structs.ion")); + + let symbol_iter = iter::repeat_with(|| { + map(vec![ + ("a".into(), symbol("b")), + ("c".into(), int_i64(42)), + ("d".into(), map(vec![("e".into(), symbol("f"))])), + ("g".into(), int_i64(3)), + ]) + }); + + let string_iter = iter::repeat_with(|| { + map(vec![ + ("a".into(), string("b")), + ("c".into(), int_i64(42)), + ("d".into(), map(vec![("e".into(), string("f"))])), + ("g".into(), int_i64(3)), + ]) + }); + + let simple_iter = iter::repeat_with(|| 
map(vec![("123456789ABCDEF".into(), symbol("v"))])); + + let mut expected = vec![]; + + expected.extend(symbol_iter.take(2)); + expected.extend(string_iter.take(1)); + expected.extend(symbol_iter.take(2)); + expected.extend(string_iter.take(1)); + expected.extend(symbol_iter.take(2)); + expected.extend(string_iter.take(1)); + expected.extend(symbol_iter.take(2)); + expected.extend(string_iter.take(1)); + expected.extend(simple_iter.take(3)); + expected.push(map(vec![( + "123456789ABCDEF123456789ABCDEF".into(), + symbol("v"), + )])); + expected.push(map(vec![("123\n455".into(), symbol("v"))])); + expected.push(map(vec![("123456789ABCDEF\nGHI".into(), symbol("v"))])); + + verify_tlvs(expected, result); +} + +#[test] +fn test_subfield_int() { + let result = parse_file(&test_path("good/subfieldInt.ion")); + + let expected = vec![ + decimal("126", "0"), + decimal("127", "0"), + decimal("128", "0"), + decimal("-126", "0"), + decimal("-127", "0"), + decimal("-128", "0"), + decimal("32766", "0"), + decimal("32767", "0"), + decimal("32768", "0"), + decimal("-32766", "0"), + decimal("-32767", "0"), + decimal("-32768", "0"), + decimal("8388606", "0"), + decimal("8388607", "0"), + decimal("8388608", "0"), + decimal("-8388606", "0"), + decimal("-8388607", "0"), + decimal("-8388608", "0"), + decimal("2147483646", "0"), + decimal("2147483647", "0"), + decimal("2147483648", "0"), + decimal("-2147483646", "0"), + decimal("-2147483647", "0"), + decimal("-2147483648", "0"), + decimal("549755813886", "0"), + decimal("549755813887", "0"), + decimal("549755813888", "0"), + decimal("-549755813886", "0"), + decimal("-549755813887", "0"), + decimal("-549755813888", "0"), + decimal("140737488355326", "0"), + decimal("140737488355327", "0"), + decimal("140737488355328", "0"), + decimal("-140737488355326", "0"), + decimal("-140737488355327", "0"), + decimal("-140737488355328", "0"), + decimal("36028797018963966", "0"), + decimal("36028797018963967", "0"), + decimal("36028797018963968", "0"), 
+ decimal("-36028797018963966", "0"), + decimal("-36028797018963967", "0"), + decimal("-36028797018963968", "0"), + decimal("9223372036854775806", "0"), + decimal("9223372036854775807", "0"), + decimal("9223372036854775808", "0"), + decimal("-9223372036854775806", "0"), + decimal("-9223372036854775807", "0"), + decimal("-9223372036854775808", "0"), + decimal("18446744073709551614", "0"), + decimal("18446744073709551615", "0"), + decimal("18446744073709551616", "0"), + decimal("-18446744073709551614", "0"), + decimal("-18446744073709551615", "0"), + decimal("-18446744073709551616", "0"), + decimal("2361183241434822606846", "0"), + decimal("2361183241434822606847", "0"), + decimal("2361183241434822606848", "0"), + decimal("-2361183241434822606846", "0"), + decimal("-2361183241434822606847", "0"), + decimal("-2361183241434822606848", "0"), + decimal("604462909807314587353086", "0"), + decimal("604462909807314587353087", "0"), + decimal("604462909807314587353088", "0"), + decimal("-604462909807314587353086", "0"), + decimal("-604462909807314587353087", "0"), + decimal("-604462909807314587353088", "0"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_subfield_uint() { + let result = parse_file(&test_path("good/subfieldUint.ion")); + + let expected = vec![ + int_s("254", 10), + int_s("255", 10), + int_s("256", 10), + int_s("65534", 10), + int_s("65535", 10), + int_s("65536", 10), + int_s("16777214", 10), + int_s("16777215", 10), + int_s("16777216", 10), + int_s("2147483646", 10), + int_s("2147483647", 10), + int_s("2147483648", 10), + int_s("4294967294", 10), + int_s("4294967295", 10), + int_s("4294967296", 10), + int_s("1099511627774", 10), + int_s("1099511627775", 10), + int_s("1099511627776", 10), + int_s("281474976710654", 10), + int_s("281474976710655", 10), + int_s("281474976710656", 10), + int_s("72057594037927934", 10), + int_s("72057594037927935", 10), + int_s("72057594037927936", 10), + int_s("9223372036854775806", 10), + 
int_s("9223372036854775807", 10), + int_s("9223372036854775808", 10), + int_s("18446744073709551614", 10), + int_s("18446744073709551615", 10), + int_s("18446744073709551616", 10), + int_s("4722366482869645213694", 10), + int_s("4722366482869645213695", 10), + int_s("4722366482869645213696", 10), + int_s("1208925819614629174706174", 10), + int_s("1208925819614629174706175", 10), + int_s("1208925819614629174706176", 10), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_subfield_var_int() { + let result = parse_file(&test_path("good/subfieldVarInt.ion")); + + let expected = vec![ + decimal("0", "62"), + decimal("0", "63"), + decimal("0", "64"), + decimal("0", "8190"), + decimal("0", "8191"), + decimal("0", "8192"), + decimal("0", "1048574"), + decimal("0", "1048575"), + decimal("0", "1048576"), + decimal("0", "134217726"), + decimal("0", "134217727"), + decimal("0", "134217728"), + decimal("0", "2147483646"), + decimal("0", "2147483647"), + // decimal("0", "2147483648"), // Outstanding bug in Amazon internal implementation. 
+ decimal("123456789012345678901234567890123456789012345678901234567890", "-61"), + decimal("1234567890123456789012345678901234567890123456789012345678901", "-62"), + decimal("12345678901234567890123456789012345678901234567890123456789012", "-63"), + decimal("123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678", "-8189"), + decimal("1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", "-8190"), + decimal("1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
67890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
67890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
67890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
67890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890", "-8191"), + ]; + + verify_tlvs(expected, result); +} + +// TODO(#13): need a sparse data structure for symbol tables before this test is reasonable. +#[ignore] +#[test] +fn test_subfield_var_uint() { + let result = parse_file(&test_path("good/subfieldVarUInt.ion")); + + let expected = vec![ + value(int_i64_data(1), vec![annot("boundary-1")]), + value(int_i64_data(1), vec![annot("boundary")]), + value(int_i64_data(1), vec![annot("boundary+1")]), + value(int_i64_data(1), vec![annot("boundary-1")]), + value(int_i64_data(1), vec![annot("boundary")]), + value(int_i64_data(1), vec![annot("boundary+1")]), + value(int_i64_data(1), vec![annot("boundary-1")]), + value(int_i64_data(1), vec![annot("boundary")]), + value(int_i64_data(1), vec![annot("boundary+1")]), + value(int_i64_data(1), vec![annot("boundary-1")]), + value(int_i64_data(1), vec![annot("boundary")]), + value(int_i64_data(1), vec![annot("boundary+1")]), + ]; + + verify_tlvs(expected, result); +} + +// TODO(#13): need a sparse data structure for symbol tables before this test is reasonable. 
+#[ignore] +#[test] +fn test_subfield_var_uint_15bit() { + let result = parse_file(&test_path("good/subfieldVarUInt15bit.ion")); + + let expected = vec![]; + + verify_tlvs(expected, result); +} + +// TODO(#13): need a sparse data structure for symbol tables before this test is reasonable. +#[ignore] +#[test] +fn test_subfield_var_uint_16bit() { + let result = parse_file(&test_path("good/subfieldVarUInt16bit.ion")); + + let expected = vec![]; + + verify_tlvs(expected, result); +} + +// TODO(#13): need a sparse data structure for symbol tables before this test is reasonable. +#[ignore] +#[test] +fn test_subfield_var_uint_32bit() { + let result = parse_file(&test_path("good/subfieldVarUInt32bit.ion")); + + let expected = vec![]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_empty() { + let result = parse_file(&test_path("good/symbolEmpty.ion")); + + let expected = vec![ + symbol(""), + map(vec![("".into(), symbol("abc"))]), + value(symbol_data("abc"), vec![annot("")]), + value(symbol_data(""), vec![annot("")]), + map(vec![("".into(), value(symbol_data(""), vec![annot("")]))]), + value(symbol_data(""), vec![annot("abc")]), + map(vec![("".into(), symbol("abc"))]), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_empty_with_cr() { + let result = parse_file(&test_path("good/symbolEmptyWithCR.ion")); + + let expected = vec![symbol("")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_empty_with_cr_lf() { + let result = parse_file(&test_path("good/symbolEmptyWithCRLF.ion")); + + let expected = vec![symbol("")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_empty_with_lf() { + let result = parse_file(&test_path("good/symbolEmptyWithLF.ion")); + + let expected = vec![symbol("")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_empty_with_lflf() { + let result = parse_file(&test_path("good/symbolEmptyWithLFLF.ion")); + + let expected = vec![symbol("")]; + + verify_tlvs(expected, 
result); +} + +#[test] +fn test_symbols() { + let result = parse_file(&test_path("good/symbols.ion")); + + let expected = vec![ + symbol("a b c d e f g h i j k l m n o p q r s t u v w x y z"), + symbol("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"), + symbol("1 2 3 4 5 6 7 8 9 0"), + symbol(", . ; / [ \' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?"), + symbol( + "\x00 \x07 \x08 \t \n \x0c \r \x0b \" \' ? \\\\ / \x00\x07\x08\t\n\x0c\r\x0b\"\'?\\\\/", + ), + symbol("\u{abcd} \u{d7ff} \u{ffff} \u{1234} \u{4e6a} \u{d37b}\u{f4c2}\u{0000}\x00\u{ff}"), + symbol("\u{ABCD} \u{D7FF} \u{FFFF} \u{1234} \u{4E6A} \u{D37B}\u{F4C2}\u{0000}\x00\u{ff}"), + symbol("\u{aBcD} \u{D7ff} \u{FffF} \u{1234} \u{4E6a} \u{d37B}\u{F4c2}\u{0000}\x00\u{ff}"), + symbol("bareSymbol"), + symbol("BareSymbol"), + symbol("$bare"), + symbol("_bare"), + symbol("zzzzz"), + symbol("aaaaa"), + symbol("ZZZZZ"), + symbol("AAAAA"), + symbol("z"), + symbol("Z"), + symbol("a"), + symbol("A"), + symbol("_"), + symbol("$"), + symbol("_9876543210"), + symbol("$ion_symbol_table"), + symbol("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$_"), + symbol("$99"), + ]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_with_del() { + let result = parse_file(&test_path("good/symbolWithDel.ion")); + + let expected = vec![symbol("\x7f")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_with_whitespace() { + let result = parse_file(&test_path("good/symbolWithSpecialWhitespace.ion")); + + let expected = vec![symbol("\x09"), symbol("\x0B"), symbol("\x0C")]; + + verify_tlvs(expected, result); +} + +#[test] +fn test_symbol_zero() { + let result = parse_file(&test_path("good/symbolZero.ion")); + + let expected = vec![ + ion::Data::Symbol(Some(SymbolToken::Zero)).into(), + value(symbol_data("abc"), vec![Some(SymbolToken::Zero)]), + map(vec![(SymbolToken::Zero, symbol("abc"))]), + map(vec![( + SymbolToken::Zero, + value(symbol_data("abc"), 
/// Parses `good/testfile1.ion` and expects a single struct annotated `PurchaseOrder`
/// with the string fields `name` and `quantity`.
#[test]
fn test_testfile1() {
    let parsed = parse_file(&test_path("good/testfile1.ion"));

    let fields: Vec<(SymbolToken, ion::Value)> = vec![
        ("name".into(), string("widgets")),
        ("quantity".into(), string("6483021036")),
    ];
    let order = value(map_data(fields), vec![annot("PurchaseOrder")]);

    verify_tlvs(vec![order], parsed);
}
/// Parses `good/testfile8.ion` and expects a single struct annotated `root` that
/// carries two strings followed by the nested `xmlns1`, `xmlns2` and `schema` structs.
#[test]
fn test_testfile8() {
    let parsed = parse_file(&test_path("good/testfile8.ion"));

    // The nested structs are built first so the outer field list stays flat.
    let xmlns1 = map(vec![(
        "namespace1".into(),
        string("x-schema:ado-schema.xml"),
    )]);
    let xmlns2 = map(vec![
        ("prefix".into(), symbol("xs")),
        (
            "namespace2".into(),
            string("http://www.w3.org/2001/XMLSchema"),
        ),
    ]);
    let element = map(vec![("name".into(), string("data"))]);
    let schema = map(vec![
        ("attributeFormDefault".into(), string("qualified")),
        ("elementFormDefault".into(), string("qualified")),
        ("element".into(), element),
    ]);

    let root = map_data(vec![
        ("x".into(), string("6483021025")),
        ("targetNamespace".into(), string("x-schema:ado-schema.xml")),
        ("xmlns1".into(), xmlns1),
        ("xmlns2".into(), xmlns2),
        ("schema".into(), schema),
    ]);

    verify_tlvs(vec![value(root, vec![annot("root")])], parsed);
}
/// Parses `good/testfile14.ion` and expects a single struct annotated `m` whose
/// fields `f1`..`f5` are all floats.
#[test]
fn test_testfile14() {
    let parsed = parse_file(&test_path("good/testfile14.ion"));

    // (field name, float literal) in document order.
    let literals = [
        ("f1", "0.0e0"),
        ("f2", "0.0001e0"),
        ("f3", "1.00e0"),
        ("f4", "1.0e5"),
        ("f5", "123456789012345.0e0"),
    ];
    let fields: Vec<(SymbolToken, ion::Value)> = literals
        .iter()
        .map(|&(name, text)| (name.into(), float(text)))
        .collect();

    verify_tlvs(vec![value(map_data(fields), vec![annot("m")])], parsed);
}
/// Parses `good/testfile19.ion` and expects a single s-expression annotated
/// `testexpr` that contains four symbols.
#[test]
fn test_testfile19() {
    let parsed = parse_file(&test_path("good/testfile19.ion"));

    let words = ["this", "is", "an", "expression"];
    let symbols: Vec<ion::Value> = words.iter().map(|word| symbol(word)).collect();
    let expression = value(sexp_data(symbols), vec![annot("testexpr")]);

    verify_tlvs(vec![expression], parsed);
}
/// Parses `good/testfile24.ion` and expects a single s-expression of thirteen floats.
#[test]
fn test_testfile24() {
    let parsed = parse_file(&test_path("good/testfile24.ion"));

    // Float literals in document order; each is converted with the `float` helper.
    let literals = [
        "12.345678e0",
        "2000000000e0",
        "4000000000e0",
        "5000000000e0",
        "20000000000.00e0",
        "-0.0e0",
        "-1.0e0",
        "-2.0e0",
        "-12.345678e0",
        "-2000000000e0",
        "-4000000000e0",
        "-5000000000e0",
        "-20000000000.00e0",
    ];
    let floats: Vec<ion::Value> = literals.iter().map(|text| float(text)).collect();

    verify_tlvs(vec![sexp(floats)], parsed);
}
/// Parses `good/testfile31.ion` and expects a single s-expression of six decimals.
#[test]
fn test_testfile31() {
    let parsed = parse_file(&test_path("good/testfile31.ion"));

    // (coefficient, exponent) pairs in document order.
    let parts = [
        ("2000000000", "0"),
        ("4000000000", "0"),
        ("5000000000", "0"),
        ("2000000000000", "-2"),
        ("-0", "-1"),
        ("-10", "-1"),
    ];
    let decimals: Vec<ion::Value> = parts
        .iter()
        .map(|&(coefficient, exponent)| decimal(coefficient, exponent))
        .collect();

    verify_tlvs(vec![sexp(decimals)], parsed);
}
/// Parses `good/whitespace.ion` and expects the triple `1`, `a`, `(1 a)` three times
/// in a row.
#[test]
fn test_whitespace() {
    let parsed = parse_file(&test_path("good/whitespace.ion"));

    let mut wanted = Vec::new();
    for _ in 0..3 {
        wanted.push(int_i64(1));
        wanted.push(symbol("a"));
        wanted.push(sexp(vec![int_i64(1), symbol("a")]));
    }

    verify_tlvs(wanted, parsed);
}
/// Collects the paths of all files with the `ion` extension below `dir`, descending
/// into sub-directories.
///
/// A `dir` that is not a directory yields an empty list. The order of the returned
/// paths follows `fs::read_dir` and is therefore not guaranteed.
///
/// # Errors
///
/// Propagates any I/O error raised while listing a directory or reading an entry.
fn find_ion_text(dir: &Path) -> io::Result<Vec<PathBuf>> {
    if !dir.is_dir() {
        return Ok(Vec::new());
    }

    let mut found = Vec::new();
    for item in fs::read_dir(dir)? {
        let candidate = item?.path();
        if candidate.is_dir() {
            found.extend(find_ion_text(&candidate)?);
        } else if candidate.extension() == Some(OsStr::new("ion")) {
            found.push(candidate);
        }
    }
    Ok(found)
}
+fn verify_tlvs(expected: Vec, actuals: Result, String>) { + if let Err(e) = actuals { + pretty_env_logger::try_init().ok(); + panic!("test failed: {}", e) + } + + for (count, result) in expected + .into_iter() + .zip_longest(actuals.unwrap().into_iter()) + .enumerate() + { + match result { + EitherOrBoth::Both(expected, actual) => { + if expected != actual { + pretty_env_logger::try_init().ok(); + error!( + "Failed on top level value {}:", + count + 1 + ); + error!("Expected:", ); + error!("{:?}", expected); + error!("Actual:", ); + error!("{:?}", actual); + panic!("expected/actual differ at {}", count + 1); + } + } + EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => { + panic!("expected/actuals lists differ in length (short one ends at {}), all good until here", count + 1) + } + } + } +} + +// A set of helpers to remove boiler plate for the massive number of tests below. +fn value(value: ion::Data, annotations: Vec>) -> ion::Value { + ion::Value { value, annotations } +} + +fn boolean(b: bool) -> ion::Value { + ion::Data::Bool(Some(b)).into() +} + +fn minute(hour: u8, minute: u8) -> TextTime { + TextTime::Minute { hour, minute } +} + +fn fractional_second( + hour: u8, + minute: u8, + second: u8, + fraction_coefficient: BigUint, + fraction_exponent: i32, +) -> TextTime { + TextTime::FractionalSecond { + hour, + minute, + second, + fraction_coefficient, + fraction_exponent, + } +} + +fn timestamp(date: TextDate, time: Option, offset: Option) -> ion::Value { + timestamp_data(date, time, offset).into() +} + +fn timestamp_data(date: TextDate, time: Option, offset: Option) -> ion::Data { + ion::Data::Timestamp(Some( + TextTimestamp::new(date, time, offset.unwrap_or(UtcOffset::UTC)) + .try_into() + .unwrap(), + )) +} + +fn decimal(coefficient: &str, exponent: &str) -> ion::Value { + let coefficient = BigInt::from_str(coefficient).unwrap(); + let exponent = BigInt::from_str(exponent).unwrap(); + ion::Data::Decimal(Some(ion::Decimal { + coefficient, + exponent, + })) + 
.into() +} + +fn float(s: &str) -> ion::Value { + ion::Data::Float(Some(lexical_core::parse(s.as_bytes()).unwrap())).into() +} + +fn int_i64_data(i: i64) -> ion::Data { + let int = BigInt::from(i); + ion::Data::Int(Some(int)) +} + +fn int_i64(i: i64) -> ion::Value { + int_i64_data(i).into() +} + +fn int_s(s: &str, radix: u32) -> ion::Value { + let int = BigInt::from_str_radix(s, radix).unwrap(); + ion::Data::Int(Some(int)).into() +} + +fn string(s: &str) -> ion::Value { + ion::Data::String(Some(s.to_string())).into() +} + +fn annot(s: &str) -> Option { + let text = s.to_string(); + Some(SymbolToken::Known { text }) +} + +fn symbol(s: &str) -> ion::Value { + symbol_data(s).into() +} + +fn symbol_data(s: &str) -> ion::Data { + let text = s.to_owned(); + ion::Data::Symbol(Some(SymbolToken::Known { text })) +} + +fn clob(d: &[u8]) -> ion::Value { + clob_data(d).into() +} + +fn clob_data(d: &[u8]) -> ion::Data { + let data = d.to_vec(); + ion::Data::Clob(Some(ion::Clob { data })) +} + +fn blob_decoded(d: &[u8]) -> ion::Value { + let data = d.to_vec(); + ion::Data::Blob(Some(ion::Blob { data })).into() +} + +fn blob_encoded(d: &[u8]) -> ion::Value { + blob_encoded_data(d).into() +} + +fn blob_encoded_data(d: &[u8]) -> ion::Data { + let data = base64::decode(d).unwrap(); + ion::Data::Blob(Some(ion::Blob { data })) +} + +fn sexp_data(values: Vec) -> ion::Data { + ion::Data::Sexp(Some(ion::Sexp { values })) +} + +fn sexp(values: Vec) -> ion::Value { + sexp_data(values).into() +} + +fn list(values: Vec) -> ion::Value { + ion::Data::List(Some(ion::List { values })).into() +} + +fn map_data(fields: Vec<(SymbolToken, ion::Value)>) -> ion::Data { + ion::Data::Struct(Some(ion::Struct { fields })) +} + +fn map(fields: Vec<(SymbolToken, ion::Value)>) -> ion::Value { + map_data(fields).into() +} diff --git a/src/parser/ion_1_0/text/time.rs b/src/parser/ion_1_0/text/time.rs new file mode 100644 index 0000000..1ebee62 --- /dev/null +++ b/src/parser/ion_1_0/text/time.rs @@ -0,0 +1,162 
@@ +use crate::{error::TextFormatError, value::Timestamp}; +use num_bigint::BigUint; +use std::{convert::TryFrom, fmt}; +use time::{ComponentRangeError, UtcOffset}; + +#[derive(Clone, PartialEq)] +pub enum TextDate { + Year { year: u16 }, + Month { year: u16, month: u8 }, + Day { date: time::Date }, +} + +impl TextDate { + pub(crate) fn day(year: u16, month: u8, day: u8) -> Result { + let date = time::Date::try_from_ymd(year as i32, month, day)?; + Ok(TextDate::Day { date }) + } + pub(crate) fn month(year: u16, month: u8) -> Self { + TextDate::Month { year, month } + } + pub(crate) fn year(year: u16) -> Self { + TextDate::Year { year } + } +} + +impl fmt::Debug for TextDate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TextDate::Year { year } => format!("{:04}", year).fmt(f), + TextDate::Month { year, month } => format!("{:04}-{:02}", year, month).fmt(f), + TextDate::Day { date } => date.format("%Y-%m-%d").fmt(f), + } + } +} + +#[derive(Clone, PartialEq)] +pub enum TextTime { + Minute { + hour: u8, + minute: u8, + }, + Second { + hour: u8, + minute: u8, + second: u8, + }, + FractionalSecond { + hour: u8, + minute: u8, + second: u8, + fraction_coefficient: BigUint, + fraction_exponent: i32, + }, +} + +impl fmt::Debug for TextTime { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TextTime::Minute { hour, minute } => format!("{:02}:{:02}", hour, minute).fmt(f), + TextTime::Second { + hour, + minute, + second, + } => format!("{:02}:{:02}:{:02}", hour, minute, second,).fmt(f), + TextTime::FractionalSecond { + hour, + minute, + second, + fraction_coefficient, + .. + } => format!( + "{:02}:{:02}:{:02}.{}", + hour, + minute, + second, + // TODO: leading zeroes need to be added. 
+ fraction_coefficient.to_str_radix(10) + ) + .fmt(f), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct TextTimestamp { + date: TextDate, + time: Option, + offset: UtcOffset, +} + +impl TextTimestamp { + pub fn new(date: TextDate, time: Option, offset: time::UtcOffset) -> Self { + Self { date, time, offset } + } +} + +impl TryFrom for Timestamp { + type Error = TextFormatError; + + fn try_from(timestamp: TextTimestamp) -> Result { + let offset = timestamp.offset.as_seconds(); + Ok(match timestamp.time { + None => match timestamp.date { + TextDate::Year { year } => Timestamp::Year { year, offset }, + TextDate::Month { year, month } => Timestamp::Month { + year, + month, + offset, + }, + TextDate::Day { date } => Timestamp::Day { + year: date.year() as u16, + month: date.month(), + day: date.day(), + offset, + }, + }, + Some(time) => match timestamp.date { + TextDate::Day { date } => match time { + TextTime::Minute { hour, minute } => Timestamp::Minute { + year: date.year() as u16, + month: date.month(), + day: date.day(), + hour, + minute, + offset, + }, + TextTime::Second { + hour, + minute, + second, + } => Timestamp::Second { + year: date.year() as u16, + month: date.month(), + day: date.day(), + hour, + minute, + second, + offset, + }, + TextTime::FractionalSecond { + hour, + minute, + second, + fraction_coefficient, + fraction_exponent, + } => Timestamp::FractionalSecond { + year: date.year() as u16, + month: date.month(), + day: date.day(), + hour, + minute, + second, + fraction_coefficient, + fraction_exponent, + offset, + }, + }, + _ => return Err(TextFormatError::ImpreciseDate), + }, + }) + } +} diff --git a/src/parser/ion_1_0/typed_value.rs b/src/parser/ion_1_0/typed_value.rs index 420f5bf..fa8c3c6 100644 --- a/src/parser/ion_1_0/typed_value.rs +++ b/src/parser/ion_1_0/typed_value.rs @@ -1,6 +1,8 @@ use super::subfield::*; -use crate::error::{BinaryFormatError, FormatError}; -use crate::parser::parse_error::{IonError, IonResult}; +use crate::{ 
+ error::{BinaryFormatError, FormatError}, + parser::parse_error::{IonError, IonResult}, +}; use nom::{bytes::complete::take, Err}; use num_traits::cast::FromPrimitive; @@ -315,10 +317,10 @@ mod tests { } #[test] - #[should_panic] fn type_code_has_no_17th_variant() { let sixteen: u8 = 0b0001_0000; - let type_code: TypeCode = TypeCode::from_u8(sixteen).unwrap(); + let type_code: Option = TypeCode::from_u8(sixteen); + assert_eq!(type_code, None); } #[test] @@ -329,9 +331,9 @@ mod tests { } #[test] - #[should_panic] fn length_code_has_no_17th_variant() { let sixteen: u8 = 0b0001_0000; - let length_code: LengthCode = LengthCode::from_u8(sixteen).unwrap(); + let length_code: Option = LengthCode::from_u8(sixteen); + assert_eq!(length_code, None); } } diff --git a/src/parser/parse.rs b/src/parser/parse.rs index 70064c3..33ce14f 100644 --- a/src/parser/parse.rs +++ b/src/parser/parse.rs @@ -1,11 +1,18 @@ -use super::combinators::{all_consuming, many0, map, preceded}; -use super::ion_1_0; -use crate::parser::ion_1_0::current_symbol_table::CurrentSymbolTable; -use crate::parser::parse_error::IonResult; -use crate::value::Value; +use super::{ + combinators::{all_consuming, many0, map, preceded}, + ion_1_0, +}; +use crate::{ + parser::{ + ion_1_0::{current_symbol_table::CurrentSymbolTable, text::ValueIterator}, + parse_error::{IonError, IonResult}, + }, + value::Value, +}; use nom::{ bytes::complete::{tag, take}, sequence::tuple, + Err, }; // Binary Ion streams begin with a four-octet Binary Version Marker @@ -73,6 +80,21 @@ fn parse_ion_1_0() -> impl FnMut(&[u8]) -> IonResult<&[u8], Vec>> move |i: &[u8]| many0(ion_1_0::binary::parse(CurrentSymbolTable::SystemV1))(i) } +/// Ion text streams consist of zero or more top level values (TLVs). +/// +/// It is assumed that each one starts with the Ion Version Marker (or IVM) if not otherwise +/// marked. The IVM can also be used to reset the symbol table if later encountered as a TLV. 
+pub fn parse_ion_text_1_0(input: &str) -> IonResult<&str, Vec> { + let values: Result, Err>> = ValueIterator::new(input) + .map(|result| match result { + Ok((_, v)) => Ok(v), + Err(e) => Err(e), + }) + .collect(); + + values.map(|v| (&input[input.len().saturating_sub(1)..], v)) +} + #[allow(non_snake_case)] #[cfg(test)] mod tests { @@ -122,13 +144,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/null.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Null, - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Null.into()]); } #[test] @@ -162,7 +178,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -174,7 +190,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Failure(IonError::from_format_error( index_of_error, FormatError::Binary(BinaryFormatError::AnnotatedPadding) @@ -193,13 +209,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullBool.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Bool(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Bool(None).into()]); } // boolWithInvalidLength_1.10n @@ -214,7 +224,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Failure(IonError::from_format_error( index_of_error, FormatError::Binary(BinaryFormatError::BoolValue(3)) @@ -234,7 +244,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, 
Err::Failure(IonError::from_format_error( index_of_error, FormatError::Binary(BinaryFormatError::BoolValue(14)) @@ -253,13 +263,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullInt2.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Int(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Int(None).into()]); } #[test] @@ -267,13 +271,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullInt3.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Int(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Int(None).into()]); } #[test] @@ -285,8 +283,8 @@ mod tests { match value[0].clone() { Value { value: Data::Int(Some(x)), - annotations: None, - } => {} + annotations, + } if annotations.is_empty() => {} _ => panic!("expected Integer"), } } @@ -300,8 +298,8 @@ mod tests { match value[0].clone() { Value { value: Data::Int(Some(x)), - annotations: None, - } => {} + annotations, + } if annotations.is_empty() => {} _ => panic!("expected Integer"), } } @@ -315,8 +313,8 @@ mod tests { match value[0].clone() { Value { value: Data::Int(Some(x)), - annotations: None, - } => {} + annotations, + } if annotations.is_empty() => {} _ => panic!("expected Integer"), } } @@ -330,8 +328,8 @@ mod tests { match value[0].clone() { Value { value: Data::Int(Some(x)), - annotations: None, - } => {} + annotations, + } if annotations.is_empty() => {} _ => panic!("expected Integer"), } } @@ -345,8 +343,8 @@ mod tests { match value[0].clone() { Value { value: Data::Int(Some(x)), - annotations: None, - } => {} + annotations, + } if annotations.is_empty() => {} _ => panic!("expected Integer"), } } @@ -359,10 +357,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, 
- vec![Value { - value: Data::Int(Some(BigInt::from_str("9223372036854775808").unwrap())), - annotations: None, - }] + vec![Data::Int(Some(BigInt::from_str("9223372036854775808").unwrap())).into()] ); } @@ -374,10 +369,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Int(Some(BigInt::from_str("-9223372036854775808").unwrap())), - annotations: None, - }] + vec![Data::Int(Some(BigInt::from_str("-9223372036854775808").unwrap())).into()] ); } @@ -392,7 +384,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof,)) ); } @@ -408,7 +400,7 @@ mod tests { let index_of_error = &strip_bvm(bytes.as_bytes())[8..]; let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -422,7 +414,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Failure(IonError::from_format_error( index_of_error, FormatError::Binary(BinaryFormatError::NegativeZero) @@ -440,7 +432,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Failure(IonError::from_format_error( index_of_error, FormatError::Binary(BinaryFormatError::NegativeZero) @@ -459,13 +451,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullFloat.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Float(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Float(None).into()]); } // floatLenTooLarge.10n @@ -479,7 +465,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = 
parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -495,13 +481,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullDecimal.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Decimal(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Decimal(None).into()]); } #[test] @@ -513,13 +493,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Decimal(Some(Decimal { - coefficient: BigInt::from_str_radix("-10", 10).unwrap(), - exponent: BigInt::from_str_radix("-1", 10).unwrap(), - })), - annotations: None, - }] + vec![Data::Decimal(Some(Decimal { + coefficient: BigInt::from_str_radix("-10", 10).unwrap(), + exponent: BigInt::from_str_radix("-1", 10).unwrap(), + })) + .into()] ); } @@ -531,13 +509,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Decimal(Some(Decimal { - coefficient: BigInt::zero(), - exponent: BigInt::zero(), - })), - annotations: None, - }] + vec![Data::Decimal(Some(Decimal { + coefficient: BigInt::zero(), + exponent: BigInt::zero(), + })) + .into()] ); } @@ -550,13 +526,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Decimal(Some(Decimal { - coefficient: BigInt::zero(), - exponent: BigInt::from_str_radix("-1", 10).unwrap(), - })), - annotations: None, - }] + vec![Data::Decimal(Some(Decimal { + coefficient: BigInt::zero(), + exponent: BigInt::from_str_radix("-1", 10).unwrap(), + })) + .into()] ); } @@ -568,13 +542,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Decimal(Some(Decimal { - coefficient: BigInt::from_str_radix("10", 10).unwrap(), - exponent: 
BigInt::from_str_radix("-1", 10).unwrap(), - })), - annotations: None, - }] + vec![Data::Decimal(Some(Decimal { + coefficient: BigInt::from_str_radix("10", 10).unwrap(), + exponent: BigInt::from_str_radix("-1", 10).unwrap(), + })) + .into()] ); } @@ -585,13 +557,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Decimal(Some(Decimal { - coefficient: BigInt::zero(), - exponent: BigInt::zero(), - })), - annotations: None, - }] + vec![Data::Decimal(Some(Decimal { + coefficient: BigInt::zero(), + exponent: BigInt::zero(), + })) + .into()] ); } @@ -620,7 +590,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -636,7 +606,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -652,13 +622,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullTimestamp.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Timestamp(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Timestamp(None).into()]); } #[test] @@ -670,13 +634,11 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Timestamp(Some(Timestamp::Year { - offset: BigInt::zero(), - year: BigUint::from(2011u32) - })), - annotations: None, - }] + vec![Timestamp::Year { + offset: 0, + year: 2011 + } + .into()] ); } @@ -689,14 +651,12 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Timestamp(Some(Timestamp::Month { - offset: BigInt::zero(), - year: BigUint::from(2011u32), - month: 
BigUint::from(2u32) - })), - annotations: None, - }] + vec![Timestamp::Month { + offset: 0, + year: 2011, + month: 2 + } + .into()] ); } @@ -709,15 +669,13 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Timestamp(Some(Timestamp::Day { - offset: BigInt::zero(), - year: BigUint::from(2011u32), - month: BigUint::from(2u32), - day: BigUint::from(20u32) - })), - annotations: None, - }] + vec![Timestamp::Day { + offset: 0, + year: 2011, + month: 2, + day: 20 + } + .into()] ); } @@ -730,20 +688,18 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Timestamp(Some(Timestamp::FractionalSecond { - offset: BigInt::from(-480i32), - year: BigUint::from(2011u32), - month: BigUint::from(2u32), - day: BigUint::from(20u32), - hour: BigUint::from(19u32), - minute: BigUint::from(30u32), - second: BigUint::from(59u32), - fraction_coefficient: BigUint::from(100u32), - fraction_exponent: -3, - })), - annotations: None, - }] + vec![Timestamp::FractionalSecond { + offset: -480, + year: 2011, + month: 2, + day: 20, + hour: 19, + minute: 30, + second: 59, + fraction_coefficient: BigUint::from(100u32), + fraction_exponent: -3, + } + .into()] ); } } @@ -759,13 +715,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullSymbol.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Symbol(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Symbol(None).into()]); } #[test] @@ -774,13 +724,7 @@ mod tests { include_bytes!("../../tests/ion-tests/iontestdata/good/symbolExplicitZero.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Symbol(Some(SymbolToken::Zero)), - annotations: None, - }] - ); + assert_eq!(value, 
vec![Data::Symbol(Some(SymbolToken::Zero)).into()]); } #[test] @@ -789,13 +733,7 @@ mod tests { include_bytes!("../../tests/ion-tests/iontestdata/good/symbolImplicitZero.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Symbol(Some(SymbolToken::Zero)), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Symbol(Some(SymbolToken::Zero)).into()]); } #[test] @@ -805,7 +743,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Failure(IonError::from_symbol_error( index_of_error, SymbolError::AboveMaxId { @@ -835,7 +773,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -851,13 +789,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullString.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::String(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::String(None).into()]); } // stringLenTooLarge.10n @@ -871,7 +803,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -906,13 +838,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullClob.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Clob(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Clob(None).into()]); } #[test] @@ -922,10 +848,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as 
&[u8]); assert_eq!( value, - vec![Value { - value: Data::Clob(Some(Clob { data: vec![127u8] })), - annotations: None, - }] + vec![Data::Clob(Some(Clob { data: vec![127u8] })).into()] ); } @@ -938,10 +861,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Clob(Some(Clob { data: vec![128u8] })), - annotations: None, - }] + vec![Data::Clob(Some(Clob { data: vec![128u8] })).into()] ); } @@ -953,10 +873,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Clob(Some(Clob { data: vec![0u8] })), - annotations: None, - }] + vec![Data::Clob(Some(Clob { data: vec![0u8] })).into()] ); } @@ -970,7 +887,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -986,13 +903,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullBlob.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Blob(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Blob(None).into()]); } // blobLenTooLarge.10n @@ -1005,7 +916,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -1024,13 +935,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullList.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::List(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::List(None).into()]); } // Bad @@ -1043,7 +948,7 @@ mod tests { let index_of_error = strip_bvm(bytes.as_bytes()); let 
err = parse(bytes).err().unwrap(); assert_eq!( - dbg!(err), + err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof)) ); } @@ -1059,13 +964,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullSexp.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Sexp(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Sexp(None).into()]); } } @@ -1082,13 +981,7 @@ mod tests { let bytes = include_bytes!("../../tests/ion-tests/iontestdata/good/nullStruct.10n"); let (remaining_bytes, value) = parse(bytes).unwrap(); assert_eq!(remaining_bytes, &[] as &[u8]); - assert_eq!( - value, - vec![Value { - value: Data::Struct(None), - annotations: None, - }] - ); + assert_eq!(value, vec![Data::Struct(None).into()]); } #[test] @@ -1100,10 +993,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { fields: vec![] })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { fields: vec![] })).into()] ); } @@ -1116,10 +1006,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { fields: vec![] })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { fields: vec![] })).into()] ); } @@ -1132,20 +1019,15 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::Bool(Some(true)), - annotations: None, - } - )] - })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { + fields: vec![( + SymbolToken::Known { + text: String::from("name") + }, + Data::Bool(Some(true)).into(), + )] + })) + .into()] ); } @@ -1158,20 +1040,15 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, 
- vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::Bool(Some(true)), - annotations: None, - } - )] - })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { + fields: vec![( + SymbolToken::Known { + text: String::from("name") + }, + Data::Bool(Some(true)).into(), + )] + })) + .into()] ); } @@ -1185,9 +1062,9 @@ mod tests { value, vec![Value { value: Data::Struct(Some(Struct { fields: vec![] })), - annotations: Some(vec![Some(SymbolToken::Known { + annotations: vec![Some(SymbolToken::Known { text: String::from("max_id") - }),]) + })] }] ); } @@ -1199,10 +1076,7 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { fields: vec![] })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { fields: vec![] })).into()] ); } @@ -1213,20 +1087,15 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::String(Some(String::from("123456789AB"))), - annotations: None, - } - )] - })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { + fields: vec![( + SymbolToken::Known { + text: String::from("name") + }, + Data::String(Some(String::from("123456789AB"))).into(), + )] + })) + .into()] ); } @@ -1237,20 +1106,15 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::String(Some(String::from("123456789ABC"))), - annotations: None, - } - )] - })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { + fields: vec![( + SymbolToken::Known { + text: String::from("name") + }, + Data::String(Some(String::from("123456789ABC"))).into(), + )] + })) + 
.into()] ); } @@ -1261,20 +1125,15 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::String(Some(String::from("123456789ABCD"))), - annotations: None, - } - )] - })), - annotations: None, - }] + vec![Data::Struct(Some(Struct { + fields: vec![( + SymbolToken::Known { + text: String::from("name") + }, + Data::String(Some(String::from("123456789ABCD"))).into(), + )] + })) + .into()] ); } @@ -1285,40 +1144,29 @@ mod tests { assert_eq!(remaining_bytes, &[] as &[u8]); assert_eq!( value, - vec![Value { - value: Data::Struct(Some(Struct { - fields: vec![ - ( - SymbolToken::Known { - text: String::from("name") - }, - Value { - value: Data::Null, - annotations: None, - } - ), - ( - SymbolToken::Known { - text: String::from("version") - }, - Value { - value: Data::Bool(Some(false)), - annotations: None, - } - ), - ( - SymbolToken::Known { - text: String::from("imports") - }, - Value { - value: Data::Bool(Some(true)), - annotations: None, - } - ) - ] - })), - annotations: None - }], + vec![Data::Struct(Some(Struct { + fields: vec![ + ( + SymbolToken::Known { + text: String::from("name") + }, + Data::Null.into(), + ), + ( + SymbolToken::Known { + text: String::from("version") + }, + Data::Bool(Some(false)).into(), + ), + ( + SymbolToken::Known { + text: String::from("imports") + }, + Data::Bool(Some(true)).into() + ) + ] + })) + .into()], ); } @@ -1337,39 +1185,30 @@ mod tests { SymbolToken::Known { text: String::from("name") }, - Value { - value: Data::Null, - annotations: None, - } + Data::Null.into(), ), ( SymbolToken::Known { text: String::from("version") }, - Value { - value: Data::Bool(Some(false)), - annotations: None, - } + Data::Bool(Some(false)).into() ), ( SymbolToken::Known { text: String::from("imports") }, - Value { - value: Data::Bool(Some(true)), - annotations: None, - } + 
Data::Bool(Some(true)).into(), ) ] })), - annotations: Some(vec![ + annotations: vec![ Some(SymbolToken::Known { text: String::from("symbols") }), Some(SymbolToken::Known { text: String::from("max_id") }) - ]), + ], }], ); } @@ -1412,7 +1251,7 @@ mod tests { "../../tests/ion-tests/iontestdata/bad/annotationLengthTooLongScalar.10n" ); let index_of_error = strip_bvm(bytes.as_bytes()); - let err = dbg!(parse(bytes).err().unwrap()); + let err = parse(bytes).err().unwrap(); assert_eq!( err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof,)) @@ -1429,7 +1268,7 @@ mod tests { "../../tests/ion-tests/iontestdata/bad/annotationLengthTooLongContainer.10n" ); let index_of_error = strip_bvm(bytes.as_bytes()); - let err = dbg!(parse(bytes).err().unwrap()); + let err = parse(bytes).err().unwrap(); assert_eq!( err, Err::Error(IonError::from_error_kind(index_of_error, ErrorKind::Eof,)) diff --git a/src/parser/parse_error.rs b/src/parser/parse_error.rs index a485b7d..091943f 100644 --- a/src/parser/parse_error.rs +++ b/src/parser/parse_error.rs @@ -1,5 +1,7 @@ use crate::error::{FormatError, SymbolError}; -use nom::{error::ParseError, Err}; +use itertools::Itertools; +use nom::{error::ParseError, Err, Offset}; +use std::str::from_utf8; /// Analogous to nom's IResult. 
pub type IonResult = Result<(I, T), Err>>; @@ -10,6 +12,25 @@ pub struct IonError { backtrace: Vec>, } +impl<'a> IonError<&'a [u8]> { + pub(crate) fn into_str_err(self, origin: &'a str) -> IonError<&'a str> { + let kind = self.kind.into_str_kind(origin); + let backtrace = self + .backtrace + .into_iter() + .map(|b| b.into_str_kind(origin)) + .collect_vec(); + + IonError { kind, backtrace } + } +} + +impl<'a> IonError<&'a str> { + pub(crate) fn into_bytes_err(self, origin: &str) -> IonError<&'a [u8]> { + todo!() + } +} + #[derive(Debug, PartialEq)] pub enum ErrorKind { Nom(I, nom::error::ErrorKind), @@ -18,6 +39,22 @@ pub enum ErrorKind { Format(I, FormatError), } +impl<'a> ErrorKind<&'a [u8]> { + pub(crate) fn into_str_kind(self, origin: &'a str) -> ErrorKind<&'a str> { + let translate_offset = + |i| { + origin.offset(from_utf8(i).expect( + "parser should return a reference to the same utf-8 slice it was given", + )) + }; + match self { + ErrorKind::Nom(i, kind) => ErrorKind::Nom(&origin[translate_offset(i)..], kind), + ErrorKind::Symbol(i, e) => ErrorKind::Symbol(&origin[translate_offset(i)..], e), + ErrorKind::Format(i, e) => ErrorKind::Format(&origin[translate_offset(i)..], e), + } + } +} + impl IonError { pub(crate) fn from_symbol_error(input: I, error: SymbolError) -> Self { Self { diff --git a/src/symbols.rs b/src/symbols.rs index ae17644..3de8f3a 100644 --- a/src/symbols.rs +++ b/src/symbols.rs @@ -106,6 +106,13 @@ impl SymbolToken { } } +impl From<&str> for SymbolToken { + fn from(symbol: &str) -> Self { + let text = symbol.to_string(); + SymbolToken::Known { text } + } +} + /// ## ImportDescriptor /// /// @@ -116,6 +123,16 @@ pub struct ImportDescriptor { max_id: u32, } +impl ImportDescriptor { + pub(crate) fn new(import_name: String, version: u32, max_id: u32) -> Self { + Self { + import_name, + version, + max_id, + } + } +} + /// ## ImportLocation /// /// diff --git a/src/value.rs b/src/value.rs index d70a8ab..4e2dc5c 100644 --- a/src/value.rs +++ 
b/src/value.rs @@ -6,13 +6,34 @@ use std::str; use crate::symbols::SymbolToken; use base64::encode; -use num_bigint::BigInt; -use num_bigint::BigUint; +use num_bigint::{BigInt, BigUint}; #[derive(Clone, Debug, PartialEq)] pub struct Value { pub value: Data, - pub annotations: Option>>, + pub annotations: Vec>, +} + +impl Value { + pub(crate) fn has_annotation(&self, annotation: &str) -> bool { + for token in &self.annotations { + match token { + Some(SymbolToken::Known { text }) if text.as_str() == annotation => return true, + _ => (), + } + } + + false + } +} + +impl From for Value { + fn from(value: Data) -> Self { + Self { + value, + annotations: vec![], + } + } } #[allow(clippy::large_enum_variant)] @@ -46,6 +67,35 @@ pub enum Data { Sexp(Option), } +macro_rules! ion_type_promotions { + ($ion_type:ty, $data_variant:expr) => { + impl From<$ion_type> for Data { + fn from(ion_value: $ion_type) -> Self { + $data_variant(Some(ion_value)) + } + } + + impl From<$ion_type> for Value { + fn from(ion_value: $ion_type) -> Self { + $data_variant(Some(ion_value)).into() + } + } + }; +} + +ion_type_promotions!(bool, Data::Bool); +ion_type_promotions!(BigInt, Data::Int); +ion_type_promotions!(f64, Data::Float); +ion_type_promotions!(Decimal, Data::Decimal); +ion_type_promotions!(Timestamp, Data::Timestamp); +ion_type_promotions!(String, Data::String); +ion_type_promotions!(SymbolToken, Data::Symbol); +ion_type_promotions!(Blob, Data::Blob); +ion_type_promotions!(Clob, Data::Clob); +ion_type_promotions!(Struct, Data::Struct); +ion_type_promotions!(List, Data::List); +ion_type_promotions!(Sexp, Data::Sexp); + impl Data { pub fn to_text(&self) -> String { match self { @@ -118,53 +168,59 @@ impl Decimal { } } +#[derive(Clone, PartialEq)] +pub enum Date { + Year { year: i32 }, + Month { year: i32, month: u8 }, + Day { date: time::Date }, +} + // timestamp - Date/time/timezone moments of arbitrary precision // Mostly ISO 8601 // Enum variant names represent the precision of the 
variant -// TODO: Investigate performance impact of large_enum_variant #[allow(clippy::large_enum_variant)] #[derive(Clone, Debug, PartialEq)] pub enum Timestamp { Year { - offset: BigInt, - year: BigUint, + offset: i32, + year: u16, }, Month { - offset: BigInt, - year: BigUint, - month: BigUint, + offset: i32, + year: u16, + month: u8, }, Day { - offset: BigInt, - year: BigUint, - month: BigUint, - day: BigUint, + offset: i32, + year: u16, + month: u8, + day: u8, }, Minute { - offset: BigInt, - year: BigUint, - month: BigUint, - day: BigUint, - hour: BigUint, - minute: BigUint, + offset: i32, + year: u16, + month: u8, + day: u8, + hour: u8, + minute: u8, }, Second { - offset: BigInt, - year: BigUint, - month: BigUint, - day: BigUint, - hour: BigUint, - minute: BigUint, - second: BigUint, + offset: i32, + year: u16, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, }, FractionalSecond { - offset: BigInt, - year: BigUint, - month: BigUint, - day: BigUint, - hour: BigUint, - minute: BigUint, - second: BigUint, + offset: i32, + year: u16, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, fraction_coefficient: BigUint, // The restriction of fractional_exponent to i32 rather than BigInt should not pose an issue for any non-pathological use fraction_exponent: i32, @@ -235,6 +291,15 @@ impl List { } } +impl IntoIterator for List { + type Item = Value; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.values.into_iter() + } +} + // sexp - Ordered collections of values with application-defined semantics // A subset of symbols called identifiers can be denoted in text without single-quotes. // An identifier is a sequence of ASCII letters, digits, or the @@ -253,3 +318,12 @@ impl Sexp { todo!() } } + +impl IntoIterator for Sexp { + type Item = Value; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.values.into_iter() + } +}