Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub mod combinators;
pub mod parser;
pub mod reduction;

pub use self::parser::parse;
pub use self::parser::{parse, parse_with_context};
pub use self::reduction::beta;
pub use self::reduction::Order::*;
pub use self::term::Notation::*;
Expand Down
99 changes: 80 additions & 19 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use self::CToken::*;
use self::Expression::*;
use self::ParseError::*;
use self::Token::*;
use crate::term::Context;
pub use crate::term::Notation::*;
use crate::term::Term::*;
use crate::term::{abs, app, Notation, Term};
use std::collections::VecDeque;
use std::error::Error;
use std::fmt;

Expand All @@ -16,6 +16,8 @@ use std::fmt;
pub enum ParseError {
/// lexical error; contains the invalid character and its index
InvalidCharacter((usize, char)),
/// lexical error; an undefined free variable was found (classic notation only)
UndefinedFreeVariable,
/// syntax error; the expression is invalid
InvalidExpression,
/// syntax error; the expression is empty
Expand All @@ -28,6 +30,9 @@ impl fmt::Display for ParseError {
ParseError::InvalidCharacter((idx, char)) => {
write!(f, "lexical error; invalid character '{}' at {}", char, idx)
}
ParseError::UndefinedFreeVariable => {
write!(f, "lexical error; an undefined free variable was used")
}
ParseError::InvalidExpression => write!(f, "syntax error; the expression is invalid"),
ParseError::EmptyExpression => write!(f, "syntax error; the expression is empty"),
}
Expand Down Expand Up @@ -142,50 +147,50 @@ pub fn tokenize_cla(input: &str) -> Result<Vec<CToken>, ParseError> {
}

#[doc(hidden)]
pub fn convert_classic_tokens(tokens: &[CToken]) -> Vec<Token> {
_convert_classic_tokens(tokens, &mut VecDeque::with_capacity(tokens.len()), &mut 0)
pub fn convert_classic_tokens(ctx: &Context, tokens: &[CToken]) -> Result<Vec<Token>, ParseError> {
let mut stack = Vec::with_capacity(tokens.len());
stack.extend(ctx.iter().rev());
_convert_classic_tokens(tokens, &mut stack, &mut 0)
}

fn _convert_classic_tokens<'t>(
tokens: &'t [CToken],
stack: &mut VecDeque<&'t str>,
stack: &mut Vec<&'t str>,
pos: &mut usize,
) -> Vec<Token> {
) -> Result<Vec<Token>, ParseError> {
let mut output = Vec::with_capacity(tokens.len() - *pos);
let mut inner_stack_count = 0;

while let Some(token) = tokens.get(*pos) {
match *token {
CLambda(ref name) => {
output.push(Lambda);
stack.push_back(name);
stack.push(name);
inner_stack_count += 1;
}
CLparen => {
output.push(Lparen);
*pos += 1;
output.append(&mut _convert_classic_tokens(tokens, stack, pos));
output.append(&mut _convert_classic_tokens(tokens, stack, pos)?);
}
CRparen => {
output.push(Rparen);
stack.truncate(stack.len() - inner_stack_count);
return output;
return Ok(output);
}
CName(ref name) => {
if let Some(index) = stack.iter().rev().position(|t| t == name) {
output.push(Number(index + 1))
} else {
// a new free variable
stack.push_front(name);
// index of the last element + 1
output.push(Number(stack.len()))
// a free variable not defined in the `Context`
return Err(UndefinedFreeVariable);
}
}
}
*pos += 1;
}

output
Ok(output)
}

#[derive(Debug, PartialEq)]
Expand Down Expand Up @@ -231,7 +236,7 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
/// Attempts to parse the input `&str` as a lambda `Term` encoded in the given `Notation`.
///
/// - lambdas can be represented either with the greek letter (λ) or a backslash (\\ -
///   less aesthetic, but only one byte in size)
/// - the identifiers in `Classic` notation are `String`s of alphabetic Unicode characters
/// - `Classic` notation ignores whitespaces where unambiguous
/// - the indices in the `DeBruijn` notation start with 1 and are hexadecimal digits
Expand All @@ -253,10 +258,42 @@ fn _get_ast(tokens: &[Token], pos: &mut usize) -> Result<Expression, ParseError>
///
/// Returns a `ParseError` when a lexing or syntax error is encountered.
pub fn parse(input: &str, notation: Notation) -> Result<Term, ParseError> {
    // Delegate to the context-aware variant with no predefined free variables.
    let no_free_vars = Context::empty();
    parse_with_context(&no_free_vars, input, notation)
}

/// Attempts to parse the input `&str` using a provided context of free variables.
///
/// This function is identical to `parse()`, but it allows defining a set of named
/// free variables that are considered valid during parsing in `Classic` notation.
///
/// # Examples
/// ```
/// use lambda_calculus::{*, term::Context};
///
/// let ctx = Context::new(&["x", "y"]);
///
/// // `z` is not in the context, so it will be an error.
/// assert!(parse_with_context(&ctx, "z", Classic).is_err());
///
/// // `y` is in the context, so it's parsed as the outermost free variable (Var(2)).
/// assert_eq!(parse_with_context(&ctx, "y", Classic), Ok(Var(2)));
///
/// // In `λa.y`, `y` is still the outermost free variable, but its index is now 3.
/// assert_eq!(parse_with_context(&ctx, "λa.y", Classic), Ok(abs(Var(3))));
/// ```
///
/// # Errors
///
/// Returns a `ParseError` when a lexing or syntax error is encountered.
pub fn parse_with_context(
ctx: &Context,
input: &str,
notation: Notation,
) -> Result<Term, ParseError> {
let tokens = if notation == DeBruijn {
tokenize_dbr(input)?
} else {
convert_classic_tokens(&tokenize_cla(input)?)
convert_classic_tokens(ctx, &tokenize_cla(input)?)?
};
let ast = get_ast(&tokens)?;

Expand Down Expand Up @@ -362,15 +399,19 @@ mod tests {
assert!(tokens_dbr.is_ok());

assert_eq!(
convert_classic_tokens(&tokens_cla.unwrap()),
convert_classic_tokens(&Context::empty(), &tokens_cla.unwrap()).unwrap(),
tokens_dbr.unwrap()
);
}

#[test]
fn tokenization_success_classic_with_free_variables() {
let ctx = Context::new(&["a", "b"]);
let blc_dbr = "12";
let blc_cla = parse(blc_dbr, DeBruijn).unwrap().to_string();
let blc_cla = parse(blc_dbr, DeBruijn)
.unwrap()
.with_context(&ctx)
.to_string();

let tokens_cla = tokenize_cla(&blc_cla);
let tokens_dbr = tokenize_dbr(blc_dbr);
Expand All @@ -379,9 +420,29 @@ mod tests {
assert!(tokens_dbr.is_ok());

assert_eq!(
convert_classic_tokens(&tokens_cla.unwrap()),
tokens_dbr.unwrap()
tokens_cla.and_then(|tokens| convert_classic_tokens(&ctx, &tokens)),
tokens_dbr
);
}

#[test]
fn parse_classic_with_undefined_variable_error() {
    // A context defining only the free variable `y`.
    let ctx = Context::new(&["y"]);

    // "x" is undefined when the context is empty.
    assert_eq!(
        parse_with_context(&Context::empty(), "x", Classic),
        Err(UndefinedFreeVariable)
    );

    // "x" remains undefined even with "y" in scope, including under a binder.
    assert_eq!(
        parse_with_context(&ctx, "λz.x", Classic),
        Err(UndefinedFreeVariable)
    );

    // "y" is defined, so parsing succeeds.
    assert!(parse_with_context(&ctx, "y", Classic).is_ok());
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/reduction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{cmp, fmt, mem};
///
/// - the `NOR`, `HNO`, `APP` and `HAP` orders reduce expressions to their normal form
/// - the `APP` order will fail to fully reduce expressions containing terms without a normal form,
///   e.g. the `Y` combinator (they will expand forever)
/// - the `CBN` order reduces to weak head normal form
/// - the `CBV` order reduces to weak normal form
/// - the `HSP` order reduces to head normal form
Expand Down
Loading
Loading