Skip to content

Commit f981cba

Browse files
authored
refactor(parser): Remove explicit parsers outside pc package (#117)
* refactor(parser): Remove explicit parser implementation from expression
* refactor(parser): Improved binary operator parsing
* refactor(parser): Improved `guard` implementation
* refactor(parser): Removed explicit parser implementation for PrintArgs
* chore(parser): Use IifParser
* refactor(parser): Use MapToUnit
* Removed explicit parser implementation from sub_call
* Closes #116
1 parent 1f1a7d0 commit f981cba

6 files changed

Lines changed: 227 additions & 248 deletions

File tree

rusty_parser/src/core/expression.rs

Lines changed: 134 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -999,7 +999,7 @@ mod built_in_function_call {
999999

10001000
mod binary_expression {
10011001
use rusty_common::Positioned;
1002-
use rusty_pc::and::opt_and_tuple;
1002+
use rusty_pc::and::{TupleCombiner, opt_and_keep_right};
10031003
use rusty_pc::*;
10041004

10051005
use super::{
@@ -1013,115 +1013,139 @@ mod binary_expression {
10131013

10141014
// result ::= <non-bin-expr> <operator> <expr>
10151015
pub fn parser() -> impl Parser<StringView, Output = ExpressionPos, Error = ParserError> {
1016-
BinaryExprParser
1016+
non_bin_expr()
1017+
.then_with_in_context(
1018+
second_parser(),
1019+
|first| first.is_parenthesis(),
1020+
TupleCombiner,
1021+
)
1022+
.map(|(l, r)| match r {
1023+
Some((op, r)) => l.apply_priority_order(r, op.element, op.pos),
1024+
None => l,
1025+
})
1026+
.map(ExpressionPos::simplify_unary_minus_literals)
10171027
}
10181028

1019-
struct BinaryExprParser;
1029+
fn second_parser() -> impl Parser<
1030+
StringView,
1031+
bool,
1032+
Output = Option<(Positioned<Operator>, ExpressionPos)>,
1033+
Error = ParserError,
1034+
> + SetContext<bool> {
1035+
operator()
1036+
.then_with_in_context(third_parser(), |op| is_keyword_op(op), TupleCombiner)
1037+
.to_option()
1038+
}
10201039

1021-
impl Parser<StringView> for BinaryExprParser {
1022-
type Output = ExpressionPos;
1023-
type Error = ParserError;
1040+
fn is_keyword_op(op: &Positioned<Operator>) -> bool {
1041+
op.element == Operator::And || op.element == Operator::Or || op.element == Operator::Modulo
1042+
}
10241043

1025-
fn parse(&mut self, tokenizer: &mut StringView) -> Result<Self::Output, ParserError> {
1026-
self.do_parse(tokenizer)
1027-
.map(ExpressionPos::simplify_unary_minus_literals)
1028-
}
1044+
fn third_parser()
1045+
-> impl Parser<StringView, bool, Output = ExpressionPos, Error = ParserError> + SetContext<bool>
1046+
{
1047+
IifParser::new(
1048+
guard::parser().to_fatal(),
1049+
guard::parser().to_option().map_to_unit(),
1050+
)
1051+
.and_keep_right(right_side_expr().no_context())
10291052
}
10301053

1031-
impl BinaryExprParser {
1032-
fn do_parse(&self, tokenizer: &mut StringView) -> Result<ExpressionPos, ParserError> {
1033-
let first = Self::non_bin_expr().parse(tokenizer)?;
1054+
/// Parses the right side expression, after having parsed the binary operator
1055+
fn right_side_expr() -> impl Parser<StringView, Output = ExpressionPos, Error = ParserError> {
1056+
// boxed breaks apart the recursive type evaluation
1057+
expression_pos_p()
1058+
.or_expected("expression after operator")
1059+
.boxed()
1060+
}
10341061

1035-
let is_paren = first.is_parenthesis();
1036-
match Self::operator(is_paren).parse(tokenizer) {
1037-
Ok(Positioned {
1038-
element: op,
1039-
pos: op_pos,
1040-
}) => {
1041-
if let Err(err) = guard::parser().parse(tokenizer) {
1042-
if err.is_soft() {
1043-
let is_keyword_op =
1044-
op == Operator::And || op == Operator::Or || op == Operator::Modulo;
1045-
if is_keyword_op {
1046-
return Err(ParserError::expected("whitespace or (").to_fatal());
1047-
}
1048-
} else {
1049-
return Err(err);
1050-
}
1051-
}
1062+
fn non_bin_expr() -> impl Parser<StringView, Output = ExpressionPos, Error = ParserError> {
1063+
OrParser::new(vec![
1064+
Box::new(single_or_double_literal::parser()),
1065+
Box::new(string_literal::parser()),
1066+
Box::new(integer_or_long_literal::parser()),
1067+
// property internally uses variable and function_call_or_array_element so they can be skipped
1068+
Box::new(property::parser()),
1069+
Box::new(built_in_function_call::parser()),
1070+
Box::new(parenthesis::parser()),
1071+
Box::new(unary_expression::parser()),
1072+
])
1073+
}
10521074

1053-
expression_pos_p()
1054-
.or_expected("expression after operator")
1055-
.parse(tokenizer)
1056-
.map(|right| first.apply_priority_order(right, op, op_pos))
1057-
}
1058-
Err(err) if err.is_soft() => Ok(first),
1059-
Err(err) => Err(err),
1060-
}
1061-
}
1075+
/// Parses an operator.
1076+
/// The boolean parser context indicates if the previously parsed expression was wrapped in
1077+
/// parentheses. If that is the case, leading whitespace is not required for
1078+
/// keyword based operators.
1079+
fn operator()
1080+
-> impl Parser<StringView, bool, Output = Positioned<Operator>, Error = ParserError>
1081+
+ SetContext<bool> {
1082+
IifParser::new(
1083+
// no whitespace needed
1084+
opt_and_keep_right(whitespace_ignoring(), operator_p()),
1085+
// whitespace needed
1086+
whitespace_ignoring()
1087+
.and_keep_right(operator_p())
1088+
.or(opt_and_keep_right(
1089+
whitespace_ignoring(),
1090+
symbol_operator_p(),
1091+
)),
1092+
)
1093+
}
10621094

1063-
fn non_bin_expr() -> impl Parser<StringView, Output = ExpressionPos, Error = ParserError> {
1064-
OrParser::new(vec![
1065-
Box::new(single_or_double_literal::parser()),
1066-
Box::new(string_literal::parser()),
1067-
Box::new(integer_or_long_literal::parser()),
1068-
// property internally uses variable and function_call_or_array_element so they can be skipped
1069-
Box::new(property::parser()),
1070-
Box::new(built_in_function_call::parser()),
1071-
Box::new(parenthesis::parser()),
1072-
Box::new(unary_expression::parser()),
1073-
])
1074-
}
1075-
1076-
fn operator(
1077-
is_paren: bool,
1078-
) -> impl Parser<StringView, Output = Positioned<Operator>, Error = ParserError> {
1079-
opt_and_tuple(
1080-
whitespace_ignoring(),
1081-
any_token()
1082-
.filter_map(Self::map_token_to_operator)
1083-
.with_pos(),
1084-
)
1085-
.and_then(move |(leading_ws, op_pos)| {
1086-
let had_whitespace = leading_ws.is_some();
1087-
let needs_whitespace = matches!(
1088-
&op_pos.element,
1089-
Operator::Modulo | Operator::And | Operator::Or
1090-
);
1091-
if had_whitespace || is_paren || !needs_whitespace {
1092-
Ok(op_pos)
1093-
} else {
1094-
Err(ParserError::syntax_error(&format!(
1095-
"Expected: parenthesis before operator {:?}",
1096-
op_pos.element()
1097-
)))
1098-
}
1099-
})
1095+
/// Parses an operator.
1096+
/// Does not check for leading whitespace; the caller is responsible for that.
1097+
fn operator_p() -> impl Parser<StringView, Output = Positioned<Operator>, Error = ParserError> {
1098+
any_token().filter_map(map_token_to_operator).with_pos()
1099+
}
1100+
1101+
/// Parses a symbol operator (i.e. excludes keyword based operators).
1102+
/// Does not check for leading whitespace; the caller is responsible for that.
1103+
fn symbol_operator_p()
1104+
-> impl Parser<StringView, Output = Positioned<Operator>, Error = ParserError> {
1105+
any_token()
1106+
.filter_map(map_token_to_symbol_operator)
1107+
.with_pos()
1108+
}
1109+
1110+
/// Maps the given token to an operator.
1111+
fn map_token_to_operator(token: &Token) -> Option<Operator> {
1112+
map_token_to_symbol_operator(token).or_else(|| map_token_to_keyword_operator(token))
1113+
}
1114+
1115+
/// Maps the given token to an operator, considering only operators
1116+
/// that are based on symbols (i.e. excludes keywords).
1117+
/// Symbol based operators do not require leading whitespace.
1118+
fn map_token_to_symbol_operator(token: &Token) -> Option<Operator> {
1119+
match TokenType::from_token(token) {
1120+
TokenType::LessEquals => Some(Operator::LessOrEqual),
1121+
TokenType::Less => Some(Operator::Less),
1122+
TokenType::GreaterEquals => Some(Operator::GreaterOrEqual),
1123+
TokenType::Greater => Some(Operator::Greater),
1124+
TokenType::Equals => Some(Operator::Equal),
1125+
TokenType::NotEquals => Some(Operator::NotEqual),
1126+
TokenType::Symbol => match token.demand_single_char() {
1127+
'+' => Some(Operator::Plus),
1128+
'-' => Some(Operator::Minus),
1129+
'*' => Some(Operator::Multiply),
1130+
'/' => Some(Operator::Divide),
1131+
_ => None,
1132+
},
1133+
_ => None,
11001134
}
1135+
}
11011136

1102-
fn map_token_to_operator(token: &Token) -> Option<Operator> {
1103-
match TokenType::from_token(token) {
1104-
TokenType::LessEquals => Some(Operator::LessOrEqual),
1105-
TokenType::Less => Some(Operator::Less),
1106-
TokenType::GreaterEquals => Some(Operator::GreaterOrEqual),
1107-
TokenType::Greater => Some(Operator::Greater),
1108-
TokenType::Equals => Some(Operator::Equal),
1109-
TokenType::NotEquals => Some(Operator::NotEqual),
1110-
TokenType::Keyword => match Keyword::try_from(token.as_str()).unwrap() {
1111-
Keyword::Mod => Some(Operator::Modulo),
1112-
Keyword::And => Some(Operator::And),
1113-
Keyword::Or => Some(Operator::Or),
1114-
_ => None,
1115-
},
1116-
TokenType::Symbol => match token.demand_single_char() {
1117-
'+' => Some(Operator::Plus),
1118-
'-' => Some(Operator::Minus),
1119-
'*' => Some(Operator::Multiply),
1120-
'/' => Some(Operator::Divide),
1121-
_ => None,
1122-
},
1137+
/// Maps the given token to an operator, considering only operators
1138+
/// that are based on keywords (i.e. excludes symbols).
1139+
/// Keyword based operators require leading whitespace.
1140+
fn map_token_to_keyword_operator(token: &Token) -> Option<Operator> {
1141+
match TokenType::from_token(token) {
1142+
TokenType::Keyword => match Keyword::try_from(token.as_str()).unwrap() {
1143+
Keyword::Mod => Some(Operator::Modulo),
1144+
Keyword::And => Some(Operator::And),
1145+
Keyword::Or => Some(Operator::Or),
11231146
_ => None,
1124-
}
1147+
},
1148+
_ => None,
11251149
}
11261150
}
11271151
}
@@ -1218,34 +1242,24 @@ pub mod guard {
12181242

12191243
use crate::ParserError;
12201244
use crate::input::StringView;
1221-
use crate::tokens::{TokenMatcher, peek_token, whitespace_ignoring};
1222-
1223-
#[derive(Default)]
1224-
pub enum Guard {
1225-
#[default]
1226-
Peeked,
1227-
Whitespace,
1228-
}
1245+
use crate::pc_specific::WithExpected;
1246+
use crate::tokens::{any_symbol_of, any_token_of, whitespace_ignoring};
12291247

12301248
/// `result ::= " " | "("`
12311249
///
12321250
/// The "(" will be undone.
1233-
pub fn parser() -> impl Parser<StringView, Output = Guard, Error = ParserError> {
1234-
whitespace_guard().or(lparen_guard())
1251+
pub fn parser() -> impl Parser<StringView, Output = (), Error = ParserError> {
1252+
whitespace_guard()
1253+
.or(lparen_guard())
1254+
.with_expected_message("Expected: '(' or whitespace")
12351255
}
12361256

1237-
fn whitespace_guard() -> impl Parser<StringView, Output = Guard, Error = ParserError> {
1238-
whitespace_ignoring().map(|_| Guard::Whitespace)
1257+
fn whitespace_guard() -> impl Parser<StringView, Output = (), Error = ParserError> {
1258+
whitespace_ignoring()
12391259
}
12401260

1241-
fn lparen_guard() -> impl Parser<StringView, Output = Guard, Error = ParserError> {
1242-
peek_token().and_then(|token| {
1243-
if '('.matches_token(&token) {
1244-
Ok(Guard::Peeked)
1245-
} else {
1246-
default_parse_error()
1247-
}
1248-
})
1261+
fn lparen_guard() -> impl Parser<StringView, Output = (), Error = ParserError> {
1262+
any_symbol_of!('(').map_to_unit().peek()
12491263
}
12501264
}
12511265

@@ -1867,7 +1881,7 @@ mod tests {
18671881
ExpressionType::Unresolved
18681882
)
18691883
);
1870-
assert_parser_err!("PRINT 1AND 2", expected("parenthesis before operator And"));
1884+
assert_parser_err!("PRINT 1AND 2", expected("end-of-statement"));
18711885
assert_expression!(
18721886
"(1 OR 2)AND 3",
18731887
Expression::BinaryExpression(
@@ -1897,7 +1911,7 @@ mod tests {
18971911
ExpressionType::Unresolved
18981912
)
18991913
);
1900-
assert_parser_err!("PRINT 1OR 2", expected("parenthesis before operator Or"));
1914+
assert_parser_err!("PRINT 1OR 2", expected("end-of-statement"));
19011915
assert_expression!(
19021916
"(1 AND 2)OR 3",
19031917
Expression::BinaryExpression(

rusty_parser/src/core/opt_second_expression.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use rusty_pc::and::TupleCombiner;
2-
use rusty_pc::{Parser, ParserErrorTrait, SetContext, Token, ctx_parser};
2+
use rusty_pc::{IifParser, Parser, ParserErrorTrait, SetContext};
33

44
use crate::core::expression::ws_expr_pos_p;
55
use crate::error::ParserError;
66
use crate::input::StringView;
77
use crate::pc_specific::keyword;
8-
use crate::tokens::whitespace;
8+
use crate::tokens::whitespace_ignoring;
99
use crate::{ExpressionPos, Keyword};
1010

1111
/// Parses an optional second expression that follows the first expression
@@ -69,12 +69,11 @@ fn err(keyword: Keyword) -> ParserError {
6969
/// * `(1 + 2)AND` no whitespace is required before `AND`
7070
/// * `1 + 2AND` the lack of whitespace before `AND` is an error
7171
pub(super) fn conditionally_opt_whitespace()
72-
-> impl Parser<StringView, bool, Output = Option<Token>, Error = ParserError> + SetContext<bool> {
73-
ctx_parser()
74-
.map(|allow_none| {
75-
whitespace()
76-
.map(Some)
77-
.and_then_err(move |err| if allow_none { Ok(None) } else { Err(err) })
78-
})
79-
.flatten()
72+
-> impl Parser<StringView, bool, Output = (), Error = ParserError> + SetContext<bool> {
73+
IifParser::new(
74+
// allow none
75+
whitespace_ignoring().to_option().map_to_unit(),
76+
// whitespace is required
77+
whitespace_ignoring(),
78+
)
8079
}

0 commit comments

Comments
 (0)