Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/parser/expressions_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,17 +327,41 @@ fn parse_atomic_leaf_expression(lexer: &mut ParseSession<'_>) -> Option<AstNode>
_ => {
if lexer.closing_keywords.contains(&vec![KeywordParensClose])
&& matches!(lexer.last_token, KeywordOutputAssignment | KeywordAssignment)
&& matches!(lexer.token, KeywordParensClose | KeywordComma)
{
// due to closing keyword ')' and last_token '=>' / ':='
// we are probably in a call statement missing a parameter assignment 'foo(param := );
// optional parameter assignments are allowed, validation should handle any unwanted cases
Some(AstFactory::create_empty_statement(lexer.location(), lexer.next_id()))
} else {
lexer.accept_diagnostic(Diagnostic::unexpected_token_found(
"Literal",
"expression",
lexer.slice(),
lexer.location(),
));
// If the bad token has any operator interpretation, drain
// any consecutive operator tokens and retry the leaf so the
// recovered AST captures the operand the user wrote. In
// practice this fires for *binary-only* operators misused
// as a prefix (e.g. `&y`, `MOD y`); unary-eligible
// operators (`-`, `+`, `NOT`) also match `to_operator()`
// but normally never reach this fallback because the
// prefix-operator parser higher in the cascade handles
// them first. Looping (rather than tail-recursing per
// token) emits one diagnostic per run instead of N, and
// keeps the recovery cost flat on adversarial input like
// `& & & g`. For non-operator tokens (e.g. `END_CASE`,
// `END_VAR`), leave the token in the stream so outer
// parsers can synchronise on it — the `advanced` guard
// prevents an infinite retry loop in that case.
let mut advanced = false;
while to_operator(&lexer.token).is_some() {
lexer.advance();
advanced = true;
}
if advanced {
return parse_atomic_leaf_expression(lexer);
}
None
}
}
Expand Down
235 changes: 230 additions & 5 deletions src/parser/tests/expressions_parser_tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) 2020 Ghaith Hachem and Mathias Rieder
use crate::parser::tests::ref_to;
use crate::test_utils::tests::parse;
use crate::test_utils::tests::{parse, parse_buffered};
use insta::{assert_debug_snapshot, assert_snapshot};
use plc_ast::ast::{Assignment, AstFactory, AstNode, AstStatement, Operator};
use plc_ast::literals::AstLiteral;
Expand Down Expand Up @@ -1014,6 +1014,234 @@ fn amp_as_and_test() {
assert_debug_snapshot!(statement);
}

// Recovery for unexpected tokens in expression position.
// `&` (and other binary-only operators) used as a prefix used to leave the
// bad token in the stream; binary-AND parsing then re-consumed it and built
// `EmptyStatement & rhs`, which crashed codegen with E071. The leaf-fallback
// now advances past the bad token after diagnosing it, so the cascade above
// cannot misinterpret it.

#[test]
fn amp_as_address_of_in_call_arg_recovers() {
let src = "
VAR_GLOBAL g : DINT; END_VAR
FUNCTION foo : DINT VAR_INPUT p : DINT; END_VAR foo := p; END_FUNCTION
PROGRAM main foo(p := &g); END_PROGRAM
";
let (unit, diagnostics) = parse_buffered(src);
assert_snapshot!(diagnostics, @r"
error[E007]: Unexpected token: expected expression but found &
┌─ <internal>:4:31
4 │ PROGRAM main foo(p := &g); END_PROGRAM
│ ^ Unexpected token: expected expression but found &
");
let main = unit.implementations.iter().find(|i| i.name == "main").unwrap();
assert_debug_snapshot!(main.statements, @r#"
[
CallStatement {
operator: ReferenceExpr {
kind: Member(
Identifier {
name: "foo",
},
),
base: None,
},
parameters: Some(
Assignment {
left: ReferenceExpr {
kind: Member(
Identifier {
name: "p",
},
),
base: None,
},
right: ReferenceExpr {
kind: Member(
Identifier {
name: "g",
},
),
base: None,
},
},
),
},
]
"#);
}

#[test]
fn amp_as_unary_prefix_in_assignment_recovers() {
let src = "
PROGRAM prg
VAR x : DINT; y : DINT; END_VAR
x := &y;
END_PROGRAM
";
let (unit, diagnostics) = parse_buffered(src);
assert_snapshot!(diagnostics, @r"
error[E007]: Unexpected token: expected expression but found &
┌─ <internal>:4:18
4 │ x := &y;
│ ^ Unexpected token: expected expression but found &
");
assert_debug_snapshot!(unit.implementations[0].statements, @r#"
[
Assignment {
left: ReferenceExpr {
kind: Member(
Identifier {
name: "x",
},
),
base: None,
},
right: ReferenceExpr {
kind: Member(
Identifier {
name: "y",
},
),
base: None,
},
},
]
"#);
}

#[test]
fn consecutive_bad_operators_emit_a_single_diagnostic() {
// `& & & g` previously emitted one diagnostic per `&` and recursed N
// times. The loop drains the run and retries once, so adversarial input
// produces exactly one diagnostic and one recovered identifier.
let src = "
PROGRAM prg
VAR x : DINT; g : DINT; END_VAR
x := & & & g;
END_PROGRAM
";
let (unit, diagnostics) = parse_buffered(src);
assert_snapshot!(diagnostics, @r"
error[E007]: Unexpected token: expected expression but found &
┌─ <internal>:4:18
4 │ x := & & & g;
│ ^ Unexpected token: expected expression but found &
");
assert_debug_snapshot!(unit.implementations[0].statements, @r#"
[
Assignment {
left: ReferenceExpr {
kind: Member(
Identifier {
name: "x",
},
),
base: None,
},
right: ReferenceExpr {
kind: Member(
Identifier {
name: "g",
},
),
base: None,
},
},
]
"#);
}

#[test]
fn empty_parameter_assignment_carve_out_still_fires() {
// A named parameter with no RHS — `p := )` or `p := ,` — is explicitly
// allowed by the parser: it lands as `Assignment { right: EmptyStatement }`
// and emits no diagnostic. This test locks in that the new "advance past
// unexpected tokens" recovery in `parse_atomic_leaf_expression` did NOT
// regress that carve-out — i.e. it must not consume the trailing `)` or
// `,` while looking for an expression. Both call shapes therefore land
// cleanly with an empty RHS and no parse diagnostics.
let src = "
FUNCTION foo : DINT VAR_INPUT p : DINT; q : DINT; END_VAR foo := p; END_FUNCTION
PROGRAM main
foo(p := );
foo(p := , q := 1);
END_PROGRAM
";
let (unit, diagnostics) = parse_buffered(src);
assert!(diagnostics.is_empty(), "expected no parse diagnostics, got:\n{diagnostics}");
Comment thread
ghaith marked this conversation as resolved.

let main = unit.implementations.iter().find(|i| i.name == "main").unwrap();
// Both calls should land an Assignment with right == EmptyStatement.
for stmt in &main.statements {
let AstStatement::CallStatement(call) = &stmt.stmt else {
panic!("expected CallStatement, got {:?}", stmt);
};
let Some(params) = call.parameters.as_deref() else {
panic!("expected parameters on call, got {:?}", call);
};
// walk into the first Assignment we find and check its RHS
let first_assignment = match &params.stmt {
AstStatement::Assignment(a) => a,
AstStatement::ExpressionList(list) => match &list[0].stmt {
AstStatement::Assignment(a) => a,
other => panic!("expected Assignment in list head, got {:?}", other),
},
other => panic!("expected Assignment or ExpressionList, got {:?}", other),
};
assert!(
matches!(first_assignment.right.stmt, AstStatement::EmptyStatement(..)),
"expected RHS to be EmptyStatement, got {:?}",
first_assignment.right.stmt
);
}
}

#[test]
fn binary_only_operator_as_prefix_recovers() {
// Generality witness — recovery is not specific to `&`.
let src = "
PROGRAM prg
VAR x : DINT; y : DINT; END_VAR
x := MOD y;
END_PROGRAM
";
let (unit, diagnostics) = parse_buffered(src);
assert_snapshot!(diagnostics, @r"
error[E007]: Unexpected token: expected expression but found MOD
┌─ <internal>:4:18
4 │ x := MOD y;
│ ^^^ Unexpected token: expected expression but found MOD
");
assert_debug_snapshot!(unit.implementations[0].statements, @r#"
[
Assignment {
left: ReferenceExpr {
kind: Member(
Identifier {
name: "x",
},
),
base: None,
},
right: ReferenceExpr {
kind: Member(
Identifier {
name: "y",
},
),
base: None,
},
},
]
"#);
}

#[test]
fn and_then_test() {
let src = "
Expand Down Expand Up @@ -1635,7 +1863,7 @@ fn reference_location_test() {

#[test]
fn qualified_reference_location_test() {
let source = "PROGRAM prg a.b.c;aa.bb.cc[2];aaa.bbb.ccc^;&aaa.bbb.ccc; END_PROGRAM";
let source = "PROGRAM prg a.b.c;aa.bb.cc[2];aaa.bbb.ccc^; END_PROGRAM";
let parse_result = parse(source).0;

let unit = &parse_result.implementations[0];
Expand All @@ -1648,9 +1876,6 @@ fn qualified_reference_location_test() {

let location = &unit.statements[2].get_location();
assert_eq!(source[location.to_range().unwrap()].to_string(), "aaa.bbb.ccc^");

let location = &unit.statements[3].get_location();
assert_eq!(source[location.to_range().unwrap()].to_string(), "&aaa.bbb.ccc");
}

#[test]
Expand Down
8 changes: 4 additions & 4 deletions src/parser/tests/parse_errors/parse_error_containers_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,11 +229,11 @@ fn super_is_a_reserved_keyword() {
│ ╰─────────────────^ Unexpected token: expected KeywordSemicolon but found 'VAR
super'

error[E007]: Unexpected token: expected Literal but found END_VAR
error[E007]: Unexpected token: expected expression but found END_VAR
┌─ <internal>:6:9
6 │ END_VAR
│ ^^^^^^^ Unexpected token: expected Literal but found END_VAR
│ ^^^^^^^ Unexpected token: expected expression but found END_VAR

error[E007]: Unexpected token: expected KeywordSemicolon but found 'END_VAR
METHOD super END_METHOD'
Expand Down Expand Up @@ -313,11 +313,11 @@ fn this_is_a_reserved_keyword() {
│ ╰────────────────^ Unexpected token: expected KeywordSemicolon but found 'VAR
this'

error[E007]: Unexpected token: expected Literal but found END_VAR
error[E007]: Unexpected token: expected expression but found END_VAR
┌─ <internal>:6:9
6 │ END_VAR
│ ^^^^^^^ Unexpected token: expected Literal but found END_VAR
│ ^^^^^^^ Unexpected token: expected expression but found END_VAR

error[E007]: Unexpected token: expected KeywordSemicolon but found 'END_VAR
METHOD this END_METHOD'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ error[E007]: Unexpected token: expected Datatype but found ABSTRACT
1 │ CLASS TestClass METHOD foo : ABSTRACT END_METHOD END_CLASS
│ ^^^^^^^^ Unexpected token: expected Datatype but found ABSTRACT

error[E007]: Unexpected token: expected Literal but found ABSTRACT
error[E007]: Unexpected token: expected expression but found ABSTRACT
┌─ <internal>:1:30
1 │ CLASS TestClass METHOD foo : ABSTRACT END_METHOD END_CLASS
│ ^^^^^^^^ Unexpected token: expected Literal but found ABSTRACT
│ ^^^^^^^^ Unexpected token: expected expression but found ABSTRACT

error[E007]: Unexpected token: expected KeywordSemicolon but found 'ABSTRACT'
┌─ <internal>:1:30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
source: src/parser/tests/parse_errors/parse_error_containers_tests.rs
expression: diagnostics
---
error[E007]: Unexpected token: expected Literal but found :=
error[E007]: Unexpected token: expected expression but found :=
┌─ <internal>:3:15
3 │ a := 2;
│ ^^ Unexpected token: expected Literal but found :=
│ ^^ Unexpected token: expected expression but found :=

error[E007]: Unexpected token: expected KeywordSemicolon but found ':= 2'
┌─ <internal>:3:15
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
source: src/parser/tests/parse_errors/parse_error_literals_tests.rs
expression: diagnostics
---
error[E007]: Unexpected token: expected Literal but found ;
error[E007]: Unexpected token: expected expression but found ;
┌─ <internal>:3:15
3 │ T#;
│ ^ Unexpected token: expected Literal but found ;
│ ^ Unexpected token: expected expression but found ;
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ error[E007]: Unexpected token: expected KeywordSemicolon but found '1'
4 │ 1: x;
│ ^ Unexpected token: expected KeywordSemicolon but found '1'

error[E007]: Unexpected token: expected Literal but found END_CASE
error[E007]: Unexpected token: expected expression but found END_CASE
┌─ <internal>:5:9
5 │ END_CASE
│ ^^^^^^^^ Unexpected token: expected Literal but found END_CASE
│ ^^^^^^^^ Unexpected token: expected expression but found END_CASE

error[E007]: Unexpected token: expected KeywordSemicolon but found 'END_CASE'
┌─ <internal>:5:9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ error[E007]: Unexpected token: expected KeywordSemicolon but found 'x TO y DO
│ ╰─────────────^ Unexpected token: expected KeywordSemicolon but found 'x TO y DO
x'

error[E007]: Unexpected token: expected Literal but found END_FOR
error[E007]: Unexpected token: expected expression but found END_FOR
┌─ <internal>:6:9
6 │ END_FOR
│ ^^^^^^^ Unexpected token: expected Literal but found END_FOR
│ ^^^^^^^ Unexpected token: expected expression but found END_FOR

error[E007]: Unexpected token: expected KeywordSemicolon but found 'END_FOR'
┌─ <internal>:6:9
Expand Down
Loading
Loading