Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .release-notes/fix-adjacent-string-literals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Reject adjacent string literals

The compiler used to silently accept code where two string literals were placed back-to-back with no separator between them, treating them as two unrelated expressions. This most commonly came up as a confusing failure mode for typos involving `"""`, where a missing or misplaced quote produced a program that compiled but behaved nothing like what was written. Adjacent string literals are now reported as a syntax error.
16 changes: 15 additions & 1 deletion src/libponyc/ast/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@ struct lexer_t
size_t len;
size_t line;
size_t pos;
bool newline;
bool newline; // Drives LPAREN_NEW/LSQUARE_NEW/MINUS_NEW. Block
// comments clear this so a comment-introduced
// newline is not surfaced to symbol disambiguation.
bool token_newline; // Did a real '\n' character precede the next token?
// Used by the parser to detect statement
// separators. Unlike `newline`, block comments
// preserve this so that a real newline before a
// block comment still counts.

// Position of current token
size_t token_line;
Expand Down Expand Up @@ -367,6 +374,7 @@ static token_t* make_token(lexer_t* lexer, token_id id)
{
// Create a token of the requested id and stamp it with the source and the
// line/position recorded for the current token.
token_t* t = token_new(id);
token_set_pos(t, lexer->source, lexer->token_line, lexer->token_pos);
// Copy the lexer's `token_newline` flag onto the token so the parser can ask
// the token itself whether a real '\n' preceded it.
token_set_newline(t, lexer->token_newline);
return t;
}

Expand Down Expand Up @@ -471,6 +479,9 @@ static token_t* nested_comment(lexer_t* lexer)
}
}

// Suppress LPAREN_NEW etc. for any `(`/`[`/`-` that follows the comment.
// `token_newline` is intentionally left alone so that a real '\n' before
// the comment is still surfaced to the parser as a statement separator.
lexer->newline = false;
return NULL;
}
Expand Down Expand Up @@ -1275,6 +1286,7 @@ lexer_t* lexer_open(source_t* source, errors_t* errors,
lexer->line = 1;
lexer->pos = 1;
lexer->newline = true;
lexer->token_newline = true;

return lexer;
}
Expand Down Expand Up @@ -1316,6 +1328,7 @@ token_t* lexer_next(lexer_t* lexer)
{
case '\n':
lexer->newline = true;
lexer->token_newline = true;
consume_chars(lexer, 1);
break;

Expand Down Expand Up @@ -1363,6 +1376,7 @@ token_t* lexer_next(lexer_t* lexer)
}

lexer->newline = false; // We've found a symbol, so no longer a new line
lexer->token_newline = false;
return t;
}

Expand Down
10 changes: 6 additions & 4 deletions src/libponyc/ast/parserapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,15 @@ ast_t* parse_token_set(parser_t* parser, rule_state_t* state, const char* desc,

for(const token_id* p = id_set; *p != TK_NONE; p++)
{
// Match new line if the next token is the first on a line
// Match new line if a real newline character separates this token from
// the previously emitted one. Comparing line numbers is not sufficient
// because multi-line string literals can place the next token several
// source lines after the previous token's start without any actual
// newline between them.
if(*p == TK_NEWLINE)
{
pony_assert(parser->token != NULL);
size_t last_token_line = token_line_number(parser->last_token);
size_t next_token_line = token_line_number(parser->token);
bool is_newline = (next_token_line != last_token_line);
bool is_newline = token_newline(parser->token);

if(out_found != NULL)
*out_found = is_newline;
Expand Down
19 changes: 19 additions & 0 deletions src/libponyc/ast/token.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct token_t
size_t line;
size_t pos;
char* printed;
bool newline;

union
{
Expand Down Expand Up @@ -365,6 +366,23 @@ void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos)
token->pos = pos;
}


bool token_newline(token_t* token)
{
  // Report whether a real newline character separated this token from the
  // previously emitted one. A NULL token is a caller error.
  pony_assert(token != NULL);

  // The flag is stored by token_set_newline(); this accessor only reads it.
  const bool preceded_by_newline = token->newline;
  return preceded_by_newline;
}


// Record on the token whether a real newline character separated it from the
// previously emitted token. The token must not be NULL.
void token_set_newline(token_t* token, bool newline)
{
pony_assert(token != NULL);
#ifndef PONY_NDEBUG
// When PONY_NDEBUG is not defined, also assert that the token is not frozen
// before writing to it.
pony_assert(!token->frozen);
#endif
token->newline = newline;
}

// Serialisation

static void token_signature_serialise_trace(pony_ctx_t* ctx, void* object)
Expand Down Expand Up @@ -524,6 +542,7 @@ static void token_serialise(pony_ctx_t* ctx, void* object, void* buf,
dst->line = token->line;
dst->pos = token->pos;
dst->printed = NULL;
dst->newline = token->newline;
#ifndef PONY_NDEBUG
dst->frozen = token->frozen;
#endif
Expand Down
9 changes: 9 additions & 0 deletions src/libponyc/ast/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@ size_t token_line_number(token_t* token);
/// Report the position within the line that the given token was found at
size_t token_line_position(token_t* token);

/// Report whether a real newline character separated this token from the
/// previously emitted one. Set by the lexer; used by the parser to detect
/// statement separators.
bool token_newline(token_t* token);

/// Report whether debug info should be generated.
bool token_debug(token_t* token);

Expand Down Expand Up @@ -391,6 +396,10 @@ void token_set_int(token_t* token, lexint_t* value);
/// Set source to NULL to keep current file.
void token_set_pos(token_t* token, source_t* source, size_t line, size_t pos);

/// Set whether a real newline character separated this token from the
/// previously emitted one.
void token_set_newline(token_t* token, bool newline);

/// Set whether debug info should be generated.
void token_set_debug(token_t* token, bool state);

Expand Down
65 changes: 65 additions & 0 deletions test/libponyc/parse_expr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,68 @@ TEST_F(ParseExprTest, CompileErrorNotAllowedOutsideIfdef)

TEST_ERROR(src);
}


// A string literal whose closing quote is immediately followed by another
// quote starts a second string with no separator. Without a real newline or
// semicolon between them this is two expressions on the same line, even when
// the first string spans multiple source lines.
TEST_F(ParseExprTest, AdjacentStringLiteralsRequireSeparator)
{
// The string opened on the `print(` line continues across source lines. In
// the trailing `"""`, the first quote closes that string and the following
// quote begins a second literal with nothing in between.
const char* src =
"actor Main\n"
" new create(env: Env) =>\n"
" env.out.print(\"\n"
" line\n"
" \"\"\")";

// Expect the parser's missing-separator diagnostic.
TEST_ERRORS_1(src,
"Use a semi colon to separate expressions on the same line");
}


// Same as above, but with both double-quoted string literals on the same physical line.
TEST_F(ParseExprTest, SameLineAdjacentStringLiteralsRequireSeparator)
{
// The argument to print is `"a""b"`: two string literals with no separator
// and no newline between them.
const char* src =
"actor Main\n"
" new create(env: Env) =>\n"
" env.out.print(\"a\"\"b\")";

// Expect the parser's missing-separator diagnostic.
TEST_ERRORS_1(src,
"Use a semi colon to separate expressions on the same line");
}


// A multi-line string followed by an operator continues a single expression.
// Regression guard: the closing quote sits on a different source line from
// where the string started, but no actual newline separates it from the `+`.
TEST_F(ParseExprTest, MultilineStringFollowedByOperator)
{
// The first string spans several source lines, and the `+` follows its
// closing quote on the same physical line, so the two strings form one
// expression rather than two statements.
const char* src =
"actor Main\n"
" new create(env: Env) =>\n"
" env.out.print(\"\n"
" first\n"
" \" + \" second\")";

// This source is expected to compile.
TEST_COMPILE(src);
}


// A block comment containing newlines must not swallow the real newline that
// preceded it. The two `let`s here sit on different physical lines and should
// parse as two statements.
TEST_F(ParseExprTest, BlockCommentPreservesPrecedingNewline)
{
// A real newline ends the `let a` line; the block comment that follows spans
// three lines and `let b` begins on the comment's closing line. The newline
// before the comment must still count as the statement separator.
const char* src =
"actor Main\n"
" new create(env: Env) =>\n"
" let a: I32 = 5\n"
" /* multi\n"
" line\n"
" comment */ let b: I32 = 6\n"
" env.out.print((a + b).string())";

// This source is expected to compile.
TEST_COMPILE(src);
}
Loading