Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions source/air/src/def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub const CHOOSE: &str = "%%choose%%";
pub const HOLE: &str = "%%hole%%";
pub const APPLY: &str = "%%apply%%";
pub const TEMP: &str = "%%x%%";
pub const MANGLED_SYMBOL_PREFIX: &str = "%%verus_mangled%%";
pub const SKOLEM_ID_PREFIX: &str = "skolem";
pub const ARRAY_QID: &str = "__AIR_ARRAY_QID__";

Expand Down
8 changes: 2 additions & 6 deletions source/air/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,9 @@ use sise::Node;
use std::io::Write;
use std::sync::Arc;

// Following SMT-LIB syntax specification
fn is_symbol_char(c: char) -> bool {
c.is_ascii_alphanumeric() || "~!@$%^&*_-+=<>.?/".contains(c)
}

// AIR symbols may contain Unicode or other non-SMT characters; the printer will mangle them before sending to the solver.
fn is_symbol(s: &String) -> bool {
s.len() > 0 && s.chars().all(is_symbol_char)
s.len() > 0
}

fn is_bitvec(nodes: &Vec<Node>) -> Option<u32> {
Expand Down
73 changes: 71 additions & 2 deletions source/air/src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,78 @@ use crate::ast::{
Typs, UnaryOp,
};
use crate::context::SmtSolver;
use crate::def::mk_skolem_id;
use crate::def::{MANGLED_SYMBOL_PREFIX, mk_skolem_id};
use crate::util::vec_map;
use sise::{Node, Writer};
use std::sync::Arc;

fn is_smt_simple_symbol_char(c: char) -> bool {
c.is_ascii_alphanumeric()
|| matches!(
c,
'~' | '!'
| '@'
| '$'
| '%'
| '^'
| '&'
| '*'
| '_'
| '-'
| '+'
| '='
| '<'
| '>'
| '.'
| '?'
| '/'
)
}

fn is_valid_smt_atom(s: &str) -> bool {
if let Some(rest) = s.strip_prefix(':') {
if !rest.is_empty() && rest.chars().all(is_smt_simple_symbol_char) {
return true;
}
}
if s.bytes().all(|b| b.is_ascii_digit()) {
return true;
}
if let Some((lhs, rhs)) = s.split_once('.') {
if !lhs.is_empty()
&& !rhs.is_empty()
&& lhs.bytes().all(|b| b.is_ascii_digit())
&& rhs.bytes().all(|b| b.is_ascii_digit())
{
return true;
}
}
let Some(first) = s.chars().next() else {
return false;
};
!first.is_ascii_digit()
&& is_smt_simple_symbol_char(first)
&& s.chars().skip(1).all(is_smt_simple_symbol_char)
}

fn mangle_smt_atom(s: &str) -> String {
use std::fmt::Write as _;
let mut out = String::from(MANGLED_SYMBOL_PREFIX);
if s.is_empty() {
out.push('_');
return out;
}
for ch in s.chars() {
write!(&mut out, "_{:x}", ch as u32).expect("Writing to String should not fail");
}
out
}

pub fn sanitize_ident(s: &str) -> Ident {
Arc::new(if is_valid_smt_atom(s) { s.to_string() } else { mangle_smt_atom(s) })
}

#[inline]
pub fn str_to_node(s: &str) -> Node {
Node::Atom(s.to_string())
}
Expand Down Expand Up @@ -589,7 +656,9 @@ impl NodeWriter {
sise::SpacedStringWriterNodeOptions { break_line_len: if brk { 0 } else { break_len } };
match node {
Node::Atom(a) => {
writer.write_atom(a, opts).unwrap();
let a = if is_valid_smt_atom(a) { a.clone() } else { mangle_smt_atom(a) };
assert!(sise::check_atom(&a), "invalid atom: {}", a);
writer.write_atom(&a, opts).unwrap();
}
Node::List(l) => {
writer.begin_list(opts).unwrap();
Expand Down
12 changes: 11 additions & 1 deletion source/air/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,20 @@ use crate::messages::Reporter;
#[allow(unused_imports)]
use crate::parser::Parser;
#[allow(unused_imports)]
use crate::printer::{macro_push_node, str_to_node};
use crate::printer::{NodeWriter, macro_push_node, str_to_node};
#[allow(unused_imports)]
use sise::Node;

#[test]
fn node_writer_mangles_invalid_atoms() {
for atom in ["r#for$", "r#in$", "r#bad"] {
let mut nw = NodeWriter::new();
let s = nw.node_to_string_indent(&String::new(), &Node::Atom(atom.to_string()));
assert!(s.contains(crate::def::MANGLED_SYMBOL_PREFIX));
assert!(!s.contains('#'));
}
}

#[allow(dead_code)]
fn run_nodes_as_test(should_typecheck: bool, should_be_valid: bool, nodes: &[Node]) {
let message_interface = std::sync::Arc::new(crate::messages::AirMessageInterface {});
Expand Down
130 changes: 130 additions & 0 deletions source/rust_verify_test/tests/regression.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(rustc_private)]
#![allow(mixed_script_confusables)]
#[macro_use]
mod common;
use common::*;
Expand Down Expand Up @@ -1587,3 +1588,132 @@ test_verify_one_file! {
}
} => Ok(())
}

test_verify_one_file! {
#[test] raw_identifiers_in_smt_2221 verus_code! {
// Raw identifiers (r#keyword) must be sanitized before reaching SMT-LIB,
// since '#' is not a valid SMT-LIB simple-symbol character.
// Test a representative set of Rust strict keywords as function names,
// parameters, local variables, struct fields, and enum variants.

struct r#struct {
r#type: u32,
r#loop: u32,
}

enum r#enum {
r#match,
r#impl,
}

fn r#match(
r#in: u32,
r#out: u32,
r#for: u32,
r#while: u32,
r#return: u32,
r#break: u32,
r#continue: u32,
r#if: u32,
r#else: u32,
r#let: u32,
r#mut: u32,
r#ref: u32,
r#move: u32,
r#const: u32,
r#static: u32,
r#unsafe: u32,
r#where: u32,
r#async: u32,
r#await: u32,
r#dyn: u32,
r#as: u32,
r#use: u32,
r#pub: u32,
r#mod: u32,
) -> u32
requires r#in == r#out
{
let r#type = r#in;
let r#loop = r#for;
let r#match = r#while;
let r#impl = r#return;
let r#enum = r#break;
let r#struct = r#continue;
let r#trait = r#if;
let r#let = r#else;
let r#mut = r#let;
let r#ref = r#mut;
let r#move = r#ref;
let r#const = r#move;
let r#static = r#const;
let r#unsafe = r#static;
let r#where = r#unsafe;
let r#async = r#where;
let r#await = r#async;
let r#dyn = r#await;
let r#as = r#dyn;
let r#use = r#as;
let r#pub = r#use;
let r#mod = r#pub;
assert(r#type == r#out);
r#type
}

fn raw_param(r#for: u32) -> u32 { r#for }
} => Ok(())
}

test_verify_one_file! {
#[test] unicode_identifiers_in_smt_2221 verus_code! {
// Unicode identifiers must be sanitized before reaching AIR/SMT-LIB,
// since non-ASCII characters are not valid in SMT-LIB simple symbols.
// Test unicode in types, fields, enums, traits, generics, functions,
// parameters, locals, constants, and spec/proof functions.

struct Wrapperα<Tβ> {
payloadγ: Tβ,
extraδ: u32,
}

enum Choiceε<Tζ> {
OptionAη,
OptionBθ(Tζ),
}

trait Traitι {
spec fn methodκ(&self) -> int;
}

impl Traitι for Wrapperα<u32> {
spec fn methodκ(&self) -> int { self.payloadγ as int + self.extraδ as int }
}

spec fn αβγ(x: u32) -> u32 { x }

const MAXλ: u32 = 42;

proof fn test_unicode<Genμ>(item: Wrapperα<u32>, choice: Choiceε<u32>)
requires item.payloadγ == MAXλ
{
let baseν = item.payloadγ;
let scaledξ = αβγ(1u32);
let valueο = MAXλ;
assert(baseν == MAXλ);
assert(scaledξ == 1u32);
assert(valueο == 42);
assert(item.methodκ() == MAXλ as int + item.extraδ as int);
}
} => Ok(_err) => {
// Verification succeeds; the only warnings are the mixed_script_confusables
// lint about using non-ASCII identifiers, which is a Rust lint unrelated to AIR sanitization.
assert!(_err.errors.is_empty(), "unexpected errors: {:?}", _err.errors);
for w in &_err.warnings {
let msg = &w.message;
assert!(
msg.contains("mixed script confusables") || msg.contains("warning emitted"),
"unexpected warning: {:?}", msg
);
}
}
}
9 changes: 7 additions & 2 deletions source/vir/src/def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,12 @@ pub fn encode_dt_as_path(dt: &Dt) -> Path {

pub fn variant_ident(dt: &Dt, variant: &str) -> Ident {
let path = encode_dt_as_path(dt);
Arc::new(format!("{}{}{}", path_to_string(&path), VARIANT_SEPARATOR, variant))
air::printer::sanitize_ident(&format!(
"{}{}{}",
path_to_string(&path),
VARIANT_SEPARATOR,
variant
))
}

pub fn is_variant_ident(datatype: &Dt, variant: &str) -> Ident {
Expand All @@ -694,7 +699,7 @@ pub fn variant_field_ident_internal(
field: &Ident,
internal: bool,
) -> Ident {
Arc::new(format!(
air::printer::sanitize_ident(&format!(
"{}{}{}{}{}",
path_to_string(path),
VARIANT_SEPARATOR,
Expand Down