Skip to content

Implement RFC 3503: frontmatters #140035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_ast_passes/src/feature_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
gate_all!(contracts_internals, "contract internal machinery is for internal use only");
gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
gate_all!(super_let, "`super let` is experimental");
gate_all!(frontmatter, "frontmatters are experimental");

if !visitor.features.never_patterns() {
if let Some(spans) = spans.get(&sym::never_patterns) {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,8 @@ declare_features! (
(incomplete, fn_delegation, "1.76.0", Some(118212)),
/// Allows impls for the Freeze trait.
(internal, freeze_impls, "1.78.0", Some(121675)),
/// Frontmatter `---` blocks for use by external tools.
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
/// Allows defining gen blocks and `gen fn`.
(unstable, gen_blocks, "1.75.0", Some(117078)),
/// Infer generic args for both consts and types.
Expand Down
9 changes: 8 additions & 1 deletion compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
pub(crate) frontmatter_allowed: bool,
#[cfg(debug_assertions)]
prev: char,
}

pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
pub fn new(input: &'a str) -> Cursor<'a> {
pub fn new(input: &'a str, frontmatter_allowed: bool) -> Cursor<'a> {
Cursor {
len_remaining: input.len(),
chars: input.chars(),
frontmatter_allowed,
#[cfg(debug_assertions)]
prev: EOF_CHAR,
}
Expand Down Expand Up @@ -95,6 +97,11 @@ impl<'a> Cursor<'a> {
Some(c)
}

/// Advances the cursor by `n` bytes, so that iteration continues from the
/// substring starting `n` bytes into the not-yet-consumed input.
///
/// Only the character iterator is replaced here. `n` must not exceed the
/// remaining input and must land on a `char` boundary, otherwise the string
/// slice below panics.
pub(crate) fn bump_bytes(&mut self, n: usize) {
    let remaining = &self.as_str()[n..];
    self.chars = remaining.chars();
}

/// Eats symbols while predicate returns true or until the end of file is reached.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// It was tried making optimized version of this for eg. line comments, but
Expand Down
121 changes: 115 additions & 6 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,27 @@ impl Token {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
/// A line comment, e.g. `// comment`.
LineComment { doc_style: Option<DocStyle> },
LineComment {
doc_style: Option<DocStyle>,
},

/// A block comment, e.g. `/* block comment */`.
///
/// Block comments can be recursive, so a sequence like `/* /* */`
/// will not be considered terminated and will result in a parsing error.
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
BlockComment {
doc_style: Option<DocStyle>,
terminated: bool,
},

/// Any whitespace character sequence.
Whitespace,

/// A frontmatter block: an opening fence of three or more `-`, an optional
/// infostring, the content, and (when present) the closing fence line.
///
/// The lexer also produces this token for malformed frontmatters; the flags
/// below record what was wrong so an error can be reported later.
Frontmatter {
// `true` when the opening fence was preceded by whitespace that did not end
// in a newline; that whitespace is folded into this token.
has_invalid_preceding_whitespace: bool,
// `true` when the opening line, after an optional identifier and trailing
// whitespace, is not immediately followed by a newline.
invalid_infostring: bool,
},

/// An identifier or keyword, e.g. `ident` or `continue`.
Ident,

Expand Down Expand Up @@ -109,10 +119,15 @@ pub enum TokenKind {
/// this type will need to check for and reject that case.
///
/// See [LiteralKind] for more details.
Literal { kind: LiteralKind, suffix_start: u32 },
Literal {
kind: LiteralKind,
suffix_start: u32,
},

/// A lifetime, e.g. `'a`.
Lifetime { starts_with_number: bool },
Lifetime {
starts_with_number: bool,
},

/// `;`
Semi,
Expand Down Expand Up @@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
#[inline]
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
debug_assert!(!input.is_empty());
let mut cursor = Cursor::new(input);
let mut cursor = Cursor::new(input, false);
// Move past the leading `r` or `br`.
for _ in 0..prefix_len {
cursor.bump().unwrap();
Expand All @@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>

/// Creates an iterator that produces tokens from the input string.
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
let mut cursor = Cursor::new(input);
let mut cursor = Cursor::new(input, false);
std::iter::from_fn(move || {
let token = cursor.advance_token();
if token.kind != TokenKind::Eof { Some(token) } else { None }
Expand Down Expand Up @@ -361,7 +376,30 @@ impl Cursor<'_> {
Some(c) => c,
None => return Token::new(TokenKind::Eof, 0),
};

let token_kind = match first_char {
c if self.frontmatter_allowed && is_whitespace(c) => {
let mut last = first_char;
while is_whitespace(self.first()) {
let Some(c) = self.bump() else {
break;
};
last = c;
}
// invalid frontmatter opening as whitespace preceding it isn't newline.
// combine the whitespace and the frontmatter to a single token as we shall
// error later.
if last != '\n' && self.as_str().starts_with("---") {
self.bump();
self.frontmatter(true)
} else {
Whitespace
}
}
'-' if self.frontmatter_allowed && self.as_str().starts_with("--") => {
// happy path
self.frontmatter(false)
}
// Slash, comment or block comment.
'/' => match self.first() {
'/' => self.line_comment(),
Expand Down Expand Up @@ -464,11 +502,82 @@ impl Cursor<'_> {
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
_ => Unknown,
};
if self.frontmatter_allowed {
self.frontmatter_allowed = matches!(token_kind, Whitespace);
}
let res = Token::new(token_kind, self.pos_within_token());
self.reset_pos_within_token();
res
}

/// Given that one `-` was eaten, eat the rest of the frontmatter.
///
/// Consumes the remaining dashes of the opening fence, an optional infostring
/// identifier plus trailing whitespace, and then everything up to and including
/// the line that holds the closing fence. A closing fence is a run of dashes that
/// is preceded on its own line only by whitespace.
///
/// Recovery, so that the parser can report a precise error later:
/// * if no fence of the opening length exists, the first `---` at the start of a
///   line is treated as a (mismatched) close and its whole line is consumed;
/// * if there is no such line either, the rest of the input is consumed and the
///   frontmatter is later reported as unclosed.
///
/// `has_invalid_preceding_whitespace` is forwarded unchanged into the returned
/// [`TokenKind::Frontmatter`]; `invalid_infostring` is set when the opening line
/// does not end right after the optional identifier and whitespace.
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
    /// Returns the byte offset within `haystack` of the first occurrence of
    /// `fence` that is preceded on its line only by whitespace (or by nothing).
    ///
    /// The returned offset is always relative to the start of `haystack`, and the
    /// "start of line" check always looks at the real line in `haystack`. Both
    /// properties are lost if the haystack is re-sliced after a rejected
    /// candidate, which is why the search position is tracked separately.
    fn find_fence_at_line_start(haystack: &str, fence: &str) -> Option<usize> {
        let mut search_from = 0;
        while let Some(relative) = haystack[search_from..].find(fence) {
            let at = search_from + relative;
            let line_start = haystack[..at].rfind('\n').map_or(0, |i| i + 1);
            if haystack[line_start..at].chars().all(is_whitespace) {
                return Some(at);
            }
            // Rejected: something other than whitespace precedes it on the line.
            search_from = at + fence.len();
        }
        None
    }

    debug_assert_eq!('-', self.prev());

    let pos = self.pos_within_token();
    self.eat_while(|c| c == '-');

    // one `-` is eaten by the caller.
    let length_opening = self.pos_within_token() - pos + 1;

    // must be ensured by the caller
    debug_assert!(length_opening >= 3);

    self.eat_identifier();
    self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
    let invalid_infostring = self.first() != '\n';

    let opening_fence = "-".repeat(length_opening as usize);
    if let Some(closing) = find_fence_at_line_start(self.as_str(), &opening_fence) {
        // candidate found
        self.bump_bytes(closing);
        // in case like
        // ---cargo
        // --- blahblah
        // or
        // ---cargo
        // ----
        // combine those stuff into this frontmatter token such that it gets detected later.
        self.eat_until(b'\n');
    } else if let Some(potential_closing) = find_fence_at_line_start(self.as_str(), "---") {
        // recovery strategy: a closing statement might have preceding whitespace/newline
        // but not have enough dashes to properly close. In this case, we eat until there,
        // and report a mismatch in the parser.
        //
        // bump to the potential closing, and eat everything on that line.
        self.bump_bytes(potential_closing);
        self.eat_until(b'\n');
    } else {
        // eat everything. this will get reported as an unclosed frontmatter.
        self.eat_while(|_| true);
    }

    Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
}

fn line_comment(&mut self) -> TokenKind {
debug_assert!(self.prev() == '/' && self.first() == '/');
self.bump();
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_lexer/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::*;

fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
let s = &format!("r{}", s);
let mut cursor = Cursor::new(s);
let mut cursor = Cursor::new(s, false);
cursor.bump();
let res = cursor.raw_double_quoted_string(0);
assert_eq!(res, expected);
Expand Down
13 changes: 13 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
parse_found_expr_would_be_stmt = expected expression, found `{$token}`
.label = expected expression

parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
.note = frontmatter close should not be preceded by whitespace
parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
.note = frontmatter infostrings must be a single identifier immediately following the opening
parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
.note = frontmatter opening should not be preceded by whitespace
parse_frontmatter_length_mismatch = frontmatter close does not match the opening
.label_opening = the opening here has {$len_opening} dashes...
.label_close = ...while the close has {$len_close} dashes
parse_frontmatter_unclosed = unclosed frontmatter
.note = frontmatter opening here was not closed

parse_function_body_equals_expr = function body cannot be `= expression;`
.suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`

Expand Down
55 changes: 55 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,61 @@ pub(crate) struct FoundExprWouldBeStmt {
pub suggestion: ExprParenthesesNeeded,
}

/// Error for the `parse_frontmatter_extra_characters_after_close` message
/// ("extra characters after frontmatter close are not allowed"), reported at `span`.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_extra_characters_after_close)]
pub(crate) struct FrontmatterExtraCharactersAfterClose {
#[primary_span]
pub span: Span,
}

/// Error for the `parse_frontmatter_invalid_infostring` message ("invalid infostring
/// for frontmatter"), reported at `span`. The struct-level `#[note]` attaches the
/// message's `.note`, which states that an infostring must be a single identifier
/// immediately following the opening.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_infostring)]
#[note]
pub(crate) struct FrontmatterInvalidInfostring {
#[primary_span]
pub span: Span,
}

/// Error for the `parse_frontmatter_invalid_opening_preceding_whitespace` message
/// ("invalid preceding whitespace for frontmatter opening"). `span` is the primary
/// location; the message's `.note` ("frontmatter opening should not be preceded by
/// whitespace") is attached to `note_span`.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
#[primary_span]
pub span: Span,
#[note]
pub note_span: Span,
}

/// Error for the `parse_frontmatter_unclosed` message ("unclosed frontmatter").
/// `span` is the primary location; the message's `.note` ("frontmatter opening here
/// was not closed") is attached to `note_span`.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_unclosed)]
pub(crate) struct FrontmatterUnclosed {
#[primary_span]
pub span: Span,
#[note]
pub note_span: Span,
}

/// Error for the `parse_frontmatter_invalid_close_preceding_whitespace` message
/// ("invalid preceding whitespace for frontmatter close"). `span` is the primary
/// location; the message's `.note` ("frontmatter close should not be preceded by
/// whitespace") is attached to `note_span`.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
#[primary_span]
pub span: Span,
#[note]
pub note_span: Span,
}

/// Error for the `parse_frontmatter_length_mismatch` message ("frontmatter close does
/// not match the opening"). `span` is the primary location; `opening` and `close`
/// carry the `.label_opening` and `.label_close` labels, whose text interpolates
/// `len_opening` and `len_close` as the respective dash counts.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_length_mismatch)]
pub(crate) struct FrontmatterLengthMismatch {
#[primary_span]
pub span: Span,
#[label(parse_label_opening)]
pub opening: Span,
#[label(parse_label_close)]
pub close: Span,
pub len_opening: usize,
pub len_close: usize,
}

#[derive(Diagnostic)]
#[diag(parse_leading_plus_not_supported)]
pub(crate) struct LeadingPlusNotSupported {
Expand Down
Loading
Loading