parser-development

安装量: 59
排名: #12543

安装

npx skills add https://github.com/biomejs/biome --skill parser-development

Purpose

Use this skill when creating or modifying Biome's parsers. Covers grammar authoring with ungrammar, lexer implementation, error recovery strategies, and list parsing patterns.

Prerequisites

- Install required tools: just install-tools
- Understand the language syntax you're implementing
- Read crates/biome_parser/CONTRIBUTING.md for detailed concepts

Common Workflows

Create Grammar for New Language

Create a .ungram file in xtask/codegen/ (e.g., html.ungram):

// html.ungram
// Legend:
//   Name =            -- non-terminal definition
//   'ident'           -- token (terminal)
//   A B               -- sequence
//   A | B             -- alternation
//   A*                -- zero or more repetition
//   (A (',' A)* ','?) -- repetition with separator and optional trailing comma
//   A?                -- zero or one repetition
//   label:A           -- suggested name for field

HtmlRoot = element

HtmlElement =
    '<' tag_name: HtmlName attributes: HtmlAttributeList '>'
    children: HtmlElementList
    '<' '/' close_tag_name: HtmlName '>'

HtmlAttributeList = HtmlAttribute*

HtmlAttribute =
    HtmlSimpleAttribute
    | HtmlBogusAttribute

HtmlSimpleAttribute = name: HtmlName '=' value: HtmlString

HtmlBogusAttribute = // error recovery node

Naming conventions:
- Prefix all nodes with language name: HtmlElement, CssRule
- Unions start with Any: AnyHtmlAttribute
- Error recovery nodes use Bogus: HtmlBogusAttribute
- Lists end with List: HtmlAttributeList
- Lists are mandatory (never optional), empty by default

Generate Parser from Grammar

Generate for specific language

just gen-grammar html

Generate for multiple languages

just gen-grammar html css

Generate all grammars

just gen-grammar
This creates:
biome_html_syntax/src/generated/
- Node definitions
biome_html_factory/src/generated/
- Node construction helpers
Parser skeleton files (you'll implement the actual parsing logic)
Implement a Lexer
Create
lexer/mod.rs
in your parser crate:
use
biome_html_syntax
::
HtmlSyntaxKind
;
use
biome_parser
::
{
lexer
::
Lexer
,
ParseDiagnostic
}
;
/// Hand-written lexer for HTML source text.
///
/// Scans `source` one token at a time, tracking the current byte offset,
/// the kind of the most recently produced token, and any diagnostics
/// emitted while lexing.
pub(crate) struct HtmlLexer<'source> {
    /// The full source text being tokenized.
    source: &'source str,
    /// Current byte offset into `source`.
    position: usize,
    /// Kind of the most recently lexed token.
    current_kind: HtmlSyntaxKind,
    /// Diagnostics collected while lexing.
    diagnostics: Vec<ParseDiagnostic>,
}
/// Implements Biome's `Lexer` trait so the parser infrastructure can drive
/// token scanning for HTML.
impl<'source> Lexer<'source> for HtmlLexer<'source> {
    // Trivia kinds the infrastructure needs to recognize.
    const NEWLINE: Self::Kind = HtmlSyntaxKind::NEWLINE;
    const WHITESPACE: Self::Kind = HtmlSyntaxKind::WHITESPACE;

    type Kind = HtmlSyntaxKind;
    // This example lexer has a single lexing context and no re-lexing,
    // so both context types are the unit type.
    type LexContext = ();
    type ReLexContext = ();

    fn source(&self) -> &'source str {
        self.source
    }

    fn current(&self) -> Self::Kind {
        self.current_kind
    }

    fn position(&self) -> usize {
        self.position
    }

    /// Scans the next token and records it as the current token.
    ///
    /// The context parameter is unused here (single lexing context), so it
    /// is underscore-prefixed to avoid an unused-variable warning.
    fn advance(&mut self, _context: Self::LexContext) -> Self::Kind {
        // Implement token scanning logic in `read_next_token`; it is
        // expected to advance `self.position` past the recognized token.
        let kind = self.read_next_token();
        self.current_kind = kind;
        kind
    }

    // Implement other required methods...
}
Implement Token Source
use
biome_parser
::
lexer
::
BufferedLexer
;
use
biome_html_syntax
::
HtmlSyntaxKind
;
use
crate
::
lexer
::
HtmlLexer
;
/// Token source feeding the HTML parser.
///
/// Wraps the `HtmlLexer` in a `BufferedLexer`, which buffers lexed tokens
/// (used for lookahead — see `p.nth_at()` in the tips below).
pub(crate) struct HtmlTokenSource<'src> {
    /// The buffered lexer producing tokens from the source text.
    lexer: BufferedLexer<HtmlSyntaxKind, HtmlLexer<'src>>,
}
/// Gives the parser infrastructure mutable access to the underlying
/// buffered lexer.
impl<'source> TokenSourceWithBufferedLexer<HtmlLexer<'source>> for HtmlTokenSource<'source> {
    fn lexer(&mut self) -> &mut BufferedLexer<HtmlSyntaxKind, HtmlLexer<'source>> {
        &mut self.lexer
    }
}
Write Parse Rules
Example: Parsing an if statement:
use
biome_parser
::
prelude
::
*
;
use
biome_js_syntax
::
JsSyntaxKind
::
*
;
/// Parses a JavaScript `if` statement: `if (cond) block` with an optional
/// `else` clause.
///
/// Returns `Absent` without consuming any tokens when the parser is not
/// positioned at the `if` keyword; otherwise returns `Present` with the
/// completed `JS_IF_STATEMENT` node.
fn parse_if_statement(p: &mut JsParser) -> ParsedSyntax {
    // Presence test - return Absent if not at 'if'
    if !p.at(T![if]) {
        return Absent;
    }

    // Open a marker; everything parsed until `complete` becomes children
    // of the if-statement node.
    let m = p.start();

    // Parse required tokens
    p.expect(T![if]);
    p.expect(T!['(']);

    // Parse required nodes with error recovery: on failure, a diagnostic
    // is added and a missing slot is recorded instead of aborting.
    parse_any_expression(p).or_add_diagnostic(p, expected_expression);

    p.expect(T![')']);

    parse_block_statement(p).or_add_diagnostic(p, expected_block);

    // Parse optional else clause (no diagnostic when absent).
    if p.at(T![else]) {
        parse_else_clause(p).ok();
    }

    Present(m.complete(p, JS_IF_STATEMENT))
}
Parse Lists with Error Recovery
Use
ParseSeparatedList
for comma-separated lists:
/// Separated-list parser for the elements of an array expression.
struct ArrayElementsList;

impl ParseSeparatedList for ArrayElementsList {
    type ParsedElement = CompletedMarker;

    /// Parses a single array element at the current position.
    fn parse_element(&mut self, p: &mut Parser) -> ParsedSyntax<Self::ParsedElement> {
        parse_array_element(p)
    }

    /// Reports whether the list has ended so iteration can stop.
    fn is_at_list_end(&self, p: &mut Parser) -> bool {
        // Stop at array closing bracket or file end
        p.at(T![']']) || p.at(EOF)
    }

    /// Recovers when an element fails to parse: skips tokens into a
    /// `JS_BOGUS_EXPRESSION` node until a token from the recovery set
    /// (closing bracket or separator) is reached.
    fn recover(
        &mut self,
        p: &mut Parser,
        parsed_element: ParsedSyntax<Self::ParsedElement>,
    ) -> RecoveryResult {
        parsed_element.or_recover(
            p,
            &ParseRecoveryTokenSet::new(
                JS_BOGUS_EXPRESSION,
                token_set![T![']'], T![,]],
            ),
            expected_array_element,
        )
    }

    /// The token separating consecutive elements (`,`).
    fn separating_element_kind(&mut self) -> JsSyntaxKind {
        T![,]
    }
}
// Use the list parser
// Use the list parser: drive `ArrayElementsList` over all elements and
// wrap the result in a single JS_ARRAY_ELEMENT_LIST node. The list node is
// created even when no elements were parsed (lists are mandatory).
fn parse_array_elements(p: &mut Parser) -> CompletedMarker {
    let list_marker = p.start();
    ArrayElementsList.parse_list(p);
    list_marker.complete(p, JS_ARRAY_ELEMENT_LIST)
}
Implement Error Recovery
Error recovery wraps invalid tokens in
BOGUS
nodes:
// Recovery set includes:
// - List terminator tokens (e.g., ']', '}')
// - Statement terminators (e.g., ';')
// - List separators (e.g., ',')
let recovery_set = token_set![T![']'], T![,], T![;]];

// On a failed parse, skip tokens until one from `recovery_set` is found,
// wrapping the skipped tokens in a JS_BOGUS_EXPRESSION node and emitting
// `expected_expression_error` as the diagnostic.
parsed_element.or_recover(
    p,
    &ParseRecoveryTokenSet::new(JS_BOGUS_EXPRESSION, recovery_set),
    expected_expression_error,
)
Handle Conditional Syntax
For syntax only valid in certain contexts (e.g., strict mode):
/// Parses a `with` statement — syntax that is only valid outside strict
/// mode.
///
/// Returns `Absent` without consuming anything when not at the `with`
/// keyword. The statement is always parsed; in strict mode the completed
/// node is then flagged invalid and converted to a bogus node, with a
/// diagnostic pointing at its range.
fn parse_with_statement(p: &mut Parser) -> ParsedSyntax {
    // Presence test.
    if !p.at(T![with]) {
        return Absent;
    }

    let m = p.start();
    // `bump` (not `expect`): the presence test above guarantees the token.
    p.bump(T![with]);

    parenthesized_expression(p).or_add_diagnostic(p, expected_expression);
    parse_statement(p).or_add_diagnostic(p, expected_statement);

    let with_stmt = m.complete(p, JS_WITH_STATEMENT);

    // Mark as invalid in strict mode
    let conditional = StrictMode.excluding_syntax(p, with_stmt, |p, marker| {
        p.err_builder(
            "with statements are not allowed in strict mode",
            marker.range(p),
        )
    });

    Present(conditional.or_invalid_to_bogus(p))
}
Test Parser
Create test files in
tests/
:
crates/biome_html_parser/tests/
├── html_specs/
│ ├── ok/
│ │ ├── simple_element.html
│ │ └── nested_elements.html
│ └── error/
│ ├── unclosed_tag.html
│ └── invalid_syntax.html
└── html_test.rs
Run tests:
cd
crates/biome_html_parser
cargo
test
Tips
Presence test
Always return
Absent
if the first token doesn't match - never progress parsing before returning
Absent
Required vs optional
Use
p.expect()
for required tokens,
p.eat()
for optional ones
Missing markers
Use
.or_add_diagnostic()
for required nodes to add missing markers and errors
Error recovery
Include list terminators, separators, and statement boundaries in recovery sets
Bogus nodes
Check grammar for which
BOGUS_*
node types are valid in your context
Checkpoints
Use
p.checkpoint()
to save state and
p.rewind()
if parsing fails
Lookahead
Use
p.at()
to check tokens,
p.nth_at()
for lookahead beyond current token
Lists are mandatory
Always create list nodes even if empty — use parse_list(), not parse_list().ok()

Common Patterns

// Optional token
if p.eat(T![async]) {
    // handle async
}

// Required token with error
p.expect(T!['{']);

// Optional node
parse_type_annotation(p).ok();

// Required node with error
parse_expression(p).or_add_diagnostic(p, expected_expression);

// Lookahead
if p.at(T![if]) || p.at(T![for]) {
    // handle control flow
}

// Checkpoint for backtracking
let checkpoint = p.checkpoint();
if parse_something(p).is_absent() {
    p.rewind(checkpoint);
    parse_something_else(p);
}

References

- Full guide: crates/biome_parser/CONTRIBUTING.md
- Grammar examples: xtask/codegen/*.ungram
- Parser examples: crates/biome_js_parser/src/syntax/
- Error recovery: Search for ParseRecoveryTokenSet in existing parsers
返回排行榜