Skip to content

Commit

Permalink
Improve performance of hidden node parsing (#1790)
Browse files Browse the repository at this point in the history
  • Loading branch information
msujew authored Jan 17, 2025
1 parent ce47f6e commit c006777
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 48 deletions.
72 changes: 38 additions & 34 deletions packages/langium/src/parser/cst-node-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ import type { Range } from 'vscode-languageserver-types';
import type { AbstractElement } from '../languages/generated/ast.js';
import type { AstNode, CompositeCstNode, CstNode, LeafCstNode, RootCstNode } from '../syntax-tree.js';
import { Position } from 'vscode-languageserver-types';
import { isCompositeCstNode } from '../syntax-tree.js';
import { tokenToRange } from '../utils/cst-utils.js';

export class CstNodeBuilder {

private rootNode!: RootCstNodeImpl;
private nodeStack: CompositeCstNodeImpl[] = [];

private get current(): CompositeCstNodeImpl {
get current(): CompositeCstNodeImpl {
return this.nodeStack[this.nodeStack.length - 1] ?? this.rootNode;
}

Expand All @@ -37,8 +36,8 @@ export class CstNodeBuilder {
return compositeNode;
}

buildLeafNode(token: IToken, feature: AbstractElement): LeafCstNode {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, false);
buildLeafNode(token: IToken, feature?: AbstractElement): LeafCstNode {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, !feature);
leafNode.grammarSource = feature;
leafNode.root = this.rootNode;
this.current.content.push(leafNode);
Expand All @@ -55,6 +54,39 @@ export class CstNodeBuilder {
}
}

addHiddenNodes(tokens: IToken[]): void {
const nodes: LeafCstNode[] = [];
for (const token of tokens) {
const leafNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
leafNode.root = this.rootNode;
nodes.push(leafNode);
}
let current: CompositeCstNode = this.current;
let added = false;
// If we are within a composite node, we add the hidden nodes to the content
if (current.content.length > 0) {
current.content.push(...nodes);
return;
}
// Otherwise we are at a newly created node
// Instead of adding the hidden nodes here, we search for the first parent node with content
while (current.container) {
const index = current.container.content.indexOf(current);
if (index > 0) {
// Add the hidden nodes before the current node
current.container.content.splice(index, 0, ...nodes);
added = true;
break;
}
current = current.container;
}
// If we arrive at the root node, we add the hidden nodes at the beginning
// This is the case if the hidden nodes are the first nodes in the tree
if (!added) {
this.rootNode.content.unshift(...nodes);
}
}

construct(item: { $type: string | symbol | undefined, $cstNode: CstNode }): void {
const current: CstNode = this.current;
// The specified item could be a datatype ($type is symbol) or a fragment ($type is undefined)
Expand All @@ -70,34 +102,6 @@ export class CstNodeBuilder {
this.removeNode(node);
}
}

addHiddenTokens(hiddenTokens: IToken[]): void {
for (const token of hiddenTokens) {
const hiddenNode = new LeafCstNodeImpl(token.startOffset, token.image.length, tokenToRange(token), token.tokenType, true);
hiddenNode.root = this.rootNode;
this.addHiddenToken(this.rootNode, hiddenNode);
}
}

private addHiddenToken(node: CompositeCstNode, token: LeafCstNode): void {
const { offset: tokenStart, end: tokenEnd } = token;

for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const { offset: childStart, end: childEnd } = child;
if (isCompositeCstNode(child) && tokenStart > childStart && tokenEnd < childEnd) {
this.addHiddenToken(child, token);
return;
} else if (tokenEnd <= childStart) {
node.content.splice(i, 0, token);
return;
}
}

// We know that we haven't found a suited position for the token
// So we simply add it to the end of the current node
node.content.push(token);
}
}

export abstract class AbstractCstNode implements CstNode {
Expand All @@ -107,7 +111,7 @@ export abstract class AbstractCstNode implements CstNode {
abstract get range(): Range;

container?: CompositeCstNode;
grammarSource: AbstractElement;
grammarSource?: AbstractElement;
root: RootCstNode;
private _astNode?: AstNode;

Expand All @@ -117,7 +121,7 @@ export abstract class AbstractCstNode implements CstNode {
}

/** @deprecated use `grammarSource` instead. */
get feature(): AbstractElement {
get feature(): AbstractElement | undefined {
return this.grammarSource;
}

Expand Down
25 changes: 22 additions & 3 deletions packages/langium/src/parser/langium-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import type { AbstractElement, Action, Assignment, ParserRule } from '../languag
import type { Linker } from '../references/linker.js';
import type { LangiumCoreServices } from '../services.js';
import type { AstNode, AstReflection, CompositeCstNode, CstNode } from '../syntax-tree.js';
import type { Lexer } from './lexer.js';
import type { Lexer, LexerResult } from './lexer.js';
import type { IParserConfig } from './parser-config.js';
import type { ValueConverter } from './value-converter.js';
import { defaultParserErrorProvider, EmbeddedActionsParser, LLkLookaheadStrategy } from 'chevrotain';
Expand Down Expand Up @@ -195,6 +195,7 @@ export class LangiumParser extends AbstractLangiumParser {
private readonly converter: ValueConverter;
private readonly astReflection: AstReflection;
private readonly nodeBuilder = new CstNodeBuilder();
private lexerResult?: LexerResult;
private stack: any[] = [];
private assignmentMap = new Map<AbstractElement, AssignmentElement | undefined>();

Expand Down Expand Up @@ -232,15 +233,16 @@ export class LangiumParser extends AbstractLangiumParser {

parse<T extends AstNode = AstNode>(input: string, options: ParserOptions = {}): ParseResult<T> {
this.nodeBuilder.buildRootNode(input);
const lexerResult = this.lexer.tokenize(input);
const lexerResult = this.lexerResult = this.lexer.tokenize(input);
this.wrapper.input = lexerResult.tokens;
const ruleMethod = options.rule ? this.allRules.get(options.rule) : this.mainRule;
if (!ruleMethod) {
throw new Error(options.rule ? `No rule found with name '${options.rule}'` : 'No main rule available.');
}
const result = ruleMethod.call(this.wrapper, {});
this.nodeBuilder.addHiddenTokens(lexerResult.hidden);
this.nodeBuilder.addHiddenNodes(lexerResult.hidden);
this.unorderedGroups.clear();
this.lexerResult = undefined;
return {
value: result,
lexerErrors: lexerResult.errors,
Expand Down Expand Up @@ -273,9 +275,26 @@ export class LangiumParser extends AbstractLangiumParser {
};
}

private extractHiddenTokens(token: IToken): IToken[] {
const hiddenTokens = this.lexerResult!.hidden;
if (!hiddenTokens.length) {
return [];
}
const offset = token.startOffset;
for (let i = 0; i < hiddenTokens.length; i++) {
const token = hiddenTokens[i];
if (token.startOffset > offset) {
return hiddenTokens.splice(0, i);
}
}
return hiddenTokens.splice(0, hiddenTokens.length);
}

consume(idx: number, tokenType: TokenType, feature: AbstractElement): void {
const token = this.wrapper.wrapConsume(idx, tokenType);
if (!this.isRecording() && this.isValidToken(token)) {
const hiddenTokens = this.extractHiddenTokens(token);
this.nodeBuilder.addHiddenNodes(hiddenTokens);
const leafNode = this.nodeBuilder.buildLeafNode(token, feature);
const { assignment, isCrossRef } = this.getAssignment(feature);
const current = this.current;
Expand Down
13 changes: 6 additions & 7 deletions packages/langium/src/serializer/hydrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,23 +296,22 @@ export class DefaultHydrator implements Hydrator {
return this.lexer.definition[name];
}

protected getGrammarElementId(node: AbstractElement): number | undefined {
protected getGrammarElementId(node: AbstractElement | undefined): number | undefined {
if (!node) {
return undefined;
}
if (this.grammarElementIdMap.size === 0) {
this.createGrammarElementIdMap();
}
return this.grammarElementIdMap.get(node);
}

protected getGrammarElement(id: number): AbstractElement {
protected getGrammarElement(id: number): AbstractElement | undefined {
if (this.grammarElementIdMap.size === 0) {
this.createGrammarElementIdMap();
}
const element = this.grammarElementIdMap.getKey(id);
if (element) {
return element;
} else {
throw new Error('Invalid grammar element id: ' + id);
}
return element;
}

protected createGrammarElementIdMap(): void {
Expand Down
4 changes: 2 additions & 2 deletions packages/langium/src/syntax-tree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,9 @@ export interface CstNode extends DocumentSegment {
/** The root CST node */
readonly root: RootCstNode;
/** The grammar element from which this node was parsed */
readonly grammarSource: AbstractElement;
readonly grammarSource?: AbstractElement;
/** @deprecated use `grammarSource` instead. */
readonly feature: AbstractElement;
readonly feature?: AbstractElement;
/** The AST node created from this CST node */
readonly astNode: AstNode;
/** @deprecated use `astNode` instead. */
Expand Down
27 changes: 25 additions & 2 deletions packages/langium/test/parser/langium-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/

import type { AstNode, LangiumCoreServices } from 'langium';
import { EmptyFileSystem, type AstNode, type LangiumCoreServices } from 'langium';
import { describe, expect, test, beforeEach } from 'vitest';
import { createServicesForGrammar } from 'langium/grammar';
import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
import { parseHelper } from 'langium/test';

describe('Partial parsing', () => {
Expand Down Expand Up @@ -66,6 +66,29 @@ describe('Partial parsing', () => {

});

describe('hidden node parsing', () => {

test('finishes in expected time', async () => {
const parser = createLangiumGrammarServices(EmptyFileSystem).grammar.parser.LangiumParser;
let content = 'Rule:';
// Adding hidden nodes used to cause exponential parsing time behavior
for (let i = 0; i < 2500; i++) {
content += "'a' /* A */ /* B */ /* C */\n";
}
content += ';';
const start = Date.now();
// This roughly takes 100-300 ms on a modern machine
// If it takes longer, the hidden node parsing is likely to be exponential
// On an older version of the parser, this took ~5 seconds
const result = parser.parse(content);
expect(result.lexerErrors).toHaveLength(0);
expect(result.parserErrors).toHaveLength(0);
const end = Date.now();
expect(end - start).toBeLessThan(1000);
});

});

interface A extends AstNode {
name: string
}
33 changes: 33 additions & 0 deletions packages/langium/test/utils/cst-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,39 @@ describe('findLeafNode', () => {
}
});

describe('findCommentNode', () => {
test('Finds correct comment with multiple comments before and after', async () => {
const text = expandToString`
Main: value=AB;
terminal AB:
/** A */
/** B */
'A'
/** C */
/** D */;
`;
const grammar = await parser(text);
const offset = text.indexOf("'A'") + 1;
const leafNode = findLeafNodeAtOffset(grammar.parseResult.value.$cstNode!, offset);
const keyword = leafNode?.astNode;
expect(keyword).toBeDefined();
const comment = CstUtils.findCommentNode(keyword?.$cstNode, ['ML_COMMENT']);
expect(comment?.text).toBe('/** B */');
});

test('Finds correct comment at the start of the file', async () => {
const text = expandToString`
/** A */
/** B */
/** C */
grammar test
`;
const grammar = await parser(text);
const comment = CstUtils.findCommentNode(grammar.parseResult.value.$cstNode, ['ML_COMMENT']);
expect(comment?.text).toBe('/** C */');
});
});

describe('compareRange', () => {
test.each([
// Different lines
Expand Down

0 comments on commit c006777

Please sign in to comment.