diff --git a/packages/langium/src/default-module.ts b/packages/langium/src/default-module.ts
index d3aab8fc4..a5c6c8e4a 100644
--- a/packages/langium/src/default-module.ts
+++ b/packages/langium/src/default-module.ts
@@ -35,6 +35,7 @@ import { LangiumParserErrorMessageProvider } from './parser/langium-parser.js';
 import { DefaultAsyncParser } from './parser/async-parser.js';
 import { DefaultWorkspaceLock } from './workspace/workspace-lock.js';
 import { DefaultHydrator } from './serializer/hydrator.js';
+import { DefaultAstDisassembler, DefaultAstReassembler } from './serializer/reassembler/index.js';
 
 /**
  * Context required for creating the default language-specific dependency injection module.
@@ -76,6 +77,8 @@ export function createDefaultCoreModule(context: DefaultCoreModuleContext): Modu
             References: (services) => new DefaultReferences(services)
         },
         serializer: {
+            AstDisassembler: (services) => new DefaultAstDisassembler(services),
+            AstReassembler: (services) => new DefaultAstReassembler(services),
             Hydrator: (services) => new DefaultHydrator(services),
             JsonSerializer: (services) => new DefaultJsonSerializer(services)
         },
diff --git a/packages/langium/src/serializer/index.ts b/packages/langium/src/serializer/index.ts
index 4440218e1..b9eac0900 100644
--- a/packages/langium/src/serializer/index.ts
+++ b/packages/langium/src/serializer/index.ts
@@ -3,6 +3,6 @@
  * This program and the accompanying materials are made available under the
  * terms of the MIT License, which is available in the project root.
  ******************************************************************************/
-
+export * from './reassembler/index.js';
 export * from './hydrator.js';
 export * from './json-serializer.js';
diff --git a/packages/langium/src/serializer/reassembler/AstAssemblerInstruction.ts b/packages/langium/src/serializer/reassembler/AstAssemblerInstruction.ts
new file mode 100644
index 000000000..1b848ba95
--- /dev/null
+++ b/packages/langium/src/serializer/reassembler/AstAssemblerInstruction.ts
@@ -0,0 +1,160 @@
+/******************************************************************************
+ * Copyright 2024 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+
+import type { Range } from 'vscode-languageserver-protocol';
+import type { Grammar, AbstractElement } from '../../languages/generated/ast.js';
+import { isAbstractElement } from '../../languages/generated/ast.js';
+import { streamAst } from '../../utils/ast-utils.js';
+import { BiMap } from '../../utils/collections.js';
+
+export enum InstructionType {
+    // setup
+    Allocate,
+    Error,
+    Return,
+
+    // CST
+    RootCstNode,
+    CompositeCstNode,
+    LeafCstNode,
+    PopCstNode,
+
+    // AST
+    Property,
+    PropertyArray,
+    LinkNode,
+    LinkNodeArray,
+    Reference,
+    ReferenceArray,
+    Empty
+}
+export enum NodeType {
+    Cst,
+    Ast
+}
+interface AstAssemblerInstructionBase {
+    $type: InstructionType;
+}
+export type ReferenceData = {
+    refText: string;
+    refNode?: number;
+};
+export enum ErrorSource {
+    Lexer,
+    Parser
+}
+export namespace Instructions {
+    export interface Allocate extends AstAssemblerInstructionBase {
+        $type: InstructionType.Allocate;
+        cstNodeCount: number;
+        astNodeCount: number;
+    }
+    export interface Property extends AstAssemblerInstructionBase {
+        $type: InstructionType.Property;
+        sourceId: number;
+        property: string;
+        value: number | boolean | string | bigint;
+    }
+    export interface PropertyArray extends AstAssemblerInstructionBase {
+        $type: InstructionType.PropertyArray;
+        sourceId: number;
+        property: string;
+        values: Array<number | boolean | string | bigint>;
+    }
+    export interface Reference extends AstAssemblerInstructionBase {
+        $type: InstructionType.Reference;
+        sourceId: number;
+        property: string;
+        refText: string;
+        refNode?: number;
+    }
+    export interface ReferenceArray extends AstAssemblerInstructionBase {
+        $type: InstructionType.ReferenceArray;
+        sourceId: number;
+        property: string;
+        references: ReferenceData[];
+    }
+    export interface LinkNode extends AstAssemblerInstructionBase {
+        $type: InstructionType.LinkNode;
+        sourceId: number;
+        targetKind: NodeType;
+        property: string;
+        targetId: number;
+    }
+    export interface LinkNodeArray extends AstAssemblerInstructionBase {
+        $type: InstructionType.LinkNodeArray;
+        sourceId: number;
+        targetKind: NodeType;
+        property: string;
+        targetIds: number[];
+    }
+    export interface Empty extends AstAssemblerInstructionBase {
+        $type: InstructionType.Empty;
+        sourceId: number;
+        property: string;
+    }
+    export interface Error extends AstAssemblerInstructionBase {
+        $type: InstructionType.Error;
+        source: ErrorSource;
+        items: Record<string, unknown>;
+    }
+    export interface Return extends AstAssemblerInstructionBase {
+        $type: InstructionType.Return;
+        rootAstNodeId: number;
+    }
+
+    export interface RootCstNode extends AstAssemblerInstructionBase {
+        $type: InstructionType.RootCstNode;
+        input: string;
+        astNodeId: number | undefined;
+    }
+    export interface CompositeCstNode extends AstAssemblerInstructionBase {
+        $type: InstructionType.CompositeCstNode;
+        elementId: number;
+        astNodeId: number | undefined;
+    }
+    export interface LeafCstNode extends AstAssemblerInstructionBase {
+        $type: InstructionType.LeafCstNode;
+        tokenOffset: number;
+        tokenLength: number;
+        tokenTypeName: string;
+        elementId: number;
+        hidden: boolean;
+        range: Range;
+        astNodeId: number | undefined;
+    }
+    export interface PopCstNode extends AstAssemblerInstructionBase {
+        $type: InstructionType.PopCstNode;
+    }
+}
+
+export type AstAssemblerInstruction =
+| Instructions.Allocate
+| Instructions.Property
+| Instructions.PropertyArray
+| Instructions.Reference
+| Instructions.ReferenceArray
+| Instructions.LinkNode
+| Instructions.LinkNodeArray
+| Instructions.Empty
+| Instructions.Error
+| Instructions.Return
+| Instructions.RootCstNode
+| Instructions.CompositeCstNode
+| Instructions.LeafCstNode
+| Instructions.PopCstNode
+;
+
+export function createGrammarElementIdMap(grammar: Grammar) {
+    const result = new BiMap<AbstractElement, number>();
+    let id = 0;
+    for (const element of streamAst(grammar)) {
+        if (isAbstractElement(element)) {
+            result.set(element, id++);
+        }
+    }
+    return result;
+}
diff --git a/packages/langium/src/serializer/reassembler/AstDisassembler.ts b/packages/langium/src/serializer/reassembler/AstDisassembler.ts
new file mode 100644
index 000000000..5752140ae
--- /dev/null
+++ b/packages/langium/src/serializer/reassembler/AstDisassembler.ts
@@ -0,0 +1,222 @@
+/******************************************************************************
+ * Copyright 2024 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+
+import type { Reference } from '../../syntax-tree.js';
+import { isRootCstNode, type AstNode, type CstNode, type Mutable, isCompositeCstNode, isLeafCstNode, isAstNode, isReference } from '../../syntax-tree.js';
+import { streamAst } from '../../utils/ast-utils.js';
+import { streamCst } from '../../utils/cst-utils.js';
+import { BiMap, type LangiumCoreServices, assertType, type ParseResult } from '../../index.js';
+import { type AbstractElement } from '../../languages/generated/ast.js';
+import type { AstAssemblerInstruction, Instructions, ReferenceData } from './AstAssemblerInstruction.js';
+import { InstructionType, NodeType, ErrorSource } from './AstAssemblerInstruction.js';
+import { createGrammarElementIdMap } from './AstAssemblerInstruction.js';
+
+export interface AstDisassembler {
+    disassemble(parseResult: ParseResult): Generator<AstAssemblerInstruction>;
+}
+
+export class DefaultAstDisassembler implements AstDisassembler {
+    private readonly cstNodeToId = new Map<CstNode, number>();
+    private readonly astNodeToId = new Map<AstNode, number>();
+    private readonly grammarElementIdMap = new BiMap<AbstractElement, number>();
+
+    constructor(services: LangiumCoreServices) {
+        this.grammarElementIdMap = createGrammarElementIdMap(services.Grammar);
+    }
+
+    *disassemble(parseResult: ParseResult): Generator<AstAssemblerInstruction> {
+        // allocate memory for all nodes
+        const astNode = parseResult.value;
+        const cstRoot = astNode.$cstNode!;
+        this.enumerateNodes(astNode, astNode.$cstNode!);
+        yield {
+            $type: InstructionType.Allocate,
+            cstNodeCount: this.cstNodeToId.size,
+            astNodeCount: this.astNodeToId.size
+        };
+
+        // send CST nodes
+        let cstNodeStack: Array<Mutable<CstNode>> = [];
+        for (const node of streamCst(cstRoot)) {
+            assertType<Mutable<CstNode>>(node);
+            if (isRootCstNode(node)) {
+                yield {
+                    $type: InstructionType.RootCstNode,
+                    input: node.fullText,
+                    astNodeId: this.astNodeToId.get(node.astNode)
+                };
+                cstNodeStack = [node];
+            } else if (isCompositeCstNode(node)) {
+                while (cstNodeStack[cstNodeStack.length - 1] !== node.container) {
+                    cstNodeStack.pop();
+                    yield {
+                        $type: InstructionType.PopCstNode
+                    };
+                }
+                yield {
+                    $type: InstructionType.CompositeCstNode,
+                    elementId: this.grammarElementIdMap.get(node.grammarSource)!,
+                    astNodeId: this.astNodeToId.get(node.astNode)
+                };
+                cstNodeStack.push(node);
+            } else if (isLeafCstNode(node)) {
+                while (cstNodeStack[cstNodeStack.length - 1] !== node.container) {
+                    cstNodeStack.pop();
+                    yield {
+                        $type: InstructionType.PopCstNode
+                    };
+                }
+                yield {
+                    $type: InstructionType.LeafCstNode,
+                    elementId: this.grammarElementIdMap.get(node.grammarSource)!,
+                    hidden: node.hidden,
+                    range: node.range,
+                    tokenTypeName: node.tokenType.name,
+                    tokenOffset: node.offset,
+                    tokenLength: node.length,
+                    astNodeId: this.astNodeToId.get(node.astNode)
+                };
+            }
+        }
+
+        // send AST nodes
+        for (const node of streamAst(astNode)) {
+            assertType<Mutable<AstNode>>(node);
+            const sourceId = this.astNodeToId.get(node)!;
+
+            const setProperty = (property: string, value: number | boolean | string | bigint): Instructions.Property => ({
+                $type: InstructionType.Property,
+                sourceId,
+                property,
+                value
+            });
+
+            const setPropertyArray = (property: string, values: Array<number | boolean | string | bigint>): Instructions.PropertyArray => ({
+                $type: InstructionType.PropertyArray,
+                sourceId,
+                property,
+                values
+            });
+
+            const setLink = (property: string, type: NodeType, index: number): Instructions.LinkNode => ({
+                $type: InstructionType.LinkNode,
+                sourceId,
+                targetKind: type,
+                targetId: index,
+                property,
+            });
+
+            const setLinkArray = (property: string, type: NodeType, indices: number[]): Instructions.LinkNodeArray => ({
+                $type: InstructionType.LinkNodeArray,
+                sourceId,
+                targetKind: type,
+                targetIds: indices,
+                property,
+            });
+
+            const setReferenceArray = (property: string, references: ReferenceData[]): Instructions.ReferenceArray => ({
+                $type: InstructionType.ReferenceArray,
+                sourceId,
+                property,
+                references
+            });
+
+            const setReference = (property: string, reference: ReferenceData): Instructions.Reference => ({
+                $type: InstructionType.Reference,
+                sourceId,
+                property,
+                ...reference
+            });
+
+            const setEmpty = (property: string): Instructions.Empty => ({
+                $type: InstructionType.Empty,
+                sourceId,
+                property,
+            });
+
+            yield setProperty('$type', node.$type);
+            if (node.$containerIndex !== undefined) {
+                yield setProperty('$containerIndex', node.$containerIndex);
+            }
+            if (node.$containerProperty) {
+                yield setProperty('$containerProperty', node.$containerProperty);
+            }
+            if (node.$container) {
+                yield setLink('$container', NodeType.Ast, this.astNodeToId.get(node.$container)!);
+            }
+            if (node.$cstNode !== undefined) {
+                yield setLink('$cstNode', NodeType.Cst, this.cstNodeToId.get(node.$cstNode)!);
+            }
+            for (const [name, value] of Object.entries(node)) {
+                if (name.startsWith('$')) {
+                    continue;
+                }
+                if (Array.isArray(value)) {
+                    if (value.length > 0) {
+                        const item = value[0];
+                        if (isAstNode(item)) {
+                            assertType<AstNode[]>(value);
+                            yield setLinkArray(name, NodeType.Ast, value.map(v => this.astNodeToId.get(v)!));
+                        } else if (isReference(item)) {
+                            assertType<Reference[]>(value);
+                            yield setReferenceArray(name, value.map(v => ({
+                                refText: v.$refText,
+                                refNode: v.$refNode ? this.cstNodeToId.get(v.$refNode) : undefined
+                            })));
+                        } else {
+                            // plain value array (e.g. string[]): cast just to keep TypeScript calm
+                            yield setPropertyArray(name, value as string[]);
+                        }
+                    } else {
+                        yield setEmpty(name);
+                    }
+                } else if (isAstNode(value)) {
+                    yield setLink(name, NodeType.Ast, this.astNodeToId.get(value)!);
+                } else if (isReference(value)) {
+                    yield setReference(name, {
+                        refText: value.$refText,
+                        refNode: value.$refNode ? this.cstNodeToId.get(value.$refNode) : undefined
+                    });
+                } else if (typeof value === 'boolean' || typeof value === 'bigint' || typeof value === 'number' || typeof value === 'string') {
+                    yield setProperty(name, value);
+                }
+            }
+        }
+
+        // send errors
+        for (const error of parseResult.lexerErrors) {
+            yield {
+                $type: InstructionType.Error,
+                source: ErrorSource.Lexer,
+                items: { ...error, message: error.message }
+            };
+        }
+        for (const error of parseResult.parserErrors) {
+            yield {
+                $type: InstructionType.Error,
+                source: ErrorSource.Parser,
+                items: { ...error, message: error.message }
+            };
+        }
+
+        // mark end with root node
+        yield {
+            $type: InstructionType.Return,
+            rootAstNodeId: this.astNodeToId.get(astNode)!
+        };
+    }
+
+    private enumerateNodes(astRoot: AstNode, cstRoot: CstNode): void {
+        this.cstNodeToId.clear();
+        [...streamCst(cstRoot)].forEach((cstNode, index) => {
+            this.cstNodeToId.set(cstNode, index);
+        });
+        this.astNodeToId.clear();
+        [...streamAst(astRoot)].forEach((astNode, index) => {
+            this.astNodeToId.set(astNode, index);
+        });
+    }
+}
diff --git a/packages/langium/src/serializer/reassembler/AstReassembler.ts b/packages/langium/src/serializer/reassembler/AstReassembler.ts
new file mode 100644
index 000000000..fe12e272b
--- /dev/null
+++ b/packages/langium/src/serializer/reassembler/AstReassembler.ts
@@ -0,0 +1,152 @@
+/******************************************************************************
+ * Copyright 2024 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+
+import type { CompositeCstNode, Reference, RootCstNode } from '../../syntax-tree.js';
+import { type AstNode, type CstNode, type Mutable } from '../../syntax-tree.js';
+import { BiMap, type LangiumCoreServices, type ParseResult, assertUnreachable, RootCstNodeImpl, CompositeCstNodeImpl, LeafCstNodeImpl } from '../../index.js';
+import { type AbstractElement } from '../../languages/generated/ast.js';
+import type { ILexingError, IRecognitionException, TokenType } from 'chevrotain';
+import type { AstAssemblerInstruction } from './AstAssemblerInstruction.js';
+import { InstructionType, NodeType, ErrorSource, createGrammarElementIdMap } from './AstAssemblerInstruction.js';
+
+export interface AstReassemblerContext {
+    lexerErrors: ILexingError[];
+    parserErrors: IRecognitionException[];
+    idToAstNode: Array<Mutable<AstNode>>;
+    idToCstNode: CstNode[];
+    nextFreeCstNode: number;
+    cstStack: CompositeCstNode[];
+    rootCstNodeId: number;
+    rootAstNodeId: number;
+    elementToId: BiMap<AbstractElement, number>;
+}
+
+export interface AstReassembler {
+    reassemble(context: AstReassemblerContext, instr: AstAssemblerInstruction): boolean;
+    buildParseResult<T extends AstNode>(context: AstReassemblerContext): ParseResult<T>;
+}
+
+export class DefaultAstReassembler implements AstReassembler {
+    private readonly grammarElementIdMap: BiMap<AbstractElement, number>;
+    private readonly grammarTokenTypeIdMap: BiMap<TokenType, string>;
+    constructor(services: LangiumCoreServices) {
+        this.grammarElementIdMap = createGrammarElementIdMap(services.Grammar);
+        const tokens = services.parser.TokenBuilder.buildTokens(services.Grammar) as TokenType[];
+        this.grammarTokenTypeIdMap = new BiMap(tokens.map(tk => [tk, tk.name] as const));
+    }
+
+    buildParseResult<T extends AstNode>(context: AstReassemblerContext): ParseResult<T> {
+        return {
+            lexerErrors: context.lexerErrors,
+            parserErrors: context.parserErrors,
+            value: context.idToAstNode[context.rootAstNodeId] as T
+        };
+    }
+
+    reassemble(ctx: AstReassemblerContext, instr: AstAssemblerInstruction): boolean {
+        switch (instr.$type) {
+            case InstructionType.Allocate:
+                ctx.rootAstNodeId = -1;
+                ctx.rootCstNodeId = -1;
+                ctx.idToAstNode = [];
+                ctx.idToCstNode = [];
+                ctx.nextFreeCstNode = 0;
+                ctx.cstStack = [];
+                ctx.lexerErrors = [];
+                ctx.parserErrors = [];
+                ctx.elementToId = this.grammarElementIdMap;
+                ctx.idToCstNode = Array.from({ length: instr.cstNodeCount }).map(() => (undefined! as Mutable<CstNode>));
+                ctx.idToAstNode = Array.from({ length: instr.astNodeCount }).map(() => ({} as Mutable<AstNode>));
+                break;
+            case InstructionType.Empty:
+                ctx.idToAstNode[instr.sourceId][instr.property] = [];
+                break;
+            case InstructionType.Property:
+                ctx.idToAstNode[instr.sourceId][instr.property] = instr.value;
+                break;
+            case InstructionType.PropertyArray:
+                ctx.idToAstNode[instr.sourceId][instr.property] = instr.values;
+                break;
+            case InstructionType.Reference: {
+                const reference = {
+                    $refText: instr.refText,
+                    $refNode: instr.refNode ? ctx.idToCstNode[instr.refNode] : undefined
+                };
+                ctx.idToAstNode[instr.sourceId][instr.property] = reference;
+                break;
+            }
+            case InstructionType.ReferenceArray: {
+                const references = instr.references.map(r => ({
+                    $refText: r.refText,
+                    $refNode: r.refNode ? ctx.idToCstNode[r.refNode] : undefined
+                }));
+                ctx.idToAstNode[instr.sourceId][instr.property] = references;
+                break;
+            }
+            case InstructionType.LinkNode: {
+                const node = instr.targetKind === NodeType.Ast ? ctx.idToAstNode[instr.targetId] : ctx.idToCstNode[instr.targetId];
+                ctx.idToAstNode[instr.sourceId][instr.property] = node;
+                break;
+            }
+            case InstructionType.LinkNodeArray: {
+                const nodes = instr.targetKind === NodeType.Ast
+                    ? instr.targetIds.map(id => ctx.idToAstNode[id])
+                    : instr.targetIds.map(id => ctx.idToCstNode[id])
+                ;
+                ctx.idToAstNode[instr.sourceId][instr.property] = nodes;
+                break;
+            }
+            case InstructionType.Return:
+                ctx.rootAstNodeId = instr.rootAstNodeId;
+                return true;
+            case InstructionType.Error:
+                if (instr.source === ErrorSource.Lexer) {
+                    ctx.lexerErrors.push({ ...instr.items } as unknown as ILexingError);
+                } else {
+                    ctx.parserErrors.push({ ...instr.items } as unknown as IRecognitionException);
+                }
+                break;
+            case InstructionType.RootCstNode: {
+                const index = ctx.nextFreeCstNode++;
+                const rootNode = ctx.idToCstNode[index] = new RootCstNodeImpl(instr.input);
+                rootNode.astNode = typeof instr.astNodeId === 'number' ? ctx.idToAstNode[instr.astNodeId] as unknown as AstNode : undefined;
+                rootNode.root = rootNode;
+                ctx.cstStack = [rootNode];
+                ctx.rootCstNodeId = index;
+                break;
+            }
+            case InstructionType.CompositeCstNode: {
+                const index = ctx.nextFreeCstNode++;
+                const compositeNode = ctx.idToCstNode[index] = new CompositeCstNodeImpl();
+                compositeNode.grammarSource = ctx.elementToId.getKey(instr.elementId)!;
+                compositeNode.astNode = typeof instr.astNodeId === 'number' ? ctx.idToAstNode[instr.astNodeId] as unknown as AstNode : undefined;
+                compositeNode.root = ctx.idToCstNode[ctx.rootCstNodeId] as RootCstNode;
+                const current = ctx.cstStack[ctx.cstStack.length - 1];
+                current.content.push(compositeNode);
+                ctx.cstStack.push(compositeNode);
+                break;
+            }
+            case InstructionType.LeafCstNode: {
+                const index = ctx.nextFreeCstNode++;
+                const tokenType = this.grammarTokenTypeIdMap.getKey(instr.tokenTypeName)!;
+                const leafNode = ctx.idToCstNode[index] = new LeafCstNodeImpl(instr.tokenOffset, instr.tokenLength, instr.range, tokenType, instr.hidden);
+                leafNode.grammarSource = ctx.elementToId.getKey(instr.elementId)!;
+                leafNode.astNode = typeof instr.astNodeId === 'number' ? ctx.idToAstNode[instr.astNodeId] as unknown as AstNode : undefined;
+                leafNode.root = ctx.idToCstNode[ctx.rootCstNodeId] as RootCstNode;
+                const current = ctx.cstStack[ctx.cstStack.length - 1];
+                current.content.push(leafNode);
+                break;
+            }
+            case InstructionType.PopCstNode: {
+                ctx.cstStack.pop();
+                break;
+            }
+            default:
+                assertUnreachable(instr);
+        }
+        return false;
+    }
+}
diff --git a/packages/langium/src/serializer/reassembler/index.ts b/packages/langium/src/serializer/reassembler/index.ts
new file mode 100644
index 000000000..85b4afec4
--- /dev/null
+++ b/packages/langium/src/serializer/reassembler/index.ts
@@ -0,0 +1,8 @@
+/******************************************************************************
+ * Copyright 2023 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+export * from './AstAssemblerInstruction.js';
+export * from './AstDisassembler.js';
+export * from './AstReassembler.js';
diff --git a/packages/langium/src/services.ts b/packages/langium/src/services.ts
index 288fa0d2f..fcb84b821 100644
--- a/packages/langium/src/services.ts
+++ b/packages/langium/src/services.ts
@@ -37,6 +37,7 @@ import type { IndexManager } from './workspace/index-manager.js';
 import type { WorkspaceLock } from './workspace/workspace-lock.js';
 import type { Hydrator } from './serializer/hydrator.js';
 import type { WorkspaceManager } from './workspace/workspace-manager.js';
+import type { AstDisassembler, AstReassembler } from './serializer/reassembler/index.js';
 
 /**
  * The services generated by `langium-cli` for a specific language. These are derived from the
@@ -76,6 +77,8 @@ export type LangiumDefaultCoreServices = {
         ScopeComputation: ScopeComputation
     }
     serializer: {
+        AstDisassembler: AstDisassembler,
+        AstReassembler: AstReassembler,
         Hydrator: Hydrator
         JsonSerializer: JsonSerializer
     }
diff --git a/packages/langium/src/utils/errors.ts b/packages/langium/src/utils/errors.ts
index b46f6b278..607e81f29 100644
--- a/packages/langium/src/utils/errors.ts
+++ b/packages/langium/src/utils/errors.ts
@@ -15,3 +15,5 @@ export class ErrorWithLocation extends Error {
 export function assertUnreachable(_: never): never {
     throw new Error('Error! The input value was not handled.');
 }
+
+export function assertType<T>(value: unknown): asserts value is T {}
diff --git a/packages/langium/test/parser/worker-thread-async-parser-with-beamer.test.ts b/packages/langium/test/parser/worker-thread-async-parser-with-beamer.test.ts
new file mode 100644
index 000000000..ee8bea209
--- /dev/null
+++ b/packages/langium/test/parser/worker-thread-async-parser-with-beamer.test.ts
@@ -0,0 +1,207 @@
+/******************************************************************************
+ * Copyright 2024 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+
+import { describe, expect, test } from 'vitest';
+import { WorkerThreadAsyncParser } from 'langium/node';
+import { createLangiumGrammarServices } from 'langium/grammar';
+import type { AstNode, Grammar, LangiumCoreServices, ParseResult, AstReassembler, AstReassemblerContext } from 'langium';
+import type { LangiumServices } from 'langium/lsp';
+import { EmptyFileSystem, GrammarUtils, CstUtils, GrammarAST, isOperationCancelled, Deferred, ParserWorker, BiMap } from 'langium';
+import { CancellationToken, CancellationTokenSource } from 'vscode-languageserver';
+import { fail } from 'node:assert';
+import { fileURLToPath } from 'node:url';
+import { Worker } from 'node:worker_threads';
+
+class TestAsyncParser extends WorkerThreadAsyncParser {
+    protected reassembler: AstReassembler;
+    constructor(services: LangiumCoreServices) {
+        super(services, () => fileURLToPath(new URL('.', import.meta.url)) + '/worker-thread-beamer.js');
+        this.reassembler = services.serializer.AstReassembler;
+    }
+    setThreadCount(threadCount: number): void {
+        this.threadCount = threadCount;
+    }
+    override async parse<T extends AstNode>(text: string, cancelToken: CancellationToken): Promise<ParseResult<T>> {
+        const worker = await this.acquireParserWorker(cancelToken);
+        const deferred = new Deferred<ParseResult<T>>();
+        let timeout: NodeJS.Timeout | undefined;
+        // If the cancellation token is requested, we wait for a certain time before terminating the worker.
+        // Since the cancellation token lives longer than the parsing process, we need to dispose the event listener.
+        // Otherwise, we might accidentally terminate the worker after the parsing process has finished.
+        const cancellation = cancelToken.onCancellationRequested(() => {
+            timeout = setTimeout(() => {
+                this.terminateWorker(worker);
+            }, this.terminationDelay);
+        });
+        worker.parse(text).then(result => {
+            deferred.resolve(result as unknown as ParseResult<T>);
+        }).catch(err => {
+            deferred.reject(err);
+        }).finally(() => {
+            cancellation.dispose();
+            clearTimeout(timeout);
+        });
+        return deferred.promise;
+    }
+    protected override createWorker(): ParserWorker {
+        const path = typeof this.workerPath === 'function' ? this.workerPath() : this.workerPath;
+        const worker = new Worker(path);
+        const parserWorker = new BeamingParserWorker(worker, this.reassembler);
+        return parserWorker;
+    }
+
+}
+
+class BeamingParserWorker extends ParserWorker {
+    constructor(worker: Worker, reassembler: AstReassembler) {
+        super(
+            (message) => worker.postMessage(message),
+            cb => {
+                const context: AstReassemblerContext = {
+                    cstStack: [],
+                    elementToId: new BiMap(),
+                    idToAstNode: [],
+                    idToCstNode: [],
+                    lexerErrors: [],
+                    nextFreeCstNode: 0,
+                    parserErrors: [],
+                    rootAstNodeId: -1,
+                    rootCstNodeId: -1,
+
+                };
+                worker.on('message', (instr) => {
+                    if (reassembler.reassemble(context, instr)) {
+                        cb(reassembler.buildParseResult(context));
+                    }
+                });
+            },
+            cb => worker.on('error', cb),
+            () => worker.terminate()
+        );
+    }
+
+}
+
+describe('WorkerThreadAsyncParser with Beamer', () => {
+
+    test('BEAMER performs async parsing in parallel', async () => {
+        const services = getServices();
+        const file = createLargeFile(10);
+        const asyncParser = services.parser.AsyncParser as TestAsyncParser;
+        asyncParser.setThreadCount(4);
+        const promises: Array<Promise<ParseResult<Grammar>>> = [];
+        for (let i = 0; i < 16; i++) {
+            promises.push(asyncParser.parse<Grammar>(file, CancellationToken.None));
+        }
+        const result = await Promise.all(promises);
+        for (const parseResult of result) {
+            console.log(GrammarUtils.findNodeForProperty(parseResult.value.$cstNode, 'name')!.offset);
+            expect(parseResult.value.name).toBe('Test');
+            expect(GrammarUtils.findNodeForProperty(parseResult.value.$cstNode, 'name')!.offset).toBe(8);
+        }
+    }, 20000);
+
+    test('BEAMER async parsing can be cancelled', async () => {
+        const services = getServices();
+        // This file should take a few seconds to parse
+        const file = createLargeFile(100000);
+        const asyncParser = services.parser.AsyncParser;
+        const cancellationTokenSource = new CancellationTokenSource();
+        setTimeout(() => cancellationTokenSource.cancel(), 50);
+        const start = Date.now();
+        try {
+            await asyncParser.parse(file, cancellationTokenSource.token);
+            fail('Parsing should have been cancelled');
+        } catch (err) {
+            expect(isOperationCancelled(err)).toBe(true);
+        }
+        const end = Date.now();
+        // The whole parsing process should have been successfully cancelled within a second
+        expect(end - start).toBeLessThan(1000);
+    });
+
+    test('BEAMER async parsing can be cancelled and then restarted', async () => {
+        const services = getServices();
+        // This file should take a few seconds to parse
+        const file = createLargeFile(100000);
+        const asyncParser = services.parser.AsyncParser;
+        const cancellationTokenSource = new CancellationTokenSource();
+        setTimeout(() => cancellationTokenSource.cancel(), 50);
+        try {
+            await asyncParser.parse(file, cancellationTokenSource.token);
+            fail('Parsing should have been cancelled');
+        } catch (err) {
+            expect(isOperationCancelled(err)).toBe(true);
+        }
+        // Calling this method should recreate the worker and parse the file correctly
+        const result = await asyncParser.parse<Grammar>(createLargeFile(10), CancellationToken.None);
+        expect(result.value.name).toBe('Test');
+    });
+
+    test('BEAMER async parsing yields correct CST', async () => {
+        const services = getServices();
+        const file = createLargeFile(10);
+        const result = await services.parser.AsyncParser.parse(file, CancellationToken.None);
+        const index = file.indexOf('TestRule');
+        // Assert that the CST can be found at all from the root node
+        // This indicates that the CST is correctly linked to itself
+        const node = CstUtils.findLeafNodeAtOffset(result.value.$cstNode!, index)!;
+        expect(node).toBeDefined();
+        expect(node.text).toBe('TestRule0');
+        // Assert that the CST node is correctly linked to its container elements
+        expect(node.container?.container).toBeDefined();
+        expect(node.container!.container!.text).toBe('TestRule0: name="Hello";');
+        // Assert that the CST node has a reference to the root
+        expect(node.root).toBeDefined();
+        expect(node.root.fullText).toBe(file);
+        // Assert that the CST node has a reference to the correct AST node
+        const astNode = node?.astNode as GrammarAST.ParserRule;
+        expect(astNode).toBeDefined();
+        expect(astNode.$type).toBe(GrammarAST.ParserRule);
+        expect(astNode.name).toBe('TestRule0');
+    });
+
+    test('BEAMER parser errors are correctly transmitted', async () => {
+        const services = getServices();
+        const file = 'grammar Test Rule: name="Hello" // missing semicolon';
+        const result = await services.parser.AsyncParser.parse(file, CancellationToken.None);
+        expect(result.parserErrors).toHaveLength(1);
+        expect(result.parserErrors[0].name).toBe('MismatchedTokenException');
+        expect(result.parserErrors[0]).toHaveProperty('previousToken');
+        expect(result.parserErrors[0]).toHaveProperty('message', "Expecting token of type ';' but found ``.");
+    });
+
+    test.skip('BEAMER Check metrics of async parser', async () => {
+        const services = getServices();
+        // This file should take a few seconds to parse
+        const file = createLargeFile(100_000);
+        const asyncParser = services.parser.AsyncParser;
+        const start = Date.now();
+        const promise = asyncParser.parse(file, CancellationToken.None);
+        await promise;
+        const end = Date.now();
+        console.log(end - start);
+    }, 100_000);
+
+    function createLargeFile(size: number): string {
+        let result = 'grammar Test\n';
+        for (let i = 0; i < size; i++) {
+            result += 'TestRule' + i + ': name="Hello";\n';
+        }
+        return result;
+    }
+
+    function getServices(): LangiumServices {
+        const services = createLangiumGrammarServices(EmptyFileSystem, undefined, {
+            parser: {
+                AsyncParser: (services) => new TestAsyncParser(services)
+            }
+        }).grammar;
+        // We usually only need one thread for testing
+        (services.parser.AsyncParser as TestAsyncParser).setThreadCount(1);
+        return services;
+    }
+});
diff --git a/packages/langium/test/parser/worker-thread-async-parser.test.ts b/packages/langium/test/parser/worker-thread-async-parser-with-hydrator.test.ts
similarity index 86%
rename from packages/langium/test/parser/worker-thread-async-parser.test.ts
rename to packages/langium/test/parser/worker-thread-async-parser-with-hydrator.test.ts
index 1dabba00e..88fd86122 100644
--- a/packages/langium/test/parser/worker-thread-async-parser.test.ts
+++ b/packages/langium/test/parser/worker-thread-async-parser-with-hydrator.test.ts
@@ -13,10 +13,9 @@ import { EmptyFileSystem, GrammarUtils, CstUtils, GrammarAST, isOperationCancell
 import { CancellationToken, CancellationTokenSource } from 'vscode-languageserver';
 import { fail } from 'node:assert';
 import { fileURLToPath } from 'node:url';
-
 class TestAsyncParser extends WorkerThreadAsyncParser {
     constructor(services: LangiumCoreServices) {
-        super(services, () => fileURLToPath(new URL('.', import.meta.url)) + '/worker-thread.js');
+        super(services, () => fileURLToPath(new URL('.', import.meta.url)) + '/worker-thread-hydrator.js');
     }
     setThreadCount(threadCount: number): void {
         this.threadCount = threadCount;
@@ -25,7 +24,7 @@ class TestAsyncParser extends WorkerThreadAsyncParser {
 
 describe('WorkerThreadAsyncParser', () => {
 
-    test('performs async parsing in parallel', async () => {
+    test('HYDRATOR performs async parsing in parallel', async () => {
         const services = getServices();
         const file = createLargeFile(10);
         const asyncParser = services.parser.AsyncParser as TestAsyncParser;
@@ -41,7 +40,7 @@ describe('WorkerThreadAsyncParser', () => {
         }
     }, 20000);
 
-    test('async parsing can be cancelled', async () => {
+    test('HYDRATOR async parsing can be cancelled', async () => {
         const services = getServices();
         // This file should take a few seconds to parse
         const file = createLargeFile(100000);
@@ -60,7 +59,7 @@ describe('WorkerThreadAsyncParser', () => {
         expect(end - start).toBeLessThan(1000);
     });
 
-    test('async parsing can be cancelled and then restarted', async () => {
+    test('HYDRATOR async parsing can be cancelled and then restarted', async () => {
         const services = getServices();
         // This file should take a few seconds to parse
         const file = createLargeFile(100000);
@@ -78,7 +77,7 @@ describe('WorkerThreadAsyncParser', () => {
         expect(result.value.name).toBe('Test');
     });
 
-    test('async parsing yields correct CST', async () => {
+    test('HYDRATOR async parsing yields correct CST', async () => {
         const services = getServices();
         const file = createLargeFile(10);
         const result = await services.parser.AsyncParser.parse(file, CancellationToken.None);
@@ -101,7 +100,7 @@ describe('WorkerThreadAsyncParser', () => {
         expect(astNode.name).toBe('TestRule0');
     });
 
-    test('parser errors are correctly transmitted', async () => {
+    test('HYDRATOR parser errors are correctly transmitted', async () => {
         const services = getServices();
         const file = 'grammar Test Rule: name="Hello" // missing semicolon';
         const result = await services.parser.AsyncParser.parse(file, CancellationToken.None);
@@ -111,6 +110,18 @@ describe('WorkerThreadAsyncParser', () => {
         expect(result.parserErrors[0]).toHaveProperty('message', "Expecting token of type ';' but found ``.");
     });
 
+    test.skip('HYDRATOR Check metrics of async parser', async () => {
+        const services = getServices();
+        // This file should take a few seconds to parse
+        const file = createLargeFile(100_000);
+        const asyncParser = services.parser.AsyncParser;
+        const start = Date.now();
+        const promise = asyncParser.parse(file, CancellationToken.None);
+        await promise;
+        const end = Date.now();
+        console.log(end - start);
+    }, 100_000);
+
     function createLargeFile(size: number): string {
         let result = 'grammar Test\n';
         for (let i = 0; i < size; i++) {
diff --git a/packages/langium/test/parser/worker-thread-beamer.js b/packages/langium/test/parser/worker-thread-beamer.js
new file mode 100644
index 000000000..44af1116e
--- /dev/null
+++ b/packages/langium/test/parser/worker-thread-beamer.js
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * Copyright 2023 TypeFox GmbH
+ * This program and the accompanying materials are made available under the
+ * terms of the MIT License, which is available in the project root.
+ ******************************************************************************/
+
+import { EmptyFileSystem } from 'langium';
+import { createLangiumGrammarServices } from 'langium/grammar';
+import { parentPort } from 'node:worker_threads';
+
+const services = createLangiumGrammarServices(EmptyFileSystem).grammar;
+const parser = services.parser.LangiumParser;
+const disassembler = services.serializer.AstDisassembler;
+
+parentPort.on('message', text => {
+    const result = parser.parse(text);
+    for (const instr of disassembler.disassemble(result)) {
+        parentPort.postMessage(instr);
+    }
+});
diff --git a/packages/langium/test/parser/worker-thread.js b/packages/langium/test/parser/worker-thread-hydrator.js
similarity index 100%
rename from packages/langium/test/parser/worker-thread.js
rename to packages/langium/test/parser/worker-thread-hydrator.js
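
Illustrative usage (not part of the diff): a minimal sketch of how the disassembler/reassembler pair introduced above could be exercised in-process, without a worker thread, by round-tripping a ParseResult through the instruction stream. It assumes the Langium grammar language services; the sample grammar text and variable names are made up for the example.

import { BiMap, EmptyFileSystem } from 'langium';
import type { AstReassemblerContext } from 'langium';
import { createLangiumGrammarServices } from 'langium/grammar';

const services = createLangiumGrammarServices(EmptyFileSystem).grammar;
const parseResult = services.parser.LangiumParser.parse('grammar Test\nTestRule: name="Hello";');

// Fresh context; the Allocate instruction re-initializes most of these fields.
const context: AstReassemblerContext = {
    lexerErrors: [],
    parserErrors: [],
    idToAstNode: [],
    idToCstNode: [],
    nextFreeCstNode: 0,
    cstStack: [],
    rootCstNodeId: -1,
    rootAstNodeId: -1,
    elementToId: new BiMap()
};

// Feed every instruction emitted by the disassembler back into the reassembler;
// reassemble() returns true once the Return instruction has been processed.
for (const instr of services.serializer.AstDisassembler.disassemble(parseResult)) {
    if (services.serializer.AstReassembler.reassemble(context, instr)) {
        const copy = services.serializer.AstReassembler.buildParseResult(context);
        console.log(copy.value.$type); // 'Grammar'
    }
}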