From b2b1cbef85b174bff576131ebf04915a1a7237f2 Mon Sep 17 00:00:00 2001 From: Johannes Meier Date: Fri, 25 Oct 2024 13:14:18 +0200 Subject: [PATCH] Extend the API of the validation registry for more performant custom validations (#1614) * new API for registering set-up/tear-down logic which is executed once before/after validating an AST * test cases for the new validation API * improvements according to the review: comments regarding stateful validation, typed and simplified test cases * use get instead of getter, according to the review --- .../src/validation/document-validator.ts | 25 ++- .../src/validation/validation-registry.ts | 112 +++++++++++-- .../validation/document-validator.test.ts | 156 +++++++++++++++++- 3 files changed, 272 insertions(+), 21 deletions(-) diff --git a/packages/langium/src/validation/document-validator.ts b/packages/langium/src/validation/document-validator.ts index 7e3a487a3..fb3754411 100644 --- a/packages/langium/src/validation/document-validator.ts +++ b/packages/langium/src/validation/document-validator.ts @@ -184,6 +184,22 @@ export class DefaultDocumentValidator implements DocumentValidator { validationItems.push(this.toDiagnostic(severity, message, info)); }; + await this.validateAstBefore(rootNode, options, acceptor, cancelToken); + await this.validateAstNodes(rootNode, options, acceptor, cancelToken); + await this.validateAstAfter(rootNode, options, acceptor, cancelToken); + + return validationItems; + } + + protected async validateAstBefore(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise { + const checksBefore = this.validationRegistry.checksBefore; + for (const checkBefore of checksBefore) { + await interruptAndCheck(cancelToken); + await checkBefore(rootNode, acceptor, options.categories ?? [], cancelToken); + } + } + + protected async validateAstNodes(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise { await Promise.all(streamAst(rootNode).map(async node => { await interruptAndCheck(cancelToken); const checks = this.validationRegistry.getChecks(node.$type, options.categories); @@ -191,7 +207,14 @@ export class DefaultDocumentValidator implements DocumentValidator { await check(node, acceptor, cancelToken); } })); - return validationItems; + } + + protected async validateAstAfter(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise { + const checksAfter = this.validationRegistry.checksAfter; + for (const checkAfter of checksAfter) { + await interruptAndCheck(cancelToken); + await checkAfter(rootNode, acceptor, options.categories ?? [], cancelToken); + } } protected toDiagnostic(severity: ValidationSeverity, message: string, info: DiagnosticInfo): Diagnostic { diff --git a/packages/langium/src/validation/validation-registry.ts b/packages/langium/src/validation/validation-registry.ts index 5c3d7acc1..8f7dc8a9a 100644 --- a/packages/langium/src/validation/validation-registry.ts +++ b/packages/langium/src/validation/validation-registry.ts @@ -5,15 +5,16 @@ ******************************************************************************/ import type { CodeDescription, DiagnosticRelatedInformation, DiagnosticTag, integer, Range } from 'vscode-languageserver-types'; -import type { CancellationToken } from '../utils/cancellation.js'; +import { assertUnreachable } from '../index.js'; import type { LangiumCoreServices } from '../services.js'; import type { AstNode, AstReflection, Properties } from '../syntax-tree.js'; -import type { MaybePromise } from '../utils/promise-utils.js'; -import type { Stream } from '../utils/stream.js'; -import type { DocumentSegment } from '../workspace/documents.js'; +import type { CancellationToken } from '../utils/cancellation.js'; import { MultiMap } from '../utils/collections.js'; +import type { MaybePromise } from '../utils/promise-utils.js'; import { isOperationCancelled } from '../utils/promise-utils.js'; +import type { Stream } from '../utils/stream.js'; import { stream } from '../utils/stream.js'; +import type { DocumentSegment } from '../workspace/documents.js'; export type DiagnosticInfo> = { /** The AST node to which the diagnostic is attached. */ @@ -63,6 +64,20 @@ export type ValidationAcceptor = (severity: ValidationSeverit export type ValidationCheck = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise; +/** + * A utility type for describing functions which will be called once before or after all the AstNodes of an AST/Langium document are validated. + * + * The AST is represented by its root AstNode. + * + * The given validation acceptor helps to report some early or lately detected issues. + * + * The 'categories' indicate, which validation categories are executed for all the AstNodes. + * This helps to tailor the preparations/tear-down logic to the actually executed checks on the nodes. + * + * It is recommended to support interrupts during long-running logic with 'interruptAndCheck(cancelToken)'. + */ +export type ValidationPreparation = (rootNode: AstNode, accept: ValidationAcceptor, categories: ValidationCategory[], cancelToken: CancellationToken) => MaybePromise; + /** * A utility type for associating non-primitive AST types to corresponding validation checks. For example: * @@ -110,6 +125,9 @@ export class ValidationRegistry { private readonly entries = new MultiMap(); private readonly reflection: AstReflection; + private entriesBefore: ValidationPreparation[] = []; + private entriesAfter: ValidationPreparation[] = []; + constructor(services: LangiumCoreServices) { this.reflection = services.shared.AstReflection; } @@ -142,28 +160,34 @@ export class ValidationRegistry { category }; this.addEntry(type, entry); + } else { + assertUnreachable(callbacks); } } } protected wrapValidationException(check: ValidationCheck, thisObj: unknown): ValidationCheck { return async (node, accept, cancelToken) => { - try { - await check.call(thisObj, node, accept, cancelToken); - } catch (err) { - if (isOperationCancelled(err)) { - throw err; - } - console.error('An error occurred during validation:', err); - const message = err instanceof Error ? err.message : String(err); - if (err instanceof Error && err.stack) { - console.error(err.stack); - } - accept('error', 'An error occurred during validation: ' + message, { node }); - } + await this.handleException(() => check.call(thisObj, node, accept, cancelToken), 'An error occurred during validation', accept, node); }; } + protected async handleException(functionality: () => MaybePromise, messageContext: string, accept: ValidationAcceptor, node: AstNode): Promise { + try { + await functionality(); + } catch (err) { + if (isOperationCancelled(err)) { + throw err; + } + console.error(`${messageContext}:`, err); + if (err instanceof Error && err.stack) { + console.error(err.stack); + } + const messageDetails = err instanceof Error ? err.message : String(err); + accept('error', `${messageContext}: ${messageDetails}`, { node }); + } + } + protected addEntry(type: string, entry: ValidationCheckEntry): void { if (type === 'AstNode') { this.entries.add('AstNode', entry); @@ -183,4 +207,58 @@ export class ValidationRegistry { return checks.map(entry => entry.check); } + /** + * Register logic which will be executed once before validating all the nodes of an AST/Langium document. + * This helps to prepare or initialize some information which are required or reusable for the following checks on the AstNodes. + * + * As an example, for validating unique fully-qualified names of nodes in the AST, + * here the map for mapping names to nodes could be established. + * During the usual checks on the nodes, they are put into this map with their name. + * + * Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation. + * Therefore it is recommended to clear stored information + * _before_ validating an AST to validate each AST unaffected from other ASTs + * AND _after_ validating the AST to free memory by information which are no longer used. + * + * @param checkBefore a set-up function which will be called once before actually validating an AST + * @param thisObj Optional object to be used as `this` when calling the validation check functions. + */ + registerBeforeDocument(checkBefore: ValidationPreparation, thisObj: ThisParameterType = this): void { + this.entriesBefore.push(this.wrapPreparationException(checkBefore, 'An error occurred during set-up of the validation', thisObj)); + } + + /** + * Register logic which will be executed once after validating all the nodes of an AST/Langium document. + * This helps to finally evaluate information which are collected during the checks on the AstNodes. + * + * As an example, for validating unique fully-qualified names of nodes in the AST, + * here the map with all the collected nodes and their names is checked + * and validation hints are created for all nodes with the same name. + * + * Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation. + * Therefore it is recommended to clear stored information + * _before_ validating an AST to validate each AST unaffected from other ASTs + * AND _after_ validating the AST to free memory by information which are no longer used. + * + * @param checkBefore a set-up function which will be called once before actually validating an AST + * @param thisObj Optional object to be used as `this` when calling the validation check functions. + */ + registerAfterDocument(checkAfter: ValidationPreparation, thisObj: ThisParameterType = this): void { + this.entriesAfter.push(this.wrapPreparationException(checkAfter, 'An error occurred during tear-down of the validation', thisObj)); + } + + protected wrapPreparationException(check: ValidationPreparation, messageContext: string, thisObj: unknown): ValidationPreparation { + return async (rootNode, accept, categories, cancelToken) => { + await this.handleException(() => check.call(thisObj, rootNode, accept, categories, cancelToken), messageContext, accept, rootNode); + }; + } + + get checksBefore(): ValidationPreparation[] { + return this.entriesBefore; + } + + get checksAfter(): ValidationPreparation[] { + return this.entriesAfter; + } + } diff --git a/packages/langium/test/validation/document-validator.test.ts b/packages/langium/test/validation/document-validator.test.ts index d9804637a..2e3f468fa 100644 --- a/packages/langium/test/validation/document-validator.test.ts +++ b/packages/langium/test/validation/document-validator.test.ts @@ -4,12 +4,14 @@ * terms of the MIT License, which is available in the project root. ******************************************************************************/ -import type { AstNode, ValidationChecks } from 'langium'; +import { AstUtils } from 'langium'; +import type { AstNode, ValidationAcceptor, ValidationChecks } from 'langium'; +import { createServicesForGrammar } from 'langium/grammar'; +import type { LangiumServices } from 'langium/lsp'; import type { ValidationResult } from 'langium/test'; +import { validationHelper } from 'langium/test'; import { beforeAll, describe, expect, test } from 'vitest'; import { Position, Range } from 'vscode-languageserver'; -import { createServicesForGrammar } from 'langium/grammar'; -import { validationHelper } from 'langium/test'; // Related to https://github.com/eclipse-langium/langium/issues/571 describe('Parser error is thrown on resynced token with NaN position', () => { @@ -110,3 +112,151 @@ describe('Generic `AstNode` validation applies to all nodes', () => { }); }); + +describe('Register Before/AfterDocument logic for validations with state', () => { + + // the grammar + const grammar = `grammar NestedNamedNodes + entry Model: + (elements+=NamedNode)*; + + NamedNode: + name=ID '{' (children+=NamedNode)* '}'; + + hidden terminal WS: /\\s+/; + terminal ID: /[_a-zA-Z][\\w_]*/; + `; + + // AST types derived from the grammar + type NestedNamedNodesAstTypes = { + Model: Model; + NamedNode: NamedNode; + } + interface Model extends AstNode { + $type: 'Model'; + elements: NamedNode[]; + } + interface NamedNode extends AstNode { + $type: 'NamedNode'; + $container: Model | NamedNode; + name: string; + children: NamedNode[]; + } + + // utilities + let validate: (input: string) => Promise>; + let services: LangiumServices; + + function getFQN(node: AstNode): string { + if ('name' in node && node.$container) { + const parentName = getFQN(node.$container); + if (parentName) { + return parentName + '.' + node.name; + } + return '' + node.name; + } + return ''; + } + + function rememberNamedNode(child: NamedNode, fqnMap: Map): void { + const fqn = getFQN(child); + let list = fqnMap.get(fqn); + if (!list) { + list = []; + fqnMap.set(fqn, list); + } + list.push(child); + } + + function checkForDuplicates(fqnMap: Map, accept: ValidationAcceptor): void { + for (const [key, value] of fqnMap.entries()) { + if (value.length >= 2) { + value.forEach(child => accept('error', `The FQN '${key}' is not unique.`, { node: child })); + } + } + } + + // 1. realize validation in state-less way + function registerValidationForRootElement() { + const validationChecks: ValidationChecks = { + // do the expensive validation once on the root Model only + Model: (node, accept) => { + const fqnMap = new Map(); + // collect the FQN of all nodes + // !! This 'streamAllContents' is saved in the alternative version of the validation !! + AstUtils.streamAllContents(node).forEach(child => { + rememberNamedNode(child as NamedNode, fqnMap); + }); + // check for duplicates + checkForDuplicates(fqnMap, accept); + }, + }; + services.validation.ValidationRegistry.register(validationChecks); + } + + // 2. realize validation without an additional AST traversal by exploiting the stateful validation approach + function registerValidationBeforeAfter() { + const fqnMap = new Map(); // this is the new state: remember all NamedNode nodes, classified by their name + services.validation.ValidationRegistry.registerBeforeDocument((_rootNode, _accept, _categories) => { + fqnMap.clear(); // clear everything to be sure when starting to validate an AST + }); + services.validation.ValidationRegistry.register(>{ + // register the named nodes in the map (but don't validate/check them now) + NamedNode: (node, _accept) => { + // Streaming the whole AST is not required with this approach, since the streaming is already done by the DocumentValidator! + rememberNamedNode(node, fqnMap); + }, + }); + services.validation.ValidationRegistry.registerAfterDocument((_rootNode, accept, _categories) => { + // check for duplicates after all checks for the single nodes of the AST are done + checkForDuplicates(fqnMap, accept); + fqnMap.clear(); // free memory afterwards + }); + } + + // test cases to ensure, that both approaches produce the same validation hints + describe('Using the stateless validation', () => { + beforeAll(async () => { + services = await createServicesForGrammar({ + grammar + }); + validate = validationHelper(services); + registerValidationForRootElement(); + }); + + test('Children with same name on same level (stateless)', async () => { + const validationResult = await validate('A { B{} C{} B{} }'); + const diagnostics = validationResult.diagnostics; + expect(diagnostics).toHaveLength(2); + }); + + test('Nested Children with same name (stateless)', async () => { + const validationResult = await validate('A { A{ A{} } }'); + const diagnostics = validationResult.diagnostics; + expect(diagnostics).toHaveLength(0); + }); + }); + + describe('Using the stateful validation', () => { + beforeAll(async () => { + services = await createServicesForGrammar({ + grammar + }); + validate = validationHelper(services); + registerValidationBeforeAfter(); + }); + + test('Children with same name on same level (with state)', async () => { + const validationResult = await validate('A { B{} C{} B{} }'); + const diagnostics = validationResult.diagnostics; + expect(diagnostics).toHaveLength(2); + }); + + test('Nested Children with same name (with state)', async () => { + const validationResult = await validate('A { A{ A{} } }'); + const diagnostics = validationResult.diagnostics; + expect(diagnostics).toHaveLength(0); + }); + }); + +});