Skip to content

Commit

Permalink
Read workflow_settings.yaml (#1580)
Browse files Browse the repository at this point in the history
* Add base dir structure for reading workflow settings, some cleanup

* Add a basic workflow_settings reader

* Progress with loading YAML via require

* Working YAML reader for workflow settings

* Tidying

* Cleanup bits

* Fix relative path for requiring workflow settings

* Fix import orders and lint errors

* Stop exporting getWorkflowSettings

* Move TSLint comment locations

* Add explicit include for new compilation_sql BUILD file to cli/api

* Fix compilation sql visibility
  • Loading branch information
Ekrekr authored Nov 20, 2023
1 parent 83f9457 commit 14f6562
Show file tree
Hide file tree
Showing 16 changed files with 322 additions and 22 deletions.
1 change: 1 addition & 0 deletions cli/api/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ ts_library(
"//common/protos",
"//common/strings",
"//core",
"//core/compilation_sql",
"//protos:ts",
"//cli/vm:compile_loader",
"//sqlx:lexer",
Expand Down
2 changes: 1 addition & 1 deletion cli/vm/compile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export function compile(compileConfig: dataform.ICompileConfig) {
resolve: (moduleName, parentDirName) =>
path.join(parentDirName, path.relative(parentDirName, compileConfig.projectDir), moduleName)
},
sourceExtensions: ["js", "sql", "sqlx"],
sourceExtensions: ["js", "sql", "sqlx", "yaml"],
compiler
});

Expand Down
61 changes: 55 additions & 6 deletions core/BUILD
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
load("//tools:ts_library.bzl", "ts_library")
load("//tools:expand_template.bzl", "expand_template")
load("//:version.bzl", "DF_VERSION")
load("//testing:index.bzl", "ts_test_suite")
load("//tools:node_modules.bzl", "node_modules")

package(default_visibility = ["//visibility:public"])

Expand All @@ -13,24 +15,71 @@ expand_template(
template = "version.ts.tmpl",
)

filegroup(
name = "files",
srcs = glob(["**/*.*"]) + [":version.ts"],
)

ts_library(
name = "core",
srcs = glob(["**/*.ts"]) + [":version.ts"],
srcs = [
"assertion.ts",
"column_descriptors.ts",
"common.ts",
"compilers.ts",
"declaration.ts",
"index.ts",
"main.ts",
"operation.ts",
"session.ts",
"table.ts",
"targets.ts",
"tasks.ts",
"test.ts",
"utils.ts",
"workflow_settings.ts",
":version.ts",
],
deps = [
"//common/errors",
"//common/protos",
"//common/strings",
"//core/compilation_sql",
"//protos:ts",
"//sqlx:lexer",
"@npm//@types/fs-extra",
"@npm//@types/js-yaml",
"@npm//@types/node",
"@npm//@types/semver",
"@npm//fs-extra",
"@npm//js-yaml",
"@npm//protobufjs",
"@npm//semver",
"@npm//tarjan-graph",
],
)

# Test suite target for this package. It runs main_test.ts against the :core library and
# supplies the :node_modules target as runtime data.
ts_test_suite(
name = "tests",
srcs = [
"main_test.ts",
],
data = [
":node_modules",
],
deps = [
":core",
"//common/protos",
"//protos:ts",
"//testing",
"//tests/utils",
"@npm//@types/chai",
"@npm//@types/fs-extra",
"@npm//@types/node",
"@npm//chai",
"@npm//fs-extra",
"@npm//vm2",
],
)

# node_modules target assembled from the @dataform/core package tarball; it is consumed as
# `data` by the :tests target.
node_modules(
name = "node_modules",
deps = [
"//packages/@dataform/core:package_tar",
],
)
13 changes: 13 additions & 0 deletions core/compilation_sql/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
load("//tools:ts_library.bzl", "ts_library")

package(default_visibility = ["//visibility:public"])

# TypeScript library target for the compilation_sql package: compiles index.ts and depends on
# //protos:ts.
ts_library(
name = "compilation_sql",
srcs = [
"index.ts",
],
deps = [
"//protos:ts",
]
)
17 changes: 15 additions & 2 deletions core/compilers.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
import { load as loadYaml, YAMLException } from "js-yaml";

import * as utils from "df/core/utils";
import { SyntaxTreeNode, SyntaxTreeNodeType } from "df/sqlx/lexer";

export function compile(code: string, path: string) {
/**
 * Turns the contents of a project file into JavaScript source, based on the file extension.
 *
 * - `.sqlx` files are parsed into a syntax tree and handed to `compileSqlx`.
 * - `.yaml` files are parsed and re-emitted as a module exposing `asJson()`, which returns the
 *   parsed document.
 * - Anything else is returned unchanged.
 *
 * @param code the raw file contents.
 * @param path the path of the file being compiled; only its suffix is inspected.
 * @returns the JavaScript source to evaluate for this file.
 * @throws Error if a `.yaml` file cannot be parsed as YAML.
 */
export function compile(code: string, path: string): string {
  if (path.endsWith(".sqlx")) {
    return compileSqlx(SyntaxTreeNode.create(code), path);
  }
  if (!path.endsWith(".yaml")) {
    return code;
  }

  try {
    const serializedYaml = JSON.stringify(loadYaml(code));
    return `exports.asJson = () => (${serializedYaml})`;
  } catch (e) {
    // Only YAML parse failures are wrapped with the file path; anything else propagates as is.
    if (!(e instanceof YAMLException)) {
      throw e;
    }
    throw Error(`${path} is not a valid YAML file: ${e}`);
  }
}

Expand All @@ -31,7 +44,7 @@ export function extractJsBlocks(code: string): { sql: string; js: string } {
};
}

function compileSqlx(rootNode: SyntaxTreeNode, path: string) {
function compileSqlx(rootNode: SyntaxTreeNode, path: string): string {
const { config, js, sql, incremental, preOperations, postOperations, inputs } = extractSqlxParts(
rootNode
);
Expand Down
3 changes: 2 additions & 1 deletion core/main.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { decode64, encode64 } from "df/common/protos";
import { Session } from "df/core/session";
import * as utils from "df/core/utils";
import { readWorkflowSettings } from "df/core/workflow_settings";
import { dataform } from "df/protos/ts";

/**
Expand All @@ -23,7 +24,7 @@ export function main(coreExecutionRequest: Uint8Array | string): Uint8Array | st
const compileRequest = request.compile;

// Read the project config from the root of the project.
const originalProjectConfig = require("dataform.json");
const originalProjectConfig = readWorkflowSettings();

const projectConfigOverride = compileRequest.compileConfig.projectConfigOverride ?? {};

Expand Down
159 changes: 159 additions & 0 deletions core/main_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { expect } from "chai";
import * as fs from "fs-extra";
import * as path from "path";
import { CompilerFunction, NodeVM } from "vm2";

import { decode64, encode64 } from "df/common/protos";
import { compile } from "df/core/compilers";
import { dataform } from "df/protos/ts";
import { suite, test } from "df/testing";
import { TmpDirFixture } from "df/testing/fixtures";
import { asPlainObject } from "df/tests/utils";

// Minimal valid contents for a workflow_settings.yaml file, used by the tests below.
const VALID_WORKFLOW_SETTINGS_YAML = `
warehouse: bigquery
defaultDatabase: dataform
`;

// The same settings expressed as the contents of a (deprecated) dataform.json file.
const VALID_DATAFORM_JSON = `
{
"warehouse": "bigquery",
"defaultDatabase": "dataform"
}
`;

suite("@dataform/core", ({ afterEach }) => {
  const tmpDirFixture = new TmpDirFixture(afterEach);

  // Writes `contents` to `fileName` at the root of the given project directory.
  const writeProjectFile = (projectDir: string, fileName: string, contents: string) => {
    // tslint:disable-next-line: tsr-detect-non-literal-fs-filename
    fs.writeFileSync(path.join(projectDir, fileName), contents);
  };

  // Builds a compile request that only specifies the project directory.
  const compileRequestFor = (projectDir: string) =>
    dataform.CoreExecutionRequest.create({
      compile: { compileConfig: { projectDir } }
    });

  suite("workflow settings", () => {
    test(`main succeeds when a valid workflow_settings.yaml is present`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();
      writeProjectFile(projectDir, "workflow_settings.yaml", VALID_WORKFLOW_SETTINGS_YAML);

      const result = runMainInVm(compileRequestFor(projectDir));

      expect(asPlainObject(result.compile.compiledGraph.projectConfig)).deep.equals(
        asPlainObject({
          warehouse: "bigquery",
          defaultDatabase: "dataform"
        })
      );
    });

    // dataform.json for workflow settings is deprecated, but still currently supported.
    test(`main succeeds when a valid dataform.json is present`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();
      writeProjectFile(projectDir, "dataform.json", VALID_DATAFORM_JSON);

      const result = runMainInVm(compileRequestFor(projectDir));

      expect(asPlainObject(result.compile.compiledGraph.projectConfig)).deep.equals(
        asPlainObject({
          warehouse: "bigquery",
          defaultDatabase: "dataform"
        })
      );
    });

    test(`main fails when no workflow settings file is present`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();

      expect(() => runMainInVm(compileRequestFor(projectDir))).to.throw(
        "Failed to resolve workflow_settings.yaml"
      );
    });

    test(`main fails when both workflow settings and dataform.json files are present`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();
      writeProjectFile(projectDir, "dataform.json", VALID_DATAFORM_JSON);
      writeProjectFile(projectDir, "workflow_settings.yaml", VALID_WORKFLOW_SETTINGS_YAML);

      expect(() => runMainInVm(compileRequestFor(projectDir))).to.throw(
        "dataform.json has been deprecated and cannot be defined alongside workflow_settings.yaml"
      );
    });

    test(`main fails when workflow_settings.yaml is an invalid yaml file`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();
      writeProjectFile(projectDir, "workflow_settings.yaml", "&*19132sdS:asd:");

      expect(() => runMainInVm(compileRequestFor(projectDir))).to.throw(
        "workflow_settings.yaml contains invalid fields"
      );
    });

    test(`main fails when dataform.json is an invalid json file`, () => {
      const projectDir = tmpDirFixture.createNewTmpDir();
      writeProjectFile(projectDir, "dataform.json", '{keyWithNoQuotes: "validValue"}');

      // The leading part of the JSON.parse error message differs between V8 versions
      // ("Unexpected token k ..." vs "Expected property name or '}' ..."), so only the stable
      // part of the message is asserted on.
      expect(() => runMainInVm(compileRequestFor(projectDir))).to.throw("in JSON at position 1");
    });
  });
});

// `main` has to be executed inside a VM, because Node functions such as `require` are overridden
// when it runs.
function runMainInVm(coreExecutionRequest: dataform.CoreExecutionRequest) {
  const { projectDir } = coreExecutionRequest.compile.compileConfig;

  // Make the built Dataform Core available to the project as a node_modules directory.
  fs.copySync(`${process.cwd()}/core/node_modules`, `${projectDir}/node_modules`);

  // vm2's native compiler integration applies the compiler to the files it loads.
  const vm = new NodeVM({
    // With an inherited console, console.logs are visible while tests run, which helps when
    // debugging.
    console: "inherit",
    wrapper: "none",
    require: {
      builtin: ["path"],
      context: "sandbox",
      external: true,
      root: projectDir,
      resolve: (moduleName, parentDirName) => {
        const relativeProjectDir = path.relative(parentDirName, projectDir);
        return path.join(parentDirName, relativeProjectDir, moduleName);
      }
    },
    sourceExtensions: ["js", "sql", "sqlx", "yaml"],
    compiler: compile as CompilerFunction
  });

  const encodedRequest = encode64(dataform.CoreExecutionRequest, coreExecutionRequest);
  const entryPointFileName = path.resolve(path.join(projectDir, "index.js"));
  const encodedResponse = vm.run(
    `return require("@dataform/core").main("${encodedRequest}")`,
    entryPointFileName
  );
  return decode64(dataform.CoreExecutionResponse, encodedResponse);
}
46 changes: 46 additions & 0 deletions core/workflow_settings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { dataform } from "df/protos/ts";

/**
 * Loads the project's workflow settings from the project root.
 *
 * `workflow_settings.yaml` is the preferred source; `dataform.json` is deprecated but still
 * accepted when it is the only settings file present.
 *
 * @returns the settings as a `dataform.ProjectConfig`.
 * @throws Error if neither file can be loaded, if both are present, or if the loaded YAML
 *   settings are empty.
 */
export function readWorkflowSettings(): dataform.ProjectConfig {
  const yamlModule = maybeRequire("workflow_settings.yaml");
  // `dataform.json` is deprecated; new versions of Dataform Core prefer `workflow_settings.yaml`.
  const legacyJson = maybeRequire("dataform.json");

  if (!yamlModule && !legacyJson) {
    throw Error("Failed to resolve workflow_settings.yaml");
  }
  if (yamlModule && legacyJson) {
    throw Error(
      "dataform.json has been deprecated and cannot be defined alongside workflow_settings.yaml"
    );
  }

  // Exactly one source is available here. The YAML module exposes its contents via asJson(),
  // whereas the required JSON file is already the settings object.
  const settings = yamlModule ? yamlModule.asJson() : legacyJson;
  verifyWorkflowSettingsAsJson(settings);
  return dataform.ProjectConfig.create(settings);
}

/**
 * Checks that some workflow settings were actually loaded.
 *
 * @param workflowSettingsAsJson the parsed settings; any falsy value is rejected.
 * @throws Error if no settings object was provided.
 */
function verifyWorkflowSettingsAsJson(workflowSettingsAsJson?: object) {
  // TODO(ekrekr): Implement a protobuf field validator. Protobufjs's verify method is not fit for
  // purpose.
  if (workflowSettingsAsJson) {
    return;
  }
  throw Error("workflow_settings.yaml contains invalid fields");
}

/**
 * Attempts to `require` the given file.
 *
 * @param file the module path to load.
 * @returns the loaded module, or `undefined` if loading failed for any reason other than a
 *   `SyntaxError`.
 * @throws SyntaxError if loading the file raised one, so malformed files are not silently
 *   treated as missing.
 */
function maybeRequire(file: string): any {
  let loaded: any;
  try {
    // tslint:disable-next-line: tsr-detect-non-literal-require
    loaded = require(file);
  } catch (e) {
    if (!(e instanceof SyntaxError)) {
      return undefined;
    }
    throw e;
  }
  return loaded;
}
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"@types/glob": "^8.1.0",
"@types/google-protobuf": "^3.2.7",
"@types/js-beautify": "^1.8.1",
"@types/js-yaml": "^4.0.5",
"@types/json-stable-stringify": "^1.0.32",
"@types/long": "^4.0.0",
"@types/moo": "^0.5.0",
Expand Down Expand Up @@ -71,6 +72,7 @@
"grpc-web-client": "^0.5.0",
"handy-redis": "^1.8.3",
"js-beautify": "^1.10.2",
"js-yaml": "^4.1.0",
"jsdoc": "^3.6.11",
"json-stable-stringify": "^1.0.1",
"long": "^4.0.0",
Expand Down
1 change: 1 addition & 0 deletions packages/@dataform/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ ts_library(
)

externals = [
"js-yaml",
"protobufjs",
"tarjan-graph",
"semver",
Expand Down
Loading

0 comments on commit 14f6562

Please sign in to comment.