Skip to content

Commit

Permalink
Merge pull request #7 from whylabs/non-secret-config
Browse files Browse the repository at this point in the history
    Add additional columns to upload output, move config to function inputs
  • Loading branch information
naddeoa authored Oct 26, 2023
2 parents df8314e + 3a1b45a commit 57230b7
Show file tree
Hide file tree
Showing 20 changed files with 574 additions and 228 deletions.
26 changes: 17 additions & 9 deletions .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,25 @@ jobs:
run: make setup

- name: Check types
run: make lint
run: make lint VERSION=${{ needs.setup.outputs.short_sha }}

- name: Check formatting
run: make format
run: make format VERSION=${{ needs.setup.outputs.short_sha }}

- name: Run test
run: make test

- name: Update version
run: make version_py VERSION=${{ needs.setup.outputs.short_sha }}
run: make test VERSION=${{ needs.setup.outputs.short_sha }}

- name: Build
run: make all
run: make all VERSION=${{ needs.setup.outputs.short_sha }}

- name: Ensure dist/version.sha isn't "dev"
run: |
if grep -q "dev" dist/version.sha; then
echo "dist/version.sha contains 'dev'"
exit 1
fi
- name: Upload udf artifacts for upload later
- name: Upload udf artifacts
uses: actions/upload-artifact@v2
with:
name: udfs
Expand Down Expand Up @@ -137,26 +141,30 @@ jobs:
## Published S3 Paths
- Latest UDFs
- `s3://whylabs-snowflake-udfs/udfs/v1/latest/setup.sql`
- `s3://whylabs-snowflake-udfs/udfs/v1/latest/version.sha`
- `s3://whylabs-snowflake-udfs/udfs/v1/latest/whylogs_udf.py`
- `s3://whylabs-snowflake-udfs/udfs/v1/latest/whylabs_upload_udf.py`
- Version tagged UDFs
- `s3://whylabs-snowflake-udfs/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/setup.sql`
- `s3://whylabs-snowflake-udfs/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/version.sha`
- `s3://whylabs-snowflake-udfs/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/whylogs_udf.py`
- `s3://whylabs-snowflake-udfs/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/whylabs_upload_udf.py`
## Published HTTP paths
- Latest UDFs
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/latest/setup.sql`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/latest/version.sha`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/latest/whylogs_udf.py`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/latest/whylabs_upload_udf.py`
- Version tagged UDFs
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/setup.sql`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/version.sha`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/whylogs_udf.py`
- `https://whylabs-snowflake-udfs.s3.us-west-2.amazonaws.com/udfs/v1/archive/${{ needs.setup.outputs.short_sha }}/whylabs_upload_udf.py`
## Snowflake SQL
Create the latest functions in Snowflake like so. Make sure to see the [official doc page](https://docs.whylabs.ai/docs/integrations-snowflake) for all of the instructions. This snippet doesn't cover creating the required secrets and secret configuration.
Create the latest functions in Snowflake like so. Make sure to see the [official doc page](https://docs.whylabs.ai/docs/integrations-snowflake) for all of the instructions. This snippet doesn't cover creating the required secrets.
```sql
${{ steps.sql_snippet.outputs.sql }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
dist/
__pycache__
*.pyc
*.bin
63 changes: 49 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,29 +1,41 @@
project_name = udfs
build_dir = dist
outputs = $(build_dir)/whylogs_udf.py $(build_dir)/whylabs_upload_udf.py
src := $(shell find $(project_name)/ -name "*.py" -type f)
setup_sql = ./dist/setup.sql
random_alphanum=$(shell cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 8 | head -n 1)
output_udfs = $(build_dir)/whylogs_udf.py $(build_dir)/whylabs_upload_udf.py
outputs = $(output_udfs) $(build_dir)/version.sha $(setup_sql)
VERSION ?= dev

.PHONY: udfs lint format format-fix setup test help populate_demo_table all version_py ./udfs/version.py
ifndef VERSION
$(error VERSION is not set)
endif

.PHONY: udfs lint format format-fix setup test help populate_demo_table all version_py clean $(build_dir)/version.sha

default:help

all: $(project_name) ./dist/setup.sql ## Build the UDFs and the setup script
all: $(outputs) version_py ## Build the UDFs and the setup script

udfs: $(output_udfs)

udfs: $(outputs)
clean: ## Remove generated build files
rm -rf dist/

upload-dev: all ## Upload dev UDFs to the public bucket
@$(call i, Uploading UDFs to public dev bucket)
aws s3 sync "./$(build_dir)/" s3://whylabs-snowflake-udfs/udfs/dev/$(random_alphanum)/
@echo "Uploaded to s3://whylabs-snowflake-udfs/udfs/dev/$(random_alphanum)"
@echo "whylogs_udf: '@whylabs_udf_stage/dev/$(random_alphanum)/whylogs_udf.py'"
@echo "whylabs_upload_udf: '@whylabs_udf_stage/dev/$(random_alphanum)/whylogs_upload_udf.py'"

upload-dev-local: all ## Upload dev UDFs to the Snowflake account
@$(call i, Uploading dev mode UDFs to Snowflake @dev stage)
snowsql -c whylabs --query "create stage if not exists dev;"
snowsql -c whylabs --query "put file://./dist/*.py @dev/ auto_compress=false overwrite=true;"

$(setup_sql): build_dir ./sql/*.sql
$(setup_sql): $(build_dir) ./sql/*.sql
@$(call i, Generating the setup.sql file)
rm -f $(setup_sql) && touch $(setup_sql)

echo "-- Set up network rules" >> $(setup_sql)
Expand All @@ -41,39 +53,62 @@ $(setup_sql): build_dir ./sql/*.sql
./udfs/version.py:
poetry run python ./scripts/create_version.py $(VERSION) > ./udfs/version.py

$(build_dir)/version.sha: $(build_dir)
@$(call i, Generating the setup.sha file)
echo $(VERSION) > $(build_dir)/version.sha

version_py: ./udfs/version.py ## Generate the version python module

build_dir:
$(build_dir):
@$(call i, Generating the build dir)
mkdir -p $(build_dir)

$(build_dir)/whylogs_udf.py: udfs/whylogs_udf.py build_dir
$(build_dir)/whylogs_udf.py: lint format udfs/whylogs_udf.py $(build_dir)
@$(call i, Generating the whylogs_udf.py file)
poetry run python ./scripts/merger.py --entry udfs/whylogs_udf.py --output $(build_dir)/whylogs_udf.py
poetry run python ./scripts/remove_relative_imports.py $(build_dir)/whylogs_udf.py

$(build_dir)/whylabs_upload_udf.py: udfs/whylabs_upload_udf.py build_dir
$(build_dir)/whylabs_upload_udf.py: lint format udfs/whylabs_upload_udf.py $(build_dir)
@$(call i, Generating the whylabs_upload_udf.py file)
poetry run python ./scripts/merger.py --entry udfs/whylabs_upload_udf.py --output $(build_dir)/whylabs_upload_udf.py
poetry run python ./scripts/remove_relative_imports.py $(build_dir)/whylabs_upload_udf.py

lint: ## Check for type issues with mypy
lint: ./udfs/version.py ## Check for type issues with mypy
@$(call i, Linting with mypy)
poetry run mypy $(project_name)/

format: ## Check for formatting issues
poetry run black --check --line-length 120 $(project_name)
@$(call i, Formatting with black)
poetry run black --check --exclude version.py $(project_name)
poetry run autoflake --check --in-place --remove-unused-variables $(src)

format-fix: ## Fix formatting issues
poetry run black --line-length 120 $(project_name)
@$(call i, Formatting and fixing with black)
poetry run black --check --exclude version.py $(project_name)
poetry run black --exclude version.py $(project_name)
poetry run autoflake --in-place --remove-unused-variables $(src)

setup: ## Install dependencies with poetry
poetry install

test: ## Run unit tests
@$(call i, Running unit tests)
PYTHONPATH=. poetry run pytest

populate_demo_table: ## Use the data gen script to upload new data to the dummy table.
for i in $$(seq 1 100); do python ./generate-data.py | snowsql -c whylabs; done

help: ## Show this help message.
@echo 'usage: make [target] ...'
@echo
@echo 'targets:'
@egrep '^(.+)\:(.*) ##\ (.+)' ${MAKEFILE_LIST} | sed -s 's/:\(.*\)##/: ##/' | column -t -c 2 -s ':#'

define i
echo "\n\e[1;34m[INFO]$(1)\e[0m\n"
endef

define w
echo "\n\e[1;93m[WARN]$(1)\e[0m\n"
endef

define e
echo "\n\e[1;91m[ERROR]$(1)\e[0m\n"
endef
33 changes: 32 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ faker = "^19.6.2"
pinliner = {git = "https://github.com/Akrog/pinliner.git"}
pytest = "^7.4.2"
autoflake = "2.0.1"
astunparse = "^1.6.3"

[build-system]
requires = ["poetry-core"]
Expand Down
6 changes: 5 additions & 1 deletion scripts/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def merge_recursive(filepath: str, base_dir: str, seen: Set[str]) -> str:


if __name__ == "__main__":
"""
The output of this script is a giant Python file that contains all the code from the relative import graph
of the --entry file. The relative imports do have to be removed from the giant file though, which is what
the remove_relative_imports.py script does.
"""
parser = argparse.ArgumentParser(description="Merge Python files into one")
parser.add_argument("--entry", required=True, help="Entry Python file to scan")
parser.add_argument("--output", required=True, help="Output Python file")
Expand All @@ -53,7 +58,6 @@ def merge_recursive(filepath: str, base_dir: str, seen: Set[str]) -> str:
seen: Set[str] = set()

merged_content: str = merge_recursive(entry_file, base_dir, seen)
merged_content = re.sub(r"^from \..*|^import \..*", "", merged_content, flags=re.M)

with open(output_file, "w") as f:
f.write(merged_content)
Expand Down
1 change: 1 addition & 0 deletions scripts/print_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@

print(view.to_pandas())
print(view._metadata)
print(view.dataset_timestamp)
12 changes: 12 additions & 0 deletions scripts/print_profile_bin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Print a whylogs dataset profile stored as a raw serialized binary file.

Usage: python print_profile_bin.py <path-to-profile.bin>
"""
from whylogs.core.dataset_profile import DatasetProfileView
import sys

# Path to the serialized profile, given as the only CLI argument.
filepath = sys.argv[1]

# Read the raw profile bytes; the context manager closes the handle promptly.
with open(filepath, "rb") as f:
    profile = f.read()

view = DatasetProfileView.deserialize(profile)

print(view.to_pandas())
# NOTE(review): _metadata is a private attribute — may change between whylogs versions.
print(view._metadata)
41 changes: 41 additions & 0 deletions scripts/remove_relative_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import ast
import astunparse
import sys

def remove_relative_imports(filename) -> None:
    """Rewrite ``filename`` in place, dropping all relative-import statements.

    Bare module-level string expressions (docstrings / string "comments") are
    also dropped: they serve no purpose in the merged single-file UDF output.

    NOTE(review): uses the stdlib ``ast.unparse`` (Python 3.9+) instead of the
    third-party ``astunparse``; the deployed Snowflake runtime is 3.10. Also
    avoids ``ast.Str``, which is deprecated and removed in Python 3.12.
    """
    # Parse the source file into an AST.
    with open(filename, "r") as source:
        tree = ast.parse(source.read(), filename)

    def is_relative_import(node) -> bool:
        # Relative imports carry a non-zero level (e.g. `from . import x`).
        return isinstance(node, ast.ImportFrom) and node.level != 0

    def is_docstring(node) -> bool:
        # A bare string-constant expression at module level.
        return (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        )

    # Keep every top-level node that is neither a relative import nor a
    # bare string expression.
    tree.body = [
        node
        for node in tree.body
        if not is_relative_import(node) and not is_docstring(node)
    ]

    # Write the modified tree back over the source file.
    with open(filename, "w") as source:
        source.write(ast.unparse(tree))

def main() -> None:
    """CLI entry point: strip relative imports from the file named in argv[1].

    Exits with status 1 on bad usage or on any failure during the rewrite.
    """
    # Expect exactly one argument: the file to rewrite in place.
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} filename")
        sys.exit(1)

    # argv[0] is the script name; argv[1] is the target file.
    file_path = sys.argv[1]

    try:
        remove_relative_imports(file_path)
        print(f"Removed all relative imports from '{file_path}'.")
    except Exception as e:
        # Fail loudly: a silent zero exit here would let the build pipeline
        # publish a UDF bundle that still contains relative imports.
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)

if __name__ == "__main__":
    main()
18 changes: 12 additions & 6 deletions sql/create-udf.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,27 @@ create or replace function whylogs(data object)
)
language python
runtime_version = '3.10'
external_access_integrations = (whylabs_integration)
secrets = ('data_grouper_freq' = data_grouper_freq, 'segment_columns' = segment_columns)
packages = ('snowflake-snowpark-python', 'whylogs', 'pandas')
handler = 'whylogs_udf.handler'
imports = ('@whylabs_udf_stage/v1/latest/whylogs_udf.py')
;
;


create or replace function whylabs_upload(profile_view varchar, segment_partition varchar, segment varchar)
returns table (upload_result varchar)
create or replace function whylabs_upload(data object)
returns table (
dataset_id varchar,
result varchar,
error varchar,
dataset_timestamp int,
segment varchar
)
language python
runtime_version = '3.10'
external_access_integrations = (whylabs_upload_integration)
secrets = ('whylabs_api_key' = whylabs_api_key, 'whylabs_org_id' = whylabs_org_id, 'whylabs_dataset_id' = whylabs_dataset_id)
secrets = ('whylabs_api_key' = whylabs_api_key, 'whylabs_org_id' = whylabs_org_id)
packages = ('snowflake-snowpark-python', 'requests', 'whylogs', 'whylabs-client')
handler = 'whylabs_upload_udf.handler'
imports = ('@whylabs_udf_stage/v1/latest/whylabs_upload_udf.py')
;


Loading

0 comments on commit 57230b7

Please sign in to comment.