From a5a48d6d9f8244d003df238d01d8c17496f21d78 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Tue, 2 Mar 2021 09:23:22 -0500 Subject: [PATCH 01/41] User / project crud operations with fastapi --- workers/cs_workers/models/clients/job.py | 11 +- workers/cs_workers/models/clients/server.py | 25 +--- workers/cs_workers/services/api/__init__.py | 0 workers/cs_workers/services/api/alembic.ini | 85 ++++++++++++ .../cs_workers/services/api/alembic/README | 1 + .../cs_workers/services/api/alembic/env.py | 86 ++++++++++++ .../services/api/alembic/script.py.mako | 24 ++++ .../api/alembic/versions/6bc6aaccc77a_init.py | 80 +++++++++++ workers/cs_workers/services/api/database.py | 12 ++ .../cs_workers/services/api/dependencies.py | 62 +++++++++ workers/cs_workers/services/api/main.py | 23 ++++ workers/cs_workers/services/api/models.py | 56 ++++++++ .../services/api/routers/__init__.py | 0 .../services/api/routers/deployments.py | 0 .../cs_workers/services/api/routers/login.py | 48 +++++++ .../services/api/routers/projects.py | 42 ++++++ .../services/api/routers/simulations.py | 0 .../cs_workers/services/api/routers/users.py | 77 +++++++++++ workers/cs_workers/services/api/schemas.py | 89 +++++++++++++ .../services/api/scripts/create_super_user.py | 31 +++++ workers/cs_workers/services/api/security.py | 34 +++++ workers/cs_workers/services/api/settings.py | 63 +++++++++ .../cs_workers/services/api/tests/__init__.py | 1 + .../cs_workers/services/api/tests/conftest.py | 125 ++++++++++++++++++ .../services/api/tests/test_projects.py | 85 ++++++++++++ .../services/api/tests/test_users.py | 66 +++++++++ .../cs_workers/services/api/tests/utils.py | 7 + workers/cs_workers/services/scheduler.py | 13 +- 28 files changed, 1117 insertions(+), 29 deletions(-) create mode 100644 workers/cs_workers/services/api/__init__.py create mode 100644 workers/cs_workers/services/api/alembic.ini create mode 100644 workers/cs_workers/services/api/alembic/README create mode 100644 workers/cs_workers/services/api/alembic/env.py create mode 100644 workers/cs_workers/services/api/alembic/script.py.mako create mode 100644 workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py create mode 100644 workers/cs_workers/services/api/database.py create mode 100644 workers/cs_workers/services/api/dependencies.py create mode 100644 workers/cs_workers/services/api/main.py create mode 100644 workers/cs_workers/services/api/models.py create mode 100644 workers/cs_workers/services/api/routers/__init__.py create mode 100644 workers/cs_workers/services/api/routers/deployments.py create mode 100644 workers/cs_workers/services/api/routers/login.py create mode 100644 workers/cs_workers/services/api/routers/projects.py create mode 100644 workers/cs_workers/services/api/routers/simulations.py create mode 100644 workers/cs_workers/services/api/routers/users.py create mode 100644 workers/cs_workers/services/api/schemas.py create mode 100644 workers/cs_workers/services/api/scripts/create_super_user.py create mode 100644 workers/cs_workers/services/api/security.py create mode 100644 workers/cs_workers/services/api/settings.py create mode 100644 workers/cs_workers/services/api/tests/__init__.py create mode 100644 workers/cs_workers/services/api/tests/conftest.py create mode 100644 workers/cs_workers/services/api/tests/test_projects.py create mode 100644 workers/cs_workers/services/api/tests/test_users.py create mode 100644 workers/cs_workers/services/api/tests/utils.py diff --git a/workers/cs_workers/models/clients/job.py b/workers/cs_workers/models/clients/job.py index de36f7c9..f1de2e0e 100644 --- a/workers/cs_workers/models/clients/job.py +++ b/workers/cs_workers/models/clients/job.py @@ -7,6 +7,7 @@ from kubernetes import client as kclient, config as kconfig from cs_workers.utils import clean, redis_conn_from_env +from cs_workers.models.secrets import ModelSecrets redis_conn = dict( username="scheduler", @@ -55,8 +56,8 @@ def env(self, owner, title, config): safeowner = clean(owner) safetitle = clean(title) envs = [ - kclient.V1EnvVar("OWNER", config["owner"]), - kclient.V1EnvVar("TITLE", config["title"]), + kclient.V1EnvVar("OWNER", owner), + kclient.V1EnvVar("TITLE", title), kclient.V1EnvVar("EXP_TASK_TIME", str(config["exp_task_time"])), ] for sec in [ @@ -76,7 +77,9 @@ def env(self, owner, title, config): ) ) - for secret in self.model_config._list_secrets(config): + for secret in ModelSecrets( + owner=owner, title=title, project=self.project + ).list(): envs.append( kclient.V1EnvVar( name=secret, @@ -97,7 +100,7 @@ def configure(self, owner, title, tag, job_id=None): else: job_id = str(job_id) - config = self.model_config.projects()[f"{owner}/{title}"] + config = self.model_config safeowner = clean(owner) safetitle = clean(title) diff --git a/workers/cs_workers/models/clients/server.py b/workers/cs_workers/models/clients/server.py index c1dc33f6..3f31290d 100644 --- a/workers/cs_workers/models/clients/server.py +++ b/workers/cs_workers/models/clients/server.py @@ -9,6 +9,7 @@ from cs_workers.utils import clean, redis_conn_from_env from cs_workers.config import ModelConfig from cs_workers.ingressroute import IngressRouteApi, ingressroute_template +from cs_workers.models.secrets import ModelSecrets PORT = 8010 @@ -71,7 +72,9 @@ def env(self, owner, title, deployment_name, config): kclient.V1EnvVar("TITLE", config["title"]), ] - for secret in self.model_config._list_secrets(config): + for secret in ModelSecrets( + owner=owner, title=title, project=self.project + ).list(): envs.append( kclient.V1EnvVar( name=secret, @@ -94,7 +97,7 @@ def env(self, owner, title, deployment_name, config): return envs def configure(self): - config = self.model_config.projects()[f"{self.owner}/{self.title}"] + config = self.model_config safeowner = clean(self.owner) safetitle = clean(self.title) app_name = f"{safeowner}-{safetitle}" @@ -321,21 +324,3 @@ def full_name(self): safeowner = clean(self.owner) safetitle = clean(self.title) return f"{safeowner}-{safetitle}-{self.deployment_name}" - - -if __name__ == "__main__": - server = Server( - project="cs-workers-dev", - owner="hdoupe", - title="ccc-widget", - tag="fix-iframe-link3", - deployment_name="hankdoupe", - model_config=ModelConfig("cs-workers-dev", "https://dev.compute.studio"), - callable_name="dash", - incluster=False, - quiet=True, - ) - server - server.configure() - server.create() - # server.delete() diff --git a/workers/cs_workers/services/api/__init__.py b/workers/cs_workers/services/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/workers/cs_workers/services/api/alembic.ini b/workers/cs_workers/services/api/alembic.ini new file mode 100644 index 00000000..cf785fc1 --- /dev/null +++ b/workers/cs_workers/services/api/alembic.ini @@ -0,0 +1,85 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +timezone = UTC + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +; sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +hooks=black +black.type=console_scripts +black.entrypoint=black +black.options=-l 90 + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/workers/cs_workers/services/api/alembic/README b/workers/cs_workers/services/api/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/workers/cs_workers/services/api/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/workers/cs_workers/services/api/alembic/env.py b/workers/cs_workers/services/api/alembic/env.py new file mode 100644 index 00000000..317aee3b --- /dev/null +++ b/workers/cs_workers/services/api/alembic/env.py @@ -0,0 +1,86 @@ +import os + +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +from cs_workers.services.api import database, models + +target_metadata = database.Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def get_url(): + DB_HOST = os.environ.get("DB_HOST", "127.0.0.1") + DB_USER = os.environ.get("DB_USER", "postgres") + DB_PASS = os.environ.get("DB_PASS", "") + DB_NAME = os.environ.get("DB_NAME", "") + DB_PORT = os.environ.get("DB_PORT", "5432") + + return f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}" + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = get_url() + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True, compare_type=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + configuration = config.get_section(config.config_ini_section) + configuration["sqlalchemy.url"] = get_url() + connectable = engine_from_config( + configuration, prefix="sqlalchemy.", poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata, compare_type=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/workers/cs_workers/services/api/alembic/script.py.mako b/workers/cs_workers/services/api/alembic/script.py.mako new file mode 100644 index 00000000..2c015630 --- /dev/null +++ b/workers/cs_workers/services/api/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py b/workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py new file mode 100644 index 00000000..4b4c45b6 --- /dev/null +++ b/workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py @@ -0,0 +1,80 @@ +"""Init + +Revision ID: 6bc6aaccc77a +Revises: +Create Date: 2021-03-02 03:27:17.375165+00:00 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "6bc6aaccc77a" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "users", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("username", sa.String(), nullable=True), + sa.Column("url", sa.String(), nullable=True), + sa.Column("email", sa.String(), nullable=False), + sa.Column("hashed_password", sa.String(), nullable=False), + sa.Column("is_active", sa.Boolean(), nullable=True), + sa.Column("is_superuser", sa.Boolean(), nullable=True), + sa.Column("is_approved", sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True) + op.create_index(op.f("ix_users_id"), "users", ["id"], unique=False) + op.create_index(op.f("ix_users_username"), "users", ["username"], unique=False) + op.create_table( + "jobs", + sa.Column("id", sa.String(), nullable=False), + sa.Column("owner_id", sa.Integer(), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint(["owner_id"], ["users.id"],), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_jobs_id"), "jobs", ["id"], unique=False) + op.create_table( + "projects", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=True), + sa.Column("owner", sa.String(), nullable=False), + sa.Column("title", sa.String(), nullable=False), + sa.Column("tech", sa.String(), nullable=False), + sa.Column("callable_name", sa.String(), nullable=True), + sa.Column("exp_task_time", sa.String(), nullable=False), + sa.Column("cpu", sa.Integer(), nullable=True), + sa.Column("memory", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(["user_id"], ["users.id"],), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "owner", "title", "user_id", name="unique_owner_title_project" + ), + ) + op.create_index(op.f("ix_projects_id"), "projects", ["id"], unique=False) + op.create_index(op.f("ix_projects_owner"), "projects", ["owner"], unique=False) + op.create_index(op.f("ix_projects_title"), "projects", ["title"], unique=False) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f("ix_projects_title"), table_name="projects") + op.drop_index(op.f("ix_projects_owner"), table_name="projects") + op.drop_index(op.f("ix_projects_id"), table_name="projects") + op.drop_table("projects") + op.drop_index(op.f("ix_jobs_id"), table_name="jobs") + op.drop_table("jobs") + op.drop_index(op.f("ix_users_username"), table_name="users") + op.drop_index(op.f("ix_users_id"), table_name="users") + op.drop_index(op.f("ix_users_email"), table_name="users") + op.drop_table("users") + # ### end Alembic commands ### diff --git a/workers/cs_workers/services/api/database.py b/workers/cs_workers/services/api/database.py new file mode 100644 index 00000000..f410d237 --- /dev/null +++ b/workers/cs_workers/services/api/database.py @@ -0,0 +1,12 @@ +import os +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker + +from .settings import settings + + +engine = create_engine(settings.SQLALCHEMY_DATABASE_URI, pool_pre_ping=True) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +Base = declarative_base() diff --git a/workers/cs_workers/services/api/dependencies.py b/workers/cs_workers/services/api/dependencies.py new file mode 100644 index 00000000..088feddd --- /dev/null +++ b/workers/cs_workers/services/api/dependencies.py @@ -0,0 +1,62 @@ +from typing import Generator + +from fastapi import Depends, HTTPException, status +from fastapi.security import OAuth2PasswordBearer + +from jose import jwt +from pydantic import ValidationError +from sqlalchemy.orm import Session + +from . import models, schemas, security +from .settings import settings +from .database import SessionLocal, engine + +reusable_oauth2 = OAuth2PasswordBearer( + tokenUrl=f"{settings.API_PREFIX_STR}/login/access-token" +) + + +def get_db() -> Generator: + try: + print("NOOOOOOOOOOO") + db = SessionLocal() + yield db + finally: + db.close() + + +def get_current_user( + db: Session = Depends(get_db), token: str = Depends(reusable_oauth2) +) -> models.User: + try: + payload = jwt.decode( + token, settings.API_SECRET_KEY, algorithms=[security.ALGORITHM] + ) + token_data = schemas.TokenPayload(**payload) + except (jwt.JWTError, ValidationError): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Could not validate credentials", + ) + user = db.query(models.User).filter(models.User.id == token_data.sub).one_or_none() + if not user: + raise HTTPException(status_code=404, detail="User not found") + return user + + +def get_current_active_user( + current_user: models.User = Depends(get_current_user), +) -> models.User: + if not current_user.is_active: + raise HTTPException(status_code=400, detail="Inactive user") + return current_user + + +def get_current_active_superuser( + current_user: models.User = Depends(get_current_user), +) -> models.User: + if not current_user.is_superuser: + raise HTTPException( + status_code=400, detail="The user doesn't have enough privileges" + ) + return current_user diff --git a/workers/cs_workers/services/api/main.py b/workers/cs_workers/services/api/main.py new file mode 100644 index 00000000..c6554d30 --- /dev/null +++ b/workers/cs_workers/services/api/main.py @@ -0,0 +1,23 @@ +from fastapi import FastAPI +from starlette.middleware.cors import CORSMiddleware + +from .settings import settings +from .routers import users, login, projects + +app = FastAPI( + title=settings.PROJECT_NAME, openapi_url=f"{settings.API_PREFIX_STR}/openapi.json", +) + +# Set all CORS enabled origins +if settings.BACKEND_CORS_ORIGINS: + app.add_middleware( + CORSMiddleware, + allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + +app.include_router(login.router, prefix=settings.API_PREFIX_STR) +app.include_router(users.router, prefix=settings.API_PREFIX_STR) +app.include_router(projects.router, prefix=settings.API_PREFIX_STR) diff --git a/workers/cs_workers/services/api/models.py b/workers/cs_workers/services/api/models.py new file mode 100644 index 00000000..6ca55735 --- /dev/null +++ b/workers/cs_workers/services/api/models.py @@ -0,0 +1,56 @@ +from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, DateTime +from sqlalchemy.orm import relationship +from sqlalchemy.schema import UniqueConstraint + +from .database import Base + + +class User(Base): + __tablename__ = "users" + id = Column(Integer, primary_key=True, index=True) + username = Column(String, index=True) + url = Column(String, nullable=True) + email = Column(String, unique=True, index=True, nullable=False) + + hashed_password = Column(String, nullable=False) + + is_active = Column(Boolean(), default=True) + is_superuser = Column(Boolean(), default=False) + is_approved = Column(Boolean(), default=False) + + jobs = relationship("Job", back_populates="owner") + projects = relationship("Project", back_populates="user") + + +class Job(Base): + __tablename__ = "jobs" + id = Column(String, primary_key=True, index=True) + owner_id = Column(Integer, ForeignKey("users.id")) + created_at = Column(DateTime) + + owner = relationship("User", back_populates="jobs") + + +class Project(Base): + __tablename__ = "projects" + id = Column(Integer, primary_key=True, index=True) + user_id = Column(Integer, ForeignKey("users.id")) + owner = Column(String, nullable=False, index=True) + title = Column(String, nullable=False, index=True) + tech = Column(String, nullable=False) + callable_name = Column(String) + exp_task_time = Column(String, nullable=False) + cpu = Column(Integer) + memory = Column(Integer) + + user = relationship("User", back_populates="projects") + + __table_args__ = ( + UniqueConstraint( + "owner", "title", "user_id", name="unique_owner_title_project", + ), + ) + + class Config: + orm_mode = True + extra = "ignore" diff --git a/workers/cs_workers/services/api/routers/__init__.py b/workers/cs_workers/services/api/routers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py new file mode 100644 index 00000000..e69de29b diff --git a/workers/cs_workers/services/api/routers/login.py b/workers/cs_workers/services/api/routers/login.py new file mode 100644 index 00000000..4055d759 --- /dev/null +++ b/workers/cs_workers/services/api/routers/login.py @@ -0,0 +1,48 @@ +from datetime import timedelta, datetime +from typing import Any, Optional + +from fastapi import APIRouter, Body, Depends, HTTPException +from fastapi.security import OAuth2PasswordRequestForm + +import pytz +from sqlalchemy.orm import Session + +from .. import security +from ..models import User +from .. import schemas +from .. import dependencies as deps +from ..settings import settings + +router = APIRouter(tags=["login"]) + + +def authenticate(db: Session, *, username: str, password: str) -> Optional[User]: + user = db.query(User).filter(User.username == username).one_or_none() + if not user: + return None + if not security.verify_password(password, user.hashed_password): + return None + return user + + +@router.post("/login/access-token", response_model=schemas.Token) +def login_access_token( + db: Session = Depends(deps.get_db), form_data: OAuth2PasswordRequestForm = Depends() +) -> Any: + """ + OAuth2 compatible token login, get an access token for future requests + """ + user = authenticate(db, username=form_data.username, password=form_data.password) + + if not user: + raise HTTPException(status_code=400, detail="Incorrect username or password") + elif not user.is_active: + raise HTTPException(status_code=400, detail="Inactive user") + access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + return { + "access_token": security.create_access_token( + user.id, expires_delta=access_token_expires + ), + "token_type": "bearer", + "expires_at": datetime.now().replace(tzinfo=pytz.UTC) + access_token_expires, + } diff --git a/workers/cs_workers/services/api/routers/projects.py b/workers/cs_workers/services/api/routers/projects.py new file mode 100644 index 00000000..81c6ea97 --- /dev/null +++ b/workers/cs_workers/services/api/routers/projects.py @@ -0,0 +1,42 @@ +from typing import List + +from fastapi import APIRouter, Depends, Body +from sqlalchemy.orm import Session + +from .. import models, schemas, dependencies as deps + +router = APIRouter() + + +@router.post("/projects/sync/", response_model=List[schemas.Project], status_code=200) +def sync_projects( + projects: List[schemas.ProjectSync] = Body(...), + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + orm_projects = [] + for project in projects: + orm_project = ( + db.query(models.Project) + .filter( + models.Project.title == project.title, + models.Project.owner == project.owner, + models.Project.user_id == user.id, + ) + .one_or_none() + ) + if orm_project is None: + orm_project = models.Project(**project.dict(), user_id=user.id) + orm_projects.append(orm_project) + db.add_all(orm_projects) + db.commit() + return orm_projects + + +@router.get("/projects/", response_model=List[schemas.Project], status_code=200) +def get_projects( + projects=List[schemas.Project], + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + return user.projects diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py new file mode 100644 index 00000000..e69de29b diff --git a/workers/cs_workers/services/api/routers/users.py b/workers/cs_workers/services/api/routers/users.py new file mode 100644 index 00000000..ff7bb690 --- /dev/null +++ b/workers/cs_workers/services/api/routers/users.py @@ -0,0 +1,77 @@ +from typing import Any + +from fastapi import APIRouter, Body, Depends, HTTPException +from sqlalchemy.orm import Session +from pydantic.networks import EmailStr, AnyHttpUrl # pylint: disable=no-name-in-module + +from .. import schemas, models, dependencies as deps, security + +router = APIRouter(prefix="/users", tags=["users"]) + + +@router.get("/me", response_model=schemas.User) +def read_user_me( + db: Session = Depends(deps.get_db), + current_user: models.User = Depends(deps.get_current_active_user), +) -> Any: + """ + Get current user. + """ + return current_user + + +@router.post("/", response_model=schemas.User, status_code=201) +def create_user( + *, + db: Session = Depends(deps.get_db), + password: str = Body(...), + email: EmailStr = Body(...), + url: AnyHttpUrl = Body(...), + username: str = Body(None), +) -> models.User: + """ + Create new user. + """ + user = db.query(models.User).filter(models.User.username == username).one_or_none() + if user: + raise HTTPException( + status_code=400, + detail="The user with this username already exists in the system", + ) + user_in = schemas.UserCreate( + password=password, email=email, username=username, url=url + ) + user_db = models.User( + email=user_in.email, + username=user_in.username, + url=user_in.url, + hashed_password=security.get_password_hash(user_in.password), + ) + db.add(user_db) + db.commit() + db.refresh(user_db) + return user_db + + +@router.post("/approve/", response_model=schemas.User) +def approve_user( + *, + db: Session = Depends(deps.get_db), + current_super_user: models.User = Depends(deps.get_current_active_superuser), + user_approve: schemas.UserApprove = Body(...), +) -> models.User: + """ + Create new user. + """ + user: models.User = db.query(models.User).filter( + models.User.username == user_approve.username + ).one_or_none() + if not user: + raise HTTPException( + status_code=400, detail="The user with this username does not exist", + ) + user.is_approved = user_approve.is_approved + db.add(user) + db.commit() + db.refresh(user) + return user diff --git a/workers/cs_workers/services/api/schemas.py b/workers/cs_workers/services/api/schemas.py new file mode 100644 index 00000000..a0c9090e --- /dev/null +++ b/workers/cs_workers/services/api/schemas.py @@ -0,0 +1,89 @@ +from datetime import datetime +from typing import List, Optional, Dict +from enum import Enum + +from pydantic import BaseModel # pylint: disable=no-name-in-module +from pydantic.networks import EmailStr, AnyHttpUrl # pylint: disable=no-name-in-module + + +class JobBase(BaseModel): + owner_id: int + created_at: datetime + + +class JobCreate(JobBase): + pass + + +class Job(JobBase): + id: int + + class Config: + orm_mode = True + + +# Shared properties +class UserBase(BaseModel): + email: Optional[EmailStr] = None + username: Optional[str] = None + url: Optional[AnyHttpUrl] + is_approved: Optional[bool] + + is_active: Optional[bool] = True + + +# Properties to receive via API on creation +class UserCreate(UserBase): + email: EmailStr = None + username: str = None + url: AnyHttpUrl + password: str + + +class UserApprove(UserBase): + username: str + is_approved: bool + + +class UserInDBBase(UserBase): + class Config: + orm_mode = True + + +# Additional properties to return via API +class User(UserInDBBase): + pass + + +# Additional properties stored in DB +class UserInDB(UserInDBBase): + id: Optional[int] = None + hashed_password: str + + +class Token(BaseModel): + access_token: str + token_type: str + expires_at: datetime + + +class TokenPayload(BaseModel): + sub: Optional[int] = None + + +class ProjectSync(BaseModel): + owner: str + title: str + tech: str + callable_name: str + exp_task_time: int + cpu: int + memory: int + + +class Project(BaseModel): + id: int + + class Config: + orm_mode = True + extra = "ignore" diff --git a/workers/cs_workers/services/api/scripts/create_super_user.py b/workers/cs_workers/services/api/scripts/create_super_user.py new file mode 100644 index 00000000..9d1139b6 --- /dev/null +++ b/workers/cs_workers/services/api/scripts/create_super_user.py @@ -0,0 +1,31 @@ +import argparse +from getpass import getpass + +from cs_workers.services.api.security import get_password_hash +from cs_workers.services.api.models import User +from cs_workers.services.api.database import SessionLocal +from cs_workers.services.api.schemas import User as UserSchema + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--username") + parser.add_argument("--email") + args = parser.parse_args() + + password = getpass() + assert password, "Password required." + + user = User( + username=args.username, + hashed_password=get_password_hash(password), + email=args.email, + is_superuser=True, + is_active=True, + url=None, + ) + session = SessionLocal() + session.add(user) + session.commit() + session.refresh(user) + print("User created successfully:") + print(UserSchema.from_orm(user).dict()) diff --git a/workers/cs_workers/services/api/security.py b/workers/cs_workers/services/api/security.py new file mode 100644 index 00000000..80d1462e --- /dev/null +++ b/workers/cs_workers/services/api/security.py @@ -0,0 +1,34 @@ +from datetime import datetime, timedelta +from typing import Any, Union + +from jose import jwt +from passlib.context import CryptContext + +from .settings import settings + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + + +ALGORITHM = "HS256" + + +def create_access_token( + subject: Union[str, Any], expires_delta: timedelta = None +) -> str: + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta( + minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES + ) + to_encode = {"exp": expire, "sub": str(subject)} + encoded_jwt = jwt.encode(to_encode, settings.API_SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt + + +def verify_password(plain_password: str, hashed_password: str) -> bool: + return pwd_context.verify(plain_password, hashed_password) + + +def get_password_hash(password: str) -> str: + return pwd_context.hash(password) diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py new file mode 100644 index 00000000..b4e263be --- /dev/null +++ b/workers/cs_workers/services/api/settings.py @@ -0,0 +1,63 @@ +import os +from typing import Any, Dict, List, Optional, Union + +from pydantic import AnyHttpUrl, BaseSettings, EmailStr, HttpUrl, PostgresDsn, validator + + +class Settings(BaseSettings): + API_PREFIX_STR: str = "/api/v1" + API_SECRET_KEY: str + # 60 minutes * 24 hours * 8 days = 8 days + ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8 + SERVER_NAME: Optional[str] + SERVER_HOST: Optional[AnyHttpUrl] + + BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = [] + + @validator("BACKEND_CORS_ORIGINS", pre=True) + def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: + if isinstance(v, str) and not v.startswith("["): + return [i.strip() for i in v.split(",")] + elif isinstance(v, (list, str)): + return v + raise ValueError(v) + + PROJECT_NAME: str = "C/S Cluster Api" + SENTRY_DSN: Optional[HttpUrl] = None + + @validator("SENTRY_DSN", pre=True) + def sentry_dsn_can_be_blank(cls, v: str) -> Optional[str]: + if v and len(v) == 0: + return None + return v + + DB_HOST: str + DB_USER: str + DB_PASS: str + DB_NAME: str + + TEST_DB_NAME: str = "test" + TEST_DB_PASS: str = os.environ.get("TEST_DB_PASS", "test") + + SQLALCHEMY_DATABASE_URI: Optional[PostgresDsn] = None + + @validator("SQLALCHEMY_DATABASE_URI", pre=True) + def assemble_db_connection(cls, v: Optional[str], values: Dict[str, Any]) -> Any: + if isinstance(v, str): + return v + return "{scheme}://{user}:{password}@{host}{path}".format( + scheme="postgresql", + user=values.get("DB_USER"), + password=values.get("DB_PASS"), + host=values.get("DB_HOST"), + path=f"/{values.get('DB_NAME')}", + ) + + FIRST_SUPERUSER: Optional[EmailStr] + FIRST_SUPERUSER_PASSWORD: Optional[str] + + class Config: + case_sensitive = True + + +settings = Settings() diff --git a/workers/cs_workers/services/api/tests/__init__.py b/workers/cs_workers/services/api/tests/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/workers/cs_workers/services/api/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/workers/cs_workers/services/api/tests/conftest.py b/workers/cs_workers/services/api/tests/conftest.py new file mode 100644 index 00000000..94a55d64 --- /dev/null +++ b/workers/cs_workers/services/api/tests/conftest.py @@ -0,0 +1,125 @@ +from typing import Dict, Generator + +import pytest +from fastapi.testclient import TestClient + +import sqlalchemy as sa +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import Session, sessionmaker, exc + +from ..settings import settings +from ..database import SessionLocal +from ..main import app +from ..dependencies import get_db +from .. import models, schemas, security + + +SQLALCHEMY_DATABASE_URI = f"postgresql://{settings.DB_USER}:{settings.TEST_DB_PASS}@{settings.DB_HOST}/{settings.TEST_DB_NAME}" +assert settings.DB_NAME != settings.TEST_DB_NAME + +engine = create_engine(SQLALCHEMY_DATABASE_URI, pool_pre_ping=True) + +Base = declarative_base() + +Base.metadata.create_all(bind=engine) + + +# Adapted from: +# https://github.com/jeancochrane/pytest-flask-sqlalchemy/blob/c109469f83450b8c5ff5de962faa1105064f5619/pytest_flask_sqlalchemy/fixtures.py#L25-L84 +@pytest.fixture(scope="function") +def db(request) -> Generator: + connection = engine.connect() + transaction = connection.begin() + TestingSessionLocal = sessionmaker( + autocommit=False, autoflush=False, bind=connection + ) + session = TestingSessionLocal() + + # Make sure the session, connection, and transaction can't be closed by accident in + # the codebase + connection.force_close = connection.close + transaction.force_rollback = transaction.rollback + + connection.close = lambda: None + transaction.rollback = lambda: None + session.close = lambda: None + + session.begin_nested() + # Each time the SAVEPOINT for the nested transaction ends, reopen it + @sa.event.listens_for(session, "after_transaction_end") + def restart_savepoint(session, trans): + if trans.nested and not trans._parent.nested: + # ensure that state is expired the way + # session.commit() at the top level normally does + session.expire_all() + + session.begin_nested() + + # Force the connection to use nested transactions + connection.begin = connection.begin_nested + + # If an object gets moved to the 'detached' state by a call to flush the session, + # add it back into the session (this allows us to see changes made to objects + # in the context of a test, even when the change was made elsewhere in + # the codebase) + @sa.event.listens_for(session, "persistent_to_detached") + @sa.event.listens_for(session, "deleted_to_detached") + def rehydrate_object(session, obj): + session.add(obj) + + @request.addfinalizer + def teardown_transaction(): + # Delete the session + session.close() + + # Rollback the transaction and return the connection to the pool + transaction.force_rollback() + connection.force_close() + + app.dependency_overrides[get_db] = lambda: session + return session + + +@pytest.fixture(scope="function") +def client() -> Generator: + with TestClient(app) as c: + yield c + + +@pytest.fixture(scope="function") +def new_user(db): + user_ = models.User( + username="test", + email="test@test.com", + url="http://localhost:8000", + hashed_password=security.get_password_hash("heyhey2222"), + ) + db.add(user_) + db.commit() + db.refresh(user_) + return user_ + + +@pytest.fixture(scope="function") +def user(db, new_user): + new_user.approved = True + db.add(new_user) + db.commit() + db.refresh(new_user) + return new_user + + +@pytest.fixture(scope="function") +def superuser(db): + user_ = models.User( + username="super-user", + email="super-user@test.com", + url="http://localhost:8000", + hashed_password=security.get_password_hash("heyhey2222"), + is_superuser=True, + ) + db.add(user_) + db.commit() + db.refresh(user_) + yield user_ diff --git a/workers/cs_workers/services/api/tests/test_projects.py b/workers/cs_workers/services/api/tests/test_projects.py new file mode 100644 index 00000000..cabdf844 --- /dev/null +++ b/workers/cs_workers/services/api/tests/test_projects.py @@ -0,0 +1,85 @@ +from .utils import get_access_token +from ..settings import settings +from ..models import Project + + +class TestProjects: + def test_sync_projects(self, db, client, user): + access_token = get_access_token(client, user) + assert access_token + data = { + "owner": "test", + "title": "test-app", + "tech": "bokeh", + "callable_name": "hello", + "exp_task_time": 10, + "cpu": 4, + "memory": 10, + } + + resp = client.post( + f"{settings.API_PREFIX_STR}/projects/sync/", + json=[data], + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200 + assert resp.json() + assert ( + db.query(Project) + .filter(Project.owner == "test", Project.title == "test-app") + .one() + ) + + resp = client.post( + f"{settings.API_PREFIX_STR}/projects/sync/", + json=[data], + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200 + assert resp.json() + assert ( + db.query(Project) + .filter(Project.owner == "test", Project.title == "test-app") + .one() + ) + + resp = client.post( + f"{settings.API_PREFIX_STR}/projects/sync/", + json=[dict(data, title="test-app-another")], + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200 + assert resp.json() + assert ( + db.query(Project) + .filter(Project.owner == "test", Project.title == "test-app-another") + .one() + ) + assert db.query(Project).count() == 2 + + def test_get_projects(self, db, client, user): + access_token = get_access_token(client, user) + assert access_token + data = { + "owner": "test", + "title": "test-app", + "tech": "bokeh", + "callable_name": "hello", + "exp_task_time": 10, + "cpu": 4, + "memory": 10, + } + for i in range(3): + resp = client.post( + f"{settings.API_PREFIX_STR}/projects/sync/", + json=[dict(data, title=f"new-app-{i}")], + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200 + + resp = client.get( + f"{settings.API_PREFIX_STR}/projects/", + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200 + assert len(resp.json()) == db.query(Project).count() == 3 diff --git a/workers/cs_workers/services/api/tests/test_users.py b/workers/cs_workers/services/api/tests/test_users.py new file mode 100644 index 00000000..eb441ee0 --- /dev/null +++ b/workers/cs_workers/services/api/tests/test_users.py @@ -0,0 +1,66 @@ +from .. import models + + +class TestUsers: + def test_create_user(self, db, client): + resp = client.post( + "/api/v1/users/", + json={ + "email": "new_user@test.com", + "username": "new_user", + "password": "hello world", + "url": "https://example.com", + }, + ) + assert resp.status_code == 201, resp.text + assert resp.json() == { + "email": "new_user@test.com", + "username": "new_user", + "url": "https://example.com", + "is_approved": False, + "is_active": True, + } + + def test_login_user(self, db, client, new_user): + resp = client.post( + "/api/v1/login/access-token", + data={"username": "test", "password": "heyhey2222"}, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + assert resp.json().get("access_token") + assert resp.json().get("expires_at") + + def test_get_current_user(self, db, client, new_user): + resp = client.post( + "/api/v1/login/access-token", + data={"username": "test", "password": "heyhey2222"}, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + access_token = resp.json()["access_token"] + resp = client.get( + "/api/v1/users/me/", headers={"Authorization": f"Bearer {access_token}"} + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + assert resp.json()["username"] + + def test_approve_user(self, db, client, new_user, superuser): + resp = client.post( + "/api/v1/login/access-token", + data={"username": "super-user", "password": "heyhey2222"}, + ) + + assert resp.status_code == 200 + + assert superuser.is_superuser + assert not new_user.is_approved + print("user.username", new_user.username, type(new_user.username)) + access_token = resp.json()["access_token"] + resp = client.post( + "/api/v1/users/approve/", + json={"username": new_user.username, "is_approved": True}, + headers={"Authorization": f"Bearer {access_token}"}, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + + db.refresh(new_user) + assert new_user.is_approved diff --git a/workers/cs_workers/services/api/tests/utils.py b/workers/cs_workers/services/api/tests/utils.py new file mode 100644 index 00000000..263aec70 --- /dev/null +++ b/workers/cs_workers/services/api/tests/utils.py @@ -0,0 +1,7 @@ +def get_access_token(test_client, user): + resp = test_client.post( + "/api/v1/login/access-token", + data={"username": "test", "password": "heyhey2222"}, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + return resp.json()["access_token"] diff --git a/workers/cs_workers/services/scheduler.py b/workers/cs_workers/services/scheduler.py index f1a759ff..4f0aca62 100644 --- a/workers/cs_workers/services/scheduler.py +++ b/workers/cs_workers/services/scheduler.py @@ -55,8 +55,11 @@ async def post(self, owner, title): return payload = Payload().loads(self.request.body.decode("utf-8")) - if f"{owner}/{title}" not in self.config[self.user.username].projects(): + try: + project = self.config[self.user.username].get_project(owner, title) + except KeyError: self.set_status(404) + return task_id = payload.get("task_id") if task_id is None: @@ -88,7 +91,7 @@ async def post(self, owner, title): owner, title, tag=tag, - model_config=self.config[self.user.username], + model_config=project, job_id=task_id, job_kwargs=payload["task_kwargs"], rclient=self.rclient, @@ -146,7 +149,7 @@ def get(self, owner, title, deployment_name): owner=project["owner"], title=project["title"], tag=None, - model_config=self.config[self.user.username], + model_config=project, callable_name=project["callable_name"], deployment_name=deployment_name, incluster=incluster, @@ -174,7 +177,7 @@ def delete(self, owner, title, deployment_name): owner=project["owner"], title=project["title"], tag=None, - model_config=self.config[self.user.username], + model_config=project, callable_name=project["callable_name"], deployment_name=deployment_name, incluster=incluster, @@ -225,7 +228,7 @@ def post(self, owner, title): owner=project["owner"], title=project["title"], tag=data["tag"], - model_config=self.config[self.user.username], + model_config=project, callable_name=project["callable_name"], deployment_name=data["deployment_name"], incluster=incluster, From c33f1a1c40b084b84abf55e9749776dda31c0116 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Tue, 23 Mar 2021 00:26:49 -0400 Subject: [PATCH 02/41] Checkpoint: can run jobs with new api --- .dockerignore | 2 + webapp/apps/comp/asyncsubmit.py | 6 +- webapp/apps/comp/compute.py | 29 ++- webapp/apps/comp/ioutils.py | 8 +- .../migrations/0029_auto_20210321_2247.py | 35 ++++ webapp/apps/comp/model_parameters.py | 60 +++++-- webapp/apps/comp/models.py | 13 ++ webapp/apps/comp/parser.py | 5 +- webapp/apps/comp/serializers.py | 26 +++ webapp/apps/comp/views/__init__.py | 1 + webapp/apps/comp/views/api.py | 56 +++++- webapp/apps/publish/views.py | 2 + webapp/apps/users/api.py | 5 +- .../migrations/0025_auto_20210321_2004.py | 34 ++++ .../apps/users/migrations/0026_tag_version.py | 18 ++ webapp/apps/users/models.py | 81 ++++++--- webapp/apps/users/serializers.py | 3 + webapp/urls.py | 5 + workers/cs_workers/cli.py | 12 +- .../cs_workers/dockerfiles/Dockerfile.model | 6 + workers/cs_workers/models/clients/api_task.py | 26 --- workers/cs_workers/models/clients/job.py | 75 ++++---- workers/cs_workers/models/executors/job.py | 50 +++--- .../models/executors/task_wrapper.py | 76 ++------ workers/cs_workers/models/manage.py | 77 +++++++- ...6aaccc77a_init.py => 07a0d0f7e1b4_init.py} | 26 ++- .../f027333560c0_fix_cpu_and_memory_types.py | 54 ++++++ .../cs_workers/services/api/dependencies.py | 3 +- workers/cs_workers/services/api/main.py | 3 +- workers/cs_workers/services/api/models.py | 37 +++- .../services/api/routers/projects.py | 7 +- .../services/api/routers/simulations.py | 166 ++++++++++++++++++ .../cs_workers/services/api/routers/users.py | 15 ++ workers/cs_workers/services/api/schemas.py | 49 +++++- workers/cs_workers/services/api/security.py | 35 ++++ workers/cs_workers/services/api/settings.py | 8 +- .../cs_workers/services/api/tests/conftest.py | 28 +++ .../services/api/tests/test_users.py | 2 + workers/cs_workers/services/manage.py | 6 +- .../cs_workers/services/outputs_processor.py | 143 +++++---------- .../redis-master-Deployment.template.yaml | 6 - workers/setup.py | 3 + 42 files changed, 943 insertions(+), 359 deletions(-) create mode 100755 webapp/apps/comp/migrations/0029_auto_20210321_2247.py create mode 100755 webapp/apps/users/migrations/0025_auto_20210321_2004.py create mode 100755 webapp/apps/users/migrations/0026_tag_version.py delete mode 100644 workers/cs_workers/models/clients/api_task.py rename workers/cs_workers/services/api/alembic/versions/{6bc6aaccc77a_init.py => 07a0d0f7e1b4_init.py} (75%) create mode 100644 workers/cs_workers/services/api/alembic/versions/f027333560c0_fix_cpu_and_memory_types.py diff --git a/.dockerignore b/.dockerignore index 97f42add..79d8f756 100644 --- a/.dockerignore +++ b/.dockerignore @@ -19,3 +19,5 @@ coverage.xml .git redis-data db-data +cluster-db-data +./workers/cluster-db-data \ No newline at end of file diff --git a/webapp/apps/comp/asyncsubmit.py b/webapp/apps/comp/asyncsubmit.py index 3a87f896..46cb8edc 100755 --- a/webapp/apps/comp/asyncsubmit.py +++ b/webapp/apps/comp/asyncsubmit.py @@ -128,7 +128,11 @@ def submit(self): project = self.sim.project tag = str(project.latest_tag) self.submitted_id = self.compute.submit_job( - project=inputs.project, task_name=actions.SIM, task_kwargs=data, tag=tag, + project=inputs.project, + task_name=actions.SIM, + task_kwargs=data, + tag=tag, + path_prefix="/api/v1/jobs" if project.cluster.version == "v1" else "", ) print(f"job id: {self.submitted_id}") diff --git a/webapp/apps/comp/compute.py b/webapp/apps/comp/compute.py index 316673c3..18499fb2 100755 --- a/webapp/apps/comp/compute.py +++ b/webapp/apps/comp/compute.py @@ -28,10 +28,14 @@ def remote_submit_job( response = requests.post(url, json=data, timeout=timeout, headers=headers) return response - def submit_job(self, project, task_name, task_kwargs, tag=None): - print("submitting", task_name) + def submit_job(self, project, task_name, task_kwargs, path_prefix="", tag=None): + print( + "submitting", task_name, + ) cluster = project.cluster - url = f"{cluster.url}/{project.owner}/{project.title}/" + tag = tag or str(project.latest_tag) + url = f"{cluster.url}{path_prefix}/{project.owner}/{project.title}/" + print(url) return self.submit( tasks=dict(task_name=task_name, tag=tag, task_kwargs=task_kwargs), url=url, @@ -43,16 +47,17 @@ def submit(self, tasks, url, headers): attempts = 0 while not submitted: try: + print(tasks) response = self.remote_submit_job( url, data=tasks, timeout=TIMEOUT_IN_SECONDS, headers=headers ) - if response.status_code == 200: + if response.status_code in (200, 201): print("submitted: ", url) submitted = True data = response.json() - job_id = data["task_id"] + job_id = data.get("task_id") or data.get("id") else: - print("FAILED: ", url, response.status_code) + print("FAILED: ", url, response.status_code, response.json()) attempts += 1 except Timeout: print("Couldn't submit to: ", url) @@ -83,7 +88,7 @@ def submit(self, tasks, url, headers): return data = response.json() else: - print("FAILED: ", url, response.status_code) + print("FAILED: ", url, response.status_code, response.text) attempts += 1 except Timeout: print("Couldn't submit to: ", url) @@ -95,7 +100,10 @@ def submit(self, tasks, url, headers): print("Exceeded max attempts. Bailing out.") raise WorkersUnreachableError() - success = data["status"] == "SUCCESS" + if isinstance(data, list): + success = True + else: + success = data["status"] == "SUCCESS" if success: return success, data else: @@ -104,6 +112,9 @@ def submit(self, tasks, url, headers): class SyncProjects(SyncCompute): def submit_job(self, project, cluster): - url = f"{cluster.url}/sync/" + if cluster.version == "v0": + url = f"{cluster.url}/sync/" + else: + url = f"{cluster.url}/api/v1/projects/sync/" headers = cluster.headers() return self.submit(tasks=[project], url=url, headers=headers) diff --git a/webapp/apps/comp/ioutils.py b/webapp/apps/comp/ioutils.py index ac8f9621..84e28f9b 100755 --- a/webapp/apps/comp/ioutils.py +++ b/webapp/apps/comp/ioutils.py @@ -1,5 +1,5 @@ from typing import NamedTuple, Type -from webapp.apps.comp.model_parameters import ModelParameters +from webapp.apps.comp.model_parameters import ModelParameters, NotReady from webapp.apps.comp.parser import Parser @@ -8,8 +8,10 @@ class IOClasses(NamedTuple): Parser: Type[Parser] -def get_ioutils(project, **kwargs): +def get_ioutils(project, compute=None, **kwargs): return IOClasses( - model_parameters=kwargs.get("ModelParameters", ModelParameters)(project), + model_parameters=kwargs.get("ModelParameters", ModelParameters)( + project, compute=compute + ), Parser=kwargs.get("Parser", Parser), ) diff --git a/webapp/apps/comp/migrations/0029_auto_20210321_2247.py b/webapp/apps/comp/migrations/0029_auto_20210321_2247.py new file mode 100755 index 00000000..bfbaf68f --- /dev/null +++ b/webapp/apps/comp/migrations/0029_auto_20210321_2247.py @@ -0,0 +1,35 @@ +# Generated by Django 3.0.13 on 2021-03-21 22:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("comp", "0028_simulation_tag"), + ] + + operations = [ + migrations.AddField( + model_name="modelconfig", + name="job_id", + field=models.UUIDField(blank=True, default=None, null=True), + ), + migrations.AddField( + model_name="modelconfig", + name="status", + field=models.CharField( + choices=[ + ("STARTED", "Started"), + ("PENDING", "Pending"), + ("SUCCESS", "Success"), + ("INVALID", "Invalid"), + ("FAIL", "Fail"), + ("WORKER_FAILURE", "Worker Failure"), + ], + default="SUCCESS", + max_length=20, + ), + preserve_default=False, + ), + ] diff --git a/webapp/apps/comp/model_parameters.py b/webapp/apps/comp/model_parameters.py index 0ee4f8cb..5815c05f 100755 --- a/webapp/apps/comp/model_parameters.py +++ b/webapp/apps/comp/model_parameters.py @@ -1,7 +1,10 @@ +from typing import Union + + import paramtools as pt from webapp.apps.comp.models import ModelConfig -from webapp.apps.comp.compute import SyncCompute, JobFailError +from webapp.apps.comp.compute import Compute, SyncCompute, JobFailError from webapp.apps.comp import actions from webapp.apps.comp.exceptions import AppError @@ -16,14 +19,25 @@ def pt_factory(classname, defaults): return type(classname, (pt.Parameters,), {"defaults": defaults}) +class NotReady(Exception): + pass + + class ModelParameters: """ Handles logic for getting cached model parameters and updating the cache. """ - def __init__(self, project: "Project", compute: SyncCompute = None): + def __init__(self, project: "Project", compute: Union[SyncCompute, Compute] = None): self.project = project - self.compute = compute or SyncCompute() + print(self.project) + if compute is not None: + self.comptue = compute + elif self.project.cluster.version == "v0": + self.compute = compute or SyncCompute() + else: + self.compute = compute or Compute() + self.config = None def defaults(self, init_meta_parameters=None): @@ -53,6 +67,15 @@ def model_parameters_parser(self, meta_parameters_values=None): # return model_parameters_parser return res["model_parameters"] + def cleanup_meta_parameters(self, meta_parameters_values, meta_parameters): + # clean up meta parameters before saving them. + if not meta_parameters_values: + return {} + + mp = pt_factory("MP", meta_parameters)() + mp.adjust(meta_parameters_values) + return mp.specification(meta_data=False, serializable=True) + def get_inputs(self, meta_parameters_values=None): """ Get cached version of inputs or retrieve new version. @@ -65,22 +88,37 @@ def get_inputs(self, meta_parameters_values=None): model_version=str(self.project.latest_tag), meta_parameters_values=meta_parameters_values, ) + print("STATUS", config.status) + if config.status != "SUCCESS": + print("raise yo") + raise NotReady() except ModelConfig.DoesNotExist: - success, result = self.compute.submit_job( + response = self.compute.submit_job( project=self.project, task_name=actions.INPUTS, task_kwargs={"meta_param_dict": meta_parameters_values or {}}, + path_prefix="/api/v1/jobs" + if self.project.cluster.version == "v1" + else "", ) + if self.project.cluster.version == "v1": + config = ModelConfig.objects.create( + project=self.project, + model_version=str(self.project.latest_tag), + meta_parameters_values=meta_parameters_values, + inputs_version="v1", + job_id=response, + status="PENDING", + ) + raise NotReady() + + success, result = response if not success: raise AppError(meta_parameters_values, result["traceback"]) - # clean up meta parameters before saving them. - if meta_parameters_values: - mp = pt_factory("MP", result["meta_parameters"])() - mp.adjust(meta_parameters_values) - save_vals = mp.specification(meta_data=False, serializable=True) - else: - save_vals = {} + save_vals = self.cleanup_meta_parameters( + meta_parameters_values, result["meta_parameters"] + ) config = ModelConfig.objects.create( project=self.project, diff --git a/webapp/apps/comp/models.py b/webapp/apps/comp/models.py index 36ec81a8..4b3d1128 100755 --- a/webapp/apps/comp/models.py +++ b/webapp/apps/comp/models.py @@ -77,6 +77,19 @@ class ModelConfig(models.Model): meta_parameters = JSONField(default=dict) model_parameters = JSONField(default=dict) + job_id = models.UUIDField(blank=True, default=None, null=True) + status = models.CharField( + choices=( + ("STARTED", "Started"), + ("PENDING", "Pending"), + ("SUCCESS", "Success"), + ("INVALID", "Invalid"), + ("FAIL", "Fail"), + ("WORKER_FAILURE", "Worker Failure"), + ), + max_length=20, + ) + objects = ModelConfigManager() class Meta: diff --git a/webapp/apps/comp/parser.py b/webapp/apps/comp/parser.py index 0f3d76f9..ac40bd51 100755 --- a/webapp/apps/comp/parser.py +++ b/webapp/apps/comp/parser.py @@ -55,7 +55,10 @@ def post(self, errors_warnings, params): "errors_warnings": errors_warnings, } job_id = self.compute.submit_job( - project=self.project, task_name=actions.PARSE, task_kwargs=data + project=self.project, + task_name=actions.PARSE, + task_kwargs=data, + path_prefix="/api/v1/jobs" if self.project.cluster.version == "v1" else "", ) return job_id diff --git a/webapp/apps/comp/serializers.py b/webapp/apps/comp/serializers.py index e1652abe..f795093a 100755 --- a/webapp/apps/comp/serializers.py +++ b/webapp/apps/comp/serializers.py @@ -107,12 +107,30 @@ class Meta: ) +class ModelConfigAsyncSerializer(serializers.Serializer): + job_id = serializers.UUIDField(required=False) + status = serializers.ChoiceField( + choices=(("SUCCESS", "Success"), ("FAIL", "Fail")), required=False + ) + outputs = serializers.JSONField(required=False) + + def to_internal_value(self, data): + if "outputs" in data: + data.update(**data.pop("outputs")) + if "task_id" in data: + data["job_id"] = data.pop("task_id") + print(data.keys()) + return super().to_internal_value(data) + + class ModelConfigSerializer(serializers.ModelSerializer): project = serializers.StringRelatedField() class Meta: model = ModelConfig fields = ( + "job_id", + "status", "project", "model_version", "meta_parameters_values", @@ -130,6 +148,14 @@ class Meta: "creation_date", ) + def to_internal_value(self, data): + if "outputs" in data: + data.update(**data.pop("outputs")) + if "task_id" in data: + data["job_id"] = data.pop("task_id") + print(data.keys()) + return super().to_internal_value(data) + class InputsSerializer(serializers.ModelSerializer): """ diff --git a/webapp/apps/comp/views/__init__.py b/webapp/apps/comp/views/__init__.py index 13496c57..2863c72e 100755 --- a/webapp/apps/comp/views/__init__.py +++ b/webapp/apps/comp/views/__init__.py @@ -20,6 +20,7 @@ OutputsAPIView, DetailMyInputsAPIView, MyInputsAPIView, + ModelConfigAPIView, NewSimulationAPIView, AuthorsAPIView, AuthorsDeleteAPIView, diff --git a/webapp/apps/comp/views/api.py b/webapp/apps/comp/views/api.py index 29b2821b..1bcb5b65 100755 --- a/webapp/apps/comp/views/api.py +++ b/webapp/apps/comp/views/api.py @@ -44,14 +44,21 @@ PrivateAppException, PrivateSimException, ) -from webapp.apps.comp.ioutils import get_ioutils -from webapp.apps.comp.models import Inputs, Simulation, PendingPermission, ANON_BEFORE +from webapp.apps.comp.ioutils import get_ioutils, NotReady +from webapp.apps.comp.models import ( + Inputs, + Simulation, + PendingPermission, + ModelConfig, + ANON_BEFORE, +) from webapp.apps.comp.parser import APIParser from webapp.apps.comp.serializers import ( SimulationSerializer, MiniSimulationSerializer, InputsSerializer, OutputsSerializer, + ModelConfigSerializer, AddAuthorsSerializer, SimAccessSerializer, PendingPermissionSerializer, @@ -80,10 +87,14 @@ def get_inputs(self, kwargs, meta_parameters=None): ioutils = get_ioutils(project) try: defaults = ioutils.model_parameters.defaults(meta_parameters) + except NotReady: + print("NOT READY") + return Response(status=202) except pt.ValidationError as e: return Response(str(e), status=status.HTTP_400_BAD_REQUEST) if "year" in defaults["meta_parameters"]: defaults.update({"extend": True}) + print("got defaults", defaults) return Response(defaults) def get(self, request, *args, **kwargs): @@ -385,6 +396,7 @@ class OutputsAPIView(RecordOutputsMixin, APIView): authentication_classes = ( ClusterAuthentication, + OAuth2Authentication, # Uncomment to allow token-based authentication for this endpoint. # TokenAuthentication, ) @@ -397,8 +409,8 @@ def put(self, request, *args, **kwargs): sim = get_object_or_404( Simulation.objects.prefetch_related("project"), job_id=data["job_id"] ) - if not sim.project.has_write_access(request.user): - return Response(status=status.HTTP_401_UNAUTHORIZED) + # if not sim.project.has_write_access(request.user): + # return Response(status=status.HTTP_401_UNAUTHORIZED) if sim.status == "PENDING": self.record_outputs(sim, data) if sim.notify_on_completion: @@ -442,6 +454,7 @@ def put(self, request, *args, **kwargs): class MyInputsAPIView(APIView): authentication_classes = ( ClusterAuthentication, + OAuth2Authentication, # Uncomment to allow token-based authentication for this endpoint. # TokenAuthentication, ) @@ -454,8 +467,8 @@ def put(self, request, *args, **kwargs): inputs = get_object_or_404( Inputs.objects.prefetch_related("project"), job_id=data["job_id"] ) - if not inputs.project.has_write_access(request.user): - return Response(status=status.HTTP_401_UNAUTHORIZED) + # if not inputs.project.has_write_access(request.user): + # return Response(status=status.HTTP_401_UNAUTHORIZED) if inputs.status in ("PENDING", "INVALID", "FAIL"): # successful run if data["status"] == "SUCCESS": @@ -490,6 +503,37 @@ def put(self, request, *args, **kwargs): return Response(ser.errors, status=status.HTTP_400_BAD_REQUEST) +class ModelConfigAPIView(APIView): + authentication_classes = ( + ClusterAuthentication, + OAuth2Authentication, + # Uncomment to allow token-based authentication for this endpoint. + # TokenAuthentication, + ) + + def put(self, request, *args, **kwargs): + print("myinputs api method=PUT", kwargs) + ser = ModelConfigSerializer(data=request.data) + if ser.is_valid(): + data = ser.validated_data + model_config = get_object_or_404( + ModelConfig.objects.prefetch_related("project"), job_id=data["job_id"] + ) + if model_config.status in ("PENDING", "INVALID", "FAIL"): + ioutils = get_ioutils(model_config.project) + model_config.meta_parameters_values = ioutils.model_parameters.cleanup_meta_parameters( + model_config.meta_parameters_values, data["meta_parameters"] + ) + model_config.meta_parameters = data["meta_parameters"] + model_config.model_parameters = data["model_parameters"] + model_config.status = data["status"] + model_config.save() + return Response(status=status.HTTP_200_OK) + else: + print("model config put error", ser.errors) + return Response(ser.errors, status=status.HTTP_400_BAD_REQUEST) + + class AuthorsAPIView(RequiresLoginPermissions, GetOutputsObjectMixin, APIView): permission_classes = (StrictRequiresActive,) authentication_classes = ( diff --git a/webapp/apps/publish/views.py b/webapp/apps/publish/views.py index d098a0ff..c0be75a0 100644 --- a/webapp/apps/publish/views.py +++ b/webapp/apps/publish/views.py @@ -288,6 +288,7 @@ def post(self, request, *args, **kwargs): tag, _ = Tag.objects.get_or_create( project=project, image_tag=data.get("staging_tag"), + version=data.get("version"), defaults=dict(cpu=project.cpu, memory=project.memory), ) project.staging_tag = tag @@ -298,6 +299,7 @@ def post(self, request, *args, **kwargs): tag, _ = Tag.objects.get_or_create( project=project, image_tag=data.get("latest_tag"), + version=data.get("version"), defaults=dict(cpu=project.cpu, memory=project.memory), ) project.latest_tag = tag diff --git a/webapp/apps/users/api.py b/webapp/apps/users/api.py index f98a21f4..1b15c8b9 100644 --- a/webapp/apps/users/api.py +++ b/webapp/apps/users/api.py @@ -10,10 +10,7 @@ TokenAuthentication, ) -from oauth2_provider.contrib.rest_framework import ( - OAuth2Authentication, - TokenHasReadWriteScope, -) +from oauth2_provider.contrib.rest_framework import OAuth2Authentication from webapp.apps.publish.views import GetProjectMixin from .permissions import StrictRequiresActive diff --git a/webapp/apps/users/migrations/0025_auto_20210321_2004.py b/webapp/apps/users/migrations/0025_auto_20210321_2004.py new file mode 100755 index 00000000..1a17771a --- /dev/null +++ b/webapp/apps/users/migrations/0025_auto_20210321_2004.py @@ -0,0 +1,34 @@ +# Generated by Django 3.0.13 on 2021-03-21 20:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0024_project_social_image_link"), + ] + + operations = [ + migrations.AddField( + model_name="cluster", + name="access_token", + field=models.CharField(max_length=512, null=True), + ), + migrations.AddField( + model_name="cluster", + name="access_token_expires_at", + field=models.DateTimeField(null=True), + ), + migrations.AddField( + model_name="cluster", + name="cluster_password", + field=models.CharField(max_length=512, null=True), + ), + migrations.AddField( + model_name="cluster", + name="version", + field=models.CharField(default="v0", max_length=32), + preserve_default=False, + ), + ] diff --git a/webapp/apps/users/migrations/0026_tag_version.py b/webapp/apps/users/migrations/0026_tag_version.py new file mode 100755 index 00000000..38d4dcc5 --- /dev/null +++ b/webapp/apps/users/migrations/0026_tag_version.py @@ -0,0 +1,18 @@ +# Generated by Django 3.0.13 on 2021-03-21 20:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0025_auto_20210321_2004"), + ] + + operations = [ + migrations.AddField( + model_name="tag", + name="version", + field=models.CharField(max_length=255, null=True), + ), + ] diff --git a/webapp/apps/users/models.py b/webapp/apps/users/models.py index 30f5c27f..e6dce248 100755 --- a/webapp/apps/users/models.py +++ b/webapp/apps/users/models.py @@ -1,5 +1,5 @@ from collections import defaultdict -from datetime import timedelta +from datetime import timedelta, datetime import json import secrets import uuid @@ -182,28 +182,69 @@ def default(self): return self.get(service_account__user__username=DEFAULT_CLUSTER_USER) +class ClusterLoginException(Exception): + pass + + class Cluster(models.Model): url = models.URLField(max_length=64) - jwt_secret = models.CharField(max_length=512, null=True) service_account = models.OneToOneField( Profile, null=True, on_delete=models.SET_NULL ) created_at = models.DateTimeField(auto_now_add=True) deleted_at = models.DateTimeField(null=True) + # v0 + jwt_secret = models.CharField(max_length=512, null=True) + + # v1 + cluster_password = models.CharField(max_length=512, null=True) + access_token = models.CharField(max_length=512, null=True) + access_token_expires_at = models.DateTimeField(null=True) + + version = models.CharField(null=False, max_length=32) + objects = ClusterManager() - def headers(self): - jwt_token = jwt.encode( - {"username": self.service_account.user.username,}, - cryptkeeper.decrypt(self.jwt_secret), + def ensure_access_token(self): + missing_token = self.access_token is None + is_expired = ( + self.access_token_expires_at is not None + and self.access_token_expires_at < (timezone.now() - timedelta(seconds=60)) ) - return { - "Authorization": jwt_token, - "Cluster-User": self.service_account.user.username, - } + + if missing_token or is_expired: + resp = requests.post( + f"{self.url}/api/v1/login/access-token", + data={ + "username": str(self.service_account), + "password": self.cluster_password, + }, + ) + if resp.status_code != 200: + raise ClusterLoginException() + data = resp.json() + self.access_token = data["access_token"] + self.access_token_expires_at = datetime.fromisoformat(data["expires_at"]) + self.save() + self.refresh_from_db() + + def headers(self): + if self.version == "v0": + jwt_token = jwt.encode( + {"username": self.service_account.user.username,}, + cryptkeeper.decrypt(self.jwt_secret), + ) + return { + "Authorization": jwt_token, + "Cluster-User": self.service_account.user.username, + } + elif self.version == "v1": + self.ensure_access_token() + return {"Authorization": f"Bearer {self.access_token}"} def create_user_in_cluster(self, cs_url): + # only works for v0. resp = requests.post( f"{self.url}/auth/", json={ @@ -479,21 +520,10 @@ def version(self): return None if self.status != "running": return None - try: - success, result = SyncCompute().submit_job( - project=self, task_name=actions.VERSION, task_kwargs=dict() - ) - if success: - return result["version"] - else: - print(f"error retrieving version for {self}", result) - return None - except Exception as e: - print(f"error retrieving version for {self}", e) - import traceback - - traceback.print_exc() - return None + if self.latest_tag: + return self.latest_tag.version + if self.staging_tag: + return self.staging_tag.version def is_owner(self, user): return user == self.owner.user @@ -639,6 +669,7 @@ class Tag(models.Model): cpu = models.DecimalField(max_digits=5, decimal_places=1, null=True, default=2) memory = models.DecimalField(max_digits=5, decimal_places=1, null=True, default=6) created_at = models.DateTimeField(auto_now_add=True) + version = models.CharField(max_length=255, null=True) def __str__(self): return str(self.image_tag) diff --git a/webapp/apps/users/serializers.py b/webapp/apps/users/serializers.py index bd3f2e34..809c822a 100644 --- a/webapp/apps/users/serializers.py +++ b/webapp/apps/users/serializers.py @@ -154,6 +154,7 @@ class Meta: class TagUpdateSerializer(serializers.Serializer): latest_tag = serializers.CharField(allow_null=True, required=False) staging_tag = serializers.CharField(allow_null=True, required=False) + version = serializers.CharField(allow_null=True, required=False) def validate(self, attrs): if attrs.get("latest_tag") is None and attrs.get("staging_tag") is None: @@ -165,6 +166,7 @@ def validate(self, attrs): class TagSerializer(serializers.ModelSerializer): project = serializers.StringRelatedField() + version = serializers.CharField(allow_null=True, required=False) class Meta: model = Tag @@ -174,6 +176,7 @@ class Meta: "memory", "cpu", "created_at", + "version", ) read_only = ( diff --git a/webapp/urls.py b/webapp/urls.py index 58a99f38..c263b9b5 100644 --- a/webapp/urls.py +++ b/webapp/urls.py @@ -42,6 +42,11 @@ ), path("outputs/api/", compviews.OutputsAPIView.as_view(), name="outputs_api"), path("inputs/api/", compviews.MyInputsAPIView.as_view(), name="myinputs_api"), + path( + "model-config/api/", + compviews.ModelConfigAPIView.as_view(), + name="modelconfig_api", + ), url(r"^rest-auth/", include("rest_auth.urls")), url(r"^rest-auth/registration/", include("rest_auth.registration.urls")), path("api/v1/sims", compviews.UserSimsAPIView.as_view(), name="sim_api"), diff --git a/workers/cs_workers/cli.py b/workers/cs_workers/cli.py index 41d0fbc5..9476e2af 100644 --- a/workers/cs_workers/cli.py +++ b/workers/cs_workers/cli.py @@ -8,11 +8,13 @@ from cs_deploy.config import workers_config as config import cs_workers.services.manage -import cs_workers.services.scheduler + +# import cs_workers.services.scheduler import cs_workers.services.outputs_processor import cs_workers.models.manage import cs_workers.models.executors.job -import cs_workers.models.executors.api_task + +# import cs_workers.models.executors.api_task import cs_workers.models.executors.server @@ -31,11 +33,11 @@ def cli(subparsers: argparse._SubParsersAction = None): sub_parsers = parser.add_subparsers() cs_workers.services.manage.cli(sub_parsers, config=config) - cs_workers.services.scheduler.cli(sub_parsers) - cs_workers.services.outputs_processor.cli(sub_parsers) + # cs_workers.services.scheduler.cli(sub_parsers) + # cs_workers.services.outputs_processor.cli(sub_parsers) cs_workers.models.manage.cli(sub_parsers) cs_workers.models.executors.job.cli(sub_parsers) - cs_workers.models.executors.api_task.cli(sub_parsers) + # cs_workers.models.executors.api_task.cli(sub_parsers) cs_workers.models.executors.server.cli(sub_parsers) if subparsers is None: diff --git a/workers/cs_workers/dockerfiles/Dockerfile.model b/workers/cs_workers/dockerfiles/Dockerfile.model index ada23f19..87ee9563 100644 --- a/workers/cs_workers/dockerfiles/Dockerfile.model +++ b/workers/cs_workers/dockerfiles/Dockerfile.model @@ -30,3 +30,9 @@ RUN if test -f "./cs-config/setup.py"; then pip install -e ./cs-config; fi EXPOSE 8010 ENV PORT=8010 ENV HOST=0.0.0.0 + +COPY ./secrets ./secrets +COPY ./deploy ./deploy +COPY ./workers ./workers + +RUN pip install -e ./secrets && pip install -e ./deploy && pip install -e ./workers diff --git a/workers/cs_workers/models/clients/api_task.py b/workers/cs_workers/models/clients/api_task.py deleted file mode 100644 index 0f1a529f..00000000 --- a/workers/cs_workers/models/clients/api_task.py +++ /dev/null @@ -1,26 +0,0 @@ -import httpx - - -from cs_workers.utils import clean - - -class APITask: - def __init__(self, owner, title, task_id, task_name, **task_kwargs): - self.owner = owner - self.title = title - self.task_id = task_id - self.task_name = task_name - self.task_kwargs = task_kwargs - - async def create(self, asynchronous=False): - method = "async" if asynchronous else "sync" - async with httpx.AsyncClient() as client: - resp = await client.post( - f"http://{clean(self.owner)}-{clean(self.title)}-api-task/{method}/", - json={ - "task_id": self.task_id, - "task_name": self.task_name, - "task_kwargs": self.task_kwargs, - }, - ) - return resp diff --git a/workers/cs_workers/models/clients/job.py b/workers/cs_workers/models/clients/job.py index f1de2e0e..87af101c 100644 --- a/workers/cs_workers/models/clients/job.py +++ b/workers/cs_workers/models/clients/job.py @@ -24,33 +24,31 @@ def __init__( title, tag, model_config, - job_id=None, - job_kwargs=None, + job_id, + callback_url, + route_name="sim", cr="gcr.io", incluster=True, - rclient=None, quiet=True, + namespace="default", ): self.project = project self.owner = owner self.title = title self.tag = tag self.model_config = model_config + print(self.model_config) self.cr = cr self.quiet = quiet + self.namespace = namespace self.incluster = incluster - if rclient is None: - self.rclient = redis.Redis(**redis_conn) - else: - self.rclient = rclient if self.incluster: kconfig.load_incluster_config() else: kconfig.load_kube_config() self.api_client = kclient.BatchV1Api() - self.job = self.configure(owner, title, tag, job_id) - self.save_job_kwargs(self.job_id, job_kwargs) + self.job = self.configure(owner, title, tag, job_id, callback_url, route_name) def env(self, owner, title, config): safeowner = clean(owner) @@ -60,22 +58,22 @@ def env(self, owner, title, config): kclient.V1EnvVar("TITLE", title), kclient.V1EnvVar("EXP_TASK_TIME", str(config["exp_task_time"])), ] - for sec in [ - "BUCKET", - "REDIS_HOST", - "REDIS_PORT", - "REDIS_EXECUTOR_PW", - ]: - envs.append( - kclient.V1EnvVar( - sec, - value_from=kclient.V1EnvVarSource( - secret_key_ref=( - kclient.V1SecretKeySelector(key=sec, name="worker-secret") - ) - ), - ) - ) + # for sec in [ + # "BUCKET", + # "REDIS_HOST", + # "REDIS_PORT", + # "REDIS_EXECUTOR_PW", + # ]: + # envs.append( + # kclient.V1EnvVar( + # sec, + # value_from=kclient.V1EnvVarSource( + # secret_key_ref=( + # kclient.V1SecretKeySelector(key=sec, name="worker-secret") + # ) + # ), + # ) + # ) for secret in ModelSecrets( owner=owner, title=title, project=self.project @@ -94,11 +92,8 @@ def env(self, owner, title, config): ) return envs - def configure(self, owner, title, tag, job_id=None): - if job_id is None: - job_id = str(uuid.uuid4()) - else: - job_id = str(job_id) + def configure(self, owner, title, tag, job_id, callback_url, route_name): + job_id = str(job_id) config = self.model_config @@ -108,7 +103,14 @@ def configure(self, owner, title, tag, job_id=None): container = kclient.V1Container( name=job_id, image=f"{self.cr}/{self.project}/{safeowner}_{safetitle}_tasks:{tag}", - command=["csw", "job", "--job-id", job_id, "--route-name", "sim"], + command=[ + "csw", + "job", + "--callback-url", + callback_url, + "--route-name", + route_name, + ], env=self.env(owner, title, config), resources=kclient.V1ResourceRequirements(**config["resources"]), ) @@ -140,18 +142,15 @@ def configure(self, owner, title, tag, job_id=None): return job - def save_job_kwargs(self, job_id, job_kwargs): - if not job_id.startswith("job-"): - job_id = f"job-{job_id}" - self.rclient.set(job_id, json.dumps(job_kwargs)) - def create(self): - return self.api_client.create_namespaced_job(body=self.job, namespace="default") + return self.api_client.create_namespaced_job( + body=self.job, namespace=self.namespace + ) def delete(self): return self.api_client.delete_namespaced_job( name=self.job.metadata.name, - namespace="default", + namespace=self.namespace, body=kclient.V1DeleteOptions(), ) diff --git a/workers/cs_workers/models/executors/job.py b/workers/cs_workers/models/executors/job.py index a0ad740e..6bf6b44c 100644 --- a/workers/cs_workers/models/executors/job.py +++ b/workers/cs_workers/models/executors/job.py @@ -1,50 +1,46 @@ import argparse import asyncio -import functools -import json -import os -import redis import httpx import cs_storage -from cs_workers.models.executors.task_wrapper import async_task_wrapper +from cs_workers.models.executors.task_wrapper import task_wrapper - -def sim_handler(task_id, meta_param_dict, adjustment): +try: from cs_config import functions +except ImportError: + functions = None + + +def version(**task_kwargs): + return {"version": functions.get_version()} + + +def defaults(meta_param_dict=None, **task_kwargs): + return functions.get_inputs(meta_param_dict) + +def parse(meta_param_dict, adjustment, errors_warnings): + return functions.validate_inputs(meta_param_dict, adjustment, errors_warnings) + + +def sim(meta_param_dict, adjustment): outputs = functions.run_model(meta_param_dict, adjustment) print("got result") - outputs = cs_storage.serialize_to_json(outputs) - print("storing results") - for i in range(3): - try: - resp = httpx.post( - "http://outputs-processor/write/", - json={"task_id": task_id, "outputs": outputs}, - timeout=120.0, - ) - break - except Exception as e: - print(i, e) - - print("got resp", resp.status_code, resp.url) - assert resp.status_code == 200, f"Got code: {resp.status_code}" - return resp.json() + return cs_storage.serialize_to_json(outputs) -routes = {"sim": sim_handler} +routes = {"version": version, "defaults": defaults, "parse": parse, "sim": sim} def main(args: argparse.Namespace): asyncio.run( - async_task_wrapper(args.job_id, args.route_name, routes[args.route_name]) + task_wrapper(args.callback_url, args.route_name, routes[args.route_name]) ) def cli(subparsers: argparse._SubParsersAction): parser = subparsers.add_parser("job", description="CLI for C/S jobs.") - parser.add_argument("--job-id", "-t", required=True) - parser.add_argument("--route-name", "-r", required=True) + parser.add_argument("--callback-url", required=True) + parser.add_argument("--route-name", required=True) parser.set_defaults(func=main) diff --git a/workers/cs_workers/models/executors/task_wrapper.py b/workers/cs_workers/models/executors/task_wrapper.py index 8dccae06..c6cb092e 100644 --- a/workers/cs_workers/models/executors/task_wrapper.py +++ b/workers/cs_workers/models/executors/task_wrapper.py @@ -1,72 +1,32 @@ -import functools -import json -import os -import re import time import traceback -import redis import httpx import cs_storage -from cs_workers.utils import redis_conn_from_env - - -redis_conn = dict( - username="executor", - password=os.environ.get("REDIS_EXECUTOR_PW"), - **redis_conn_from_env(), -) - try: from cs_config import functions except ImportError as ie: - # if os.environ.get("IS_FLASK", "False") == "True": - # functions = None - # else: - # raise ie pass -async def sync_task_wrapper(task_id, task_name, func, task_kwargs=None): - print("sync task", task_id, func, task_kwargs) - start = time.time() - traceback_str = None - res = {} - try: - outputs = func(task_id, **(task_kwargs or {})) - res.update(outputs) - except Exception: - traceback_str = traceback.format_exc() - finish = time.time() - if "meta" not in res: - res["meta"] = {} - res["meta"]["task_times"] = [finish - start] - if traceback_str is None: - res["status"] = "SUCCESS" - else: - res["status"] = "FAIL" - res["traceback"] = traceback_str - return res - - -async def async_task_wrapper(task_id, task_name, func, task_kwargs=None): - print("async task", task_id, func, task_kwargs) +async def task_wrapper(callback_url, task_name, func, task_kwargs=None): + print("async task", callback_url, func, task_kwargs) start = time.time() traceback_str = None - res = {"task_id": task_id} + res = { + "task_name": task_name, + } try: if task_kwargs is None: - if not task_id.startswith("job-"): - _task_id = f"job-{task_id}" - else: - _task_id = task_id - with redis.Redis(**redis_conn) as rclient: - task_kwargs = rclient.get(_task_id) - if task_kwargs is not None: - task_kwargs = json.loads(task_kwargs.decode()) - outputs = func(task_id, **(task_kwargs or {})) + print("getting task_kwargs") + async with httpx.AsyncClient() as client: + resp = await client.get(callback_url) + resp.raise_for_status() + task_kwargs = resp.json()["inputs"] + print("got task_kwargs", task_kwargs) + outputs = func(**(task_kwargs or {})) res.update( { "model_version": functions.get_version(), @@ -76,22 +36,24 @@ async def async_task_wrapper(task_id, task_name, func, task_kwargs=None): ) except Exception: traceback_str = traceback.format_exc() + finish = time.time() + if "meta" not in res: res["meta"] = {} res["meta"]["task_times"] = [finish - start] + if traceback_str is None: res["status"] = "SUCCESS" else: res["status"] = "FAIL" res["traceback"] = traceback_str + print("saving results...") async with httpx.AsyncClient() as client: - resp = await client.post( - "http://outputs-processor/push/", - json={"task_name": task_name, "result": res}, - ) + resp = await client.post(callback_url, json=res, timeout=120) + print("resp", resp.status_code, resp.url) - assert resp.status_code == 200, f"Got code: {resp.status_code}" + assert resp.status_code in (200, 201), f"Got code: {resp.status_code} ({resp.text})" return res diff --git a/workers/cs_workers/models/manage.py b/workers/cs_workers/models/manage.py index 41a9e7c8..7aa853d2 100644 --- a/workers/cs_workers/models/manage.py +++ b/workers/cs_workers/models/manage.py @@ -24,6 +24,13 @@ BASE_PATH = CURR_PATH / ".." +def strip_secrets(line, secrets): + line = line.decode() + for name, value in secrets.items(): + line = line.replace(name, "******").replace(value, "******") + return line.strip("\n") + + class BaseManager: def __init__(self, project, cs_url, cs_api_token): self.project = project @@ -152,9 +159,12 @@ def stage(self): def promote(self): self.apply_method_to_apps(method=self.promote_app) + def version(self): + self.apply_method_to_apps(method=self.get_version) + def write_app_config(self): self.apply_method_to_apps(method=self.write_secrets) - self.apply_method_to_apps(method=self._write_api_task) + # self.apply_method_to_apps(method=self._write_api_task) def apply_method_to_apps(self, method): """ @@ -266,12 +276,6 @@ def test_app_image(self, app): try: - def strip_secrets(line, secrets): - line = line.decode() - for name, value in secrets.items(): - line = line.replace(name, "******").replace(value, "******") - return line.strip("\n") - def stream_logs(container): for line in container.logs(stream=True): print(strip_secrets(line, secrets)) @@ -357,10 +361,41 @@ def push_app_image(self, app): run(f"{cmd_prefix} {self.cr}/{self.project}/{img_name}:{tag}") + def get_version(self, app, print_stdout=True): + safeowner = clean(app["owner"]) + safetitle = clean(app["title"]) + img_name = f"{safeowner}_{safetitle}_tasks" + + app_version = None + if app["tech"] == "python-paramtools": + secrets = self.config._list_secrets(app) + client = docker.from_env() + container = client.containers.run( + f"{img_name}:{self.tag}", + [ + "python", + "-c", + "from cs_config import functions; print(functions.get_version())", + ], + environment=secrets, + detach=True, + ports=None, + ) + logs = [] + for line in container.logs(stream=True): + logs.append(strip_secrets(line, secrets)) + app_version = logs[0] if logs else None + + if app_version and print_stdout: + sys.stdout.write(app_version) + + return app_version + def stage_app(self, app): + app_version = self.get_version(app, print_stdout=False) resp = httpx.post( f"{self.config.cs_url}/apps/api/v1/{app['owner']}/{app['title']}/tags/", - json={"staging_tag": self.staging_tag}, + json={"staging_tag": self.staging_tag, "version": app_version}, headers={"Authorization": f"Token {self.cs_api_token}"}, ) assert ( @@ -378,9 +413,16 @@ def promote_app(self, app): resp.status_code == 200 ), f"Got: {resp.url} {resp.status_code} {resp.text}" staging_tag = resp.json()["staging_tag"]["image_tag"] + app_version = resp.json()["staging_tag"]["version"] + if app_version is None: + app_version = self.get_version(app, print_stdout=False) resp = httpx.post( f"{self.config.cs_url}/apps/api/v1/{app['owner']}/{app['title']}/tags/", - json={"latest_tag": staging_tag or self.tag, "staging_tag": None}, + json={ + "latest_tag": staging_tag or self.tag, + "staging_tag": None, + "version": app_version, + }, headers={"Authorization": f"Token {self.cs_api_token}"}, ) assert ( @@ -584,6 +626,20 @@ def stage(args: argparse.Namespace): manager.stage() +def version(args: argparse.Namespace): + manager = Manager( + project=args.project, + tag=args.tag, + cs_url=getattr(args, "cs_url", None) or workers_config["CS_URL"], + cs_api_token=getattr(args, "cs_api_token", None), + models=args.names, + base_branch=args.base_branch, + cr=args.cr, + staging_tag=getattr(args, "staging_tag", None), + ) + manager.version() + + def cli(subparsers: argparse._SubParsersAction): parser = subparsers.add_parser( "models", description="Deploy and manage models on C/S compute cluster." @@ -620,6 +676,9 @@ def cli(subparsers: argparse._SubParsersAction): promote_parser = model_subparsers.add_parser("promote") promote_parser.set_defaults(func=promote) + version_parser = model_subparsers.add_parser("version") + version_parser.set_defaults(func=version) + secrets.cli(model_subparsers) parser.set_defaults(func=lambda args: print(args)) diff --git a/workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py b/workers/cs_workers/services/api/alembic/versions/07a0d0f7e1b4_init.py similarity index 75% rename from workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py rename to workers/cs_workers/services/api/alembic/versions/07a0d0f7e1b4_init.py index 4b4c45b6..d7efbd4e 100644 --- a/workers/cs_workers/services/api/alembic/versions/6bc6aaccc77a_init.py +++ b/workers/cs_workers/services/api/alembic/versions/07a0d0f7e1b4_init.py @@ -1,16 +1,16 @@ """Init -Revision ID: 6bc6aaccc77a +Revision ID: 07a0d0f7e1b4 Revises: -Create Date: 2021-03-02 03:27:17.375165+00:00 +Create Date: 2021-03-21 18:17:39.921958+00:00 """ from alembic import op import sqlalchemy as sa - +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = "6bc6aaccc77a" +revision = "07a0d0f7e1b4" down_revision = None branch_labels = None depends_on = None @@ -28,6 +28,10 @@ def upgrade(): sa.Column("is_active", sa.Boolean(), nullable=True), sa.Column("is_superuser", sa.Boolean(), nullable=True), sa.Column("is_approved", sa.Boolean(), nullable=True), + sa.Column("client_id", sa.String(), nullable=True), + sa.Column("client_secret", sa.String(), nullable=True), + sa.Column("access_token", sa.String(), nullable=True), + sa.Column("access_token_expires_at", sa.DateTime(), nullable=True), sa.PrimaryKeyConstraint("id"), ) op.create_index(op.f("ix_users_email"), "users", ["email"], unique=True) @@ -35,13 +39,18 @@ def upgrade(): op.create_index(op.f("ix_users_username"), "users", ["username"], unique=False) op.create_table( "jobs", - sa.Column("id", sa.String(), nullable=False), - sa.Column("owner_id", sa.Integer(), nullable=True), + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=True), + sa.Column("name", sa.String(), nullable=True), sa.Column("created_at", sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint(["owner_id"], ["users.id"],), + sa.Column("finished_at", sa.DateTime(), nullable=True), + sa.Column("status", sa.String(), nullable=True), + sa.Column("inputs", sa.JSON(), nullable=True), + sa.Column("outputs", sa.JSON(), nullable=True), + sa.Column("tag", sa.String(), nullable=True), + sa.ForeignKeyConstraint(["user_id"], ["users.id"],), sa.PrimaryKeyConstraint("id"), ) - op.create_index(op.f("ix_jobs_id"), "jobs", ["id"], unique=False) op.create_table( "projects", sa.Column("id", sa.Integer(), nullable=False), @@ -71,7 +80,6 @@ def downgrade(): op.drop_index(op.f("ix_projects_owner"), table_name="projects") op.drop_index(op.f("ix_projects_id"), table_name="projects") op.drop_table("projects") - op.drop_index(op.f("ix_jobs_id"), table_name="jobs") op.drop_table("jobs") op.drop_index(op.f("ix_users_username"), table_name="users") op.drop_index(op.f("ix_users_id"), table_name="users") diff --git a/workers/cs_workers/services/api/alembic/versions/f027333560c0_fix_cpu_and_memory_types.py b/workers/cs_workers/services/api/alembic/versions/f027333560c0_fix_cpu_and_memory_types.py new file mode 100644 index 00000000..474a04c4 --- /dev/null +++ b/workers/cs_workers/services/api/alembic/versions/f027333560c0_fix_cpu_and_memory_types.py @@ -0,0 +1,54 @@ +"""Fix cpu and memory types + +Revision ID: f027333560c0 +Revises: 07a0d0f7e1b4 +Create Date: 2021-03-22 14:27:21.523743+00:00 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "f027333560c0" +down_revision = "07a0d0f7e1b4" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "projects", + "cpu", + existing_type=sa.INTEGER(), + type_=sa.Float(), + existing_nullable=True, + ) + op.alter_column( + "projects", + "memory", + existing_type=sa.INTEGER(), + type_=sa.Float(), + existing_nullable=True, + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "projects", + "memory", + existing_type=sa.Float(), + type_=sa.INTEGER(), + existing_nullable=True, + ) + op.alter_column( + "projects", + "cpu", + existing_type=sa.Float(), + type_=sa.INTEGER(), + existing_nullable=True, + ) + # ### end Alembic commands ### diff --git a/workers/cs_workers/services/api/dependencies.py b/workers/cs_workers/services/api/dependencies.py index 088feddd..752949fa 100644 --- a/workers/cs_workers/services/api/dependencies.py +++ b/workers/cs_workers/services/api/dependencies.py @@ -18,7 +18,6 @@ def get_db() -> Generator: try: - print("NOOOOOOOOOOO") db = SessionLocal() yield db finally: @@ -28,6 +27,7 @@ def get_db() -> Generator: def get_current_user( db: Session = Depends(get_db), token: str = Depends(reusable_oauth2) ) -> models.User: + print("get_current_user") try: payload = jwt.decode( token, settings.API_SECRET_KEY, algorithms=[security.ALGORITHM] @@ -47,6 +47,7 @@ def get_current_user( def get_current_active_user( current_user: models.User = Depends(get_current_user), ) -> models.User: + print("get_current_active_user") if not current_user.is_active: raise HTTPException(status_code=400, detail="Inactive user") return current_user diff --git a/workers/cs_workers/services/api/main.py b/workers/cs_workers/services/api/main.py index c6554d30..97264c75 100644 --- a/workers/cs_workers/services/api/main.py +++ b/workers/cs_workers/services/api/main.py @@ -2,7 +2,7 @@ from starlette.middleware.cors import CORSMiddleware from .settings import settings -from .routers import users, login, projects +from .routers import users, login, projects, simulations app = FastAPI( title=settings.PROJECT_NAME, openapi_url=f"{settings.API_PREFIX_STR}/openapi.json", @@ -21,3 +21,4 @@ app.include_router(login.router, prefix=settings.API_PREFIX_STR) app.include_router(users.router, prefix=settings.API_PREFIX_STR) app.include_router(projects.router, prefix=settings.API_PREFIX_STR) +app.include_router(simulations.router, prefix=settings.API_PREFIX_STR) diff --git a/workers/cs_workers/services/api/models.py b/workers/cs_workers/services/api/models.py index 6ca55735..e181f991 100644 --- a/workers/cs_workers/services/api/models.py +++ b/workers/cs_workers/services/api/models.py @@ -1,4 +1,16 @@ -from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, DateTime +import uuid + +from sqlalchemy import ( + Boolean, + Column, + ForeignKey, + Integer, + String, + DateTime, + JSON, + Float, +) +from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import relationship from sqlalchemy.schema import UniqueConstraint @@ -18,17 +30,28 @@ class User(Base): is_superuser = Column(Boolean(), default=False) is_approved = Column(Boolean(), default=False) - jobs = relationship("Job", back_populates="owner") + client_id = Column(String) + client_secret = Column(String) + access_token = Column(String) + access_token_expires_at = Column(DateTime) + + jobs = relationship("Job", back_populates="user") projects = relationship("Project", back_populates="user") class Job(Base): __tablename__ = "jobs" - id = Column(String, primary_key=True, index=True) - owner_id = Column(Integer, ForeignKey("users.id")) + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + user_id = Column(Integer, ForeignKey("users.id")) + name = Column(String) created_at = Column(DateTime) + finished_at = Column(DateTime) + status = Column(String) + inputs = Column(JSON) + outputs = Column(JSON) + tag = Column(String) - owner = relationship("User", back_populates="jobs") + user = relationship("User", back_populates="jobs") class Project(Base): @@ -40,8 +63,8 @@ class Project(Base): tech = Column(String, nullable=False) callable_name = Column(String) exp_task_time = Column(String, nullable=False) - cpu = Column(Integer) - memory = Column(Integer) + cpu = Column(Float) + memory = Column(Float) user = relationship("User", back_populates="projects") diff --git a/workers/cs_workers/services/api/routers/projects.py b/workers/cs_workers/services/api/routers/projects.py index 81c6ea97..8e862d11 100644 --- a/workers/cs_workers/services/api/routers/projects.py +++ b/workers/cs_workers/services/api/routers/projects.py @@ -5,10 +5,10 @@ from .. import models, schemas, dependencies as deps -router = APIRouter() +router = APIRouter(prefix="/projects", tags=["projects"]) -@router.post("/projects/sync/", response_model=List[schemas.Project], status_code=200) +@router.post("/sync/", response_model=List[schemas.Project], status_code=200) def sync_projects( projects: List[schemas.ProjectSync] = Body(...), db: Session = Depends(deps.get_db), @@ -33,9 +33,8 @@ def sync_projects( return orm_projects -@router.get("/projects/", response_model=List[schemas.Project], status_code=200) +@router.get("/", response_model=List[schemas.Project], status_code=200) def get_projects( - projects=List[schemas.Project], db: Session = Depends(deps.get_db), user: schemas.User = Depends(deps.get_current_active_user), ): diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index e69de29b..4c254d85 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -0,0 +1,166 @@ +from datetime import datetime +import math +import os +import uuid + +import httpx +from fastapi import APIRouter, Depends, Body, HTTPException +from sqlalchemy.orm import Session + +from cs_workers.models.clients import job +from .. import models, schemas, dependencies as deps, security + +incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False +assert not incluster + +PROJECT = os.environ.get("PROJECT") + + +router = APIRouter(prefix="/jobs", tags=["jobs"]) + + +@router.get("/callback/{job_id}/", status_code=201, response_model=schemas.Job) +def job_callback(job_id: str, db: Session = Depends(deps.get_db)): + instance: models.Job = db.query(models.Job).filter( + models.Job.id == job_id + ).one_or_none() + if instance is None: + raise HTTPException(status_code=404, detail="Job not found.") + print(instance.finished_at) + if instance.finished_at: + raise HTTPException( + status_code=403, detail="No permission to retrieve job once it's finished." + ) + + if instance.status == "CREATED": + instance.status = "RUNNING" + db.add(instance) + db.commit() + db.refresh(instance) + + print(instance.inputs) + + return instance + + +@router.post("/callback/{job_id}/", status_code=201, response_model=schemas.Job) +async def finish_job( + job_id: str, + task: schemas.TaskComplete = Body(...), + db: Session = Depends(deps.get_db), +): + print("got data for ", job_id) + print(task.dict()) + instance = db.query(models.Job).filter(models.Job.id == job_id).one_or_none() + if instance is None: + raise HTTPException(status_code=404, detail="Job not found.") + + if instance.finished_at: + raise HTTPException(status_code=400, detail="Job already marked as complete.") + + instance.outputs = task.outputs + instance.status = task.status + instance.finished_at = datetime.utcnow() + + db.add(instance) + db.commit() + db.refresh(instance) + + user = instance.user + await security.ensure_cs_access_token(db, user) + async with httpx.AsyncClient() as client: + resp = await client.post( + f"http://localhost:5001/{job_id}/", + json={ + "url": user.url, + "headers": {"Authorization": f"Bearer {user.access_token}"}, + "task": task.dict(), + }, + ) + print(resp.text) + resp.raise_for_status() + + return instance + + +# @router.get("/{job_id}/", status_code=201, response_model=schemas.Job) +# def get_job( +# job_id: str, +# db: Session = Depends(deps.get_db), +# user: schemas.User = Depends(deps.get_current_active_user), +# ): +# instance = db.query(models.Job).filter(models.Job.id == job_id).one_or_none() +# if instance is None: +# raise HTTPException(status_code=404, detail="Job not found.") + +# return instance + + +@router.post("/{owner}/{title}/", response_model=schemas.Job, status_code=201) +def create_job( + owner: str, + title: str, + task: schemas.Task = Body(...), + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + print(owner, title) + print(task.task_kwargs) + project = ( + db.query(models.Project) + .filter( + models.Project.owner == owner, + models.Project.title == title, + models.Project.user_id == user.id, + ) + .one_or_none() + ) + + if not project: + raise HTTPException(status_code=404, detail="Project not found.") + + task_name, _, task_kwargs, tag = ( + task.task_name, + task.task_id, + task.task_kwargs, + task.tag, + ) + + instance = models.Job( + user_id=user.id, + name=task_name, + created_at=datetime.utcnow(), + finished_at=None, + inputs=task_kwargs, + tag=tag, + status="CREATED", + ) + db.add(instance) + db.commit() + db.refresh(instance) + + project_data = schemas.Project.from_orm(project).dict() + mem = float(project_data.pop("memory")) + cpu = float(project_data.pop("cpu")) + if cpu and mem: + project_data["resources"] = { + "requests": {"memory": f"{mem}G", "cpu": cpu}, + "limits": {"memory": f"{math.ceil(mem * 1.2)}G", "cpu": cpu,}, + } + + client = job.Job( + PROJECT, + owner, + title, + tag=tag, + model_config=project_data, + job_id=instance.id, + callback_url=f"https://hdoupe.ngrok.io/api/v1/jobs/callback/{instance.id}/", + route_name=task_name, + incluster=incluster, + namespace="worker-api", + ) + + client.create() + + return instance diff --git a/workers/cs_workers/services/api/routers/users.py b/workers/cs_workers/services/api/routers/users.py index ff7bb690..bcb34b83 100644 --- a/workers/cs_workers/services/api/routers/users.py +++ b/workers/cs_workers/services/api/routers/users.py @@ -1,5 +1,7 @@ +import base64 from typing import Any +import httpx from fastapi import APIRouter, Body, Depends, HTTPException from sqlalchemy.orm import Session from pydantic.networks import EmailStr, AnyHttpUrl # pylint: disable=no-name-in-module @@ -28,6 +30,8 @@ def create_user( email: EmailStr = Body(...), url: AnyHttpUrl = Body(...), username: str = Body(None), + client_id: str = Body(...), + client_secret: str = Body(...), ) -> models.User: """ Create new user. @@ -46,6 +50,8 @@ def create_user( username=user_in.username, url=user_in.url, hashed_password=security.get_password_hash(user_in.password), + client_id=client_id, + client_secret=client_secret, ) db.add(user_db) db.commit() @@ -53,6 +59,15 @@ def create_user( return user_db +@router.get("/ping/", status_code=200) +async def ping( + *, + db: Session = Depends(deps.get_db), + current_user: models.User = Depends(deps.get_current_active_user), +): + await security.ensure_cs_access_token(db, current_user) + + @router.post("/approve/", response_model=schemas.User) def approve_user( *, diff --git a/workers/cs_workers/services/api/schemas.py b/workers/cs_workers/services/api/schemas.py index a0c9090e..40b65a65 100644 --- a/workers/cs_workers/services/api/schemas.py +++ b/workers/cs_workers/services/api/schemas.py @@ -1,14 +1,23 @@ from datetime import datetime -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Optional, Any from enum import Enum +import uuid -from pydantic import BaseModel # pylint: disable=no-name-in-module +from pydantic import BaseModel, Json # pylint: disable=no-name-in-module from pydantic.networks import EmailStr, AnyHttpUrl # pylint: disable=no-name-in-module class JobBase(BaseModel): - owner_id: int + user_id: int created_at: datetime + name: str + created_at: datetime + finished_at: Optional[datetime] + status: str + inputs: Optional[Dict] + outputs: Optional[Dict] + traceback: Optional[str] + tag: str class JobCreate(JobBase): @@ -16,12 +25,29 @@ class JobCreate(JobBase): class Job(JobBase): - id: int + id: uuid.UUID class Config: orm_mode = True +class TaskComplete(BaseModel): + model_version: Optional[str] + outputs: Optional[Dict] + traceback: Optional[str] + version: Optional[str] + meta: Dict # Dict[str, str] + status: str + task_name: str + + +class Task(BaseModel): + task_id: Optional[str] + task_name: str + task_kwargs: Dict # Dict[str, str] + tag: str + + # Shared properties class UserBase(BaseModel): email: Optional[EmailStr] = None @@ -71,17 +97,24 @@ class TokenPayload(BaseModel): sub: Optional[int] = None +class CSOauthResponse(BaseModel): + access_token: str + expires_in: int + token_type: str + scope: str + + class ProjectSync(BaseModel): owner: str title: str tech: str - callable_name: str + callable_name: Optional[str] exp_task_time: int - cpu: int - memory: int + cpu: float + memory: float -class Project(BaseModel): +class Project(ProjectSync): id: int class Config: diff --git a/workers/cs_workers/services/api/security.py b/workers/cs_workers/services/api/security.py index 80d1462e..bc79d2a7 100644 --- a/workers/cs_workers/services/api/security.py +++ b/workers/cs_workers/services/api/security.py @@ -1,10 +1,16 @@ from datetime import datetime, timedelta from typing import Any, Union +import httpx from jose import jwt from passlib.context import CryptContext +from sqlalchemy.orm import Session + +from fastapi import HTTPException from .settings import settings +from . import schemas +from . import models pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") @@ -32,3 +38,32 @@ def verify_password(plain_password: str, hashed_password: str) -> bool: def get_password_hash(password: str) -> str: return pwd_context.hash(password) + + +async def ensure_cs_access_token(db: Session, user: models.User): + missing_token = user.access_token is None + is_expired = ( + user.access_token_expires_at is not None + and user.access_token_expires_at < (datetime.utcnow() - timedelta(seconds=60)) + ) + if missing_token or is_expired: + async with httpx.AsyncClient() as client: + resp = await client.post( + f"{user.url}/o/token/", + data={ + "grant_type": "client_credentials", + "client_id": user.client_id, + "client_secret": user.client_secret, + }, + ) + if resp.status_code != 200: + raise HTTPException(status_code=400, detail=resp.text) + data = schemas.CSOauthResponse(**resp.json()) + user.access_token = data.access_token + user.access_token_expires_at = datetime.utcnow() + timedelta( + seconds=data.expires_in + ) + db.add(user) + db.commit() + db.refresh(user) + return user diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py index b4e263be..6904e982 100644 --- a/workers/cs_workers/services/api/settings.py +++ b/workers/cs_workers/services/api/settings.py @@ -12,7 +12,11 @@ class Settings(BaseSettings): SERVER_NAME: Optional[str] SERVER_HOST: Optional[AnyHttpUrl] - BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = [] + BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = [ + "http://10.0.0.137:5000", + "http://localhost:5000", + "https://hdoupe.ngrok.io", + ] @validator("BACKEND_CORS_ORIGINS", pre=True) def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: @@ -56,6 +60,8 @@ def assemble_db_connection(cls, v: Optional[str], values: Dict[str, Any]) -> Any FIRST_SUPERUSER: Optional[EmailStr] FIRST_SUPERUSER_PASSWORD: Optional[str] + JOB_NAMESPACE: str = "worker-api" + class Config: case_sensitive = True diff --git a/workers/cs_workers/services/api/tests/conftest.py b/workers/cs_workers/services/api/tests/conftest.py index 94a55d64..4d4b3404 100644 --- a/workers/cs_workers/services/api/tests/conftest.py +++ b/workers/cs_workers/services/api/tests/conftest.py @@ -81,6 +81,32 @@ def teardown_transaction(): return session +# # sa 1.4??? +# # https://github.com/jeancochrane/pytest-flask-sqlalchemy/blob/c109469f83450b8c5ff5de962faa1105064f5619/pytest_flask_sqlalchemy/fixtures.py#L25-L84 +# @pytest.fixture(scope="function") +# def db(request) -> Generator: +# engine = create_engine(SQLALCHEMY_DATABASE_URI, pool_pre_ping=True) +# connection = engine.connect() +# transaction = connection.begin() + +# Base = declarative_base() + +# Base.metadata.create_all(bind=connection) + +# TestingSessionLocal = sessionmaker( +# autocommit=False, autoflush=False, bind=connection +# ) +# session = TestingSessionLocal() +# try: +# session.begin() + +# app.dependency_overrides[get_db] = lambda: session +# yield session +# finally: +# session.rollback() +# transaction.rollback() + + @pytest.fixture(scope="function") def client() -> Generator: with TestClient(app) as c: @@ -94,6 +120,8 @@ def new_user(db): email="test@test.com", url="http://localhost:8000", hashed_password=security.get_password_hash("heyhey2222"), + client_id="abc123", + client_secret="abc123", ) db.add(user_) db.commit() diff --git a/workers/cs_workers/services/api/tests/test_users.py b/workers/cs_workers/services/api/tests/test_users.py index eb441ee0..4b22ebab 100644 --- a/workers/cs_workers/services/api/tests/test_users.py +++ b/workers/cs_workers/services/api/tests/test_users.py @@ -10,6 +10,8 @@ def test_create_user(self, db, client): "username": "new_user", "password": "hello world", "url": "https://example.com", + "client_id": "abc123", + "client_secret": "abc123", }, ) assert resp.status_code == 201, resp.text diff --git a/workers/cs_workers/services/manage.py b/workers/cs_workers/services/manage.py index aa1e2fd6..cdf68c55 100644 --- a/workers/cs_workers/services/manage.py +++ b/workers/cs_workers/services/manage.py @@ -11,7 +11,8 @@ from cs_deploy.config import workers_config from cs_workers.services.secrets import ServicesSecrets -from cs_workers.services import scheduler + +# from cs_workers.services import scheduler CURR_PATH = Path(os.path.abspath(os.path.dirname(__file__))) BASE_PATH = CURR_PATH / ".." @@ -369,7 +370,8 @@ def port_forward(args: argparse.Namespace): def serve(args: argparse.Namespace): - scheduler.run() + # scheduler.run() + pass def cli(subparsers: argparse._SubParsersAction, config=None, **kwargs): diff --git a/workers/cs_workers/services/outputs_processor.py b/workers/cs_workers/services/outputs_processor.py index e5e2dc7b..fa735d62 100644 --- a/workers/cs_workers/services/outputs_processor.py +++ b/workers/cs_workers/services/outputs_processor.py @@ -3,9 +3,12 @@ import os import httpx +from pydantic import BaseModel import redis -import tornado.ioloop -import tornado.web +from rq import Queue +from fastapi import FastAPI, Body +from .api.schemas import TaskComplete + try: from dask.distributed import Client @@ -14,109 +17,53 @@ import cs_storage -from cs_workers.services import auth -from cs_workers.utils import redis_conn_from_env +app = FastAPI() -redis_conn = dict( - username=os.environ.get("REDIS_USER"), - password=os.environ.get("REDIS_PW"), - **redis_conn_from_env(), -) +queue = Queue(connection=redis.Redis()) BUCKET = os.environ.get("BUCKET") -async def write(task_id, outputs): - async with await Client(asynchronous=True, processes=False) as client: - outputs = cs_storage.deserialize_from_json(outputs) - res = await client.submit(cs_storage.write, task_id, outputs) +class Result(BaseModel): + url: str + headers: dict + task: TaskComplete + + +def write(task_id, outputs): + outputs = cs_storage.deserialize_from_json(outputs) + res = cs_storage.write(task_id, outputs) return res -async def push(url, auth_headers, task_name, result): - async with httpx.AsyncClient(headers=auth_headers) as client: - if task_name == "sim": - print(f"posting data to {url}/outputs/api/") - return await client.put(f"{url}/outputs/api/", json=result) - if task_name == "parse": - print(f"posting data to {url}/inputs/api/") - return await client.put(f"{url}/inputs/api/", json=result) - else: - raise ValueError(f"Unknown task type: {task_name}.") - - -class Write(tornado.web.RequestHandler): - async def post(self): - print("POST -- /write/") - payload = json.loads(self.request.body.decode("utf-8")) - result = await write(**payload) - print("success-write") - self.write(result) - - -class Push(tornado.web.RequestHandler): - async def post(self): - print("POST -- /push/") - data = json.loads(self.request.body.decode("utf-8")) - job_id = data.get("result", {}).get("task_id", None) - if job_id is None: - print("missing job id") - self.set_status(400) - self.write(json.dumps({"error": "Missing job id."})) - return - - with redis.Redis(**redis_conn) as rclient: - data = rclient.get(f"jobinfo-{job_id}") - - if data is None: - print("Unknown job id: ", job_id) - self.set_status(400) - self.write(json.dumps({"error": "Unknown job id."})) - return - - jobinfo = json.loads(data.decode()) - print("got jobinfo", jobinfo) - cluster_user = jobinfo.get("cluster_user", None) - if cluster_user is None: - print("missing Cluster-User") - self.set_status(400) - self.write(json.dumps({"error": "Missing cluster_user."})) - return - user = auth.User.get(cluster_user) - if user is None: - print("unknown user", cluster_user) - self.set_status(404) - return - - print("got user", user.username, user.url) - - payload = json.loads(self.request.body.decode("utf-8")) - resp = await push(url=user.url, auth_headers=user.headers(), **payload) - print("got resp-push", resp.status_code, resp.url) - self.set_status(200) - - -def get_app(): - assert Client is not None, "Unable to import dask client" - assert auth.cryptkeeper is not None - assert BUCKET - return tornado.web.Application([(r"/write/", Write), (r"/push/", Push)]) - - -def start(args: argparse.Namespace): - if args.start: - app = get_app() - app.listen(8888) - tornado.ioloop.IOLoop.current().start() - - -def cli(subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser( - "outputs-processor", - aliases=["outputs"], - description="REST API for processing and storing outputs.", - ) - parser.add_argument("--start", required=False, action="store_true") - parser.set_defaults(func=start) +def push(job_id: str, result: Result): + if result.task.task_name == "sim": + print(f"posting data to {result.url}/outputs/api/") + result.task.outputs = write(job_id, result.task.outputs) + return httpx.put( + f"{result.url}/outputs/api/", + json=dict(job_id=job_id, **result.task.dict()), + headers=result.headers, + ) + elif result.task.task_name == "parse": + print(f"posting data to {result.url}/inputs/api/") + return httpx.put( + f"{result.url}/inputs/api/", + json=dict(job_id=job_id, **result.task.dict()), + headers=result.headers, + ) + elif result.task.task_name == "defaults": + print(f"posting data to {result.url}/model-config/api/") + return httpx.put( + f"{result.url}/model-config/api/", + json=dict(job_id=job_id, **result.task.dict()), + headers=result.headers, + ) + + +@app.post("/{job_id}/", status_code=200) +async def post(job_id: str, result: Result = Body(...)): + print("POST -- /", job_id) + queue.enqueue(push, job_id, result) diff --git a/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml b/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml index daffb3bf..2cbfa702 100644 --- a/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml +++ b/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml @@ -22,12 +22,6 @@ spec: - env: [] command: ["redis-server", "--appendonly", "yes"] image: # redis-python - lifecycle: - postStart: - exec: - command: - - python3 - - /home/redis_init.py name: master ports: - containerPort: 6379 diff --git a/workers/setup.py b/workers/setup.py index 77ba1226..c7d23092 100644 --- a/workers/setup.py +++ b/workers/setup.py @@ -28,6 +28,9 @@ "tornado", "cs-storage>=1.11.0", "docker", + "pydantic[email,dotenv]", + "fastapi", + "rq", ], include_package_data=True, entry_points={ From 226e1a6ff3abb0ac3aa1b6ffa782f770b9dcce34 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 24 Mar 2021 11:26:28 -0400 Subject: [PATCH 03/41] Set up helm for kubernetes management --- workers/cs-workers/.helmignore | 23 +++++++ workers/cs-workers/Chart.yaml | 23 +++++++ workers/cs-workers/templates/_helpers.tpl | 63 +++++++++++++++++++ workers/cs-workers/templates/deployment.yaml | 61 ++++++++++++++++++ .../cs-workers/templates/job-cleanup-Job.yaml | 17 +++++ .../templates/job-cleanup-RBAC.yaml | 28 +++++++++ .../outputs-processor-Deployment.yaml | 56 +++++++++++++++++ .../templates/outputs-processor-Service.yaml | 10 +++ .../templates/redis-master-Deployment.yaml | 42 +++++++++++++ .../templates/redis-master-Service.yaml | 15 +++++ .../templates/scheduler-Deployment.yaml | 56 +++++++++++++++++ .../cs-workers/templates/scheduler-RBAC.yaml | 52 +++++++++++++++ .../scheduler-ingressroute.template.yaml | 16 +++++ workers/cs-workers/values.yaml | 11 ++++ 14 files changed, 473 insertions(+) create mode 100644 workers/cs-workers/.helmignore create mode 100644 workers/cs-workers/Chart.yaml create mode 100644 workers/cs-workers/templates/_helpers.tpl create mode 100644 workers/cs-workers/templates/deployment.yaml create mode 100644 workers/cs-workers/templates/job-cleanup-Job.yaml create mode 100644 workers/cs-workers/templates/job-cleanup-RBAC.yaml create mode 100755 workers/cs-workers/templates/outputs-processor-Deployment.yaml create mode 100644 workers/cs-workers/templates/outputs-processor-Service.yaml create mode 100644 workers/cs-workers/templates/redis-master-Deployment.yaml create mode 100644 workers/cs-workers/templates/redis-master-Service.yaml create mode 100755 workers/cs-workers/templates/scheduler-Deployment.yaml create mode 100644 workers/cs-workers/templates/scheduler-RBAC.yaml create mode 100644 workers/cs-workers/templates/scheduler-ingressroute.template.yaml create mode 100644 workers/cs-workers/values.yaml diff --git a/workers/cs-workers/.helmignore b/workers/cs-workers/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/workers/cs-workers/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/workers/cs-workers/Chart.yaml b/workers/cs-workers/Chart.yaml new file mode 100644 index 00000000..53974388 --- /dev/null +++ b/workers/cs-workers/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: cs-workers +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 1.16.0 diff --git a/workers/cs-workers/templates/_helpers.tpl b/workers/cs-workers/templates/_helpers.tpl new file mode 100644 index 00000000..e21e6717 --- /dev/null +++ b/workers/cs-workers/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "cs-workers.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cs-workers.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cs-workers.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "cs-workers.labels" -}} +helm.sh/chart: {{ include "cs-workers.chart" . }} +{{ include "cs-workers.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "cs-workers.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cs-workers.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "cs-workers.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "cs-workers.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/workers/cs-workers/templates/deployment.yaml b/workers/cs-workers/templates/deployment.yaml new file mode 100644 index 00000000..75e78a99 --- /dev/null +++ b/workers/cs-workers/templates/deployment.yaml @@ -0,0 +1,61 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "cs-workers.fullname" . }} + labels: + {{- include "cs-workers.labels" . | nindent 4 }} +spec: +{{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} +{{- end }} + selector: + matchLabels: + {{- include "cs-workers.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cs-workers.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "cs-workers.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 80 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/workers/cs-workers/templates/job-cleanup-Job.yaml b/workers/cs-workers/templates/job-cleanup-Job.yaml new file mode 100644 index 00000000..72f9325a --- /dev/null +++ b/workers/cs-workers/templates/job-cleanup-Job.yaml @@ -0,0 +1,17 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: job-cleanup +spec: + schedule: "*/30 * * * *" + successfulJobsHistoryLimit: 0 + jobTemplate: + spec: + template: + spec: + serviceAccountName: job-cleanup + containers: + - name: kubectl-container + image: bitnami/kubectl:latest + command: ["sh", "-c", "kubectl delete jobs --field-selector status.successful=1"] + restartPolicy: Never diff --git a/workers/cs-workers/templates/job-cleanup-RBAC.yaml b/workers/cs-workers/templates/job-cleanup-RBAC.yaml new file mode 100644 index 00000000..61ddb5e2 --- /dev/null +++ b/workers/cs-workers/templates/job-cleanup-RBAC.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: job-cleanup +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: job-remove + namespace: default +rules: + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: job-remove + namespace: default +subjects: + - kind: ServiceAccount + name: job-cleanup + namespace: default +roleRef: + kind: Role + name: job-remove + apiGroup: rbac.authorization.k8s.io diff --git a/workers/cs-workers/templates/outputs-processor-Deployment.yaml b/workers/cs-workers/templates/outputs-processor-Deployment.yaml new file mode 100755 index 00000000..8a5825e2 --- /dev/null +++ b/workers/cs-workers/templates/outputs-processor-Deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: outputs-processor +spec: + replicas: 1 + selector: + matchLabels: + app: outputs-processor + template: + metadata: + labels: + app: outputs-processor + spec: + serviceAccountName: outputs-processor + containers: + - name: outputs-processor + image: + ports: + - containerPort: 8888 + env: + - name: BUCKET + valueFrom: + secretKeyRef: + name: worker-secret + key: BUCKET + - name: REDIS_HOST + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_HOST + - name: REDIS_PORT + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_PORT + - name: REDIS_DB + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_DB + optional: true + - name: REDIS_USER + value: outputs + - name: REDIS_PW + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_OUTPUTS_PW + - name: CS_CRYPT_KEY + valueFrom: + secretKeyRef: + name: worker-secret + key: CS_CRYPT_KEY + nodeSelector: + component: api diff --git a/workers/cs-workers/templates/outputs-processor-Service.yaml b/workers/cs-workers/templates/outputs-processor-Service.yaml new file mode 100644 index 00000000..2edac636 --- /dev/null +++ b/workers/cs-workers/templates/outputs-processor-Service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: outputs-processor +spec: + ports: + - port: 80 + targetPort: 8888 + selector: + app: outputs-processor diff --git a/workers/cs-workers/templates/redis-master-Deployment.yaml b/workers/cs-workers/templates/redis-master-Deployment.yaml new file mode 100644 index 00000000..e1e46e29 --- /dev/null +++ b/workers/cs-workers/templates/redis-master-Deployment.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: redis + name: redis-master +spec: + replicas: 1 + selector: + matchLabels: + app: redis + role: master + tier: backend + template: + metadata: + labels: + app: redis + role: master + tier: backend + spec: + containers: + - env: [] + command: ["redis-server", "--appendonly", "yes", "--requirepassword"] + image: # redis-python + name: master + ports: + - containerPort: 6379 + resources: + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /data + name: redis-volume + volumes: + - name: redis-volume + # This GCE PD must already exist. + gcePersistentDisk: + pdName: redis-disk + fsType: ext4 + nodeSelector: + component: api diff --git a/workers/cs-workers/templates/redis-master-Service.yaml b/workers/cs-workers/templates/redis-master-Service.yaml new file mode 100644 index 00000000..04af2120 --- /dev/null +++ b/workers/cs-workers/templates/redis-master-Service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: redis + role: master + tier: backend + name: redis-master +spec: + ports: + - port: 6379 + targetPort: 6379 + selector: + app: redis + role: master diff --git a/workers/cs-workers/templates/scheduler-Deployment.yaml b/workers/cs-workers/templates/scheduler-Deployment.yaml new file mode 100755 index 00000000..a49c0500 --- /dev/null +++ b/workers/cs-workers/templates/scheduler-Deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: scheduler +spec: + replicas: 1 + selector: + matchLabels: + app: scheduler + template: + metadata: + labels: + app: scheduler + spec: + serviceAccountName: scheduler + containers: + - name: scheduler + image: + ports: + - containerPort: 8888 + env: + - name: PROJECT + valueFrom: + secretKeyRef: + name: worker-secret + key: PROJECT + - name: CS_CRYPT_KEY + valueFrom: + secretKeyRef: + name: worker-secret + key: CS_CRYPT_KEY + - name: REDIS_HOST + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_HOST + - name: REDIS_PORT + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_PORT + - name: REDIS_DB + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_DB + optional: true + - name: REDIS_USER + value: scheduler + - name: REDIS_PW + valueFrom: + secretKeyRef: + name: worker-secret + key: REDIS_SCHEDULER_PW + nodeSelector: + component: api diff --git a/workers/cs-workers/templates/scheduler-RBAC.yaml b/workers/cs-workers/templates/scheduler-RBAC.yaml new file mode 100644 index 00000000..35ecae7d --- /dev/null +++ b/workers/cs-workers/templates/scheduler-RBAC.yaml @@ -0,0 +1,52 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: scheduler +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: job-admin + namespace: default +rules: + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "create", "update", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: job-admin + namespace: default +subjects: + - kind: ServiceAccount + name: scheduler + namespace: default +roleRef: + kind: Role + name: job-admin + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: viz-admin + namespace: default +rules: + - apiGroups: ["apps", "", "traefik.containo.us"] + resources: ["deployments", "services", "ingressroutes"] + verbs: ["get", "list", "watch", "create", "update", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: viz-admin + namespace: default +subjects: + - kind: ServiceAccount + name: scheduler + namespace: default +roleRef: + kind: Role + name: viz-admin + apiGroup: rbac.authorization.k8s.io diff --git a/workers/cs-workers/templates/scheduler-ingressroute.template.yaml b/workers/cs-workers/templates/scheduler-ingressroute.template.yaml new file mode 100644 index 00000000..e64f2714 --- /dev/null +++ b/workers/cs-workers/templates/scheduler-ingressroute.template.yaml @@ -0,0 +1,16 @@ +apiVersion: traefik.containo.us/v1alpha1 +kind: IngressRoute +metadata: + name: scheduler-tls + namespace: default +spec: + entryPoints: + - websecure + routes: + - match: + kind: Rule + services: + - name: scheduler + port: 80 + tls: + certResolver: myresolver diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml new file mode 100644 index 00000000..ca55cbba --- /dev/null +++ b/workers/cs-workers/values.yaml @@ -0,0 +1,11 @@ +# Default values for cs-workers. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: nginx + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" From c1c0d07978ed14b363f12b2408c48b0368e593e1 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 24 Mar 2021 18:03:34 -0400 Subject: [PATCH 04/41] Use helm for compute cluster config --- kind-config.yaml | 8 +- workers/cs-workers/Chart.yaml | 4 + .../cs-workers/templates/db-deployment.yaml | 54 +++++ workers/cs-workers/templates/db-secret.yaml | 10 + workers/cs-workers/templates/db-service.yaml | 12 ++ workers/cs-workers/templates/deployment.yaml | 61 ------ .../templates/job-cleanup-RBAC.yaml | 6 +- .../outputs-processor-Deployment.yaml | 39 +--- .../templates/outputs-processor-RBAC.yaml | 4 + .../templates/outputs-processor-Service.yaml | 4 +- .../templates/redis-master-Deployment.yaml | 23 ++- .../cs-workers/templates/rq-Deployment.yaml | 28 +++ workers/cs-workers/templates/rq-RBAC.yaml | 4 + .../templates/scheduler-Deployment.yaml | 57 +++--- .../cs-workers/templates/scheduler-RBAC.yaml | 18 +- .../templates/scheduler-Service.yaml | 11 + ...plate.yaml => scheduler-ingressroute.yaml} | 2 + .../templates/workers-api-secret.yaml | 7 + workers/cs-workers/values.yaml | 47 ++++- .../dockerfiles/Dockerfile.outputs_processor | 8 +- ...rfile.scheduler => Dockerfile.workers_api} | 6 +- .../services/api/routers/simulations.py | 1 - workers/cs_workers/services/manage.py | 188 +----------------- .../cs_workers/services/outputs_processor.py | 6 +- ...outputs-processor-Deployment.template.yaml | 35 +--- workers/requirements.txt | 3 + 26 files changed, 274 insertions(+), 372 deletions(-) create mode 100644 workers/cs-workers/templates/db-deployment.yaml create mode 100644 workers/cs-workers/templates/db-secret.yaml create mode 100644 workers/cs-workers/templates/db-service.yaml delete mode 100644 workers/cs-workers/templates/deployment.yaml create mode 100644 workers/cs-workers/templates/outputs-processor-RBAC.yaml create mode 100644 workers/cs-workers/templates/rq-Deployment.yaml create mode 100644 workers/cs-workers/templates/rq-RBAC.yaml create mode 100644 workers/cs-workers/templates/scheduler-Service.yaml rename workers/cs-workers/templates/{scheduler-ingressroute.template.yaml => scheduler-ingressroute.yaml} (88%) create mode 100644 workers/cs-workers/templates/workers-api-secret.yaml rename workers/cs_workers/dockerfiles/{Dockerfile.scheduler => Dockerfile.workers_api} (70%) diff --git a/kind-config.yaml b/kind-config.yaml index fc35654b..f5557d68 100644 --- a/kind-config.yaml +++ b/kind-config.yaml @@ -4,8 +4,12 @@ nodes: - role: control-plane - role: worker extraMounts: - - hostPath: /home/hankdoupe/compute-studio/redis-data - containerPath: /redis-data + - hostPath: /home/hankdoupe/compute-studio/redis-queue-data + containerPath: /redis-queue-data + - hostPath: /home/hankdoupe/compute-studio/workers-db-data + containerPath: /workers-db-data + - hostPath: /home/hankdoupe/compute-studio + containerPath: /code - role: worker - role: worker extraMounts: diff --git a/workers/cs-workers/Chart.yaml b/workers/cs-workers/Chart.yaml index 53974388..d570356a 100644 --- a/workers/cs-workers/Chart.yaml +++ b/workers/cs-workers/Chart.yaml @@ -21,3 +21,7 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. appVersion: 1.16.0 +# dependencies: +# - name: traefik +# version: "9.1.1" +# repository: "https://helm.traefik.io/traefik" diff --git a/workers/cs-workers/templates/db-deployment.yaml b/workers/cs-workers/templates/db-deployment.yaml new file mode 100644 index 00000000..6ffcb5c4 --- /dev/null +++ b/workers/cs-workers/templates/db-deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: workers-db +spec: + replicas: 1 + selector: + matchLabels: + app: workers-db + template: + metadata: + labels: + app: workers-db + spec: + containers: + - name: workers-db + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: workers-db-secret + key: USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: workers-db-secret + key: PASSWORD + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: workers-db-secret + key: NAME + image: postgres:12.4 + ports: + - containerPort: 5432 + resources: + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + {{- range $name, $value := .Values.db.volumeMounts }} + - name: {{ $value.name }} + mountPath: {{ quote $value.mountPath }} + subPath: {{ quote $value.subPath }} + {{- end }} + volumes: + {{- range $name, $value := .Values.db.volumes }} + - name: {{ $value.name }} + hostPath: + path: {{ $value.hostPath.path }} + type: {{ $value.hostPath.type }} + {{- end}} + nodeSelector: + component: web diff --git a/workers/cs-workers/templates/db-secret.yaml b/workers/cs-workers/templates/db-secret.yaml new file mode 100644 index 00000000..dccc2f02 --- /dev/null +++ b/workers/cs-workers/templates/db-secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: workers-db-secret +type: Opaque +stringData: + USER: {{ .Values.db.user }} + PASSWORD: {{ .Values.db.password }} + HOST: {{ .Values.db.host }} + NAME: {{ .Values.db.name }} \ No newline at end of file diff --git a/workers/cs-workers/templates/db-service.yaml b/workers/cs-workers/templates/db-service.yaml new file mode 100644 index 00000000..51841105 --- /dev/null +++ b/workers/cs-workers/templates/db-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: workers-db + name: workers-db +spec: + ports: + - port: 5432 + targetPort: 5432 + selector: + app: workers-db diff --git a/workers/cs-workers/templates/deployment.yaml b/workers/cs-workers/templates/deployment.yaml deleted file mode 100644 index 75e78a99..00000000 --- a/workers/cs-workers/templates/deployment.yaml +++ /dev/null @@ -1,61 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "cs-workers.fullname" . }} - labels: - {{- include "cs-workers.labels" . | nindent 4 }} -spec: -{{- if not .Values.autoscaling.enabled }} - replicas: {{ .Values.replicaCount }} -{{- end }} - selector: - matchLabels: - {{- include "cs-workers.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "cs-workers.selectorLabels" . | nindent 8 }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "cs-workers.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: {{ .Chart.Name }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - ports: - - name: http - containerPort: 80 - protocol: TCP - livenessProbe: - httpGet: - path: / - port: http - readinessProbe: - httpGet: - path: / - port: http - resources: - {{- toYaml .Values.resources | nindent 12 }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/workers/cs-workers/templates/job-cleanup-RBAC.yaml b/workers/cs-workers/templates/job-cleanup-RBAC.yaml index 61ddb5e2..48d061d6 100644 --- a/workers/cs-workers/templates/job-cleanup-RBAC.yaml +++ b/workers/cs-workers/templates/job-cleanup-RBAC.yaml @@ -7,7 +7,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: job-remove - namespace: default + namespace: {{ .Release.Namespace }} rules: - apiGroups: ["batch", "extensions"] resources: ["jobs"] @@ -17,11 +17,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: job-remove - namespace: default + namespace: {{ .Release.Namespace }} subjects: - kind: ServiceAccount name: job-cleanup - namespace: default + namespace: {{ .Release.Namespace }} roleRef: kind: Role name: job-remove diff --git a/workers/cs-workers/templates/outputs-processor-Deployment.yaml b/workers/cs-workers/templates/outputs-processor-Deployment.yaml index 8a5825e2..44d23dd7 100755 --- a/workers/cs-workers/templates/outputs-processor-Deployment.yaml +++ b/workers/cs-workers/templates/outputs-processor-Deployment.yaml @@ -15,42 +15,17 @@ spec: serviceAccountName: outputs-processor containers: - name: outputs-processor - image: + image: "{{ .Values.registry }}/{{ .Values.project }}/outputs_processor:{{ .Values.tag }}" ports: - - containerPort: 8888 + - containerPort: 5000 env: - name: BUCKET - valueFrom: - secretKeyRef: - name: worker-secret - key: BUCKET + value: {{ .Values.bucket }} + - name: PROJECT + value: {{ .Values.project }} - name: REDIS_HOST - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_HOST + value: {{ .Values.redis.host }} - name: REDIS_PORT - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_PORT - - name: REDIS_DB - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_DB - optional: true - - name: REDIS_USER - value: outputs - - name: REDIS_PW - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_OUTPUTS_PW - - name: CS_CRYPT_KEY - valueFrom: - secretKeyRef: - name: worker-secret - key: CS_CRYPT_KEY + value: "{{ .Values.redis.port }}" nodeSelector: component: api diff --git a/workers/cs-workers/templates/outputs-processor-RBAC.yaml b/workers/cs-workers/templates/outputs-processor-RBAC.yaml new file mode 100644 index 00000000..b78a435d --- /dev/null +++ b/workers/cs-workers/templates/outputs-processor-RBAC.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: outputs-processor diff --git a/workers/cs-workers/templates/outputs-processor-Service.yaml b/workers/cs-workers/templates/outputs-processor-Service.yaml index 2edac636..3914b3c0 100644 --- a/workers/cs-workers/templates/outputs-processor-Service.yaml +++ b/workers/cs-workers/templates/outputs-processor-Service.yaml @@ -4,7 +4,7 @@ metadata: name: outputs-processor spec: ports: - - port: 80 - targetPort: 8888 + - port: 80 + targetPort: 5000 selector: app: outputs-processor diff --git a/workers/cs-workers/templates/redis-master-Deployment.yaml b/workers/cs-workers/templates/redis-master-Deployment.yaml index e1e46e29..bbbd5ba2 100644 --- a/workers/cs-workers/templates/redis-master-Deployment.yaml +++ b/workers/cs-workers/templates/redis-master-Deployment.yaml @@ -20,8 +20,8 @@ spec: spec: containers: - env: [] - command: ["redis-server", "--appendonly", "yes", "--requirepassword"] - image: # redis-python + command: ["redis-server", "--appendonly", "yes"] + image: redis name: master ports: - containerPort: 6379 @@ -30,13 +30,16 @@ spec: cpu: 100m memory: 100Mi volumeMounts: - - mountPath: /data - name: redis-volume - volumes: - - name: redis-volume - # This GCE PD must already exist. - gcePersistentDisk: - pdName: redis-disk - fsType: ext4 + {{- range $name, $value := .Values.redis.volumeMounts }} + - name: {{ $value.name }} + mountPath: {{ quote $value.mountPath }} + {{- end }} + volumes: + {{- range $name, $value := .Values.redis.volumes }} + - name: {{ $value.name }} + hostPath: + path: {{ $value.hostPath.path }} + type: {{ $value.hostPath.type }} + {{- end}} nodeSelector: component: api diff --git a/workers/cs-workers/templates/rq-Deployment.yaml b/workers/cs-workers/templates/rq-Deployment.yaml new file mode 100644 index 00000000..d8a1a16e --- /dev/null +++ b/workers/cs-workers/templates/rq-Deployment.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rq-worker-outputs +spec: + replicas: 1 + selector: + matchLabels: + app: rq-worker-outputs + template: + metadata: + labels: + app: rq-worker-outputs + spec: + serviceAccountName: rq-worker + containers: + - name: rq-worker-outputs + command: ["rq", "worker", "--with-scheduler", "--url", "$(REDIS_URL)"] + image: "{{ .Values.registry }}/{{ .Values.project }}/outputs_processor:{{ .Values.tag }}" + env: + - name: BUCKET + value: {{ .Values.bucket }} + - name: PROJECT + value: {{ .Values.project }} + - name: REDIS_URL + value: "redis://{{ .Values.redis.host }}:{{ .Values.redis.port }}" + nodeSelector: + component: api diff --git a/workers/cs-workers/templates/rq-RBAC.yaml b/workers/cs-workers/templates/rq-RBAC.yaml new file mode 100644 index 00000000..a435d527 --- /dev/null +++ b/workers/cs-workers/templates/rq-RBAC.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rq-worker diff --git a/workers/cs-workers/templates/scheduler-Deployment.yaml b/workers/cs-workers/templates/scheduler-Deployment.yaml index a49c0500..03abdf80 100755 --- a/workers/cs-workers/templates/scheduler-Deployment.yaml +++ b/workers/cs-workers/templates/scheduler-Deployment.yaml @@ -1,56 +1,53 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: scheduler + name: workers-api spec: replicas: 1 selector: matchLabels: - app: scheduler + app: workers-api template: metadata: labels: - app: scheduler + app: workers-api spec: - serviceAccountName: scheduler + serviceAccountName: workers-api containers: - - name: scheduler - image: + - name: workers-api + image: "{{ .Values.registry }}/{{ .Values.project }}/workers_api:{{ .Values.tag }}" ports: - - containerPort: 8888 + - containerPort: 5000 env: - - name: PROJECT + - name: BUCKET + value: "{{ .Values.bucket }}" + - name: VIZ_HOST + value: "{{ .Values.viz_host }}" + - name: API_SECRET_KEY valueFrom: secretKeyRef: - name: worker-secret - key: PROJECT - - name: CS_CRYPT_KEY + name: workers-api-secret + key: API_SECRET_KEY + - name: DB_USER valueFrom: secretKeyRef: - name: worker-secret - key: CS_CRYPT_KEY - - name: REDIS_HOST + name: workers-db-secret + key: USER + - name: DB_PASS valueFrom: secretKeyRef: - name: worker-secret - key: REDIS_HOST - - name: REDIS_PORT + name: workers-db-secret + key: PASSWORD + - name: DB_NAME valueFrom: secretKeyRef: - name: worker-secret - key: REDIS_PORT - - name: REDIS_DB + name: workers-db-secret + key: NAME + - name: DB_HOST valueFrom: secretKeyRef: - name: worker-secret - key: REDIS_DB - optional: true - - name: REDIS_USER - value: scheduler - - name: REDIS_PW - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_SCHEDULER_PW + name: workers-db-secret + key: HOST + nodeSelector: component: api diff --git a/workers/cs-workers/templates/scheduler-RBAC.yaml b/workers/cs-workers/templates/scheduler-RBAC.yaml index 35ecae7d..8eecae3d 100644 --- a/workers/cs-workers/templates/scheduler-RBAC.yaml +++ b/workers/cs-workers/templates/scheduler-RBAC.yaml @@ -1,13 +1,13 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: scheduler + name: workers-api --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: job-admin - namespace: default + namespace: {{ .Release.Namespace }} rules: - apiGroups: ["batch", "extensions"] resources: ["jobs"] @@ -17,11 +17,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: job-admin - namespace: default + namespace: {{ .Release.Namespace }} subjects: - kind: ServiceAccount - name: scheduler - namespace: default + name: workers-api + namespace: {{ .Release.Namespace }} roleRef: kind: Role name: job-admin @@ -31,7 +31,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: viz-admin - namespace: default + namespace: {{ .Release.Namespace }} rules: - apiGroups: ["apps", "", "traefik.containo.us"] resources: ["deployments", "services", "ingressroutes"] @@ -41,11 +41,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: viz-admin - namespace: default + namespace: {{ .Release.Namespace }} subjects: - kind: ServiceAccount - name: scheduler - namespace: default + name: workers-api + namespace: {{ .Release.Namespace }} roleRef: kind: Role name: viz-admin diff --git a/workers/cs-workers/templates/scheduler-Service.yaml b/workers/cs-workers/templates/scheduler-Service.yaml new file mode 100644 index 00000000..f5534095 --- /dev/null +++ b/workers/cs-workers/templates/scheduler-Service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: workers-api +spec: + ports: + - port: 80 + targetPort: 5000 + selector: + app: workers-api + type: LoadBalancer diff --git a/workers/cs-workers/templates/scheduler-ingressroute.template.yaml b/workers/cs-workers/templates/scheduler-ingressroute.yaml similarity index 88% rename from workers/cs-workers/templates/scheduler-ingressroute.template.yaml rename to workers/cs-workers/templates/scheduler-ingressroute.yaml index e64f2714..585d2993 100644 --- a/workers/cs-workers/templates/scheduler-ingressroute.template.yaml +++ b/workers/cs-workers/templates/scheduler-ingressroute.yaml @@ -1,3 +1,4 @@ +{{ if .Values.use_traefik }} apiVersion: traefik.containo.us/v1alpha1 kind: IngressRoute metadata: @@ -14,3 +15,4 @@ spec: port: 80 tls: certResolver: myresolver +{{ end }} \ No newline at end of file diff --git a/workers/cs-workers/templates/workers-api-secret.yaml b/workers/cs-workers/templates/workers-api-secret.yaml new file mode 100644 index 00000000..309e237f --- /dev/null +++ b/workers/cs-workers/templates/workers-api-secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: workers-api-secret +type: Opaque +stringData: + API_SECRET_KEY: {{ .Values.api_secret_key }} diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index ca55cbba..b36b6112 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -3,9 +3,46 @@ # Declare variables to be passed into your templates. replicaCount: 1 +bucket: cs-outputs-dev-private +viz_host: devviz.compute.studio +# image: +project: project +registry: gcr.io +# Overrides the image tag whose default is the chart appVersion. +tag: tag -image: - repository: nginx - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "" +api_secret_key: abc123 + +redis: + host: "redis-master" + port: "6379" + volumes: + - name: redis-volume-v1 + hostPath: + path: /redis-queue-data + type: Directory + volumeMounts: + - mountPath: /data + name: redis-volume-v1 + +db: + name: cluster_db + user: postgres + password: password + host: workers-db + volumes: + - name: workers-db-volume + hostPath: + path: /workers-db-data + type: DirectoryOrCreate + + volumeMounts: + - mountPath: /var/lib/postgresql/data/ + name: workers-db-volume + subPath: postgres +# credsVolumes: +# env_var: GOOGLE_APPLICATION_CREDENTIALS +# path: /google-creds.json +# volumes: +# - name: google-creds-volume +# hostPath diff --git a/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor b/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor index 862e1719..f785e223 100755 --- a/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor +++ b/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor @@ -12,8 +12,8 @@ RUN apt-get update && \ RUN pip install -r requirements.txt && \ - pip install pyppeteer2 && \ - conda install -c conda-forge jinja2 bokeh tornado dask && \ + pip install pyppeteer2 rq && \ + conda install -c conda-forge jinja2 bokeh && \ pyppeteer-install RUN mkdir /home/cs_workers @@ -27,8 +27,8 @@ RUN pip install -e ./secrets COPY deploy /home/deploy RUN pip install -e ./deploy -WORKDIR /home +WORKDIR /home/cs_workers ENV PYTHONUNBUFFERED 1 -CMD ["csw", "outputs", "--start"] \ No newline at end of file +CMD ["uvicorn", "services.outputs_processor:app", "--port", "5000", "--reload"] \ No newline at end of file diff --git a/workers/cs_workers/dockerfiles/Dockerfile.scheduler b/workers/cs_workers/dockerfiles/Dockerfile.workers_api similarity index 70% rename from workers/cs_workers/dockerfiles/Dockerfile.scheduler rename to workers/cs_workers/dockerfiles/Dockerfile.workers_api index 6d13c983..893b682c 100755 --- a/workers/cs_workers/dockerfiles/Dockerfile.scheduler +++ b/workers/cs_workers/dockerfiles/Dockerfile.workers_api @@ -12,7 +12,7 @@ EXPOSE 80 EXPOSE 8888 RUN pip install -r requirements.txt && \ - conda install tornado + pip install python-multipart sqlalchemy python-jose[cryptography] psycopg2-binary passlib[bcrypt] RUN mkdir /home/cs_workers COPY workers/cs_workers /home/cs_workers @@ -29,4 +29,6 @@ WORKDIR /home ENV PYTHONUNBUFFERED 1 -CMD ["csw", "scheduler", "--start"] \ No newline at end of file +WORKDIR /home/cs_workers/services/ + +CMD ["uvicorn", "api.main:app", "--port", "5000", "--reload"] \ No newline at end of file diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index 4c254d85..7a8d95fe 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -11,7 +11,6 @@ from .. import models, schemas, dependencies as deps, security incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False -assert not incluster PROJECT = os.environ.get("PROJECT") diff --git a/workers/cs_workers/services/manage.py b/workers/cs_workers/services/manage.py index cdf68c55..6b0d5f17 100644 --- a/workers/cs_workers/services/manage.py +++ b/workers/cs_workers/services/manage.py @@ -50,7 +50,7 @@ class Manager: Deploy and manage Compute Studio compute cluster: - build, tag, and push the docker images for the flask app and compute.studio modeling apps. - - write k8s config files for the scheduler deployment and the + - write k8s config files for the workers_api deployment and the compute.studio modeling app deployments. - apply k8s config files to an existing compute cluster. @@ -94,36 +94,6 @@ def __init__( self.templates_dir = BASE_PATH / Path("templates") self.dockerfiles_dir = BASE_PATH / Path("dockerfiles") - with open( - self.templates_dir / "services" / "scheduler-Deployment.template.yaml", "r" - ) as f: - self.scheduler_template = yaml.safe_load(f.read()) - - with open( - self.templates_dir / "services" / "scheduler-ingressroute.template.yaml", - "r", - ) as f: - self.scheduler_ir_template = yaml.safe_load(f.read()) - - with open( - self.templates_dir - / "services" - / "outputs-processor-Deployment.template.yaml", - "r", - ) as f: - self.outputs_processor_template = yaml.safe_load(f.read()) - with open( - self.templates_dir / "services" / "outputs-processor-ServiceAccount.yaml", - "r", - ) as f: - self.outputs_processor_serviceaccount = yaml.safe_load(f.read()) - - with open( - self.templates_dir / "services" / "redis-master-Deployment.template.yaml", - "r", - ) as f: - self.redis_master_template = yaml.safe_load(f.read()) - with open(self.templates_dir / "secret.template.yaml", "r") as f: self.secret_template = yaml.safe_load(f.read()) @@ -132,33 +102,26 @@ def __init__( def build(self): """ - Build, tag, and push base images for the scheduler app. + Build, tag, and push base images for the workers_api app. Note: distributed and celerybase are tagged as "latest." All other apps pull from either distributed:latest or celerybase:latest. """ distributed = self.dockerfiles_dir / "Dockerfile" - redis = self.dockerfiles_dir / "Dockerfile.redis" outputs_processor = self.dockerfiles_dir / "Dockerfile.outputs_processor" - scheduler = self.dockerfiles_dir / "Dockerfile.scheduler" + workers_api = self.dockerfiles_dir / "Dockerfile.workers_api" run(f"docker build -t distributed:latest -f {distributed} ./") - run(f"docker build -t redis-python:{self.tag} -f {redis} ./") run(f"docker build -t outputs_processor:{self.tag} -f {outputs_processor} ./") - run(f"docker build -t scheduler:{self.tag} -f {scheduler} ./") + run(f"docker build -t workers_api:{self.tag} -f {workers_api} ./") def push(self): run(f"docker tag distributed {self.cr}/{self.project}/distributed:latest") - run( - f"docker tag redis-python:{self.tag} {self.cr}/{self.project}/redis-python:{self.tag}" - ) - run( f"docker tag outputs_processor:{self.tag} {self.cr}/{self.project}/outputs_processor:{self.tag}" ) - run( - f"docker tag scheduler:{self.tag} {self.cr}/{self.project}/scheduler:{self.tag}" + f"docker tag workers_api:{self.tag} {self.cr}/{self.project}/workers_api:{self.tag}" ) if self.use_kind: @@ -167,119 +130,12 @@ def push(self): cmd_prefix = "docker push" run(f"{cmd_prefix} {self.cr}/{self.project}/distributed:latest") - run(f"{cmd_prefix} {self.cr}/{self.project}/redis-python:{self.tag}") run(f"{cmd_prefix} {self.cr}/{self.project}/outputs_processor:{self.tag}") - run(f"{cmd_prefix} {self.cr}/{self.project}/scheduler:{self.tag}") + run(f"{cmd_prefix} {self.cr}/{self.project}/workers_api:{self.tag}") def config(self, update_redis=False, update_dns=False): - config_filenames = [ - "scheduler-Service.yaml", - "scheduler-RBAC.yaml", - "outputs-processor-Service.yaml", - "job-cleanup-Job.yaml", - "job-cleanup-RBAC.yaml", - ] - if update_redis: - config_filenames.append("redis-master-Service.yaml") - for filename in config_filenames: - with open(self.templates_dir / "services" / f"{filename}", "r") as f: - configs = yaml.safe_load_all(f.read()) - for config in configs: - name = config["metadata"]["name"] - kind = config["kind"] - self.write_config(f"{name}-{kind}.yaml", config) - self.write_scheduler_deployment() if update_dns: - self.write_scheduler_ingressroute() self.write_cloudflare_api_token() - self.write_outputs_processor_deployment() - self.write_secret() - if update_redis: - self.write_redis_deployment() - - def write_scheduler_deployment(self): - """ - Write scheduler deployment file. Only step is filling in the image uri. - """ - deployment = copy.deepcopy(self.scheduler_template) - deployment["spec"]["template"]["spec"]["containers"][0][ - "image" - ] = f"gcr.io/{self.project}/scheduler:{self.tag}" - deployment["spec"]["template"]["spec"]["containers"][0]["env"] += [ - {"name": "VIZ_HOST", "value": self.viz_host}, - ] - self.write_config("scheduler-Deployment.yaml", deployment) - - return deployment - - def write_scheduler_ingressroute(self): - """ - Write scheduler ingressroute file. Only step is filling in the cluster host. - """ - ir = copy.deepcopy(self.scheduler_ir_template) - ir["spec"]["routes"][0]["match"] = f"Host(`{self.cluster_host}`)" - self.write_config("scheduler-ingressroute.yaml", ir) - - return ir - - def write_outputs_processor_deployment(self): - """ - Write outputs processor deployment file. Only step is filling - in the image uri. - """ - deployment = copy.deepcopy(self.outputs_processor_template) - deployment["spec"]["template"]["spec"]["containers"][0][ - "image" - ] = f"gcr.io/{self.project}/outputs_processor:{self.tag}" - - self.write_config( - "outputs-processor-ServiceAccount.yaml", - self.outputs_processor_serviceaccount, - ) - self.write_config("outputs-processor-Deployment.yaml", deployment) - - return deployment - - def write_redis_deployment(self): - deployment = copy.deepcopy(self.redis_master_template) - container = deployment["spec"]["template"]["spec"]["containers"][0] - container["image"] = f"gcr.io/{self.project}/redis-python:{self.tag}" - redis_secrets = self.redis_secrets() - for name, sec in redis_secrets.items(): - if sec is not None: - container["env"].append( - { - "name": name, - "valueFrom": { - "secretKeyRef": {"key": name, "name": "worker-secret"} - }, - } - ) - - if workers_config.get("redis"): - redis_config = workers_config["redis"] - assert ( - redis_config.get("provider") == "volume" - ), f"Got: {redis_config.get('provider', None)}" - args = redis_config["args"][0] - deployment["spec"]["template"]["spec"]["volumes"] = args["volumes"] - self.write_config("redis-master-Deployment.yaml", deployment) - - def write_secret(self): - assert self.bucket - assert self.project - secrets = copy.deepcopy(self.secret_template) - secrets["stringData"]["BUCKET"] = self.bucket - secrets["stringData"]["PROJECT"] = self.project - secrets["stringData"]["CS_CRYPT_KEY"] = workers_config.get( - "CS_CRYPT_KEY" - ) or self.secrets.get("CS_CRYPT_KEY") - redis_secrets = self.redis_secrets() - for name, sec in redis_secrets.items(): - if sec is not None: - secrets["stringData"][name] = sec - - self.write_config("secret.yaml", secrets) def write_cloudflare_api_token(self): api_token = self.secrets.get("CLOUDFLARE_API_TOKEN") @@ -303,34 +159,6 @@ def write_config(self, filename, config): with open(f"{self.kubernetes_target}/{filename}", "w") as f: f.write(yaml.dump(config)) - def redis_secrets(self): - """ - Return redis ACL user passwords. If they are not in the secret manager, - try to generate them using a local instance of redis. If this fails, - they are set to an empty string. - """ - if self._redis_secrets is not None: - return self._redis_secrets - from google.api_core import exceptions - - redis_secrets = dict( - REDIS_ADMIN_PW="", - REDIS_EXECUTOR_PW="", - REDIS_SCHEDULER_PW="", - REDIS_OUTPUTS_PW="", - ) - for sec in redis_secrets: - try: - value = self.secrets.get(sec) - except exceptions.NotFound: - try: - value = redis_acl_genpass() - self.secrets.set(sec, value) - except Exception: - value = "" - redis_secrets[sec] = value - return redis_secrets - @property def secrets(self): if self._secrets is None: @@ -366,11 +194,11 @@ def config_(args: argparse.Namespace): def port_forward(args: argparse.Namespace): - run("kubectl port-forward svc/scheduler 8888:80") + run("kubectl port-forward svc/workers_api 8888:80") def serve(args: argparse.Namespace): - # scheduler.run() + # workers_api.run() pass diff --git a/workers/cs_workers/services/outputs_processor.py b/workers/cs_workers/services/outputs_processor.py index fa735d62..e718938a 100644 --- a/workers/cs_workers/services/outputs_processor.py +++ b/workers/cs_workers/services/outputs_processor.py @@ -20,7 +20,11 @@ app = FastAPI() -queue = Queue(connection=redis.Redis()) +queue = Queue( + connection=redis.Redis( + host=os.environ.get("REDIS_HOST"), port=os.environ.get("REDIS_PORT") + ) +) BUCKET = os.environ.get("BUCKET") diff --git a/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml b/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml index 8a5825e2..44e06dda 100755 --- a/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml +++ b/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml @@ -20,37 +20,12 @@ spec: - containerPort: 8888 env: - name: BUCKET - valueFrom: - secretKeyRef: - name: worker-secret - key: BUCKET + value: {{ .Values.bucket }} + - name: PROJECT + value: {{ .Values.project }} - name: REDIS_HOST - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_HOST + value: {{ .Values.redis.host }} - name: REDIS_PORT - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_PORT - - name: REDIS_DB - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_DB - optional: true - - name: REDIS_USER - value: outputs - - name: REDIS_PW - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_OUTPUTS_PW - - name: CS_CRYPT_KEY - valueFrom: - secretKeyRef: - name: worker-secret - key: CS_CRYPT_KEY + value: {{ .Values.redis.port }} nodeSelector: component: api diff --git a/workers/requirements.txt b/workers/requirements.txt index c490b75f..1dffa2ac 100755 --- a/workers/requirements.txt +++ b/workers/requirements.txt @@ -9,3 +9,6 @@ pyyaml google-cloud-secret-manager cs-crypt>=0.0.2 pyjwt +uvicorn[standard] +fastapi +pydantic[email,dotenv] \ No newline at end of file From b69a09290d562961effd0e3d67e8c95ad044bcd8 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 24 Mar 2021 18:07:49 -0400 Subject: [PATCH 05/41] Delete old config files --- .../templates/services/job-cleanup-Job.yaml | 17 ------ .../templates/services/job-cleanup-RBAC.yaml | 28 ---------- ...outputs-processor-Deployment.template.yaml | 31 ---------- .../services/outputs-processor-Service.yaml | 10 ---- .../outputs-processor-ServiceAccount.yaml | 4 -- .../redis-master-Deployment.template.yaml | 42 -------------- .../services/redis-master-Service.yaml | 15 ----- .../scheduler-Deployment.template.yaml | 56 ------------------- .../templates/services/scheduler-RBAC.yaml | 52 ----------------- .../templates/services/scheduler-Service.yaml | 11 ---- .../scheduler-ingressroute.template.yaml | 16 ------ 11 files changed, 282 deletions(-) delete mode 100644 workers/cs_workers/templates/services/job-cleanup-Job.yaml delete mode 100644 workers/cs_workers/templates/services/job-cleanup-RBAC.yaml delete mode 100755 workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml delete mode 100644 workers/cs_workers/templates/services/outputs-processor-Service.yaml delete mode 100644 workers/cs_workers/templates/services/outputs-processor-ServiceAccount.yaml delete mode 100644 workers/cs_workers/templates/services/redis-master-Deployment.template.yaml delete mode 100644 workers/cs_workers/templates/services/redis-master-Service.yaml delete mode 100755 workers/cs_workers/templates/services/scheduler-Deployment.template.yaml delete mode 100644 workers/cs_workers/templates/services/scheduler-RBAC.yaml delete mode 100644 workers/cs_workers/templates/services/scheduler-Service.yaml delete mode 100644 workers/cs_workers/templates/services/scheduler-ingressroute.template.yaml diff --git a/workers/cs_workers/templates/services/job-cleanup-Job.yaml b/workers/cs_workers/templates/services/job-cleanup-Job.yaml deleted file mode 100644 index 72f9325a..00000000 --- a/workers/cs_workers/templates/services/job-cleanup-Job.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: batch/v1beta1 -kind: CronJob -metadata: - name: job-cleanup -spec: - schedule: "*/30 * * * *" - successfulJobsHistoryLimit: 0 - jobTemplate: - spec: - template: - spec: - serviceAccountName: job-cleanup - containers: - - name: kubectl-container - image: bitnami/kubectl:latest - command: ["sh", "-c", "kubectl delete jobs --field-selector status.successful=1"] - restartPolicy: Never diff --git a/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml b/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml deleted file mode 100644 index 61ddb5e2..00000000 --- a/workers/cs_workers/templates/services/job-cleanup-RBAC.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: job-cleanup ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: job-remove - namespace: default -rules: - - apiGroups: ["batch", "extensions"] - resources: ["jobs"] - verbs: ["get", "list", "watch", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: job-remove - namespace: default -subjects: - - kind: ServiceAccount - name: job-cleanup - namespace: default -roleRef: - kind: Role - name: job-remove - apiGroup: rbac.authorization.k8s.io diff --git a/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml b/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml deleted file mode 100755 index 44e06dda..00000000 --- a/workers/cs_workers/templates/services/outputs-processor-Deployment.template.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: outputs-processor -spec: - replicas: 1 - selector: - matchLabels: - app: outputs-processor - template: - metadata: - labels: - app: outputs-processor - spec: - serviceAccountName: outputs-processor - containers: - - name: outputs-processor - image: - ports: - - containerPort: 8888 - env: - - name: BUCKET - value: {{ .Values.bucket }} - - name: PROJECT - value: {{ .Values.project }} - - name: REDIS_HOST - value: {{ .Values.redis.host }} - - name: REDIS_PORT - value: {{ .Values.redis.port }} - nodeSelector: - component: api diff --git a/workers/cs_workers/templates/services/outputs-processor-Service.yaml b/workers/cs_workers/templates/services/outputs-processor-Service.yaml deleted file mode 100644 index 2edac636..00000000 --- a/workers/cs_workers/templates/services/outputs-processor-Service.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: outputs-processor -spec: - ports: - - port: 80 - targetPort: 8888 - selector: - app: outputs-processor diff --git a/workers/cs_workers/templates/services/outputs-processor-ServiceAccount.yaml b/workers/cs_workers/templates/services/outputs-processor-ServiceAccount.yaml deleted file mode 100644 index b78a435d..00000000 --- a/workers/cs_workers/templates/services/outputs-processor-ServiceAccount.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: outputs-processor diff --git a/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml b/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml deleted file mode 100644 index 2cbfa702..00000000 --- a/workers/cs_workers/templates/services/redis-master-Deployment.template.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: redis - name: redis-master -spec: - replicas: 1 - selector: - matchLabels: - app: redis - role: master - tier: backend - template: - metadata: - labels: - app: redis - role: master - tier: backend - spec: - containers: - - env: [] - command: ["redis-server", "--appendonly", "yes"] - image: # redis-python - name: master - ports: - - containerPort: 6379 - resources: - requests: - cpu: 100m - memory: 100Mi - volumeMounts: - - mountPath: /data - name: redis-volume - volumes: - - name: redis-volume - # This GCE PD must already exist. - gcePersistentDisk: - pdName: redis-disk - fsType: ext4 - nodeSelector: - component: api diff --git a/workers/cs_workers/templates/services/redis-master-Service.yaml b/workers/cs_workers/templates/services/redis-master-Service.yaml deleted file mode 100644 index 04af2120..00000000 --- a/workers/cs_workers/templates/services/redis-master-Service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app: redis - role: master - tier: backend - name: redis-master -spec: - ports: - - port: 6379 - targetPort: 6379 - selector: - app: redis - role: master diff --git a/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml b/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml deleted file mode 100755 index a49c0500..00000000 --- a/workers/cs_workers/templates/services/scheduler-Deployment.template.yaml +++ /dev/null @@ -1,56 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: scheduler -spec: - replicas: 1 - selector: - matchLabels: - app: scheduler - template: - metadata: - labels: - app: scheduler - spec: - serviceAccountName: scheduler - containers: - - name: scheduler - image: - ports: - - containerPort: 8888 - env: - - name: PROJECT - valueFrom: - secretKeyRef: - name: worker-secret - key: PROJECT - - name: CS_CRYPT_KEY - valueFrom: - secretKeyRef: - name: worker-secret - key: CS_CRYPT_KEY - - name: REDIS_HOST - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_HOST - - name: REDIS_PORT - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_PORT - - name: REDIS_DB - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_DB - optional: true - - name: REDIS_USER - value: scheduler - - name: REDIS_PW - valueFrom: - secretKeyRef: - name: worker-secret - key: REDIS_SCHEDULER_PW - nodeSelector: - component: api diff --git a/workers/cs_workers/templates/services/scheduler-RBAC.yaml b/workers/cs_workers/templates/services/scheduler-RBAC.yaml deleted file mode 100644 index 35ecae7d..00000000 --- a/workers/cs_workers/templates/services/scheduler-RBAC.yaml +++ /dev/null @@ -1,52 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: scheduler ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: job-admin - namespace: default -rules: - - apiGroups: ["batch", "extensions"] - resources: ["jobs"] - verbs: ["get", "list", "watch", "create", "update", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: job-admin - namespace: default -subjects: - - kind: ServiceAccount - name: scheduler - namespace: default -roleRef: - kind: Role - name: job-admin - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: viz-admin - namespace: default -rules: - - apiGroups: ["apps", "", "traefik.containo.us"] - resources: ["deployments", "services", "ingressroutes"] - verbs: ["get", "list", "watch", "create", "update", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: viz-admin - namespace: default -subjects: - - kind: ServiceAccount - name: scheduler - namespace: default -roleRef: - kind: Role - name: viz-admin - apiGroup: rbac.authorization.k8s.io diff --git a/workers/cs_workers/templates/services/scheduler-Service.yaml b/workers/cs_workers/templates/services/scheduler-Service.yaml deleted file mode 100644 index 674835ae..00000000 --- a/workers/cs_workers/templates/services/scheduler-Service.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: scheduler -spec: - ports: - - port: 80 - targetPort: 8888 - selector: - app: scheduler - type: LoadBalancer diff --git a/workers/cs_workers/templates/services/scheduler-ingressroute.template.yaml b/workers/cs_workers/templates/services/scheduler-ingressroute.template.yaml deleted file mode 100644 index e64f2714..00000000 --- a/workers/cs_workers/templates/services/scheduler-ingressroute.template.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: traefik.containo.us/v1alpha1 -kind: IngressRoute -metadata: - name: scheduler-tls - namespace: default -spec: - entryPoints: - - websecure - routes: - - match: - kind: Rule - services: - - name: scheduler - port: 80 - tls: - certResolver: myresolver From 918aa2127ae7c8474dbee1db0173e8ce8432217e Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 5 Apr 2021 11:45:49 -0400 Subject: [PATCH 06/41] Set up workers / project namespaces and get full simulation system working --- webapp/apps/comp/serializers.py | 2 +- webapp/apps/comp/views/api.py | 1 + webapp/apps/users/models.py | 10 ++++++---- .../cs-workers/templates/db-deployment.yaml | 1 + workers/cs-workers/templates/db-secret.yaml | 1 + workers/cs-workers/templates/db-service.yaml | 1 + .../cs-workers/templates/job-cleanup-Job.yaml | 1 + .../templates/job-cleanup-RBAC.yaml | 6 +++--- .../outputs-processor-Deployment.yaml | 1 + .../templates/outputs-processor-RBAC.yaml | 1 + .../templates/outputs-processor-Service.yaml | 1 + .../templates/project_namespace.yaml | 4 ++++ .../templates/redis-master-Deployment.yaml | 1 + .../templates/redis-master-Service.yaml | 1 + .../cs-workers/templates/rq-Deployment.yaml | 1 + workers/cs-workers/templates/rq-RBAC.yaml | 1 + .../templates/scheduler-Deployment.yaml | 19 +++++++++++++------ .../cs-workers/templates/scheduler-RBAC.yaml | 18 +++++++++--------- .../templates/scheduler-Service.yaml | 9 +++++---- .../templates/scheduler-ingressroute.yaml | 6 +++--- .../templates/workers-api-secret.yaml | 5 +++-- .../templates/workers_namespace.yaml | 4 ++++ workers/cs-workers/values.yaml | 9 ++++++++- .../dockerfiles/Dockerfile.outputs_processor | 2 +- .../dockerfiles/Dockerfile.workers_api | 2 +- .../services/api/routers/simulations.py | 9 ++++----- workers/cs_workers/services/api/settings.py | 15 +++++++++++++++ .../cs_workers/services/outputs_processor.py | 17 ++++++++++++++--- 28 files changed, 106 insertions(+), 43 deletions(-) create mode 100644 workers/cs-workers/templates/project_namespace.yaml create mode 100644 workers/cs-workers/templates/workers_namespace.yaml diff --git a/webapp/apps/comp/serializers.py b/webapp/apps/comp/serializers.py index f795093a..1597a8bd 100755 --- a/webapp/apps/comp/serializers.py +++ b/webapp/apps/comp/serializers.py @@ -13,7 +13,7 @@ class OutputsSerializer(serializers.Serializer): job_id = serializers.UUIDField() status = serializers.ChoiceField(choices=(("SUCCESS", "Success"), ("FAIL", "Fail"))) - traceback = serializers.CharField(required=False) + traceback = serializers.CharField(required=False, allow_null=True) model_version = serializers.CharField(required=False) meta = serializers.JSONField() outputs = serializers.JSONField(required=False) diff --git a/webapp/apps/comp/views/api.py b/webapp/apps/comp/views/api.py index 1bcb5b65..2993cbcf 100755 --- a/webapp/apps/comp/views/api.py +++ b/webapp/apps/comp/views/api.py @@ -448,6 +448,7 @@ def put(self, request, *args, **kwargs): ) return Response(status=status.HTTP_200_OK) else: + print(f"Data from compute cluster is invalid: {ser.errors}") return Response(ser.errors, status=status.HTTP_400_BAD_REQUEST) diff --git a/webapp/apps/users/models.py b/webapp/apps/users/models.py index e6dce248..12e01735 100755 --- a/webapp/apps/users/models.py +++ b/webapp/apps/users/models.py @@ -209,10 +209,10 @@ class Cluster(models.Model): def ensure_access_token(self): missing_token = self.access_token is None is_expired = ( - self.access_token_expires_at is not None - and self.access_token_expires_at < (timezone.now() - timedelta(seconds=60)) + self.access_token_expires_at is None + or self.access_token_expires_at < (timezone.now() - timedelta(seconds=60)) ) - + print(missing_token, is_expired) if missing_token or is_expired: resp = requests.post( f"{self.url}/api/v1/login/access-token", @@ -222,7 +222,9 @@ def ensure_access_token(self): }, ) if resp.status_code != 200: - raise ClusterLoginException() + raise ClusterLoginException( + f"Expected 200, got {resp.status_code}: {resp.text}" + ) data = resp.json() self.access_token = data["access_token"] self.access_token_expires_at = datetime.fromisoformat(data["expires_at"]) diff --git a/workers/cs-workers/templates/db-deployment.yaml b/workers/cs-workers/templates/db-deployment.yaml index 6ffcb5c4..032709b2 100644 --- a/workers/cs-workers/templates/db-deployment.yaml +++ b/workers/cs-workers/templates/db-deployment.yaml @@ -2,6 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: workers-db + namespace: {{ .Values.workers_namespace }} spec: replicas: 1 selector: diff --git a/workers/cs-workers/templates/db-secret.yaml b/workers/cs-workers/templates/db-secret.yaml index dccc2f02..8361a878 100644 --- a/workers/cs-workers/templates/db-secret.yaml +++ b/workers/cs-workers/templates/db-secret.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: Secret metadata: name: workers-db-secret + namespace: {{ .Values.workers_namespace }} type: Opaque stringData: USER: {{ .Values.db.user }} diff --git a/workers/cs-workers/templates/db-service.yaml b/workers/cs-workers/templates/db-service.yaml index 51841105..f0c8195f 100644 --- a/workers/cs-workers/templates/db-service.yaml +++ b/workers/cs-workers/templates/db-service.yaml @@ -4,6 +4,7 @@ metadata: labels: app: workers-db name: workers-db + namespace: {{ .Values.workers_namespace }} spec: ports: - port: 5432 diff --git a/workers/cs-workers/templates/job-cleanup-Job.yaml b/workers/cs-workers/templates/job-cleanup-Job.yaml index 72f9325a..017174f4 100644 --- a/workers/cs-workers/templates/job-cleanup-Job.yaml +++ b/workers/cs-workers/templates/job-cleanup-Job.yaml @@ -2,6 +2,7 @@ apiVersion: batch/v1beta1 kind: CronJob metadata: name: job-cleanup + namespace: {{ .Values.project_namespace }} spec: schedule: "*/30 * * * *" successfulJobsHistoryLimit: 0 diff --git a/workers/cs-workers/templates/job-cleanup-RBAC.yaml b/workers/cs-workers/templates/job-cleanup-RBAC.yaml index 48d061d6..6a71a8db 100644 --- a/workers/cs-workers/templates/job-cleanup-RBAC.yaml +++ b/workers/cs-workers/templates/job-cleanup-RBAC.yaml @@ -7,7 +7,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: job-remove - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} rules: - apiGroups: ["batch", "extensions"] resources: ["jobs"] @@ -17,11 +17,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: job-remove - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} subjects: - kind: ServiceAccount name: job-cleanup - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} roleRef: kind: Role name: job-remove diff --git a/workers/cs-workers/templates/outputs-processor-Deployment.yaml b/workers/cs-workers/templates/outputs-processor-Deployment.yaml index 44d23dd7..1ec527b9 100755 --- a/workers/cs-workers/templates/outputs-processor-Deployment.yaml +++ b/workers/cs-workers/templates/outputs-processor-Deployment.yaml @@ -2,6 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: outputs-processor + namespace: {{ .Values.workers_namespace }} spec: replicas: 1 selector: diff --git a/workers/cs-workers/templates/outputs-processor-RBAC.yaml b/workers/cs-workers/templates/outputs-processor-RBAC.yaml index b78a435d..dd3f5e0d 100644 --- a/workers/cs-workers/templates/outputs-processor-RBAC.yaml +++ b/workers/cs-workers/templates/outputs-processor-RBAC.yaml @@ -2,3 +2,4 @@ apiVersion: v1 kind: ServiceAccount metadata: name: outputs-processor + namespace: {{ .Values.workers_namespace }} diff --git a/workers/cs-workers/templates/outputs-processor-Service.yaml b/workers/cs-workers/templates/outputs-processor-Service.yaml index 3914b3c0..4ac71616 100644 --- a/workers/cs-workers/templates/outputs-processor-Service.yaml +++ b/workers/cs-workers/templates/outputs-processor-Service.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: Service metadata: name: outputs-processor + namespace: {{ .Values.workers_namespace }} spec: ports: - port: 80 diff --git a/workers/cs-workers/templates/project_namespace.yaml b/workers/cs-workers/templates/project_namespace.yaml new file mode 100644 index 00000000..bb37da36 --- /dev/null +++ b/workers/cs-workers/templates/project_namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.project_namespace }} \ No newline at end of file diff --git a/workers/cs-workers/templates/redis-master-Deployment.yaml b/workers/cs-workers/templates/redis-master-Deployment.yaml index bbbd5ba2..cc4ca44b 100644 --- a/workers/cs-workers/templates/redis-master-Deployment.yaml +++ b/workers/cs-workers/templates/redis-master-Deployment.yaml @@ -4,6 +4,7 @@ metadata: labels: app: redis name: redis-master + namespace: {{ .Values.workers_namespace }} spec: replicas: 1 selector: diff --git a/workers/cs-workers/templates/redis-master-Service.yaml b/workers/cs-workers/templates/redis-master-Service.yaml index 04af2120..e5e838a4 100644 --- a/workers/cs-workers/templates/redis-master-Service.yaml +++ b/workers/cs-workers/templates/redis-master-Service.yaml @@ -6,6 +6,7 @@ metadata: role: master tier: backend name: redis-master + namespace: {{ .Values.workers_namespace }} spec: ports: - port: 6379 diff --git a/workers/cs-workers/templates/rq-Deployment.yaml b/workers/cs-workers/templates/rq-Deployment.yaml index d8a1a16e..1cbe734a 100644 --- a/workers/cs-workers/templates/rq-Deployment.yaml +++ b/workers/cs-workers/templates/rq-Deployment.yaml @@ -2,6 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: rq-worker-outputs + namespace: {{ .Values.workers_namespace }} spec: replicas: 1 selector: diff --git a/workers/cs-workers/templates/rq-RBAC.yaml b/workers/cs-workers/templates/rq-RBAC.yaml index a435d527..da48fd78 100644 --- a/workers/cs-workers/templates/rq-RBAC.yaml +++ b/workers/cs-workers/templates/rq-RBAC.yaml @@ -2,3 +2,4 @@ apiVersion: v1 kind: ServiceAccount metadata: name: rq-worker + namespace: {{ .Values.workers_namespace }} diff --git a/workers/cs-workers/templates/scheduler-Deployment.yaml b/workers/cs-workers/templates/scheduler-Deployment.yaml index 03abdf80..c7d5cabb 100755 --- a/workers/cs-workers/templates/scheduler-Deployment.yaml +++ b/workers/cs-workers/templates/scheduler-Deployment.yaml @@ -1,33 +1,40 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: workers-api + name: api + namespace: {{ .Values.workers_namespace }} spec: replicas: 1 selector: matchLabels: - app: workers-api + app: api template: metadata: labels: - app: workers-api + app: api spec: - serviceAccountName: workers-api + serviceAccountName: api containers: - - name: workers-api + - name: api image: "{{ .Values.registry }}/{{ .Values.project }}/workers_api:{{ .Values.tag }}" ports: - containerPort: 5000 env: - name: BUCKET value: "{{ .Values.bucket }}" + - name: PROJECT + value: "{{ .Values.project }}" - name: VIZ_HOST value: "{{ .Values.viz_host }}" - name: API_SECRET_KEY valueFrom: secretKeyRef: - name: workers-api-secret + name: api-secret key: API_SECRET_KEY + - name: BACKEND_CORS_ORIGINS + value: '{{ .Values.api.allow_origins | toJson }}' + - name: PROJECT_NAMESPACE + value: '{{ .Values.project_namespace }}' - name: DB_USER valueFrom: secretKeyRef: diff --git a/workers/cs-workers/templates/scheduler-RBAC.yaml b/workers/cs-workers/templates/scheduler-RBAC.yaml index 8eecae3d..1b615fdb 100644 --- a/workers/cs-workers/templates/scheduler-RBAC.yaml +++ b/workers/cs-workers/templates/scheduler-RBAC.yaml @@ -1,13 +1,13 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: workers-api + name: api --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: job-admin - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} rules: - apiGroups: ["batch", "extensions"] resources: ["jobs"] @@ -17,11 +17,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: job-admin - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} subjects: - kind: ServiceAccount - name: workers-api - namespace: {{ .Release.Namespace }} + name: api + namespace: {{ .Values.workers_namespace }} roleRef: kind: Role name: job-admin @@ -31,7 +31,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: viz-admin - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.project_namespace }} rules: - apiGroups: ["apps", "", "traefik.containo.us"] resources: ["deployments", "services", "ingressroutes"] @@ -41,11 +41,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: viz-admin - namespace: {{ .Release.Namespace }} + namespace: {{ .Values.workers_namespace }} subjects: - kind: ServiceAccount - name: workers-api - namespace: {{ .Release.Namespace }} + name: api + namespace: {{ .Values.workers_namespace }} roleRef: kind: Role name: viz-admin diff --git a/workers/cs-workers/templates/scheduler-Service.yaml b/workers/cs-workers/templates/scheduler-Service.yaml index f5534095..bd2faf4e 100644 --- a/workers/cs-workers/templates/scheduler-Service.yaml +++ b/workers/cs-workers/templates/scheduler-Service.yaml @@ -1,11 +1,12 @@ apiVersion: v1 kind: Service metadata: - name: workers-api + name: api + namespace: {{ .Values.workers_namespace }} spec: ports: - - port: 80 - targetPort: 5000 + - port: 80 + targetPort: 5000 selector: - app: workers-api + app: api type: LoadBalancer diff --git a/workers/cs-workers/templates/scheduler-ingressroute.yaml b/workers/cs-workers/templates/scheduler-ingressroute.yaml index 585d2993..70400b51 100644 --- a/workers/cs-workers/templates/scheduler-ingressroute.yaml +++ b/workers/cs-workers/templates/scheduler-ingressroute.yaml @@ -2,8 +2,8 @@ apiVersion: traefik.containo.us/v1alpha1 kind: IngressRoute metadata: - name: scheduler-tls - namespace: default + name: api-tls + namespace: {{ .Values.workers_namespace }} spec: entryPoints: - websecure @@ -11,7 +11,7 @@ spec: - match: kind: Rule services: - - name: scheduler + - name: api port: 80 tls: certResolver: myresolver diff --git a/workers/cs-workers/templates/workers-api-secret.yaml b/workers/cs-workers/templates/workers-api-secret.yaml index 309e237f..0e126fdc 100644 --- a/workers/cs-workers/templates/workers-api-secret.yaml +++ b/workers/cs-workers/templates/workers-api-secret.yaml @@ -1,7 +1,8 @@ apiVersion: v1 kind: Secret metadata: - name: workers-api-secret + name: api-secret + namespace: {{ .Values.workers_namespace }} type: Opaque stringData: - API_SECRET_KEY: {{ .Values.api_secret_key }} + API_SECRET_KEY: {{ .Values.api.secret_key }} diff --git a/workers/cs-workers/templates/workers_namespace.yaml b/workers/cs-workers/templates/workers_namespace.yaml new file mode 100644 index 00000000..7219c80b --- /dev/null +++ b/workers/cs-workers/templates/workers_namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.workers_namespace }} \ No newline at end of file diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index b36b6112..7d119707 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -11,8 +11,15 @@ registry: gcr.io # Overrides the image tag whose default is the chart appVersion. tag: tag -api_secret_key: abc123 +workers_namespace: workers +project_namespace: projects +api: + secret_key: abc123 + allow_origins: + - "http://10.0.0.137:5000" + - "http://localhost:5000" + - "http://api.workers.svc.cluster.local" redis: host: "redis-master" port: "6379" diff --git a/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor b/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor index f785e223..c2a625f1 100755 --- a/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor +++ b/workers/cs_workers/dockerfiles/Dockerfile.outputs_processor @@ -31,4 +31,4 @@ WORKDIR /home/cs_workers ENV PYTHONUNBUFFERED 1 -CMD ["uvicorn", "services.outputs_processor:app", "--port", "5000", "--reload"] \ No newline at end of file +CMD ["uvicorn", "services.outputs_processor:app", "--host", "0.0.0.0", "--port", "5000", "--reload"] \ No newline at end of file diff --git a/workers/cs_workers/dockerfiles/Dockerfile.workers_api b/workers/cs_workers/dockerfiles/Dockerfile.workers_api index 893b682c..fc022698 100755 --- a/workers/cs_workers/dockerfiles/Dockerfile.workers_api +++ b/workers/cs_workers/dockerfiles/Dockerfile.workers_api @@ -31,4 +31,4 @@ ENV PYTHONUNBUFFERED 1 WORKDIR /home/cs_workers/services/ -CMD ["uvicorn", "api.main:app", "--port", "5000", "--reload"] \ No newline at end of file +CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "5000", "--reload"] \ No newline at end of file diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index 7a8d95fe..0d6a05bf 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -8,7 +8,7 @@ from sqlalchemy.orm import Session from cs_workers.models.clients import job -from .. import models, schemas, dependencies as deps, security +from .. import models, schemas, dependencies as deps, security, settings incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False @@ -49,7 +49,6 @@ async def finish_job( db: Session = Depends(deps.get_db), ): print("got data for ", job_id) - print(task.dict()) instance = db.query(models.Job).filter(models.Job.id == job_id).one_or_none() if instance is None: raise HTTPException(status_code=404, detail="Job not found.") @@ -69,7 +68,7 @@ async def finish_job( await security.ensure_cs_access_token(db, user) async with httpx.AsyncClient() as client: resp = await client.post( - f"http://localhost:5001/{job_id}/", + f"http://outputs-processor/{job_id}/", json={ "url": user.url, "headers": {"Authorization": f"Bearer {user.access_token}"}, @@ -154,10 +153,10 @@ def create_job( tag=tag, model_config=project_data, job_id=instance.id, - callback_url=f"https://hdoupe.ngrok.io/api/v1/jobs/callback/{instance.id}/", + callback_url=f"http://api.workers.svc.cluster.local/api/v1/jobs/callback/{instance.id}/", route_name=task_name, incluster=incluster, - namespace="worker-api", + namespace=settings.settings.PROJECT_NAMESPACE, ) client.create() diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py index 6904e982..5f7c2ebe 100644 --- a/workers/cs_workers/services/api/settings.py +++ b/workers/cs_workers/services/api/settings.py @@ -1,8 +1,11 @@ import os +from pathlib import Path from typing import Any, Dict, List, Optional, Union from pydantic import AnyHttpUrl, BaseSettings, EmailStr, HttpUrl, PostgresDsn, validator +NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + class Settings(BaseSettings): API_PREFIX_STR: str = "/api/v1" @@ -26,6 +29,18 @@ def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str return v raise ValueError(v) + PROJECT_NAMESPACE: str + + @validator("PROJECT_NAMESPACE", pre=True) + def get_project_namespace(cls, v: Optional[str]) -> str: + if v: + return v + elif Path(NAMESPACE_PATH).exists(): + with open(NAMESPACE_PATH) as f: + return f.read().strip() + else: + return "default" + PROJECT_NAME: str = "C/S Cluster Api" SENTRY_DSN: Optional[HttpUrl] = None diff --git a/workers/cs_workers/services/outputs_processor.py b/workers/cs_workers/services/outputs_processor.py index e718938a..d023158a 100644 --- a/workers/cs_workers/services/outputs_processor.py +++ b/workers/cs_workers/services/outputs_processor.py @@ -43,29 +43,40 @@ def write(task_id, outputs): def push(job_id: str, result: Result): + resp = None if result.task.task_name == "sim": print(f"posting data to {result.url}/outputs/api/") result.task.outputs = write(job_id, result.task.outputs) - return httpx.put( + resp = httpx.put( f"{result.url}/outputs/api/", json=dict(job_id=job_id, **result.task.dict()), headers=result.headers, ) elif result.task.task_name == "parse": print(f"posting data to {result.url}/inputs/api/") - return httpx.put( + resp = httpx.put( f"{result.url}/inputs/api/", json=dict(job_id=job_id, **result.task.dict()), headers=result.headers, ) elif result.task.task_name == "defaults": print(f"posting data to {result.url}/model-config/api/") - return httpx.put( + resp = httpx.put( f"{result.url}/model-config/api/", json=dict(job_id=job_id, **result.task.dict()), headers=result.headers, ) + if resp is not None and resp.status_code == 400: + print(resp.text) + resp.raise_for_status() + elif resp is not None: + resp.raise_for_status() + else: + raise ValueError( + f"resp is None for: {job_id} with name {result.task.task_name}" + ) + @app.post("/{job_id}/", status_code=200) async def post(job_id: str, result: Result = Body(...)): From 354d5826d66d595441b9048c323b7b8735720955 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 7 Apr 2021 09:44:54 -0400 Subject: [PATCH 07/41] Update frontend to poll for model configuration objects if not cached yet --- src/Simulation/API.ts | 37 ++++++--------- src/Simulation/index.tsx | 70 ++++++++++++++-------------- webapp/apps/comp/exceptions.py | 5 ++ webapp/apps/comp/ioutils.py | 2 +- webapp/apps/comp/model_parameters.py | 31 ++++++------ webapp/apps/comp/parser.py | 34 +++++++------- webapp/apps/comp/views/api.py | 5 +- 7 files changed, 91 insertions(+), 93 deletions(-) diff --git a/src/Simulation/API.ts b/src/Simulation/API.ts index 676183c0..ae14283a 100644 --- a/src/Simulation/API.ts +++ b/src/Simulation/API.ts @@ -43,34 +43,25 @@ export default class API { }); } - getInputsDetail(): Promise { + async getInputsDetail(): Promise { if (!this.modelpk) return; - return axios - .get(`/${this.owner}/${this.title}/api/v1/${this.modelpk}/edit/`) - .then(resp => resp.data); + const resp = await axios.get(`/${this.owner}/${this.title}/api/v1/${this.modelpk}/edit/`); + return resp.data; } - getInitialValues(): Promise { - let data: Inputs; - if (!this.modelpk) { - return axios.get(`/${this.owner}/${this.title}/api/v1/inputs/`).then(inputsResp => { - data = inputsResp.data; - return data; + async getInputs(meta_parameters?: InputsDetail["meta_parameters"]): Promise { + let resp; + if (!!meta_parameters) { + resp = await axios.post(`/${this.owner}/${this.title}/api/v1/inputs/`, meta_parameters); + } else { + resp = await axios.get(`/${this.owner}/${this.title}/api/v1/inputs/`); + } + if (resp.status === 202) { + return new Promise(resolve => { + setTimeout(async () => resolve(await this.getInputs(meta_parameters)), 2000); }); } else { - return axios - .get(`/${this.owner}/${this.title}/api/v1/${this.modelpk}/edit/`) - .then(detailResp => { - return axios - .post(`/${this.owner}/${this.title}/api/v1/inputs/`, { - meta_parameters: detailResp.data.meta_parameters, - }) - .then(inputsResp => { - data = inputsResp.data; - data["detail"] = detailResp.data; - return data; - }); - }); + return resp.data; } } diff --git a/src/Simulation/index.tsx b/src/Simulation/index.tsx index ca0dbca5..dace264e 100755 --- a/src/Simulation/index.tsx +++ b/src/Simulation/index.tsx @@ -128,48 +128,50 @@ class SimTabs extends React.Component< this.handleSubmit = this.handleSubmit.bind(this); } - componentDidMount() { + async componentDidMount() { this.api.getAccessStatus().then(data => { this.setState({ accessStatus: data, }); }); - this.api - .getInitialValues() - .then(data => { - const [serverValues, sects, inputs, schema, unknownParams] = convertToFormik(data); - let isEmpty = true; - for (const msectvals of Object.values(data.detail?.adjustment || {})) { - if (Object.keys(msectvals).length > 0) { - isEmpty = false; - } - } - let initialValues; - if (isEmpty) { - const storage = Persist.pop( - `${this.props.match.params.owner}/${this.props.match.params.title}/inputs` - ); - // Use values from local storage if available. Default to empty dict from server. - initialValues = storage || serverValues; - } else { - initialValues = serverValues; - } - - this.setState({ - inputs: inputs, - initialValues: initialValues, - sects: sects, - schema: schema, - unknownParams: unknownParams, - extend: "extend" in data ? data.extend : false, - }); - }) - .catch(error => { - this.setState({ error }); - }); if (this.api.modelpk) { this.setOutputs(); } + let data: Inputs; + if (this.api.modelpk) { + const detail = await this.api.getInputsDetail(); + data = await this.api.getInputs(detail.meta_parameters); + data.detail = detail; + } else { + data = await this.api.getInputs(); + } + + const [serverValues, sects, inputs, schema, unknownParams] = convertToFormik(data); + let isEmpty = true; + for (const msectvals of Object.values(data.detail?.adjustment || {})) { + if (Object.keys(msectvals).length > 0) { + isEmpty = false; + } + } + let initialValues; + if (isEmpty) { + const storage = Persist.pop( + `${this.props.match.params.owner}/${this.props.match.params.title}/inputs` + ); + // Use values from local storage if available. Default to empty dict from server. + initialValues = storage || serverValues; + } else { + initialValues = serverValues; + } + + this.setState({ + inputs: inputs, + initialValues: initialValues, + sects: sects, + schema: schema, + unknownParams: unknownParams, + extend: "extend" in inputs ? inputs.extend : false, + }); } resetInitialValues(metaParameters: InputsDetail["meta_parameters"]) { diff --git a/webapp/apps/comp/exceptions.py b/webapp/apps/comp/exceptions.py index 6b457e8a..6367a09d 100755 --- a/webapp/apps/comp/exceptions.py +++ b/webapp/apps/comp/exceptions.py @@ -72,3 +72,8 @@ def todict(self): collaborator=getattr(self.collaborator, "username", str(self.collaborator)), msg=str(self), ) + + +class NotReady(CSException): + def __init__(self, instance, *args, **kwargs): + self.instance = instance diff --git a/webapp/apps/comp/ioutils.py b/webapp/apps/comp/ioutils.py index 84e28f9b..13a9e096 100755 --- a/webapp/apps/comp/ioutils.py +++ b/webapp/apps/comp/ioutils.py @@ -1,5 +1,5 @@ from typing import NamedTuple, Type -from webapp.apps.comp.model_parameters import ModelParameters, NotReady +from webapp.apps.comp.model_parameters import ModelParameters from webapp.apps.comp.parser import Parser diff --git a/webapp/apps/comp/model_parameters.py b/webapp/apps/comp/model_parameters.py index 5815c05f..a8ebb411 100755 --- a/webapp/apps/comp/model_parameters.py +++ b/webapp/apps/comp/model_parameters.py @@ -6,7 +6,7 @@ from webapp.apps.comp.models import ModelConfig from webapp.apps.comp.compute import Compute, SyncCompute, JobFailError from webapp.apps.comp import actions -from webapp.apps.comp.exceptions import AppError +from webapp.apps.comp.exceptions import AppError, NotReady import os @@ -19,10 +19,6 @@ def pt_factory(classname, defaults): return type(classname, (pt.Parameters,), {"defaults": defaults}) -class NotReady(Exception): - pass - - class ModelParameters: """ Handles logic for getting cached model parameters and updating the cache. @@ -52,9 +48,11 @@ def defaults(self, init_meta_parameters=None): "meta_parameters": meta_parameters, } - def meta_parameters_parser(self): + def meta_parameters_parser(self) -> pt.Parameters: res = self.get_inputs() - return pt_factory("MetaParametersParser", res["meta_parameters"])() + params = pt_factory("MetaParametersParser", res["meta_parameters"])() + # params._defer_validation = True + return params def model_parameters_parser(self, meta_parameters_values=None): res = self.get_inputs(meta_parameters_values) @@ -83,15 +81,15 @@ def get_inputs(self, meta_parameters_values=None): meta_parameters_values = meta_parameters_values or {} try: - config = ModelConfig.objects.get( + self.config = ModelConfig.objects.get( project=self.project, model_version=str(self.project.latest_tag), meta_parameters_values=meta_parameters_values, ) - print("STATUS", config.status) - if config.status != "SUCCESS": + print("STATUS", self.config.status) + if self.config.status != "SUCCESS": print("raise yo") - raise NotReady() + raise NotReady(self.config) except ModelConfig.DoesNotExist: response = self.compute.submit_job( project=self.project, @@ -102,7 +100,7 @@ def get_inputs(self, meta_parameters_values=None): else "", ) if self.project.cluster.version == "v1": - config = ModelConfig.objects.create( + self.config = ModelConfig.objects.create( project=self.project, model_version=str(self.project.latest_tag), meta_parameters_values=meta_parameters_values, @@ -110,7 +108,7 @@ def get_inputs(self, meta_parameters_values=None): job_id=response, status="PENDING", ) - raise NotReady() + raise NotReady(self.config) success, result = response if not success: @@ -120,7 +118,7 @@ def get_inputs(self, meta_parameters_values=None): meta_parameters_values, result["meta_parameters"] ) - config = ModelConfig.objects.create( + self.config = ModelConfig.objects.create( project=self.project, model_version=str(self.project.latest_tag), meta_parameters_values=save_vals, @@ -129,8 +127,7 @@ def get_inputs(self, meta_parameters_values=None): inputs_version="v1", ) - self.config = config return { - "meta_parameters": config.meta_parameters, - "model_parameters": config.model_parameters, + "meta_parameters": self.config.meta_parameters, + "model_parameters": self.config.model_parameters, } diff --git a/webapp/apps/comp/parser.py b/webapp/apps/comp/parser.py index ac40bd51..e4584ccf 100755 --- a/webapp/apps/comp/parser.py +++ b/webapp/apps/comp/parser.py @@ -3,7 +3,7 @@ from webapp.apps.comp import actions from webapp.apps.comp.compute import Compute -from webapp.apps.comp.exceptions import AppError +from webapp.apps.comp.exceptions import AppError, NotReady from webapp.apps.comp.models import Inputs ParamData = namedtuple("ParamData", ["name", "data"]) @@ -23,11 +23,18 @@ def __init__( self.valid_meta_params = valid_meta_params for param, value in valid_meta_params.items(): setattr(self, param, value) - defaults = model_parameters.defaults(self.valid_meta_params) - self.grouped_defaults = defaults["model_parameters"] - self.flat_defaults = { - k: v for _, sect in self.grouped_defaults.items() for k, v in sect.items() - } + try: + defaults = model_parameters.defaults(self.valid_meta_params) + except NotReady: + self.grouped_defaults = {} + self.flat_defaults = {} + else: + self.grouped_defaults = defaults["model_parameters"] + self.flat_defaults = { + k: v + for _, sect in self.grouped_defaults.items() + for k, v in sect.items() + } @staticmethod def append_errors_warnings(errors_warnings, append_func, defaults=None): @@ -41,11 +48,11 @@ def append_errors_warnings(errors_warnings, append_func, defaults=None): append_func(param, msg, defaults) def parse_parameters(self): + sects = set(self.grouped_defaults.keys()) | set(self.clean_inputs.keys()) errors_warnings = { - sect: {"errors": {}, "warnings": {}} - for sect in list(self.grouped_defaults) + ["GUI", "API"] + sect: {"errors": {}, "warnings": {}} for sect in sects | {"GUI", "API"} } - adjustment = {sect: {} for sect in self.grouped_defaults} + adjustment = defaultdict(dict) return errors_warnings, adjustment def post(self, errors_warnings, params): @@ -70,13 +77,8 @@ class Parser: class APIParser(BaseParser): def parse_parameters(self): errors_warnings, adjustment = super().parse_parameters() - extra_keys = set(self.clean_inputs.keys() - self.grouped_defaults.keys()) - if extra_keys: - errors_warnings["API"]["errors"] = { - "extra_keys": [f"Has extra sections: {' ,'.join(extra_keys)}"] - } - - for sect in adjustment: + sects = set(self.grouped_defaults.keys()) | set(self.clean_inputs.keys()) + for sect in sects: adjustment[sect].update(self.clean_inputs.get(sect, {})) # kick off async parsing diff --git a/webapp/apps/comp/views/api.py b/webapp/apps/comp/views/api.py index 2993cbcf..521cdd31 100755 --- a/webapp/apps/comp/views/api.py +++ b/webapp/apps/comp/views/api.py @@ -43,8 +43,9 @@ ForkObjectException, PrivateAppException, PrivateSimException, + NotReady, ) -from webapp.apps.comp.ioutils import get_ioutils, NotReady +from webapp.apps.comp.ioutils import get_ioutils from webapp.apps.comp.models import ( Inputs, Simulation, @@ -94,7 +95,7 @@ def get_inputs(self, kwargs, meta_parameters=None): return Response(str(e), status=status.HTTP_400_BAD_REQUEST) if "year" in defaults["meta_parameters"]: defaults.update({"extend": True}) - print("got defaults", defaults) + return Response(defaults) def get(self, request, *args, **kwargs): From add353287bce24aeda041dbd73d4cc28614788a0 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Thu, 8 Apr 2021 10:11:28 -0400 Subject: [PATCH 08/41] All existing tests pass - Update oauth2 validation for client credentials to return application user - Re-activate write access check for project in mc / inputs / outputs endpoints. --- pytest.ini | 1 + webapp/apps/comp/compute.py | 6 ++--- webapp/apps/comp/model_parameters.py | 1 + webapp/apps/comp/tests/test_api_parser.py | 5 ++++ webapp/apps/comp/tests/test_asyncviews.py | 12 +++++---- webapp/apps/comp/views/api.py | 22 ++++++++------- webapp/apps/conftest.py | 1 + webapp/apps/users/auth.py | 33 +++++++++++++++++++++++ 8 files changed, 63 insertions(+), 18 deletions(-) diff --git a/pytest.ini b/pytest.ini index ca5c3a32..1aba610a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,3 +2,4 @@ DJANGO_SETTINGS_MODULE = webapp.settings markers = register: for the register module. + requires_stripe: whether to use stripe or not. \ No newline at end of file diff --git a/webapp/apps/comp/compute.py b/webapp/apps/comp/compute.py index 18499fb2..bfd3e8da 100755 --- a/webapp/apps/comp/compute.py +++ b/webapp/apps/comp/compute.py @@ -104,10 +104,8 @@ def submit(self, tasks, url, headers): success = True else: success = data["status"] == "SUCCESS" - if success: - return success, data - else: - return success, data + + return success, data class SyncProjects(SyncCompute): diff --git a/webapp/apps/comp/model_parameters.py b/webapp/apps/comp/model_parameters.py index a8ebb411..b07b35a4 100755 --- a/webapp/apps/comp/model_parameters.py +++ b/webapp/apps/comp/model_parameters.py @@ -125,6 +125,7 @@ def get_inputs(self, meta_parameters_values=None): meta_parameters=result["meta_parameters"], model_parameters=result["model_parameters"], inputs_version="v1", + status="SUCCESS", ) return { diff --git a/webapp/apps/comp/tests/test_api_parser.py b/webapp/apps/comp/tests/test_api_parser.py index f1d1861b..27d3de46 100755 --- a/webapp/apps/comp/tests/test_api_parser.py +++ b/webapp/apps/comp/tests/test_api_parser.py @@ -1,3 +1,4 @@ +import pytest from webapp.apps.users.models import Project from webapp.apps.comp.model_parameters import ModelParameters from webapp.apps.comp.ioutils import get_ioutils @@ -35,6 +36,10 @@ def get_inputs(self, meta_parameters=None): assert errors_warnings["GUI"] == exp_errors_warnings +# Opting out of this validation for now. It may not be good to have in the long +# run but in the short term, requiring the model parameters to be loaded from the db +# before running a sim is a bottleneck. +@pytest.mark.xfail def test_api_parser_extra_section(db, get_inputs, valid_meta_params): class MockMp(ModelParameters): def get_inputs(self, meta_parameters=None): diff --git a/webapp/apps/comp/tests/test_asyncviews.py b/webapp/apps/comp/tests/test_asyncviews.py index d4a8f205..d41fee7c 100755 --- a/webapp/apps/comp/tests/test_asyncviews.py +++ b/webapp/apps/comp/tests/test_asyncviews.py @@ -193,14 +193,16 @@ def post_adjustment( adj_resp_data: dict, adj: dict, ) -> Response: - mock.register_uri( - "POST", - f"{self.project.cluster.url}/{self.project}/", - json=lambda request, context: { + def mock_json(request, context): + return { "defaults": defaults_resp_data, "parse": adj_resp_data, "version": {"status": "SUCCESS", "version": "v1"}, - }[request.json()["task_name"]], + "sim": {"task_id": str(uuid.uuid4())}, + }[request.json()["task_name"]] + + mock.register_uri( + "POST", f"{self.project.cluster.url}/{self.project}/", json=mock_json, ) init_resp = self.api_client.post( f"/{self.project}/api/v1/", data=adj, format="json" diff --git a/webapp/apps/comp/views/api.py b/webapp/apps/comp/views/api.py index 521cdd31..4cd8b921 100755 --- a/webapp/apps/comp/views/api.py +++ b/webapp/apps/comp/views/api.py @@ -25,7 +25,7 @@ import paramtools as pt import cs_storage -from webapp.apps.users.auth import ClusterAuthentication +from webapp.apps.users.auth import ClusterAuthentication, ClientOAuth2Authentication from webapp.apps.users.models import ( Project, Profile, @@ -397,21 +397,22 @@ class OutputsAPIView(RecordOutputsMixin, APIView): authentication_classes = ( ClusterAuthentication, - OAuth2Authentication, + ClientOAuth2Authentication, # Uncomment to allow token-based authentication for this endpoint. # TokenAuthentication, ) def put(self, request, *args, **kwargs): - print("myoutputs api method=PUT", kwargs) + print("myoutputs api method=PUT", request.user, kwargs) + print("authenticator", request.user, request.successful_authenticator) ser = OutputsSerializer(data=request.data) if ser.is_valid(): data = ser.validated_data sim = get_object_or_404( Simulation.objects.prefetch_related("project"), job_id=data["job_id"] ) - # if not sim.project.has_write_access(request.user): - # return Response(status=status.HTTP_401_UNAUTHORIZED) + if not sim.project.has_write_access(request.user): + return Response(status=status.HTTP_401_UNAUTHORIZED) if sim.status == "PENDING": self.record_outputs(sim, data) if sim.notify_on_completion: @@ -456,21 +457,22 @@ def put(self, request, *args, **kwargs): class MyInputsAPIView(APIView): authentication_classes = ( ClusterAuthentication, - OAuth2Authentication, + ClientOAuth2Authentication, # Uncomment to allow token-based authentication for this endpoint. # TokenAuthentication, ) def put(self, request, *args, **kwargs): print("myinputs api method=PUT", kwargs) + print("authenticator", request.user, request.successful_authenticator) ser = InputsSerializer(data=request.data) if ser.is_valid(): data = ser.validated_data inputs = get_object_or_404( Inputs.objects.prefetch_related("project"), job_id=data["job_id"] ) - # if not inputs.project.has_write_access(request.user): - # return Response(status=status.HTTP_401_UNAUTHORIZED) + if not inputs.project.has_write_access(request.user): + return Response(status=status.HTTP_401_UNAUTHORIZED) if inputs.status in ("PENDING", "INVALID", "FAIL"): # successful run if data["status"] == "SUCCESS": @@ -508,13 +510,15 @@ def put(self, request, *args, **kwargs): class ModelConfigAPIView(APIView): authentication_classes = ( ClusterAuthentication, - OAuth2Authentication, + ClientOAuth2Authentication, # Uncomment to allow token-based authentication for this endpoint. # TokenAuthentication, ) def put(self, request, *args, **kwargs): print("myinputs api method=PUT", kwargs) + print("authenticator", request.user, request.successful_authenticator) + ser = ModelConfigSerializer(data=request.data) if ser.is_valid(): data = ser.validated_data diff --git a/webapp/apps/conftest.py b/webapp/apps/conftest.py index 79744528..e395f615 100755 --- a/webapp/apps/conftest.py +++ b/webapp/apps/conftest.py @@ -76,6 +76,7 @@ def django_db_setup(django_db_setup, django_db_blocker): service_account=comp_api_user.profile, url="http://scheduler", jwt_secret=cryptkeeper.encrypt(binascii.hexlify(os.urandom(32)).decode()), + version="v0", ) common = { diff --git a/webapp/apps/users/auth.py b/webapp/apps/users/auth.py index e303b1ac..997a0dc1 100644 --- a/webapp/apps/users/auth.py +++ b/webapp/apps/users/auth.py @@ -4,6 +4,9 @@ from rest_framework import authentication from rest_framework.exceptions import AuthenticationFailed +from oauth2_provider.contrib.rest_framework import ( + OAuth2Authentication as BaseOAuth2Authentication, +) from webapp.apps.users.models import ( @@ -52,3 +55,33 @@ def authenticate(self, request): raise AuthenticationFailed("No such user") return (cluster.service_account.user, None) + + +class ClientOAuth2Authentication(BaseOAuth2Authentication): + """ + Authenticator that forces request.user to be present even if the + oauth2_provider package doesn't want it to be. + + Works around the change introduced in: + https://github.com/evonove/django-oauth-toolkit/commit/628f9e6ba98007d2940bb1a4c28136c03d81c245 + + Reference: + https://github.com/evonove/django-oauth-toolkit/issues/38 + + """ + + def authenticate(self, request): + super_result = super().authenticate(request) + + if super_result: + # The request was found to be authentic. + user, token = super_result + if ( + user is None + and token.application.authorization_grant_type == "client-credentials" + ): + user = token.application.user + result = user, token + else: + result = super_result + return result From 644a618dcf6142fededd6b14c86efe6e9c87397e Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 12:55:05 -0400 Subject: [PATCH 09/41] Update namespaces and add option to run redis with gce disk --- .../cs-workers/templates/db-deployment.yaml | 2 ++ workers/cs-workers/templates/db-service.yaml | 2 ++ .../cs-workers/templates/job-cleanup-Job.yaml | 2 +- .../cs-workers/templates/job-cleanup-RBAC.yaml | 3 ++- .../outputs-processor-Deployment.yaml | 7 +++++++ .../templates/redis-master-Deployment.yaml | 7 +++++++ .../cs-workers/templates/rq-Deployment.yaml | 7 +++++++ .../templates/scheduler-Deployment.yaml | 18 +++++++++++++++++- .../cs-workers/templates/scheduler-RBAC.yaml | 9 +++++---- workers/cs-workers/values.yaml | 5 +++++ 10 files changed, 55 insertions(+), 7 deletions(-) diff --git a/workers/cs-workers/templates/db-deployment.yaml b/workers/cs-workers/templates/db-deployment.yaml index 032709b2..1cccd7c0 100644 --- a/workers/cs-workers/templates/db-deployment.yaml +++ b/workers/cs-workers/templates/db-deployment.yaml @@ -1,3 +1,4 @@ +{{ if .Values.db.delploy_db }} apiVersion: apps/v1 kind: Deployment metadata: @@ -53,3 +54,4 @@ spec: {{- end}} nodeSelector: component: web +{{ end }} \ No newline at end of file diff --git a/workers/cs-workers/templates/db-service.yaml b/workers/cs-workers/templates/db-service.yaml index f0c8195f..db2f5dec 100644 --- a/workers/cs-workers/templates/db-service.yaml +++ b/workers/cs-workers/templates/db-service.yaml @@ -1,3 +1,4 @@ +{{ if .Values.deploy_db }} apiVersion: v1 kind: Service metadata: @@ -11,3 +12,4 @@ spec: targetPort: 5432 selector: app: workers-db +{{ end }} \ No newline at end of file diff --git a/workers/cs-workers/templates/job-cleanup-Job.yaml b/workers/cs-workers/templates/job-cleanup-Job.yaml index 017174f4..b1203828 100644 --- a/workers/cs-workers/templates/job-cleanup-Job.yaml +++ b/workers/cs-workers/templates/job-cleanup-Job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1beta1 kind: CronJob metadata: name: job-cleanup - namespace: {{ .Values.project_namespace }} + namespace: {{ .Values.workers_namespace }} spec: schedule: "*/30 * * * *" successfulJobsHistoryLimit: 0 diff --git a/workers/cs-workers/templates/job-cleanup-RBAC.yaml b/workers/cs-workers/templates/job-cleanup-RBAC.yaml index 6a71a8db..a4336ba4 100644 --- a/workers/cs-workers/templates/job-cleanup-RBAC.yaml +++ b/workers/cs-workers/templates/job-cleanup-RBAC.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: job-cleanup + namespace: {{ .Values.workers_namespace }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -21,7 +22,7 @@ metadata: subjects: - kind: ServiceAccount name: job-cleanup - namespace: {{ .Values.project_namespace }} + namespace: {{ .Values.workers_namespace }} roleRef: kind: Role name: job-remove diff --git a/workers/cs-workers/templates/outputs-processor-Deployment.yaml b/workers/cs-workers/templates/outputs-processor-Deployment.yaml index 1ec527b9..0700011a 100755 --- a/workers/cs-workers/templates/outputs-processor-Deployment.yaml +++ b/workers/cs-workers/templates/outputs-processor-Deployment.yaml @@ -28,5 +28,12 @@ spec: value: {{ .Values.redis.host }} - name: REDIS_PORT value: "{{ .Values.redis.port }}" + resources: + requests: + cpu: 1 + memory: 1G + limits: + cpu: 1 + memory: 2G nodeSelector: component: api diff --git a/workers/cs-workers/templates/redis-master-Deployment.yaml b/workers/cs-workers/templates/redis-master-Deployment.yaml index cc4ca44b..7c3a7791 100644 --- a/workers/cs-workers/templates/redis-master-Deployment.yaml +++ b/workers/cs-workers/templates/redis-master-Deployment.yaml @@ -38,9 +38,16 @@ spec: volumes: {{- range $name, $value := .Values.redis.volumes }} - name: {{ $value.name }} + {{ if $value.hostPath }} hostPath: path: {{ $value.hostPath.path }} type: {{ $value.hostPath.type }} + {{ end }} + {{ if $value.gcePersistentDisk }} + gcePersistentDisk: + pdName: {{ $value.gcePersistentDisk.pdName }} + fsType: {{ $value.gcePersistentDisk.fsType }} + {{ end }} {{- end}} nodeSelector: component: api diff --git a/workers/cs-workers/templates/rq-Deployment.yaml b/workers/cs-workers/templates/rq-Deployment.yaml index 1cbe734a..af24f36d 100644 --- a/workers/cs-workers/templates/rq-Deployment.yaml +++ b/workers/cs-workers/templates/rq-Deployment.yaml @@ -25,5 +25,12 @@ spec: value: {{ .Values.project }} - name: REDIS_URL value: "redis://{{ .Values.redis.host }}:{{ .Values.redis.port }}" + resources: + requests: + cpu: 1 + memory: 1G + limits: + cpu: 1 + memory: 2G nodeSelector: component: api diff --git a/workers/cs-workers/templates/scheduler-Deployment.yaml b/workers/cs-workers/templates/scheduler-Deployment.yaml index c7d5cabb..f3bc8b1f 100755 --- a/workers/cs-workers/templates/scheduler-Deployment.yaml +++ b/workers/cs-workers/templates/scheduler-Deployment.yaml @@ -13,7 +13,7 @@ spec: labels: app: api spec: - serviceAccountName: api + serviceAccountName: workers-api containers: - name: api image: "{{ .Values.registry }}/{{ .Values.project }}/workers_api:{{ .Values.tag }}" @@ -55,6 +55,22 @@ spec: secretKeyRef: name: workers-db-secret key: HOST + resources: + requests: + cpu: 1 + memory: 1G + limits: + cpu: 1 + memory: 2G + {{ if .Values.db.use_gcp_cloud_proxy }} + - name: cloud-sql-proxy + image: gcr.io/cloudsql-docker/gce-proxy:1.17 + command: + - "/cloud_sql_proxy" + - "-instances={{ .Values.db.gcp_sql_instance_name }}=tcp:5432" + securityContext: + runAsNonRoot: true + {{ end }} nodeSelector: component: api diff --git a/workers/cs-workers/templates/scheduler-RBAC.yaml b/workers/cs-workers/templates/scheduler-RBAC.yaml index 1b615fdb..b912d240 100644 --- a/workers/cs-workers/templates/scheduler-RBAC.yaml +++ b/workers/cs-workers/templates/scheduler-RBAC.yaml @@ -1,7 +1,8 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: api + name: workers-api + namespace: {{ .Values.workers_namespace }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -20,7 +21,7 @@ metadata: namespace: {{ .Values.project_namespace }} subjects: - kind: ServiceAccount - name: api + name: workers-api namespace: {{ .Values.workers_namespace }} roleRef: kind: Role @@ -41,10 +42,10 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: viz-admin - namespace: {{ .Values.workers_namespace }} + namespace: {{ .Values.project_namespace }} subjects: - kind: ServiceAccount - name: api + name: workers-api namespace: {{ .Values.workers_namespace }} roleRef: kind: Role diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index 7d119707..9933b748 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -37,6 +37,11 @@ db: user: postgres password: password host: workers-db + + # deploy_db: true + # use_gcp_cloud_proxy: false + # gcp_sql_instance_name: null + volumes: - name: workers-db-volume hostPath: From cb269dd7f5455538268c041b66d7905230642305 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 13:29:02 -0400 Subject: [PATCH 10/41] Fix namspace for compute job cleanup --- workers/cs-workers/templates/job-cleanup-Job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/cs-workers/templates/job-cleanup-Job.yaml b/workers/cs-workers/templates/job-cleanup-Job.yaml index b1203828..5bde05b9 100644 --- a/workers/cs-workers/templates/job-cleanup-Job.yaml +++ b/workers/cs-workers/templates/job-cleanup-Job.yaml @@ -14,5 +14,5 @@ spec: containers: - name: kubectl-container image: bitnami/kubectl:latest - command: ["sh", "-c", "kubectl delete jobs --field-selector status.successful=1"] + command: ["sh", "-c", "kubectl delete jobs --namespace {{ .Values.project_namespace }} --field-selector status.successful=1"] restartPolicy: Never From 5e0cc95647e1431ecda5bcf1443ac8c21c046f05 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 13:42:53 -0400 Subject: [PATCH 11/41] Set up traefik routes --- .../templates/scheduler-ingressroute.yaml | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/workers/cs-workers/templates/scheduler-ingressroute.yaml b/workers/cs-workers/templates/scheduler-ingressroute.yaml index 70400b51..56d1bf75 100644 --- a/workers/cs-workers/templates/scheduler-ingressroute.yaml +++ b/workers/cs-workers/templates/scheduler-ingressroute.yaml @@ -1,4 +1,4 @@ -{{ if .Values.use_traefik }} +{{ if .Values.workers_host }} apiVersion: traefik.containo.us/v1alpha1 kind: IngressRoute metadata: @@ -8,7 +8,24 @@ spec: entryPoints: - websecure routes: - - match: + - match: Host(`{{ .Values.workers_host }}`) + kind: Rule + services: + - name: api + port: 80 + tls: + certResolver: myresolver +--- +apiVersion: traefik.containo.us/v1alpha1 +kind: IngressRoute +metadata: + name: api + namespace: {{ .Values.workers_namespace }} +spec: + entryPoints: + - web + routes: + - match: Host(`{{ .Values.workers_host }}`) kind: Rule services: - name: api From 77285e8b39f3e4aae0354d94fd84c705199b8bc1 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 15:35:10 -0400 Subject: [PATCH 12/41] rename template files from 'scheduler' to 'api' --- .../templates/{scheduler-Deployment.yaml => api-Deployment.yaml} | 0 .../cs-workers/templates/{scheduler-RBAC.yaml => api-RBAC.yaml} | 0 .../templates/{scheduler-Service.yaml => api-Service.yaml} | 0 .../{scheduler-ingressroute.yaml => api-ingressroute.yaml} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename workers/cs-workers/templates/{scheduler-Deployment.yaml => api-Deployment.yaml} (100%) rename workers/cs-workers/templates/{scheduler-RBAC.yaml => api-RBAC.yaml} (100%) rename workers/cs-workers/templates/{scheduler-Service.yaml => api-Service.yaml} (100%) rename workers/cs-workers/templates/{scheduler-ingressroute.yaml => api-ingressroute.yaml} (100%) diff --git a/workers/cs-workers/templates/scheduler-Deployment.yaml b/workers/cs-workers/templates/api-Deployment.yaml similarity index 100% rename from workers/cs-workers/templates/scheduler-Deployment.yaml rename to workers/cs-workers/templates/api-Deployment.yaml diff --git a/workers/cs-workers/templates/scheduler-RBAC.yaml b/workers/cs-workers/templates/api-RBAC.yaml similarity index 100% rename from workers/cs-workers/templates/scheduler-RBAC.yaml rename to workers/cs-workers/templates/api-RBAC.yaml diff --git a/workers/cs-workers/templates/scheduler-Service.yaml b/workers/cs-workers/templates/api-Service.yaml similarity index 100% rename from workers/cs-workers/templates/scheduler-Service.yaml rename to workers/cs-workers/templates/api-Service.yaml diff --git a/workers/cs-workers/templates/scheduler-ingressroute.yaml b/workers/cs-workers/templates/api-ingressroute.yaml similarity index 100% rename from workers/cs-workers/templates/scheduler-ingressroute.yaml rename to workers/cs-workers/templates/api-ingressroute.yaml From 58c2e69fe047e9a4801734165ecbb8232dab51b5 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 16:06:32 -0400 Subject: [PATCH 13/41] Update action to deploy new workers api --- .github/workflows/dev-workers-deploy.yml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index 2923b154..02d96afe 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -4,10 +4,11 @@ on: push: branches: - dev + - ci-api paths: - "workers/**" - "deploy/**" - - ".github/workflows/dev-workers-deploy.yml" + - ".github/workflows/deprecated-dev-workers-deploy.yml" # Environment variables available to all jobs and steps in this workflow env: @@ -20,7 +21,6 @@ env: REGISTRY_HOSTNAME: gcr.io PROJECT: ${{ secrets.DEV_GKE_PROJECT }} - HOST: dev.compute.studio TAG: ${{ github.sha }} CS_CONFIG: ${{ secrets.DEV_CS_CONFIG }} @@ -58,9 +58,10 @@ jobs: # Set up docker to authenticate gcloud auth configure-docker - - name: Set cs-config.yaml file. + - name: Set cs-config.yaml and values.yaml files. run: | echo $CS_CONFIG | base64 --decode > cs-config.yaml + echo $DEV_WORKERS_VALUES | base64 --decode > ./workers/values.yaml - name: Build Docker Images run: | @@ -74,5 +75,11 @@ jobs: - name: Deploy run: | gcloud container clusters get-credentials $GKE_CLUSTER --zone $GKE_ZONE --project $GKE_PROJECT - cs workers svc config -o - --update-dns | kubectl apply -f - - kubectl get pods -o wide + cd workers + helm template cs-workers \ + --set project=$PROJECT \ + --set tag=$TAG \ + --set api.secret_key=$(cs secrets get WORKERS_API_SECRET_KEY) \ + --set db.password=$(cs secrets get WORKERS_DB_PASSWORD) \ + --namespace workers \ + -f values.yaml | kubectl apply -f - From f229814cd376453713af04b06f2e5039bd3704ce Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 16:07:12 -0400 Subject: [PATCH 14/41] Fix workflow file name that triggers deploy --- .github/workflows/dev-workers-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index 02d96afe..7d50a514 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -8,7 +8,7 @@ on: paths: - "workers/**" - "deploy/**" - - ".github/workflows/deprecated-dev-workers-deploy.yml" + - ".github/workflows/dev-workers-deploy.yml" # Environment variables available to all jobs and steps in this workflow env: From 71d8113aa3bb8cc2fe526f9a604b7f16a5a8f28d Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 16:25:47 -0400 Subject: [PATCH 15/41] Pin redis to fix deployment with volume issue --- workers/cs-workers/templates/redis-master-Deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/cs-workers/templates/redis-master-Deployment.yaml b/workers/cs-workers/templates/redis-master-Deployment.yaml index 7c3a7791..bd511e2b 100644 --- a/workers/cs-workers/templates/redis-master-Deployment.yaml +++ b/workers/cs-workers/templates/redis-master-Deployment.yaml @@ -22,7 +22,7 @@ spec: containers: - env: [] command: ["redis-server", "--appendonly", "yes"] - image: redis + image: redis:6.2.1 name: master ports: - containerPort: 6379 From 9f5df21bf928b9c4daa2ceeb7b5fab2617d6f505 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 16:28:24 -0400 Subject: [PATCH 16/41] Deploy _dev_ webapp on ci-api branch --- .github/workflows/dev-webapp-deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dev-webapp-deploy.yml b/.github/workflows/dev-webapp-deploy.yml index 64eddb5f..ba0b0092 100644 --- a/.github/workflows/dev-webapp-deploy.yml +++ b/.github/workflows/dev-webapp-deploy.yml @@ -4,6 +4,7 @@ on: push: branches: - dev + - ci-api paths: - "webapp/**" - "Dockerfile" From 58a2dd79aead958e38373992ea10b6e2d6f16fc1 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 10 Apr 2021 17:37:58 -0400 Subject: [PATCH 17/41] Use non-default values.yaml file name --- .github/workflows/dev-workers-deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index 7d50a514..388af3f1 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -61,7 +61,7 @@ jobs: - name: Set cs-config.yaml and values.yaml files. run: | echo $CS_CONFIG | base64 --decode > cs-config.yaml - echo $DEV_WORKERS_VALUES | base64 --decode > ./workers/values.yaml + echo $DEV_WORKERS_VALUES | base64 --decode > ./workers/values.deploy.yaml - name: Build Docker Images run: | @@ -82,4 +82,4 @@ jobs: --set api.secret_key=$(cs secrets get WORKERS_API_SECRET_KEY) \ --set db.password=$(cs secrets get WORKERS_DB_PASSWORD) \ --namespace workers \ - -f values.yaml | kubectl apply -f - + -f values.deploy.yaml | kubectl apply -f - From 4c008c3fdfbf643ef35848103b6fc36ee4e71be7 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sun, 11 Apr 2021 13:39:25 -0400 Subject: [PATCH 18/41] Set values in the env section --- .github/workflows/dev-workers-deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index 388af3f1..8c806f12 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -24,6 +24,7 @@ env: TAG: ${{ github.sha }} CS_CONFIG: ${{ secrets.DEV_CS_CONFIG }} + DEV_WORKERS_VALUES: ${{ secrets.DEV_WORKERS_VALUES }} jobs: setup-build-publish-deploy: From 19fc59b2a47528ab0d95f25765030b3a27af4df3 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sun, 11 Apr 2021 16:09:49 -0400 Subject: [PATCH 19/41] Add deployments api to new workers cluster --- webapp/apps/comp/views/views.py | 4 +- .../users/migrations/0027_cluster_viz_host.py | 18 +++ webapp/apps/users/models.py | 25 ++- .../cs-workers/templates/api-Deployment.yaml | 4 + workers/cs-workers/values.yaml | 1 + workers/cs_workers/services/api/main.py | 3 +- .../services/api/routers/deployments.py | 153 ++++++++++++++++++ .../services/api/routers/simulations.py | 9 +- workers/cs_workers/services/api/schemas.py | 26 +++ workers/cs_workers/services/api/settings.py | 9 ++ 10 files changed, 242 insertions(+), 10 deletions(-) create mode 100755 webapp/apps/users/migrations/0027_cluster_viz_host.py diff --git a/webapp/apps/comp/views/views.py b/webapp/apps/comp/views/views.py index 28b63dba..a98a819e 100755 --- a/webapp/apps/comp/views/views.py +++ b/webapp/apps/comp/views/views.py @@ -184,7 +184,7 @@ def get(self, request, *args, **kwargs): context["tech"] = project.tech context["object"] = project context["deployment"] = deployment - context["viz_host"] = DEFAULT_VIZ_HOST + context["viz_host"] = project.cluster.viz_host or DEFAULT_VIZ_HOST context["protocol"] = "https" return render(request, self.template_name, context) @@ -212,7 +212,7 @@ def get(self, request, *args, **kwargs): "object": project, "deployment": deployment, "protocol": "https", - "viz_host": DEFAULT_VIZ_HOST, + "viz_host": project.cluster.viz_host or DEFAULT_VIZ_HOST, } response = render(request, self.template_name, context) diff --git a/webapp/apps/users/migrations/0027_cluster_viz_host.py b/webapp/apps/users/migrations/0027_cluster_viz_host.py new file mode 100755 index 00000000..8b72d11e --- /dev/null +++ b/webapp/apps/users/migrations/0027_cluster_viz_host.py @@ -0,0 +1,18 @@ +# Generated by Django 3.0.14 on 2021-04-11 20:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0026_tag_version"), + ] + + operations = [ + migrations.AddField( + model_name="cluster", + name="viz_host", + field=models.CharField(max_length=128, null=True), + ), + ] diff --git a/webapp/apps/users/models.py b/webapp/apps/users/models.py index 12e01735..873cdf17 100755 --- a/webapp/apps/users/models.py +++ b/webapp/apps/users/models.py @@ -202,6 +202,9 @@ class Cluster(models.Model): access_token = models.CharField(max_length=512, null=True) access_token_expires_at = models.DateTimeField(null=True) + # Make viz host configurable to work with multiple clusters at once. + viz_host = models.CharField(max_length=128, null=True) + version = models.CharField(null=False, max_length=32) objects = ClusterManager() @@ -262,6 +265,13 @@ def create_user_in_cluster(self, cs_url): raise Exception(f"{resp.status_code} {resp.text}") + @property + def path_prefix(self): + if self.version == "v0": + return "" + else: + return "/api/v1" + class ProjectPermissions: READ = ( @@ -794,10 +804,11 @@ def create_deployment(self): self.tag = self.project.latest_tag self.save() + cluster: Cluster = self.project.cluster resp = requests.post( - f"{self.project.cluster.url}/deployments/{self.project}/", + f"{cluster.url}{cluster.path_prefix}/deployments/{self.project}/", json={"deployment_name": self.public_name, "tag": str(self.tag)}, - headers=self.project.cluster.headers(), + headers=cluster.headers(), ) if resp.status_code == 200: @@ -810,17 +821,19 @@ def create_deployment(self): raise Exception(f"{resp.status_code} {resp.text}") def get_deployment(self): + cluster: Cluster = self.project.cluster resp = requests.get( - f"{self.project.cluster.url}/deployments/{self.project}/{self.public_name}/", - headers=self.project.cluster.headers(), + f"{cluster.url}{cluster.path_prefix}/deployments/{self.project}/{self.public_name}/", + headers=cluster.headers(), ) assert resp.status_code == 200, f"Got {resp.status_code}, {resp.text}" return resp.json() def delete_deployment(self): + cluster: Cluster = self.project.cluster resp = requests.delete( - f"{self.project.cluster.url}/deployments/{self.project}/{self.public_name}/", - headers=self.project.cluster.headers(), + f"{cluster.url}{cluster.path_prefix}/deployments/{self.project}/{self.public_name}/", + headers=cluster.headers(), ) assert resp.status_code == 200, f"Got {resp.status_code}, {resp.text}" self.deleted_at = timezone.now() diff --git a/workers/cs-workers/templates/api-Deployment.yaml b/workers/cs-workers/templates/api-Deployment.yaml index f3bc8b1f..e32ffb0e 100755 --- a/workers/cs-workers/templates/api-Deployment.yaml +++ b/workers/cs-workers/templates/api-Deployment.yaml @@ -24,6 +24,10 @@ spec: value: "{{ .Values.bucket }}" - name: PROJECT value: "{{ .Values.project }}" + {{ if .Values.workers_host }} + - name: HOST + value: "{{ .Values.workers_host }}" + {{ end }} - name: VIZ_HOST value: "{{ .Values.viz_host }}" - name: API_SECRET_KEY diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index 9933b748..7e2c5009 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -4,6 +4,7 @@ replicaCount: 1 bucket: cs-outputs-dev-private + viz_host: devviz.compute.studio # image: project: project diff --git a/workers/cs_workers/services/api/main.py b/workers/cs_workers/services/api/main.py index 97264c75..cbdf4df6 100644 --- a/workers/cs_workers/services/api/main.py +++ b/workers/cs_workers/services/api/main.py @@ -2,7 +2,7 @@ from starlette.middleware.cors import CORSMiddleware from .settings import settings -from .routers import users, login, projects, simulations +from .routers import users, login, projects, simulations, deployments app = FastAPI( title=settings.PROJECT_NAME, openapi_url=f"{settings.API_PREFIX_STR}/openapi.json", @@ -22,3 +22,4 @@ app.include_router(users.router, prefix=settings.API_PREFIX_STR) app.include_router(projects.router, prefix=settings.API_PREFIX_STR) app.include_router(simulations.router, prefix=settings.API_PREFIX_STR) +app.include_router(deployments.router, prefix=settings.API_PREFIX_STR) diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py index e69de29b..cc71436d 100644 --- a/workers/cs_workers/services/api/routers/deployments.py +++ b/workers/cs_workers/services/api/routers/deployments.py @@ -0,0 +1,153 @@ +import os + +from fastapi import APIRouter, Depends, Body, HTTPException +from sqlalchemy.orm import Session + +from cs_workers.models.clients import server +from .. import models, schemas, dependencies as deps, security, settings + +incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False + +PROJECT = os.environ.get("PROJECT") + + +router = APIRouter(prefix="/deployments", tags=["deployments"]) + + +@router.post( + "/{owner}/{title}/", response_model=schemas.DeploymentReadyStats, status_code=201 +) +def create_deployment( + owner: str, + title: str, + data: schemas.DeploymentCreate = Body(...), + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + print("create deployment", data) + project: models.Project = ( + db.query(models.Project) + .filter( + models.Project.owner == owner, + models.Project.title == title, + models.Project.user_id == user.id, + ) + .one_or_none() + ) + + if not project: + raise HTTPException(status_code=404, detail="Project not found.") + + if project["tech"] not in ("dash", "bokeh"): + return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + + viz = server.Server( + project=PROJECT, + owner=project.owner, + title=project.title, + tag=data.tag, + model_config=project, + callable_name=project.callable_name, + deployment_name=data.deployment_name, + incluster=incluster, + viz_host=settings.settings.VIZ_HOST, + namespace=settings.settings.PROJECT_NAMESPACE, + ) + dep = viz.deployment_from_cluster() + if dep is not None: + raise HTTPException(status_code=400, detail="Deployment is already running.") + + viz.configure() + viz.create() + ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) + return ready_stats + + +@router.get( + "/{owner}/{title}/{deployment_name}", + response_model=schemas.DeploymentReadyStats, + status_code=201, +) +def get_deployment( + owner: str, + title: str, + deployment_name: str, + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + project: models.Project = ( + db.query(models.Project) + .filter( + models.Project.owner == owner, + models.Project.title == title, + models.Project.user_id == user.id, + ) + .one_or_none() + ) + + if not project: + raise HTTPException(status_code=404, detail="Project not found.") + + if project["tech"] not in ("dash", "bokeh"): + return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + + viz = server.Server( + project=PROJECT, + owner=project.owner, + title=project.title, + tag=None, + model_config=project, + callable_name=project.callable_name, + deployment_name=deployment_name, + incluster=incluster, + viz_host=settings.settings.VIZ_HOST, + namespace=settings.settings.PROJECT_NAMESPACE, + ) + + ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) + return ready_stats + + +@router.delete( + "/{owner}/{title}/{deployment_name}", + response_model=schemas.DeploymentReadyStats, + status_code=201, +) +def delete_deployment( + owner: str, + title: str, + deployment_name: str, + db: Session = Depends(deps.get_db), + user: schemas.User = Depends(deps.get_current_active_user), +): + project: models.Project = ( + db.query(models.Project) + .filter( + models.Project.owner == owner, + models.Project.title == title, + models.Project.user_id == user.id, + ) + .one_or_none() + ) + + if not project: + raise HTTPException(status_code=404, detail="Project not found.") + + if project["tech"] not in ("dash", "bokeh"): + return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + + viz = server.Server( + project=PROJECT, + owner=project.owner, + title=project.title, + tag=None, + model_config=project, + callable_name=project.callable_name, + deployment_name=deployment_name, + incluster=incluster, + viz_host=settings.settings.VIZ_HOST, + namespace=settings.settings.PROJECT_NAMESPACE, + ) + + ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) + return ready_stats diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index 0d6a05bf..7d9a842f 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -146,6 +146,13 @@ def create_job( "limits": {"memory": f"{math.ceil(mem * 1.2)}G", "cpu": cpu,}, } + if settings.settings.HOST: + url = f"https://{settings.settings.HOST}" + else: + url = f"http://api.{settings.settings.NAMESPACE}.svc.cluster.local" + + url += settings.settings.API_PREFIX_STR + client = job.Job( PROJECT, owner, @@ -153,7 +160,7 @@ def create_job( tag=tag, model_config=project_data, job_id=instance.id, - callback_url=f"http://api.workers.svc.cluster.local/api/v1/jobs/callback/{instance.id}/", + callback_url=f"{url}/jobs/callback/{instance.id}/", route_name=task_name, incluster=incluster, namespace=settings.settings.PROJECT_NAMESPACE, diff --git a/workers/cs_workers/services/api/schemas.py b/workers/cs_workers/services/api/schemas.py index 40b65a65..96ca967c 100644 --- a/workers/cs_workers/services/api/schemas.py +++ b/workers/cs_workers/services/api/schemas.py @@ -120,3 +120,29 @@ class Project(ProjectSync): class Config: orm_mode = True extra = "ignore" + + +class DeploymentCreate(BaseModel): + tag: str + deployment_name: str + + +class ReadyStats(BaseModel): + created_at: datetime + ready: bool + + +class DeploymentReadyStats(BaseModel): + deployment: ReadyStats + svc: ReadyStats + ingressroute: ReadyStats + + +class Deleted(BaseModel): + deleted: bool + + +class DeploymentDelete(BaseModel): + deployment: Deleted + svc: Deleted + ingressroute: Deleted diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py index 5f7c2ebe..46f08f04 100644 --- a/workers/cs_workers/services/api/settings.py +++ b/workers/cs_workers/services/api/settings.py @@ -21,6 +21,9 @@ class Settings(BaseSettings): "https://hdoupe.ngrok.io", ] + HOST: Optional[str] + VIZ_HOST: str + @validator("BACKEND_CORS_ORIGINS", pre=True) def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]: if isinstance(v, str) and not v.startswith("["): @@ -32,6 +35,12 @@ def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str PROJECT_NAMESPACE: str @validator("PROJECT_NAMESPACE", pre=True) + def get_project_namespace(cls, v: Optional[str]) -> str: + return v or "default" + + NAMESPACE: str + + @validator("NAMESPACE", pre=True) def get_project_namespace(cls, v: Optional[str]) -> str: if v: return v From da988bd15d433ab46210070664f2b90749723ab0 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sun, 11 Apr 2021 16:45:25 -0400 Subject: [PATCH 20/41] Make namespace var optional --- workers/cs_workers/services/api/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py index 46f08f04..2a26f7e7 100644 --- a/workers/cs_workers/services/api/settings.py +++ b/workers/cs_workers/services/api/settings.py @@ -38,10 +38,10 @@ def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str def get_project_namespace(cls, v: Optional[str]) -> str: return v or "default" - NAMESPACE: str + NAMESPACE: Optional[str] @validator("NAMESPACE", pre=True) - def get_project_namespace(cls, v: Optional[str]) -> str: + def get_namespace(cls, v: Optional[str]) -> str: if v: return v elif Path(NAMESPACE_PATH).exists(): From ba21138bf682d5e001007c39346a78405adbcb6d Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 09:55:19 -0400 Subject: [PATCH 21/41] Add trailing slash --- workers/cs_workers/services/api/routers/deployments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py index cc71436d..09a5dc6b 100644 --- a/workers/cs_workers/services/api/routers/deployments.py +++ b/workers/cs_workers/services/api/routers/deployments.py @@ -64,7 +64,7 @@ def create_deployment( @router.get( - "/{owner}/{title}/{deployment_name}", + "/{owner}/{title}/{deployment_name}/", response_model=schemas.DeploymentReadyStats, status_code=201, ) @@ -109,7 +109,7 @@ def get_deployment( @router.delete( - "/{owner}/{title}/{deployment_name}", + "/{owner}/{title}/{deployment_name}/", response_model=schemas.DeploymentReadyStats, status_code=201, ) From c7d04e5ce7b671bdfb0141cf0a45af818ec000fb Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 10:35:02 -0400 Subject: [PATCH 22/41] Add app location and fix server args --- ...661c80_add_app_location_for_deployments.py | 28 +++++++++++++++++++ workers/cs_workers/services/api/models.py | 1 + .../services/api/routers/deployments.py | 23 ++++++++++----- .../services/api/routers/simulations.py | 10 ++----- 4 files changed, 47 insertions(+), 15 deletions(-) create mode 100644 workers/cs_workers/services/api/alembic/versions/49437c661c80_add_app_location_for_deployments.py diff --git a/workers/cs_workers/services/api/alembic/versions/49437c661c80_add_app_location_for_deployments.py b/workers/cs_workers/services/api/alembic/versions/49437c661c80_add_app_location_for_deployments.py new file mode 100644 index 00000000..a5f0513e --- /dev/null +++ b/workers/cs_workers/services/api/alembic/versions/49437c661c80_add_app_location_for_deployments.py @@ -0,0 +1,28 @@ +"""Add app location for deployments + +Revision ID: 49437c661c80 +Revises: f027333560c0 +Create Date: 2021-04-12 14:32:50.280249+00:00 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "49437c661c80" +down_revision = "f027333560c0" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("projects", sa.Column("app_location", sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("projects", "app_location") + # ### end Alembic commands ### diff --git a/workers/cs_workers/services/api/models.py b/workers/cs_workers/services/api/models.py index e181f991..2dc40d19 100644 --- a/workers/cs_workers/services/api/models.py +++ b/workers/cs_workers/services/api/models.py @@ -60,6 +60,7 @@ class Project(Base): user_id = Column(Integer, ForeignKey("users.id")) owner = Column(String, nullable=False, index=True) title = Column(String, nullable=False, index=True) + app_location = Column(String, nullable=True) tech = Column(String, nullable=False) callable_name = Column(String) exp_task_time = Column(String, nullable=False) diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py index 09a5dc6b..a701478f 100644 --- a/workers/cs_workers/services/api/routers/deployments.py +++ b/workers/cs_workers/services/api/routers/deployments.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import Session from cs_workers.models.clients import server -from .. import models, schemas, dependencies as deps, security, settings +from .. import utils, models, schemas, dependencies as deps, security, settings incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False @@ -38,15 +38,18 @@ def create_deployment( if not project: raise HTTPException(status_code=404, detail="Project not found.") - if project["tech"] not in ("dash", "bokeh"): + if project.tech not in ("dash", "bokeh"): return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + project_data = schemas.Project.from_orm(project).dict() + utils.set_resource_requirements(project_data) + viz = server.Server( project=PROJECT, owner=project.owner, title=project.title, tag=data.tag, - model_config=project, + model_config=project_data, callable_name=project.callable_name, deployment_name=data.deployment_name, incluster=incluster, @@ -88,15 +91,18 @@ def get_deployment( if not project: raise HTTPException(status_code=404, detail="Project not found.") - if project["tech"] not in ("dash", "bokeh"): + if project.tech not in ("dash", "bokeh"): return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + project_data = schemas.Project.from_orm(project).dict() + utils.set_resource_requirements(project_data) + viz = server.Server( project=PROJECT, owner=project.owner, title=project.title, tag=None, - model_config=project, + model_config=project_data, callable_name=project.callable_name, deployment_name=deployment_name, incluster=incluster, @@ -133,15 +139,18 @@ def delete_deployment( if not project: raise HTTPException(status_code=404, detail="Project not found.") - if project["tech"] not in ("dash", "bokeh"): + if project.tech not in ("dash", "bokeh"): return HTTPException(status_code=400, detail=f"Unsuported tech: {project.tech}") + project_data = schemas.Project.from_orm(project).dict() + utils.set_resource_requirements(project_data) + viz = server.Server( project=PROJECT, owner=project.owner, title=project.title, tag=None, - model_config=project, + model_config=project_data, callable_name=project.callable_name, deployment_name=deployment_name, incluster=incluster, diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index 7d9a842f..0f7b1d67 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -8,7 +8,7 @@ from sqlalchemy.orm import Session from cs_workers.models.clients import job -from .. import models, schemas, dependencies as deps, security, settings +from .. import utils, models, schemas, dependencies as deps, security, settings incluster = os.environ.get("KUBERNETES_SERVICE_HOST", False) is not False @@ -138,13 +138,7 @@ def create_job( db.refresh(instance) project_data = schemas.Project.from_orm(project).dict() - mem = float(project_data.pop("memory")) - cpu = float(project_data.pop("cpu")) - if cpu and mem: - project_data["resources"] = { - "requests": {"memory": f"{mem}G", "cpu": cpu}, - "limits": {"memory": f"{math.ceil(mem * 1.2)}G", "cpu": cpu,}, - } + utils.set_resource_requirements(project_data) if settings.settings.HOST: url = f"https://{settings.settings.HOST}" From b1d7bbd9bc562dfaa26ecd433df0a6f058878321 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 10:44:47 -0400 Subject: [PATCH 23/41] Add utils module --- workers/cs_workers/services/api/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 workers/cs_workers/services/api/utils.py diff --git a/workers/cs_workers/services/api/utils.py b/workers/cs_workers/services/api/utils.py new file mode 100644 index 00000000..2093a33d --- /dev/null +++ b/workers/cs_workers/services/api/utils.py @@ -0,0 +1,11 @@ +import math + + +def set_resource_requirements(project_data): + mem = float(project_data.pop("memory")) + cpu = float(project_data.pop("cpu")) + if cpu and mem: + project_data["resources"] = { + "requests": {"memory": f"{mem}G", "cpu": cpu}, + "limits": {"memory": f"{math.ceil(mem * 1.2)}G", "cpu": cpu,}, + } From 1260cba105df91a989a24edfcca06a1f4eb2ff52 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 11:01:19 -0400 Subject: [PATCH 24/41] Fix deployment pydantic args and deployment delete endpoint --- workers/cs_workers/services/api/routers/deployments.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py index a701478f..d9b577e9 100644 --- a/workers/cs_workers/services/api/routers/deployments.py +++ b/workers/cs_workers/services/api/routers/deployments.py @@ -62,7 +62,7 @@ def create_deployment( viz.configure() viz.create() - ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) + ready_stats = schemas.DeploymentReadyStats(**viz.ready_stats()) return ready_stats @@ -110,13 +110,13 @@ def get_deployment( namespace=settings.settings.PROJECT_NAMESPACE, ) - ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) + ready_stats = schemas.DeploymentReadyStats(**viz.ready_stats()) return ready_stats @router.delete( "/{owner}/{title}/{deployment_name}/", - response_model=schemas.DeploymentReadyStats, + response_model=schemas.DeploymentDelete, status_code=201, ) def delete_deployment( @@ -158,5 +158,5 @@ def delete_deployment( namespace=settings.settings.PROJECT_NAMESPACE, ) - ready_stats = schemas.DeploymentReadyStats(viz.ready_stats()) - return ready_stats + delete = schemas.DeploymentDelete(**viz.delete()) + return delete From 54409d15fc8d1ef78c95b9254411d45f7a1142a9 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 11:14:32 -0400 Subject: [PATCH 25/41] Make created_at optional in schema --- workers/cs_workers/services/api/schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/cs_workers/services/api/schemas.py b/workers/cs_workers/services/api/schemas.py index 96ca967c..ae601ea0 100644 --- a/workers/cs_workers/services/api/schemas.py +++ b/workers/cs_workers/services/api/schemas.py @@ -128,7 +128,7 @@ class DeploymentCreate(BaseModel): class ReadyStats(BaseModel): - created_at: datetime + created_at: Optional[datetime] ready: bool From 6d5e5014eea90740170a290827b7bcdba7aec6ac Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 11:27:46 -0400 Subject: [PATCH 26/41] Fix deployments status codes --- workers/cs_workers/services/api/routers/deployments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workers/cs_workers/services/api/routers/deployments.py b/workers/cs_workers/services/api/routers/deployments.py index d9b577e9..bd23b2af 100644 --- a/workers/cs_workers/services/api/routers/deployments.py +++ b/workers/cs_workers/services/api/routers/deployments.py @@ -69,7 +69,7 @@ def create_deployment( @router.get( "/{owner}/{title}/{deployment_name}/", response_model=schemas.DeploymentReadyStats, - status_code=201, + status_code=200, ) def get_deployment( owner: str, @@ -117,7 +117,7 @@ def get_deployment( @router.delete( "/{owner}/{title}/{deployment_name}/", response_model=schemas.DeploymentDelete, - status_code=201, + status_code=200, ) def delete_deployment( owner: str, From 3d03ed01a2255ad54fbf8939cf90f774ff360612 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 12:05:25 -0400 Subject: [PATCH 27/41] Add app_location to schema --- workers/cs_workers/services/api/schemas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/workers/cs_workers/services/api/schemas.py b/workers/cs_workers/services/api/schemas.py index ae601ea0..5c771dcf 100644 --- a/workers/cs_workers/services/api/schemas.py +++ b/workers/cs_workers/services/api/schemas.py @@ -109,6 +109,7 @@ class ProjectSync(BaseModel): title: str tech: str callable_name: Optional[str] + app_location: Optional[str] exp_task_time: int cpu: float memory: float From 3f1c5cfc95d1b8ea1392afc0962be2de54843d0f Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 13:44:14 -0400 Subject: [PATCH 28/41] Update project on sync and add more logging --- workers/cs_workers/services/api/routers/projects.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/workers/cs_workers/services/api/routers/projects.py b/workers/cs_workers/services/api/routers/projects.py index 8e862d11..abd1ac73 100644 --- a/workers/cs_workers/services/api/routers/projects.py +++ b/workers/cs_workers/services/api/routers/projects.py @@ -25,8 +25,15 @@ def sync_projects( ) .one_or_none() ) + project_data = project.dict() if orm_project is None: - orm_project = models.Project(**project.dict(), user_id=user.id) + print("creating object from data", project_data) + orm_project = models.Project(**project_data, user_id=user.id) + else: + print("updating object from data", project_data) + for attr, val in project.dict().items(): + print("setting", attr, val) + setattr(orm_project, attr, val) orm_projects.append(orm_project) db.add_all(orm_projects) db.commit() From d3adf91c59a47880831e12e35b0451019d4113e6 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 13:44:56 -0400 Subject: [PATCH 29/41] Add more logging for deployment creation --- workers/cs_workers/models/clients/server.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workers/cs_workers/models/clients/server.py b/workers/cs_workers/models/clients/server.py index 3f31290d..271a0533 100644 --- a/workers/cs_workers/models/clients/server.py +++ b/workers/cs_workers/models/clients/server.py @@ -273,14 +273,19 @@ def create(self): deployment_resp = self.deployment_api_client.create_namespaced_deployment( namespace=self.namespace, body=self.deployment ) + print("dep resp") + print(deployment_resp) service_resp = self.service_api_client.create_namespaced_service( namespace=self.namespace, body=self.service ) - + print("svc resp") + print(service_resp) ingressroute_resp = self.ir_api_client.create_namespaced_ingressroute( namespace=self.namespace, body=self.ingressroute ) + print("ir resp") + print(ingressroute_resp) return deployment_resp, service_resp, ingressroute_resp From 5f8f9002688ac61110aa6991ab9e6e736da52be5 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 12 Apr 2021 14:18:39 -0400 Subject: [PATCH 30/41] Allow 201 response on deployment create --- webapp/apps/users/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webapp/apps/users/models.py b/webapp/apps/users/models.py index 873cdf17..cd1e6316 100755 --- a/webapp/apps/users/models.py +++ b/webapp/apps/users/models.py @@ -811,7 +811,7 @@ def create_deployment(self): headers=cluster.headers(), ) - if resp.status_code == 200: + if resp.status_code in (200, 201): return resp.json() elif resp.status_code == 400: data = resp.json() From 972319fa9ace519259d4918b9f94a3af7e1a95e7 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 12 May 2021 23:02:02 -0400 Subject: [PATCH 31/41] Fix local db and host configuration for workers api --- workers/cs-workers/templates/api-Deployment.yaml | 6 +++--- workers/cs-workers/templates/api-ingressroute.yaml | 8 ++++---- workers/cs-workers/templates/db-deployment.yaml | 2 +- workers/cs-workers/templates/db-service.yaml | 2 +- workers/cs-workers/values.yaml | 2 +- workers/cs_workers/services/api/routers/simulations.py | 4 ++-- workers/cs_workers/services/api/settings.py | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/workers/cs-workers/templates/api-Deployment.yaml b/workers/cs-workers/templates/api-Deployment.yaml index e32ffb0e..8129aa27 100755 --- a/workers/cs-workers/templates/api-Deployment.yaml +++ b/workers/cs-workers/templates/api-Deployment.yaml @@ -24,9 +24,9 @@ spec: value: "{{ .Values.bucket }}" - name: PROJECT value: "{{ .Values.project }}" - {{ if .Values.workers_host }} - - name: HOST - value: "{{ .Values.workers_host }}" + {{ if .Values.workers_api_host }} + - name: WORKERS_API_HOST + value: "{{ .Values.workers_api_host }}" {{ end }} - name: VIZ_HOST value: "{{ .Values.viz_host }}" diff --git a/workers/cs-workers/templates/api-ingressroute.yaml b/workers/cs-workers/templates/api-ingressroute.yaml index 56d1bf75..187dcf41 100644 --- a/workers/cs-workers/templates/api-ingressroute.yaml +++ b/workers/cs-workers/templates/api-ingressroute.yaml @@ -1,14 +1,14 @@ -{{ if .Values.workers_host }} +{{ if .Values.workers_api_host }} apiVersion: traefik.containo.us/v1alpha1 kind: IngressRoute metadata: name: api-tls - namespace: {{ .Values.workers_namespace }} + namespace: {{ .Values.workers_api_namespace }} spec: entryPoints: - websecure routes: - - match: Host(`{{ .Values.workers_host }}`) + - match: Host(`{{ .Values.workers_api_host }}`) kind: Rule services: - name: api @@ -25,7 +25,7 @@ spec: entryPoints: - web routes: - - match: Host(`{{ .Values.workers_host }}`) + - match: Host(`{{ .Values.workers_api_host }}`) kind: Rule services: - name: api diff --git a/workers/cs-workers/templates/db-deployment.yaml b/workers/cs-workers/templates/db-deployment.yaml index 1cccd7c0..5de8196f 100644 --- a/workers/cs-workers/templates/db-deployment.yaml +++ b/workers/cs-workers/templates/db-deployment.yaml @@ -1,4 +1,4 @@ -{{ if .Values.db.delploy_db }} +{{ if .Values.db.deploy_db }} apiVersion: apps/v1 kind: Deployment metadata: diff --git a/workers/cs-workers/templates/db-service.yaml b/workers/cs-workers/templates/db-service.yaml index db2f5dec..6e8cf665 100644 --- a/workers/cs-workers/templates/db-service.yaml +++ b/workers/cs-workers/templates/db-service.yaml @@ -1,4 +1,4 @@ -{{ if .Values.deploy_db }} +{{ if .Values.db.deploy_db }} apiVersion: v1 kind: Service metadata: diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index 7e2c5009..4088a441 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -39,7 +39,7 @@ db: password: password host: workers-db - # deploy_db: true + deploy_db: true # use_gcp_cloud_proxy: false # gcp_sql_instance_name: null diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index 0f7b1d67..b33be33d 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -140,8 +140,8 @@ def create_job( project_data = schemas.Project.from_orm(project).dict() utils.set_resource_requirements(project_data) - if settings.settings.HOST: - url = f"https://{settings.settings.HOST}" + if settings.settings.WORKERS_API_HOST: + url = f"https://{settings.settings.WORKERS_API_HOST}" else: url = f"http://api.{settings.settings.NAMESPACE}.svc.cluster.local" diff --git a/workers/cs_workers/services/api/settings.py b/workers/cs_workers/services/api/settings.py index 2a26f7e7..96e5da40 100644 --- a/workers/cs_workers/services/api/settings.py +++ b/workers/cs_workers/services/api/settings.py @@ -21,7 +21,7 @@ class Settings(BaseSettings): "https://hdoupe.ngrok.io", ] - HOST: Optional[str] + WORKERS_API_HOST: Optional[str] VIZ_HOST: str @validator("BACKEND_CORS_ORIGINS", pre=True) From 6e94e6f190778dd649a002a09fd6a65faa264242 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 12 May 2021 23:03:04 -0400 Subject: [PATCH 32/41] Handle retries for retrieving model inputs --- webapp/apps/comp/exceptions.py | 7 +++++++ webapp/apps/comp/model_parameters.py | 21 ++++++++++++++++----- webapp/apps/comp/models.py | 6 ++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/webapp/apps/comp/exceptions.py b/webapp/apps/comp/exceptions.py index 6367a09d..86e6baf9 100755 --- a/webapp/apps/comp/exceptions.py +++ b/webapp/apps/comp/exceptions.py @@ -77,3 +77,10 @@ def todict(self): class NotReady(CSException): def __init__(self, instance, *args, **kwargs): self.instance = instance + super().__init__(*args, **kwargs) + + +class Stale(CSException): + def __init__(self, instance, *args, **kwargs): + self.instance = instance + super().__init__(*args, **kwargs) diff --git a/webapp/apps/comp/model_parameters.py b/webapp/apps/comp/model_parameters.py index b07b35a4..7902c5e5 100755 --- a/webapp/apps/comp/model_parameters.py +++ b/webapp/apps/comp/model_parameters.py @@ -1,4 +1,5 @@ from typing import Union +from django.db.models.base import Model import paramtools as pt @@ -6,7 +7,7 @@ from webapp.apps.comp.models import ModelConfig from webapp.apps.comp.compute import Compute, SyncCompute, JobFailError from webapp.apps.comp import actions -from webapp.apps.comp.exceptions import AppError, NotReady +from webapp.apps.comp.exceptions import AppError, NotReady, Stale import os @@ -79,7 +80,7 @@ def get_inputs(self, meta_parameters_values=None): Get cached version of inputs or retrieve new version. """ meta_parameters_values = meta_parameters_values or {} - + self.config = None try: self.config = ModelConfig.objects.get( project=self.project, @@ -87,10 +88,12 @@ def get_inputs(self, meta_parameters_values=None): meta_parameters_values=meta_parameters_values, ) print("STATUS", self.config.status) - if self.config.status != "SUCCESS": + if self.config.status != "SUCCESS" and not self.config.is_stale(): print("raise yo") raise NotReady(self.config) - except ModelConfig.DoesNotExist: + elif self.config.status != "SUCCESS" and self.config.is_stale(): + raise Stale(self.config) + except (ModelConfig.DoesNotExist, Stale) as e: response = self.compute.submit_job( project=self.project, task_name=actions.INPUTS, @@ -99,7 +102,9 @@ def get_inputs(self, meta_parameters_values=None): if self.project.cluster.version == "v1" else "", ) - if self.project.cluster.version == "v1": + if self.project.cluster.version == "v1" and isinstance( + e, ModelConfig.DoesNotExist + ): self.config = ModelConfig.objects.create( project=self.project, model_version=str(self.project.latest_tag), @@ -109,6 +114,12 @@ def get_inputs(self, meta_parameters_values=None): status="PENDING", ) raise NotReady(self.config) + elif self.project.cluster.version == "v1" and isinstance(e, Stale): + self.config.model_version = str(self.project.latest_tag) + self.config.job_id = response + self.config.status = "PENDING" + self.config.save() + raise NotReady(self.config) success, result = response if not success: diff --git a/webapp/apps/comp/models.py b/webapp/apps/comp/models.py index 4b3d1128..68c7cd91 100755 --- a/webapp/apps/comp/models.py +++ b/webapp/apps/comp/models.py @@ -100,6 +100,12 @@ class Meta: ) ] + def is_stale(self, timeout=10): + return ( + self.status != "SUCCESS" + and (timezone.now() - self.creation_date).total_seconds() > timeout + ) + class Inputs(models.Model): objects: models.Manager From 0d83cd7189bc2320fcf006256634c613ecf0b16e Mon Sep 17 00:00:00 2001 From: hdoupe Date: Wed, 12 May 2021 23:03:32 -0400 Subject: [PATCH 33/41] Fix reset values bug --- src/Simulation/API.ts | 18 ++++++++++---- src/Simulation/index.tsx | 53 +++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/Simulation/API.ts b/src/Simulation/API.ts index ae14283a..0cd68377 100644 --- a/src/Simulation/API.ts +++ b/src/Simulation/API.ts @@ -65,12 +65,20 @@ export default class API { } } - resetInitialValues(metaParameters: { [metaParam: string]: any }): Promise { - return axios - .post(`/${this.owner}/${this.title}/api/v1/inputs/`, metaParameters) - .then(response => { - return response.data; + async resetInitialValues(metaParameters: { [metaParam: string]: any }): Promise { + let resp; + if (!!metaParameters) { + resp = await axios.post(`/${this.owner}/${this.title}/api/v1/inputs/`, metaParameters); + } else { + resp = await axios.get(`/${this.owner}/${this.title}/api/v1/inputs/`); + } + if (resp.status === 202) { + return new Promise(resolve => { + setTimeout(async () => resolve(await this.getInputs(metaParameters)), 2000); }); + } else { + return resp.data; + } } getAccessStatus(): Promise { diff --git a/src/Simulation/index.tsx b/src/Simulation/index.tsx index dace264e..ba41fd20 100755 --- a/src/Simulation/index.tsx +++ b/src/Simulation/index.tsx @@ -174,35 +174,32 @@ class SimTabs extends React.Component< }); } - resetInitialValues(metaParameters: InputsDetail["meta_parameters"]) { + async resetInitialValues(metaParameters: InputsDetail["meta_parameters"]) { this.setState({ resetting: true }); - this.api - .resetInitialValues({ - meta_parameters: tbLabelSchema.cast(metaParameters), - }) - .then(data => { - const [ - initialValues, - sects, - { meta_parameters, model_parameters }, - schema, - unknownParams, - ] = convertToFormik(data); - this.setState(prevState => ({ - inputs: { - ...prevState.inputs, - ...{ - meta_parameters: meta_parameters, - model_parameters: model_parameters, - }, - }, - initialValues: initialValues, - sects: sects, - schema: schema, - unknownParams: unknownParams, - resetting: false, - })); - }); + const data = await this.api.resetInitialValues({ + meta_parameters: tbLabelSchema.cast(metaParameters), + }); + const [ + initialValues, + sects, + { meta_parameters, model_parameters }, + schema, + unknownParams, + ] = convertToFormik(data); + this.setState(prevState => ({ + inputs: { + ...prevState.inputs, + ...{ + meta_parameters: meta_parameters, + model_parameters: model_parameters, + }, + }, + initialValues: initialValues, + sects: sects, + schema: schema, + unknownParams: unknownParams, + resetting: false, + })); } resetAccessStatus() { From 79a6f8d385c35540b7b44b49896334899ff55a4d Mon Sep 17 00:00:00 2001 From: hdoupe Date: Thu, 13 May 2021 10:01:32 -0400 Subject: [PATCH 34/41] Clean up simulations module --- .../services/api/routers/simulations.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/simulations.py index b33be33d..6de51063 100644 --- a/workers/cs_workers/services/api/routers/simulations.py +++ b/workers/cs_workers/services/api/routers/simulations.py @@ -1,7 +1,5 @@ from datetime import datetime -import math import os -import uuid import httpx from fastapi import APIRouter, Depends, Body, HTTPException @@ -19,7 +17,9 @@ @router.get("/callback/{job_id}/", status_code=201, response_model=schemas.Job) -def job_callback(job_id: str, db: Session = Depends(deps.get_db)): +def job_callback( + job_id: str, db: Session = Depends(deps.get_db), +): instance: models.Job = db.query(models.Job).filter( models.Job.id == job_id ).one_or_none() @@ -81,19 +81,6 @@ async def finish_job( return instance -# @router.get("/{job_id}/", status_code=201, response_model=schemas.Job) -# def get_job( -# job_id: str, -# db: Session = Depends(deps.get_db), -# user: schemas.User = Depends(deps.get_current_active_user), -# ): -# instance = db.query(models.Job).filter(models.Job.id == job_id).one_or_none() -# if instance is None: -# raise HTTPException(status_code=404, detail="Job not found.") - -# return instance - - @router.post("/{owner}/{title}/", response_model=schemas.Job, status_code=201) def create_job( owner: str, From a75601195af35523787d05805005f249cd2b0742 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Thu, 13 May 2021 10:03:20 -0400 Subject: [PATCH 35/41] Rename simulations module to jobs --- workers/cs_workers/services/api/main.py | 4 ++-- .../services/api/routers/{simulations.py => jobs.py} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename workers/cs_workers/services/api/routers/{simulations.py => jobs.py} (100%) diff --git a/workers/cs_workers/services/api/main.py b/workers/cs_workers/services/api/main.py index cbdf4df6..eb5e635e 100644 --- a/workers/cs_workers/services/api/main.py +++ b/workers/cs_workers/services/api/main.py @@ -2,7 +2,7 @@ from starlette.middleware.cors import CORSMiddleware from .settings import settings -from .routers import users, login, projects, simulations, deployments +from .routers import users, login, projects, jobs, deployments app = FastAPI( title=settings.PROJECT_NAME, openapi_url=f"{settings.API_PREFIX_STR}/openapi.json", @@ -21,5 +21,5 @@ app.include_router(login.router, prefix=settings.API_PREFIX_STR) app.include_router(users.router, prefix=settings.API_PREFIX_STR) app.include_router(projects.router, prefix=settings.API_PREFIX_STR) -app.include_router(simulations.router, prefix=settings.API_PREFIX_STR) +app.include_router(jobs.router, prefix=settings.API_PREFIX_STR) app.include_router(deployments.router, prefix=settings.API_PREFIX_STR) diff --git a/workers/cs_workers/services/api/routers/simulations.py b/workers/cs_workers/services/api/routers/jobs.py similarity index 100% rename from workers/cs_workers/services/api/routers/simulations.py rename to workers/cs_workers/services/api/routers/jobs.py From 3960e5afdb16e5f6ae43e2665d6d7c7997e9b522 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Thu, 13 May 2021 10:04:04 -0400 Subject: [PATCH 36/41] Add retry logic for retrieving task args via callback url and support for reading an authorization header from env --- .../models/executors/task_wrapper.py | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/workers/cs_workers/models/executors/task_wrapper.py b/workers/cs_workers/models/executors/task_wrapper.py index c6cb092e..32a3adca 100644 --- a/workers/cs_workers/models/executors/task_wrapper.py +++ b/workers/cs_workers/models/executors/task_wrapper.py @@ -1,8 +1,8 @@ +import os import time import traceback import httpx -import cs_storage try: @@ -11,6 +11,36 @@ pass +async def get_task_kwargs(callback_url, retries=5): + """ + Retrieve task kwargs from callback_url. + + Returns + ------- + resp: httpx.Response + """ + job_token = os.environ.get("JOB_TOKEN", None) + if job_token is not None: + headers = {"Authorization": f"Token {job_token}"} + else: + headers = None + + for retry in range(0, retries + 1): + try: + async with httpx.AsyncClient() as client: + resp = await client.get(callback_url, headers=headers) + resp.raise_for_status() + return resp + except Exception as e: + print(f"Exception when retrieving value from callback url: {callback_url}") + print(f"Exception: {e}") + if retry >= retries: + raise e + wait_time = 2 ** retry + print(f"Trying again in {wait_time} seconds.") + time.sleep(wait_time) + + async def task_wrapper(callback_url, task_name, func, task_kwargs=None): print("async task", callback_url, func, task_kwargs) start = time.time() @@ -21,9 +51,7 @@ async def task_wrapper(callback_url, task_name, func, task_kwargs=None): try: if task_kwargs is None: print("getting task_kwargs") - async with httpx.AsyncClient() as client: - resp = await client.get(callback_url) - resp.raise_for_status() + resp = await get_task_kwargs(callback_url) task_kwargs = resp.json()["inputs"] print("got task_kwargs", task_kwargs) outputs = func(**(task_kwargs or {})) From 67d575bc916b89249100f32b7b340fb7bbfed6dc Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 15 May 2021 11:27:23 -0400 Subject: [PATCH 37/41] Fix migrations files conflicts --- ...0321_2004.py => 0026_auto_20210515_1512.py} | 14 ++++++++++++-- .../apps/users/migrations/0026_tag_version.py | 18 ------------------ .../users/migrations/0027_cluster_viz_host.py | 18 ------------------ webapp/apps/users/models.py | 2 +- 4 files changed, 13 insertions(+), 39 deletions(-) rename webapp/apps/users/migrations/{0025_auto_20210321_2004.py => 0026_auto_20210515_1512.py} (66%) delete mode 100755 webapp/apps/users/migrations/0026_tag_version.py delete mode 100755 webapp/apps/users/migrations/0027_cluster_viz_host.py diff --git a/webapp/apps/users/migrations/0025_auto_20210321_2004.py b/webapp/apps/users/migrations/0026_auto_20210515_1512.py similarity index 66% rename from webapp/apps/users/migrations/0025_auto_20210321_2004.py rename to webapp/apps/users/migrations/0026_auto_20210515_1512.py index 1a17771a..624ab6e6 100755 --- a/webapp/apps/users/migrations/0025_auto_20210321_2004.py +++ b/webapp/apps/users/migrations/0026_auto_20210515_1512.py @@ -1,4 +1,4 @@ -# Generated by Django 3.0.13 on 2021-03-21 20:04 +# Generated by Django 3.0.14 on 2021-05-15 15:12 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("users", "0024_project_social_image_link"), + ("users", "0025_project_embed_background_color"), ] operations = [ @@ -31,4 +31,14 @@ class Migration(migrations.Migration): field=models.CharField(default="v0", max_length=32), preserve_default=False, ), + migrations.AddField( + model_name="cluster", + name="viz_host", + field=models.CharField(max_length=128, null=True), + ), + migrations.AddField( + model_name="tag", + name="version", + field=models.CharField(max_length=255, null=True), + ), ] diff --git a/webapp/apps/users/migrations/0026_tag_version.py b/webapp/apps/users/migrations/0026_tag_version.py deleted file mode 100755 index 38d4dcc5..00000000 --- a/webapp/apps/users/migrations/0026_tag_version.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.0.13 on 2021-03-21 20:56 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("users", "0025_auto_20210321_2004"), - ] - - operations = [ - migrations.AddField( - model_name="tag", - name="version", - field=models.CharField(max_length=255, null=True), - ), - ] diff --git a/webapp/apps/users/migrations/0027_cluster_viz_host.py b/webapp/apps/users/migrations/0027_cluster_viz_host.py deleted file mode 100755 index 8b72d11e..00000000 --- a/webapp/apps/users/migrations/0027_cluster_viz_host.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.0.14 on 2021-04-11 20:06 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("users", "0026_tag_version"), - ] - - operations = [ - migrations.AddField( - model_name="cluster", - name="viz_host", - field=models.CharField(max_length=128, null=True), - ), - ] diff --git a/webapp/apps/users/models.py b/webapp/apps/users/models.py index 5ffe9e19..b2b6e2f4 100755 --- a/webapp/apps/users/models.py +++ b/webapp/apps/users/models.py @@ -215,7 +215,7 @@ def ensure_access_token(self): self.access_token_expires_at is None or self.access_token_expires_at < (timezone.now() - timedelta(seconds=60)) ) - print(missing_token, is_expired) + print("token is missing", missing_token, "token is expired", is_expired) if missing_token or is_expired: resp = requests.post( f"{self.url}/api/v1/login/access-token", From 0222c89512535d952e10e52677dcdfa3ad870154 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Sat, 15 May 2021 12:21:50 -0400 Subject: [PATCH 38/41] Set up password authentication for redis --- .github/workflows/dev-workers-deploy.yml | 1 + .../templates/outputs-processor-Deployment.yaml | 5 +++++ workers/cs-workers/templates/redis-secret.yaml | 8 ++++++++ workers/cs-workers/templates/rq-Deployment.yaml | 14 +++++++++++--- workers/cs-workers/values.yaml | 1 + workers/cs_workers/services/outputs_processor.py | 4 +++- workers/cs_workers/services/rq_settings.py | 6 ++++++ 7 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 workers/cs-workers/templates/redis-secret.yaml create mode 100644 workers/cs_workers/services/rq_settings.py diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index 8c806f12..d979e5c7 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -82,5 +82,6 @@ jobs: --set tag=$TAG \ --set api.secret_key=$(cs secrets get WORKERS_API_SECRET_KEY) \ --set db.password=$(cs secrets get WORKERS_DB_PASSWORD) \ + --set redis.password=$(cs secrets get WORKERS_REDIS_PASSWORD) \ --namespace workers \ -f values.deploy.yaml | kubectl apply -f - diff --git a/workers/cs-workers/templates/outputs-processor-Deployment.yaml b/workers/cs-workers/templates/outputs-processor-Deployment.yaml index 0700011a..f7baf308 100755 --- a/workers/cs-workers/templates/outputs-processor-Deployment.yaml +++ b/workers/cs-workers/templates/outputs-processor-Deployment.yaml @@ -28,6 +28,11 @@ spec: value: {{ .Values.redis.host }} - name: REDIS_PORT value: "{{ .Values.redis.port }}" + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: workers-redis-secret + key: PASSWORD resources: requests: cpu: 1 diff --git a/workers/cs-workers/templates/redis-secret.yaml b/workers/cs-workers/templates/redis-secret.yaml new file mode 100644 index 00000000..3cbf24a3 --- /dev/null +++ b/workers/cs-workers/templates/redis-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: workers-redis-secret + namespace: {{ .Values.workers_namespace }} +type: Opaque +stringData: + PASSWORD: {{ .Values.redis.password }} diff --git a/workers/cs-workers/templates/rq-Deployment.yaml b/workers/cs-workers/templates/rq-Deployment.yaml index af24f36d..7dd948a7 100644 --- a/workers/cs-workers/templates/rq-Deployment.yaml +++ b/workers/cs-workers/templates/rq-Deployment.yaml @@ -16,15 +16,23 @@ spec: serviceAccountName: rq-worker containers: - name: rq-worker-outputs - command: ["rq", "worker", "--with-scheduler", "--url", "$(REDIS_URL)"] + command: + ["rq", "worker", "--with-scheduler", "-c", "cs_workers.services.rq_settings"] image: "{{ .Values.registry }}/{{ .Values.project }}/outputs_processor:{{ .Values.tag }}" env: - name: BUCKET value: {{ .Values.bucket }} - name: PROJECT value: {{ .Values.project }} - - name: REDIS_URL - value: "redis://{{ .Values.redis.host }}:{{ .Values.redis.port }}" + - name: REDIS_HOST + value: {{ .Values.redis.host }} + - name: REDIS_PORT + value: "{{ .Values.redis.port }}" + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: workers-redis-secret + key: PASSWORD resources: requests: cpu: 1 diff --git a/workers/cs-workers/values.yaml b/workers/cs-workers/values.yaml index 4088a441..3d3d259a 100644 --- a/workers/cs-workers/values.yaml +++ b/workers/cs-workers/values.yaml @@ -24,6 +24,7 @@ api: redis: host: "redis-master" port: "6379" + password: "" volumes: - name: redis-volume-v1 hostPath: diff --git a/workers/cs_workers/services/outputs_processor.py b/workers/cs_workers/services/outputs_processor.py index d023158a..49aff979 100644 --- a/workers/cs_workers/services/outputs_processor.py +++ b/workers/cs_workers/services/outputs_processor.py @@ -22,7 +22,9 @@ queue = Queue( connection=redis.Redis( - host=os.environ.get("REDIS_HOST"), port=os.environ.get("REDIS_PORT") + host=os.environ.get("REDIS_HOST"), + port=os.environ.get("REDIS_PORT"), + password=os.environ.get("REDIS_PASSWORD"), ) ) diff --git a/workers/cs_workers/services/rq_settings.py b/workers/cs_workers/services/rq_settings.py new file mode 100644 index 00000000..ae9096bd --- /dev/null +++ b/workers/cs_workers/services/rq_settings.py @@ -0,0 +1,6 @@ +import os + +host = os.environ.get("REDIS_HOST") +port = os.environ.get("REDIS_PORT") +password = os.environ.get("REDIS_PASSWORD", None) +REDIS_URL = f"redis://:{password}@{host}:{port}/" From a96c8da4dddc5f4d826cf3f4b1df021800ebb679 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 17 May 2021 20:22:51 -0400 Subject: [PATCH 39/41] Update production actions file --- .github/workflows/workers-deploy.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/workers-deploy.yml b/.github/workflows/workers-deploy.yml index 8042c737..4f33e6f3 100644 --- a/.github/workflows/workers-deploy.yml +++ b/.github/workflows/workers-deploy.yml @@ -20,10 +20,10 @@ env: REGISTRY_HOSTNAME: gcr.io PROJECT: ${{ secrets.GKE_PROJECT }} - HOST: compute.studio TAG: ${{ github.sha }} CS_CONFIG: ${{ secrets.CS_CONFIG }} + WORKERS_VALUES: ${{ secrets.DEV_WORKERS_VALUES }} jobs: setup-build-publish-deploy: @@ -58,9 +58,10 @@ jobs: # Set up docker to authenticate gcloud auth configure-docker - - name: Set cs-config.yaml file. + - name: Set cs-config.yaml and values.yaml files. run: | echo $CS_CONFIG | base64 --decode > cs-config.yaml + echo $WORKERS_VALUES | base64 --decode > ./workers/values.deploy.yaml - name: Build Docker Images run: | @@ -74,5 +75,12 @@ jobs: - name: Deploy run: | gcloud container clusters get-credentials $GKE_CLUSTER --zone $GKE_ZONE --project $GKE_PROJECT - cs workers svc config -o - --update-dns | kubectl apply -f - - kubectl get pods -o wide + cd workers + helm template cs-workers \ + --set project=$PROJECT \ + --set tag=$TAG \ + --set api.secret_key=$(cs secrets get WORKERS_API_SECRET_KEY) \ + --set db.password=$(cs secrets get WORKERS_DB_PASSWORD) \ + --set redis.password=$(cs secrets get WORKERS_REDIS_PASSWORD) \ + --namespace workers \ + -f values.deploy.yaml | kubectl apply -f - From 075011bc7f4ebae3c7b415fa188f06b76c283635 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 17 May 2021 20:24:15 -0400 Subject: [PATCH 40/41] Remove dev commands --- workers/cs_workers/dockerfiles/Dockerfile.model | 6 ------ 1 file changed, 6 deletions(-) diff --git a/workers/cs_workers/dockerfiles/Dockerfile.model b/workers/cs_workers/dockerfiles/Dockerfile.model index 87ee9563..ada23f19 100644 --- a/workers/cs_workers/dockerfiles/Dockerfile.model +++ b/workers/cs_workers/dockerfiles/Dockerfile.model @@ -30,9 +30,3 @@ RUN if test -f "./cs-config/setup.py"; then pip install -e ./cs-config; fi EXPOSE 8010 ENV PORT=8010 ENV HOST=0.0.0.0 - -COPY ./secrets ./secrets -COPY ./deploy ./deploy -COPY ./workers ./workers - -RUN pip install -e ./secrets && pip install -e ./deploy && pip install -e ./workers From 78a928791515ab47049f07f2066e5d5513c535b1 Mon Sep 17 00:00:00 2001 From: hdoupe Date: Mon, 17 May 2021 20:27:32 -0400 Subject: [PATCH 41/41] Remove ci-api from branches that trigger dev workflow runs --- .github/workflows/dev-webapp-deploy.yml | 1 - .github/workflows/dev-workers-deploy.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/dev-webapp-deploy.yml b/.github/workflows/dev-webapp-deploy.yml index ba0b0092..64eddb5f 100644 --- a/.github/workflows/dev-webapp-deploy.yml +++ b/.github/workflows/dev-webapp-deploy.yml @@ -4,7 +4,6 @@ on: push: branches: - dev - - ci-api paths: - "webapp/**" - "Dockerfile" diff --git a/.github/workflows/dev-workers-deploy.yml b/.github/workflows/dev-workers-deploy.yml index d979e5c7..5d5c3a3c 100644 --- a/.github/workflows/dev-workers-deploy.yml +++ b/.github/workflows/dev-workers-deploy.yml @@ -4,7 +4,6 @@ on: push: branches: - dev - - ci-api paths: - "workers/**" - "deploy/**"