Merge pull request #319 from bothub-it/staging

Staging
weni-ai · Nov 30, 2021 · 29b9faf · 29b9faf
2 parents df7a7ea + e10993c
commit 29b9faf
Show file tree

Hide file tree

Showing 24 changed files with 261 additions and 158 deletions.
diff --git a/...ws/build-develop-aiplatform_xx-spacy.yaml → ...ows/build-develop-aiplatform_xx-none.yaml b/...ws/build-develop-aiplatform_xx-spacy.yaml → ...ows/build-develop-aiplatform_xx-none.yaml
@@ -1,12 +1,12 @@
-name: Build develop AI-Platform NLP - xx-SPACY
+name: Build develop AI-Platform NLP - xx-None
 on:
   push:
     tags:
       - 'develop-v*.*.*'
 jobs:
   docker:
     runs-on: ubuntu-latest
-    environment: 
+    environment:
       name: develop
     steps:
       - name: Check out the repo
@@ -33,7 +33,7 @@ jobs:
           context: .
           file: ./aiplatform.Dockerfile
           push: true
-          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/...kflows/build-develop-worker_xx-spacy.yaml → ...rkflows/build-develop-worker_xx-none.yaml b/...kflows/build-develop-worker_xx-spacy.yaml → ...rkflows/build-develop-worker_xx-none.yaml
@@ -1,12 +1,12 @@
-name: Build develop Worker NLP - xx-SPACY
+name: Build develop Worker NLP - xx-None
 on:
   push:
     tags:
       - 'develop-v*.*.*'
 jobs:
   docker:
     runs-on: ubuntu-latest
-    environment: 
+    environment:
       name: develop
     steps:
       - name: Check out the repo
@@ -21,7 +21,7 @@ jobs:
         id: vars
         run: echo ::set-output name=tag::${GITHUB_REF#refs/*/}
       - name: Login to DockerHub
-        uses: docker/login-action@v1 
+        uses: docker/login-action@v1
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -32,7 +32,7 @@ jobs:
           context: .
           file: ./nlp.Dockerfile
           push: true
-          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/...build-production-aiplatform_xx-spacy.yaml → .../build-production-aiplatform_xx-none.yaml b/...build-production-aiplatform_xx-spacy.yaml → .../build-production-aiplatform_xx-none.yaml
@@ -1,4 +1,4 @@
-name: Build production AI-Platform NLP - xx-SPACY
+name: Build production AI-Platform NLP - xx-NONE
 on:
   push:
     tags:
@@ -33,7 +33,7 @@ jobs:
           context: .
           file: ./aiplatform.Dockerfile
           push: true
-          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/...ows/build-production-worker_xx-spacy.yaml → ...lows/build-production-worker_xx-none.yaml b/...ows/build-production-worker_xx-spacy.yaml → ...lows/build-production-worker_xx-none.yaml
@@ -1,4 +1,4 @@
-name: Build production Worker NLP - xx-SPACY
+name: Build production Worker NLP - xx-NONE
 on:
   push:
     tags:
@@ -32,7 +32,7 @@ jobs:
           context: .
           file: ./nlp.Dockerfile
           push: true
-          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/...ws/build-staging-aiplatform_xx-spacy.yaml → ...ows/build-staging-aiplatform_xx-none.yaml b/...ws/build-staging-aiplatform_xx-spacy.yaml → ...ows/build-staging-aiplatform_xx-none.yaml
@@ -1,12 +1,12 @@
-name: Build staging AI-Platform NLP - xx-SPACY
+name: Build staging AI-Platform NLP - xx-NONE
 on:
   push:
     tags:
       - 'staging-v*.*.*'
 jobs:
   docker:
     runs-on: ubuntu-latest
-    environment: 
+    environment:
       name: staging
     steps:
       - name: Check out the repo
@@ -33,7 +33,7 @@ jobs:
           context: .
           file: ./aiplatform.Dockerfile
           push: true
-          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: us.gcr.io/bothub-273521/bothub-nlp-ai-platform:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/...kflows/build-staging-worker_xx-spacy.yaml → ...rkflows/build-staging-worker_xx-none.yaml b/...kflows/build-staging-worker_xx-spacy.yaml → ...rkflows/build-staging-worker_xx-none.yaml
@@ -1,12 +1,12 @@
-name: Build staging Worker NLP - xx-SPACY
+name: Build staging Worker NLP - xx-NONE
 on:
   push:
     tags:
       - 'staging-v*.*.*'
 jobs:
   docker:
     runs-on: ubuntu-latest
-    environment: 
+    environment:
       name: staging
     steps:
       - name: Check out the repo
@@ -21,7 +21,7 @@ jobs:
         id: vars
         run: echo ::set-output name=tag::${GITHUB_REF#refs/*/}
       - name: Login to DockerHub
-        uses: docker/login-action@v1 
+        uses: docker/login-action@v1
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
@@ -32,7 +32,7 @@ jobs:
           context: .
           file: ./nlp.Dockerfile
           push: true
-          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-SPACY
+          tags: bothubit/bothub-nlp:${{ steps.vars.outputs.tag }}-xx-NONE
           no-cache: true
           build-args: |
-            DOWNLOAD_MODELS=xx-SPACY
+            DOWNLOAD_MODELS=xx-NONE
diff --git a/.travis.yml b/.travis.yml
@@ -8,7 +8,6 @@ install:
   - pip install coveralls
 env:
   global:
-    - SUPPORTED_LANGUAGES="en|pt"
     - BOTHUB_NLP_LANGUAGE_QUEUE="en"
     - BOTHUB_NLP_SERVICE_WORKER=true
 before_script:

diff --git a/Makefile b/Makefile
@@ -1,45 +1,29 @@
-lint:
-	@echo "${INFO}Linting...${NC}"
-	@export PIPENV_DONT_LOAD_ENV=1
-	@cd bothub-nlp-api \
-		&& PIPENV_DONT_LOAD_ENV=1 pipenv run lint \
-		&& echo "${SUCCESS}✔${NC} bothub-nlp-api" || echo "${DANGER}✖${NC} bothub-nlp-api"
-	@cd bothub-nlp-nlu-worker \
-		&& PIPENV_DONT_LOAD_ENV=1 pipenv run lint \
-		&& echo "${SUCCESS}✔${NC} bothub-nlp-nlu-worker" || echo "${DANGER}✖${NC} bothub-nlp-nlu-worker"
-	@cd bothub-nlp-nlu-worker-on-demand \
-		&& PIPENV_DONT_LOAD_ENV=1 pipenv run lint \
-		&& echo "${SUCCESS}✔${NC} bothub-nlp-nlu-worker-on-demand" || echo "${DANGER}✖${NC} bothub-nlp-nlu-worker-on-demand"
-
-init_env:
+init_development_env:
 	@echo "${INFO}Starting init environment...${NC}"
-	@echo "SUPPORTED_LANGUAGES=en:en_core_web_md" >> .env
-	@echo "BOTHUB_ENGINE_URL=https://api.bothub.it" >> .env
-	@echo "ENGINE_PORT=8000" >> .env
+	@echo "BOTHUB_ENGINE_URL=http://localhost" >> .env
+	@echo "BOTHUB_NLP_SERVICE_WORKER=True" >> .env
+	@echo "BOTHUB_NLP_LANGUAGE_QUEUE=en" >> .env
+	@echo "BOTHUB_LANGUAGE_MODEL=BERT" >> .env
 	@echo "${SUCCESS}Finish...${NC}"
 
 start_development:
 	@echo "${INFO}Starting Build all project (Docker)...${NC}"
-	@docker-compose build --build-arg DOWNLOAD_SPACY_MODELS=en:en_core_web_md
+	@docker-compose build --build-arg DOWNLOAD_MODELS=en-BERT
 	@docker-compose up -d
 	@echo "${SUCCESS}Finish...${NC}"
 
 
 install_development_requirements:
 	@echo "${INFO}Installing development requirements...${NC}"
 	@git clone --branch master --depth 1 --single-branch https://github.com/Ilhasoft/spacy-lang-models spacy-langs
-	@python scripts/link_lang_spacy.py pt_br ./spacy-langs/pt_br/
-	@python scripts/link_lang_spacy.py mn ./spacy-langs/mn/
-	@python scripts/link_lang_spacy.py ha ./spacy-langs/ha/
-	@python bothub_nlp_nlu_worker/bothub_nlp_nlu/scripts/download_spacy_models.py en:en_core_web_md
+	@python bothub/shared/utils/scripts/link_lang_spacy.py pt_br ./spacy-langs/pt_br/
+	@python bothub/shared/utils/scripts/download_models.py en-BERT
 	@echo "${SUCCESS}✔${NC} Development requirements installed"
 
 
 start_celery:
-	@celery worker --autoscale 50,10 -O fair --workdir bothub_nlp_nlu_worker -A celery_app -c 1 -l INFO -E -Q en
-
-python_celery:
 	@python start_celery.py
+
 # Utils
 
 ## Colors

diff --git a/README.md b/README.md
@@ -2,22 +2,19 @@
 
 [![Build Status](https://travis-ci.org/bothub-it/bothub-nlp.svg?branch=master)](https://travis-ci.org/bothub-it/bothub-nlp) [![Coverage Status](https://coveralls.io/repos/github/bothub-it/bothub-nlp/badge.svg?branch=master)](https://coveralls.io/github/bothub-it/bothub-nlp?branch=master) ![version 3.0.1](https://img.shields.io/badge/version-3.0.1-blue.svg) [![python 3.6](https://img.shields.io/badge/python-3.6-green.svg)](https://docs.python.org/3.6/whatsnew/changelog.html) [![license AGPL-3.0](https://img.shields.io/badge/license-AGPL--3.0-red.svg)](https://github.com/bothub-it/bothub-nlp/blob/master/LICENSE)
 
-Check the [main Bothub project repository](https://github.com/Ilhasoft/bothub).
 
 
 ## Services
 
 ### bothub-nlp-nlu-worker
 
-### bothub-nlp-ai-platform
+### [bothub-nlp-api](https://github.com/bothub-it/bothub-nlp-api)
 
 ## Packages
 
-### bothub-nlp (python 3.6)
+### [bothub-backend](https://github.com/bothub-it/bothub-backend) (python 3.6)
 
-### bothub-nlp-celery (python 3.6)
-
-### bothub-nlp-nlu (python 3.6)
+### [bothub-nlp-celery](https://github.com/bothub-it/bothub-nlp-celery) (python 3.6)
 
 
 # Requirements
@@ -28,39 +25,57 @@ Check the [main Bothub project repository](https://github.com/Ilhasoft/bothub).
 
 ## Development
 
-Use ```make``` commands to ```lint```, ```init_env```, ```start_development```.
+Use ```make``` commands
 
 | Command | Description |
 |--|--|
-| make lint | Show lint warnings and errors
-| make init_env | Init file .env with variables environment
-| make start_development | Create .env with variable environment and start build docker
+| make init_development_env | Initialize the .env file with environment variables |
+| make start_development | Build and start the Docker containers |
+| make install_development_requirements | Install some default models |
+| make start_celery | Run celery application |
 
 
 ## Environment Variables
 
 You can set environment variables in your OS, write them to a ```.env``` file, or pass them via Docker config.
 
+### bothub-backend
+
 | Variable | Type | Default | Description |
 |--|--|--|--|
-| SUPPORTED_LANGUAGES | `str` | `en|pt` | Set supported languages. Separe languages using |. You can set location follow the format: [LANGUAGE_CODE]:[LANGUAGE_LOCATION]. |
 | BOTHUB_ENGINE_URL | `str` | `https://api.bothub.it` | Web service url |
-| BOTHUB_NLP_CELERY_BROKER_URL | `str` | `redis://localhost:6379/0	` | `Celery Broker URL, check usage instructions in Celery Docs` |
-| BOTHUB_NLP_CELERY_BACKEND_URL | `str` | `BOTHUB_NLP_CELERY_BROKER_URL` value | Celery Backend URL, check usage instructions in [Celery Docs](http://docs.celeryproject.org/en/latest/index.html) |
-| BOTHUB_NLP_NLU_AGROUP_LANGUAGE_QUEUE | `boolean` | `True` | Agroup tasks by language in celery queue, if `True` there will be only one queue per language. |
-| BOTHUB_NLP_AWS_ACCESS_KEY_ID | `str` |  |  |
-| BOTHUB_NLP_AWS_SECRET_ACCESS_KEY | `str` |  |  |
-| BOTHUB_NLP_AWS_S3_BUCKET_NAME | `str` |  |  |
-| BOTHUB_NLP_AWS_REGION_NAME | `str` |  |  |
-| BOTHUB_NLP_LANGUAGE_QUEUE | `str` | en | Set language that will be loaded in celery |
-| BOTHUB_NLP_SERVICE_WORKER | `boolean` | `False` | Set true if you are running celery bothub-nlp-nlu-worker |
-| BOTHUB_NLP_CELERY_SENTRY_CLIENT | `bool` | `False` |  |
-| BOTHUB_NLP_CELERY_SENTRY | `str` | `None` |  |
+
+### nlp-nlu-worker / nlp-ai-platform
+
+You can set environment variables in your OS, write them to a ```.env``` file, or pass them via Docker config.
+
+| Variable | Type | Default | Description |
+|--|--|--|--|
+| WORKER_CACHE_CLEANING_PERIOD | `float` | `3*3600` | Period of time (seconds) the worker will look for idle interpreters to clean cache |
+| INTERPRETER_CACHE_IDLE_LIMIT | `float` | `24*3600` | Idle limit of time (seconds) the interpreter cache will keep cache |
+| DYNAMIC_EPOCHS_THRESHOLD | `int` | `10000` | Minimum number of sentences to start decreasing training number of epochs |
+| BOTHUB_NLP_AWS_ACCESS_KEY_ID | `str` | | AWS bucket access to save trained models and evaluation results |
+| BOTHUB_NLP_AWS_SECRET_ACCESS_KEY | `str` | | AWS bucket access to save trained models and evaluation results |
+| BOTHUB_NLP_AWS_S3_BUCKET_NAME | `str` | | AWS bucket access to save trained models and evaluation results |
+| BOTHUB_NLP_AWS_REGION_NAME | `str` | | AWS bucket access to save trained models and evaluation results |
+
+### bothub-celery
+
+| Variable | Type | Default | Description |
+|--|--|--|--|
+| BOTHUB_NLP_CELERY_BROKER_URL | `string` | `redis://localhost:6379/0` | Celery Broker URL, check usage instructions in [Celery Docs](http://docs.celeryproject.org/en/latest/index.html) |
+| BOTHUB_NLP_CELERY_BACKEND_URL | `string` | `BOTHUB_NLP_CELERY_BROKER_URL` value | Celery Backend URL, check usage instructions in [Celery Docs](http://docs.celeryproject.org/en/latest/index.html) |
+| BOTHUB_NLP_CELERY_SENTRY_CLIENT | `bool` | `False` | Enable Sentry |
+| BOTHUB_NLP_CELERY_SENTRY | `str` | `None` | Set URL Sentry Server |
+| BOTHUB_NLP_LANGUAGE_QUEUE | `string` | `en` | Set language of model that will be loaded in celery and will define its queue |
+| BOTHUB_LANGUAGE_MODEL | `string` | `None` | Set type of model (BERT/SPACY/NONE) |
+| TASK_GENERAL_TIME_LIMIT | `int` | `120` | Time limit of celery tasks |
+| TASK_PARSE_TIME_LIMIT | `int` | `10` | Time limit of parse task |
 
 ## Docker Arguments
 
 You need to set --build-arg when you are building docker-compose
 
 | Argument | Type | Default | Description |
 |--|--|--|--|
-| DOWNLOAD_MODELS | ```string```|  ```en-BERT``` | Set supported languages. Separe languages using ```\|```. You can set location follow the format: ```[LANGUAGE_CODE]-[LANGUAGE_MODEL]```.
+| DOWNLOAD_MODELS | ```string```|  ```en-BERT``` | Set the language and model at build time, following the format ```[LANGUAGE_CODE]-[LANGUAGE_MODEL]```. |
diff --git a/ai_platform/aiplatform_app.py b/ai_platform/aiplatform_app.py
@@ -1,5 +1,3 @@
-import argparse
-
 from bothub.shared.train import train_update as train
 from bothub.shared.evaluate_crossval import (
     evaluate_crossval_update as evaluate_crossval,

diff --git a/ai_platform/aiplatform_requirements.txt b/ai_platform/aiplatform_requirements.txt
@@ -1,5 +1,5 @@
 git+https://github.com/bothub-it/[email protected]
-git+https://github.com/bothub-it/[email protected].37
+git+https://github.com/bothub-it/[email protected].38
 rasa==1.10.6
 transformers==2.11.0
 emoji==0.6.0

diff --git a/ai_platform/settings.py b/ai_platform/settings.py
@@ -7,10 +7,10 @@
     "--operation", help='What operation will be done, "train" or "evaluate"'
 )
 PARSER.add_argument(
-    "--repository-version", help="The version of repository.", type=int
+    "--repository-version", help="The id of repository-version.", type=int
 )
 PARSER.add_argument(
-    "--by-id", help=".", type=int
+    "--by-id", help="User id sending the job", type=int
 )
 PARSER.add_argument(
     "--repository-authorization", help="Repository authorization string."

diff --git a/bothub/nlu_worker/interpreter_manager.py b/bothub/nlu_worker/interpreter_manager.py
@@ -8,7 +8,7 @@
 from tempfile import mkdtemp
 from datetime import datetime
 
-from bothub import settings
+from bothub.shared import settings
 from bothub.shared.utils.persistor import BothubPersistor
 from bothub.shared.utils.backend import backend
 from bothub.shared.utils.rasa_components.bothub_interpreter import BothubInterpreter

diff --git a/bothub/settings.py → bothub/shared/settings.py b/bothub/settings.py → bothub/shared/settings.py
@@ -8,3 +8,7 @@
 INTERPRETER_CACHE_IDLE_LIMIT = config(
     "INTERPRETER_CACHE_IDLE_LIMIT", cast=float, default=24*3600
 )
+# Minimum number of sentences to start decreasing number of epochs
+DYNAMIC_EPOCHS_THRESHOLD = config(
+    "DYNAMIC_EPOCHS_THRESHOLD", cast=int, default=10000
+)
diff --git a/bothub/shared/train.py b/bothub/shared/train.py
@@ -43,9 +43,12 @@ def train_update(
                     )
                 )
 
+            update_request["dataset_size"] = len(examples)
+
             pipeline_builder = PipelineBuilder(update_request)
             pipeline_builder.print_pipeline()
             rasa_nlu_config = pipeline_builder.get_nlu_model()
+
             trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
             training_data = TrainingData(
                 training_examples=examples, lookup_tables=None