From d266cf41f679e88db32435e7c626bbae7e933d80 Mon Sep 17 00:00:00 2001 From: jnovikov Date: Thu, 19 Dec 2024 03:10:08 +0000 Subject: [PATCH] DOCS: MVP --- checkers/docs/checker.py | 152 +++++++++++++++++++ checkers/docs/docs_lib.py | 141 ++++++++++++++++++ services/docs/api/.dockerignore | 1 + services/docs/api/Dockerfile | 15 ++ services/docs/api/app.env | 3 + services/docs/api/src/app/__init__.py | 0 services/docs/api/src/app/api.py | 197 +++++++++++++++++++++++++ services/docs/api/src/app/app.py | 27 ++++ services/docs/api/src/app/auth.py | 50 +++++++ services/docs/api/src/app/config.py | 16 ++ services/docs/api/src/app/db.py | 11 ++ services/docs/api/src/app/dto.py | 43 ++++++ services/docs/api/src/app/models.py | 28 ++++ services/docs/api/src/app/search.py | 29 ++++ services/docs/api/src/requirements.txt | 44 ++++++ services/docs/dev/test.py | 73 +++++++++ services/docs/docker-compose.yml | 36 +++++ services/docs/search/Dockerfile | 32 ++++ services/docs/search/src/go.mod | 45 ++++++ services/docs/search/src/go.sum | 87 +++++++++++ services/docs/search/src/main.go | 97 ++++++++++++ 21 files changed, 1127 insertions(+) create mode 100755 checkers/docs/checker.py create mode 100644 checkers/docs/docs_lib.py create mode 100644 services/docs/api/.dockerignore create mode 100644 services/docs/api/Dockerfile create mode 100644 services/docs/api/app.env create mode 100644 services/docs/api/src/app/__init__.py create mode 100644 services/docs/api/src/app/api.py create mode 100644 services/docs/api/src/app/app.py create mode 100644 services/docs/api/src/app/auth.py create mode 100644 services/docs/api/src/app/config.py create mode 100644 services/docs/api/src/app/db.py create mode 100644 services/docs/api/src/app/dto.py create mode 100644 services/docs/api/src/app/models.py create mode 100644 services/docs/api/src/app/search.py create mode 100644 services/docs/api/src/requirements.txt create mode 100644 services/docs/dev/test.py create mode 100644 services/docs/docker-compose.yml create mode 100644 services/docs/search/Dockerfile create mode 100644 services/docs/search/src/go.mod create mode 100644 services/docs/search/src/go.sum create mode 100644 services/docs/search/src/main.go diff --git a/checkers/docs/checker.py b/checkers/docs/checker.py new file mode 100755 index 0000000..321fe54 --- /dev/null +++ b/checkers/docs/checker.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +import random +import re +import string +import sys + +import checklib +import requests +from checklib import * +from checklib import status + +import docs_lib + +LEVEL_1_DOMAINS = [ + ".AC", ".AD", ".AE", ".AERO", ".AF", ".AG", ".AI", ".AL", ".AM", ".AN", ".AO", ".AQ", ".AR", ".ARPA", ".AS", ".ASIA", + ".AT", ".AU", ".AW", ".AX", ".AZ", ".BA", ".BB", ".BD", ".BE", ".BF", ".BG", ".BH", ".BI", ".BIZ", ".BJ", ".BL", ".BM", + ".BN", ".BO", ".BR", ".BS", ".BT", ".BV", ".BW", ".BY", ".BZ", ".CA", ".CAT", ".CC", ".CD", ".CF", ".CG", ".CH", ".CI", + ".CK", ".CL", ".CM", ".CN", ".CO", ".COM", ".COOP", ".CR", ".CU", ".CV", ".CX", ".CY", ".CZ", ".DE", ".DJ", ".DK", ".DM", + ".DO", ".DZ", ".EC", ".EDU", ".EE", ".EG", ".EH", ".ER", ".ES", ".ET", ".EU", ".FI", ".FJ", ".FK", ".FM", ".FO", ".FR", + ".GA", ".GB", ".GD", ".GE", ".GF", ".GG", ".GH", ".GI", ".GL", ".GM", ".GN", ".GOV", ".GP", ".GQ", ".GR", ".GS", ".GT", + ".GU", ".GW", ".GY", ".HK", ".HM", ".HN", ".HR", ".HT", ".HU", ".ID", ".IE", ".IL", ".IM", ".IN", ".INFO", ".INT", ".IO", + ".IQ", ".IR", ".IS", ".IT", ".JE", ".JM", ".JO", ".JOBS", ".JP", ".KE", ".KG", ".KH", ".KI", ".KM", ".KN", ".KP", ".KR", + ".KW", ".KY", ".KZ", ".LA", ".LB", ".LC", ".LI", ".LK", ".LR", ".LS", ".LT", ".LU", ".LV", ".LY", ".MA", ".MC", ".MD", + ".ME", ".MF", ".MG", ".MH", ".MIL", ".MK", ".ML", ".MM", ".MN", ".MO", ".MOBI", ".MP", ".MQ", ".MR", ".MS", ".MT", ".MU", + ".MUSEUM", ".MV", ".MW", ".MX", ".MY", ".MZ", ".NA", ".NAME", ".NC", ".NE", ".NET", ".NF", ".NG", ".NI", ".NL", ".NO", + ".NP", ".NR", ".NU", ".NZ", ".OM", ".ORG", ".PA", ".PE", ".PF", ".PG", ".PH", ".PK", ".PL", ".PM", ".PN", ".PR", ".PRO", + ".PS", ".PT", ".PW", ".PY", ".QA", ".RE", ".RO", ".RS", ".RU", ".RW", ".SA", ".SB", ".SC", ".SD", ".SE", ".SG", ".SH", + ".SI", ".SJ", ".SK", ".SL", ".SM", ".SN", ".SO", ".SR", ".ST", ".SU", ".SV", ".SY", ".SZ", ".TC", ".TD", ".TEL", ".TF", + ".TG", ".TH", ".TJ", ".TK", ".TL", ".TM", ".TN", ".TO", ".TP", ".TR", ".TRAVEL", ".TT", ".TV", ".TW", ".TZ", ".UA", ".UG", + ".UK", ".UM", ".US", ".UY", ".UZ", ".VA", ".VC", ".VE", ".VG", ".VI", ".VN", ".VU", ".WF", ".WS" +] + + +class Checker(BaseChecker): + vulns: int = 1 + timeout: int = 15 + uses_attack_data: bool = True + + def __init__(self, *args, **kwargs): + super(Checker, self).__init__(*args, **kwargs) + self.lib = docs_lib.DocsLib(self) + self.token_regexp = re.compile(r'^[0-9A-Za-z]{1,80}$') + + def get_random_org(self): + l = rnd_string(10, alphabet=string.ascii_lowercase) + r = random.choice(LEVEL_1_DOMAINS) + return f"{l}{r}".lower() + + def action(self, action, *args, **kwargs): + try: + super(Checker, self).action(action, *args, **kwargs) + except requests.exceptions.ConnectionError: + self.cquit(Status.DOWN, 'Connection error', 'Got requests connection error') + + def check(self): + session = checklib.get_initialized_session() + org = self.get_random_org() + + response = self.lib.create_org(session, org) + token = response.get('token') + org_id = response.get('id') + + self.assert_eq(bool(self.token_regexp.fullmatch(token)), True, 'Invalid token format') + + u, p = rnd_username(), rnd_password() + u1, p1 = rnd_username(), rnd_password() + self.lib.create_user(session, u, p, token) + u = f'{u}@{org}' + + session = self.lib.login(session, u, p) + + title = rnd_string(10) + content = rnd_string(10) + + got_doc = self.lib.create_doc(session, title, content) + got_doc = self.lib.get_doc(session, got_doc.get('id')) + + self.lib.create_user(session, u1, p1, token) + u1 = f'{u1}@{org}' + + session_alter = checklib.get_initialized_session() + self.lib.login(session_alter, u1, p1) + + got_alter_doc = self.lib.get_doc(session_alter, got_doc.get('id')) + self.assert_eq(got_alter_doc.get('title'), title, 'Failed to get document') + self.assert_eq(got_alter_doc.get('content'), content, 'Failed to get document') + + new_title = rnd_string(10) + self.lib.update_doc(session, got_doc.get('id'), title=new_title) + + got_updated_doc = self.lib.get_doc(session, got_doc.get('id')) + self.assert_eq(got_updated_doc.get('title'), new_title, 'Failed to update document') + self.assert_eq(got_updated_doc.get('content'), content, 'Failed to update document') + + search_results = self.lib.search(session_alter, new_title) + self.assert_in(got_updated_doc.get('id'), [x.get('id') for x in search_results], 'Failed to search document') + self.assert_in(got_updated_doc.get('title'), [x.get('title') for x in search_results], + 'Failed to search document') + self.assert_in(got_updated_doc.get('content'), [x.get('content') for x in search_results], + 'Failed to search document') + + self.cquit(Status.OK) + + def put(self, flag_id: str, flag: str, vuln: str): + session = checklib.get_initialized_session() + org_id = self.get_random_org() + + response = self.lib.create_org(session, org_id) + token = response.get('token') + + self.assert_eq(bool(self.token_regexp.fullmatch(token)), True, 'Invalid token format') + + u, p = rnd_username(), rnd_password() + self.lib.create_user(session, u, p, token) + + sess = checklib.get_initialized_session() + u = f'{u}@{org_id}' + self.lib.login(sess, u, p) + title = checklib.rnd_string(10) + created_doc = self.lib.create_doc(sess, title, flag) + + doc_id = created_doc.get('id') + self.assert_eq(bool(self.token_regexp.fullmatch(doc_id)), True, 'Invalid docid format') + + self.cquit(Status.OK, doc_id, f"{token}:{u}:{p}:{doc_id}") + + def get(self, flag_id: str, flag: str, vuln: str): + token, u, p, doc_id = flag_id.split(':') + sess = checklib.get_initialized_session() + self.lib.login(sess, u, p, status=status.Status.CORRUPT) + doc = self.lib.get_doc(sess, doc_id, status=status.Status.CORRUPT) + self.assert_eq(doc.get('content'), flag, 'Invalid content', status=status.Status.CORRUPT) + + sess = checklib.get_initialized_session() + u1, p1 = rnd_username(), rnd_password() + created_user = self.lib.create_user(sess, u1, p1, token) + + sess = checklib.get_initialized_session() + self.lib.login(sess, created_user.get('email'), created_user.get('password')) + + self.lib.search(sess, '', status=status.Status.CORRUPT) + + self.cquit(Status.OK) + + +if __name__ == '__main__': + c = Checker(sys.argv[2]) + + try: + c.action(sys.argv[1], *sys.argv[3:]) + except c.get_check_finished_exception() as e: + cquit(status.Status(c.status), c.public, c.private) diff --git a/checkers/docs/docs_lib.py b/checkers/docs/docs_lib.py new file mode 100644 index 0000000..8b114c2 --- /dev/null +++ b/checkers/docs/docs_lib.py @@ -0,0 +1,141 @@ +from typing import Optional + +import checklib +from checklib import BaseChecker +import requests + +PORT = 8000 + + +class DocsLib: + @property + def api_url(self): + return f'http://{self.host}:{self.port}/api' + + def __init__(self, checker: BaseChecker, port=PORT, host=None): + self.c = checker + self.port = port + self.host = host or self.c.host + + def create_org(self, session: requests.Session, domain: str): + document = { + "domain": domain, + } + + resp = session.post( + f"{self.api_url}/organizations", + json=document + ) + self.c.assert_eq(resp.status_code, 200, 'Failed to create organization') + response_data = self.c.get_json(resp, 'Failed to create organization: invalid JSON') + self.c.assert_eq(type(response_data), dict, 'Failed to create organization: invalid JSON') + return response_data + + + def list_orgs(self, session: requests.Session): + resp = session.get( + f"{self.api_url}/organizations" + ) + self.c.assert_eq(resp.status_code, 200, 'Failed to list organization') + return self.c.get_json(resp, 'Failed to list organization: invalid JSON') + + def create_user(self, session: requests.Session, username: str, password: str, token: str): + document = { + "username": username, + "password": password, + "token": token + } + resp = session.post( + f"{self.api_url}/users", + json=document + ) + self.c.assert_eq(resp.status_code, 200, 'Failed to create user') + return self.c.get_json(resp, 'Failed to create user: invalid JSON') + + def login(self, session: requests.Session, username: str, password: str, status: checklib.Status = checklib.Status.MUMBLE): + document = { + "email": username, + "password": password + } + + response = session.post( + f"{self.api_url}/login", + json=document + ) + self.c.assert_eq(response.status_code, 200, 'Failed to login', status=status) + resp_json = self.c.get_json(response, 'Failed to login: invalid JSON', status=status) + self.c.assert_eq(type(resp_json), dict, 'Failed to login: invalid JSON', status=status) + token = resp_json.get('token') or '' + session.headers['Authorization'] = f"Bearer {token}" + return session + + def get_user(self, session: requests.Session, status: checklib.Status = checklib.Status.MUMBLE): + response = session.get( + f"{self.api_url}/users/me", + + ) + self.c.assert_eq(response.status_code, 200, 'Failed to get user', status=status) + return self.c.get_json(response, 'Failed to get user: invalid JSON', status=status) + + + def create_doc(self, session: requests.Session, title: str, content: str, status: checklib.Status = checklib.Status.MUMBLE): + document = { + "title": title, + "content": content + } + + response = session.post( + f"{self.api_url}/documents", + json=document + ) + self.c.assert_eq(response.status_code, 200, 'Failed to create document', status=status) + return self.c.get_json(response, 'Failed to create document: invalid JSON', status=status) + + def update_doc(self, session: requests.Session, doc_id:str, title: str | None, content: str | None = None, + status: checklib.Status = checklib.Status.MUMBLE): + document = {} + if title: + document['title'] = title + if content: + document['content'] = content + + response = session.patch( + f"{self.api_url}/documents/{doc_id}", + json=document + ) + print(response.text) + self.c.assert_eq(response.status_code, 200, 'Failed to update document', status=status) + return self.c.get_json(response, 'Failed to create document: invalid JSON', status=status) + + def get_doc(self, session: requests.Session, doc_id: str, status: checklib.Status = checklib.Status.MUMBLE): + response = session.get( + f"{self.api_url}/documents/{doc_id}" + ) + self.c.assert_eq(response.status_code, 200, 'Failed to get document', status=status) + return self.c.get_json(response, 'Failed to get document: invalid JSON', status=status) + + def delete_doc(self, session: requests.Session, doc_id: str, status: checklib.Status = checklib.Status.MUMBLE): + response = session.delete( + f"{self.api_url}/documents/{doc_id}" + ) + self.c.assert_eq(response.status_code, 200, 'Failed to delete document', status=status) + + def search(self, session: requests.Session, query: str, status: checklib.Status = checklib.Status.MUMBLE): + response = session.get(f"{self.api_url}/documents", + params={'query': query} + ) + print(response.text) + self.c.assert_eq(response.status_code, 200, 'Failed to search', status=status) + return self.c.get_json(response, 'Failed to search: invalid JSON', status=status) + + + def document_get_txt(self, session: requests.Session, doc_id: str, status: checklib.Status = checklib.Status.MUMBLE): + response = session.get( + f"{self.api_url}/document/{doc_id}/text" + ) + self.c.assert_eq(response.status_code, 200, 'Failed to get txt', status=status) + return response.text + + + + diff --git a/services/docs/api/.dockerignore b/services/docs/api/.dockerignore new file mode 100644 index 0000000..f5e96db --- /dev/null +++ b/services/docs/api/.dockerignore @@ -0,0 +1 @@ +venv \ No newline at end of file diff --git a/services/docs/api/Dockerfile b/services/docs/api/Dockerfile new file mode 100644 index 0000000..54c0f15 --- /dev/null +++ b/services/docs/api/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.12-slim-bullseye + +RUN apt update && apt install -y xxd +WORKDIR /app +ADD src/requirements.txt requirements.txt + +RUN pip3 install -r requirements.txt + +COPY app.env app.env + +RUN sed -i "s/JWT_KEY=.*/JWT_KEY=$(xxd -u -l 20 -p /dev/urandom)/g" app.env + +COPY src . + +CMD fastapi run app/app.py \ No newline at end of file diff --git a/services/docs/api/app.env b/services/docs/api/app.env new file mode 100644 index 0000000..c341a80 --- /dev/null +++ b/services/docs/api/app.env @@ -0,0 +1,3 @@ +JWT_KEY=secret +MONGO_URI=mongodb://mongodb:27017/docs +SEARCH_HOST=http://search:8080 \ No newline at end of file diff --git a/services/docs/api/src/app/__init__.py b/services/docs/api/src/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/docs/api/src/app/api.py b/services/docs/api/src/app/api.py new file mode 100644 index 0000000..ddc99af --- /dev/null +++ b/services/docs/api/src/app/api.py @@ -0,0 +1,197 @@ +import logging +import secrets + +import fastapi +from fastapi import APIRouter, Query + +from app import config, models, dto, auth, search +from beanie.operators import In +from beanie import BeanieObjectId + +api = APIRouter() +logger = logging.getLogger(__name__) + +def json_error(error): + return {'error': error} + +async def get_search_client(): + return search.SearchClient(config.settings.search_host) + +async def get_current_user(data = fastapi.Depends(auth.jwt_bearer)) -> models.User | None: + uid = data.get('uid') + user = await models.User.get(uid) + if not user: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_401_UNAUTHORIZED, + detail='invalid token' + ) + return user + + +@api.get('/') +async def signup_handler(response: fastapi.Response): + return {"hello": "world"} + + + +@api.post('/organizations', response_model=dto.Organization) +async def create_organization(org: dto.OrganizationCreate): + # Check if organization with same name exists + existing_org = await models.Organization.find_one({"domain": org.domain}) + if existing_org: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_412_PRECONDITION_FAILED, + detail=json_error('organization already exists') + ) + + token = secrets.token_hex(32) + + # Create new organization + new_org = models.Organization(domain=org.domain, token=token) + await new_org.insert() + return dto.Organization(domain=new_org.domain, token=new_org.token, id=str(new_org.id)) + +@api.get('/organizations') +async def list_organizations(): + organizations = await models.Organization.find().to_list() + return [dto.OrganizationList(id=str(org.id), domain=org.domain) for org in organizations] + +@api.post('/users', response_model=dto.UserCreated) +async def create_user(user: dto.UserCreate): + # Check if organization exists + org = await models.Organization.find_one(models.Organization.token == user.token) + if not org: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_412_PRECONDITION_FAILED, + detail=json_error('organization not found') + ) + + new_username = f"{user.username}@{org.domain}" + existing_user = await models.User.find_one({"username": new_username}) + if existing_user: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_412_PRECONDITION_FAILED, + detail=json_error('user already exists') + ) + + # Create new user + new_user = models.User(username=new_username, password=user.password, organization_id=str(org.id)) + await new_user.insert() + return dto.UserCreated(email=new_user.username, password=new_user.password, organization_id=new_user.organization_id, id=str(new_user.id)) + +@api.post('/login') +async def login(req: dto.LoginRequest): + user = await models.User.find_one({"username": req.email}) + if not user: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_401_UNAUTHORIZED, + detail=json_error('invalid username or password') + ) + if user.password != req.password: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_401_UNAUTHORIZED, + detail=json_error('invalid username or password') + ) + + token = auth.jwt_helper.gen_token({'uid': str(user.id)}) + return {'token': token} + +@api.get('/users/me', response_model=dto.UserCreated) +async def get_me(user = fastapi.Depends(get_current_user)): + return dto.UserCreated(email=user.username, password="", organization_id=user.organization_id, id=str(user.id)) + + +@api.post('/documents') +async def create_document(doc: dto.CreateDocument, user = fastapi.Depends(get_current_user), search_client = fastapi.Depends(get_search_client)): + new_doc = models.Doc(title=doc.title, author_id=str(user.id), content=doc.content, organization_id=user.organization_id) + await new_doc.insert() + doc_id = str(new_doc.id) + await search_client.index(doc_id, new_doc.title, new_doc.content, new_doc.organization_id) + return {'id': str(new_doc.id)} + +async def must_get_doc(doc_id): + doc = await models.Doc.get(doc_id) + if not doc: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_404_NOT_FOUND, + detail=json_error('document not found')) + return doc + +@api.get('/documents/{doc_id}', response_model=dto.Document) +async def get_document(doc_id: str, user = fastapi.Depends(get_current_user)): + doc = await must_get_doc(doc_id) + if doc.organization_id != user.organization_id: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_403_FORBIDDEN, + detail=json_error('forbidden') + ) + return dto.Document(id=str(doc.id), title=doc.title, content=doc.content, organization_id=doc.organization_id) + +@api.patch('/documents/{doc_id}') +async def update_document(doc_id: str, update: dto.UpdateDocument, user = fastapi.Depends(get_current_user), search_client = fastapi.Depends(get_search_client)): + doc = await must_get_doc(doc_id) + if doc.organization_id != user.organization_id: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_403_FORBIDDEN, + detail=json_error('forbidden') + ) + if update.title: + doc.title = update.title + if update.content: + doc.content = update.content + await doc.save() + await search_client.index(doc_id, update.title, update.content, doc.organization_id) + return {'id': str(doc.id)} + + +@api.delete('/documents/{doc_id}') +async def delete_document(doc_id: str, user = fastapi.Depends(get_current_user), search_client = fastapi.Depends(get_search_client)): + doc = await must_get_doc(doc_id) + if doc.organization_id != user.organization_id: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_403_FORBIDDEN, + detail=json_error('forbidden') + ) + await doc.delete() + return {} + +@api.get('/documents/{doc_id}/text') +async def get_document_text(doc_id: str, user = fastapi.Depends(get_current_user)): + doc = await must_get_doc(doc_id) + if doc.organization_id != user.organization_id: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_403_FORBIDDEN, + detail=json_error('forbidden') + ) + return fastapi.responses.FileResponse(path=doc.content, filename=f"{doc.title}.txt", media_type='text/plain') + +# @api.get('/document/{doc_id}/pdf') +# async def get_document_pdf(doc_id: str, user = fastapi.Depends(get_current_user)): +# doc = await must_get_doc(doc_id) +# if doc.organization_id != user.organization_id: +# raise fastapi.HTTPException( +# status_code=fastapi.status.HTTP_403_FORBIDDEN, +# detail=json_error('forbidden') +# ) + + + + +@api.get('/documents') +async def search_docs( + query: str | None = Query(default=''), + user = fastapi.Depends(get_current_user), + search_client = fastapi.Depends(get_search_client)): + org_id = user.organization_id + not_allowed_chars = ['?', '*', '@' '#', '%', ';'] + for char in not_allowed_chars: + if char in query: + raise fastapi.HTTPException( + status_code=fastapi.status.HTTP_400_BAD_REQUEST, + detail=json_error('invalid query') + ) + doc_ids = await search_client.search(query, str(org_id)) + doc_ids = [BeanieObjectId(doc_id) for doc_id in doc_ids] + docs = await models.Doc.find(In(models.Doc.id, doc_ids)).to_list() + return [dto.Document(id=str(doc.id), title=doc.title, content=doc.content, organization_id=str(org_id)) for doc in docs] + diff --git a/services/docs/api/src/app/app.py b/services/docs/api/src/app/app.py new file mode 100644 index 0000000..831c33e --- /dev/null +++ b/services/docs/api/src/app/app.py @@ -0,0 +1,27 @@ +from fastapi import FastAPI +from starlette.requests import Request +from starlette.responses import JSONResponse + +# from app.db import init_db +from app.api import api +from app.db import init_db +from .config import settings + +app = FastAPI() + + + +@app.on_event("startup") +async def start_db(): + await init_db(settings.mongo_uri) + + +@app.exception_handler(ValueError) +async def value_error_exception_handler(request: Request, exc: ValueError): + return JSONResponse( + status_code=400, + content={"error": str(exc)}, + ) + + +app.include_router(api, prefix="/api") \ No newline at end of file diff --git a/services/docs/api/src/app/auth.py b/services/docs/api/src/app/auth.py new file mode 100644 index 0000000..30ddabc --- /dev/null +++ b/services/docs/api/src/app/auth.py @@ -0,0 +1,50 @@ +import logging + +import jwt +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi import Request, HTTPException + +from app.config import settings + +class JWTHelper(object): + ALGORITHM = "HS256" + + def __init__(self, security_key): + self.security_key = security_key + + def gen_token(self, data): + try: + return jwt.encode(data, self.security_key, algorithm=self.ALGORITHM) + except Exception as e: + logging.error("failed to generate jwt token: {}".format(e)) + return "" + + def decode_token(self, cookie): + try: + return jwt.decode(cookie, self.security_key, algorithms=[self.ALGORITHM, ]) + except Exception as e: + logging.error("failed to decode jwt token: {}".format(e)) + return None + + + +class JWTBearer(HTTPBearer): + def __init__(self, auto_error: bool = True): + super(JWTBearer, self).__init__(auto_error=auto_error) + self.jwt_helper = JWTHelper(settings.jwt_key) + + async def __call__(self, request: Request): + credentials: HTTPAuthorizationCredentials = await super(JWTBearer, self).__call__(request) + if credentials: + if not credentials.scheme == "Bearer": + raise HTTPException(status_code=403, detail="Invalid authentication scheme.") + payload = self.jwt_helper.decode_token(credentials.credentials) + if not payload: + raise HTTPException(status_code=403, detail="Invalid token or expired token.") + return payload + else: + raise HTTPException(status_code=403, detail="Invalid authorization code.") + +jwt_helper = JWTHelper(settings.jwt_key) +jwt_bearer = JWTBearer() + diff --git a/services/docs/api/src/app/config.py b/services/docs/api/src/app/config.py new file mode 100644 index 0000000..f70cf6c --- /dev/null +++ b/services/docs/api/src/app/config.py @@ -0,0 +1,16 @@ +from functools import lru_cache + +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + jwt_key: str = 'secret' + mongo_uri: str = 'mongodb://localhost:27017' + search_host: str = 'http://localhost:8080' + + model_config = SettingsConfigDict(env_file="app.env") + + + + +settings = Settings() \ No newline at end of file diff --git a/services/docs/api/src/app/db.py b/services/docs/api/src/app/db.py new file mode 100644 index 0000000..87cba5e --- /dev/null +++ b/services/docs/api/src/app/db.py @@ -0,0 +1,11 @@ +from beanie import init_beanie +import motor.motor_asyncio + +from app import models + + +async def init_db(mongo_uri): + client = motor.motor_asyncio.AsyncIOMotorClient( + mongo_uri + ) + await init_beanie(database=client.db_name, document_models=[models.User, models.Organization, models.Doc]) diff --git a/services/docs/api/src/app/dto.py b/services/docs/api/src/app/dto.py new file mode 100644 index 0000000..b376aac --- /dev/null +++ b/services/docs/api/src/app/dto.py @@ -0,0 +1,43 @@ +from pydantic import BaseModel, Field + +class OrganizationCreate(BaseModel): + domain: str = Field(pattern=r'^[a-zA-Z0-9-]+\.[a-z]+$') + +class Organization(BaseModel): + id: str + domain: str + token: str + +class OrganizationList(BaseModel): + id: str + domain: str + +class UserCreate(BaseModel): + token: str + username: str = Field(min_length=3, max_length=20, pattern=r'^[a-zA-Z0-9_]+$') + password: str = Field(min_length=8) + + +class LoginRequest(BaseModel): + email: str + password: str + +class UserCreated(BaseModel): + id: str + email: str + password: str + organization_id: str + +class CreateDocument(BaseModel): + title: str + content: str + +class UpdateDocument(BaseModel): + title: str | None = None + content: str | None = None + +class Document(BaseModel): + id: str + title: str + content: str + organization_id: str diff --git a/services/docs/api/src/app/models.py b/services/docs/api/src/app/models.py new file mode 100644 index 0000000..f03c272 --- /dev/null +++ b/services/docs/api/src/app/models.py @@ -0,0 +1,28 @@ +from beanie import Document + + +class User(Document): + username: str + password: str + organization_id: str + + class Settings: + name = "users" + + +class Organization(Document): + domain: str + token: str + + class Settings: + name = "organizations" + + +class Doc(Document): + title: str + author_id: str + content: str + organization_id: str + + class Settings: + name = "documents" diff --git a/services/docs/api/src/app/search.py b/services/docs/api/src/app/search.py new file mode 100644 index 0000000..824defd --- /dev/null +++ b/services/docs/api/src/app/search.py @@ -0,0 +1,29 @@ +import httpx + +from app.config import settings + + +class SearchClient: + def __init__(self, host): + self.host = host + self.client = httpx.AsyncClient() + + async def search(self, query, org_id) -> list[str]: + response = await self.client.get( + f"{self.host}/search?q={query}&org_id={org_id}" + ) + if response.status_code != 200: + return [] + response = response.json() + return [x.get('id') for x in response.get('hits')] + + async def index(self, doc_id: str, title: str, content: str, org_id: str): + body = { + 'id': doc_id, + 'title': title, + 'content': content, + 'org_id': org_id + } + response = await self.client.post(f'{self.host}/index', json=body) + response.raise_for_status() + diff --git a/services/docs/api/src/requirements.txt b/services/docs/api/src/requirements.txt new file mode 100644 index 0000000..642bec0 --- /dev/null +++ b/services/docs/api/src/requirements.txt @@ -0,0 +1,44 @@ +annotated-types==0.7.0 +anyio==4.7.0 +beanie==1.28.0 +certifi==2024.12.14 +cffi==1.17.1 +click==8.1.7 +cryptography==44.0.0 +dnspython==2.7.0 +email_validator==2.2.0 +fastapi==0.115.6 +fastapi-cli==0.0.7 +h11==0.14.0 +httpcore==1.0.7 +httptools==0.6.4 +httpx==0.28.1 +idna==3.10 +Jinja2==3.1.4 +lazy-model==0.2.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +motor==3.6.0 +pycparser==2.22 +pydantic==2.10.3 +pydantic-settings==2.7.0 +pydantic_core==2.27.1 +Pygments==2.18.0 +PyJWT==2.10.1 +pymongo==4.9.2 +python-dotenv==1.0.1 +python-multipart==0.0.20 +PyYAML==6.0.2 +rich==13.9.4 +rich-toolkit==0.12.0 +shellingham==1.5.4 +sniffio==1.3.1 +starlette==0.41.3 +toml==0.10.2 +typer==0.15.1 +typing_extensions==4.12.2 +uvicorn==0.34.0 +uvloop==0.21.0 +watchfiles==1.0.3 +websockets==14.1 diff --git a/services/docs/dev/test.py b/services/docs/dev/test.py new file mode 100644 index 0000000..000aab7 --- /dev/null +++ b/services/docs/dev/test.py @@ -0,0 +1,73 @@ +import requests +import json + +def index_document(doc_id: str, title: str, content: str, org_id: str, host: str = "http://localhost:8080") -> bool: + """ + Index a document in the search server. + + Args: + doc_id: Unique identifier for the document + title: Title of the document + content: Content/body of the document + org_id: Organization ID the document belongs to + host: Host URL of the search server + + Returns: + bool: True if indexing was successful, False otherwise + + Raises: + requests.exceptions.RequestException: If there is an error making the request + """ + + document = { + "id": doc_id, + "title": title, + "content": content, + "org_id": org_id + } + + try: + response = requests.post( + f"{host}/index", + json=document + ) + response.raise_for_status() + return True + + except requests.exceptions.RequestException as e: + print(f"Error indexing document: {e}") + return False + + +def search(query: str, org_id: str, host: str = "http://localhost:8080"): + + try: + response = requests.get( + f"{host}/search?q={query}&org_id={org_id}" + ) + response.raise_for_status() + return response.json() + + except requests.exceptions.RequestException as e: + print(f"Error indexing document: {e}") + return False + + +def main(): + # print(index_document("1", "test kek", "test", "1")) + # print(index_document("2", "test", "test", "1")) + # print(index_document("3", "kektest", "test", "1")) + # print(search("test", "1").get('hits')) + # print(search("kek", "1").get('hits')) + # print(search("lol", "1").get('hits')) + # print(search("kek test", "1").get('hits')) + # print(search("test", "2").get('hits')) + # print(search("kek", "2").get('hits')) + host = "http://localhost:8080" + response = requests.get( + f"{host}/search?q=&org_id=1&org_id=1" + ) + print(response.json()) + +if __name__ == "__main__": + main() diff --git a/services/docs/docker-compose.yml b/services/docs/docker-compose.yml new file mode 100644 index 0000000..cbb3f96 --- /dev/null +++ b/services/docs/docker-compose.yml @@ -0,0 +1,36 @@ +services: + mongodb: + image: mongo:8.0.0 + volumes: + - dbdata:/data/db + restart: unless-stopped + pids_limit: 512 + mem_limit: 512m + cpus: 1 + + search: + build: search + volumes: + - searchdata:/data + environment: + - "INDEX_FILE=/data/bleve.index" + restart: unless-stopped + pids_limit: 512 + mem_limit: 512m + cpus: 1 + + api: + build: api + ports: + - "8000:8000" + restart: unless-stopped + depends_on: + - mongodb + - search + pids_limit: 512 + mem_limit: 1g + cpus: 2 + +volumes: + dbdata: + searchdata: \ No newline at end of file diff --git a/services/docs/search/Dockerfile b/services/docs/search/Dockerfile new file mode 100644 index 0000000..6f43ed5 --- /dev/null +++ b/services/docs/search/Dockerfile @@ -0,0 +1,32 @@ +# Stage 1: Build the Go binary +FROM golang:1.23-alpine AS builder + +# Set the working directory inside the container +WORKDIR /app + +# Copy the go.mod and go.sum files +COPY src/go.mod src/go.sum ./ + +# Download the Go module dependencies +RUN go mod download + +# Copy the rest of the application source code +COPY src . + +# Build the Go binary +RUN go build -o /app/search + +# Stage 2: Create a minimal image with the Go binary +FROM alpine:latest + +# Set the working directory inside the container +WORKDIR /root/ + +# Copy the Go binary from the builder stage +COPY --from=builder /app/search . + +# Expose the port the service will run on +EXPOSE 8080 + +# Command to run the Go binary +CMD ["./search"] \ No newline at end of file diff --git a/services/docs/search/src/go.mod b/services/docs/search/src/go.mod new file mode 100644 index 0000000..150d339 --- /dev/null +++ b/services/docs/search/src/go.mod @@ -0,0 +1,45 @@ +module search + +go 1.20 + +require ( + github.com/blevesearch/bleve/v2 v2.3.10 + github.com/gofiber/fiber/v2 v2.52.5 +) + +require ( + github.com/RoaringBitmap/roaring v1.2.3 // indirect + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/bits-and-blooms/bitset v1.2.0 // indirect + github.com/blevesearch/bleve_index_api v1.0.6 // indirect + github.com/blevesearch/geo v0.1.18 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.1.6 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.0.10 // indirect + github.com/blevesearch/zapx/v11 v11.3.10 // indirect + github.com/blevesearch/zapx/v12 v12.3.10 // indirect + github.com/blevesearch/zapx/v13 v13.3.10 // indirect + github.com/blevesearch/zapx/v14 v14.3.10 // indirect + github.com/blevesearch/zapx/v15 v15.3.13 // indirect + github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect + github.com/golang/protobuf v1.3.2 // indirect + github.com/golang/snappy v0.0.1 // indirect + github.com/google/uuid v1.5.0 // indirect + github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect + github.com/klauspost/compress v1.17.0 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mschoch/smat v0.2.0 // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect + go.etcd.io/bbolt v1.3.7 // indirect + golang.org/x/sys v0.15.0 // indirect +) diff --git a/services/docs/search/src/go.sum b/services/docs/search/src/go.sum new file mode 100644 index 0000000..8fc3564 --- /dev/null +++ b/services/docs/search/src/go.sum @@ -0,0 +1,87 @@ +github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY= +github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= +github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/blevesearch/bleve/v2 v2.3.10 h1:z8V0wwGoL4rp7nG/O3qVVLYxUqCbEwskMt4iRJsPLgg= +github.com/blevesearch/bleve/v2 v2.3.10/go.mod h1:RJzeoeHC+vNHsoLR54+crS1HmOWpnH87fL70HAUCzIA= +github.com/blevesearch/bleve_index_api v1.0.6 h1:gyUUxdsrvmW3jVhhYdCVL6h9dCjNT/geNU7PxGn37p8= +github.com/blevesearch/bleve_index_api v1.0.6/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms= +github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw= +github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.1.6 h1:CdekX/Ob6YCYmeHzD72cKpwzBjvkOGegHOqhAkXp6yA= +github.com/blevesearch/scorch_segment_api/v2 v2.1.6/go.mod h1:nQQYlp51XvoSVxcciBjtvuHPIVjlWrN1hX4qwK2cqdc= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI= +github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k= +github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk= +github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ= +github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s= +github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs= +github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8= +github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk= +github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU= +github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= +github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ= +github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/gofiber/fiber/v2 v2.52.5 h1:tWoP1MJQjGEe4GB5TUGOi7P2E0ZMMRx5ZTG4rT+yGMo= +github.com/gofiber/fiber/v2 v2.52.5/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo= +github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= +github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede h1:YrgBGwxMRK0Vq0WSCWFaZUnTsrA/PZE/xs1QZh+/edg= +github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM= +github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= +github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= +go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= +go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/services/docs/search/src/main.go b/services/docs/search/src/main.go new file mode 100644 index 0000000..64a272b --- /dev/null +++ b/services/docs/search/src/main.go @@ -0,0 +1,97 @@ +package main + +import ( + "errors" + "log" + "os" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search/query" + "github.com/gofiber/fiber/v2" +) + +type Document struct { + ID string `json:"id"` + Title string `json:"title"` + Content string `json:"content"` + OrgID string `json:"org_id"` +} + +type searchApp struct { + index bleve.Index +} + +func (app *searchApp) handleIndex(c *fiber.Ctx) error { + var doc Document + if err := c.BodyParser(&doc); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{ + "error": err.Error(), + }) + } + + if err := app.index.Index(doc.ID, doc); err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{ + "error": err.Error(), + }) + } + + return c.SendStatus(fiber.StatusCreated) +} + +func (app *searchApp) handleSearch(c *fiber.Ctx) error { + qs := c.Queries() + + orgID := qs["org_id"] + q := qs["q"] + + // if orgID == "" { + // return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{ + // "error": "org_id is required", + // }) + // } + queries := []query.Query{} + if orgID != "" { + orgIdQ := bleve.NewMatchQuery(orgID) + orgIdQ.SetField("org_id") + queries = append(queries, orgIdQ) + } + if q != "" { + queries = append(queries, bleve.NewMatchQuery(q)) + } + + + searchRequest := bleve.NewSearchRequest(bleve.NewConjunctionQuery(queries...)) + searchResult, err := app.index.Search(searchRequest) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{ + "error": err.Error(), + }) + } + + return c.JSON(searchResult) +} + +func main() { + // Create a new index + mapping := bleve.NewIndexMapping() + app := &searchApp{} + var err error + + app.index, err = bleve.Open(os.Getenv("INDEX_FILE")) + if err != nil && errors.Is(err, bleve.ErrorIndexPathDoesNotExist) { + log.Printf("Index not found, creating new index %s", os.Getenv("INDEX_FILE")) + app.index, err = bleve.New(os.Getenv("INDEX_FILE"), mapping) + } + if err != nil { + log.Fatal(err) + } + defer app.index.Close() + + fiber := fiber.New() + + fiber.Post("/index", app.handleIndex) + fiber.Get("/search", app.handleSearch) + + log.Printf("Starting server on :8080") + log.Fatal(fiber.Listen(":8080")) +}