diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index 71a84a4..bc2eb1d 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -2,11 +2,11 @@ name: CI
 on:
   push:
-    branches: [ master ]
+    branches: [ master, main ]
     tags:
       - 'v*.*.*'
   pull_request:
-    branches: [ master ]
+    branches: [ master, main ]
 
 jobs:
   test:
@@ -54,10 +54,3 @@ jobs:
           pip: true
           # only upload if a tag is pushed (otherwise just build & check)
           upload: ${{ github.event_name == 'push' && steps.check-tag.outputs.match == 'true' }}
-      - name: Create GitHub release
-        if: ${{ github.event_name == 'push' && steps.check-tag.outputs.match == 'true' }}
-        uses: Roang-zero1/github-create-release-action@master
-        with:
-          version_regex: ^v[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index 143a76f..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Changelog
-
-## Unreleased
-
-- initial release
diff --git a/LICENSE.txt b/LICENSE
similarity index 100%
rename from LICENSE.txt
rename to LICENSE
diff --git a/README.md b/README.md
index dc1ee7a..d130c66 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,17 @@
 # ChatBot
 
+[![Latest Release](https://img.shields.io/pypi/v/deltachat-chatbot.svg)](https://pypi.org/project/deltachat-chatbot)
 [![CI](https://github.com/deltachat-bot/chatbot/actions/workflows/python-ci.yml/badge.svg)](https://github.com/deltachat-bot/chatbot/actions/workflows/python-ci.yml)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 
-Conversational chat-bot for Delta Chat, using OpenAI API.
+Conversational chat-bot for Delta Chat, using GPT4All.
 
 ## Install
 
 ```sh
-pip install git+https://github.com/deltachat-bot/chatbot.git
+pip install deltachat-chatbot
 ```
 
-### Installing deltachat-rpc-server
-
-This program depends on a standalone Delta Chat RPC server `deltachat-rpc-server` program that must be
-available in your `PATH`. To install it check:
-https://github.com/deltachat/deltachat-core-rust/tree/master/deltachat-rpc-server
-
 ## Usage
 
 Configure the bot:
@@ -25,26 +20,6 @@ Configure the bot:
 ```sh
 chatbot init bot@example.com PASSWORD
 ```
 
-Create a configuration file named `config.json` in the program folder, for example:
-
-```
-{
-  "api_key": "sk-...",
-  "global_monthly_quota": 10000000,
-  "user_hourly_tokens_quota": 4000,
-  "user_hourly_queries_quota": 60,
-  "openai": {
-    "model": "gpt-3.5-turbo",
-    "max_tokens": 500,
-    "messages": [{"role": "system", "content": "You are a helpful assistant inside Delta Chat messenger."}],
-    "temperature": 0.7
-  }
-}
-```
-
-**Note:** On GNU/Linux the program folder is at `~/.config/chatbot/`, when configuring the bot it will
-print the path to the accounts folder that is inside the configuration folder.
-
 Start the bot:
 
 ```sh
@@ -52,12 +27,3 @@ chatbot serve
 ```
 
 Run `chatbot --help` to see all available options.
-
-To change log level set the enviroment variable `CHATBOT_LOG_LEVEL` to one of:
-`debug`, `info`, `warning` or `error`
-
-
-## Using the bot in groups
-
-To use the bot in groups just add it to a group and send a message starting with @BotName to
-mention it or quote-reply a previous message from the the bot.
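The `config.json` settings removed above are superseded by the CLI options added in `deltachat_chatbot/hooks.py` below. A sketch of the new invocation, assuming deltabot-cli accepts these generic options before the subcommand (flag names are taken from this diff; the values shown are illustrative):

```sh
# one-time account setup (unchanged)
chatbot init bot@example.com PASSWORD

# serve with the GPT4All options introduced in hooks.py below;
# the model name is the default from the diff, other values are examples
chatbot --model mistral-7b-openorca.gguf2.Q4_0.gguf \
        --system-prompt "You are a helpful assistant." \
        --max-tokens 200 \
        --history 20 \
        --temperature 0.5 \
        serve
```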
diff --git a/deltachat_chatbot/__init__.py b/deltachat_chatbot/__init__.py
index f1ab9fa..bc85d5f 100644
--- a/deltachat_chatbot/__init__.py
+++ b/deltachat_chatbot/__init__.py
@@ -1,5 +1,4 @@
-"""chat bot."""
-import asyncio
+"""Chat bot."""
 
 from .hooks import cli
 
@@ -7,6 +6,6 @@ def main() -> None:
     """Run the application."""
     try:
-        asyncio.run(cli.start())
+        cli.start()
     except KeyboardInterrupt:
         pass
diff --git a/deltachat_chatbot/__main__.py b/deltachat_chatbot/__main__.py
index b872046..f4a5c4a 100644
--- a/deltachat_chatbot/__main__.py
+++ b/deltachat_chatbot/__main__.py
@@ -1,4 +1,5 @@
 """Support for package execution."""
+
 from . import main
 
 main()
diff --git a/deltachat_chatbot/gpt4all.py b/deltachat_chatbot/gpt4all.py
new file mode 100644
index 0000000..9d2a77a
--- /dev/null
+++ b/deltachat_chatbot/gpt4all.py
@@ -0,0 +1,207 @@
+# pylama:ignore=C0114,W0212,R0913,R0914,C0301,R1735,
+import ctypes
+import warnings
+from typing import Any
+
+from gpt4all import GPT4All as _GPT4All
+from gpt4all._pyllmodel import (
+    LLModel,
+    PromptCallback,
+    RecalculateCallback,
+    ResponseCallback,
+    ResponseCallbackType,
+    empty_response_callback,
+    llmodel,
+)
+from gpt4all.gpt4all import MessageType
+
+
+def prompt_model(
+    self,
+    prompt: str,
+    prompt_template: str,
+    callback: ResponseCallbackType,
+    n_predict: int = 4096,
+    top_k: int = 40,
+    top_p: float = 0.9,
+    min_p: float = 0.0,
+    temp: float = 0.1,
+    n_batch: int = 8,
+    repeat_penalty: float = 1.2,
+    repeat_last_n: int = 10,
+    context_erase: float = 0.75,
+    reset_context: bool = False,
+    special: bool = False,
+    fake_reply: str = "",
+):
+    """
+    Generate a response from the model for the given prompt.
+
+    Parameters
+    ----------
+    prompt: str
+        Question, task, or conversation for model to respond to
+    callback(token_id:int, response:str): bool
+        The model sends response tokens to callback
+
+    Returns
+    -------
+    None
+    """
+
+    if self.model is None:
+        self._raise_closed()
+
+    self.buffer.clear()
+    self.buff_expecting_cont_bytes = 0
+
+    self._set_context(
+        n_predict=n_predict,
+        top_k=top_k,
+        top_p=top_p,
+        min_p=min_p,
+        temp=temp,
+        n_batch=n_batch,
+        repeat_penalty=repeat_penalty,
+        repeat_last_n=repeat_last_n,
+        context_erase=context_erase,
+        reset_context=reset_context,
+    )
+
+    llmodel.llmodel_prompt(
+        self.model,
+        ctypes.c_char_p(prompt.encode()),
+        ctypes.c_char_p(prompt_template.encode()),
+        PromptCallback(self._prompt_callback),
+        ResponseCallback(self._callback_decoder(callback)),
+        RecalculateCallback(self._recalculate_callback),
+        self.context,
+        special,
+        ctypes.c_char_p(fake_reply.encode()) if fake_reply else ctypes.c_char_p(),
+    )
+
+
+LLModel.prompt_model = prompt_model
+
+
+class GPT4All(_GPT4All):
+    """Patch GPT4All to support fake_reply parameter.
+    See https://github.com/nomic-ai/gpt4all/issues/1959
+    """
+
+    def generate(  # noqa
+        self,
+        prompt: str,
+        *,
+        max_tokens: int = 200,
+        temp: float = 0.7,
+        top_k: int = 40,
+        top_p: float = 0.4,
+        min_p: float = 0.0,
+        repeat_penalty: float = 1.18,
+        repeat_last_n: int = 64,
+        n_batch: int = 8,
+        n_predict: int | None = None,
+        callback: ResponseCallbackType = empty_response_callback,
+        fake_reply="",
+    ) -> Any:
+        """
+        Generate outputs from any GPT4All model.
+
+        Args:
+            prompt: The prompt for the model to complete.
+            max_tokens: The maximum number of tokens to generate.
+            temp: The model temperature. Larger values increase creativity but decrease factuality.
+            top_k: Randomly sample from the top_k most likely tokens at each generation step. Set this to 1 for greedy decoding.
+            top_p: Randomly sample at each generation step from the top most likely tokens whose probabilities add up to top_p.
+            min_p: Randomly sample at each generation step from the top most likely tokens whose probabilities are at least min_p.
+            repeat_penalty: Penalize the model for repetition. Higher values result in less repetition.
+            repeat_last_n: How far in the model's generation history to apply the repeat penalty.
+            n_batch: Number of prompt tokens processed in parallel. Larger values decrease latency but increase resource requirements.
+            n_predict: Equivalent to max_tokens, exists for backwards compatibility.
+            callback: A function with arguments token_id:int and response:str, which receives the tokens from the model as they are generated and stops the generation by returning False.
+
+        Returns:
+            Either the entire completion or a generator that yields the completion token by token.
+        """
+
+        # Preparing the model request
+        generate_kwargs: dict[str, Any] = dict(
+            temp=temp,
+            top_k=top_k,
+            top_p=top_p,
+            min_p=min_p,
+            repeat_penalty=repeat_penalty,
+            repeat_last_n=repeat_last_n,
+            n_batch=n_batch,
+            n_predict=n_predict if n_predict is not None else max_tokens,
+            fake_reply=fake_reply,
+        )
+
+        if self._history is not None:
+            # check if there is only one message, i.e. system prompt:
+            reset = len(self._history) == 1
+            self._history.append({"role": "user", "content": prompt})
+
+            fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
+            if fct_func is GPT4All._format_chat_prompt_template:
+                if reset:
+                    # ingest system prompt
+                    # use "%1%2" and not "%1" to avoid implicit whitespace
+                    self.model.prompt_model(
+                        self._history[0]["content"],
+                        "%1%2",
+                        empty_response_callback,
+                        n_batch=n_batch,
+                        n_predict=0,
+                        reset_context=True,
+                        special=True,
+                    )
+                prompt_template = self._current_prompt_template.format("%1", "%2")
+            else:
+                warnings.warn(
+                    "_format_chat_prompt_template is deprecated. Please use a chat session with a prompt template.",
+                    DeprecationWarning,
+                )
+                # special tokens won't be processed
+                prompt = self._format_chat_prompt_template(
+                    self._history[-1:],
+                    self._history[0]["content"] if reset else "",
+                )
+                prompt_template = "%1"
+                generate_kwargs["reset_context"] = reset
+        else:
+            prompt_template = "%1"
+            generate_kwargs["reset_context"] = True
+
+        # Prepare the callback, process the model response
+        output_collector: list[MessageType]
+        output_collector = [
+            {"content": ""}
+        ]  # placeholder for the self._history if chat session is not activated
+
+        if self._history is not None:
+            self._history.append({"role": "assistant", "content": ""})
+            output_collector = self._history
+
+        def _callback_wrapper(
+            callback: ResponseCallbackType,
+            output_collector: list[MessageType],
+        ) -> ResponseCallbackType:
+            def _callback(token_id: int, response: str) -> bool:
+                nonlocal callback, output_collector
+
+                output_collector[-1]["content"] += response
+
+                return callback(token_id, response)
+
+            return _callback
+
+        self.model.prompt_model(  # noqa
+            prompt,
+            prompt_template,
+            _callback_wrapper(callback, output_collector),
+            **generate_kwargs,
+        )
+
+        return output_collector[-1]["content"]
diff --git a/deltachat_chatbot/hooks.py b/deltachat_chatbot/hooks.py
index 7d33a2e..6736a60 100644
--- a/deltachat_chatbot/hooks.py
+++ b/deltachat_chatbot/hooks.py
@@ -1,168 +1,176 @@
 """Event Hooks"""
-# pylama:ignore=W0603
-import asyncio
-import json
-import logging
-import os
-from argparse import Namespace
-from typing import List, Tuple
-
-import openai
-import tiktoken
-from deltabot_cli import AttrDict, Bot, BotCli, EventType, const, events
-from .openai import get_reply, init_openai
-from .orm import init as init_db
-from .quota import QuotaManager
-from .utils import get_log_level, human_time_duration, run_in_background
-cli = BotCli("chatbot", get_log_level())
-cfg: dict = {}
-quota_manager = QuotaManager(cli, {})
-fail_count = 5  # pylint:disable=C0103
+import time
+from argparse import Namespace
+from typing import List
+
+from deltabot_cli import BotCli
+from deltachat2 import (
+    Bot,
+    ChatType,
+    CoreEvent,
+    EventType,
+    Message,
+    MsgData,
+    NewMsgEvent,
+    SpecialContactId,
+    events,
+)
+from rich.logging import RichHandler
+
+from .gpt4all import GPT4All
+
+cli = BotCli("chatbot")
+cli.add_generic_option(
+    "--no-time",
+    help="do not display timestamps in log messages",
+    action="store_false",
+)
+cli.add_generic_option(
+    "--model",
+    help="gpt4all model to use (default: %(default)s)",
+    default="mistral-7b-openorca.gguf2.Q4_0.gguf",
+)
+cli.add_generic_option("--system-prompt", help="an initial instruction for the model")
+cli.add_generic_option(
+    "--max-tokens",
+    help="the maximum number of tokens to generate (default: %(default)s)",
+    default=200,
+    type=int,
+)
+cli.add_generic_option(
+    "--history",
+    help="the maximum number of replies to remember (default: %(default)s)",
+    default=20,
+    type=int,
+)
+cli.add_generic_option(
+    "--temperature",
+    help="the model temperature. Larger values increase creativity but decrease factuality."
+ " (default: %(default)s)", + default=0.5, + type=float, +) + +gpt4all: GPT4All = None +args = Namespace() @cli.on_init -async def on_init(bot: Bot, _args: Namespace) -> None: - if not await bot.account.get_config("displayname"): - await bot.account.set_config("displayname", "ChatBot") - status = "I am a conversational Delta Chat bot, you can chat with me in private" - await bot.account.set_config("selfstatus", status) +def on_init(bot: Bot, opts: Namespace) -> None: + bot.logger.handlers = [ + RichHandler(show_path=False, omit_repeated_times=False, show_time=opts.no_time) + ] + for accid in bot.rpc.get_all_account_ids(): + if not bot.rpc.get_config(accid, "displayname"): + bot.rpc.set_config(accid, "displayname", "ChatBot") + status = "I am a conversational bot, you can chat with me in private" + bot.rpc.set_config(accid, "selfstatus", status) @cli.on_start -async def _on_start(bot: Bot, args: Namespace) -> None: - global quota_manager # pylint:disable=C0103 - path = os.path.join(args.config_dir, "config.json") - if os.path.exists(path): - with open(path, encoding="utf-8") as config: - cfg.update(json.load(config)) - cfg["openai"] = {"model": "gpt-3.5-turbo", "n": 1, **(cfg.get("openai") or {})} - api_key = cfg.get("api_key", "") - assert api_key, "API key is not set" - await init_openai(api_key, cfg["openai"]) - - path = os.path.join(args.config_dir, "sqlite.db") - await init_db(f"sqlite+aiosqlite:///{path}") - - quota_manager = QuotaManager(cli, cfg) - run_in_background(quota_manager.cooldown_loop()) - logging.info( - "Listening for messages at: %s", await bot.account.get_config("configured_addr") - ) +def on_start(_bot: Bot, opts: Namespace) -> None: + global gpt4all, args # noqa + args = opts + gpt4all = GPT4All(args.model) @cli.on(events.RawEvent) -async def log_event(event: AttrDict) -> None: - if event.type == EventType.INFO: - logging.info(event.msg) - elif event.type == EventType.WARNING: - logging.warning(event.msg) - elif event.type == EventType.ERROR: - logging.error(event.msg) - - -@cli.on(events.MemberListChanged(added=True)) -async def _member_added(event: AttrDict) -> None: - msg = event.message_snapshot - account = msg.message.account - if event.member == await account.get_config("configured_addr"): - await msg.chat.send_text("👋") - - -@cli.on(events.NewMessage(is_info=False, func=cli.is_not_known_command)) -async def _filter_messages(event: AttrDict) -> None: - global fail_count # pylint:disable=C0103 - msg = event.message_snapshot - chat = await msg.chat.get_basic_snapshot() - if not msg.text or not await _should_reply(msg, chat): +def log_event(bot: Bot, accid: int, event: CoreEvent) -> None: + if event.kind == EventType.INFO: + bot.logger.debug(event.msg) + elif event.kind == EventType.WARNING: + bot.logger.warning(event.msg) + elif event.kind == EventType.ERROR: + bot.logger.error(event.msg) + elif event.kind == EventType.SECUREJOIN_INVITER_PROGRESS: + if event.progress == 1000: + if not bot.rpc.get_contact(accid, event.contact_id).is_bot: + bot.logger.debug("QR scanned by contact id=%s", event.contact_id) + chatid = bot.rpc.create_chat_by_contact_id(accid, event.contact_id) + send_help(bot, accid, chatid) + + +@cli.on(events.NewMessage(command="/help")) +def _help(bot: Bot, accid: int, event: NewMsgEvent) -> None: + bot.rpc.markseen_msgs(accid, [event.msg.id]) + send_help(bot, accid, event.msg.chat_id) + + +@cli.on(events.NewMessage(command="/clear")) +def _clear(bot: Bot, accid: int, event: NewMsgEvent) -> None: + bot.rpc.markseen_msgs(accid, [event.msg.id]) + 
bot.rpc.delete_chat(accid, event.msg.chat_id) + + +@cli.on(events.NewMessage(is_info=False)) +def on_message(bot: Bot, accid: int, event: NewMsgEvent) -> None: + if bot.has_command(event.command): + return + + msg = event.msg + chat = bot.rpc.get_basic_chat_info(accid, msg.chat_id) + if chat.chat_type != ChatType.SINGLE: return - messages, prompt_tokens = await _get_messages(msg) - if not messages: - await msg.chat.send_message(text="TL;DR", quoted_msg=msg.id) - else: - global_quota_exceeded = await quota_manager.global_quota_exceeded() - if global_quota_exceeded: - cooldown = human_time_duration(await quota_manager.get_global_cooldown()) - await msg.chat.send_message( - text=f"Quota exceeded, wait for: ⏰ {cooldown}", quoted_msg=msg.id - ) - return - - quota_exceeded = await quota_manager.quota_exceeded(msg.from_id) - if quota_exceeded > 0: - cooldown = human_time_duration(quota_exceeded) - await msg.chat.send_message( - text=f"Quota exceeded, wait for: ⏰ {cooldown}", quoted_msg=msg.id - ) - return - - if quota_manager.is_rate_limited(): - await msg.chat.send_message( - text="⏰ I'm not available right now, try again later", quoted_msg=msg.id - ) - return - - try: - max_tokens = int(cfg["openai"].get("max_tokens") or 0) - reply = await get_reply( - str(msg.from_id), messages, max_tokens - prompt_tokens - ) - logging.debug("bot reply: %s", reply) - await quota_manager.increase_usage(msg.from_id, reply.usage.total_tokens) - text = reply.choices[0].message.content.strip() - await msg.chat.send_message(text=text, quoted_msg=msg.id) - fail_count = 2 - await asyncio.sleep(1) # avoid rate limits - except openai.error.RateLimitError as ex: - logging.exception(ex) - await msg.chat.send_message( - text="⏰ I'm not available right now, try again later", quoted_msg=msg.id - ) - fail_count = min(fail_count + 1, 60) - quota_manager.set_rate_limit(60 * fail_count) - - -async def _get_messages(msg: AttrDict) -> Tuple[List[dict], int]: - text = "" - if msg.quote and msg.quote.text: - text = "> " + msg.quote.text.replace("\n", "\n> ") + "\n\n" - - max_tokens = int(cfg["openai"].get("max_tokens") or 0) - prompt_tokens = 0 - if max_tokens: - enc = tiktoken.encoding_for_model(cfg["openai"].get("model")) - for text2 in (text + msg.text, msg.text): - tokens = len(enc.encode(text2)) - if tokens <= max_tokens // 2: - prompt_tokens = tokens - text = text2 - break - else: - text = "" - - return [{"role": "user", "content": text}] if text else [], prompt_tokens - - -async def _should_reply(msg: AttrDict, chat: AttrDict) -> bool: - # 1:1 direct chat - if chat.chat_type == const.ChatType.SINGLE: - return True - - # mentions - account = msg.message.account - selfaddr = await account.get_config("configured_addr") - displayname = await account.get_config("displayname") - mention = displayname and msg.text.startswith(f"@{displayname}") - if msg.text.startswith(selfaddr) or mention: - return True - - # quote-reply - if msg.quote and msg.quote.get("message_id"): - quote = account.get_message_by_id(msg.quote.message_id) - snapshot = await quote.get_snapshot() - if snapshot.sender == account.self_contact: - return True - - return False + bot.rpc.markseen_msgs(accid, [msg.id]) + + if not msg.text: + send_help(bot, accid, msg.chat_id) + return + + with gpt4all.chat_session(system_prompt=args.system_prompt or None): + bot.logger.debug(f"[chat={msg.chat_id}] Processing message={msg.id}") + load_history(bot, accid, msg.chat_id) + + start = time.time() + text = gpt4all.generate( + msg.text, max_tokens=args.max_tokens, 
temp=args.temperature + ) + text = text.strip() or "😶" + took = time.time() - start + bot.logger.debug(f"[chat={msg.chat_id}] Generated reply in {took:.1f} seconds") + + bot.rpc.send_msg(accid, msg.chat_id, MsgData(text=text, quoted_message_id=msg.id)) + + +@cli.after(events.NewMessage) +def delete_msgs(bot: Bot, accid: int, event: NewMsgEvent) -> None: + if event.command != "/clear": # /clear deletes the whole chat + msg = event.msg + bot.rpc.delete_messages(accid, [msg.id]) + bot.logger.debug(f"[chat={msg.chat_id}] Deleted message={msg.id}") + + +def load_history(bot: Bot, accid: int, chatid: int) -> None: + to_process: List[Message] = [] + to_delete: List[int] = [] + for msgid in reversed(bot.rpc.get_message_ids(accid, chatid, False, False)): + oldmsg = bot.rpc.get_message(accid, msgid) + if oldmsg.from_id == SpecialContactId.SELF: + if len(to_process) >= args.history: + to_delete.append(msgid) + else: + to_process.append(oldmsg) + + if to_delete: + bot.rpc.delete_messages(accid, to_delete) + + start = time.time() + for oldmsg in reversed(to_process): + prompt = oldmsg.quote.text if oldmsg.quote else "" + gpt4all.generate(prompt, max_tokens=0, fake_reply=oldmsg.text) + took = time.time() - start + bot.logger.debug( + f"[chat={chatid}] Loaded {len(to_process)} entries of history in {took:.1f} seconds" + ) + + +def send_help(bot: Bot, accid: int, chatid: int) -> None: + lines = [ + "👋 I am a conversational bot and you can chat with me in private only.", + "No 3rd party service is involved, only I will have access to the messages you send to me.", + 'To control our chat history, you should set "Disappearing Messages" in this chat.', + "Alternatively, send /clear and I will forget all the messages I have received here", + ] + bot.rpc.send_msg(accid, chatid, MsgData(text="\n".join(lines))) diff --git a/deltachat_chatbot/openai.py b/deltachat_chatbot/openai.py deleted file mode 100644 index 426d24f..0000000 --- a/deltachat_chatbot/openai.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Interaction with OpenAI API""" -import copy -import logging - -import openai -from openai.openai_object import OpenAIObject - -_cfg: dict = {} - - -async def init_openai(api_key: str, config: dict) -> None: - """Set openAI configuration.""" - _cfg.update(config) - openai.api_key = api_key - - -async def get_reply(user: str, messages: list, max_tokens: int) -> OpenAIObject: - kwargs = copy.deepcopy(_cfg) - if max_tokens: - kwargs["max_tokens"] = max_tokens - kwargs.setdefault("messages", []).extend(messages) - kwargs["user"] = user - logging.debug("user message: %s", kwargs) - return await openai.ChatCompletion.acreate(**kwargs) diff --git a/deltachat_chatbot/orm.py b/deltachat_chatbot/orm.py deleted file mode 100644 index aad7d1c..0000000 --- a/deltachat_chatbot/orm.py +++ /dev/null @@ -1,44 +0,0 @@ -"""database""" -from typing import Any - -from sqlalchemy import Column, Integer -from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine -from sqlalchemy.orm import declarative_base, sessionmaker -from sqlalchemy.sql.selectable import Select - -Base = declarative_base() -_session = None # noqa - - -class Usage(Base): # noqa - """User quota stats""" - - __tablename__ = "usage" - user_id = Column(Integer, primary_key=True) - tokens = Column(Integer, nullable=False) - queries = Column(Integer, nullable=False) - ends_at = Column(Integer, nullable=False) - - def __init__(self, **kwargs) -> None: - kwargs.setdefault("tokens", 0) - kwargs.setdefault("queries", 0) - super().__init__(**kwargs) - - -def async_session(): - 
"""Get session""" - return _session() - - -async def init(path: str, debug: bool = False) -> None: - """Initialize engine.""" - global _session # noqa - engine = create_async_engine(path, echo=debug) - async with engine.begin() as conn: - await conn.run_sync(Base.metadata.create_all) - - _session = sessionmaker(engine, class_=AsyncSession) - - -async def fetchone(session: sessionmaker, stmt: Select) -> Any: - return (await session.execute(stmt.limit(1))).scalars().first() diff --git a/deltachat_chatbot/quota.py b/deltachat_chatbot/quota.py deleted file mode 100644 index befdaf2..0000000 --- a/deltachat_chatbot/quota.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Cooldown loop logic""" -import asyncio -import time -from datetime import datetime, timedelta - -from deltabot_cli import BotCli -from sqlalchemy.future import select -from sqlalchemy.sql.expression import delete - -from .orm import Usage, async_session, fetchone - - -class QuotaManager: - """Manage user and global quotas""" - - def __init__(self, cli: BotCli, cfg: dict) -> None: - self._cli = cli - self._cfg = cfg - self._rate_limit = 0 - self._lock = asyncio.Lock() - - async def cooldown_loop(self) -> None: - next_month = await self.get_global_cooldown() - while True: - if self._rate_limit <= time.time(): - self._rate_limit = 0 - if next_month <= time.time(): - async with self._lock: - self._cli.set_custom_config("used_tokens", "0") - next_month = _get_next_month_timestamp() - await self._cli.set_custom_config("next_month", str(next_month)) - - async with async_session() as session: - async with session.begin(): - stmt = delete(Usage).filter(Usage.ends_at <= time.time()) - await session.execute(stmt) - await asyncio.sleep(5) - - async def increase_usage(self, user_id: int, tokens: int) -> None: - # increase global usage - async with self._lock: - used_tokens = int(await self._cli.get_custom_config("used_tokens") or 0) - await self._cli.set_custom_config("used_tokens", str(used_tokens + tokens)) - - # increase user usage - async with async_session() as session: - async with session.begin(): - stmt = select(Usage).filter_by(user_id=user_id) - usage = await fetchone(session, stmt) - if not usage: - usage = Usage(user_id=user_id, ends_at=_get_next_hour_timestamp()) - session.add(usage) - usage.queries += 1 - usage.tokens += tokens - - def is_rate_limited(self) -> bool: - return bool(self._rate_limit) - - def set_rate_limit(self, seconds: int) -> None: - self._rate_limit = int(time.time()) + seconds - - async def get_global_cooldown(self) -> int: - return int(await self._cli.get_custom_config("next_month") or 0) - - async def global_quota_exceeded(self) -> bool: - global_quota = int(self._cfg.get("global_monthly_quota") or 0) - used_tokens = int(await self._cli.get_custom_config("used_tokens") or 0) - if global_quota and used_tokens >= global_quota: - return True - return False - - async def quota_exceeded(self, user_id: int) -> int: - """If quota was exceeded return the cooldown, otherwise return zero""" - tokens_quota = int(self._cfg.get("user_hourly_tokens_quota") or 0) - queries_quota = int(self._cfg.get("user_hourly_queries_quota") or 0) - now = int(time.time()) - async with async_session() as session: - async with session.begin(): - stmt = select(Usage).filter_by(user_id=user_id) - usage = await fetchone(session, stmt) - if usage: - if usage.tokens >= tokens_quota or usage.queries >= queries_quota: - return usage.ends_at - now - return 0 - - -def _get_next_month_timestamp() -> int: - return int( - (datetime.today().replace(day=25) + 
timedelta(days=7)) - .replace(day=1, hour=0, minute=0, second=0, microsecond=0) - .timestamp() - ) - - -def _get_next_hour_timestamp() -> int: - return int((datetime.today() + timedelta(hours=1)).timestamp()) diff --git a/deltachat_chatbot/utils.py b/deltachat_chatbot/utils.py deleted file mode 100644 index b8ceef1..0000000 --- a/deltachat_chatbot/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Utilities""" -import asyncio -import logging -import os -from typing import Coroutine - -_background_tasks = set() - - -def run_in_background(coro: Coroutine) -> None: - """Schedule the execution of a coroutine object in a spawn task, keeping a - reference to the task to avoid it disappearing mid-execution due to GC. - """ - task = asyncio.create_task(coro) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - - -def human_time_duration(seconds: int) -> str: - hours, seconds = divmod(int(seconds), 60 * 60) - minutes, seconds = divmod(int(seconds), 60) - return f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - -def get_log_level() -> int: - """Get log level from environment variables. Defaults to INFO if not set.""" - level = os.getenv("CHATBOT_LOG_LEVEL", "info").upper() - return int(getattr(logging, level)) diff --git a/pylama.ini b/pylama.ini index dd9c7c7..a6d598c 100644 --- a/pylama.ini +++ b/pylama.ini @@ -1,4 +1,4 @@ [pylama] linters=mccabe,pyflakes,pylint,isort,mypy ignore=R0903,C0116 -skip=.*,build/*,tests/*,*/flycheck_* \ No newline at end of file +skip=.*,build/*,tests/*,*/_version.py,*/flycheck_* \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 13b41e4..dfd909d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,33 +1,30 @@ [build-system] -requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] +requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" [project] name = "deltachat-chatbot" description = "Conversational chat-bot for Delta Chat" +dynamic = ["version"] readme = "README.md" requires-python = ">=3.8" -license = {file = "LICENSE.txt"} -keywords = ["deltachat", "bot"] +keywords = ["deltachat", "bot", "chatbot", "ai"] authors = [ - {email = "adbenitez@hispanilandia.net"}, - {name = "adbenitez"} + {name = "adbenitez", email = "adb@merlinux.eu"}, ] classifiers = [ "Development Status :: 4 - Beta", - "Programming Language :: Python" -] -dynamic = [ - "version" + "Programming Language :: Python :: 3", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", ] dependencies = [ - "SQLAlchemy>=1.4.44", - "aiosqlite>=0.17.0", - "openai>=0.27.2", - "tiktoken>=0.3.2", - "deltabot-cli @ git+https://github.com/deltachat-bot/deltabot-cli-py.git", + "deltabot-cli>=6.0.0,<7.0", + "gpt4all>=2.6.0,<3.0", ] +[project.urls] +Homepage = "https://github.com/deltachat-bot/chatbot" + [project.optional-dependencies] dev = [ "black", @@ -41,6 +38,9 @@ dev = [ [project.scripts] chatbot = "deltachat_chatbot:main" +[tool.setuptools_scm] +# can be empty if no extra settings are needed, presence enables setuptools_scm + [tool.isort] profile = "black"
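The heart of this migration is the `gpt4all.py` monkeypatch: passing `fake_reply` to `prompt_model()` lets `load_history()` push a stored bot reply into the model context with `max_tokens=0` instead of re-generating it. A minimal standalone sketch of that flow, using the patched class from this diff (the model name is the `hooks.py` default; the prompts and replies are illustrative):

```python
from deltachat_chatbot.gpt4all import GPT4All

gpt4all = GPT4All("mistral-7b-openorca.gguf2.Q4_0.gguf")

with gpt4all.chat_session(system_prompt="You are a helpful assistant."):
    # Replay one stored exchange: the old prompt is ingested and the saved
    # reply is injected via fake_reply, with max_tokens=0 so nothing new is
    # generated; this mirrors what load_history() does per stored message.
    gpt4all.generate(
        "What is Delta Chat?",
        max_tokens=0,
        fake_reply="Delta Chat is a decentralized messenger built on e-mail.",
    )
    # With the context restored, generate a fresh reply that can refer back.
    print(gpt4all.generate("Can you repeat that more briefly?", max_tokens=200, temp=0.5))
```

In `hooks.py` this replay happens once per stored bot reply, so `--history` bounds both the context rebuilt for each incoming message and the number of old replies kept in the chat.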