# docker-compose.yml

services:
  # vLLM OpenAI-compatible API server serving Qwen2-VL-7B-Instruct.
  qwen2:
    image: qwenllm/qwenvl:2-cu121
    container_name: qwen2
    ports:
      # host:container — the container side must match the --port argument
      # passed to the API server in `command` below.
      - "11434:11434"
    # Share the host IPC namespace with the container.
    ipc: "host"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU visible on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # NOTE(review): no volume is mounted for downloaded model weights, so they
    # are presumably fetched again whenever the container is recreated —
    # confirm whether a cache mount is wanted.
    #
    # Exec-form argument list: each item is passed to the process verbatim, so
    # no shell wrapper or nested quoting is needed. Numeric values are quoted
    # because Compose expects every list item to be a string.
    command:
      - "python"
      - "-m"
      - "vllm.entrypoints.openai.api_server"
      # Name that API clients use to select the model.
      - "--served-model-name"
      - "Qwen2-VL-7B-Instruct"
      # Model identifier to load.
      - "--model"
      - "Qwen/Qwen2-VL-7B-Instruct"
      - "--max-model-len"
      - "10000"
      - "--cpu-offload-gb"
      - "0"
      - "--gpu-memory-utilization"
      - "0.9"
      - "--port"
      - "11434"

  # Open WebUI front end; container port 8080 is published on host port 3000.
  webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    ports:
      - "3000:8080"
    volumes:
      # Named volume mounted at /app/backend/data inside the container.
      - open-webui:/app/backend/data
    extra_hosts:
      # Maps host.docker.internal to the Docker host gateway. Retained so the
      # container can still reach anything that runs directly on the host.
      - "host.docker.internal:host-gateway"
    environment:
      # Values are quoted so they stay strings rather than YAML booleans.
      WEBUI_AUTH: "False"
      # Reach the qwen2 service by its Compose service name over the project
      # network instead of looping through the host-published port, which
      # depends on the host firewall allowing bridge-to-host traffic.
      OPENAI_API_BASE_URL: "http://qwen2:11434/v1"
      ENABLE_OPENAI_API: "true"

# Named volumes; the empty mapping leaves every option at its default.
volumes:
  open-webui: {}