Skip to content

Commit

Permalink
LuxonisParser - RoboFlow URL Support (#189)
Browse files Browse the repository at this point in the history
  • Loading branch information
kozlov721 authored Oct 8, 2024
1 parent 4fa0a02 commit 6bcbb11
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
with:
ref: ${{ github.head_ref }}

- name: Install pre-commit
run: python3 -m pip install 'pre-commit<4.0.0'

- name: Run pre-commit
uses: pre-commit/[email protected]

Expand Down
68 changes: 62 additions & 6 deletions luxonis_ml/data/parsers/luxonis_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import zipfile
from enum import Enum
from importlib.util import find_spec
from pathlib import Path
from typing import (
Dict,
Expand All @@ -16,7 +17,8 @@
from luxonis_ml.data import DATASETS_REGISTRY, BaseDataset, LuxonisDataset
from luxonis_ml.data.utils.enums import LabelType
from luxonis_ml.enums import DatasetType
from luxonis_ml.utils import LuxonisFileSystem
from luxonis_ml.utils import LuxonisFileSystem, environ
from luxonis_ml.utils.filesystem import _pip_install

from .base_parser import BaseParser
from .classification_directory_parser import ClassificationDirectoryParser
Expand Down Expand Up @@ -72,8 +74,15 @@ def __init__(
appropriate parser.
@type dataset_dir: str
@param dataset_dir: Path to the dataset directory or zip file.
Can also be a remote URL supported by L{LuxonisFileSystem}.
@param dataset_dir: Identifier of the dataset directory.
Can be one of:
- Local path to the dataset directory.
- Remote URL supported by L{LuxonisFileSystem}.
- C{gcs://} for Google Cloud Storage
- C{s3://} for Amazon S3
- C{roboflow://} for Roboflow datasets.
- Expected format: C{roboflow://workspace/project/version/format}.
Can be a remote URL supported by L{LuxonisFileSystem}.
@type dataset_name: Optional[str]
@param dataset_name: Name of the dataset. If C{None}, the name
is derived from the name of the dataset directory.
Expand All @@ -97,9 +106,16 @@ def __init__(
names.
"""
save_dir = Path(save_dir) if save_dir else None
name = Path(dataset_dir).name
local_path = (save_dir or Path.cwd()) / name
self.dataset_dir = LuxonisFileSystem.download(dataset_dir, local_path)
if dataset_dir.startswith("roboflow://"):
self.dataset_dir, name = self._download_roboflow_dataset(
dataset_dir, save_dir
)
else:
name = dataset_dir.split("/")[-1]
local_path = (save_dir or Path.cwd()) / name
self.dataset_dir = LuxonisFileSystem.download(
dataset_dir, local_path
)
if self.dataset_dir.suffix == ".zip":
with zipfile.ZipFile(self.dataset_dir, "r") as zip_ref:
unzip_dir = self.dataset_dir.parent / self.dataset_dir.stem
Expand Down Expand Up @@ -237,3 +253,43 @@ def _parse_split(
return self.parser.parse_split(
split, random_split, split_ratios, **parsed_kwargs, **kwargs
)

def _download_roboflow_dataset(
self, dataset_dir: str, local_path: Optional[Path]
) -> Tuple[Path, str]:
if find_spec("roboflow") is None:
_pip_install("roboflow", "roboflow", "0.1.1")

from roboflow import Roboflow

if environ.ROBOFLOW_API_KEY is None:
raise RuntimeError(
"ROBOFLOW_API_KEY environment variable is not set. "
"Please set it to your Roboflow API key."
)

rf = Roboflow(api_key=environ.ROBOFLOW_API_KEY)
parts = dataset_dir.split("roboflow://")[1].split("/")
if len(parts) != 4:
raise ValueError(
f"Incorrect Roboflow dataset URL: `{dataset_dir}`. "
"Expected format: `roboflow://workspace/project/version/format`."
)
workspace, project, version, format = dataset_dir.split("roboflow://")[
1
].split("/")
try:
version = int(version)
except ValueError as e:
raise ValueError(
f"Roboflow version must be an integer, got `{version}`."
) from e

local_path = local_path or Path.cwd() / f"{project}_{format}"
dataset = (
rf.workspace(workspace)
.project(project)
.version(int(version))
.download(format, str(local_path / project))
)
return Path(dataset.location), project
1 change: 1 addition & 0 deletions luxonis_ml/data/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ pycocotools>=2.0.7
typeguard>=4.1.0
polars[timezone]>=0.20.31
ordered-set>=4.0.0
# roboflow>=0.1.1
2 changes: 2 additions & 0 deletions luxonis_ml/utils/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Environ(BaseSettings):
LUXONISML_BASE_PATH: Path = Path.home() / "luxonis_ml"
LUXONISML_TEAM_ID: str = "offline"

ROBOFLOW_API_KEY: Optional[str] = None

GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = None

LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = (
Expand Down
21 changes: 11 additions & 10 deletions luxonis_ml/utils/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,19 +674,12 @@ def upload(local_path: PathType, url: str) -> None:


def _check_package_installed(protocol: str) -> None: # pragma: no cover
def _pip_install(package: str, version: str) -> None:
logger.error(f"{package} is necessary for {protocol} protocol.")
logger.info(f"Installing {package}...")
subprocess.run(
[sys.executable, "-m", "pip", "install", f"{package}>={version}"]
)

if protocol in ["gs", "gcs"] and find_spec("gcsfs") is None:
_pip_install("gcsfs", "2023.3.0")
_pip_install(protocol, "gcsfs", "2023.3.0")
elif protocol == "s3" and find_spec("s3fs") is None:
_pip_install("s3fs", "2023.3.0")
_pip_install(protocol, "s3fs", "2023.3.0")
elif protocol == "mlflow" and find_spec("mlflow") is None:
_pip_install("mlflow", "2.10.0")
_pip_install(protocol, "mlflow", "2.10.0")


def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
Expand All @@ -702,3 +695,11 @@ def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
protocol = "file"

return protocol, path if path else None


def _pip_install(protocol: str, package: str, version: str) -> None:
    """Installs a missing optional package with pip.

    Runs C{pip install "<package>>=<version>"} using the current
    interpreter. A failed installation is logged but not raised.

    @type protocol: str
    @param protocol: Name of the protocol that requires the package.
        Used only in the log message.
    @type package: str
    @param package: Name of the package to install.
    @type version: str
    @param version: Minimum version of the package to install.
    """
    logger.error(f"'{package}' is necessary for '{protocol}://' protocol.")
    logger.info(f"Installing {package}...")
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", f"{package}>={version}"]
    )
    # Report a failed install here instead of letting it show up later
    # as an unexplained import error.
    if result.returncode != 0:
        logger.error(
            f"Failed to install '{package}>={version}' "
            f"(pip exited with code {result.returncode})."
        )
14 changes: 13 additions & 1 deletion tests/test_data/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from luxonis_ml.data import LabelType, LuxonisLoader, LuxonisParser
from luxonis_ml.enums import DatasetType
from luxonis_ml.utils import environ

URL_PREFIX: Final[str] = "gs://luxonis-test-bucket/luxonis-ml-test-data"
WORK_DIR: Final[str] = "tests/data/parser_datasets"
Expand Down Expand Up @@ -82,13 +83,24 @@ def prepare_dir():
"D1_ParkingSlot-solo.zip",
[LabelType.BOUNDINGBOX, LabelType.SEGMENTATION],
),
(
DatasetType.COCO,
"roboflow://team-roboflow/coco-128/2/coco",
[LabelType.BOUNDINGBOX, LabelType.CLASSIFICATION],
),
],
)
def test_dir_parser(
dataset_type: DatasetType, url: str, expected_label_types: List[LabelType]
):
if not url.startswith("roboflow://"):
url = f"{URL_PREFIX}/{url}"

elif environ.ROBOFLOW_API_KEY is None:
pytest.skip("Roboflow API key is not set")

parser = LuxonisParser(
f"{URL_PREFIX}/{url}",
url,
dataset_name=f"test-{dataset_type}",
delete_existing=True,
save_dir=WORK_DIR,
Expand Down

0 comments on commit 6bcbb11

Please sign in to comment.