From f2d5ba750a86cd2102761f097a86519ca8e82249 Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Tue, 9 Apr 2024 01:26:39 -0400 Subject: [PATCH] feat: support pyarrow and column major data in mo.ui.table (#1091) * feat: support pyarrow and column major data in mo.ui.table * fix test * fixes * maybe fix type * fixes * fix ciruclar import * fix * py3.8 compat --------- Co-authored-by: Akshay Agrawal --- marimo/_dependencies/dependencies.py | 10 ++ marimo/_output/data/data.py | 49 +---- marimo/_plugins/ui/_impl/data_explorer.py | 11 +- .../_plugins/ui/_impl/dataframes/dataframe.py | 11 +- marimo/_plugins/ui/_impl/table.py | 169 +++++------------- .../_plugins/ui/_impl/tables/default_table.py | 118 ++++++++++++ .../_plugins/ui/_impl/tables/pandas_table.py | 80 +++++++++ .../_plugins/ui/_impl/tables/polars_table.py | 42 +++++ .../_plugins/ui/_impl/tables/pyarrow_table.py | 60 +++++++ .../_plugins/ui/_impl/tables/table_manager.py | 57 ++++++ marimo/_plugins/ui/_impl/tables/utils.py | 36 ++++ marimo/_plugins/ui/_impl/utils/dataframe.py | 64 ++----- marimo/_smoke_tests/dataframe.py | 166 ++++++++++++++++- pyproject.toml | 8 +- .../ui/_impl/tables/test_default_table.py | 71 ++++++++ .../ui/_impl/tables/test_pandas_table.py | 51 ++++++ .../ui/_impl/tables/test_polars_table.py | 48 +++++ .../_plugins/ui/_impl/tables/test_pyarrow.py | 47 +++++ tests/_plugins/ui/_impl/test_table.py | 49 +++-- .../_plugins/ui/_impl/utils/test_dataframe.py | 37 ++-- 20 files changed, 907 insertions(+), 277 deletions(-) create mode 100644 marimo/_plugins/ui/_impl/tables/default_table.py create mode 100644 marimo/_plugins/ui/_impl/tables/pandas_table.py create mode 100644 marimo/_plugins/ui/_impl/tables/polars_table.py create mode 100644 marimo/_plugins/ui/_impl/tables/pyarrow_table.py create mode 100644 marimo/_plugins/ui/_impl/tables/table_manager.py create mode 100644 marimo/_plugins/ui/_impl/tables/utils.py create mode 100644 tests/_plugins/ui/_impl/tables/test_default_table.py create mode 100644 tests/_plugins/ui/_impl/tables/test_pandas_table.py create mode 100644 tests/_plugins/ui/_impl/tables/test_polars_table.py create mode 100644 tests/_plugins/ui/_impl/tables/test_pyarrow.py diff --git a/marimo/_dependencies/dependencies.py b/marimo/_dependencies/dependencies.py index 76d2bdfe2df..c8fa00baa80 100644 --- a/marimo/_dependencies/dependencies.py +++ b/marimo/_dependencies/dependencies.py @@ -97,6 +97,11 @@ def require_plotly(why: str) -> None: + "You can install it with 'pip install plotly'" ) from None + @staticmethod + def has(pkg: str) -> bool: + """Return True if any lib is installed.""" + return importlib.util.find_spec(pkg) is not None + @staticmethod def has_openai() -> bool: """Return True if openai is installed.""" @@ -107,6 +112,11 @@ def has_pandas() -> bool: """Return True if pandas is installed.""" return importlib.util.find_spec("pandas") is not None + @staticmethod + def has_pyarrow() -> bool: + """Return True if pyarrow is installed.""" + return importlib.util.find_spec("pyarrow") is not None + @staticmethod def has_polars() -> bool: """Return True if polars is installed.""" diff --git a/marimo/_output/data/data.py b/marimo/_output/data/data.py index 6c396ead0b2..5e4ac2f653f 100644 --- a/marimo/_output/data/data.py +++ b/marimo/_output/data/data.py @@ -1,9 +1,8 @@ # Copyright 2024 Marimo. All rights reserved. import base64 import io -from typing import TYPE_CHECKING, Union +from typing import Union -from marimo._dependencies.dependencies import DependencyManager from marimo._plugins.core.media import is_data_empty from marimo._runtime.context import get_context from marimo._runtime.virtual_file import ( @@ -12,10 +11,6 @@ VirtualFileLifecycleItem, ) -if TYPE_CHECKING: - import pandas as pd - import polars as pl - def pdf(data: bytes) -> VirtualFile: """Create a virtual file from a PDF. @@ -65,9 +60,7 @@ def audio(data: bytes, ext: str = "wav") -> VirtualFile: return item.virtual_file -def csv( - data: Union[str, bytes, io.BytesIO, "pd.DataFrame", "pl.DataFrame"] -) -> VirtualFile: +def csv(data: Union[str, bytes, io.BytesIO]) -> VirtualFile: """Create a virtual file for CSV data. **Args.** @@ -79,30 +72,10 @@ def csv( A `VirtualFile` object. """ - # Pandas DataFrame - if DependencyManager.has_pandas(): - import pandas as pd - - if isinstance(data, pd.DataFrame): - buffer = data.to_csv( - index=False, - ).encode("utf-8") - return any_data(buffer, ext="csv") - - # Polars DataFrame - if DependencyManager.has_polars(): - import polars as pl - - if isinstance(data, pl.DataFrame): - buffer = data.write_csv().encode("utf-8") - return any_data(buffer, ext="csv") - return any_data(data, ext="csv") # type: ignore -def json( - data: Union[str, bytes, io.BytesIO, "pd.DataFrame", "pl.DataFrame"] -) -> VirtualFile: +def json(data: Union[str, bytes, io.BytesIO]) -> VirtualFile: """Create a virtual file for JSON data. **Args.** @@ -114,22 +87,6 @@ def json( A `VirtualFile` object. """ - # Pandas DataFrame - if DependencyManager.has_pandas(): - import pandas as pd - - if isinstance(data, pd.DataFrame): - buffer = data.to_json(orient="records").encode("utf-8") - return any_data(buffer, ext="json") - - # Polars DataFrame - if DependencyManager.has_polars(): - import polars as pl - - if isinstance(data, pl.DataFrame): - buffer = data.write_json(row_oriented=True).encode("utf-8") - return any_data(buffer, ext="json") - return any_data(data, ext="json") # type: ignore diff --git a/marimo/_plugins/ui/_impl/data_explorer.py b/marimo/_plugins/ui/_impl/data_explorer.py index ca23ebab349..bd5e382d24b 100644 --- a/marimo/_plugins/ui/_impl/data_explorer.py +++ b/marimo/_plugins/ui/_impl/data_explorer.py @@ -1,10 +1,13 @@ # Copyright 2023 Marimo. All rights reserved. from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, Dict, Final, Optional +from typing import TYPE_CHECKING, Any, Callable, Dict, Final, Optional, Union + +from marimo._plugins.ui._impl.tables.utils import get_table_manager if TYPE_CHECKING: import pandas as pd + import polars as pl import marimo._output.data.data as mo_data @@ -36,18 +39,20 @@ class data_explorer(UIElement[Dict[str, Any], Dict[str, Any]]): def __init__( self, - df: pd.DataFrame, + df: Union[pd.DataFrame, pl.DataFrame], on_change: Optional[Callable[[Dict[str, Any]], None]] = None, ) -> None: self._data = df + manager = get_table_manager(df) + super().__init__( component_name=data_explorer._name, initial_value={}, on_change=on_change, label="", args={ - "data": mo_data.csv(df).url, + "data": mo_data.csv(manager.to_csv()).url, }, ) diff --git a/marimo/_plugins/ui/_impl/dataframes/dataframe.py b/marimo/_plugins/ui/_impl/dataframes/dataframe.py index a779078f154..02d9d492f52 100644 --- a/marimo/_plugins/ui/_impl/dataframes/dataframe.py +++ b/marimo/_plugins/ui/_impl/dataframes/dataframe.py @@ -5,6 +5,10 @@ import sys from typing import TYPE_CHECKING, Any, Callable, Dict, Final, List, Optional +from marimo._plugins.ui._impl.tables.pandas_table import ( + PandasTableManagerFactory, +) + if TYPE_CHECKING: import pandas as pd @@ -13,7 +17,6 @@ import marimo._output.data.data as mo_data from marimo._output.rich_help import mddoc from marimo._plugins.ui._core.ui_element import UIElement -from marimo._plugins.ui._impl.utils.dataframe import get_row_headers from marimo._runtime.functions import EmptyArgs, Function from marimo._utils.parse_dataclass import parse_raw @@ -92,6 +95,7 @@ def __init__( pass self._data = df + self._manager = PandasTableManagerFactory.create()(df) self._transform_container = TransformsContainer(df) self._error: Optional[str] = None @@ -129,13 +133,14 @@ def get_dataframe(self, _args: EmptyArgs) -> GetDataFrameResponse: if self._error is not None: raise Exception(self._error) - url = mo_data.csv(self._value.head(LIMIT)).url + manager = PandasTableManagerFactory.create()(self._value.head(LIMIT)) + url = mo_data.csv(manager.to_csv()).url total_rows = len(self._value) return GetDataFrameResponse( url=url, total_rows=total_rows, has_more=total_rows > LIMIT, - row_headers=get_row_headers(self._value), + row_headers=manager.get_row_headers(), ) def get_column_values( diff --git a/marimo/_plugins/ui/_impl/table.py b/marimo/_plugins/ui/_impl/table.py index c17af1ec8d9..d994b51bf7e 100644 --- a/marimo/_plugins/ui/_impl/table.py +++ b/marimo/_plugins/ui/_impl/table.py @@ -4,15 +4,14 @@ from dataclasses import dataclass from typing import ( TYPE_CHECKING, + Any, Callable, + Dict, Final, List, Literal, Optional, - Tuple, - TypeVar, Union, - cast, ) import marimo._output.data.data as mo_data @@ -22,19 +21,18 @@ from marimo._output.rich_help import mddoc from marimo._plugins.core.web_component import JSONType from marimo._plugins.ui._core.ui_element import UIElement -from marimo._plugins.ui._impl.utils.dataframe import TableData, get_row_headers +from marimo._plugins.ui._impl.tables.table_manager import TableManager +from marimo._plugins.ui._impl.tables.utils import get_table_manager +from marimo._plugins.ui._impl.utils.dataframe import ListOrTuple, TableData from marimo._runtime.functions import Function LOGGER = _loggers.marimo_logger() -T = TypeVar("T") - -Numeric = Union[int, float] -ListOrTuple = Union[List[T], Tuple[T, ...]] if TYPE_CHECKING: import pandas as pd import polars as pl + import pyarrow as pa # ignore @dataclass @@ -103,13 +101,16 @@ class table( **Initialization Args.** - - `data`: A pandas dataframe, a polars dataframe, - a list of values representing a column, or a list of dicts - where each dict represents a row in the table - (mapping column names to values). Values can be - primitives (`str`, `int`, `float`, `bool`, or `None`) - or marimo elements: e.g. - `mo.ui.button(...)`, `mo.md(...)`, `mo.as_html(...)`, etc. + - `data`: Values can be primitives (`str`, + `int`, `float`, `bool`, or `None`) or marimo elements: e.g. + `mo.ui.button(...)`, `mo.md(...)`, `mo.as_html(...)`, etc. Data can be + passed in many ways: + - as dataframes: a pandas dataframe, a polars dataframe + - as rows: a list of dicts, where each dict represents a row in the + table + - as columns: a dict keyed by column names, where the value of each + entry is a list representing a column + - as a single column: a list of values - `pagination`: whether to paginate; if `False`, all rows will be shown defaults to `True` when above 10 rows, `False` otherwise - `page_size`: the number of rows to show per page. @@ -126,9 +127,11 @@ def __init__( self, data: Union[ ListOrTuple[Union[str, int, float, bool, MIME, None]], - ListOrTuple[dict[str, JSONType]], + ListOrTuple[Dict[str, JSONType]], + Dict[str, ListOrTuple[JSONType]], "pd.DataFrame", "pl.DataFrame", + "pa.Table", ], pagination: Optional[bool] = None, selection: Optional[Literal["single", "multi"]] = "multi", @@ -137,13 +140,22 @@ def __init__( label: str = "", on_change: Optional[ Callable[ - [Union[List[JSONType], "pd.DataFrame", "pl.DataFrame"]], None + [ + Union[ + List[JSONType], + Dict[str, ListOrTuple[JSONType]], + "pd.DataFrame", + "pl.DataFrame", + "pa.Table", + ] + ], + None, ] ] = None, ) -> None: self._data = data - normalized_data = _normalize_data(data) - self._normalized_data = normalized_data + self._manager = get_table_manager(data) + self._filtered_manager: Optional[TableManager[Any]] = None # pagination defaults to True if there are more than 10 rows if pagination is None: @@ -158,12 +170,12 @@ def __init__( label=label, initial_value=[], args={ - "data": normalized_data, + "data": self._manager.to_data(), "pagination": pagination, "page-size": page_size, "selection": selection, "show-download": can_download, - "row-headers": get_row_headers(data), + "row-headers": self._manager.get_row_headers(), }, on_change=on_change, functions=( @@ -184,116 +196,23 @@ def data( def _convert_value( self, value: list[str] ) -> Union[List[JSONType], "pd.DataFrame", "pl.DataFrame"]: - # Handle pandas - if DependencyManager.has_pandas(): - import pandas as pd - - if isinstance(self._data, pd.DataFrame): - return self._data.iloc[[int(v) for v in value]] - - # Handle polars - if DependencyManager.has_polars(): - import polars as pl - - if isinstance(self._data, pl.DataFrame): - return self._data[[int(v) for v in value]] - - return [self._data[int(v)] for v in value] # type: ignore[misc] - - def _as_data_frame( - self, data: TableData - ) -> Union["pd.DataFrame", "pl.DataFrame"]: - """ - Convert the given data to the same type as the original data. - Otherwise, convert to whatever framework we have. - """ - # Handle pandas - if DependencyManager.has_pandas(): - import pandas as pd - - # Make result a dataframe of the original type - if isinstance(self._data, pd.DataFrame) and not isinstance( - data, pd.DataFrame - ): - return pd.DataFrame(data) # type: ignore[arg-type] - - # Handle polars - if DependencyManager.has_polars(): - import polars as pl - - # Make result a dataframe of the original type - if isinstance(self._data, pl.DataFrame) and not isinstance( - data, pl.DataFrame - ): - return pl.DataFrame(data) - - # Convert to whatever framework we have - - if DependencyManager.has_pandas(): - import pandas as pd - - return pd.DataFrame(data) # type: ignore[arg-type] - - if DependencyManager.has_polars(): - import polars as pl - - return pl.DataFrame(data) - - raise ValueError("Requires pandas or polars to be installed.") + indices = [int(v) for v in value] + self._filtered_manager = self._manager.select_rows(indices) + self._has_any_selection = len(indices) > 0 + return self._filtered_manager.data # type: ignore[no-any-return] def download_as(self, args: DownloadAsArgs) -> str: # download selected rows if there are any, otherwise use all rows - data: TableData = self._value if len(self._value) > 0 else self._data + manager = ( + self._filtered_manager + if self._filtered_manager and self._has_any_selection + else self._manager + ) - df = self._as_data_frame(data) ext = args.format if ext == "csv": - return mo_data.csv(df).url + return mo_data.csv(manager.to_csv()).url elif ext == "json": - return mo_data.json(df).url + return mo_data.json(manager.to_json()).url else: raise ValueError("format must be one of 'csv' or 'json'.") - - -# TODO: more narrow return type -def _normalize_data(data: TableData) -> JSONType: - # Handle pandas - if DependencyManager.has_pandas(): - import pandas as pd - - if isinstance(data, pd.DataFrame): - vf = mo_data.csv(data) - return vf.url - - # Handle polars - if DependencyManager.has_polars(): - import polars as pl - - if isinstance(data, pl.DataFrame): - vf = mo_data.csv(data) - return vf.url - - # Assert that data is a list - if not isinstance(data, (list, tuple)): - raise ValueError("data must be a list or tuple.") - - # Handle empty data - if len(data) == 0: - return [] - - # Handle single-column data - if not isinstance(data[0], dict) and isinstance( - data[0], (str, int, float, bool, type(None)) - ): - # we're going to assume that data has the right shape, after - # having checked just the first entry - casted = cast(List[Union[str, int, float, bool, MIME, None]], data) - return [{"value": datum} for datum in casted] - elif not isinstance(data[0], dict): - raise ValueError( - "data must be a sequence of JSON-serializable types, or a " - "sequence of dicts." - ) - - # Sequence of dicts - return data diff --git a/marimo/_plugins/ui/_impl/tables/default_table.py b/marimo/_plugins/ui/_impl/tables/default_table.py new file mode 100644 index 00000000000..f6b23db8e8f --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/default_table.py @@ -0,0 +1,118 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +from typing import ( + Any, + Dict, + List, + Sequence, + Union, + cast, +) + +from marimo._dependencies.dependencies import DependencyManager +from marimo._output.mime import MIME +from marimo._plugins.core.web_component import JSONType +from marimo._plugins.ui._impl.tables.pandas_table import ( + PandasTableManagerFactory, +) +from marimo._plugins.ui._impl.tables.polars_table import ( + PolarsTableManagerFactory, +) +from marimo._plugins.ui._impl.tables.table_manager import TableManager + +JsonTableData = Union[ + Sequence[Union[str, int, float, bool, MIME, None]], + Sequence[JSONType], + List[JSONType], + Dict[str, Sequence[Union[str, int, float, bool, MIME, None]]], +] + + +class DefaultTableManager(TableManager[JsonTableData]): + def __init__(self, data: JsonTableData): + self.data = data + + def to_data(self) -> JSONType: + return self._normalize_data(self.data) + + def to_csv(self) -> bytes: + return self._as_table_manager().to_csv() + + def to_json(self) -> bytes: + return self._as_table_manager().to_json() + + def select_rows(self, indices: List[int]) -> DefaultTableManager: + # Column major data + if isinstance(self.data, dict): + new_data: Dict[Any, Any] = { + key: [value[i] for i in indices] + for key, value in self.data.items() + } + return DefaultTableManager(new_data) + # Row major data + return DefaultTableManager([self.data[i] for i in indices]) + + def get_row_headers(self) -> list[tuple[str, list[str | int | float]]]: + return [] + + def _as_table_manager(self) -> TableManager[Any]: + if DependencyManager.has_pandas(): + import pandas as pd + + return PandasTableManagerFactory.create()(pd.DataFrame(self.data)) + if DependencyManager.has_polars(): + import polars as pl + + return PolarsTableManagerFactory.create()(pl.DataFrame(self.data)) + + raise ValueError("No supported table libraries found.") + + @staticmethod + def is_type(value: Any) -> bool: + return isinstance(value, (list, tuple, dict)) + + @staticmethod + def _normalize_data(data: JsonTableData) -> list[dict[str, Any]]: + # If it is a dict of lists (column major), + # convert to list of dicts (row major) + if isinstance(data, dict) and all( + isinstance(value, (list, tuple)) for value in data.values() + ): + # reshape column major + # { "col1": [1, 2, 3], "col2": [4, 5, 6], ... } + # into row major + # [ {"col1": 1, "col2": 4}, {"col1": 2, "col2": 5 }, ...] + column_values = data.values() + column_names = list(data.keys()) + return [ + {key: value for key, value in zip(column_names, row_values)} + for row_values in zip(*column_values) + ] + + # Assert that data is a list + if not isinstance(data, (list, tuple)): + raise ValueError( + "data must be a list or tuple or a dict of lists." + ) + + # Handle empty data + if len(data) == 0: + return [] + + # Handle single-column data + if not isinstance(data[0], dict) and isinstance( + data[0], (str, int, float, bool, type(None)) + ): + # we're going to assume that data has the right shape, after + # having checked just the first entry + casted = cast(List[Union[str, int, float, bool, MIME, None]], data) + return [{"value": datum} for datum in casted] + elif not isinstance(data[0], dict): + raise ValueError( + "data must be a sequence of JSON-serializable types, or a " + "sequence of dicts." + ) + + # Sequence of dicts + return cast(List[Dict[str, Any]], data) diff --git a/marimo/_plugins/ui/_impl/tables/pandas_table.py b/marimo/_plugins/ui/_impl/tables/pandas_table.py new file mode 100644 index 00000000000..3c5f13493dd --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/pandas_table.py @@ -0,0 +1,80 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +from typing import Any + +from marimo._plugins.ui._impl.tables.table_manager import ( + TableManager, + TableManagerFactory, +) + + +class PandasTableManagerFactory(TableManagerFactory): + @staticmethod + def package_name() -> str: + return "pandas" + + @staticmethod + def create() -> type[TableManager[Any]]: + import pandas as pd + + class PandasTableManager(TableManager[pd.DataFrame]): + def to_csv(self) -> bytes: + return self.data.to_csv( + index=False, + ).encode("utf-8") + + def to_json(self) -> bytes: + return self.data.to_json(orient="records").encode("utf-8") + + def select_rows( + self, indices: list[int] + ) -> TableManager[pd.DataFrame]: + return PandasTableManager(self.data.iloc[indices]) + + def get_row_headers( + self, + ) -> list[tuple[str, list[str | int | float]]]: + return PandasTableManager._get_row_headers_for_index( + self.data.index + ) + + @staticmethod + def is_type(value: Any) -> bool: + return isinstance(value, pd.DataFrame) + + @staticmethod + def _get_row_headers_for_index( + index: pd.Index[Any], + ) -> list[tuple[str, list[str | int | float]]]: + if isinstance(index, pd.RangeIndex): + return [] + + if isinstance(index, pd.MultiIndex): + # recurse + headers: list[Any] = [] + for i in range(index.nlevels): + headers.extend( + PandasTableManager._get_row_headers_for_index( + index.get_level_values(i) + ) + ) + return headers + + # we only care about the index if it has a name + # or if it is type 'object' + # otherwise, it may look like meaningless number + if isinstance(index, pd.Index): + dtype = str(index.dtype) + if ( + index.name + or dtype == "object" + or dtype == "string" + or dtype == "category" + ): + name = str(index.name) if index.name else "" + return [(name, index.tolist())] # type: ignore[list-item] + + return [] + + return PandasTableManager diff --git a/marimo/_plugins/ui/_impl/tables/polars_table.py b/marimo/_plugins/ui/_impl/tables/polars_table.py new file mode 100644 index 00000000000..4426d5d296c --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/polars_table.py @@ -0,0 +1,42 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +from typing import Any + +from marimo._plugins.ui._impl.tables.table_manager import ( + TableManager, + TableManagerFactory, +) + + +class PolarsTableManagerFactory(TableManagerFactory): + @staticmethod + def package_name() -> str: + return "polars" + + @staticmethod + def create() -> type[TableManager[Any]]: + import polars as pl + + class PolarsTableManager(TableManager[pl.DataFrame]): + def to_csv(self) -> bytes: + return self.data.write_csv().encode("utf-8") + + def to_json(self) -> bytes: + return self.data.write_json(row_oriented=True).encode("utf-8") + + def select_rows( + self, indices: list[int] + ) -> TableManager[pl.DataFrame]: + return PolarsTableManager(self.data[indices]) + + def get_row_headers( + self, + ) -> list[tuple[str, list[str | int | float]]]: + return [] + + @staticmethod + def is_type(value: Any) -> bool: + return isinstance(value, pl.DataFrame) + + return PolarsTableManager diff --git a/marimo/_plugins/ui/_impl/tables/pyarrow_table.py b/marimo/_plugins/ui/_impl/tables/pyarrow_table.py new file mode 100644 index 00000000000..cd47fce3fdd --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/pyarrow_table.py @@ -0,0 +1,60 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +import io +from typing import Any, Union + +from marimo._plugins.ui._impl.tables.table_manager import ( + TableManager, + TableManagerFactory, +) + + +class PyArrowTableManagerFactory(TableManagerFactory): + @staticmethod + def package_name() -> str: + return "pyarrow" + + @staticmethod + def create() -> type[TableManager[Any]]: + import pyarrow as pa # type: ignore + + class PyArrowTableManager( + TableManager[Union[pa.Table, pa.RecordBatch]] + ): + def to_csv(self) -> bytes: + import pyarrow.csv as csv # type: ignore + + buffer = io.BytesIO() + csv.write_csv(self.data, buffer) + return buffer.getvalue() + + def to_json(self) -> bytes: + # Arrow does not have a built-in JSON writer + return ( + self.data.to_pandas() + .to_json(orient="records") + .encode("utf-8") + ) + + def select_rows(self, indices: list[int]) -> PyArrowTableManager: + if not indices: + return PyArrowTableManager( + pa.Table.from_pylist([], schema=self.data.schema) + ) + return PyArrowTableManager(self.data.take(indices)) + + def get_row_headers( + self, + ) -> list[tuple[str, list[str | int | float]]]: + return [] + + @staticmethod + def is_type(value: Any) -> bool: + import pyarrow as pa # type: ignore + + return isinstance(value, pa.Table) or isinstance( + value, pa.RecordBatch + ) + + return PyArrowTableManager diff --git a/marimo/_plugins/ui/_impl/tables/table_manager.py b/marimo/_plugins/ui/_impl/tables/table_manager.py new file mode 100644 index 00000000000..79721ff47f1 --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/table_manager.py @@ -0,0 +1,57 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +import abc +from typing import Any, Generic, TypeVar + +import marimo._output.data.data as mo_data +from marimo._plugins.core.web_component import JSONType + +T = TypeVar("T") + + +class TableManager(abc.ABC, Generic[T]): + def __init__(self, data: T) -> None: + self.data = data + + def to_data(self) -> JSONType: + """ + The best way to represent the data in a table as JSON. + + By default, this method calls `to_csv` and returns the result as + a string. + """ + return mo_data.csv(self.to_csv()).url + + @abc.abstractmethod + def to_csv(self) -> bytes: + raise NotImplementedError + + @abc.abstractmethod + def to_json(self) -> bytes: + raise NotImplementedError + + @abc.abstractmethod + def select_rows(self, indices: list[int]) -> TableManager[T]: + raise NotImplementedError + + @abc.abstractmethod + def get_row_headers(self) -> list[tuple[str, list[str | int | float]]]: + raise NotImplementedError + + @staticmethod + @abc.abstractmethod + def is_type(value: Any) -> bool: + raise NotImplementedError + + +class TableManagerFactory(abc.ABC): + @staticmethod + @abc.abstractmethod + def package_name() -> str: + raise NotImplementedError + + @staticmethod + @abc.abstractmethod + def create() -> type[TableManager[Any]]: + raise NotImplementedError diff --git a/marimo/_plugins/ui/_impl/tables/utils.py b/marimo/_plugins/ui/_impl/tables/utils.py new file mode 100644 index 00000000000..95e25702037 --- /dev/null +++ b/marimo/_plugins/ui/_impl/tables/utils.py @@ -0,0 +1,36 @@ +# Copyright 2024 Marimo. All rights reserved. +from __future__ import annotations + +from typing import Any, List + +from marimo._dependencies.dependencies import DependencyManager +from marimo._plugins.ui._impl.tables.default_table import DefaultTableManager +from marimo._plugins.ui._impl.tables.pandas_table import ( + PandasTableManagerFactory, +) +from marimo._plugins.ui._impl.tables.polars_table import ( + PolarsTableManagerFactory, +) +from marimo._plugins.ui._impl.tables.pyarrow_table import ( + PyArrowTableManagerFactory, +) +from marimo._plugins.ui._impl.tables.table_manager import ( + TableManager, + TableManagerFactory, +) + +MANAGERS: List[TableManagerFactory] = [ + PandasTableManagerFactory(), + PolarsTableManagerFactory(), + PyArrowTableManagerFactory(), +] + + +def get_table_manager(data: Any) -> TableManager[Any]: + for manager_factory in MANAGERS: + if DependencyManager.has(manager_factory.package_name()): + manager = manager_factory.create() + if manager.is_type(data): + return manager(data) + + return DefaultTableManager(data) diff --git a/marimo/_plugins/ui/_impl/utils/dataframe.py b/marimo/_plugins/ui/_impl/utils/dataframe.py index ebb6a676d45..bd34074b03b 100644 --- a/marimo/_plugins/ui/_impl/utils/dataframe.py +++ b/marimo/_plugins/ui/_impl/utils/dataframe.py @@ -3,77 +3,35 @@ from typing import ( TYPE_CHECKING, - Any, + Dict, List, - Sequence, + Tuple, + TypeVar, Union, ) from marimo import _loggers -from marimo._dependencies.dependencies import DependencyManager from marimo._output.mime import MIME from marimo._plugins.core.web_component import JSONType if TYPE_CHECKING: import pandas as pd import polars as pl + import pyarrow as pa # type: ignore LOGGER = _loggers.marimo_logger() +T = TypeVar("T") Numeric = Union[int, float] +ListOrTuple = Union[List[T], Tuple[T, ...]] + TableData = Union[ - Sequence[Union[str, int, float, bool, MIME, None]], - Sequence[JSONType], List[JSONType], + ListOrTuple[Union[str, int, float, bool, MIME, None]], + ListOrTuple[Dict[str, JSONType]], + Dict[str, ListOrTuple[JSONType]], "pd.DataFrame", "pl.DataFrame", + "pa.Table", ] - - -def get_row_headers( - data: TableData, -) -> List[tuple[str, List[str | int | float]]]: - if not DependencyManager.has_pandas(): - return [] - - import pandas as pd - - if isinstance(data, pd.DataFrame): - return _get_row_headers_for_index(data.index) - - return [] - - -def _get_row_headers_for_index( - index: pd.Index[Any], -) -> List[tuple[str, List[str | int | float]]]: - import pandas as pd - - if isinstance(index, pd.RangeIndex): - return [] - - if isinstance(index, pd.MultiIndex): - # recurse - headers = [] - for i in range(index.nlevels): - headers.extend( - _get_row_headers_for_index(index.get_level_values(i)) - ) - return headers - - # we only care about the index if it has a name - # or if it is type 'object' - # otherwise, it may look like meaningless number - if isinstance(index, pd.Index): - dtype = str(index.dtype) - if ( - index.name - or dtype == "object" - or dtype == "string" - or dtype == "category" - ): - name = str(index.name) if index.name else "" - return [(name, index.tolist())] # type: ignore[list-item] - - return [] diff --git a/marimo/_smoke_tests/dataframe.py b/marimo/_smoke_tests/dataframe.py index cd0cf79fa32..c9285a39d38 100644 --- a/marimo/_smoke_tests/dataframe.py +++ b/marimo/_smoke_tests/dataframe.py @@ -1,10 +1,78 @@ # Copyright 2024 Marimo. All rights reserved. + import marimo -__generated_with = "0.1.31" +__generated_with = "0.3.10" app = marimo.App(width="full") +@app.cell +def __(mo): + mo.md("# 🤖 Lists/Dicts") + return + + +@app.cell +def __(mo): + _data = [ + {"Name": "John", "Age": 30, "City": "New York"}, + {"Name": "Alice", "Age": 24, "City": "San Francisco"}, + ] + as_list = mo.ui.table(_data) + as_list + return as_list, + + +@app.cell +def __(as_list): + as_list.value + return + + +@app.cell +def __(mo): + _data = { + "Name": ["John", "Alice"], + "Age": [30, 24], + "City": ["New York", "San Francisco"], + } + as_dict = mo.ui.table(_data) + as_dict + return as_dict, + + +@app.cell +def __(as_dict): + as_dict.value + return + + +@app.cell +def __(mo): + _data = [1, 2, "hello", False] + as_primitives = mo.ui.table(_data) + as_primitives + return as_primitives, + + +@app.cell +def __(as_primitives): + as_primitives.value + return + + +@app.cell +def __(mo): + mo.md("# 🐼 Pandas") + return + + +@app.cell +def __(mo): + mo.md("## mo.ui.dataframe") + return + + @app.cell def __(cars, mo): dataframe = mo.ui.dataframe(cars) @@ -12,12 +80,24 @@ def __(cars, mo): return dataframe, +@app.cell +def __(mo): + mo.md("## mo.ui.table") + return + + @app.cell def __(dataframe, mo): mo.ui.table(dataframe.value, selection=None) return +@app.cell +def __(mo): + mo.md("## .value") + return + + @app.cell def __(dataframe): dataframe.value @@ -31,24 +111,96 @@ def __(dataframe): @app.cell -def __(dataframe, mo): - mo.hstack([dataframe.value.to_dict("records"), dataframe.value.to_dict()]) +def __(mo): + mo.md("## mo.ui.data_explorer") return @app.cell -def __(): - import marimo as mo - return mo, +def __(mo, pl_dataframe): + mo.ui.data_explorer(pl_dataframe) + return + + +@app.cell +def __(mo): + mo.md("# 🐻‍❄️ Polars") + return + + +@app.cell +def __(mo): + mo.md("## mo.ui.table") + return + + +@app.cell +def __(cars, mo, pl): + pl_dataframe = pl.DataFrame(cars) + mo.ui.table(pl_dataframe, selection=None) + return pl_dataframe, + + +@app.cell +def __(mo): + mo.md("## mo.ui.data_explorer") + return + + +@app.cell +def __(mo, pl_dataframe): + mo.ui.data_explorer(pl_dataframe) + return + + +@app.cell +def __(mo): + mo.md("# 🏹 Arrow") + return + + +@app.cell +def __(cars, mo, pa): + arrow_table = pa.Table.from_pandas(cars) + mo.accordion({"Details": mo.plain_text(arrow_table)}) + return arrow_table, + + +@app.cell +def __(mo): + mo.md("## mo.ui.table") + return + + +@app.cell +def __(arrow_table, mo): + arrow_table_el = mo.ui.table(arrow_table) + arrow_table_el + return arrow_table_el, + + +@app.cell +def __(mo): + mo.md("## .value") + return + + +@app.cell +def __(arrow_table_el): + arrow_table_el.value + return @app.cell def __(): + import marimo as mo import pandas as pd + import polars as pl + import pyarrow as pa import vega_datasets cars = vega_datasets.data.cars() - return cars, pd, vega_datasets + return cars, mo, pa, pd, pl, vega_datasets if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 8ca5bd40f1c..14c90120ee8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,9 +70,11 @@ dev = [ "build~=0.10.0", # for server testing "httpx~=0.26.0", - # For testing mo.ui.chart + # For testing mo.ui.chart, dataframes, tables "pandas>=1.3.0", "pandas-stubs>=1.3.0", + "pyarrow>=15.0.2,<16", + "pyarrow-stubs>=10", # For testing mo.image "pillow~=10.2.0", "types-Pillow~=10.2.0.20240311", @@ -108,10 +110,12 @@ testcore = [ ] testoptional = [ - # For testing mo.ui.chart + # For testing mo.ui.chart, table, ... "altair>=5.0.0", "pandas>=1.3.0", "pandas-stubs>=1.3.0", + "pyarrow>=15.0.2,<16", + "pyarrow-stubs>=10", "pillow~=10.2.0", "types-Pillow~=10.2.0.20240311", # polars 0.19.13 requires building maturn from source, but we don't diff --git a/tests/_plugins/ui/_impl/tables/test_default_table.py b/tests/_plugins/ui/_impl/tables/test_default_table.py new file mode 100644 index 00000000000..13cda3c38d6 --- /dev/null +++ b/tests/_plugins/ui/_impl/tables/test_default_table.py @@ -0,0 +1,71 @@ +import unittest +from typing import Any, Dict + +from marimo._plugins.ui._impl.tables.default_table import DefaultTableManager + + +class TestDefaultTable(unittest.TestCase): + def setUp(self) -> None: + self.data = [ + {"A": 1, "B": "a"}, + {"A": 2, "B": "b"}, + {"A": 3, "B": "c"}, + ] + self.manager = DefaultTableManager(self.data) + + def test_select_rows(self) -> None: + indices = [0, 2] + selected_manager = self.manager.select_rows(indices) + expected_data = [ + {"A": 1, "B": "a"}, + {"A": 3, "B": "c"}, + ] + self.assertEqual(selected_manager.data, expected_data) + + def test_select_rows_empty(self) -> None: + selected_manager = self.manager.select_rows([]) + self.assertEqual(selected_manager.data, []) + + def test_get_row_headers(self) -> None: + expected_headers = [] + self.assertEqual(self.manager.get_row_headers(), expected_headers) + + def test_is_type(self) -> None: + self.assertTrue(self.manager.is_type(self.data)) + self.assertFalse(self.manager.is_type("not a dataframe")) + + +class TestColumnarDefaultTable(unittest.TestCase): + def setUp(self) -> None: + self.data: Dict[str, Any] = { + "A": [1, 2, 3], + "B": ["a", "b", "c"], + } + self.manager = DefaultTableManager(self.data) + + def test_select_rows(self) -> None: + indices = [0, 2] + selected_manager = self.manager.select_rows(indices) + expected_data = { + "A": [1, 3], + "B": ["a", "c"], + } + self.assertEqual(selected_manager.data, expected_data) + + def test_select_rows_empty(self) -> None: + selected_manager = self.manager.select_rows([]) + self.assertEqual( + selected_manager.data, + { + "A": [], + "B": [], + }, + ) + + def test_get_row_headers(self) -> None: + expected_headers = [] + self.assertEqual(self.manager.get_row_headers(), expected_headers) + + def test_is_type(self) -> None: + self.assertTrue(self.manager.is_type(self.data)) + self.assertFalse(self.manager.is_type("not a dataframe")) diff --git a/tests/_plugins/ui/_impl/tables/test_pandas_table.py b/tests/_plugins/ui/_impl/tables/test_pandas_table.py new file mode 100644 index 00000000000..11c4f545f43 --- /dev/null +++ b/tests/_plugins/ui/_impl/tables/test_pandas_table.py @@ -0,0 +1,51 @@ +import unittest + +import pytest + +from marimo._dependencies.dependencies import DependencyManager +from marimo._plugins.ui._impl.tables.pandas_table import ( + PandasTableManagerFactory, +) + +HAS_DEPS = DependencyManager.has_pandas() + + +@pytest.mark.skipif(not HAS_DEPS, reason="optional dependencies not installed") +class TestPandasTableManager(unittest.TestCase): + def setUp(self) -> None: + import pandas as pd + + self.factory = PandasTableManagerFactory() + self.data = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}) + self.manager = self.factory.create()(self.data) + + def test_package_name(self) -> None: + self.assertEqual(self.factory.package_name(), "pandas") + + def test_to_csv(self) -> None: + expected_csv = self.data.to_csv(index=False).encode("utf-8") + self.assertEqual(self.manager.to_csv(), expected_csv) + + def test_to_json(self) -> None: + expected_json = self.data.to_json(orient="records").encode("utf-8") + self.assertEqual(self.manager.to_json(), expected_json) + + def test_select_rows(self) -> None: + import pandas as pd + + indices = [0, 2] + selected_manager = self.manager.select_rows(indices) + expected_data = self.data.iloc[indices] + pd.testing.assert_frame_equal(selected_manager.data, expected_data) + + def test_select_rows_empty(self) -> None: + selected_manager = self.manager.select_rows([]) + self.assertEqual(selected_manager.data.shape, (0, 2)) + + def test_get_row_headers(self) -> None: + expected_headers = [] + self.assertEqual(self.manager.get_row_headers(), expected_headers) + + def test_is_type(self) -> None: + self.assertTrue(self.manager.is_type(self.data)) + self.assertFalse(self.manager.is_type("not a dataframe")) diff --git a/tests/_plugins/ui/_impl/tables/test_polars_table.py b/tests/_plugins/ui/_impl/tables/test_polars_table.py new file mode 100644 index 00000000000..94fdb34f0fd --- /dev/null +++ b/tests/_plugins/ui/_impl/tables/test_polars_table.py @@ -0,0 +1,48 @@ +import unittest + +import pytest + +from marimo._dependencies.dependencies import DependencyManager +from marimo._plugins.ui._impl.tables.polars_table import ( + PolarsTableManagerFactory, +) + +HAS_DEPS = DependencyManager.has_polars() + + +@pytest.mark.skipif(not HAS_DEPS, reason="optional dependencies not installed") +class TestPolarsTableManagerFactory(unittest.TestCase): + def setUp(self) -> None: + import polars as pl + + self.factory = PolarsTableManagerFactory() + self.data = pl.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}) + self.manager = self.factory.create()(self.data) + + def test_package_name(self) -> None: + self.assertEqual(self.factory.package_name(), "polars") + + def test_to_csv(self) -> None: + self.assertIsInstance(self.manager.to_csv(), bytes) + + def test_to_json(self) -> None: + self.assertIsInstance(self.manager.to_json(), bytes) + + def test_select_rows(self) -> None: + indices = [0, 2] + selected_manager = self.manager.select_rows(indices) + expected_data = self.data[indices] + assert selected_manager.data.frame_equal(expected_data) + + def test_select_rows_empty(self) -> None: + selected_manager = self.manager.select_rows([]) + assert selected_manager.data.shape == (0, 0) + assert selected_manager.data.columns == [] + + def test_get_row_headers(self) -> None: + expected_headers = [] + self.assertEqual(self.manager.get_row_headers(), expected_headers) + + def test_is_type(self) -> None: + self.assertTrue(self.manager.is_type(self.data)) + self.assertFalse(self.manager.is_type("not a dataframe")) diff --git a/tests/_plugins/ui/_impl/tables/test_pyarrow.py b/tests/_plugins/ui/_impl/tables/test_pyarrow.py new file mode 100644 index 00000000000..7af6d0f7387 --- /dev/null +++ b/tests/_plugins/ui/_impl/tables/test_pyarrow.py @@ -0,0 +1,47 @@ +import unittest + +import pytest + +from marimo._dependencies.dependencies import DependencyManager +from marimo._plugins.ui._impl.tables.pyarrow_table import ( + PyArrowTableManagerFactory, +) + +HAS_DEPS = DependencyManager.has_pyarrow() + + +@pytest.mark.skipif(not HAS_DEPS, reason="optional dependencies not installed") +class TestPyArrowTableManagerFactory(unittest.TestCase): + def setUp(self) -> None: + import pyarrow as pa + + self.factory = PyArrowTableManagerFactory() + self.data = pa.table({"A": [1, 2, 3], "B": ["a", "b", "c"]}) + self.manager = self.factory.create()(self.data) + + def test_package_name(self) -> None: + self.assertEqual(self.factory.package_name(), "pyarrow") + + def test_to_csv(self) -> None: + self.assertIsInstance(self.manager.to_csv(), bytes) + + def test_to_json(self) -> None: + self.assertIsInstance(self.manager.to_json(), bytes) + + def test_select_rows(self) -> None: + indices = [0, 2] + selected_manager = self.manager.select_rows(indices) + expected_data = self.data.take(indices) + assert selected_manager.data == expected_data + + def test_select_rows_empty(self) -> None: + selected_manager = self.manager.select_rows([]) + assert selected_manager.data.num_rows == 0 + + def test_get_row_headers(self) -> None: + expected_headers = [] + self.assertEqual(self.manager.get_row_headers(), expected_headers) + + def test_is_type(self) -> None: + self.assertTrue(self.manager.is_type(self.data)) + self.assertFalse(self.manager.is_type("not a dataframe")) diff --git a/tests/_plugins/ui/_impl/test_table.py b/tests/_plugins/ui/_impl/test_table.py index 533dbbbed5c..0a0855af218 100644 --- a/tests/_plugins/ui/_impl/test_table.py +++ b/tests/_plugins/ui/_impl/test_table.py @@ -3,16 +3,15 @@ from typing import Any -import pytest - -from marimo._dependencies.dependencies import DependencyManager -from marimo._plugins.ui._impl.table import ( - _normalize_data, -) +from marimo._plugins.ui._impl.tables.default_table import DefaultTableManager from marimo._plugins.ui._impl.utils.dataframe import TableData from marimo._runtime.runtime import Kernel +def _normalize_data(data: Any) -> list[dict[str, Any]]: + return DefaultTableManager._normalize_data(data) + + def test_normalize_data(executing_kernel: Kernel) -> None: # unused, except for the side effect of giving the kernel an execution # context @@ -52,6 +51,24 @@ def test_normalize_data(executing_kernel: Kernel) -> None: {"key3": "value3"}, ] + # Dictionary with list of integers + data = {"key": [1, 2, 3]} + result = _normalize_data(data) + assert result == [ + {"key": 1}, + {"key": 2}, + {"key": 3}, + ] + + # Dictionary with tuple of integers + data = {"key": (1, 2, 3)} + result = _normalize_data(data) + assert result == [ + {"key": 1}, + {"key": 2}, + {"key": 3}, + ] + # Test with empty list data = [] result = _normalize_data(data) @@ -62,7 +79,7 @@ def test_normalize_data(executing_kernel: Kernel) -> None: try: _normalize_data(data2) except ValueError as e: - assert str(e) == "data must be a list or tuple." + assert str(e) == "data must be a list or tuple or a dict of lists." # Test with invalid data structure data3: Any = [set([1, 2, 3])] @@ -74,21 +91,3 @@ def test_normalize_data(executing_kernel: Kernel) -> None: == "data must be a sequence of JSON-serializable types, or a " + "sequence of dicts." ) - - -HAS_DEPS = DependencyManager.has_pandas() - - -@pytest.mark.skipif(not HAS_DEPS, reason="optional dependencies not installed") -def test_normalize_data_pandas(executing_kernel: Kernel) -> None: - # unused, except for the side effect of giving the kernel an execution - # context - del executing_kernel - - # Test with pandas DataFrame - import pandas as pd - - data = pd.DataFrame({"column1": [1, 2, 3], "column2": ["a", "b", "c"]}) - result = _normalize_data(data) - assert isinstance(result, str) - assert result.endswith(".csv") diff --git a/tests/_plugins/ui/_impl/utils/test_dataframe.py b/tests/_plugins/ui/_impl/utils/test_dataframe.py index 1afc23b2c1a..4b5fe56c8bb 100644 --- a/tests/_plugins/ui/_impl/utils/test_dataframe.py +++ b/tests/_plugins/ui/_impl/utils/test_dataframe.py @@ -1,18 +1,27 @@ # Copyright 2024 Marimo. All rights reserved. from __future__ import annotations -from typing import List +from typing import Any, List import pytest from marimo._dependencies.dependencies import DependencyManager -from marimo._plugins.ui._impl.utils.dataframe import get_row_headers +from marimo._plugins.ui._impl.tables.utils import get_table_manager -HAS_DEPS = DependencyManager.has_pandas() +HAS_PANDAS = DependencyManager.has_pandas() -@pytest.mark.skipif(not HAS_DEPS, reason="optional dependencies not installed") -def test_get_row_headers() -> None: +def _get_row_headers( + data: Any, +) -> list[tuple[str, list[str | int | float]]]: + manager = get_table_manager(data) + return manager.get_row_headers() + + +@pytest.mark.skipif( + not HAS_PANDAS, reason="optional dependencies not installed" +) +def test_get_row_headers_pandas() -> None: import pandas as pd expected: List[tuple[str, List[str]]] @@ -20,10 +29,7 @@ def test_get_row_headers() -> None: # Test with pandas DataFrame df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.index.name = "Index" - assert get_row_headers(df) == [] - - # Test with non-DataFrame input - assert get_row_headers([1, 2, 3]) == [] + assert _get_row_headers(df) == [] # Test with MultiIndex arrays = [ @@ -35,20 +41,25 @@ def test_get_row_headers() -> None: ("", ["foo", "bar", "baz"]), ("", ["one", "two", "three"]), ] - assert get_row_headers(df_multi) == expected + assert _get_row_headers(df_multi) == expected # Test with RangeIndex df_range = pd.DataFrame({"A": range(3)}) - assert get_row_headers(df_range) == [] + assert _get_row_headers(df_range) == [] # Test with categorical Index df_cat = pd.DataFrame({"A": range(3)}) df_cat.index = pd.CategoricalIndex(["a", "b", "c"]) expected = [("", ["a", "b", "c"])] - assert get_row_headers(df_cat) == expected + assert _get_row_headers(df_cat) == expected # Test with named categorical Index df_cat = pd.DataFrame({"A": range(3)}) df_cat.index = pd.CategoricalIndex(["a", "b", "c"], name="Colors") expected = [("Colors", ["a", "b", "c"])] - assert get_row_headers(df_cat) == expected + assert _get_row_headers(df_cat) == expected + + +def test_get_row_headers_list() -> None: + # Test with non-DataFrame input + assert _get_row_headers([1, 2, 3]) == []