Compare more models across frame and tabular #444

Merged 30 commits on Sep 6, 2024
Changes from 28 commits

Commits (30)
d13073f  add pytorch tabular benchmark (yiweny, May 13, 2024)
52311f8  fix script (yiweny, May 13, 2024)
c893563  add pytorch tabular benchmark script (yiweny, May 13, 2024)
2ab3a8d  comparing tab transforemr (wsad1, Sep 2, 2024)
d061413  comparing tab transforemr (wsad1, Sep 2, 2024)
219451e  comparing tab transforemr (wsad1, Sep 2, 2024)
b91cf67  fix lint issue (wsad1, Sep 2, 2024)
d9da5eb  use device correctly (wsad1, Sep 2, 2024)
b553c6c  changelog update (wsad1, Sep 2, 2024)
8397439  Merge branch 'master' into yyuan/add-pytorch-tabular-comparison-script (wsad1, Sep 2, 2024)
974dd19  Update exp_version_manager.yml (wsad1, Sep 2, 2024)
c742861  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 2, 2024)
ab735b1  add fttransformer (wsad1, Sep 2, 2024)
4934510  print device (wsad1, Sep 3, 2024)
35f891a  correctly set gpu (wsad1, Sep 3, 2024)
5c80f6b  Merge branch 'yyuan/add-pytorch-tabular-comparison-script' into tab_f… (wsad1, Sep 3, 2024)
ff54765  Update benchmark/pytorch_tabular_benchmark.py (wsad1, Sep 4, 2024)
26bf69a  Update benchmark/pytorch_tabular_benchmark.py (wsad1, Sep 4, 2024)
b0816f1  rm .pt_tmp (akihironitta, Sep 6, 2024)
cfe3539  update changelog (akihironitta, Sep 6, 2024)
8d27720  clean up (akihironitta, Sep 6, 2024)
7951a88  Merge branch 'master' into yyuan/add-pytorch-tabular-comparison-script (akihironitta, Sep 6, 2024)
64b7314  Merge branch 'yyuan/add-pytorch-tabular-comparison-script' into tab_f… (akihironitta, Sep 6, 2024)
71bd20f  Merge branch 'master' into yyuan/add-pytorch-tabular-comparison-script (akihironitta, Sep 6, 2024)
57a3115  Merge branch 'yyuan/add-pytorch-tabular-comparison-script' into tab_f… (akihironitta, Sep 6, 2024)
aa6ae6f  Merge branch 'master' into tab_frame_comp_2 (akihironitta, Sep 6, 2024)
75d63dd  update (akihironitta, Sep 6, 2024)
c580efc  Update pytorch_tabular_benchmark.py (wsad1, Sep 6, 2024)
4d451aa  Update benchmark/pytorch_tabular_benchmark.py (wsad1, Sep 6, 2024)
388aebd  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Sep 6, 2024)
CHANGELOG.md (2 changes: 1 addition, 1 deletion)

@@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

-- Added a benchmark script to compare PyTorch Frame with PyTorch Tabular ([#398](https://github.com/pyg-team/pytorch-frame/pull/398))
+- Added a benchmark script to compare PyTorch Frame with PyTorch Tabular ([#398](https://github.com/pyg-team/pytorch-frame/pull/398), [#444](https://github.com/pyg-team/pytorch-frame/pull/444))
- Added `is_floating_point` method to `MultiNestedTensor` and `MultiEmbeddingTensor` ([#445](https://github.com/pyg-team/pytorch-frame/pull/445))
- Added support for inferring `stype.categorical` from boolean columns in `utils.infer_series_stype` ([#421](https://github.com/pyg-team/pytorch-frame/pull/421))

benchmark/pytorch_tabular_benchmark.py (102 changes: 69 additions, 33 deletions)

@@ -9,15 +9,14 @@
import torch.nn.functional as F
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
-from pytorch_tabular.models.common.heads import LinearHeadConfig
-from pytorch_tabular.models.tab_transformer import TabTransformerConfig
+from pytorch_tabular.models import FTTransformerConfig, TabTransformerConfig, LinearHeadConfig
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

from torch_frame import TaskType, stype
from torch_frame.data import DataLoader
from torch_frame.datasets import DataFrameBenchmark
-from torch_frame.nn import TabTransformer
+from torch_frame.nn import FTTransformer, TabTransformer

parser = argparse.ArgumentParser()
parser.add_argument('--task_type', type=str, choices=['binary_classification'],
@@ -29,7 +28,7 @@
parser.add_argument('--batch_size', type=int, default=256)
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--model_type', type=str, default='TabTransformer',
-                    choices=['TabTransformer'])
+                    choices=['TabTransformer', 'FTTransformer'])
args = parser.parse_args()

# Data, model params, device setup are the same for both models
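
The new `--model_type` flag above selects which architecture both frameworks benchmark. Below is a minimal, hypothetical invocation (not part of this diff) that exercises the new `FTTransformer` choice, assuming the script is run from the repository root and using only the flags and defaults shown in the argparse setup above:

```python
# Sketch only: launch the benchmark script as a subprocess with the new
# --model_type choice. All flags below appear in the argparse setup above;
# the values are illustrative.
import subprocess

subprocess.run(
    [
        "python", "benchmark/pytorch_tabular_benchmark.py",
        "--task_type", "binary_classification",
        "--model_type", "FTTransformer",
        "--batch_size", "256",
        "--epochs", "1",
    ],
    check=True,  # surface a non-zero exit code as an exception
)
```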
@@ -72,24 +71,49 @@ def train_tabular_model() -> float:
        accelerator='gpu' if device.type == 'cuda' else 'cpu',
    )
    optimizer_config = OptimizerConfig()
-    head_config = LinearHeadConfig(
-        layers="520-1040",
-        dropout=0.1,
-        initialization="kaiming",
-        use_batch_norm=True,
-    ).__dict__  # Convert to dict to pass to the model config
-    model_config = TabTransformerConfig(
-        task="classification",
-        learning_rate=1e-3,
-        head="LinearHead",  # Linear Head
-        input_embed_dim=channels,
-        num_heads=num_heads,
-        num_attn_blocks=num_layers,
-        attn_dropout=attn_dropout,
-        ff_dropout=ffn_dropout,
-        head_config=head_config,  # Linear Head Config
-        ff_hidden_multiplier=0,
-    )
+
+    if args.model_type == 'TabTransformer':
+        head_config = LinearHeadConfig(
+            layers="520-1040",
+            dropout=0.1,
+            initialization="kaiming",
+            use_batch_norm=True,
+        ).__dict__  # Convert to dict to pass to the model config
+        model_config = TabTransformerConfig(
+            task="classification",
+            learning_rate=1e-3,
+            head="LinearHead",  # Linear Head
+            input_embed_dim=channels,
+            num_heads=num_heads,
+            num_attn_blocks=num_layers,
+            attn_dropout=attn_dropout,
+            ff_dropout=ffn_dropout,
+            head_config=head_config,  # Linear Head Config
+            ff_hidden_multiplier=0,
+        )
+    elif args.model_type == 'FTTransformer':
+        head_config = LinearHeadConfig(
+            layers=f"{channels}-{dataset.num_classes}",
+            dropout=0.1,
+            initialization="kaiming",
+            use_batch_norm=True,
+        ).__dict__  # Convert to dict to pass to the model config
+        model_config = FTTransformerConfig(
+            task="classification",
+            learning_rate=1e-3,
+            head="LinearHead",  # Linear Head
+            input_embed_dim=channels,
+            # dividing by 4 to match the number of params
+            # in FTTransformer from torch frame
+            num_heads=int(num_heads / 4),
+            num_attn_blocks=num_layers,
+            attn_dropout=attn_dropout,
+            head_config=head_config,  # Linear Head Config
+            ff_hidden_multiplier=0,
+        )
+    else:
+        raise ValueError(f"Invalid model type: {args.model_type}")
+
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
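
In the `FTTransformerConfig` branch above, `num_heads` is divided by 4 to keep the PyTorch Tabular model's parameter count close to that of the `torch_frame` `FTTransformer`. The following is a small, hypothetical helper (not part of this diff) that mirrors the `requires_grad` loop used in `train_frame_model()` below and could be used to sanity-check that match:

```python
# Sketch only: count trainable parameters of an arbitrary torch.nn.Module.
# Calling this on both benchmarked models would show whether the num_heads
# adjustment actually brings their sizes into the same ballpark.
import torch


def count_trainable_params(module: torch.nn.Module) -> int:
    # Sum element counts over parameters that are updated during training.
    return sum(p.numel() for p in module.parameters() if p.requires_grad)
```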
@@ -119,17 +143,28 @@ def train_frame_model() -> float:
        shuffle=True,
    )
    val_loader = DataLoader(val_tensor_frame, batch_size=args.batch_size)
-    model = TabTransformer(
-        channels=channels,
-        out_channels=dataset.num_classes,
-        num_layers=num_layers,
-        num_heads=num_heads,
-        encoder_pad_size=2,
-        attn_dropout=attn_dropout,
-        ffn_dropout=ffn_dropout,
-        col_stats=dataset.col_stats,
-        col_names_dict=train_tensor_frame.col_names_dict,
-    ).to(device)
+    # Set up model and optimizer
+    if args.model_type == 'TabTransformer':
+        model = TabTransformer(
+            channels=channels,
+            out_channels=dataset.num_classes,
+            num_layers=num_layers,
+            num_heads=num_heads,
+            encoder_pad_size=2,
+            attn_dropout=attn_dropout,
+            ffn_dropout=ffn_dropout,
+            col_stats=dataset.col_stats,
+            col_names_dict=train_tensor_frame.col_names_dict,
+        ).to(device)
+    elif args.model_type == 'FTTransformer':
+        model = FTTransformer(
+            channels=channels,
+            out_channels=dataset.num_classes,
+            num_layers=num_layers,
+            col_stats=dataset.col_stats,
+            col_names_dict=train_tensor_frame.col_names_dict,
+        ).to(device)
+
    num_params = 0
    for m in model.parameters():
        if m.requires_grad:
@@ -175,6 +210,7 @@ def test(loader: DataLoader) -> float:

frame_train_time = train_frame_model()
tabular_train_time = train_tabular_model()
print(f"Model type: {args.model_type}. Device: {device}")
print(f"Frame average time per epoch: "
f"{frame_train_time / args.epochs:.2f}s")
print(f"Tabular average time per epoch: "