Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sunggg committed Jan 9, 2024
1 parent 58a1720 commit 2149d5d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
2 changes: 1 addition & 1 deletion serve/benchmarks/benchmark_latency.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-"""Benchmark offline user metric."""
+"""Benchmark latency offline."""
 import argparse
 import time, numpy as np
 from mlc_serve.engine import (
Expand Down
11 changes: 5 additions & 6 deletions serve/mlc_serve/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,24 +56,23 @@ def create_mlc_engine(args: argparse.Namespace):
             "max_decode_steps": args.max_decode_steps,
         }
     )
-    # type: off

     if args.use_staging_engine:
-        engine = StagingInferenceEngine(
+        engine = StagingInferenceEngine( # type: ignore
             tokenizer_module=HfTokenizerModule(args.model_artifact_path),
-            model_module_loader=PagedCacheModelModule,
+            model_module_loader=PagedCacheModelModule, # type: ignore
             model_module_loader_kwargs={
                 "model_artifact_path": args.model_artifact_path,
                 "engine_config": engine_config,
             },
         )
         engine.start()
     else:
-        engine = SynchronousInferenceEngine(
-            PagedCacheModelModule(
+        engine = SynchronousInferenceEngine( # type: ignore
+            PagedCacheModelModule( # type: ignore
                 model_artifact_path=args.model_artifact_path,
                 engine_config=engine_config,
             )
         )
-    # type: on
     return engine

0 comments on commit 2149d5d

Please sign in to comment.