Add more logging to engine / model initialization (#101)
* add more log

* more

* more
masahi authored Dec 8, 2023
1 parent e417f99 commit 9eeedfe
Showing 3 changed files with 21 additions and 5 deletions.
serve/mlc_serve/engine/staging_engine.py (15 additions, 5 deletions)
```diff
@@ -77,7 +77,7 @@ def __init__(
             # Log state
             structlog.contextvars.get_contextvars(),
             json_log_output,
-            logging.getLevelName(logging.getLogger().level)
+            logging.getLevelName(logging.getLogger().level),
         ),
     )
 
```
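The tuple in the hunk above appears to capture the parent process's logging state (structlog context variables, the JSON-output flag, and the root logger's level name) so the spawned worker can reproduce it. A minimal sketch of what the receiving side might do, assuming hypothetical names (`restore_log_state` is not from this repository):

```python
import logging

import structlog


def restore_log_state(contextvars: dict, json_log_output: bool, level_name: str) -> None:
    # Hypothetical sketch: re-apply the parent's structlog context variables
    # in the child process.
    structlog.contextvars.bind_contextvars(**contextvars)
    # Match the parent's root log level (getLevelName maps a name back to an int).
    logging.getLogger().setLevel(logging.getLevelName(level_name))
    # Pick a renderer consistent with the parent's output format.
    renderer = (
        structlog.processors.JSONRenderer()
        if json_log_output
        else structlog.dev.ConsoleRenderer()
    )
    structlog.configure(processors=[structlog.processors.add_log_level, renderer])
```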
```diff
@@ -90,7 +90,9 @@ def start(self):
                 "StagingInferenceEngine worker is not ready before timeout."
             )
         except:
-            raise RuntimeError("Failed to start StagingInferenceEngine worker process.")
+            raise RuntimeError(
+                f"Failed to start StagingInferenceEngine worker process with timeout {self.init_timeout}."
+            )
 
     def stop(self):
         self.command_queue.put(ShutdownCommand())
```
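`start()` evidently waits on the worker with `self.init_timeout` and now surfaces that timeout in the error message. A minimal sketch of the pattern, assuming a `multiprocessing.Event`-based readiness signal (`WorkerHandle` and `ready_event` are hypothetical names, not this repository's API):

```python
import multiprocessing as mp


class WorkerHandle:
    def __init__(self, target, init_timeout: float = 120.0):
        self.init_timeout = init_timeout
        self.ready_event = mp.Event()
        self.process = mp.Process(target=target, args=(self.ready_event,))

    def start(self) -> None:
        self.process.start()
        # Block until the worker signals readiness; fail loudly with the
        # timeout value so slow model loads are easy to diagnose from logs.
        if not self.ready_event.wait(timeout=self.init_timeout):
            raise RuntimeError(
                f"Worker is not ready before timeout {self.init_timeout}."
            )
```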
```diff
@@ -154,9 +156,14 @@ def wait_for_request(self, timeout_seconds=None) -> bool:
         return False
 
     def step(self) -> InferenceStepResult:
-        log_every(self, 1000, LOG.debug, "StagingInferenceEngine.step",
-                  _is_ready_to_serve=self._is_ready_to_serve(),
-                  has_pending_requests=self.has_pending_requests())
+        log_every(
+            self,
+            1000,
+            LOG.debug,
+            "StagingInferenceEngine.step",
+            _is_ready_to_serve=self._is_ready_to_serve(),
+            has_pending_requests=self.has_pending_requests(),
+        )
 
         if not self._is_ready_to_serve():
             raise RuntimeError("GenerationLoopWorker process is not running")
```
```diff
@@ -177,7 +184,10 @@ def step(self) -> InferenceStepResult:
 
         outputs = list[RequestOutput]()
         with self.requests_lock:
-            LOG.debug("StagingInferenceEngine.step obtained requests_lock", generation_output=generation_output)
+            LOG.debug(
+                "StagingInferenceEngine.step obtained requests_lock",
+                generation_output=generation_output,
+            )
             for seq_output in generation_output.sequences:
                 # TODO: support multi-sequence per request
                 request_id = seq_output.id.request_id
```
serve/mlc_serve/engine/staging_engine_worker.py (1 addition, 0 deletions)
```diff
@@ -453,6 +453,7 @@ def run_generation_loop_worker(
 
     try:
         model_module = model_module_loader(**model_module_loader_kwargs)
+        LOG.info("Model is initialized.")
         worker = GenerationLoopWorker(model_module=model_module)
     except:
         LOG.exception("An error raised in model initialization.")
```
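The worker reports initialization failures with `LOG.exception`, which attaches the active traceback to the structured log event. A self-contained sketch, assuming `LOG` is a structlog stdlib logger; `load_model` below is a hypothetical stand-in for `model_module_loader(**model_module_loader_kwargs)`:

```python
import structlog

LOG = structlog.stdlib.get_logger(__name__)


def load_model():
    # Hypothetical stand-in that simulates a failing model load.
    raise ValueError("simulated initialization failure")


try:
    model_module = load_model()
except Exception:
    # LOG.exception records the event together with the traceback, which is
    # what makes worker-side init failures diagnosable from the parent.
    LOG.exception("An error was raised in model initialization.")
```

Note that the bare `except:` in the hunk also catches `KeyboardInterrupt` and `SystemExit`; `except Exception:` is the usual narrower choice.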
serve/mlc_serve/model/paged_cache_model.py (5 additions, 0 deletions)
```diff
@@ -343,6 +343,7 @@ def copy_to_worker_0(sess: di.Session, host_array):
 
 
 def get_tvm_model(config, dev):
+    LOG.info(f"Loading parameters from {config.model_artifact_path}.")
     lib_path = os.path.join(config.model_artifact_path, config.library_name)
 
     if config.num_shards == 1:
```
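The added line interpolates the path into the message with an f-string. As a style note (not part of the commit), structlog can carry the same information as a key-value pair, which survives JSON rendering as a queryable field:

```python
# Alternative phrasing: pass the path as a structured field.
LOG.info("Loading model parameters.", model_artifact_path=config.model_artifact_path)
```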
```diff
@@ -732,6 +733,7 @@ def __init__(
         )
 
         if engine_config.max_num_batched_tokens > 0:
+            LOG.info("Running memory profiling.")
             num_blocks = get_num_cache_blocks(
                 model,
                 [engine_config.max_input_len] * engine_config.max_num_sequences,
```
```diff
@@ -763,6 +765,9 @@ def __init__(
                 model.disco_session,
                 model_artifact_config.sliding_window,
             )
+
+        LOG.info("Allocated KV cache blocks.")
+
         self.engine_config = engine_config
         self.model_artifact_config = model_artifact_config
         self.text_generator = PagedCacheModelTextGenerator(model)
```
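The commit brackets slow initialization steps (memory profiling, KV cache allocation) with INFO logs. A small context manager, sketched here under assumed names rather than taken from the repository, would also record how long each step took:

```python
import time
from contextlib import contextmanager

import structlog

LOG = structlog.stdlib.get_logger(__name__)


@contextmanager
def log_step(name: str):
    # Log entry and exit of a named step, with its wall-clock duration.
    LOG.info("Starting step.", step=name)
    start = time.perf_counter()
    try:
        yield
    finally:
        LOG.info(
            "Finished step.",
            step=name,
            duration_sec=round(time.perf_counter() - start, 3),
        )

# Usage sketch:
# with log_step("allocate KV cache blocks"):
#     cache_manager = CacheManager(...)
```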
