-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add readme example & fix peer access
- Loading branch information
Your Name
committed
Apr 16, 2024
1 parent
75e468c
commit 7f97af6
Showing
3 changed files
with
28 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""Minimal MoE-Infinity usage example.

Loads a Mixture-of-Experts checkpoint through ``moe_infinity.MoE`` with
expert offloading to host storage, then runs a single ``generate`` call.
Requires a CUDA device (the input tensor is moved to ``cuda:0``).
"""
import os

import torch  # NOTE(review): not used directly, but moe_infinity/.to("cuda:0") require torch at runtime
from transformers import AutoTokenizer
from moe_infinity import MoE

# Expert weights are offloaded under the user's home directory.
user_home = os.path.expanduser('~')

checkpoint = 'TheBloke/Mixtral-8x7B-v0.1-GPTQ'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

config = {
    # Directory where offloaded expert parameters are cached on disk.
    "offload_path": os.path.join(user_home, "moe-infinity"),
    # 75% of the device memory is used for caching; lower this value on OOM.
    "device_memory_ratio": 0.75,
}

model = MoE(checkpoint, config)

input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda:0")

output_ids = model.generate(input_ids)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(output_text)