diff --git a/.github/workflows/publish-test.yml b/.github/workflows/publish-test.yml
new file mode 100644
index 0000000..95d6ae5
--- /dev/null
+++ b/.github/workflows/publish-test.yml
@@ -0,0 +1,105 @@
+# This workflow builds the Python package and publishes it to Test PyPI
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Publish to Test PyPI
+
+on:
+  push:
+    branches:
+      - main
+
+# Needed to create release and upload assets
+permissions:
+  contents: write
+
+
+jobs:
+  setup-version:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Generate version number
+        run: |
+          VERSION_HASH=$(date +"%Y%m%d%H%M%S")
+          echo "Generated version hash: $VERSION_HASH"
+          echo $VERSION_HASH > version.txt
+
+      - name: Upload version number as artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: version
+          path: version.txt
+
+  wheel:
+    name: Build Wheel
+    runs-on: ${{ matrix.os }}
+    permissions: write-all
+
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ['ubuntu-20.04']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+        cuda-version: ['11.7']
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      # - name: Set up Linux Env
+      #   if: ${{ runner.os == 'Linux' }}
+      #   run: |
+      #     bash -x .github/workflows/scripts/env.sh
+
+      # https://github.com/orgs/community/discussions/26313
+      - name: Download version value artifact
+        uses: actions/download-artifact@v2
+        with:
+          name: version
+          path: artifact
+
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/local/cuda-* /opt/cuda
+          sudo rm -rf /usr/local/cuda
+          bash -x .github/workflows/scripts/free-disk-space.sh
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install CUDA ${{ matrix.cuda-version }}
+        run: |
+          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
+
+      - name: Build wheel
+        shell: bash
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install build
+          VERSION_HASH=$(cat artifact/version.txt)
+          MOEINF_VERSION=0.0.1dev${VERSION_HASH} BUILD_OPS=1 python -m build --wheel
+          wheel_name=$(ls dist/*whl | xargs -n 1 basename)
+          asset_name=${wheel_name//"linux"/"manylinux1"}
+          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+          echo "asset_name=${asset_name}" >> $GITHUB_ENV
+
+
+      # Only build the source distribution when the Python version is 3.8
+      - name: Build Source
+        if: ${{ matrix.python-version == '3.8' }}
+        run: |
+          VERSION_HASH=$(cat artifact/version.txt)
+          MOEINF_VERSION=0.0.1dev${VERSION_HASH} python -m build --sdist
+
+      - name: Rename wheel
+        run: |
+          mv dist/${{ env.wheel_name }} dist/${{ env.asset_name }}
+
+      # (Danielkinz): This last step publishes the .whl to Test PyPI. Warning: untested
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@release/v1.8
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          skip-existing: true
\ No newline at end of file
diff --git a/README.md b/README.md
index 21ffaa9..103b1e7 100644
--- a/README.md
+++ b/README.md
@@ -137,7 +137,7 @@ CUDA_VISIBLE_DEVICES=0,1 python script.py
 We provide a simple example to run inference on a Huggingface LLM model. The script will download the model checkpoint and run inference on the specified input text. The output will be printed to the console.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 python example/interface_example.py --model_name_or_path "mistralai/Mixtral-8x7B-Instruct-v0.1" --offload_dir
+CUDA_VISIBLE_DEVICES=0 python examples/interface_example.py --model_name_or_path "mistralai/Mixtral-8x7B-Instruct-v0.1" --offload_dir
 ```
 
 ## Release Plan
diff --git a/core/prefetch/archer_prefetch_handle.cpp b/core/prefetch/archer_prefetch_handle.cpp
index 70fb5d3..a107c50 100644
--- a/core/prefetch/archer_prefetch_handle.cpp
+++ b/core/prefetch/archer_prefetch_handle.cpp
@@ -34,12 +34,18 @@ ArcherPrefetchHandle::ArcherPrefetchHandle(const std::string& prefix,
 
     ARCHER_LOG_INFO("Device count ", device_count);
     for (int i = 0; i < device_count; i++) {
-        cudaSetDevice(i);
         for (int j = 0; j < device_count; j++) {
-            if (i != j) { cudaDeviceEnablePeerAccess(j, 0); }
+            if (i != j) {
+                int can_access = 0;
+                cudaDeviceCanAccessPeer(&can_access, i, j);
+                if (can_access == 1) {
+                    cudaSetDevice(i);
+                    cudaDeviceEnablePeerAccess(j, 0);
+                }
+            }
         }
     }
-
+
     ARCHER_LOG_INFO("Enabled peer access for all devices");
 }
 
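The C++ change above is worth a note: `cudaDeviceEnablePeerAccess` was previously called unconditionally and fails on device pairs without P2P support; the new code first asks `cudaDeviceCanAccessPeer` and only enables peer access where the hardware allows it. As a minimal sketch, assuming a CUDA-enabled PyTorch build, the same capability probe can be run from Python (illustrative only, not part of this PR):

```python
# Illustrative only (not part of this PR): probe P2P capability from Python.
# torch.cuda.can_device_access_peer wraps the same cudaDeviceCanAccessPeer
# check that the C++ hunk above now performs before enabling peer access.
import torch

device_count = torch.cuda.device_count()
for i in range(device_count):
    for j in range(device_count):
        if i != j and torch.cuda.can_device_access_peer(i, j):
            print(f"device {i} can directly access peer {j}")
```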
diff --git a/examples/readme_example.py b/examples/readme_example.py
new file mode 100644
index 0000000..2f1129c
--- /dev/null
+++ b/examples/readme_example.py
@@ -0,0 +1,24 @@
+import torch
+import os
+from transformers import AutoTokenizer, SwitchTransformersForConditionalGeneration
+from moe_infinity import MoE
+
+user_home = os.path.expanduser('~')
+
+checkpoint = 'TheBloke/Mixtral-8x7B-v0.1-GPTQ'
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+config = {
+    "offload_path": os.path.join(user_home, "moe-infinity"),
+    "device_memory_ratio": 0.75,  # 75% of device memory is used for caching; lower this value if you hit OOM
+}
+
+model = MoE(checkpoint, config)
+
+input_text = "translate English to German: How old are you?"
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda:0")
+
+output_ids = model.generate(input_ids)
+output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+print(output_text)
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 35242a5..936da6e 100644
--- a/setup.py
+++ b/setup.py
@@ -76,7 +76,7 @@ def read_readme() -> str:
 # install all files in the package, rather than just the egg
 setup(
     name='moe_infinity',
-    version='0.0.1',
+    version=os.getenv('MOEINF_VERSION', '0.0.1'),
     packages=find_packages(exclude=['op_builder', 'op_builder.*', 'moe_infinity.ops.core.*']),
     package_data={
         'moe_infinity.ops.prefetch': ['**/*.so'],
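Taken together, the workflow and the `setup.py` change implement timestamp-based dev versioning: CI stores a `date +"%Y%m%d%H%M%S"` hash in an artifact, exports `MOEINF_VERSION=0.0.1dev${VERSION_HASH}` at build time, and `setup.py` falls back to `0.0.1` when the variable is unset. One caveat: the `wheel` job downloads the `version` artifact but does not declare `needs: setup-version`, so the two jobs may race; adding that dependency would make the download reliable. A minimal sketch of reproducing the stamping locally, as a hypothetical one-off script (not part of this PR):

```python
# Hypothetical local reproduction of the CI version stamping (not part of this PR).
import os
import subprocess
from datetime import datetime

# Same timestamp format as the workflow's `date +"%Y%m%d%H%M%S"` step.
version_hash = datetime.now().strftime("%Y%m%d%H%M%S")

env = os.environ.copy()
env["MOEINF_VERSION"] = f"0.0.1dev{version_hash}"  # read by os.getenv in setup.py
env["BUILD_OPS"] = "1"  # same flag the "Build wheel" step sets

# Build the wheel the same way the workflow does.
subprocess.run(["python", "-m", "build", "--wheel"], env=env, check=True)
```

Note that build tooling normalizes `0.0.1dev<hash>` to the PEP 440 form `0.0.1.dev<hash>`, so published filenames may differ slightly from the raw string.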