Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Distributed layers #1270

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions mlx/distributed/distributed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ class EmptyGroup : public GroupImpl {
throw std::runtime_error(
"Communication not implemented in an empty distributed group.");
}
void barrier() override {
throw std::runtime_error(
"Barrier not implemented in an empty distributed group.");
}
};

} // namespace detail
Expand All @@ -80,6 +84,10 @@ Group Group::split(int color, int key /* = -1 */) const {
return Group(group_->split(color, key));
}

void Group::barrier() {
return group_->barrier();
}

Group init(bool strict /* = false */) {
auto init_group = [strict]() {
auto default_group = mpi::init(strict);
Expand Down
2 changes: 2 additions & 0 deletions mlx/distributed/distributed.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ struct Group {
return group_;
}

void barrier();

private:
std::shared_ptr<detail::GroupImpl> group_{nullptr};
};
Expand Down
1 change: 1 addition & 0 deletions mlx/distributed/distributed_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class GroupImpl {
virtual void all_gather(const array& input, array& output) = 0;
virtual void send(const array& input, int dst) = 0;
virtual void recv(array& out, int src) = 0;
virtual void barrier() = 0;
};

/* Return the communication stream. */
Expand Down
6 changes: 6 additions & 0 deletions mlx/distributed/mpi/mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ struct MPIWrapper {
LOAD_SYMBOL(MPI_Allgather, all_gather);
LOAD_SYMBOL(MPI_Send, send);
LOAD_SYMBOL(MPI_Recv, recv);
LOAD_SYMBOL(MPI_Barrier, barrier);
LOAD_SYMBOL(MPI_Type_contiguous, mpi_type_contiguous);
LOAD_SYMBOL(MPI_Type_commit, mpi_type_commit);
LOAD_SYMBOL(MPI_Op_create, mpi_op_create);
Expand Down Expand Up @@ -198,6 +199,7 @@ struct MPIWrapper {
int (*comm_free)(MPI_Comm*);
int (*send)(const void*, int, MPI_Datatype, int, int, MPI_Comm);
int (*recv)(void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status*);
int (*barrier)(MPI_Comm);

// Objects
MPI_Comm comm_world_;
Expand Down Expand Up @@ -319,6 +321,10 @@ class MPIGroup : public GroupImpl {
&status);
}

void barrier() override {
mpi().barrier(comm_);
}

private:
MPI_Comm comm_;
bool global_;
Expand Down
6 changes: 6 additions & 0 deletions python/mlx/nn/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@
ConvTranspose2d,
ConvTranspose3d,
)
from mlx.nn.layers.distributed import (
AllToShardedLinear,
QuantizedAllToShardedLinear,
QuantizedShardedToAllLinear,
ShardedToAllLinear,
)
from mlx.nn.layers.dropout import Dropout, Dropout2d, Dropout3d
from mlx.nn.layers.embedding import Embedding
from mlx.nn.layers.linear import Bilinear, Identity, Linear
Expand Down
Loading