Fix the bug with torch.utils.cpp_extension.load #215

Open · wants to merge 1 commit into master
22 changes: 22 additions & 0 deletions .gitignore
@@ -0,0 +1,22 @@
# Repo create
data/
log/
output/
pretrained_models/

# Generated by vscode
.vscode
__pycache__/
build/

# Custom
detail-api/
*.py[co]
/temp*/
/temp*.*/

# Unknown
scripts/

# Model parameters
*.pth
2 changes: 1 addition & 1 deletion README.md
@@ -76,7 +76,7 @@ Performance on the Cityscapes dataset. The models are trained and tested with th

## Quick start
### Install
1. Install PyTorch=0.4.1 following the [official instructions](https://pytorch.org/)
1. Install PyTorch=0.4.1 following the [official instructions](https://pytorch.org/) or [this](torch_installation.md)
2. git clone https://github.com/HRNet/HRNet-Semantic-Segmentation $SEG_ROOT
3. Install dependencies: pip install -r requirements.txt

3 binary files changed (contents not shown)
84 changes: 62 additions & 22 deletions lib/models/sync_bn/inplace_abn/functions.py
@@ -1,3 +1,4 @@
import os
from os import path

import torch.autograd as autograd
@@ -6,14 +7,20 @@
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
extra_cflags=["-O3"],
sources=[path.join(_src_path, f) for f in [
"inplace_abn.cpp",
"inplace_abn_cpu.cpp",
"inplace_abn_cuda.cu"
]],
extra_cuda_cflags=["--expt-extended-lambda"])
build_folder = path.join(path.split(_src_path)[0], "build")

if not path.exists(build_folder):
os.makedirs(build_folder, 0o777)

_backend = load(
name="inplace_abn",
extra_cflags=["-O3"],
sources=[
path.join(_src_path, f) for f in
["inplace_abn.cpp", "inplace_abn_cpu.cpp", "inplace_abn_cuda.cu"]
],
extra_cuda_cflags=["--expt-extended-lambda"],
build_directory=build_folder)

# Activation names
ACT_RELU = "relu"
@@ -73,9 +80,19 @@ def _act_backward(ctx, x, dx):


class InPlaceABN(autograd.Function):

@staticmethod
def forward(ctx, x, weight, bias, running_mean, running_var,
training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
def forward(ctx,
x,
weight,
bias,
running_mean,
running_var,
training=True,
momentum=0.1,
eps=1e-05,
activation=ACT_LEAKY_RELU,
slope=0.01):
# Save context
ctx.training = training
ctx.momentum = momentum
@@ -95,7 +112,8 @@ def forward(ctx, x, weight, bias, running_mean, running_var,

# Update running stats
running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var *
count / (count - 1))

# Mark in-place modified tensors
ctx.mark_dirty(x, running_mean, running_var)
@@ -122,23 +140,37 @@ def backward(ctx, dz):
_act_backward(ctx, z, dz)

if ctx.training:
edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine,
ctx.eps)
else:
# TODO: implement simplified CUDA backward for inference mode
edz = dz.new_zeros(dz.size(1))
eydz = dz.new_zeros(dz.size(1))

dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz,
eydz, ctx.affine, ctx.eps)
dweight = dweight if ctx.affine else None
dbias = dbias if ctx.affine else None

return dx, dweight, dbias, None, None, None, None, None, None, None


class InPlaceABNSync(autograd.Function):

@classmethod
def forward(cls, ctx, x, weight, bias, running_mean, running_var,
extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
def forward(cls,
ctx,
x,
weight,
bias,
running_mean,
running_var,
extra,
training=True,
momentum=0.1,
eps=1e-05,
activation=ACT_LEAKY_RELU,
slope=0.01):
# Save context
cls._parse_extra(ctx, extra)
ctx.training = training
@@ -169,9 +201,10 @@ def forward(cls, ctx, x, weight, bias, running_mean, running_var,
vars = comm.gather(vars)

mean = means.mean(0)
var = (vars + (mean - means) ** 2).mean(0)
var = (vars + (mean - means)**2).mean(0)

tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
tensors = comm.broadcast_coalesced(
(mean, var), [mean.get_device()] + ctx.worker_ids)
for ts, queue in zip(tensors[1:], ctx.worker_queues):
queue.put(ts)
else:
@@ -181,7 +214,8 @@ def forward(cls, ctx, x, weight, bias, running_mean, running_var,

# Update running stats
running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var *
count / (count - 1))

# Mark in-place modified tensors
ctx.mark_dirty(x, running_mean, running_var)
@@ -208,7 +242,8 @@ def backward(ctx, dz):
_act_backward(ctx, z, dz)

if ctx.training:
edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine,
ctx.eps)

if ctx.is_master:
edzs, eydzs = [edz], [eydz]
@@ -221,7 +256,8 @@ def backward(ctx, dz):
edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1)
eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1)

tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids)
tensors = comm.broadcast_coalesced(
(edz, eydz), [edz.get_device()] + ctx.worker_ids)
for ts, queue in zip(tensors[1:], ctx.worker_queues):
queue.put(ts)
else:
@@ -232,7 +268,8 @@ def backward(ctx, dz):
edz = dz.new_zeros(dz.size(1))
eydz = dz.new_zeros(dz.size(1))

dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz,
eydz, ctx.affine, ctx.eps)
dweight = dweight if ctx.affine else None
dbias = dbias if ctx.affine else None

@@ -253,4 +290,7 @@ def _parse_extra(ctx, extra):
inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
__all__ = [
"inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU",
"ACT_NONE"
]
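
The substance of this PR is the build-directory change above: `functions.py` now pre-creates a `build/` folder next to the extension sources and passes it to `torch.utils.cpp_extension.load` via its `build_directory` argument, instead of relying on PyTorch's default temporary extension cache (presumably where the original loading failure occurred). A minimal, self-contained sketch of the same pattern; `my_ext` and `my_ext.cpp` are placeholder names, not the repo's actual sources:

```
import os
from os import path

from torch.utils.cpp_extension import load

# Pre-create a build directory next to the C++/CUDA sources and point
# load() at it via build_directory, mirroring the change in functions.py.
src_dir = path.join(path.dirname(path.abspath(__file__)), "src")
build_dir = path.join(path.dirname(src_dir), "build")
os.makedirs(build_dir, exist_ok=True)  # the PR uses an exists-check plus mode 0o777

backend = load(
    name="my_ext",
    sources=[path.join(src_dir, "my_ext.cpp")],
    extra_cflags=["-O3"],
    build_directory=build_dir,
)
```

With `build_directory` set, the compiled artifacts land in a predictable, writable location inside the repo, which the new `build/` entry in `.gitignore` then excludes from version control.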
21 changes: 21 additions & 0 deletions torch_installation.md
@@ -0,0 +1,21 @@
# How to install PyTorch==0.4.1 with CUDA support

First, create a new conda environment. Note that it should be based on Python 3.6, since the wheel below is built for cp36.

```
conda create -n hrnet python=3.6
```

Next, manually install cudatoolkit 9.0 (the command below pulls it from the Tsinghua mirror of the Anaconda free channel):

```
conda install cudatoolkit=9.0 -c https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/linux-64/
```

Then install PyTorch 0.4.1 from the official cu90 wheel:

```
pip install http://download.pytorch.org/whl/cu90/torch-0.4.1-cp36-cp36m-linux_x86_64.whl
```
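
To verify that the wheel was installed with CUDA support, a quick sanity check (assuming the `hrnet` environment is activated):

```
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```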

Finally, install the remaining modules, such as Cython and opencv-python, as in the example below.
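
A sketch, assuming the remaining dependencies match the repo's requirements.txt:

```
pip install Cython opencv-python
```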