Merge pull request #25 from LuxDL/ap/remove_pt

Cleaning up of the codebase
LuxDL · Jun 27, 2024 · ed1f825 · ed1f825 · avik-pal · Jun 27, 2024
2 parents f5ffbb1 + 4f5b4ea
commit ed1f825
Show file tree

Hide file tree

Showing 20 changed files with 495 additions and 595 deletions.
diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
@@ -5,4 +5,5 @@ margin = 92
 indent = 4
 format_docstrings = true
 separate_kwargs_with_semicolon = true
+join_lines_based_on_source = false
 always_for_in = true
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -16,7 +16,7 @@ steps:
           queue: "juliagpu"
           cuda: "*"
         env:
-          GROUP: "CUDA"
+          BACKEND_GROUP: "CUDA"
         if: build.message !~ /\[skip tests\]/
         timeout_in_minutes: 240
         matrix:
@@ -61,7 +61,7 @@ steps:
           queue: "juliagpu"
           cuda: "*"
         env:
-          GROUP: "CUDA"
+          BACKEND_GROUP: "CUDA"
           DOWNSTREAM_TEST_REPO: "{{matrix.repo}}"
         if: build.message !~ /\[skip tests\]/ || build.message !~ /\[skip downstream\]/
         timeout_in_minutes: 240
@@ -111,7 +111,7 @@ steps:
           rocm: "*"
           rocmgpu: "*"
         env:
-          GROUP: "AMDGPU"
+          BACKEND_GROUP: "AMDGPU"
           JULIA_AMDGPU_CORE_MUST_LOAD: "1"
           JULIA_AMDGPU_HIP_MUST_LOAD: "1"
           JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -37,7 +37,9 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         env:
-          GROUP: "CPU"
+          BACKEND_GROUP: "CPU"
+          RETESTITEMS_NWORKERS: 4
+          RETESTITEMS_NWORKER_THREADS: 2
       - uses: julia-actions/julia-processcoverage@v1
         with:
           directories: src,ext

diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        version: ['1.9']
+        version: ['1']
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
@@ -27,7 +27,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         env:
-          GROUP: "CPU"
+          BACKEND_GROUP: "CPU"
           RETESTITEMS_NWORKERS: 4
           RETESTITEMS_NWORKER_THREADS: 2
       - uses: julia-actions/julia-processcoverage@v1

diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml
@@ -16,7 +16,7 @@ jobs:
     name: ${{ matrix.package.repo }}/${{ matrix.package.group }}
     runs-on: ${{ matrix.os }}
     env:
-      GROUP: ${{ matrix.package.group }}
+      BACKEND_GROUP: ${{ matrix.package.group }}
     strategy:
       fail-fast: false
       matrix:

diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml
diff --git a/.github/workflows/QualityCheck.yml b/.github/workflows/QualityCheck.yml
@@ -0,0 +1,19 @@
+name: Code Quality Check
+
+on: [pull_request]
+
+jobs:
+  code-style:
+    name: Format Suggestions
+    runs-on: ubuntu-latest
+    steps:
+      - uses: julia-actions/julia-format@v3
+
+  typos-check:
+    name: Spell Check with Typos
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Check spelling
+        uses: crate-ci/typos@v1.22.9
diff --git a/.typos.toml b/.typos.toml
@@ -0,0 +1,2 @@
+[default.extend-words]
+nin = "nin"
diff --git a/Project.toml b/Project.toml
@@ -1,13 +1,13 @@
 name = "WeightInitializers"
 uuid = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
-version = "0.1.7"
+version = "0.1.8"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b"
-PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -19,26 +19,30 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 WeightInitializersCUDAExt = "CUDA"
 
 [compat]
-Aqua = "0.8"
-CUDA = "5"
-ChainRulesCore = "1.21"
-LinearAlgebra = "1.9"
+Aqua = "0.8.7"
+CUDA = "5.3.2"
+ChainRulesCore = "1.23"
+Documenter = "1.5.0"
+ExplicitImports = "1.6.0"
+GPUArraysCore = "0.1.6"
+LinearAlgebra = "1.10"
 PartialFunctions = "1.2"
-PrecompileTools = "1.2"
-Random = "1.9"
+Random = "1.10"
+ReTestItems = "1.24.0"
 SpecialFunctions = "2"
 StableRNGs = "1"
-Statistics = "1.9"
-Test = "1.9"
-julia = "1.9"
+Statistics = "1.10"
+Test = "1.10"
+julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test", "StableRNGs", "Random", "Statistics", "CUDA"]
+test = ["Aqua", "CUDA", "Documenter", "ExplicitImports", "ReTestItems", "StableRNGs", "Test"]
diff --git a/README.md b/README.md
@@ -1,14 +1,13 @@
 # WeightInitializers
 
 [![Join the chat at https://julialang.zulipchat.com #machine-learning](https://img.shields.io/static/v1?label=Zulip&message=chat&color=9558b2&labelColor=389826)](https://julialang.zulipchat.com/#narrow/stream/machine-learning)
-[![Latest Docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://lux.csail.mit.edu/dev/api/)
-[![Stable Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://lux.csail.mit.edu/stable/api/)
+[![Latest Docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://lux.csail.mit.edu/dev/api/Building_Blocks/WeightInitializers)
+[![Stable Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://lux.csail.mit.edu/stable/api/Building_Blocks/WeightInitializers)
 [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
 
 [![Build status](https://badge.buildkite.com/ffa2c8c3629cd58322446cddd3e8dcc4f121c28a574ee3e626.svg?branch=main)](https://buildkite.com/julialang/weightinitializers-dot-jl)
 [![CI](https://github.com/LuxDL/WeightInitializers.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/LuxDL/WeightInitializers.jl/actions/workflows/CI.yml)
 [![codecov](https://codecov.io/gh/LuxDL/WeightInitializers.jl/branch/main/graph/badge.svg?token=1ZY0A2NPEM)](https://codecov.io/gh/LuxDL/WeightInitializers.jl)
-[![Package Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/WeightInitializers)](https://pkgs.genieframework.com?packages=WeightInitializers)
 
 [![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
 [![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle)

diff --git a/ext/WeightInitializersCUDAExt.jl b/ext/WeightInitializersCUDAExt.jl
@@ -1,79 +1,15 @@
 module WeightInitializersCUDAExt
 
-using WeightInitializers, CUDA
-using Random
-import WeightInitializers: __partial_apply, NUM_TO_FPOINT, identity_init, sparse_init,
-                           orthogonal
+using CUDA: CUDA, CURAND
+using WeightInitializers: WeightInitializers
 
 const AbstractCuRNG = Union{CUDA.RNG, CURAND.RNG}
 
-for T in ("16", "32", "64", "C16", "C32", "C64"), fname in (:ones, :zeros)
-    name = Symbol(fname, T)
-    TP = NUM_TO_FPOINT[Symbol(T)]
-    @eval begin
-        function WeightInitializers.$(name)(rng::AbstractCuRNG, dims::Integer...; kwargs...)
-            return CUDA.$(fname)($TP, dims...; kwargs...)
-        end
-    end
-
-    @eval function WeightInitializers.$(name)(rng::AbstractCuRNG; kwargs...)
-        return __partial_apply($name, (rng, (; kwargs...)))
-    end
-end
-
-function sparse_init(rng::AbstractCuRNG, ::Type{T}, dims::Integer...;
-        sparsity::Number, std::Number=T(0.01)) where {T <: Number}
-    if length(dims) != 2
-        throw(ArgumentError("Only 2-dimensional outputs are supported for sparse initialization."))
-    end
-
-    rows, cols = dims
-    prop_zero = min(1.0, sparsity)
-    num_zeros = ceil(Integer, prop_zero * rows)
-    sparse_array = randn(rng, T, dims...) .* T(std)
-    sparse_array[1:num_zeros, :] .= CUDA.zero(T)
-
-    return CUDA.@allowscalar mapslices(shuffle, sparse_array, dims=1)
+function WeightInitializers.__zeros(::AbstractCuRNG, T::Type, dims::Integer...)
+    return CUDA.zeros(T, dims...)
 end
-
-function identity_init(rng::AbstractCuRNG, ::Type{T}, dims::Integer...;
-        gain::Number=1, shift::Integer=0) where {T <: Number}
-    if length(dims) == 1
-        # Bias initialization
-        return CUDA.zeros(T, dims...)
-    elseif length(dims) == 2
-        # Matrix multiplication
-        rows, cols = dims
-        mat = CUDA.zeros(T, rows, cols)
-        diag_indices = 1:min(rows, cols)
-        CUDA.fill!(view(mat, diag_indices, diag_indices), T(gain))
-        return CUDA.circshift(mat, shift)
-    else
-        # Convolution or more dimensions
-        nin, nout = dims[end - 1], dims[end]
-        centers = map(d -> cld(d, 2), dims[1:(end - 2)])
-        weights = CUDA.zeros(T, dims...)
-        #we should really find a better way to do this
-        CUDA.@allowscalar for i in 1:min(nin, nout)
-            index = (centers..., i, i)
-            weights[index...] = T(gain)
-        end
-        return CUDA.circshift(weights, (ntuple(d -> 0, length(dims) - 2)..., shift, shift))
-    end
-end
-
-for initializer in (:sparse_init, :identity_init)
-    @eval function ($initializer)(rng::AbstractCuRNG, dims::Integer...; kwargs...)
-        return $initializer(rng, Float32, dims...; kwargs...)
-    end
-
-    @eval function ($initializer)(rng::AbstractCuRNG; kwargs...)
-        return __partial_apply($initializer, (rng, (; kwargs...)))
-    end
-    @eval function ($initializer)(rng::AbstractCuRNG,
-            ::Type{T}; kwargs...) where {T <: Number}
-        return __partial_apply($initializer, ((rng, T), (; kwargs...)))
-    end
+function WeightInitializers.__ones(::AbstractCuRNG, T::Type, dims::Integer...)
+    return CUDA.ones(T, dims...)
 end
 
 end
diff --git a/src/WeightInitializers.jl b/src/WeightInitializers.jl
@@ -1,62 +1,27 @@
 module WeightInitializers
 
-import PrecompileTools: @recompile_invalidations
+#! format: off
+using ChainRulesCore: ChainRulesCore
+using GPUArraysCore: @allowscalar
+using LinearAlgebra: LinearAlgebra, Diagonal, qr
+using PartialFunctions: :$
+using Random: Random, AbstractRNG, Xoshiro, shuffle
+using SpecialFunctions: SpecialFunctions, erf, erfinv
+using Statistics: Statistics, std
+#! format: on
 
-@recompile_invalidations begin
-    using ChainRulesCore, PartialFunctions, Random, SpecialFunctions, Statistics,
-          LinearAlgebra
-end
+const CRC = ChainRulesCore
 
 include("utils.jl")
 include("initializers.jl")
-
-# Mark the functions as non-differentiable
-for f in [
-    :zeros64,
-    :ones64,
-    :rand64,
-    :randn64,
-    :zeros32,
-    :ones32,
-    :rand32,
-    :randn32,
-    :zeros16,
-    :ones16,
-    :rand16,
-    :randn16,
-    :zerosC64,
-    :onesC64,
-    :randC64,
-    :randnC64,
-    :zerosC32,
-    :onesC32,
-    :randC32,
-    :randnC32,
-    :zerosC16,
-    :onesC16,
-    :randC16,
-    :randnC16,
-    :glorot_normal,
-    :glorot_uniform,
-    :kaiming_normal,
-    :kaiming_uniform,
-    :truncated_normal,
-    :orthogonal,
-    :sparse_init,
-    :identity_init
-]
-    @eval @non_differentiable $(f)(::Any...)
-end
+include("autodiff.jl")
 
 export zeros64, ones64, rand64, randn64, zeros32, ones32, rand32, randn32, zeros16, ones16,
        rand16, randn16
 export zerosC64, onesC64, randC64, randnC64, zerosC32, onesC32, randC32, randnC32, zerosC16,
        onesC16, randC16, randnC16
 export glorot_normal, glorot_uniform
 export kaiming_normal, kaiming_uniform
-export truncated_normal
-export orthogonal
-export sparse_init
-export identity_init
+export truncated_normal, orthogonal, sparse_init, identity_init
 
 end
diff --git a/src/autodiff.jl b/src/autodiff.jl
@@ -0,0 +1,8 @@
+# Mark the functions as non-differentiable
+for f in [:zeros64, :ones64, :rand64, :randn64, :zeros32, :ones32, :rand32, :randn32,
+    :zeros16, :ones16, :rand16, :randn16, :zerosC64, :onesC64, :randC64,
+    :randnC64, :zerosC32, :onesC32, :randC32, :randnC32, :zerosC16, :onesC16,
+    :randC16, :randnC16, :glorot_normal, :glorot_uniform, :kaiming_normal,
+    :kaiming_uniform, :truncated_normal, :orthogonal, :sparse_init, :identity_init]
+    @eval CRC.@non_differentiable $(f)(::Any...)
+end