From dc52b3eef73f8646c7a03893ad966d8efde5e9e3 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Wed, 26 Jun 2024 23:11:37 -0700 Subject: [PATCH] Finish rewriting the tests --- .buildkite/pipeline.yml | 6 +- .github/workflows/CI.yml | 2 +- .github/workflows/Downgrade.yml | 2 +- .github/workflows/Downstream.yml | 2 +- .github/workflows/FormatCheck.yml | 40 ---- .github/workflows/QualityCheck.yml | 19 ++ Project.toml | 2 - README.md | 1 - ext/WeightInitializersCUDAExt.jl | 3 +- src/initializers.jl | 96 +++++----- test/initializers_tests.jl | 267 +++++++++++++++++++++++++++ test/qa_tests.jl | 23 +++ test/runtests.jl | 287 +---------------------------- test/shared_testsetup.jl | 20 ++ test/utils_tests.jl | 9 + 15 files changed, 395 insertions(+), 384 deletions(-) delete mode 100644 .github/workflows/FormatCheck.yml create mode 100644 .github/workflows/QualityCheck.yml create mode 100644 test/initializers_tests.jl create mode 100644 test/qa_tests.jl create mode 100644 test/shared_testsetup.jl create mode 100644 test/utils_tests.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a625b0f..565e58f 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -16,7 +16,7 @@ steps: queue: "juliagpu" cuda: "*" env: - GROUP: "CUDA" + BACKEND_GROUP: "CUDA" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 240 matrix: @@ -61,7 +61,7 @@ steps: queue: "juliagpu" cuda: "*" env: - GROUP: "CUDA" + BACKEND_GROUP: "CUDA" DOWNSTREAM_TEST_REPO: "{{matrix.repo}}" if: build.message !~ /\[skip tests\]/ || build.message !~ /\[skip downstream\]/ timeout_in_minutes: 240 @@ -111,7 +111,7 @@ steps: rocm: "*" rocmgpu: "*" env: - GROUP: "AMDGPU" + BACKEND_GROUP: "AMDGPU" JULIA_AMDGPU_CORE_MUST_LOAD: "1" JULIA_AMDGPU_HIP_MUST_LOAD: "1" JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2ad20de..6596d9d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -37,7 +37,7 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: - GROUP: "CPU" + BACKEND_GROUP: "CPU" RETESTITEMS_NWORKERS: 4 RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml index 269275e..5a5bcb1 100644 --- a/.github/workflows/Downgrade.yml +++ b/.github/workflows/Downgrade.yml @@ -27,7 +27,7 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: - GROUP: "CPU" + BACKEND_GROUP: "CPU" RETESTITEMS_NWORKERS: 4 RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index b215b2b..bf579cb 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -16,7 +16,7 @@ jobs: name: ${{ matrix.package.repo }}/${{ matrix.package.group }} runs-on: ${{ matrix.os }} env: - GROUP: ${{ matrix.package.group }} + BACKEND_GROUP: ${{ matrix.package.group }} strategy: fail-fast: false matrix: diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml deleted file mode 100644 index ac75c52..0000000 --- a/.github/workflows/FormatCheck.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: FormatCheck - -on: - push: - branches: - - 'main' - - 'release-' - tags: ['*'] - pull_request: - -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - julia-version: ["1"] - julia-arch: [x86] - os: [ubuntu-latest] - steps: - - uses: julia-actions/setup-julia@latest - 
with: - version: ${{ matrix.julia-version }} - - - uses: actions/checkout@v4 - - name: Install JuliaFormatter and format - run: | - julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter"))' - julia -e 'using JuliaFormatter; format(".", verbose=true)' - - name: Format check - run: | - julia -e ' - out = Cmd(`git diff --name-only`) |> read |> String - if out == "" - exit(0) - else - @error "Some files have not been formatted !!!" - write(stdout, out) - exit(1) - end' - \ No newline at end of file diff --git a/.github/workflows/QualityCheck.yml b/.github/workflows/QualityCheck.yml new file mode 100644 index 0000000..3bfa611 --- /dev/null +++ b/.github/workflows/QualityCheck.yml @@ -0,0 +1,19 @@ +name: Code Quality Check + +on: [pull_request] + +jobs: + code-style: + name: Format Suggestions + runs-on: ubuntu-latest + steps: + - uses: julia-actions/julia-format@v3 + + typos-check: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v4 + - name: Check spelling + uses: crate-ci/typos@v1.22.9 diff --git a/Project.toml b/Project.toml index be3e84a..6981002 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["Avik Pal and contributors"] version = "0.1.8" [deps] -ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -21,7 +20,6 @@ WeightInitializersCUDAExt = "CUDA" [compat] Aqua = "0.8.7" -ArgCheck = "2.3.0" CUDA = "5.3.2" ChainRulesCore = "1.23" Documenter = "1.5.0" diff --git a/README.md b/README.md index edede1c..4dc182c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ [![Build status](https://badge.buildkite.com/ffa2c8c3629cd58322446cddd3e8dcc4f121c28a574ee3e626.svg?branch=main)](https://buildkite.com/julialang/weightinitializers-dot-jl) [![CI](https://github.com/LuxDL/WeightInitializers.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/LuxDL/WeightInitializers.jl/actions/workflows/CI.yml) [![codecov](https://codecov.io/gh/LuxDL/WeightInitializers.jl/branch/main/graph/badge.svg?token=1ZY0A2NPEM)](https://codecov.io/gh/LuxDL/WeightInitializers.jl) -[![Package Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/WeightInitializers)](https://pkgs.genieframework.com?packages=WeightInitializers) [![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) [![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle) diff --git a/ext/WeightInitializersCUDAExt.jl b/ext/WeightInitializersCUDAExt.jl index e97f268..ac2d391 100644 --- a/ext/WeightInitializersCUDAExt.jl +++ b/ext/WeightInitializersCUDAExt.jl @@ -1,8 +1,7 @@ module WeightInitializersCUDAExt using CUDA: CUDA, CURAND -using Random: Random, shuffle -using WeightInitializers: WeightInitializers, NUM_TO_FPOINT, __partial_apply +using WeightInitializers: WeightInitializers const AbstractCuRNG = Union{CUDA.RNG, CURAND.RNG} diff --git a/src/initializers.jl b/src/initializers.jl index 7877d2b..2a5e4c8 100644 --- a/src/initializers.jl +++ b/src/initializers.jl @@ -104,7 +104,8 @@ truncated normal distribution. 
The numbers are distributed like
function truncated_normal(rng::AbstractRNG, ::Type{T}, dims::Integer...; mean=T(0),
        std=T(1), lo=-T(2), hi=T(2)) where {T <: Real}
    if (mean < lo - 2 * std) || (mean > hi + 2 * std)
-        @warn "Mean is more than 2 std outside the limits in truncated_normal, so the distribution of values may be inaccurate."
+        @warn "Mean is more than 2 std outside the limits in truncated_normal, so the \
+               distribution of values may be inaccurate."
    end
    l = _norm_cdf((T(lo) - T(mean)) / T(std))
    u = _norm_cdf((T(hi) - T(mean)) / T(std))
@@ -122,13 +123,12 @@ end
    gain = 1) -> AbstractArray{T, length(dims)}

Return an `AbstractArray{T}` of the given dimensions (`dims`) which is a
-(semi) orthogonal matrix, as described in [^Saxe14]
+(semi) orthogonal matrix, as described in [1].

The function constructs an orthogonal or semi-orthogonal matrix depending on the specified
-dimensions. For two dimensions, it returns a matrix where `dims = (rows, cols)`.
-For more than two dimensions, it computes an orthogonal matrix of
-size `prod(dims[1:(end - 1)])` by `dims[end]` before reshaping it to
-the original dimensions.
+dimensions. For two dimensions, it returns a matrix where `dims = (rows, cols)`. For more
+than two dimensions, it computes an orthogonal matrix of size `prod(dims[1:(end - 1)])` by
+`dims[end]` before reshaping it to the original dimensions.

Cannot construct a vector, i.e., `length(dims) == 1` is forbidden.
@@ -141,9 +141,8 @@ Cannot construct a vector, i.e., `length(dims) == 1` is forbidden.

# References

-[^Saxe14] Saxe, McClelland, Ganguli. "Exact solutions to the nonlinear dynamics of
-learning in deep linear neural networks",
-ICLR 2014, https://arxiv.org/abs/1312.6120
+[1] Saxe, McClelland, Ganguli. "Exact solutions to the nonlinear dynamics of learning in
+deep linear neural networks", ICLR 2014, https://arxiv.org/abs/1312.6120
"""
function orthogonal(rng::AbstractRNG, ::Type{T}, dims::Integer...;
        gain::Number=T(1.0)) where {T <: Number}
@@ -164,12 +163,15 @@ end
    sparsity::Number, std::Number=0.01) -> AbstractArray{T}

Creates a sparsely initialized weight matrix with a specified proportion of zeroed elements,
-using random numbers drawn from a normal distribution for the non-zero elements.
-This method is introduced in [^Martens2010].
-Note: The sparsity parameter controls the proportion of the matrix that will be zeroed.
-For example, a sparsity of 0.3 means that approximately 30% of the elements will be
-set to zero. The non-zero elements are distributed according to a normal distribution,
-scaled by the std parameter.
+using random numbers drawn from a normal distribution for the non-zero elements. This method
+was introduced in [1].
+
+!!! note
+
+    The sparsity parameter controls the proportion of the matrix that will be zeroed. For
+    example, a sparsity of 0.3 means that approximately 30% of the elements will be set to
+    zero. The non-zero elements are distributed according to a normal distribution, scaled
+    by the std parameter.

# Arguments

  - `rng::AbstractRNG`: The random number generator to use.
  - `T::Type{<:Number}`: The numeric type of the elements in the returned array.
  - `dims::Integer...`: The dimensions of the weight matrix to be generated.
  - `sparsity::Number`: The proportion of elements to be zeroed. Must be between 0 and 1.
-  - `std::Number=0.01`: The standard deviation of the normal distribution
-    before applying `gain`.
+  - `std::Number=0.01`: The standard deviation of the normal distribution used for the
+    non-zero elements.
# Returns - - `AbstractArray{T}`: A sparsely initialized weight matrix of dimensions `dims` - and type `T`. + - `AbstractArray{T}`: A sparsely initialized weight matrix of dimensions `dims` and type + `T`. # Examples -```julia -using Random +```jldoctest +julia> y = sparse_init(Xoshiro(123), Float32, 5, 5; sparsity=0.3, std=0.01); -# Initialize a 5x5 sparsely initialized matrix with 30% sparsity -rng = MersenneTwister(123) -matrix = sparse_init(rng, Float32, 5, 5; sparsity=0.3, std=0.01) -``` +julia> y isa Matrix{Float32} +true -``` -5×5 Matrix{Float64}: - 0.0 0.00273815 0.00592403 0.0 0.0 - 0.00459416 -0.000754831 -0.00888936 -0.0077507 0.0 - 0.0 -0.00194229 0.0 0.0 -0.00468489 - 0.0114265 0.0 0.0 -0.00734886 0.00277726 - -0.00396679 0.0 0.00327215 -0.0071741 -0.00880897 +julia> size(y) == (5, 5) +true ``` # References -[^Martens2010] Martens, J, "Deep learning via Hessian-free optimization" -_Proceedings of the 27th International Conference on International Conference -on Machine Learning_. 2010. +[1] Martens, J, "Deep learning via Hessian-free optimization" Proceedings of the 27th +International Conference on International Conference on Machine Learning. 2010. """ function sparse_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; sparsity::Number, std::Number=T(0.01)) where {T <: Number} if length(dims) != 2 - throw(ArgumentError("Only 2-dimensional outputs are supported for sparse initialization.")) + throw(ArgumentError("Only 2-dimensional outputs are supported for sparse \ + initialization.")) end rows, cols = dims @@ -250,8 +245,8 @@ most layers of a neural network. The identity mapping is scaled by the `gain` pa - Layers must have `input_size == output_size` for a perfect identity mapping. In cases where this condition is not met, the function pads extra dimensions with zeros. - For convolutional layers to achieve an identity mapping, kernel sizes must be odd, - and appropriate padding must be applied to ensure the output - feature maps are the same size as the input feature maps. + and appropriate padding must be applied to ensure the output feature maps are the same + size as the input feature maps. # Arguments @@ -271,16 +266,21 @@ most layers of a neural network. The identity mapping is scaled by the `gain` pa # Examples -```julia -using Random - -# Identity matrix for fully connected layer -identity_matrix = identity_init(MersenneTwister(123), Float32, 5, 5) - -# Identity tensor for convolutional layer -identity_tensor = identity_init(MersenneTwister(123), Float32, # Bias initialization - 3, 3, 5, # Matrix multiplication - 5; gain=1.5, shift=(1, 0)) +```jldoctest +julia> identity_init(Xoshiro(123), Float32, 5, 5) +5×5 Matrix{Float32}: + 1.0 1.0 1.0 1.0 1.0 + 1.0 1.0 1.0 1.0 1.0 + 1.0 1.0 1.0 1.0 1.0 + 1.0 1.0 1.0 1.0 1.0 + 1.0 1.0 1.0 1.0 1.0 + +julia> identity_init(Xoshiro(123), Float32, 3, 3, 1, 1; gain=1.5) +3×3×1×1 Array{Float32, 4}: +[:, :, 1, 1] = + 0.0 0.0 0.0 + 0.0 1.5 0.0 + 0.0 0.0 0.0 ``` """ function identity_init(rng::AbstractRNG, ::Type{T}, dims::Integer...; diff --git a/test/initializers_tests.jl b/test/initializers_tests.jl new file mode 100644 index 0000000..202e10d --- /dev/null +++ b/test/initializers_tests.jl @@ -0,0 +1,267 @@ +@testitem "Warning: truncated_normal" begin + @test_warn "Mean is more than 2 std outside the limits in truncated_normal, so \ + the distribution of values may be inaccurate." 
truncated_normal(2; mean=-5.0f0) +end + +@testitem "Identity Initialization" begin + @testset "Non-identity sizes" begin + @test identity_init(2, 3)[:, end] == zeros(Float32, 2) + @test identity_init(3, 2; shift=1)[1, :] == zeros(Float32, 2) + @test identity_init(1, 1, 3, 4)[:, :, :, end] == zeros(Float32, 1, 1, 3) + @test identity_init(2, 1, 3, 3)[end, :, :, :] == zeros(Float32, 1, 3, 3) + @test identity_init(1, 2, 3, 3)[:, end, :, :] == zeros(Float32, 1, 3, 3) + end +end + +@testitem "Orthogonal Initialization" setup=[SharedTestSetup] begin + using GPUArraysCore, LinearAlgebra + + @testset "rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in RNGS_ARRTYPES + # A matrix of dim = (m,n) with m > n should produce a QR decomposition. + # In the other case, the transpose should be taken to compute the QR decomposition. + for (rows, cols) in [(5, 3), (3, 5)] + v = orthogonal(rng, rows, cols) + GPUArraysCore.@allowscalar rows < cols ? (@test v * v' ≈ I(rows)) : + (@test v' * v ≈ I(cols)) + end + + for mat in [(3, 4, 5), (2, 2, 5)] + v = orthogonal(rng, mat...) + cols = mat[end] + rows = div(prod(mat), cols) + v = reshape(v, (rows, cols)) + GPUArraysCore.@allowscalar rows < cols ? (@test v * v' ≈ I(rows)) : + (@test v' * v ≈ I(cols)) + end + + @testset "Orthogonal Types $T" for T in (Float32, Float64) + @test eltype(orthogonal(rng, T, 3, 4; gain=1.5)) == T + @test eltype(orthogonal(rng, T, 3, 4, 5; gain=1.5)) == T + end + + @testset "Orthogonal AbstractArray Type $T" for T in (Float32, Float64) + @test orthogonal(rng, T, 3, 5) isa AbstractArray{T, 2} + @test orthogonal(rng, T, 3, 5) isa arrtype{T, 2} + + cl = orthogonal(rng) + @test cl(T, 3, 5) isa arrtype{T, 2} + + cl = orthogonal(rng, T) + @test cl(3, 5) isa arrtype{T, 2} + end + + @testset "Orthogonal Closure" begin + cl = orthogonal(;) + + # Sizes + @test size(cl(3, 4)) == (3, 4) + @test size(cl(rng, 3, 4)) == (3, 4) + @test size(cl(3, 4, 5)) == (3, 4, 5) + @test size(cl(rng, 3, 4, 5)) == (3, 4, 5) + + # Type + @test eltype(cl(4, 2)) == Float32 + @test eltype(cl(rng, 4, 2)) == Float32 + end + end +end + +@testitem "Sparse Initialization" setup=[SharedTestSetup] begin + using Statistics + + @testset "rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in RNGS_ARRTYPES + # sparse_init should yield an error for non 2-d dimensions + # sparse_init should yield no zero elements if sparsity < 0 + # sparse_init should yield all zero elements if sparsity > 1 + # sparse_init should yield exactly ceil(n_in * sparsity) elements in each column for + # other sparsity values + # sparse_init should yield a kernel in its non-zero elements consistent with the std + # parameter + + @test_throws ArgumentError sparse_init(3, 4, 5, sparsity=0.1) + @test_throws ArgumentError sparse_init(3, sparsity=0.1) + v = sparse_init(100, 100; sparsity=-0.1) + @test sum(v .== 0) == 0 + v = sparse_init(100, 100; sparsity=1.1) + @test sum(v .== 0) == length(v) + + for (n_in, n_out, sparsity, σ) in [(100, 100, 0.25, 0.1), (100, 400, 0.75, 0.01)] + expected_zeros = ceil(Integer, n_in * sparsity) + v = sparse_init(n_in, n_out; sparsity=sparsity, std=σ) + @test all([sum(v[:, col] .== 0) == expected_zeros for col in 1:n_out]) + @test 0.9 * σ < std(v[v .!= 0]) < 1.1 * σ + end + + @testset "sparse_init Types $T" for T in (Float16, Float32, Float64) + @test eltype(sparse_init(rng, T, 3, 4; sparsity=0.5)) == T + end + + @testset "sparse_init AbstractArray Type $T" for T in (Float16, Float32, Float64) + @test sparse_init(T, 3, 5; sparsity=0.5) isa AbstractArray{T, 2} 
+ @test sparse_init(rng, T, 3, 5; sparsity=0.5) isa arrtype{T, 2} + + cl = sparse_init(rng; sparsity=0.5) + @test cl(T, 3, 5) isa arrtype{T, 2} + + cl = sparse_init(rng, T; sparsity=0.5) + @test cl(3, 5) isa arrtype{T, 2} + end + + @testset "sparse_init Closure" begin + cl = sparse_init(; sparsity=0.5) + # Sizes + @test size(cl(3, 4)) == (3, 4) + @test size(cl(rng, 3, 4)) == (3, 4) + # Type + @test eltype(cl(4, 2)) == Float32 + @test eltype(cl(rng, 4, 2)) == Float32 + end + end +end + +@testitem "Basic Initializations" setup=[SharedTestSetup] begin + using LinearAlgebra, Statistics + + @testset "rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in RNGS_ARRTYPES + @testset "Sizes and Types: $init" for init in [ + zeros32, ones32, rand32, randn32, kaiming_uniform, kaiming_normal, + glorot_uniform, glorot_normal, truncated_normal, identity_init] + # Sizes + @test size(init(3)) == (3,) + @test size(init(rng, 3)) == (3,) + @test size(init(3, 4)) == (3, 4) + @test size(init(rng, 3, 4)) == (3, 4) + @test size(init(3, 4, 5)) == (3, 4, 5) + @test size(init(rng, 3, 4, 5)) == (3, 4, 5) + # Type + @test eltype(init(rng, 4, 2)) == Float32 + @test eltype(init(4, 2)) == Float32 + # RNG Closure + cl = init(rng) + @test cl(3) isa arrtype{Float32, 1} + @test cl(3, 5) isa arrtype{Float32, 2} + end + + @testset "Sizes and Types: $init" for (init, fp) in [ + (zeros16, Float16), (zerosC16, ComplexF16), (zeros32, Float32), + (zerosC32, ComplexF32), (zeros64, Float64), (zerosC64, ComplexF64), + (ones16, Float16), (onesC16, ComplexF16), (ones32, Float32), + (onesC32, ComplexF32), (ones64, Float64), (onesC64, ComplexF64), + (rand16, Float16), (randC16, ComplexF16), (rand32, Float32), + (randC32, ComplexF32), (rand64, Float64), (randC64, ComplexF64), + (randn16, Float16), (randnC16, ComplexF16), (randn32, Float32), + (randnC32, ComplexF32), (randn64, Float64), (randnC64, ComplexF64)] + # Sizes + @test size(init(3)) == (3,) + @test size(init(rng, 3)) == (3,) + @test size(init(3, 4)) == (3, 4) + @test size(init(rng, 3, 4)) == (3, 4) + @test size(init(3, 4, 5)) == (3, 4, 5) + @test size(init(rng, 3, 4, 5)) == (3, 4, 5) + # Type + @test eltype(init(rng, 4, 2)) == fp + @test eltype(init(4, 2)) == fp + # RNG Closure + cl = init(rng) + @test cl(3) isa arrtype{fp, 1} + @test cl(3, 5) isa arrtype{fp, 2} + end + + @testset "AbstractArray Type: $init $T" for init in [ + kaiming_uniform, kaiming_normal, glorot_uniform, + glorot_normal, truncated_normal, identity_init], + T in (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) + + init === truncated_normal && !(T <: Real) && continue + + @test init(T, 3) isa AbstractArray{T, 1} + @test init(rng, T, 3) isa arrtype{T, 1} + @test init(T, 3, 5) isa AbstractArray{T, 2} + @test init(rng, T, 3, 5) isa arrtype{T, 2} + + cl = init(rng) + @test cl(T, 3) isa arrtype{T, 1} + @test cl(T, 3, 5) isa arrtype{T, 2} + + cl = init(rng, T) + @test cl(3) isa arrtype{T, 1} + @test cl(3, 5) isa arrtype{T, 2} + end + + @testset "Closure: $init" for init in [ + kaiming_uniform, kaiming_normal, glorot_uniform, + glorot_normal, truncated_normal, identity_init] + cl = init(;) + # Sizes + @test size(cl(3)) == (3,) + @test size(cl(rng, 3)) == (3,) + @test size(cl(3, 4)) == (3, 4) + @test size(cl(rng, 3, 4)) == (3, 4) + @test size(cl(3, 4, 5)) == (3, 4, 5) + @test size(cl(rng, 3, 4, 5)) == (3, 4, 5) + # Type + @test eltype(cl(4, 2)) == Float32 + @test eltype(cl(rng, 4, 2)) == Float32 + end + + @testset "Kwargs types" for T in ( + Float16, Float32, Float64, ComplexF16, ComplexF32, 
ComplexF64) + if (T <: Real) + @test eltype(truncated_normal(T, 2, 5; mean=0, std=1, lo=-2, hi=2)) == T + @test eltype(orthogonal(T, 2, 5; gain=1.0)) == T + end + @test eltype(glorot_uniform(T, 2, 5; gain=1.0)) == T + @test eltype(glorot_normal(T, 2, 5; gain=1.0)) == T + @test eltype(kaiming_uniform(T, 2, 5; gain=sqrt(2))) == T + @test eltype(kaiming_normal(T, 2, 5; gain=sqrt(2))) == T + @test eltype(identity_init(T, 2, 5; gain=1.0)) == T + @test eltype(sparse_init(T, 2, 5; sparsity=0.5, std=0.01)) == T + end + + @testset "kaiming" begin + # kaiming_uniform should yield a kernel in range [-sqrt(6/n_out), sqrt(6/n_out)] + # and kaiming_normal should yield a kernel with stddev ~= sqrt(2/n_out) + for (n_in, n_out) in [(100, 100), (100, 400)] + v = kaiming_uniform(rng, n_in, n_out) + σ2 = sqrt(6 / n_out) + @test -1σ2 < minimum(v) < -0.9σ2 + @test 0.9σ2 < maximum(v) < 1σ2 + + v = kaiming_normal(rng, n_in, n_out) + σ2 = sqrt(2 / n_out) + @test 0.9σ2 < std(v) < 1.1σ2 + end + # Type + @test eltype(kaiming_uniform(rng, 3, 4; gain=1.5f0)) == Float32 + @test eltype(kaiming_normal(rng, 3, 4; gain=1.5f0)) == Float32 + end + + @testset "glorot: $init" for init in [glorot_uniform, glorot_normal] + # glorot_uniform and glorot_normal should both yield a kernel with + # variance ≈ 2/(fan_in + fan_out) + for dims in [(1000,), (100, 100), (100, 400), (2, 3, 32, 64), (2, 3, 4, 32, 64)] + v = init(dims...) + fan_in, fan_out = WeightInitializers._nfan(dims...) + σ2 = 2 / (fan_in + fan_out) + @test 0.9σ2 < var(v) < 1.1σ2 + end + @test eltype(init(3, 4; gain=1.5)) == Float32 + end + + @testset "orthogonal" begin + # A matrix of dim = (m,n) with m > n should produce a QR decomposition. In the other case, the transpose should be taken to compute the QR decomposition. + for (rows, cols) in [(5, 3), (3, 5)] + v = orthogonal(rows, cols) + rows < cols ? (@test v * v' ≈ I(rows)) : (@test v' * v ≈ I(cols)) + end + for mat in [(3, 4, 5), (2, 2, 5)] + v = orthogonal(mat...) + cols = mat[end] + rows = div(prod(mat), cols) + v = reshape(v, (rows, cols)) + rows < cols ? 
(@test v * v' ≈ I(rows)) : (@test v' * v ≈ I(cols)) + end + @test eltype(orthogonal(3, 4; gain=1.5)) == Float32 + end + end +end diff --git a/test/qa_tests.jl b/test/qa_tests.jl new file mode 100644 index 0000000..c5c93c2 --- /dev/null +++ b/test/qa_tests.jl @@ -0,0 +1,23 @@ +@testitem "Aqua: Quality Assurance" begin + using Aqua + + Aqua.test_all(WeightInitializers; ambiguities=false) + Aqua.test_ambiguities(WeightInitializers; recursive=false) +end + +@testitem "Explicit Imports: Quality Assurance" setup=[SharedTestSetup] begin + using CUDA, ExplicitImports + + @test check_no_implicit_imports(WeightInitializers) === nothing + @test check_no_stale_explicit_imports(WeightInitializers) === nothing + @test check_no_self_qualified_accesses(WeightInitializers) === nothing +end + +@testitem "doctests: Quality Assurance" begin + using Documenter + + doctestexpr = :(using Random, WeightInitializers) + + DocMeta.setdocmeta!(WeightInitializers, :DocTestSetup, doctestexpr; recursive=true) + doctest(WeightInitializers; manual=false) +end diff --git a/test/runtests.jl b/test/runtests.jl index a620753..8ba7978 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,286 +1,3 @@ -using Aqua -using WeightInitializers, Test, Statistics -using StableRNGs, Random, CUDA, LinearAlgebra +using ReTestItems -CUDA.allowscalar(false) - -const GROUP = get(ENV, "GROUP", "All") - -@testset "WeightInitializers.jl Tests" begin - rngs_arrtypes = [] - - if GROUP == "All" || GROUP == "CPU" - append!(rngs_arrtypes, - [(StableRNG(12345), AbstractArray), (Random.default_rng(), AbstractArray)]) - end - - if GROUP == "All" || GROUP == "CUDA" - append!(rngs_arrtypes, [(CUDA.default_rng(), CuArray)]) - end - - @testset "_nfan" begin - # Fallback - @test WeightInitializers._nfan() == (1, 1) - # Vector - @test WeightInitializers._nfan(4) == (1, 4) - # Matrix - @test WeightInitializers._nfan(4, 5) == (5, 4) - # Tuple - @test WeightInitializers._nfan((4, 5, 6)) == WeightInitializers._nfan(4, 5, 6) - # Convolution - @test WeightInitializers._nfan(4, 5, 6) == 4 .* (5, 6) - end - - @testset "rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in rngs_arrtypes - @testset "Sizes and Types: $init" for init in [ - zeros32, ones32, rand32, randn32, kaiming_uniform, kaiming_normal, - glorot_uniform, glorot_normal, truncated_normal, identity_init] - # Sizes - @test size(init(3)) == (3,) - @test size(init(rng, 3)) == (3,) - @test size(init(3, 4)) == (3, 4) - @test size(init(rng, 3, 4)) == (3, 4) - @test size(init(3, 4, 5)) == (3, 4, 5) - @test size(init(rng, 3, 4, 5)) == (3, 4, 5) - # Type - @test eltype(init(rng, 4, 2)) == Float32 - @test eltype(init(4, 2)) == Float32 - # RNG Closure - cl = init(rng) - @test cl(3) isa arrtype{Float32, 1} - @test cl(3, 5) isa arrtype{Float32, 2} - end - - @testset "Sizes and Types: $init" for (init, fp) in [ - (zeros16, Float16), (zerosC16, ComplexF16), (zeros32, Float32), - (zerosC32, ComplexF32), (zeros64, Float64), (zerosC64, ComplexF64), - (ones16, Float16), (onesC16, ComplexF16), (ones32, Float32), - (onesC32, ComplexF32), (ones64, Float64), (onesC64, ComplexF64), - (rand16, Float16), (randC16, ComplexF16), (rand32, Float32), - (randC32, ComplexF32), (rand64, Float64), (randC64, ComplexF64), - (randn16, Float16), (randnC16, ComplexF16), (randn32, Float32), - (randnC32, ComplexF32), (randn64, Float64), (randnC64, ComplexF64)] - # Sizes - @test size(init(3)) == (3,) - @test size(init(rng, 3)) == (3,) - @test size(init(3, 4)) == (3, 4) - @test size(init(rng, 3, 4)) == (3, 4) - @test size(init(3, 
4, 5)) == (3, 4, 5) - @test size(init(rng, 3, 4, 5)) == (3, 4, 5) - # Type - @test eltype(init(rng, 4, 2)) == fp - @test eltype(init(4, 2)) == fp - # RNG Closure - cl = init(rng) - @test cl(3) isa arrtype{fp, 1} - @test cl(3, 5) isa arrtype{fp, 2} - end - - @testset "AbstractArray Type: $init $T" for init in [ - kaiming_uniform, kaiming_normal, glorot_uniform, - glorot_normal, truncated_normal, identity_init], - T in (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) - - init === truncated_normal && !(T <: Real) && continue - - @test init(T, 3) isa AbstractArray{T, 1} - @test init(rng, T, 3) isa arrtype{T, 1} - @test init(T, 3, 5) isa AbstractArray{T, 2} - @test init(rng, T, 3, 5) isa arrtype{T, 2} - - cl = init(rng) - @test cl(T, 3) isa arrtype{T, 1} - @test cl(T, 3, 5) isa arrtype{T, 2} - - cl = init(rng, T) - @test cl(3) isa arrtype{T, 1} - @test cl(3, 5) isa arrtype{T, 2} - end - - @testset "Closure: $init" for init in [ - kaiming_uniform, kaiming_normal, glorot_uniform, - glorot_normal, truncated_normal, identity_init] - cl = init(;) - # Sizes - @test size(cl(3)) == (3,) - @test size(cl(rng, 3)) == (3,) - @test size(cl(3, 4)) == (3, 4) - @test size(cl(rng, 3, 4)) == (3, 4) - @test size(cl(3, 4, 5)) == (3, 4, 5) - @test size(cl(rng, 3, 4, 5)) == (3, 4, 5) - # Type - @test eltype(cl(4, 2)) == Float32 - @test eltype(cl(rng, 4, 2)) == Float32 - end - - @testset "Kwargs types" for T in ( - Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) - if (T <: Real) - @test eltype(truncated_normal(T, 2, 5; mean=0, std=1, lo=-2, hi=2)) == T - @test eltype(orthogonal(T, 2, 5; gain=1.0)) == T - end - @test eltype(glorot_uniform(T, 2, 5; gain=1.0)) == T - @test eltype(glorot_normal(T, 2, 5; gain=1.0)) == T - @test eltype(kaiming_uniform(T, 2, 5; gain=sqrt(2))) == T - @test eltype(kaiming_normal(T, 2, 5; gain=sqrt(2))) == T - @test eltype(identity_init(T, 2, 5; gain=1.0)) == T - @test eltype(sparse_init(T, 2, 5; sparsity=0.5, std=0.01)) == T - end - - @testset "kaiming" begin - # kaiming_uniform should yield a kernel in range [-sqrt(6/n_out), sqrt(6/n_out)] - # and kaiming_normal should yield a kernel with stddev ~= sqrt(2/n_out) - for (n_in, n_out) in [(100, 100), (100, 400)] - v = kaiming_uniform(rng, n_in, n_out) - σ2 = sqrt(6 / n_out) - @test -1σ2 < minimum(v) < -0.9σ2 - @test 0.9σ2 < maximum(v) < 1σ2 - - v = kaiming_normal(rng, n_in, n_out) - σ2 = sqrt(2 / n_out) - @test 0.9σ2 < std(v) < 1.1σ2 - end - # Type - @test eltype(kaiming_uniform(rng, 3, 4; gain=1.5f0)) == Float32 - @test eltype(kaiming_normal(rng, 3, 4; gain=1.5f0)) == Float32 - end - - @testset "glorot: $init" for init in [glorot_uniform, glorot_normal] - # glorot_uniform and glorot_normal should both yield a kernel with - # variance ≈ 2/(fan_in + fan_out) - for dims in [(1000,), (100, 100), (100, 400), (2, 3, 32, 64), (2, 3, 4, 32, 64)] - v = init(dims...) - fan_in, fan_out = WeightInitializers._nfan(dims...) - σ2 = 2 / (fan_in + fan_out) - @test 0.9σ2 < var(v) < 1.1σ2 - end - @test eltype(init(3, 4; gain=1.5)) == Float32 - end - - @testset "orthogonal" begin - # A matrix of dim = (m,n) with m > n should produce a QR decomposition. In the other case, the transpose should be taken to compute the QR decomposition. - for (rows, cols) in [(5, 3), (3, 5)] - v = orthogonal(rows, cols) - rows < cols ? (@test v * v' ≈ I(rows)) : (@test v' * v ≈ I(cols)) - end - for mat in [(3, 4, 5), (2, 2, 5)] - v = orthogonal(mat...) - cols = mat[end] - rows = div(prod(mat), cols) - v = reshape(v, (rows, cols)) - rows < cols ? 
(@test v * v' ≈ I(rows)) : (@test v' * v ≈ I(cols)) - end - @test eltype(orthogonal(3, 4; gain=1.5)) == Float32 - end - end - - @testset "Orthogonal rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in rngs_arrtypes - # A matrix of dim = (m,n) with m > n should produce a QR decomposition. - # In the other case, the transpose should be taken to compute the QR decomposition. - for (rows, cols) in [(5, 3), (3, 5)] - v = orthogonal(rng, rows, cols) - CUDA.@allowscalar rows < cols ? (@test v * v' ≈ I(rows)) : - (@test v' * v ≈ I(cols)) - end - for mat in [(3, 4, 5), (2, 2, 5)] - v = orthogonal(rng, mat...) - cols = mat[end] - rows = div(prod(mat), cols) - v = reshape(v, (rows, cols)) - CUDA.@allowscalar rows < cols ? (@test v * v' ≈ I(rows)) : - (@test v' * v ≈ I(cols)) - end - # Type - @testset "Orthogonal Types $T" for T in (Float32, Float64)#(Float16, Float32, Float64) - @test eltype(orthogonal(rng, T, 3, 4; gain=1.5)) == T - @test eltype(orthogonal(rng, T, 3, 4, 5; gain=1.5)) == T - end - @testset "Orthogonal AbstractArray Type $T" for T in (Float32, Float64)#(Float16, Float32, Float64) - @test orthogonal(T, 3, 5) isa AbstractArray{T, 2} - @test orthogonal(rng, T, 3, 5) isa arrtype{T, 2} - - cl = orthogonal(rng) - @test cl(T, 3, 5) isa arrtype{T, 2} - - cl = orthogonal(rng, T) - @test cl(3, 5) isa arrtype{T, 2} - end - @testset "Orthogonal Closure" begin - cl = orthogonal(;) - # Sizes - @test size(cl(3, 4)) == (3, 4) - @test size(cl(rng, 3, 4)) == (3, 4) - @test size(cl(3, 4, 5)) == (3, 4, 5) - @test size(cl(rng, 3, 4, 5)) == (3, 4, 5) - # Type - @test eltype(cl(4, 2)) == Float32 - @test eltype(cl(rng, 4, 2)) == Float32 - end - end - - @testset "sparse_init rng = $(typeof(rng)) & arrtype = $arrtype" for (rng, arrtype) in rngs_arrtypes - # sparse_init should yield an error for non 2-d dimensions - # sparse_init should yield no zero elements if sparsity < 0 - # sparse_init should yield all zero elements if sparsity > 1 - # sparse_init should yield exactly ceil(n_in * sparsity) elements in each column for other sparsity values - # sparse_init should yield a kernel in its non-zero elements consistent with the std parameter - - @test_throws ArgumentError sparse_init(3, 4, 5, sparsity=0.1) - @test_throws ArgumentError sparse_init(3, sparsity=0.1) - v = sparse_init(100, 100; sparsity=-0.1) - @test sum(v .== 0) == 0 - v = sparse_init(100, 100; sparsity=1.1) - @test sum(v .== 0) == length(v) - - for (n_in, n_out, sparsity, σ) in [(100, 100, 0.25, 0.1), (100, 400, 0.75, 0.01)] - expected_zeros = ceil(Integer, n_in * sparsity) - v = sparse_init(n_in, n_out; sparsity=sparsity, std=σ) - @test all([sum(v[:, col] .== 0) == expected_zeros for col in 1:n_out]) - @test 0.9 * σ < std(v[v .!= 0]) < 1.1 * σ - end - - # Type - @testset "sparse_init Types $T" for T in (Float16, Float32, Float64) - @test eltype(sparse_init(rng, T, 3, 4; sparsity=0.5)) == T - end - @testset "sparse_init AbstractArray Type $T" for T in (Float16, Float32, Float64) - @test sparse_init(T, 3, 5; sparsity=0.5) isa AbstractArray{T, 2} - @test sparse_init(rng, T, 3, 5; sparsity=0.5) isa arrtype{T, 2} - - cl = sparse_init(rng; sparsity=0.5) - @test cl(T, 3, 5) isa arrtype{T, 2} - - cl = sparse_init(rng, T; sparsity=0.5) - @test cl(3, 5) isa arrtype{T, 2} - end - @testset "sparse_init Closure" begin - cl = sparse_init(; sparsity=0.5) - # Sizes - @test size(cl(3, 4)) == (3, 4) - @test size(cl(rng, 3, 4)) == (3, 4) - # Type - @test eltype(cl(4, 2)) == Float32 - @test eltype(cl(rng, 4, 2)) == Float32 - end - end - - @testset 
"identity_init" begin - @testset "Non-identity sizes" begin - @test identity_init(2, 3)[:, end] == zeros(Float32, 2) - @test identity_init(3, 2; shift=1)[1, :] == zeros(Float32, 2) - @test identity_init(1, 1, 3, 4)[:, :, :, end] == zeros(Float32, 1, 1, 3) - @test identity_init(2, 1, 3, 3)[end, :, :, :] == zeros(Float32, 1, 3, 3) - @test identity_init(1, 2, 3, 3)[:, end, :, :] == zeros(Float32, 1, 3, 3) - end - end - - @testset "Warning: truncated_normal" begin - @test_warn "Mean is more than 2 std outside the limits in truncated_normal, so \ - the distribution of values may be inaccurate." truncated_normal(2; mean=-5.0f0) - end - - @testset "Aqua: Quality Assurance" begin - Aqua.test_all(WeightInitializers; ambiguities=false) - Aqua.test_ambiguities(WeightInitializers; recursive=false) - end -end +ReTestItems.runtests(@__DIR__) diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl new file mode 100644 index 0000000..5b18e59 --- /dev/null +++ b/test/shared_testsetup.jl @@ -0,0 +1,20 @@ +@testsetup module SharedTestSetup + +using CUDA, Random, StableRNGs + +CUDA.allowscalar(false) + +const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "All")) + +RNGS_ARRTYPES = [] +if BACKEND_GROUP == "all" || BACKEND_GROUP == "cpu" + append!(RNGS_ARRTYPES, + [(StableRNG(12345), AbstractArray), (Random.GLOBAL_RNG, AbstractArray)]) +end +if BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda" + push!(RNGS_ARRTYPES, (CUDA.default_rng(), CuArray)) +end + +export StableRNG, RNGS_ARRTYPES + +end diff --git a/test/utils_tests.jl b/test/utils_tests.jl new file mode 100644 index 0000000..c6c2b62 --- /dev/null +++ b/test/utils_tests.jl @@ -0,0 +1,9 @@ +@testitem "_nfan" begin + using WeightInitializers: _nfan + + @test _nfan() == (1, 1) # Fallback + @test _nfan(4) == (1, 4) # Vector + @test _nfan(4, 5) == (5, 4) # Matrix + @test _nfan((4, 5, 6)) == _nfan(4, 5, 6) # Tuple + @test _nfan(4, 5, 6) == 4 .* (5, 6) # Convolution +end