Changes for compatibility with Flux v0.16
Matthew Sainsbury-Dale authored and Matthew Sainsbury-Dale committed Jan 12, 2025
1 parent 32e1462 commit f22f763
Showing 12 changed files with 79 additions and 122 deletions.
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "NeuralEstimators"
uuid = "38f6df31-6b4a-4144-b2af-7ace2da57606"
authors = ["Matthew Sainsbury-Dale <[email protected]> and contributors"]
version = "0.2.1"
version = "0.2.2"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
@@ -45,9 +45,9 @@ CairoMakie = "0.12"
ColorSchemes = "2, 3"
DataFrames = "1"
Distances = "0.10, 0.11"
Flux = "0.14"
Flux = "0.16"
Folds = "0.2"
GraphNeuralNetworks = "0.6"
GraphNeuralNetworks = "0.6, 1"
Graphs = "1"
InvertedIndices = "1"
Metal = "1"
@@ -57,7 +57,7 @@ RecursiveArrayTools = "2, 3"
SpecialFunctions = "2"
Statistics = "1"
StatsBase = "0.33, 0.34"
julia = "1.9"
julia = "1.9, 1.10, 1.11"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
4 changes: 2 additions & 2 deletions README.md
@@ -70,6 +70,6 @@ If you find a bug or have a suggestion, please [open an issue](https://github.co

- **Neural Bayes estimators for irregular spatial data using graph neural networks** [[paper]](https://doi.org/10.1080/10618600.2024.2433671)[[code]](https://github.com/msainsburydale/NeuralEstimatorsGNN)

- **Modern extreme value statistics for Utopian extremes** [[paper]](https://arxiv.org/abs/2311.11054)
- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)

- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)
- **Neural parameter estimation with incomplete data** [[paper]](https://arxiv.org/abs/2501.04330)[[code]](https://github.com/msainsburydale/NeuralEM)
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -58,7 +58,7 @@ This software was developed as part of academic research. If you would like to s

- **Neural Bayes estimators for irregular spatial data using graph neural networks** [[paper]](https://arxiv.org/abs/2310.02600)[[code]](https://github.com/msainsburydale/NeuralEstimatorsGNN)

- **Modern extreme value statistics for Utopian extremes** [[paper]](https://arxiv.org/abs/2311.11054)

- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)

- **Neural parameter estimation with incomplete data** [[paper]](https://arxiv.org/abs/2501.04330)[[code]](https://github.com/msainsburydale/NeuralEM)

8 changes: 0 additions & 8 deletions src/Architectures.jl
@@ -136,7 +136,6 @@ struct DeepSet{T, G, K}
a::ElementwiseAggregator
S::K
end
@layer DeepSet
function DeepSet(ψ, ϕ, a::Function = mean; S = nothing)
@assert !isnothing(ψ) | !isnothing(S) "At least one of `ψ` or `S` must be given"
DeepSet(ψ, ϕ, ElementwiseAggregator(a), S)
@@ -391,7 +390,6 @@ end
Compress(a, b) = Compress(float.(a), float.(b), ones(eltype(float.(a)), length(a)))
Compress(a::Number, b::Number) = Compress([float(a)], [float(b)])
(l::Compress)(θ) = l.a .+ (l.b - l.a) ./ (one(eltype(θ)) .+ exp.(-l.k .* θ))
@layer Compress
Flux.trainable(l::Compress) = ()


@@ -413,7 +411,6 @@ function (l::TruncateSupport)(θ::AbstractMatrix)
end
TruncateSupport(a, b) = TruncateSupport(float.(a), float.(b), length(a))
TruncateSupport(a::Number, b::Number) = TruncateSupport([float(a)], [float(b)], 1)
Flux.@functor TruncateSupport
Flux.trainable(l::TruncateSupport) = ()
tuncatesupport(θ, a, b) = min(max(θ, a), b)

@@ -712,7 +709,6 @@ struct DensePositive
last_only::Bool
end
DensePositive(layer::Dense; g::Function = Flux.relu, last_only::Bool = false) = DensePositive(layer, g, last_only)
@layer DensePositive
# Simple version of forward pass:
# (d::DensePositive)(x) = d.layer.σ.(Flux.softplus(d.layer.weight) * x .+ d.layer.bias)
# Complex version of forward pass based on Flux's Dense code:
@@ -778,8 +774,6 @@ struct PowerDifference{A,B}
a::A
b::B
end
@layer PowerDifference
export PowerDifference
PowerDifference() = PowerDifference([0.5f0], [2.0f0])
PowerDifference(a::Number, b::AbstractArray) = PowerDifference([a], b)
PowerDifference(a::AbstractArray, b::Number) = PowerDifference(a, [b])
@@ -808,7 +802,6 @@ b(z)
struct ResidualBlock{B}
block::B
end
Flux.@functor ResidualBlock
(b::ResidualBlock)(x) = relu.(b.block(x))
function ResidualBlock(filter, channels; stride = 1)

@@ -838,5 +831,4 @@ end
struct Shortcut{S}
s::S
end
Flux.@functor Shortcut
(s::Shortcut)(mx, x) = mx + s.s(x)
26 changes: 11 additions & 15 deletions src/Estimators.jl
@@ -21,7 +21,6 @@ struct PointEstimator <: NeuralEstimator
c::Union{Function,Compress} # NB don't document `c` since Compress layer is usually just included in `deepset`
end
PointEstimator(arch) = PointEstimator(arch, identity)
@layer PointEstimator
(est::PointEstimator)(Z) = est.c(est.arch(Z))

# ---- IntervalEstimator ----
@@ -101,7 +100,6 @@ end
IntervalEstimator(u::DeepSet, v::DeepSet = u; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(v), identity, probs, g)
IntervalEstimator(u::DeepSet, c::Compress; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(u), c, probs, g)
IntervalEstimator(u::DeepSet, v::DeepSet, c::Compress; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(v), c, probs, g)
@layer IntervalEstimator
Flux.trainable(est::IntervalEstimator) = (u = est.u, v = est.v)
function (est::IntervalEstimator)(Z)
bₗ = est.u(Z) # lower bound
@@ -259,7 +257,6 @@ function QuantileEstimatorDiscrete(v::DeepSet; probs = [0.05, 0.25, 0.5, 0.75, 0
if !isnothing(i) @assert i > 0 end
QuantileEstimatorDiscrete(deepcopy.(repeat([v], length(probs))), probs, g, i)
end
@layer QuantileEstimatorDiscrete
Flux.trainable(est::QuantileEstimatorDiscrete) = (v = est.v, )
function (est::QuantileEstimatorDiscrete)(input) # input might be Z, or a tuple (Z, θ₋ᵢ)

@@ -354,7 +351,7 @@ See also [`QuantileEstimatorDiscrete`](@ref).
# Examples
```
using NeuralEstimators, Flux, Distributions , InvertedIndices, Statistics
using NeuralEstimators, Flux, Distributions, InvertedIndices, Statistics
using AlgebraOfGraphics, CairoMakie
# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)
@@ -468,7 +465,6 @@ function QuantileEstimatorContinuous(deepset::DeepSet; i::Union{Integer, Nothing
if !isnothing(i) @assert i > 0 end
QuantileEstimatorContinuous(deepset, i)
end
@layer QuantileEstimatorContinuous
# core method (used internally)
(est::QuantileEstimatorContinuous)(tup::Tuple) = est.deepset(tup)
# user-level convenience functions (not used internally)
@@ -545,7 +541,7 @@ of output neurons in the final layer of the summary network plus the number of
parameters in the statistical model. Second, the number of output neurons in the
final layer of the inference network must be equal to one.
The ratio estimator is trained by solving a relatively straightforward binary
The ratio estimator is trained by solving a binary
classification problem. Specifically, consider the problem of distinguishing
dependent parameter--data pairs
${(\boldsymbol{\theta}', \boldsymbol{Z}')' \sim p(\boldsymbol{Z}, \boldsymbol{\theta})}$ with
@@ -569,7 +565,7 @@ $r(\boldsymbol{Z}, \boldsymbol{\theta})$ is returned (setting the keyword argume
can then be used in various downstream Bayesian
(e.g., [Hermans et al., 2020](https://proceedings.mlr.press/v119/hermans20a.html))
or Frequentist
(e.g., [Walchessen et al., 2023](https://arxiv.org/abs/2305.04634))
(e.g., [Walchessen et al., 2024](https://doi.org/10.1016/j.spasta.2024.100848))
inferential algorithms.
See also [`mlestimate`](@ref) and [`mapestimate`](@ref) for obtaining
@@ -578,7 +574,7 @@ approximate maximum-likelihood and maximum-a-posteriori estimates, and
# Examples
```
using NeuralEstimators, Flux, Statistics, Optim
using NeuralEstimators, Flux, Statistics
# Generate data from Z|μ,σ ~ N(μ, σ²) with μ, σ ~ U(0, 1)
p = 2 # number of unknown parameters in the statistical model
@@ -609,24 +605,26 @@ r̂ = RatioEstimator(deepset)
# Train the estimator
r̂ = train(r̂, prior, simulate, m = m)
# Inference with "observed" data set
# Inference with "observed" data (grid-based optimisation)
θ = prior(1)
z = simulate(θ, m)[1]
θ₀ = [0.5, 0.5] # initial estimate
mlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate (requires Optim.jl to be loaded)
mapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate (requires Optim.jl to be loaded)
θ_grid = expandgrid(0:0.01:1, 0:0.01:1)' # fine gridding of the parameter space
θ_grid = Float32.(θ_grid)
r̂(z, θ_grid) # likelihood-to-evidence ratios over grid
mlestimate(r̂, z; θ_grid = θ_grid) # maximum-likelihood estimate
mapestimate(r̂, z; θ_grid = θ_grid) # maximum-a-posteriori estimate
sampleposterior(r̂, z; θ_grid = θ_grid) # posterior samples
# Inference with "observed" data (gradient-based optimisation using Optim.jl)
using Optim
θ₀ = [0.5, 0.5] # initial estimate
mlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate
mapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate
```
"""
struct RatioEstimator <: NeuralEstimator
deepset::DeepSet
end
@layer RatioEstimator
function (est::RatioEstimator)(Z, θ; kwargs...)
est((Z, θ); kwargs...) # "Tupleise" the input and pass to Tuple method
end
@@ -727,7 +725,6 @@ struct PiecewiseEstimator <: NeuralEstimator
end
end
end
@layer PiecewiseEstimator
function (pe::PiecewiseEstimator)(Z)
# Note that this is an inefficient implementation, analogous to the inefficient
# DeepSet implementation. A more efficient approach would be to subset Z based
@@ -943,7 +940,6 @@ struct Ensemble <: NeuralEstimator
estimators
end
Ensemble(architecture::Function, J::Integer) = Ensemble([architecture() for j in 1:J])
@layer Ensemble

function train(ensemble::Ensemble, args...; kwargs...)
kwargs = (;kwargs...)
5 changes: 0 additions & 5 deletions src/Graphs.jl
@@ -194,7 +194,6 @@ function (l::IndicatorWeights)(h::M) where M <: AbstractMatrix{T} where T
N = reduce(vcat, N)
Float32.(N)
end
@layer IndicatorWeights
Flux.trainable(l::IndicatorWeights) = ()


@@ -240,7 +239,6 @@ function (l::KernelWeights)(h::M) where M <: AbstractMatrix{T} where T
N = reduce(vcat, N)
Float32.(N)
end
@layer KernelWeights
Flux.trainable(l::KernelWeights) = ()


@@ -355,8 +353,6 @@ struct SpatialGraphConv{W<:AbstractMatrix, A, B,C, F} <: GNNLayer
f::C
g::F
end
@layer SpatialGraphConv
WeightedGraphConv = SpatialGraphConv; export WeightedGraphConv # alias for backwards compatability
function SpatialGraphConv(
ch::Pair{Int,Int},
g = relu;
@@ -546,7 +542,6 @@ struct GNNSummary{F, G}
propagation::F # propagation module
readout::G # readout module
end
@layer GNNSummary
Base.show(io::IO, D::GNNSummary) = print(io, "\nThe propagation and readout modules of a graph neural network (GNN), with a total of $(nparams(D)) trainable parameters:\n\nPropagation module ($(nparams(D.propagation)) parameters): $(D.propagation)\n\nReadout module ($(nparams(D.readout)) parameters): $(D.readout)")

function (ψ::GNNSummary)(g::GNNGraph)
8 changes: 3 additions & 5 deletions src/NeuralEstimators.jl
@@ -8,8 +8,7 @@ using CSV
using DataFrames
using Distances
using Flux
using Flux: ofeltype, DataLoader, update!, glorot_uniform, onehotbatch, _match_eltype, @non_differentiable, @ignore_derivatives # @layer
using Flux: @functor; var"@layer" = var"@functor" # NB did this because even semi-recent versions of Flux do not include @layer
using Flux: ofeltype, DataLoader, update!, glorot_uniform, onehotbatch, _match_eltype, @non_differentiable, @ignore_derivatives
using Folds
using Graphs
using GraphNeuralNetworks
@@ -34,7 +33,7 @@ include("loss.jl")
export ParameterConfigurations, subsetparameters
include("Parameters.jl")

export DeepSet, summarystatistics, Compress, CovarianceMatrix, CorrelationMatrix, ResidualBlock
export DeepSet, summarystatistics, Compress, CovarianceMatrix, CorrelationMatrix, ResidualBlock, PowerDifference
export vectotril, vectotriu
include("Architectures.jl")

@@ -70,7 +69,6 @@ export EM, removedata, encodedata
include("missingdata.jl")

# Backwards compatability and deprecations:
simulategaussianprocess = simulategaussian; export simulategaussianprocess
export loadbestweights, loadweights
include("deprecated.jl")

@@ -79,6 +77,7 @@ end
# ---- longer term/lower priority:
# - Amortised posterior approximation (https://github.com/slimgroup/InvertibleNetworks.jl). Also allow for conditioning.
# - Amortised likelihood approximation (https://github.com/slimgroup/InvertibleNetworks.jl)
# - Sequence (e.g., time-series) input: https://jldc.ch/post/seq2one-flux/, and see also the new recurrent layers added to Flux.
# - Extension: Incorporate the following package to greatly expand bootstrap functionality: https://github.com/juliangehring/Bootstrap.jl. Note also the "straps()" method that allows one to obtain the bootstrap distribution. I think what I can do is define a method of interval(bs::BootstrapSample). Maybe one difficulty will be how to re-sample... Not sure how the bootstrap method will know to sample from the independent replicates dimension (the last dimension) of each array.
# - Add NeuralEstimators.jl to the list of packages that use Documenter: see https://documenter.juliadocs.org/stable/man/examples/
# - Add NeuralEstimators.jl to https://github.com/smsharma/awesome-neural-sbi#code-packages-and-benchmarks
@@ -90,7 +89,6 @@ end
# - Add option to check validation risk (and save the optimal estimator) more frequently than the end of each epoch.
# - Should have initialise_estimator() as an internal function, and instead have the public API be based on constructors of the various estimator classes. This aligns more with the basic ideas of Julia, where functions returning a certain class should be made as a constructor rather than a separate function.
# - Examples: discrete parameters (e.g., Chan et al., 2018). Might need extra functionality for this.
# - Sequence (e.g., time-series) input: https://jldc.ch/post/seq2one-flux/
# - Precompile NeuralEstimators.jl to reduce latency: See https://julialang.org/blog/2021/01/precompile_tutorial/. Seems easy, just need to add precompile(f, (arg_types…)) to whichever methods we want to precompile
# - Examples: data plots within each example. Can show a histogram for univariate data; a scatterplot for bivariate data; a heatmap for gridded data; and scatterplot for irregular spatial data.
# - GPU on MacOS with Metal.jl (already have extension written, need to wait until Metal.jl is further developed; in particular, need convolution layers to be implemented)
5 changes: 5 additions & 0 deletions src/deprecated.jl
@@ -6,3 +6,8 @@ Returns the weights of the neural network saved as 'best_network.bson' in the gi
"""
loadbestweights(path::String) = loadweights(joinpath(path, "best_network.bson"))
loadweights(path::String) = load(path, @__MODULE__)[:weights]


# aliases for backwards compatibility
WeightedGraphConv = SpatialGraphConv; export WeightedGraphConv
simulategaussianprocess = simulategaussian; export simulategaussianprocess
1 change: 0 additions & 1 deletion src/summarystatistics.jl
@@ -187,5 +187,4 @@ function (l::NeighbourhoodVariogram)(g::GNNGraph)
Σ = sum(z .* N, dims = 1) # ∑(Zⱼ - Zᵢ)² in each bin
vec(Σ ./ 2N_card)
end
@layer NeighbourhoodVariogram
Flux.trainable(l::NeighbourhoodVariogram) = ()