Changes for compatibility with Flux v0.16
Matthew Sainsbury-Dale authored and Matthew Sainsbury-Dale committed Jan 12, 2025
1 parent 32e1462 commit f22f763
Showing 12 changed files with 79 additions and 122 deletions.
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "NeuralEstimators"
uuid = "38f6df31-6b4a-4144-b2af-7ace2da57606"
authors = ["Matthew Sainsbury-Dale <[email protected]> and contributors"]
version = "0.2.1"
version = "0.2.2"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
@@ -45,9 +45,9 @@ CairoMakie = "0.12"
ColorSchemes = "2, 3"
DataFrames = "1"
Distances = "0.10, 0.11"
Flux = "0.14"
Flux = "0.16"
Folds = "0.2"
GraphNeuralNetworks = "0.6"
GraphNeuralNetworks = "0.6, 1"
Graphs = "1"
InvertedIndices = "1"
Metal = "1"
@@ -57,7 +57,7 @@ RecursiveArrayTools = "2, 3"
SpecialFunctions = "2"
Statistics = "1"
StatsBase = "0.33, 0.34"
julia = "1.9"
julia = "1.9, 1.10, 1.11"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
4 changes: 2 additions & 2 deletions README.md
@@ -70,6 +70,6 @@ If you find a bug or have a suggestion, please [open an issue](https://github.co

- **Neural Bayes estimators for irregular spatial data using graph neural networks** [[paper]](https://doi.org/10.1080/10618600.2024.2433671)[[code]](https://github.com/msainsburydale/NeuralEstimatorsGNN)

- **Modern extreme value statistics for Utopian extremes** [[paper]](https://arxiv.org/abs/2311.11054)
- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)

- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)
- **Neural parameter estimation with incomplete data** [[paper]](https://arxiv.org/abs/2501.04330)[[code]](https://github.com/msainsburydale/NeuralEM)
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -58,7 +58,7 @@ This software was developed as part of academic research. If you would like to s

- **Neural Bayes estimators for irregular spatial data using graph neural networks** [[paper]](https://arxiv.org/abs/2310.02600)[[code]](https://github.com/msainsburydale/NeuralEstimatorsGNN)

- **Modern extreme value statistics for Utopian extremes** [[paper]](https://arxiv.org/abs/2311.11054)

- **Neural Methods for Amortized Inference** [[paper]](https://arxiv.org/abs/2404.12484)[[code]](https://github.com/andrewzm/Amortised_Neural_Inference_Review)

- **Neural parameter estimation with incomplete data** [[paper]](https://arxiv.org/abs/2501.04330)[[code]](https://github.com/msainsburydale/NeuralEM)

8 changes: 0 additions & 8 deletions src/Architectures.jl
@@ -136,7 +136,6 @@ struct DeepSet{T, G, K}
a::ElementwiseAggregator
S::K
end
@layer DeepSet
function DeepSet(ψ, ϕ, a::Function = mean; S = nothing)
@assert !isnothing(ψ) | !isnothing(S) "At least one of `ψ` or `S` must be given"
DeepSet(ψ, ϕ, ElementwiseAggregator(a), S)
@@ -391,7 +390,6 @@ end
Compress(a, b) = Compress(float.(a), float.(b), ones(eltype(float.(a)), length(a)))
Compress(a::Number, b::Number) = Compress([float(a)], [float(b)])
(l::Compress)(θ) = l.a .+ (l.b - l.a) ./ (one(eltype(θ)) .+ exp.(-l.k .* θ))
@layer Compress
Flux.trainable(l::Compress) = ()


@@ -413,7 +411,6 @@ function (l::TruncateSupport)(θ::AbstractMatrix)
end
TruncateSupport(a, b) = TruncateSupport(float.(a), float.(b), length(a))
TruncateSupport(a::Number, b::Number) = TruncateSupport([float(a)], [float(b)], 1)
Flux.@functor TruncateSupport
Flux.trainable(l::TruncateSupport) = ()
tuncatesupport(θ, a, b) = min(max(θ, a), b)

@@ -712,7 +709,6 @@ struct DensePositive
last_only::Bool
end
DensePositive(layer::Dense; g::Function = Flux.relu, last_only::Bool = false) = DensePositive(layer, g, last_only)
@layer DensePositive
# Simple version of forward pass:
# (d::DensePositive)(x) = d.layer.σ.(Flux.softplus(d.layer.weight) * x .+ d.layer.bias)
# Complex version of forward pass based on Flux's Dense code:
@@ -778,8 +774,6 @@ struct PowerDifference{A,B}
a::A
b::B
end
@layer PowerDifference
export PowerDifference
PowerDifference() = PowerDifference([0.5f0], [2.0f0])
PowerDifference(a::Number, b::AbstractArray) = PowerDifference([a], b)
PowerDifference(a::AbstractArray, b::Number) = PowerDifference(a, [b])
@@ -808,7 +802,6 @@ b(z)
struct ResidualBlock{B}
block::B
end
Flux.@functor ResidualBlock
(b::ResidualBlock)(x) = relu.(b.block(x))
function ResidualBlock(filter, channels; stride = 1)

@@ -838,5 +831,4 @@ end
struct Shortcut{S}
s::S
end
Flux.@functor Shortcut
(s::Shortcut)(mx, x) = mx + s.s(x)
26 changes: 11 additions & 15 deletions src/Estimators.jl
@@ -21,7 +21,6 @@ struct PointEstimator <: NeuralEstimator
c::Union{Function,Compress} # NB don't document `c` since Compress layer is usually just included in `deepset`
end
PointEstimator(arch) = PointEstimator(arch, identity)
@layer PointEstimator
(est::PointEstimator)(Z) = est.c(est.arch(Z))

# ---- IntervalEstimator ----
@@ -101,7 +100,6 @@ end
IntervalEstimator(u::DeepSet, v::DeepSet = u; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(v), identity, probs, g)
IntervalEstimator(u::DeepSet, c::Compress; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(u), c, probs, g)
IntervalEstimator(u::DeepSet, v::DeepSet, c::Compress; probs = [0.025, 0.975], g = exp) = IntervalEstimator(deepcopy(u), deepcopy(v), c, probs, g)
@layer IntervalEstimator
Flux.trainable(est::IntervalEstimator) = (u = est.u, v = est.v)
function (est::IntervalEstimator)(Z)
bₗ = est.u(Z) # lower bound
@@ -259,7 +257,6 @@ function QuantileEstimatorDiscrete(v::DeepSet; probs = [0.05, 0.25, 0.5, 0.75, 0
if !isnothing(i) @assert i > 0 end
QuantileEstimatorDiscrete(deepcopy.(repeat([v], length(probs))), probs, g, i)
end
@layer QuantileEstimatorDiscrete
Flux.trainable(est::QuantileEstimatorDiscrete) = (v = est.v, )
function (est::QuantileEstimatorDiscrete)(input) # input might be Z, or a tuple (Z, θ₋ᵢ)

@@ -354,7 +351,7 @@ See also [`QuantileEstimatorDiscrete`](@ref).
# Examples
```
using NeuralEstimators, Flux, Distributions , InvertedIndices, Statistics
using NeuralEstimators, Flux, Distributions, InvertedIndices, Statistics
using AlgebraOfGraphics, CairoMakie
# Model: Z|θ ~ N(θ, 1) with θ ~ N(0, 1)
@@ -468,7 +465,6 @@ function QuantileEstimatorContinuous(deepset::DeepSet; i::Union{Integer, Nothing
if !isnothing(i) @assert i > 0 end
QuantileEstimatorContinuous(deepset, i)
end
@layer QuantileEstimatorContinuous
# core method (used internally)
(est::QuantileEstimatorContinuous)(tup::Tuple) = est.deepset(tup)
# user-level convenience functions (not used internally)
@@ -545,7 +541,7 @@ of output neurons in the final layer of the summary network plus the number of
parameters in the statistical model. Second, the number of output neurons in the
final layer of the inference network must be equal to one.
The ratio estimator is trained by solving a relatively straightforward binary
The ratio estimator is trained by solving a binary
classification problem. Specifically, consider the problem of distinguishing
dependent parameter--data pairs
${(\boldsymbol{\theta}', \boldsymbol{Z}')' \sim p(\boldsymbol{Z}, \boldsymbol{\theta})}$ with
@@ -569,7 +565,7 @@ $r(\boldsymbol{Z}, \boldsymbol{\theta})$ is returned (setting the keyword argume
can then be used in various downstream Bayesian
(e.g., [Hermans et al., 2020](https://proceedings.mlr.press/v119/hermans20a.html))
or Frequentist
(e.g., [Walchessen et al., 2023](https://arxiv.org/abs/2305.04634))
(e.g., [Walchessen et al., 2024](https://doi.org/10.1016/j.spasta.2024.100848))
inferential algorithms.
See also [`mlestimate`](@ref) and [`mapestimate`](@ref) for obtaining
@@ -578,7 +574,7 @@ approximate maximum-likelihood and maximum-a-posteriori estimates, and
# Examples
```
using NeuralEstimators, Flux, Statistics, Optim
using NeuralEstimators, Flux, Statistics
# Generate data from Z|μ,σ ~ N(μ, σ²) with μ, σ ~ U(0, 1)
p = 2 # number of unknown parameters in the statistical model
@@ -609,24 +605,26 @@ r̂ = RatioEstimator(deepset)
# Train the estimator
r̂ = train(r̂, prior, simulate, m = m)
# Inference with "observed" data set
# Inference with "observed" data (grid-based optimisation)
θ = prior(1)
z = simulate(θ, m)[1]
θ₀ = [0.5, 0.5] # initial estimate
mlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate (requires Optim.jl to be loaded)
mapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate (requires Optim.jl to be loaded)
θ_grid = expandgrid(0:0.01:1, 0:0.01:1)' # fine gridding of the parameter space
θ_grid = Float32.(θ_grid)
r̂(z, θ_grid) # likelihood-to-evidence ratios over grid
mlestimate(r̂, z; θ_grid = θ_grid) # maximum-likelihood estimate
mapestimate(r̂, z; θ_grid = θ_grid) # maximum-a-posteriori estimate
sampleposterior(r̂, z; θ_grid = θ_grid) # posterior samples
# Inference with "observed" data (gradient-based optimisation using Optim.jl)
using Optim
θ₀ = [0.5, 0.5] # initial estimate
mlestimate(r̂, z; θ₀ = θ₀) # maximum-likelihood estimate
mapestimate(r̂, z; θ₀ = θ₀) # maximum-a-posteriori estimate
```
"""
struct RatioEstimator <: NeuralEstimator
deepset::DeepSet
end
@layer RatioEstimator
function (est::RatioEstimator)(Z, θ; kwargs...)
est((Z, θ); kwargs...) # "Tupleise" the input and pass to Tuple method
end
@@ -727,7 +725,6 @@ struct PiecewiseEstimator <: NeuralEstimator
end
end
end
@layer PiecewiseEstimator
function (pe::PiecewiseEstimator)(Z)
# Note that this is an inefficient implementation, analogous to the inefficient
# DeepSet implementation. A more efficient approach would be to subset Z based
@@ -943,7 +940,6 @@ struct Ensemble <: NeuralEstimator
estimators
end
Ensemble(architecture::Function, J::Integer) = Ensemble([architecture() for j in 1:J])
@layer Ensemble

function train(ensemble::Ensemble, args...; kwargs...)
kwargs = (;kwargs...)
5 changes: 0 additions & 5 deletions src/Graphs.jl
@@ -194,7 +194,6 @@ function (l::IndicatorWeights)(h::M) where M <: AbstractMatrix{T} where T
N = reduce(vcat, N)
Float32.(N)
end
@layer IndicatorWeights
Flux.trainable(l::IndicatorWeights) = ()


@@ -240,7 +239,6 @@ function (l::KernelWeights)(h::M) where M <: AbstractMatrix{T} where T
N = reduce(vcat, N)
Float32.(N)
end
@layer KernelWeights
Flux.trainable(l::KernelWeights) = ()


@@ -355,8 +353,6 @@ struct SpatialGraphConv{W<:AbstractMatrix, A, B,C, F} <: GNNLayer
f::C
g::F
end
@layer SpatialGraphConv
WeightedGraphConv = SpatialGraphConv; export WeightedGraphConv # alias for backwards compatability
function SpatialGraphConv(
ch::Pair{Int,Int},
g = relu;
@@ -546,7 +542,6 @@ struct GNNSummary{F, G}
propagation::F # propagation module
readout::G # readout module
end
@layer GNNSummary
Base.show(io::IO, D::GNNSummary) = print(io, "\nThe propagation and readout modules of a graph neural network (GNN), with a total of $(nparams(D)) trainable parameters:\n\nPropagation module ($(nparams(D.propagation)) parameters): $(D.propagation)\n\nReadout module ($(nparams(D.readout)) parameters): $(D.readout)")

function (ψ::GNNSummary)(g::GNNGraph)
8 changes: 3 additions & 5 deletions src/NeuralEstimators.jl
@@ -8,8 +8,7 @@ using CSV
using DataFrames
using Distances
using Flux
using Flux: ofeltype, DataLoader, update!, glorot_uniform, onehotbatch, _match_eltype, @non_differentiable, @ignore_derivatives # @layer
using Flux: @functor; var"@layer" = var"@functor" # NB did this because even semi-recent versions of Flux do not include @layer
using Flux: ofeltype, DataLoader, update!, glorot_uniform, onehotbatch, _match_eltype, @non_differentiable, @ignore_derivatives
using Folds
using Graphs
using GraphNeuralNetworks
@@ -34,7 +33,7 @@ include("loss.jl")
export ParameterConfigurations, subsetparameters
include("Parameters.jl")

export DeepSet, summarystatistics, Compress, CovarianceMatrix, CorrelationMatrix, ResidualBlock
export DeepSet, summarystatistics, Compress, CovarianceMatrix, CorrelationMatrix, ResidualBlock, PowerDifference
export vectotril, vectotriu
include("Architectures.jl")

@@ -70,7 +69,6 @@ export EM, removedata, encodedata
include("missingdata.jl")

# Backwards compatability and deprecations:
simulategaussianprocess = simulategaussian; export simulategaussianprocess
export loadbestweights, loadweights
include("deprecated.jl")

@@ -79,6 +77,7 @@ end
# ---- longer term/lower priority:
# - Amortised posterior approximation (https://github.com/slimgroup/InvertibleNetworks.jl). Also allow for conditioning.
# - Amortised likelihood approximation (https://github.com/slimgroup/InvertibleNetworks.jl)
# - Sequence (e.g., time-series) input: https://jldc.ch/post/seq2one-flux/, and see also the new recurrent layers added to Flux.
# - Extension: Incorporate the following package to greatly expand bootstrap functionality: https://github.com/juliangehring/Bootstrap.jl. Note also the "straps()" method that allows one to obtain the bootstrap distribution. I think what I can do is define a method of interval(bs::BootstrapSample). Maybe one difficulty will be how to re-sample... Not sure how the bootstrap method will know to sample from the independent replicates dimension (the last dimension) of each array.
# - Add NeuralEstimators.jl to the list of packages that use Documenter: see https://documenter.juliadocs.org/stable/man/examples/
# - Add NeuralEstimators.jl to https://github.com/smsharma/awesome-neural-sbi#code-packages-and-benchmarks
@@ -90,7 +89,6 @@ end
# - Add option to check validation risk (and save the optimal estimator) more frequently than the end of each epoch.
# - Should have initialise_estimator() as an internal function, and instead have the public API be based on constructors of the various estimator classes. This aligns more with the basic ideas of Julia, where functions returning a certain class should be made as a constructor rather than a separate function.
# - Examples: discrete parameters (e.g., Chan et al., 2018). Might need extra functionality for this.
# - Sequence (e.g., time-series) input: https://jldc.ch/post/seq2one-flux/
# - Precompile NeuralEstimators.jl to reduce latency: See https://julialang.org/blog/2021/01/precompile_tutorial/. Seems easy, just need to add precompile(f, (arg_types…)) to whichever methods we want to precompile
# - Examples: data plots within each example. Can show a histogram for univariate data; a scatterplot for bivariate data; a heatmap for gridded data; and scatterplot for irregular spatial data.
# - GPU on MacOS with Metal.jl (already have extension written, need to wait until Metal.jl is further developed; in particular, need convolution layers to be implemented)
5 changes: 5 additions & 0 deletions src/deprecated.jl
@@ -6,3 +6,8 @@ Returns the weights of the neural network saved as 'best_network.bson' in the gi
"""
loadbestweights(path::String) = loadweights(joinpath(path, "best_network.bson"))
loadweights(path::String) = load(path, @__MODULE__)[:weights]


# aliases for backwards compatibility
WeightedGraphConv = SpatialGraphConv; export WeightedGraphConv
simulategaussianprocess = simulategaussian; export simulategaussianprocess
1 change: 0 additions & 1 deletion src/summarystatistics.jl
@@ -187,5 +187,4 @@ function (l::NeighbourhoodVariogram)(g::GNNGraph)
Σ = sum(z .* N, dims = 1) # ∑(Zⱼ - Zᵢ)² in each bin
vec(Σ ./ 2N_card)
end
@layer NeighbourhoodVariogram
Flux.trainable(l::NeighbourhoodVariogram) = ()