From 243703d4e691f1e63ac6402e942ea08b7d708492 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 8 Nov 2023 09:04:39 +0100 Subject: [PATCH 01/12] Initial addition of a benchmark to measure the diff performance for a NN. --- benchmark/CMakeLists.txt | 1 + benchmark/nn_diff.cpp | 59 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 benchmark/nn_diff.cpp diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5725f244f..0999797cf 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -43,6 +43,7 @@ ADD_HEYOKA_BENCHMARK(taylor_ANN) ADD_HEYOKA_BENCHMARK(taylor_ANN_2) ADD_HEYOKA_BENCHMARK(apophis) ADD_HEYOKA_BENCHMARK(stiff_equation) +ADD_HEYOKA_BENCHMARK(nn_diff) ADD_HEYOKA_BENCHMARK(mascon_models) ADD_HEYOKA_BENCHMARK(outer_ss_jet_benchmark) ADD_HEYOKA_BENCHMARK(outer_ss_long_term) diff --git a/benchmark/nn_diff.cpp b/benchmark/nn_diff.cpp new file mode 100644 index 000000000..5494ce1d8 --- /dev/null +++ b/benchmark/nn_diff.cpp @@ -0,0 +1,59 @@ +// Copyright 2020, 2021, 2022, 2023 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com) +// +// This file is part of the heyoka library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +using namespace heyoka; + +int main(int argc, char *argv[]) +{ + namespace po = boost::program_options; + + create_logger(); + set_logger_level_trace(); + + auto logger = spdlog::get("heyoka"); + + unsigned nn_layer{}; + + po::options_description desc("Options"); + + desc.add_options()("help", "produce help message")("nn_layer", po::value(&nn_layer)->default_value(10u), + "number of neurons per layer"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + if (vm.count("help") != 0u) { + std::cout << desc << "\n"; + return 0; + } + + if (nn_layer == 0u) { + throw std::invalid_argument("The number of neurons per layer cannot be zero"); + } + + auto [x, y] = make_vars("x", "y"); + auto ffnn = model::ffnn(kw::inputs = {x, y}, kw::nn_hidden = {nn_layer, nn_layer, nn_layer}, kw::n_out = 2, + kw::activations = {heyoka::tanh, heyoka::tanh, heyoka::tanh, heyoka::tanh}); + + auto dt = diff_tensors(ffnn, kw::diff_args = diff_args::params); + auto dNdtheta = dt.get_jacobian(); +} From 814ff7596f0ec4b0d206c4bb49d7eb5654c0d3a9 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 8 Nov 2023 09:08:41 +0100 Subject: [PATCH 02/12] Change the logic of diff_tensors() not to use a std::map internally. Rather, the derivatives for each order are added in unordered fashion and sorted only at the end of an iteration. --- src/expression_diff.cpp | 117 +++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index e96f519b6..9fa6fc168 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include #include +#include #include #include @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -450,11 +451,26 @@ auto diff_make_adj_dep(const std::vector &dc, std::vector{}(v.size()); + + for (auto idx : v) { + boost::hash_combine(seed, std::hash{}(idx)); + } + + return seed; + } +}; + // Forward-mode implementation of diff_tensors(). template void diff_tensors_forward_impl( - // The map of derivatives. It wil be updated as new derivatives - // are computed. + // The map of derivatives. It will be updated after all the + // derivatives have been computed. DiffMap &diff_map, // The number of derivatives in the previous-order tensor. std::vector::size_type cur_nouts, @@ -478,6 +494,10 @@ void diff_tensors_forward_impl( { assert(dc.size() > nvars); + // Local map used to temporarily store the derivatives. + // It will be merged into diff_map at the end. + fast_umap local_diff_map; + // An indices vector used as temporary variable in several places below. std::vector tmp_v_idx; @@ -523,7 +543,7 @@ void diff_tensors_forward_impl( // Use try_emplace() so that if the derivative // has already been computed, nothing happens. - diff_map.try_emplace(tmp_v_idx, 0_dbl); + local_diff_map.try_emplace(tmp_v_idx, 0_dbl); } // Move to the next diff argument. @@ -625,7 +645,7 @@ void diff_tensors_forward_impl( } // Add the derivatives of all outputs wrt the current input - // to diff_map. + // to local_diff_map. auto out_it = prev_begin; for (std::vector::size_type out_idx = 0; out_idx < cur_nouts; ++out_idx, ++out_it) { @@ -636,22 +656,25 @@ void diff_tensors_forward_impl( tmp_v_idx[diff_arg_idx + 1u] += 1u; // Check if we already computed this derivative. - if (const auto it = diff_map.find(tmp_v_idx); it == diff_map.end()) { + if (const auto it = local_diff_map.find(tmp_v_idx); it == local_diff_map.end()) { // The derivative is new. auto cur_der = diffs[diffs.size() - cur_nouts + out_idx]; - [[maybe_unused]] const auto [_, flag] = diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); + [[maybe_unused]] const auto [_, flag] = local_diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); assert(flag); } } } + + // Merge local_diff_map into diff_map. + diff_map.insert(diff_map.end(), local_diff_map.begin(), local_diff_map.end()); } // Reverse-mode implementation of diff_tensors(). template void diff_tensors_reverse_impl( - // The map of derivatives. It wil be updated as new derivatives - // are computed. + // The map of derivatives. It will be updated after all the + // derivatives have been computed. DiffMap &diff_map, // The number of derivatives in the previous-order tensor. std::vector::size_type cur_nouts, @@ -675,6 +698,10 @@ void diff_tensors_reverse_impl( { assert(dc.size() > nvars); + // Local map used to temporarily store the derivatives. + // It will be merged into diff_map at the end. + fast_umap local_diff_map; + // Cache the number of diff arguments. const auto nargs = args.size(); @@ -813,7 +840,7 @@ void diff_tensors_reverse_impl( assert(flag); } - // Add the derivatives to diff_map. + // Add the derivatives to local_diff_map. for (decltype(args.size()) j = 0; j < nargs; ++j) { // Compute the indices vector for the current derivative. tmp_v_idx = prev_begin->first; @@ -824,7 +851,7 @@ void diff_tensors_reverse_impl( tmp_v_idx[j + 1u] += 1u; // Check if we already computed this derivative. - if (const auto it = diff_map.find(tmp_v_idx); it == diff_map.end()) { + if (const auto it = local_diff_map.find(tmp_v_idx); it == local_diff_map.end()) { // The derivative is new. If the diff argument is present in the // decomposition, then we will calculate the derivative and add it. // Otherwise, we set the derivative to zero and add it. @@ -834,24 +861,18 @@ void diff_tensors_reverse_impl( cur_der = it_dmap->second; } - [[maybe_unused]] const auto [_, flag] = diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); + [[maybe_unused]] const auto [_, flag] = local_diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); assert(flag); } } // Update prev_begin as we move to the next output. - // NOTE; the iterator here stays valid even if we keep on modifying - // diff_map thanks to the iterator invalidation rules for associative - // containers (i.e., we never call erase, we just keep on inserting new - // elements). - // NOTE: prev_begin is iterating over the previous-order derivatives - // without interference from the new derivatives we are adding, since - // they are all higher-order derivatives (and thus appended past the end - // of the previous order derivative range). ++prev_begin; - - assert(prev_begin != diff_map.end()); + assert(prev_begin != diff_map.end() || i + 1u == cur_nouts); } + + // Merge local_diff_map into diff_map. + diff_map.insert(diff_map.end(), local_diff_map.begin(), local_diff_map.end()); } } // namespace @@ -1003,9 +1024,23 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector diff_map; + // This will be kept manually sorted according to dtens_v_idx_cmp + // and it will be turned into a flat map at the end. + dtens_map_t::sequence_type diff_map; + + // Helper to locate the vector of indices v in diff_map. If not present, + // diff_map.end() will be returned. + auto search_diff_map = [&diff_map](const dtens_v_idx_t &v) { + auto it = std::lower_bound(diff_map.begin(), diff_map.end(), v, [](const auto &item, const auto &vec) { + return dtens_v_idx_cmp{}(item.first, vec); + }); + + if (it != diff_map.end() && it->first == v) { + return it; + } else { + return diff_map.end(); + } + }; // An indices vector with preallocated storage, // used as temporary variable in several places below. @@ -1021,8 +1056,8 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector(i); - assert(diff_map.count(tmp_v_idx) == 0u); - diff_map[tmp_v_idx] = v_ex[i]; + assert(search_diff_map(tmp_v_idx) == diff_map.end()); + diff_map.emplace_back(tmp_v_idx, v_ex[i]); } // Iterate over the derivative orders. @@ -1032,7 +1067,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector(0)); - const auto prev_begin = diff_map.find(tmp_v_idx); + const auto prev_begin = search_diff_map(tmp_v_idx); assert(prev_begin != diff_map.end()); // Store the previous-order derivatives into a separate @@ -1051,6 +1086,10 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector &v_ex, const std::vector(0)); - const auto new_begin = diff_map.find(tmp_v_idx); - assert(new_begin != diff_map.end()); - // Create the vector of expressions for the substitution. std::vector subs_ret; - for (auto it = new_begin; it != diff_map.end(); ++it) { + for (auto *it = cur_begin; it != cur_end; ++it) { subs_ret.push_back(it->second); } @@ -1087,7 +1127,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vectorsecond = subs_ret[i]; } @@ -1111,7 +1151,8 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector Date: Wed, 8 Nov 2023 09:17:01 +0100 Subject: [PATCH 03/12] Replace several usages of std::unordered with the fast counterparts from Boost in the diff_tensors() code. --- src/expression_diff.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 9fa6fc168..4477f86b3 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -336,7 +336,7 @@ auto diff_make_adj_dep(const std::vector &dc, std::vector> adj; + std::vector> adj; adj.resize(boost::numeric_cast(dc.size())); std::vector> dep; @@ -506,7 +506,7 @@ void diff_tensors_forward_impl( // to avoid iterating over those subexpressions which do not depend on // an input. We need two containers (with identical content) // because we need both ordered iteration AND fast lookup. - std::unordered_set in_deps; + fast_uset in_deps; std::vector sorted_in_deps; // A stack to be used when filling up in_deps/sorted_in_deps. @@ -515,7 +515,7 @@ void diff_tensors_forward_impl( // Create a dictionary mapping an input to its position // in the decomposition. This is used to locate diff arguments // in the decomposition. - std::unordered_map::size_type> input_idx_map; + fast_umap::size_type, std::hash> input_idx_map; for (std::vector::size_type i = 0; i < nvars; ++i) { const auto &cur_in = dc[i]; assert(input_idx_map.count(cur_in) == 0u); @@ -714,7 +714,7 @@ void diff_tensors_reverse_impl( // does not depend (recall that the decomposition contains the subexpressions // for ALL outputs). We need two containers (with identical content) // because we need both ordered iteration AND fast lookup. - std::unordered_set out_deps; + fast_uset out_deps; std::vector sorted_out_deps; // A stack to be used when filling up out_deps/sorted_out_deps. @@ -834,7 +834,7 @@ void diff_tensors_reverse_impl( // to fetch from diffs only the derivatives we are interested in // (since there may be vars/params in the decomposition wrt which // the derivatives are not requested). - std::unordered_map dmap; + fast_umap> dmap; for (std::vector::size_type j = 0; j < nvars; ++j) { [[maybe_unused]] const auto [_, flag] = dmap.try_emplace(dc[j], diffs[j]); assert(flag); @@ -1225,7 +1225,7 @@ dtens diff_tensors(const std::vector &v_ex, const std::variant> args_set(args.begin(), args.end()); if (args_set.size() != args.size()) { throw std::invalid_argument( fmt::format("Duplicate entries detected in the list of variables/parameters with respect to which the " From 4978b36cdc1e79df5e4afc6f581857fcdc1aa476 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 8 Nov 2023 09:26:00 +0100 Subject: [PATCH 04/12] Parallelise diff sorting. --- src/expression_diff.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 4477f86b3..448213006 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -1107,8 +1109,8 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector Date: Wed, 8 Nov 2023 09:28:50 +0100 Subject: [PATCH 05/12] Internal rename. --- src/expression_diff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 448213006..89c504f9f 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -1109,8 +1109,8 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector Date: Wed, 8 Nov 2023 09:48:16 +0100 Subject: [PATCH 06/12] Typos. --- doc/changelog.rst | 2 +- src/expression_diff.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 695db4cb3..952d6d02c 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -150,7 +150,7 @@ Changes ~~~~~~~ - The step callbacks are now copied in :ref:`ensemble propagations ` - rather then being shared among threads. The aim of this change + rather than being shared among threads. The aim of this change is to reduce the likelihood of data races (`#334 `__). - Comprehensive overhaul of the expression system, including: diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 89c504f9f..c1ac51527 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -1114,7 +1114,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector Date: Wed, 8 Nov 2023 12:09:15 +0100 Subject: [PATCH 07/12] Avoid storing derivatives in maps when we are at the first order. --- src/expression_diff.cpp | 131 ++++++++++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 32 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index c1ac51527..3334595f7 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -492,13 +492,30 @@ void diff_tensors_forward_impl( const std::vector &args, // Iterator in diff_map pointing to the first // derivative for the previous order. - typename DiffMap::iterator prev_begin) + typename DiffMap::iterator prev_begin, + // The current derivative order. + std::uint32_t cur_order) { assert(dc.size() > nvars); - - // Local map used to temporarily store the derivatives. - // It will be merged into diff_map at the end. - fast_umap local_diff_map; + assert(cur_order > 0u); + + // Local data structures used to temporarily store the derivatives, + // which will eventually be added to diff_map. + // For derivative orders > 1, the algorithm we employ + // will produce several times the same derivative, and thus + // we need to store the derivatives in a dictionary in order + // to prevent duplicates. For order-1 derivatives, no duplicate + // derivatives will be produced and thus we can use a plain vector + // which can be quite a bit faster. + using diff_map_t = fast_umap; + using diff_vec_t = std::vector>; + using local_diff_t = std::variant; + auto local_diff = (cur_order == 1u) ? local_diff_t(diff_vec_t{}) : local_diff_t(diff_map_t{}); + + // Helpers to ease the access to the active member of the local_diff variant. + // NOTE: if used incorrectly, these will throw at runtime. + auto local_dmap = [&local_diff]() -> diff_map_t & { return std::get(local_diff); }; + auto local_dvec = [&local_diff]() -> diff_vec_t & { return std::get(local_diff); }; // An indices vector used as temporary variable in several places below. std::vector tmp_v_idx; @@ -530,6 +547,7 @@ void diff_tensors_forward_impl( for (decltype(args.size()) diff_arg_idx = 0; diff_arg_idx < args.size(); ++diff_arg_idx) { const auto &cur_diff_arg = args[diff_arg_idx]; + // Check if the current diff argument is one of the inputs. if (input_idx_map.count(cur_diff_arg) == 0u) { // The diff argument is not one of the inputs: // set the derivatives of all outputs wrt to the @@ -543,9 +561,13 @@ void diff_tensors_forward_impl( assert(diff_arg_idx + 1u < tmp_v_idx.size()); tmp_v_idx[diff_arg_idx + 1u] += 1u; - // Use try_emplace() so that if the derivative - // has already been computed, nothing happens. - local_diff_map.try_emplace(tmp_v_idx, 0_dbl); + if (cur_order == 1u) { + local_dvec().emplace_back(tmp_v_idx, 0_dbl); + } else { + // NOTE: use try_emplace() so that if the derivative + // has already been computed, nothing happens. + local_dmap().try_emplace(tmp_v_idx, 0_dbl); + } } // Move to the next diff argument. @@ -647,7 +669,7 @@ void diff_tensors_forward_impl( } // Add the derivatives of all outputs wrt the current input - // to local_diff_map. + // to the local map. auto out_it = prev_begin; for (std::vector::size_type out_idx = 0; out_idx < cur_nouts; ++out_idx, ++out_it) { @@ -657,19 +679,28 @@ void diff_tensors_forward_impl( assert(diff_arg_idx + 1u < tmp_v_idx.size()); tmp_v_idx[diff_arg_idx + 1u] += 1u; - // Check if we already computed this derivative. - if (const auto it = local_diff_map.find(tmp_v_idx); it == local_diff_map.end()) { - // The derivative is new. + if (cur_order == 1u) { auto cur_der = diffs[diffs.size() - cur_nouts + out_idx]; + local_dvec().emplace_back(tmp_v_idx, std::move(cur_der)); + } else { + // Check if we already computed this derivative. + if (const auto it = local_dmap().find(tmp_v_idx); it == local_dmap().end()) { + // The derivative is new. + auto cur_der = diffs[diffs.size() - cur_nouts + out_idx]; - [[maybe_unused]] const auto [_, flag] = local_diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); - assert(flag); + [[maybe_unused]] const auto [_, flag] = local_dmap().try_emplace(tmp_v_idx, std::move(cur_der)); + assert(flag); + } } } } - // Merge local_diff_map into diff_map. - diff_map.insert(diff_map.end(), local_diff_map.begin(), local_diff_map.end()); + // Merge the local map into diff_map. + if (cur_order == 1u) { + diff_map.insert(diff_map.end(), local_dvec().begin(), local_dvec().end()); + } else { + diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); + } } // Reverse-mode implementation of diff_tensors(). @@ -696,13 +727,30 @@ void diff_tensors_reverse_impl( const std::vector &args, // Iterator in diff_map pointing to the first // derivative for the previous order. - typename DiffMap::iterator prev_begin) + typename DiffMap::iterator prev_begin, + // The current derivative order. + std::uint32_t cur_order) { assert(dc.size() > nvars); - - // Local map used to temporarily store the derivatives. - // It will be merged into diff_map at the end. - fast_umap local_diff_map; + assert(cur_order > 0u); + + // Local data structures used to temporarily store the derivatives, + // which will eventually be added to diff_map. + // For derivative orders > 1, the algorithm we employ + // will produce several times the same derivative, and thus + // we need to store the derivatives in a dictionary in order + // to prevent duplicates. For order-1 derivatives, no duplicate + // derivatives will be produced and thus we can use a plain vector + // which can be quite a bit faster. + using diff_map_t = fast_umap; + using diff_vec_t = std::vector>; + using local_diff_t = std::variant; + auto local_diff = (cur_order == 1u) ? local_diff_t(diff_vec_t{}) : local_diff_t(diff_map_t{}); + + // Helpers to ease the access to the active member of the local_diff variant. + // NOTE: if used incorrectly, these will throw at runtime. + auto local_dmap = [&local_diff]() -> diff_map_t & { return std::get(local_diff); }; + auto local_dvec = [&local_diff]() -> diff_vec_t & { return std::get(local_diff); }; // Cache the number of diff arguments. const auto nargs = args.size(); @@ -842,7 +890,7 @@ void diff_tensors_reverse_impl( assert(flag); } - // Add the derivatives to local_diff_map. + // Add the derivatives to the local map. for (decltype(args.size()) j = 0; j < nargs; ++j) { // Compute the indices vector for the current derivative. tmp_v_idx = prev_begin->first; @@ -852,10 +900,9 @@ void diff_tensors_reverse_impl( // is representable by std::uint32_t. tmp_v_idx[j + 1u] += 1u; - // Check if we already computed this derivative. - if (const auto it = local_diff_map.find(tmp_v_idx); it == local_diff_map.end()) { - // The derivative is new. If the diff argument is present in the - // decomposition, then we will calculate the derivative and add it. + if (cur_order == 1u) { + // Check if the diff argument is present in the + // decomposition: if it is, we will calculate the derivative and add it. // Otherwise, we set the derivative to zero and add it. expression cur_der = 0_dbl; @@ -863,8 +910,22 @@ void diff_tensors_reverse_impl( cur_der = it_dmap->second; } - [[maybe_unused]] const auto [_, flag] = local_diff_map.try_emplace(tmp_v_idx, std::move(cur_der)); - assert(flag); + local_dvec().emplace_back(tmp_v_idx, std::move(cur_der)); + } else { + // Check if we already computed this derivative. + if (const auto it = local_dmap().find(tmp_v_idx); it == local_dmap().end()) { + // The derivative is new. If the diff argument is present in the + // decomposition, then we will calculate the derivative and add it. + // Otherwise, we set the derivative to zero and add it. + expression cur_der = 0_dbl; + + if (const auto it_dmap = dmap.find(args[j]); it_dmap != dmap.end()) { + cur_der = it_dmap->second; + } + + [[maybe_unused]] const auto [_, flag] = local_dmap().try_emplace(tmp_v_idx, std::move(cur_der)); + assert(flag); + } } } @@ -873,8 +934,12 @@ void diff_tensors_reverse_impl( assert(prev_begin != diff_map.end() || i + 1u == cur_nouts); } - // Merge local_diff_map into diff_map. - diff_map.insert(diff_map.end(), local_diff_map.begin(), local_diff_map.end()); + // Merge the local map into diff_map. + if (cur_order == 1u) { + diff_map.insert(diff_map.end(), local_dvec().begin(), local_dvec().end()); + } else { + diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); + } } } // namespace @@ -1099,9 +1164,11 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector= args.size()) { - diff_tensors_forward_impl(diff_map, cur_nouts, dc, dep, revdep, adj, nvars, args, prev_begin); + diff_tensors_forward_impl(diff_map, cur_nouts, dc, dep, revdep, adj, nvars, args, prev_begin, + cur_order + 1u); } else { - diff_tensors_reverse_impl(diff_map, cur_nouts, dc, dep, revdep, adj, nvars, args, prev_begin); + diff_tensors_reverse_impl(diff_map, cur_nouts, dc, dep, revdep, adj, nvars, args, prev_begin, + cur_order + 1u); } // Determine the range in diff_map for the current-order derivatives. From 87a23a9a53387f5e4a3f89678672b67532cf48c8 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 15:48:18 +0100 Subject: [PATCH 08/12] Minor. --- src/expression_diff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 3334595f7..5959efa4f 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -505,7 +505,7 @@ void diff_tensors_forward_impl( // will produce several times the same derivative, and thus // we need to store the derivatives in a dictionary in order // to prevent duplicates. For order-1 derivatives, no duplicate - // derivatives will be produced and thus we can use a plain vector + // derivatives will be produced and thus we can use a plain vector, // which can be quite a bit faster. using diff_map_t = fast_umap; using diff_vec_t = std::vector>; @@ -740,7 +740,7 @@ void diff_tensors_reverse_impl( // will produce several times the same derivative, and thus // we need to store the derivatives in a dictionary in order // to prevent duplicates. For order-1 derivatives, no duplicate - // derivatives will be produced and thus we can use a plain vector + // derivatives will be produced and thus we can use a plain vector, // which can be quite a bit faster. using diff_map_t = fast_umap; using diff_vec_t = std::vector>; From 99664eceb0f29cf60268ab24853b879a71f5e470 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 22:07:57 +0100 Subject: [PATCH 09/12] Fix quadratic complexity when building a ffnn model. --- src/model/ffnn.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/model/ffnn.cpp b/src/model/ffnn.cpp index 4e5358021..435b8839b 100644 --- a/src/model/ffnn.cpp +++ b/src/model/ffnn.cpp @@ -19,6 +19,7 @@ #include #include +#include #include HEYOKA_BEGIN_NAMESPACE @@ -43,21 +44,28 @@ std::vector compute_layer(su32 layer_id, const std::vector retval(static_cast::size_type>(n_neurons_curr_layer), 0_dbl); + std::vector retval, tmp_sum; + retval.reserve(n_neurons_curr_layer); + for (su32 i = 0; i < n_neurons_curr_layer; ++i) { + // Clear the summation terms. + tmp_sum.clear(); + for (su32 j = 0; j < n_neurons_prev_layer; ++j) { // Add the weight and update the weight counter. - retval[i] += nn_wb[wcounter] * inputs[j]; + tmp_sum.push_back(nn_wb[wcounter] * inputs[j]); ++wcounter; } // Add the bias and update the counter. - retval[i] += nn_wb[bcounter + n_net_w]; + tmp_sum.push_back(nn_wb[bcounter + n_net_w]); ++bcounter; + // Activation function. - retval[i] = activation(retval[i]); + retval.push_back(activation(sum(tmp_sum))); } + return retval; } From 889e8e0baf00f0d57447c166572a16c64c13da54 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 22:16:45 +0100 Subject: [PATCH 10/12] Coverage fixes. --- src/expression_diff.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 5959efa4f..f2b52111d 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -749,7 +749,10 @@ void diff_tensors_reverse_impl( // Helpers to ease the access to the active member of the local_diff variant. // NOTE: if used incorrectly, these will throw at runtime. - auto local_dmap = [&local_diff]() -> diff_map_t & { return std::get(local_diff); }; + // NOTE: currently local_dmap is never used because the heuristic + // for deciding between forward and reverse mode prevents reverse mode + // from being used for order > 1. + auto local_dmap = [&local_diff]() -> diff_map_t & { return std::get(local_diff); }; // LCOV_EXCL_LINE auto local_dvec = [&local_diff]() -> diff_vec_t & { return std::get(local_diff); }; // Cache the number of diff arguments. @@ -912,6 +915,7 @@ void diff_tensors_reverse_impl( local_dvec().emplace_back(tmp_v_idx, std::move(cur_der)); } else { + // LCOV_EXCL_START // Check if we already computed this derivative. if (const auto it = local_dmap().find(tmp_v_idx); it == local_dmap().end()) { // The derivative is new. If the diff argument is present in the @@ -926,6 +930,7 @@ void diff_tensors_reverse_impl( [[maybe_unused]] const auto [_, flag] = local_dmap().try_emplace(tmp_v_idx, std::move(cur_der)); assert(flag); } + // LCOV_EXCL_STOP } } @@ -938,7 +943,7 @@ void diff_tensors_reverse_impl( if (cur_order == 1u) { diff_map.insert(diff_map.end(), local_dvec().begin(), local_dvec().end()); } else { - diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); + diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); // LCOV_EXCL_LINE } } From 360af6185faadbd6324e87495bb3379717d4c29b Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 22:18:37 +0100 Subject: [PATCH 11/12] Update changelog. --- doc/changelog.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index 7c605e168..9a8e42762 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -21,6 +21,9 @@ New Changes ~~~~~~~ +- Substantial speedups in the computation of first-order derivatives + with respect to many variables/parameters + (`#358 `__). - Substantial performance improvements in the computation of derivative tensors of large expressions with a high degree of internal redundancy From 2832c9e69da937a27ad9981776da7e651dc32091 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 23:33:09 +0100 Subject: [PATCH 12/12] Dox fix. --- doc/advanced_tutorials.rst | 2 +- doc/basic_tutorials.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/advanced_tutorials.rst b/doc/advanced_tutorials.rst index ad1d9a419..02833671f 100644 --- a/doc/advanced_tutorials.rst +++ b/doc/advanced_tutorials.rst @@ -5,7 +5,7 @@ Advanced tutorials .. important:: - More :ref:`tutorials ` and :ref:`examples ` are available in the documentation + More tutorials and examples are available in the documentation of heyoka's `Python bindings `__. In this section we will show some of heyoka's more advanced functionalities, diff --git a/doc/basic_tutorials.rst b/doc/basic_tutorials.rst index 5c69e1a2f..951a8d6ef 100644 --- a/doc/basic_tutorials.rst +++ b/doc/basic_tutorials.rst @@ -5,7 +5,7 @@ Basic tutorials .. important:: - More :ref:`tutorials ` and :ref:`examples ` are available in the documentation + More tutorials and examples are available in the documentation of heyoka's `Python bindings `__. The code snippets in these tutorials assume the inclusion of the