From 0c934309c0ae5908c75efaf04ae5b6f4e106af5e Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 20 Oct 2021 11:09:18 +0100 Subject: [PATCH] Performance improvements for transformations (#69) * Eagerly DAGify when comparing for equality If we determine that two expressions are equal, we can DAGify by replacing the operands of one with the operands of the other. This significantly speeds up traversals for complex forms that have many equal sub-terms. * map_dag: Allow caller to provide caches This will help reuse of transformed traversals when the same mapping function is applied multiple times. This occurs, for example, in the dispatching for derivatives in apply_derivatives. * restrictions: Reuse caches between calls to restriction propagation * derivatives: Reuse cache between calls to dispatched rulesets --- ufl/algorithms/apply_derivatives.py | 43 +++++++++++++++++++++------- ufl/algorithms/apply_restrictions.py | 9 ++++-- ufl/corealg/map_dag.py | 34 ++++++++++++++++++---- ufl/exprequals.py | 2 ++ 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/ufl/algorithms/apply_derivatives.py b/ufl/algorithms/apply_derivatives.py index d1fc39f99..c4123f3cc 100644 --- a/ufl/algorithms/apply_derivatives.py +++ b/ufl/algorithms/apply_derivatives.py @@ -7,6 +7,7 @@ # # SPDX-License-Identifier: LGPL-3.0-or-later +from collections import defaultdict from ufl.log import error, warning @@ -1048,6 +1049,9 @@ def coordinate_derivative(self, o): class DerivativeRuleDispatcher(MultiFunction): def __init__(self): MultiFunction.__init__(self) + # caches for reuse in the dispatched transformers + self.vcaches = defaultdict(dict) + self.rcaches = defaultdict(dict) def terminal(self, o): return o @@ -1059,24 +1063,41 @@ def derivative(self, o): def grad(self, o, f): rules = GradRuleset(o.ufl_shape[-1]) - return map_expr_dag(rules, f) + key = (GradRuleset, o.ufl_shape[-1]) + return map_expr_dag(rules, f, + vcache=self.vcaches[key], + rcache=self.rcaches[key]) def 
reference_grad(self, o, f): rules = ReferenceGradRuleset(o.ufl_shape[-1]) # FIXME: Look over this and test better. - return map_expr_dag(rules, f) + key = (ReferenceGradRuleset, o.ufl_shape[-1]) + return map_expr_dag(rules, f, + vcache=self.vcaches[key], + rcache=self.rcaches[key]) def variable_derivative(self, o, f, dummy_v): - rules = VariableRuleset(o.ufl_operands[1]) - return map_expr_dag(rules, f) + op = o.ufl_operands[1] + rules = VariableRuleset(op) + key = (VariableRuleset, op) + return map_expr_dag(rules, f, + vcache=self.vcaches[key], + rcache=self.rcaches[key]) def coefficient_derivative(self, o, f, dummy_w, dummy_v, dummy_cd): dummy, w, v, cd = o.ufl_operands rules = GateauxDerivativeRuleset(w, v, cd) - return map_expr_dag(rules, f) + key = (GateauxDerivativeRuleset, w, v, cd) + return map_expr_dag(rules, f, + vcache=self.vcaches[key], + rcache=self.rcaches[key]) def coordinate_derivative(self, o, f, dummy_w, dummy_v, dummy_cd): o_ = o.ufl_operands - return CoordinateDerivative(map_expr_dag(self, o_[0]), o_[1], o_[2], o_[3]) + key = (CoordinateDerivative, o_[0]) + return CoordinateDerivative(map_expr_dag(self, o_[0], + vcache=self.vcaches[key], + rcache=self.rcaches[key]), + o_[1], o_[2], o_[3]) def indexed(self, o, Ap, ii): # TODO: (Partially) duplicated in generic rules # Reuse if untouched @@ -1209,6 +1230,8 @@ def jacobian(self, o): class CoordinateDerivativeRuleDispatcher(MultiFunction): def __init__(self): MultiFunction.__init__(self) + self.vcache = defaultdict(dict) + self.rcache = defaultdict(dict) def terminal(self, o): return o @@ -1227,17 +1250,17 @@ def reference_grad(self, o): def coefficient_derivative(self, o): return o - def coordinate_derivative(self, o): + def coordinate_derivative(self, o, f, w, v, cd): from ufl.algorithms import extract_unique_elements spaces = set(c.family() for c in extract_unique_elements(o)) unsupported_spaces = {"Argyris", "Bell", "Hermite", "Morley"} if spaces & unsupported_spaces: error("CoordinateDerivative 
is not supported for elements of type %s. " "This is because their pullback is not implemented in UFL." % unsupported_spaces) - f, w, v, cd = o.ufl_operands - f = self(f) # transform f + _, w, v, cd = o.ufl_operands rules = CoordinateDerivativeRuleset(w, v, cd) - return map_expr_dag(rules, f) + key = (CoordinateDerivativeRuleset, w, v, cd) + return map_expr_dag(rules, f, vcache=self.vcache[key], rcache=self.rcache[key]) def apply_coordinate_derivatives(expression): diff --git a/ufl/algorithms/apply_restrictions.py b/ufl/algorithms/apply_restrictions.py index a9d38d79d..4047f1d0b 100644 --- a/ufl/algorithms/apply_restrictions.py +++ b/ufl/algorithms/apply_restrictions.py @@ -21,6 +21,9 @@ def __init__(self, side=None): MultiFunction.__init__(self) self.current_restriction = side self.default_restriction = "+" + # Caches for propagating the restriction with map_expr_dag + self.vcaches = {"+": {}, "-": {}} + self.rcaches = {"+": {}, "-": {}} if self.current_restriction is None: self._rp = {"+": RestrictionPropagator("+"), "-": RestrictionPropagator("-")} @@ -32,8 +35,10 @@ def restricted(self, o): if self.current_restriction is not None: error("Cannot restrict an expression twice.") # Configure a propagator for this side and apply to subtree - # FIXME: Reuse cache between these calls! - return map_expr_dag(self._rp[o.side()], o.ufl_operands[0]) + side = o.side() + return map_expr_dag(self._rp[side], o.ufl_operands[0], + vcache=self.vcaches[side], + rcache=self.rcaches[side]) # --- Reusable rules diff --git a/ufl/corealg/map_dag.py b/ufl/corealg/map_dag.py index 84846eef7..6b20c4179 100644 --- a/ufl/corealg/map_dag.py +++ b/ufl/corealg/map_dag.py @@ -14,32 +14,56 @@ from ufl.corealg.multifunction import MultiFunction -def map_expr_dag(function, expression, compress=True): +def map_expr_dag(function, expression, + compress=True, + vcache=None, + rcache=None): """Apply a function to each subexpression node in an expression DAG. 
If *compress* is ``True`` (default) the output object from the function is cached in a ``dict`` and reused such that the resulting expression DAG does not contain duplicate objects. + If the same function is called multiple times in a transformation + (as for example in apply_derivatives), then to reuse caches across + the call, provide these two arguments: + + :arg vcache: Optional dict for caching results of intermediate transformations + :arg rcache: Optional dict for caching results for compression. + Return the result of the final function call. """ - result, = map_expr_dags(function, [expression], compress=compress) + result, = map_expr_dags(function, [expression], compress=compress, + vcache=vcache, + rcache=rcache) return result -def map_expr_dags(function, expressions, compress=True): +def map_expr_dags(function, expressions, + compress=True, + vcache=None, + rcache=None): """Apply a function to each subexpression node in an expression DAG. If *compress* is ``True`` (default) the output object from the function is cached in a ``dict`` and reused such that the resulting expression DAG does not contain duplicate objects. + If the same function is called multiple times in a transformation + (as for example in apply_derivatives), then to reuse caches across + the call, provide these two arguments: + + :arg vcache: Optional dict for caching results of intermediate transformations + :arg rcache: Optional dict for caching results for compression. + Return a list with the result of the final function call for each expression.
""" # Temporary data structures - vcache = {} # expr -> r = function(expr,...), cache of intermediate results - rcache = {} # r -> r, cache of result objects for memory reuse + # expr -> r = function(expr,...), cache of intermediate results + vcache = {} if vcache is None else vcache + # r -> r, cache of result objects for memory reuse + rcache = {} if rcache is None else rcache # Build mapping typecode:bool, for which types to skip the subtree of if isinstance(function, MultiFunction): diff --git a/ufl/exprequals.py b/ufl/exprequals.py index 592be1aed..653e39fe4 100644 --- a/ufl/exprequals.py +++ b/ufl/exprequals.py @@ -145,6 +145,8 @@ def nonrecursive_expr_equals(self, other): left.append((s, o)) # Equal if we get out of the above loop! + # Eagerly DAGify to reduce the size of the tree. + self.ufl_operands = other.ufl_operands return True