From e2da3c5562784d4adea7c31b1c6cfabe1b5a9709 Mon Sep 17 00:00:00 2001
From: Mylonas Charilaos <mylonas.charilaos@gmail.com>
Date: Tue, 12 Jan 2021 13:14:51 +0100
Subject: [PATCH] Fixed a silent bug, cleaned up a bit.

---
 README.md                  |   1 -
 ibk_gnns/graphnet_utils.py | 775 +------------------------------------
 ibk_gnns/minigraphnets.py  |  31 +-
 ibk_gnns/test.py           | 224 -----------
 ibk_gnns/utils_train.py    |   2 +-
 test.py                    |   1 -
 6 files changed, 18 insertions(+), 1016 deletions(-)
 delete mode 100644 ibk_gnns/test.py

diff --git a/README.md b/README.md
index ca34e54..2a85807 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,5 @@
 # Tensorflow message passing networks library
 A library for easy construction of message-passing networks in tensorflow keras.
-Under construction.
 
 # Usage
 ## Basic data-structures
diff --git a/ibk_gnns/graphnet_utils.py b/ibk_gnns/graphnet_utils.py
index 2a57afc..20181d5 100644
--- a/ibk_gnns/graphnet_utils.py
+++ b/ibk_gnns/graphnet_utils.py
@@ -282,7 +282,7 @@ def graph_tuple_eval(self, tf_graph_tuple):
         # no changes in Graph topology - nothing else to do!
 
     def graph_eval(self, graph, eval_mode = "batched"):
-        # Evaluate the full GraphNet step:
+        # Evaluate the full GraphNet step ("naive" implementations - used for testing).
         # parameters:
         #   eval_mode - ['batched' , 'safe']: 
         #                batched: assuming that the first non-batch dimension of the attributes of the nodes and 
@@ -296,10 +296,11 @@ def graph_eval(self, graph, eval_mode = "batched"):
         #                         
         # 
         # 
-        eval_mode_dict = {'batched' : self.graph_eval_batched,'safe' : self.graph_eval_safe}
+        eval_mode_dict = {'batched' : self._graph_eval_batched,'safe' : self._graph_eval_safe}
         return eval_mode_dict[eval_mode](graph)
 
-    def graph_eval_safe(self, graph):
+    def _graph_eval_safe(self, graph):
+        # Evaluate the full GraphNet step ("safe" mode):
         #                safe:    This is a naive implementation. It is useful for testing and providing some memory 
         #                         efficiency when dealing with larger graphs ('batched' method may fail in such cases 
         #                         since it creates big intermediate tensors).
@@ -382,7 +383,7 @@ def load_graph_functions(path):
                 print("loading %s"%(d_))
         return functions
 
-    def graph_eval_batched(self, graph):
+    def _graph_eval_batched(self, graph):
         # Evaluate the full GraphNet step:
         # parameters:
         #   eval_mode - ['batched' , 'safe']: 
@@ -832,769 +833,3 @@ def make_mlp_graphnet_functions(units,
     return {"edge_function" : edge_mlp, "node_function": node_mlp, "edge_aggregation_function": agg_fcn} # Can be passed directly  to the GraphNet construction with **kwargs
 
 
-# The following is the initial implementation - I was using "dummy" variables because I didn't want to properly deal with cases with different types of MLP functions (for instance the GN lib should support both edge functions that support only receiver-node inputs but also sender-node inputs and edge inputs seamlesly (without conditions or wasting memory). 
-#
-#class GraphNetFunctionFactory:
-#    def __init__(self, network_size_global= 50, use_prenetworks= True, edge_node_state_size= 15, graph_function_output_activation = "gated_tanh", 
-#            n_conv_blocks = 3, nfilts = 18, nfilts2 = 50, ksize = 3, conv_block_activation_type = 'leaky_relu', channels_in = 2):
-#        """
-#        Summary: 
-#          A factory for graphnet functions. It is custom-made for the problems of RUL from time-series. 
-#          It can be adapted to other prediction models. All models (except the aggregation function) are 
-#          relatively small MLPs terminated by sigmoid()*tanh() activation (simple tanh could also work).
-#          
-#        network_size_global: A parameter controlling the width of different networks involved.
-#    
-#        use_prenetworks:     Use a deeper architecture (see code)
-#        
-#        edge_node_state_size:  the size of the node states, and edge states. This is needed
-#                               to create consistent graph functions. Eventhough here I'm using the same global size,
-#                               the sizes of edge states and node states can be different.
-#
-#        graph_function_activation: controls how the graph functions are terminated. The special 
-#                       option "gated_tanh" is the default (RNN/Wavenet-like activation). Original graphnets had ReLU.
-#
-#        """
-#        self.network_size_global =network_size_global 
-#        self.use_prenetworks = use_prenetworks 
-#        self.edge_and_node_state_size = edge_node_state_size 
-#        self.graph_function_output_activation = graph_function_output_activation
-#        self.model_constr_dict= str(inspect.getargvalues(inspect.currentframe()).locals)
-#        self.model_str = str(self.model_constr_dict)
-#        # Passed with other vargs on construction:
-#        self.cnn_params = {
-#                'n_conv_blocks' : n_conv_blocks ,
-#                'nfilts' : nfilts, 
-#                'nfilts2' : nfilts2, 
-#                'ksize': ksize ,
-#                'activation_type' : conv_block_activation_type,
-#                'channels_in' : channels_in
-#                }
-#
-#    @staticmethod
-#    def make_from_record(record):
-#        """
-#        Method to easilly create the object from a record.
-#        Subsequently it is loaded from disk.
-#        """
-#
-#        s = inspect.signature(GraphNetFunctionFactory.__init__)
-#
-#        # Depending on whether the input is a dictionary or a pd dataframe, transform the 
-#        # keys to a list in order to pass them to the constructor.
-#        import pandas as pd
-#        record_type_to_list_transformation = {
-#                    pd.core.series.Series : lambda x : list(x.index),
-#                    dict : lambda k : [k for k in x.keys()]
-#                }
-#
-#        l_ = record_type_to_list_transformation[type(record)](record)
-#
-#        l = [s_ for s_ in s.parameters.keys() if s_ in l_]
-#
-#        return GraphNetFunctionFactory(**{k_:record[k_] for k_ in l})
-#        
-#
-#    def get_hash(self):
-#        import hashlib
-#        return hashlib.md5(self.model_str.encode("utf-8"))
-#
-#    def make_gamma_node_observation_mlp(self, n_node_state_output):
-#        """
-#        Takes as input a node state and returns a gamma probability distribution
-#        """
-#        seq = keras.Sequential()
-#        NParams= 1;
-#        NSamples = 100;
-#        #seq.add(Dense(n_gamma_internal, use_bias = True, activation = "relu", name = "output1"))
-#        seq.add(Dense(NParams*2, use_bias = False, activation = lambda x : tf.nn.softplus(x),name = "output"));
-#        # Change that in the future to the _instantiate_gamma() version (out of class)
-#        def instantiate_gamma(t):
-#            return tfd.Gamma(concentration = t[...,0:NParams], rate = t[...,NParams:2*NParams])
-#
-#        #seq.add()
-#        seq.add(            
-#            tfp.layers.DistributionLambda(
-#                make_distribution_fn = lambda t: instantiate_gamma(t),
-#                convert_to_tensor_fn= lambda s : s.sample(NSamples)))
-#        seq._set_inputs(tf.keras.layers.Input((n_node_state_output,)))
-#        return seq
-#
-#    def weights(self):
-#        return [*self.core.weights(), *self.graph_indep.weights()];
-#
-#    def make_edge_function(self,n_edge_state_input = None,n_edge_state_output = None, n_node_state_input = None):
-#        edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state");
-#        node_state_sender_in = Input(shape = (n_node_state_input), name = "node_sender");
-#        node_state_receiver_in = Input(shape = (n_node_state_input), name = "node_receiver");
-#
-#        ## Building the edge MLP:
-#        edge_out = keras.layers.concatenate([edge_state_in, node_state_sender_in, node_state_receiver_in])
-#
-#        if self.use_prenetworks:
-#            edge_out = Dense(self.network_size_global,  use_bias = False,name = "edge_input")(edge_out)
-#            edge_out = Dropout(rate = 0.2)(edge_out)
-#
-#        edge_out_gate = Dense(n_edge_state_output, activation = "sigmoid", use_bias = False,name = "edge_fcnA")(edge_out)
-#        edge_outB = Dense(n_edge_state_output, activation = "tanh", use_bias = False,name = "edge_fcnB")(edge_out)
-#        edge_out =  edge_outB * edge_out_gate #+ (1 - edge_out_gate) * edge_state_in
-#
-#
-#        edge_mlp = Model(
-#            inputs = [edge_state_in, node_state_sender_in, node_state_receiver_in ] ,
-#            outputs = edge_out)
-#
-#        return edge_mlp
-#    
-#    def save(self, path):
-#        gi_path = os.path.join(path,"graph_independent")
-#        core_path = os.path.join(path,"core")
-#        if not os.path.exists(path):
-#            os.makedirs(gi_path)
-#            os.makedirs(core_path)
-#            
-#        self.core.save(core_path)
-#        self.graph_indep.save(gi_path)
-#    
-#    def load(self,path):
-#        self.gi_path = os.path.join(path,"graph_independent")
-#        self.core_path = os.path.join(path,"core")
-#        
-#        self.core        = GraphNet.make_from_path(self.core_path)
-#        self.graph_indep = GraphNet.make_from_path(self.gi_path)
-#
-#    def make_edge_function_gi(self,n_edge_state_input = None, n_edge_state_output = None, n_node_state_input = None):
-#        # for graph independent.
-#        if n_edge_state_output is None:
-#            n_edge_state_output = n_edge_state_input
-#
-#        edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state");
-#        edge_out = edge_state_in
-#
-#        if self.use_prenetworks:
-#            edge_out = Dense(self.network_size_global,  use_bias = True, name = "edge_gi_input_fcn1")(edge_out)
-#            edge_out = tf.keras.layers.LeakyReLU()(edge_out)
-#            edge_out = Dropout(rate = 0.2)(edge_out)
-#            edge_out = Dense(self.network_size_global,  use_bias = True, name = "edge_gi_input_fcn2")(edge_out)
-#            edge_out = tf.keras.layers.LeakyReLU()(edge_out)
-#
-##        if self.graph_function_output_activation == 'gated_tanh':
-##            edge_out_gate = Dense(n_edge_state_output, use_bias = False, activation = "sigmoid", name = "edge_gi_fcnA")(edge_out)
-##            edge_outB = Dense(n_edge_state_output, use_bias = False, activation = "tanh", name = "edge_gi_fcnB")(edge_out)
-##            edge_out = edge_outB * edge_out_gate 
-##        else:
-##            edge_out_gate = Dense(n_edge_state_output, use_bias = False, activation = self.graph_function_output_activation, name = "edge_gi_fcnA")(edge_out)
-#        edge_out = self.network_function_output(edge_out, 
-#                name_prefix = "edge_gi",
-#                output_size = n_edge_state_output) # Attention! Reads parameters from the factory class. Written for avoiding code repetition, not for clarity.
-#
-#        edge_mlp = Model(inputs = edge_state_in,outputs = edge_out)
-#        return edge_mlp
-#
-#    def network_function_output(self,tensor_in,name_prefix = None, output_size = None): 
-#        """
-#        Implement the gated_tanh output head and treat it uniformly with other options for the output network options (useful for hyperparameter searches)
-#        """
-#
-#        if self.graph_function_output_activation== 'gated_tanh': # not realy an activation...
-#            _out_gate = Dense(output_size, use_bias = False, activation = "sigmoid", name = "%s_fcnA"%name_prefix)(tensor_in)
-#            _outB = Dense(output_size, use_bias = False, activation = "tanh", name = "%s_fcnB"%name_prefix)(tensor_in)
-#            _out = _outB * _out_gate
-#            #_mlp = Model(inputs = tensor_in,outputs = _out)
-#        else:
-#            _out = Dense(output_size, use_bias = False, activation = self.graph_function_output_activation, name = "%s_fcn"%name_prefix)(tensor_in)
-#            #edge_mlp = Model(inputs = tensor_in,outputs = _out)
-#
-#        return _out
-#
-#    @classmethod
-#    def make_edge_aggregation_function(self,edge_out_shape):
-#        xin = tf.keras.layers.Input(shape = (None,edge_out_shape))
-#        xout = tf.reduce_mean(xin,0)
-#        return Model(inputs = xin, outputs= xout)
-#
-#
-#    def make_node_function(self,n_edge_state_input = None,n_node_state_input = None):
-#        agg_edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state_agg");
-#        node_prev_state = Input(shape = (n_node_state_input), name = "node_sender");
-#
-#        ## Building the edge MLP:
-#        node_out = keras.layers.concatenate([agg_edge_state_in, node_prev_state]);
-#
-#        if self.use_prenetworks:
-#            node_out = Dense(self.network_size_global,  use_bias = True,name = "node_fcn1")(node_out)
-#            node_out = Dropout(rate = 0.2)(node_out)
-#            node_out = tf.keras.layers.LeakyReLU()(node_out)
-#            node_out = Dense(self.network_size_global,  use_bias = True , name = "node_fcn2")(node_out)
-#            node_out = tf.keras.layers.LeakyReLU()(node_out)
-#        #node_out = Dense(n_node_state_input, use_bias = False)(node_out)
-#
-#        node_out_nl = Dense(n_node_state_input, activation = "tanh", use_bias = False,name = "node_fcn_nl")(node_out)
-#        node_out_gate = Dense(n_node_state_input, activation = "sigmoid", use_bias = False,name = "node_fcn_gate")(node_out)
-#        node_out = node_out_nl * node_out_gate# + node_prev_state * (1-node_out_gate)
-#
-#        node_out_model = Model(inputs = [agg_edge_state_in, node_prev_state] ,outputs = node_out)
-#
-#        return node_out_model
-#
-#
-#
-#    def make_conv_input_head_node_function(self,edge_input_dummy_size , n_conv_blocks = 3, nfilts = 18, nfilts2 = 50, ksize = 3, output_size = None, use_dropout = True, activation_type = 'leaky_relu', channels_in = 2):
-#        """
-#        A simple 1D CNN for extracting features from the timeseries. It is used in the graph_independent graphnet block. 
-#        Each conv block is as such:
-#         * 1Dconv kernelsize/stride/filters : 1 / 1 / nfilts2 (e.g. 50)
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * (optional) dropout(0.2)
-#         * activation
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * AveragePooling(kernel = 2)
-#
-#         The network returned is `n_conv_blocks' of the aformentioned stacked. 
-#
-#        parameters:
-#            n_conv_blocks : number of convolutional blocks stacked.
-#            nfilts        : number of bottleneck filts (for instance 18)
-#            nfilts2       : number of filters for the 1x1 convolution (typically larger than nfilts)
-#            ksize         : size of kernel used for all internal convs (3)
-#            output_size   : the node state size (default: None)
-#            use_dropout   : use/notuse dropout between conv layers (some literature suggests it does not help)
-#            activation    : the activation used after the dropout layer.
-#
-#          edge_input_dummy_size : This has to do with the (bad) implementation of the node block. For uniform treatment of edge inputs, 
-#        """
-#        txt2act = {'relu' : tf.keras.layers.ReLU(), 'leaky_relu' : tf.keras.layers.LeakyReLU()}
-#        _activation = lambda: txt2act[activation_type]
-#
-#
-#        xin_node_ts = tf.keras.Input(shape = (None, channels_in) , name = "timeseries_input"); 
-#        xin_edge_dummy = tf.keras.Input(shape = ( edge_input_dummy_size), name = "edge_input_dummy");
-#
-#        def conv_block(conv_block_input, names_suffix= ""):
-#            yout_ = Conv1D(kernel_size = 1 ,  filters = nfilts2, strides = 1, use_bias= False,name = "conv_fcnA"+names_suffix)(conv_block_input)
-#            yout_ = Conv1D(kernel_size=ksize, filters = nfilts, strides=2  , use_bias= False,name  = "conv_fcnB"+names_suffix)(yout_)
-#            yout_ = Conv1D(kernel_size=ksize, filters = nfilts, strides=2  , use_bias= False,name  = "conv_fcnC"+names_suffix)(yout_)
-#            if use_dropout:
-#                yout_ = Dropout(rate = 0.2)(yout_)
-#            yout_ = Conv1D(kernel_size=ksize,strides=2, filters = nfilts2,use_bias= True)(yout_)
-#            yout_ = _activation()(yout_)
-#            #yout_ = keras.layers.AveragePooling1D(pool_size=2)(yout_)
-#            return yout_
-#        
-#        yout = conv_block(xin_node_ts)
-#        yout = keras.layers.AveragePooling1D(pool_size=2)(yout)
-#        for b in range(n_conv_blocks-1):
-#            yout = conv_block(yout, names_suffix=str(b))
-#        
-#
-#        yout = keras.layers.GlobalAveragePooling1D()(yout)
-#        #yout = keras.layers.GlobalMaxPooling1D()(yout)
-#        yout = Dense(output_size, use_bias = True)(yout)
-#        yout = keras.layers.LayerNormalization()(yout)
-#        yout = tf.keras.layers.LeakyReLU()(yout)
-#
-#        mconv = keras.Model(inputs = [xin_edge_dummy,xin_node_ts], outputs = yout)
-#        return mconv
-#    
-#    def make_graphnet_comp_blocks(self, n_node_state_input_gi = None):
-#        """
-#        Prepares the graphnet blocks for the subsequent computation. 
-#        Subsequently these blocks are composed so that a series of inputs can return
-#        a gamma distribution directly.
-#        """
-#        #NETWORK_STATES_SIZE = 30
-#        n_node_state_input , n_edge_state_input = [self.edge_and_node_state_size,self.edge_and_node_state_size]
-#        n_edge_output = n_edge_state_input
-#        
-#        batch_size = 10; # An arbitrary number, to create a batch and call the 
-#                         #functions once to initialize them.
-#
-#        n_edge_state_input_gi = 1
-#        n_edge_output_gi = self.edge_and_node_state_size;
-#        
-#        ##########################################
-#        # Graph independent processing:
-#        edge_mlp_gi = self.make_edge_function_gi(n_edge_state_input = n_edge_state_input_gi,
-#                                            n_edge_state_output= n_edge_output_gi,
-#                                            n_node_state_input = n_node_state_input_gi)
-#
-#        conv_head_params = self.cnn_params
-#        conv_head_params.update({'edge_input_dummy_size' : n_edge_state_input_gi, 'output_size' : n_node_state_input })
-#        node_mlp_gi = self.make_conv_input_head_node_function(**conv_head_params ) #edge_input_dummy_size=n_edge_state_input_gi, output_size = n_node_state_input)
-#
-#        node_mlp_gi([np.random.randn(batch_size,n_edge_state_input_gi),np.random.randn(batch_size,n_node_state_input_gi,self.cnn_params['channels_in'])])
-#        
-#        graph_indep = GraphNet(edge_function = edge_mlp_gi,
-#                               node_function = node_mlp_gi,
-#                               edge_aggregation_function= None, 
-#                               node_to_prob= None)
-#
-#        #########################################
-#        # Graph processing:
-#        
-#        edge_mlp = self.make_edge_function(n_edge_state_input,n_edge_output, n_node_state_input) # THe node state is used for two nodes.
-#        dat_list= [vv.astype("float32") for vv in [np.random.randn(batch_size,n_edge_state_input), np.random.randn(batch_size,n_node_state_input), np.random.randn(batch_size,n_node_state_input)]]
-#        edge_mlp(dat_list)
-#
-#        node_mlp = self.make_node_function(n_edge_state_input, n_node_state_input)
-#        node_to_prob_mlp = self.make_gamma_node_observation_mlp(n_node_state_input);
-#        node_to_prob_mlp(np.random.randn(batch_size,n_node_state_input))
-#        node_mlp([vv.astype("float32") for vv in [np.random.randn(batch_size,n_edge_state_input), np.random.randn(batch_size,n_node_state_input)]])
-#        per_node_edge_aggregator = self.make_edge_aggregation_function(n_edge_output)
-#        edge_aggregation_function = per_node_edge_aggregator
-#
-#        gn = GraphNet(edge_function = edge_mlp,
-#                      node_function=node_mlp,
-#                      edge_aggregation_function=edge_aggregation_function,
-#                      node_to_prob= node_to_prob_mlp)
-#        self.core = gn
-#        self.graph_indep = graph_indep
-#
-#    
-#
-#    
-#        
-#    def eval_graphnets(self,graph_data_, iterations = 5, eval_mode = "batched", return_reparametrization = False,return_final_node = False, return_intermediate_graphs = False, node_index_to_use = -1):
-#        """
-#        graph_data_                : is a "graph" object that contains a batch of graphs (more correctly, a graph tuple as DM calls it)
-#        iterations                 : number of core iterations for the computation.
-#        eval_mode                  : "batched" (batch nodes and edges before evaluation) or "safe" (more memory efficient - less prone to OOM errors no batching).-
-#        return_distr_params        : return the distribution parameters instead of the distribution itself. This is in place because of 
-#                                     some buggy model loading (loaded models don't return distribution objects).
-#        return_intermediate_graphs : Return all the intermediate computations.
-#        """
-#        graph_out = self.graph_indep.graph_eval(graph_data_)
-#        intermediate_graphs = [];
-#        for iterations in range(iterations):
-#            graph_out = self.core.graph_eval(graph_out, eval_mode = eval_mode) + graph_out # Addition adds all representations (look at implementation of "Graph")
-#            if return_intermediate_graphs:
-#                intermediate_graphs.append(graph_out.copy())
-#
-#        if return_intermediate_graphs:
-#            return intermediate_graphs
-#
-#        # Finally the node_to_prob returns a reparametrized "Gamma" distribution from only the final node state
-#        node_final = graph_out.nodes[node_index_to_use].node_attr_tensor
-#        if return_final_node:
-#            return node_final
-#
-#        if not return_reparametrization:
-#            return self.core.node_to_prob_function(node_final)
-#        else:
-#            return self.core.node_to_prob_function.get_layer("output")(node_final)
-#
-#    def bootstrap_eval_graphnets(self, graph_data_, iterations = 5,n_bootstrap_samples = 1, n_nodes_keep = 5, eval_mode = "batched", return_final_node = False, node_index_to_use = -1):
-#        """
-#        Evaluate multiple random samples of nodes from the past. 
-#        The signature is alsmost the same as `eval_graphnets` with the difference of the parameters n_boostrap_samples (how many times to resample the past nodes) and n_nodes_keep 
-#        (how many nodes from the past to keep). The last node is always in the computed sample.
-#        """
-#        bootstrap_results = [];
-#        for nbs in range(n_bootstrap_samples):
-#
-#            keep_nodes = [graph_data_.nodes[-1]]
-#            node_indices = list(np.random.choice(len(graph_data_.nodes)-1,n_nodes_keep-1, replace = False))
-#            node_indices.sort()
-#            node_indices.append(len(graph_data_.nodes)-1)
-#            subgraph = graph_data_.get_subgraph_from_nodes([graph_data_.nodes[i] for i in node_indices]) # are gradients passing?
-#            bootstrap_result = self.eval_graphnets(subgraph, iterations = iterations, eval_mode = eval_mode, return_final_node = True, node_index_to_use = -1)
-#            bootstrap_results.append(bootstrap_result)
-#
-#        if return_final_node is False:
-#            bootstrap_node_value = tf.reduce_mean(bootstrap_results,0)
-#            return self.core.node_to_prob_function(bootstrap_node_value)
-#        else:
-#            return bootstrap_results
-#
-#        return bootstrap_results
-#          
-#
-#    def set_weights(self,weights):
-#        """
-#        Takes a list of weights (as returned from a similar object)  and sets the to the functions of this one.
-#        """
-#        for w , new_weight in zip([*self.core.weights(), *self.graph_indep.weights()][:] , new_weights):
-#            w = new_weight
-#class GraphNetFunctionFactory:
-#    def __init__(self, network_size_global= 50, use_prenetworks= True, edge_node_state_size= 15, graph_function_output_activation = "gated_tanh", 
-#            n_conv_blocks = 3, nfilts = 18, nfilts2 = 50, ksize = 3, conv_block_activation_type = 'leaky_relu', channels_in = 2):
-#        """
-#        Summary: 
-#          A factory for graphnet functions. It is custom-made for the problems of RUL from time-series. 
-#          It can be adapted to other prediction models. All models (except the aggregation function) are 
-#          relatively small MLPs terminated by sigmoid()*tanh() activation (simple tanh could also work).
-#          
-#        network_size_global: A parameter controlling the width of different networks involved.
-#    
-#        use_prenetworks:     Use a deeper architecture (see code)
-#        
-#        edge_node_state_size:  the size of the node states, and edge states. This is needed
-#                               to create consistent graph functions. Eventhough here I'm using the same global size,
-#                               the sizes of edge states and node states can be different.
-#
-#        graph_function_activation: controls how the graph functions are terminated. The special 
-#                       option "gated_tanh" is the default (RNN/Wavenet-like activation). Original graphnets had ReLU.
-#
-#        """
-#        self.network_size_global =network_size_global 
-#        self.use_prenetworks = use_prenetworks 
-#        self.edge_and_node_state_size = edge_node_state_size 
-#        self.graph_function_output_activation = graph_function_output_activation
-#        self.model_constr_dict= str(inspect.getargvalues(inspect.currentframe()).locals)
-#        self.model_str = str(self.model_constr_dict)
-#        # Passed with other vargs on construction:
-#        self.cnn_params = {
-#                'n_conv_blocks' : n_conv_blocks ,
-#                'nfilts' : nfilts, 
-#                'nfilts2' : nfilts2, 
-#                'ksize': ksize ,
-#                'activation_type' : conv_block_activation_type,
-#                'channels_in' : channels_in
-#                }
-#
-#    @staticmethod
-#    def make_from_record(record):
-#        """
-#        Method to easilly create the object from a record.
-#        Subsequently it is loaded from disk.
-#        """
-#
-#        s = inspect.signature(GraphNetFunctionFactory.__init__)
-#
-#        # Depending on whether the input is a dictionary or a pd dataframe, transform the 
-#        # keys to a list in order to pass them to the constructor.
-#        import pandas as pd
-#        record_type_to_list_transformation = {
-#                    pd.core.series.Series : lambda x : list(x.index),
-#                    dict : lambda k : [k for k in x.keys()]
-#                }
-#
-#        l_ = record_type_to_list_transformation[type(record)](record)
-#
-#        l = [s_ for s_ in s.parameters.keys() if s_ in l_]
-#
-#        return GraphNetFunctionFactory(**{k_:record[k_] for k_ in l})
-#        
-#
-#    def get_hash(self):
-#        import hashlib
-#        return hashlib.md5(self.model_str.encode("utf-8"))
-#
-#    def make_gamma_node_observation_mlp(self, n_node_state_output):
-#        """
-#        Takes as input a node state and returns a gamma probability distribution
-#        """
-#        seq = keras.Sequential()
-#        NParams= 1;
-#        NSamples = 100;
-#        #seq.add(Dense(n_gamma_internal, use_bias = True, activation = "relu", name = "output1"))
-#        seq.add(Dense(NParams*2, use_bias = False, activation = lambda x : tf.nn.softplus(x),name = "output"));
-#        # Change that in the future to the _instantiate_gamma() version (out of class)
-#        def instantiate_gamma(t):
-#            return tfd.Gamma(concentration = t[...,0:NParams], rate = t[...,NParams:2*NParams])
-#
-#        #seq.add()
-#        seq.add(            
-#            tfp.layers.DistributionLambda(
-#                make_distribution_fn = lambda t: instantiate_gamma(t),
-#                convert_to_tensor_fn= lambda s : s.sample(NSamples)))
-#        seq._set_inputs(tf.keras.layers.Input((n_node_state_output,)))
-#        return seq
-#
-#    def weights(self):
-#        return [*self.core.weights(), *self.graph_indep.weights()];
-#
-#    def make_edge_function(self,n_edge_state_input = None,n_edge_state_output = None, n_node_state_input = None):
-#        edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state");
-#        node_state_sender_in = Input(shape = (n_node_state_input), name = "node_sender");
-#        node_state_receiver_in = Input(shape = (n_node_state_input), name = "node_receiver");
-#
-#        ## Building the edge MLP:
-#        edge_out = keras.layers.concatenate([edge_state_in, node_state_sender_in, node_state_receiver_in])
-#
-#        if self.use_prenetworks:
-#            edge_out = Dense(self.network_size_global,  use_bias = False,name = "edge_input")(edge_out)
-#            edge_out = Dropout(rate = 0.2)(edge_out)
-#
-#        edge_out_gate = Dense(n_edge_state_output, activation = "sigmoid", use_bias = False,name = "edge_fcnA")(edge_out)
-#        edge_outB = Dense(n_edge_state_output, activation = "tanh", use_bias = False,name = "edge_fcnB")(edge_out)
-#        edge_out =  edge_outB * edge_out_gate #+ (1 - edge_out_gate) * edge_state_in
-#
-#
-#        edge_mlp = Model(
-#            inputs = [edge_state_in, node_state_sender_in, node_state_receiver_in ] ,
-#            outputs = edge_out)
-#
-#        return edge_mlp
-#    
-#    def save(self, path):
-#        gi_path = os.path.join(path,"graph_independent")
-#        core_path = os.path.join(path,"core")
-#        if not os.path.exists(path):
-#            os.makedirs(gi_path)
-#            os.makedirs(core_path)
-#            
-#        self.core.save(core_path)
-#        self.graph_indep.save(gi_path)
-#    
-#    def load(self,path):
-#        self.gi_path = os.path.join(path,"graph_independent")
-#        self.core_path = os.path.join(path,"core")
-#        
-#        self.core        = GraphNet.make_from_path(self.core_path)
-#        self.graph_indep = GraphNet.make_from_path(self.gi_path)
-#
-#    def make_edge_function_gi(self,n_edge_state_input = None, n_edge_state_output = None, n_node_state_input = None):
-#        # for graph independent.
-#        if n_edge_state_output is None:
-#            n_edge_state_output = n_edge_state_input
-#
-#        edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state");
-#        edge_out = edge_state_in
-#
-#        if self.use_prenetworks:
-#            edge_out = Dense(self.network_size_global,  use_bias = True, name = "edge_gi_input_fcn1")(edge_out)
-#            edge_out = tf.keras.layers.LeakyReLU()(edge_out)
-#            edge_out = Dropout(rate = 0.2)(edge_out)
-#            edge_out = Dense(self.network_size_global,  use_bias = True, name = "edge_gi_input_fcn2")(edge_out)
-#            edge_out = tf.keras.layers.LeakyReLU()(edge_out)
-#
-##        if self.graph_function_output_activation == 'gated_tanh':
-##            edge_out_gate = Dense(n_edge_state_output, use_bias = False, activation = "sigmoid", name = "edge_gi_fcnA")(edge_out)
-##            edge_outB = Dense(n_edge_state_output, use_bias = False, activation = "tanh", name = "edge_gi_fcnB")(edge_out)
-##            edge_out = edge_outB * edge_out_gate 
-##        else:
-##            edge_out_gate = Dense(n_edge_state_output, use_bias = False, activation = self.graph_function_output_activation, name = "edge_gi_fcnA")(edge_out)
-#        edge_out = self.network_function_output(edge_out, 
-#                name_prefix = "edge_gi",
-#                output_size = n_edge_state_output) # Attention! Reads parameters from the factory class. Written for avoiding code repetition, not for clarity.
-#
-#        edge_mlp = Model(inputs = edge_state_in,outputs = edge_out)
-#        return edge_mlp
-#
-#    def network_function_output(self,tensor_in,name_prefix = None, output_size = None): 
-#        """
-#        Implement the gated_tanh output head and treat it uniformly with other options for the output network options (useful for hyperparameter searches)
-#        """
-#
-#        if self.graph_function_output_activation== 'gated_tanh': # not realy an activation...
-#            _out_gate = Dense(output_size, use_bias = False, activation = "sigmoid", name = "%s_fcnA"%name_prefix)(tensor_in)
-#            _outB = Dense(output_size, use_bias = False, activation = "tanh", name = "%s_fcnB"%name_prefix)(tensor_in)
-#            _out = _outB * _out_gate
-#            #_mlp = Model(inputs = tensor_in,outputs = _out)
-#        else:
-#            _out = Dense(output_size, use_bias = False, activation = self.graph_function_output_activation, name = "%s_fcn"%name_prefix)(tensor_in)
-#            #edge_mlp = Model(inputs = tensor_in,outputs = _out)
-#
-#        return _out
-#
-#    @classmethod
-#    def make_edge_aggregation_function(self,edge_out_shape):
-#        xin = tf.keras.layers.Input(shape = (None,edge_out_shape))
-#        xout = tf.reduce_mean(xin,0)
-#        return Model(inputs = xin, outputs= xout)
-#
-#
-#    def make_node_function(self,n_edge_state_input = None,n_node_state_input = None):
-#        agg_edge_state_in = Input(shape = (n_edge_state_input), name = "edge_state_agg");
-#        node_prev_state = Input(shape = (n_node_state_input), name = "node_sender");
-#
-#        ## Building the edge MLP:
-#        node_out = keras.layers.concatenate([agg_edge_state_in, node_prev_state]);
-#
-#        if self.use_prenetworks:
-#            node_out = Dense(self.network_size_global,  use_bias = True,name = "node_fcn1")(node_out)
-#            node_out = Dropout(rate = 0.2)(node_out)
-#            node_out = tf.keras.layers.LeakyReLU()(node_out)
-#            node_out = Dense(self.network_size_global,  use_bias = True , name = "node_fcn2")(node_out)
-#            node_out = tf.keras.layers.LeakyReLU()(node_out)
-#        #node_out = Dense(n_node_state_input, use_bias = False)(node_out)
-#
-#        node_out_nl = Dense(n_node_state_input, activation = "tanh", use_bias = False,name = "node_fcn_nl")(node_out)
-#        node_out_gate = Dense(n_node_state_input, activation = "sigmoid", use_bias = False,name = "node_fcn_gate")(node_out)
-#        node_out = node_out_nl * node_out_gate# + node_prev_state * (1-node_out_gate)
-#
-#        node_out_model = Model(inputs = [agg_edge_state_in, node_prev_state] ,outputs = node_out)
-#
-#        return node_out_model
-#
-#
-#
-#    def make_conv_input_head_node_function(self,edge_input_dummy_size , n_conv_blocks = 3, nfilts = 18, nfilts2 = 50, ksize = 3, output_size = None, use_dropout = True, activation_type = 'leaky_relu', channels_in = 2):
-#        """
-#        A simple 1D CNN for extracting features from the timeseries. It is used in the graph_independent graphnet block. 
-#        Each conv block is as such:
-#         * 1Dconv kernelsize/stride/filters : 1 / 1 / nfilts2 (e.g. 50)
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * (optional) dropout(0.2)
-#         * activation
-#         * 1Dconv kernelsize/stride/filters : 2 / 2 / nfilts  (e.g. 18)
-#         * AveragePooling(kernel = 2)
-#
-#         The network returned is `n_conv_blocks' of the aformentioned stacked. 
-#
-#        parameters:
-#            n_conv_blocks : number of convolutional blocks stacked.
-#            nfilts        : number of bottleneck filts (for instance 18)
-#            nfilts2       : number of filters for the 1x1 convolution (typically larger than nfilts)
-#            ksize         : size of kernel used for all internal convs (3)
-#            output_size   : the node state size (default: None)
-#            use_dropout   : use/notuse dropout between conv layers (some literature suggests it does not help)
-#            activation    : the activation used after the dropout layer.
-#
-#          edge_input_dummy_size : This has to do with the (bad) implementation of the node block. For uniform treatment of edge inputs, 
-#        """
-#        txt2act = {'relu' : tf.keras.layers.ReLU(), 'leaky_relu' : tf.keras.layers.LeakyReLU()}
-#        _activation = lambda: txt2act[activation_type]
-#
-#
-#        xin_node_ts = tf.keras.Input(shape = (None, channels_in) , name = "timeseries_input"); 
-#        xin_edge_dummy = tf.keras.Input(shape = ( edge_input_dummy_size), name = "edge_input_dummy");
-#
-#        def conv_block(conv_block_input, names_suffix= ""):
-#            yout_ = Conv1D(kernel_size = 1 ,  filters = nfilts2, strides = 1, use_bias= False,name = "conv_fcnA"+names_suffix)(conv_block_input)
-#            yout_ = Conv1D(kernel_size=ksize, filters = nfilts, strides=2  , use_bias= False,name  = "conv_fcnB"+names_suffix)(yout_)
-#            yout_ = Conv1D(kernel_size=ksize, filters = nfilts, strides=2  , use_bias= False,name  = "conv_fcnC"+names_suffix)(yout_)
-#            if use_dropout:
-#                yout_ = Dropout(rate = 0.2)(yout_)
-#            yout_ = Conv1D(kernel_size=ksize,strides=2, filters = nfilts2,use_bias= True)(yout_)
-#            yout_ = _activation()(yout_)
-#            #yout_ = keras.layers.AveragePooling1D(pool_size=2)(yout_)
-#            return yout_
-#        
-#        yout = conv_block(xin_node_ts)
-#        yout = keras.layers.AveragePooling1D(pool_size=2)(yout)
-#        for b in range(n_conv_blocks-1):
-#            yout = conv_block(yout, names_suffix=str(b))
-#        
-#
-#        yout = keras.layers.GlobalAveragePooling1D()(yout)
-#        #yout = keras.layers.GlobalMaxPooling1D()(yout)
-#        yout = Dense(output_size, use_bias = True)(yout)
-#        yout = keras.layers.LayerNormalization()(yout)
-#        yout = tf.keras.layers.LeakyReLU()(yout)
-#
-#        mconv = keras.Model(inputs = [xin_edge_dummy,xin_node_ts], outputs = yout)
-#        return mconv
-#    
-#    def make_graphnet_comp_blocks(self, n_node_state_input_gi = None):
-#        """
-#        Prepares the graphnet blocks for the subsequent computation. 
-#        Subsequently these blocks are composed so that a series of inputs can return
-#        a gamma distribution directly.
-#        """
-#        #NETWORK_STATES_SIZE = 30
-#        n_node_state_input , n_edge_state_input = [self.edge_and_node_state_size,self.edge_and_node_state_size]
-#        n_edge_output = n_edge_state_input
-#        
-#        batch_size = 10; # An arbitrary number, to create a batch and call the 
-#                         #functions once to initialize them.
-#
-#        n_edge_state_input_gi = 1
-#        n_edge_output_gi = self.edge_and_node_state_size;
-#        
-#        ##########################################
-#        # Graph independent processing:
-#        edge_mlp_gi = self.make_edge_function_gi(n_edge_state_input = n_edge_state_input_gi,
-#                                            n_edge_state_output= n_edge_output_gi,
-#                                            n_node_state_input = n_node_state_input_gi)
-#
-#        conv_head_params = self.cnn_params
-#        conv_head_params.update({'edge_input_dummy_size' : n_edge_state_input_gi, 'output_size' : n_node_state_input })
-#        node_mlp_gi = self.make_conv_input_head_node_function(**conv_head_params ) #edge_input_dummy_size=n_edge_state_input_gi, output_size = n_node_state_input)
-#
-#        node_mlp_gi([np.random.randn(batch_size,n_edge_state_input_gi),np.random.randn(batch_size,n_node_state_input_gi,self.cnn_params['channels_in'])])
-#        
-#        graph_indep = GraphNet(edge_function = edge_mlp_gi,
-#                               node_function = node_mlp_gi,
-#                               edge_aggregation_function= None, 
-#                               node_to_prob= None)
-#
-#        #########################################
-#        # Graph processing:
-#        
-#        edge_mlp = self.make_edge_function(n_edge_state_input,n_edge_output, n_node_state_input) # THe node state is used for two nodes.
-#        dat_list= [vv.astype("float32") for vv in [np.random.randn(batch_size,n_edge_state_input), np.random.randn(batch_size,n_node_state_input), np.random.randn(batch_size,n_node_state_input)]]
-#        edge_mlp(dat_list)
-#
-#        node_mlp = self.make_node_function(n_edge_state_input, n_node_state_input)
-#        node_to_prob_mlp = self.make_gamma_node_observation_mlp(n_node_state_input);
-#        node_to_prob_mlp(np.random.randn(batch_size,n_node_state_input))
-#        node_mlp([vv.astype("float32") for vv in [np.random.randn(batch_size,n_edge_state_input), np.random.randn(batch_size,n_node_state_input)]])
-#        per_node_edge_aggregator = self.make_edge_aggregation_function(n_edge_output)
-#        edge_aggregation_function = per_node_edge_aggregator
-#
-#        gn = GraphNet(edge_function = edge_mlp,
-#                      node_function=node_mlp,
-#                      edge_aggregation_function=edge_aggregation_function,
-#                      node_to_prob= node_to_prob_mlp)
-#        self.core = gn
-#        self.graph_indep = graph_indep
-#
-#    
-#
-#    
-#        
-#    def eval_graphnets(self,graph_data_, iterations = 5, eval_mode = "batched", return_reparametrization = False,return_final_node = False, return_intermediate_graphs = False, node_index_to_use = -1):
-#        """
-#        graph_data_                : is a "graph" object that contains a batch of graphs (more correctly, a graph tuple as DM calls it)
-#        iterations                 : number of core iterations for the computation.
-#        eval_mode                  : "batched" (batch nodes and edges before evaluation) or "safe" (more memory efficient - less prone to OOM errors no batching).-
-#        return_distr_params        : return the distribution parameters instead of the distribution itself. This is in place because of 
-#                                     some buggy model loading (loaded models don't return distribution objects).
-#        return_intermediate_graphs : Return all the intermediate computations.
-#        """
-#        graph_out = self.graph_indep.graph_eval(graph_data_)
-#        intermediate_graphs = [];
-#        for iterations in range(iterations):
-#            graph_out = self.core.graph_eval(graph_out, eval_mode = eval_mode) + graph_out # Addition adds all representations (look at implementation of "Graph")
-#            if return_intermediate_graphs:
-#                intermediate_graphs.append(graph_out.copy())
-#
-#        if return_intermediate_graphs:
-#            return intermediate_graphs
-#
-#        # Finally the node_to_prob returns a reparametrized "Gamma" distribution from only the final node state
-#        node_final = graph_out.nodes[node_index_to_use].node_attr_tensor
-#        if return_final_node:
-#            return node_final
-#
-#        if not return_reparametrization:
-#            return self.core.node_to_prob_function(node_final)
-#        else:
-#            return self.core.node_to_prob_function.get_layer("output")(node_final)
-#
-#    def bootstrap_eval_graphnets(self, graph_data_, iterations = 5,n_bootstrap_samples = 1, n_nodes_keep = 5, eval_mode = "batched", return_final_node = False, node_index_to_use = -1):
-#        """
-#        Evaluate multiple random samples of nodes from the past. 
-#        The signature is alsmost the same as `eval_graphnets` with the difference of the parameters n_boostrap_samples (how many times to resample the past nodes) and n_nodes_keep 
-#        (how many nodes from the past to keep). The last node is always in the computed sample.
-#        """
-#        bootstrap_results = [];
-#        for nbs in range(n_bootstrap_samples):
-#
-#            keep_nodes = [graph_data_.nodes[-1]]
-#            node_indices = list(np.random.choice(len(graph_data_.nodes)-1,n_nodes_keep-1, replace = False))
-#            node_indices.sort()
-#            node_indices.append(len(graph_data_.nodes)-1)
-#            subgraph = graph_data_.get_subgraph_from_nodes([graph_data_.nodes[i] for i in node_indices]) # are gradients passing?
-#            bootstrap_result = self.eval_graphnets(subgraph, iterations = iterations, eval_mode = eval_mode, return_final_node = True, node_index_to_use = -1)
-#            bootstrap_results.append(bootstrap_result)
-#
-#        if return_final_node is False:
-#            bootstrap_node_value = tf.reduce_mean(bootstrap_results,0)
-#            return self.core.node_to_prob_function(bootstrap_node_value)
-#        else:
-#            return bootstrap_results
-#
-#        return bootstrap_results
-#          
-#
-#    def set_weights(self,weights):
-#        """
-#        Takes a list of weights (as returned from a similar object)  and sets the to the functions of this one.
-#        """
-#        for w , new_weight in zip([*self.core.weights(), *self.graph_indep.weights()][:] , new_weights):
-#            w = new_weight
diff --git a/ibk_gnns/minigraphnets.py b/ibk_gnns/minigraphnets.py
index 62f23f7..1c97154 100644
--- a/ibk_gnns/minigraphnets.py
+++ b/ibk_gnns/minigraphnets.py
@@ -4,16 +4,19 @@
 
 def _copy_any_ds(val):
     """
-    Tensorflow copies (by value) resource variables so there is no copy method for them.
-    Because the datastructures I want need to have clear copy semantics this is a helper
-    function to make sure that when I want to copy I actually copy.
+    Copy semantics for the different datatypes accepted.
+    This affects what happens when copying nodes, edges and graphs.
+    In order to trace gradients, tensors and variables are copied with `tf.identity` (numpy arrays and lists with `.copy()`),
+    which defines a consistent interface regardless of the input data-structure.
     """
+    valout = val
     if isinstance(val , np.ndarray) or isinstance(val, list):
         valout = val.copy()
 
     if isinstance(val, tf.Variable) or isinstance(val,tf.Tensor):
-        valout = val# if this is an eager tensor, the assignment copies the tensor.
-    return val
+        valout = tf.identity(val) # TODO: maybe have a flag to override this? Adding more ops does not always make sense.
+    
+    return valout
 
 class Node:
     def __init__(self, node_attr_tensor):
@@ -32,11 +35,6 @@ def set_tensor(self, tensor):
         
     def copy(self):
         return Node(_copy_any_ds(self.node_attr_tensor))
-#        if isinstance(self.node_attr_tensor , np.ndarray):
-#            node_attr_tensor = self.node_attr_tensor.copy()
-#        else:
-#            node_attr_tensor = self.node_attr_tensor # if this is an eager tensor, the assignment copies the tensor.
-#        return Node(node_attr_tensor)
 
     def __add__(self, n):
         return Node(self.node_attr_tensor + n.node_attr_tensor)
@@ -44,8 +42,6 @@ def __add__(self, n):
     def __sub__(self, n):
         return Node(self.node_attr_tensor  - n.node_attr_tensor)
     
-# My implementation relies on eager mode and all computation happens in place. In reality only nodes
-# and edges have data and the graph class is just for defining the computation between them.
 class Edge:
     def __init__(self, edge_attr_tensor, node_from, node_to):
         self.edge_tensor = edge_attr_tensor
@@ -127,11 +123,7 @@ def copy(self):
         # Instantiate the new edges:
         coppied_edge_instances = []
         for e in self.edges:
-            #if isinstance(e.edge_tensor, np.ndarray):
-            #    edge_val = e.edge_tensor.copy()
-            #else:
-            #    edge_val = e.edge_tensor
-            enew = e.copy(nodes_correspondence) #Edge(edge_val, nodes_correspondence[e.node_from], nodes_correspondence[e.node_to])
+            enew = e.copy(nodes_correspondence) 
             coppied_edge_instances.append(enew)
         return Graph(nodes_coppied, coppied_edge_instances)
 
@@ -224,8 +216,6 @@ def make_graph_tuple_from_graph_list(list_of_graphs):
         senders.append(all_nodes.index(e.node_from))
         receivers.append(all_nodes.index(e.node_to))
         
-        #senders.append(e.node_from.find(gin.nodes))
-        #receivers.append(e.node_to.find(gin.nodes))
     
     for n in all_nodes:
         nodes_attr_tensor.append(n.node_attr_tensor)
@@ -289,6 +279,9 @@ def get_graph(self, graph_index):
         """
         Returns a new graph with the same properties as the original  graph.
         gradients are not traced through this operation.
+
+        It's better if this method is avoided since it's inefficient.
+        TODO: include the implementation of algorithms for slicing graphs from graph tuples etc.
         """
         assert(graph_index >=0 )
         if graph_index > self.n_graphs:
diff --git a/ibk_gnns/test.py b/ibk_gnns/test.py
deleted file mode 100644
index 28382b5..0000000
--- a/ibk_gnns/test.py
+++ /dev/null
@@ -1,224 +0,0 @@
-import unittest 
-
-class TestGraphDatastructures(unittest.TestCase):
-
-    def test_construct_nodes_edges_simple_graph_np(self):
-        """
-        Tests the construction of some basic datastructures useful for GraphNet computation
-        """
-        n1 = Node(np.random.randn(10,10))
-        n2 = Node(np.random.randn(10,10))
-        e12 = Edge(np.random.randn(5,10),n1,n2)
-        g = Graph([n1,n2], [e12])
-
-        
-    def test_node_operations(self):
-
-        r1 = np.random.randn(10,10)
-        r2 = np.random.randn(10,10)
-        n1 = Node(r1)
-        n2 = Node(r2)
-        n3 = n1  + n2
-        self.assertEqual(np.linalg.norm(n2.node_attr_tensor + n1.node_attr_tensor-n3.node_attr_tensor),0)
-
-    def test_node_copy(self):
-        """
-        test that when copying the object the value is coppied but not the 
-        reference
-        """
-        n1 = Node(np.random.randn(10,10))
-        n2 = n1.copy()
-        self.assertTrue(n1 != n2)
-        self.assertTrue(np.linalg.norm((n1 - n2).node_attr_tensor)== 0.)
-
-    def test_graph_tuple_construction(self):
-        """
-        Tests if I can properly set and then retrieve a graph tuple.
-        """
-        batch_size = 1
-        node_input_size = 2
-        edge_input_size = 2
-        n1 = Node(np.random.randn(batch_size,node_input_size))
-        n2 = Node(np.random.randn(batch_size, node_input_size))
-        n3 = Node(np.random.randn(batch_size, node_input_size))
-        n4 = Node(np.random.randn(batch_size, node_input_size))
-        n5 = Node(np.random.randn(batch_size, node_input_size))
-
-        e12 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n1,node_to = n2)
-        e21 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n1)
-        e23 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n3)
-        e34 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n3, node_to = n4)
-        e45 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n4, node_to = n5)
-
-        g1 = Graph([n1,n2],[e12])
-        g2 = Graph([n1,n2,n3,n4],[e12,e21,e23,e34])
-        g3 = Graph([n3, n4] , [e34])
-
-        from ibk_gnns import GraphTuple, make_graph_tuple_from_graph_list # the current folder is the module.
-        old_graphs_list = [g1.copy(),g2.copy(),g3.copy()]
-        graph_tuple = make_graph_tuple_from_graph_list(old_graphs_list)
-        new_graphs_list = [graph_tuple.get_graph(k) for k in range(graph_tuple.n_graphs)]
-        self.assertTrue(np.all([(k.is_equal_by_value(m) and k.compare_connectivity(m) ) for k, m in zip(new_graphs_list, old_graphs_list)]))
-
-    def test_graph_tuple_copy(self):
-        batch_size = 1
-        node_input_size = 2
-        edge_input_size = 2
-        n1 = Node(np.random.randn(batch_size,node_input_size))
-        n2 = Node(np.random.randn(batch_size, node_input_size))
-        n3 = Node(np.random.randn(batch_size, node_input_size))
-        n4 = Node(np.random.randn(batch_size, node_input_size))
-        n5 = Node(np.random.randn(batch_size, node_input_size))
-
-        e12 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n1,node_to = n2)
-        e21 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n1)
-        e23 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n3)
-        e34 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n3, node_to = n4)
-        e45 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n4, node_to = n5)
-
-        g1 = Graph([n1,n2],[e12])
-        g2 = Graph([n1,n2,n3,n4],[e12,e21,e23,e34])
-        g3 = Graph([n3, n4] , [e34])
-
-        from ibk_gnns import GraphTuple, make_graph_tuple_from_graph_list # the current folder is the module.
-        old_graphs_list = [g1.copy(),g2.copy(),g3.copy()]
-        graph_tuple = make_graph_tuple_from_graph_list(old_graphs_list)
-        graph_tuple_copy = graph_tuple.copy()
-        graph_tuple_copy.is_equal_by_value(graph_tuple)
-
-
-
-class TestGraphNet(unittest.TestCase):
-    def test_construct_simple_eval_graphnet(self):
-        from ibk_gnns import GraphNet,  make_keras_simple_agg
-        edge_input_size = 15
-        node_input_size = 10
-        node_output_size, edge_output_size = node_input_size, edge_input_size
-        node_input = tf.keras.layers.Input(shape = (node_input_size,))
-        edge_input = tf.keras.layers.Input(shape = (edge_input_size,))
-
-        node_function = tf.keras.Model(outputs = tf.keras.layers.Dense(node_output_size)(node_input), inputs= node_input)
-        edge_function = tf.keras.Model(outputs = tf.keras.layers.Dense(edge_output_size)(edge_input), inputs= edge_input)
-        edge_aggregation_function = make_keras_simple_agg(edge_output_size , 'mean')
-        graphnet = GraphNet(node_function = node_function, edge_function = edge_function, edge_aggregation_function = edge_aggregation_function, node_to_prob = None)
-        batch_size = 10
-        n1 = Node(np.random.randn(batch_size,node_input_size))
-        n2 = Node(np.random.randn(batch_size, node_input_size))
-        e12 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n1,node_to = n2)
-        g = Graph([n1,n2],[e12])
-
-    def test_eval_modes(self):
-        """
-        test the different evaluation modes.
-        There are 3 evaluation modes - one appropriate for batched graphs, and two for graphs of the same shape ("batched" or unbached ("safe")).
-        The "safe" mode is used as reference for the correct results; All modes should give the same output within an error margin (due to finite precission 
-        rounding errors and the different comp. graphs.)
-        """
-        from ibk_gnns import GraphNet, make_mlp_graphnet_functions 
-
-        batch_size = 12
-        tf.keras.backend.set_floatx("float64")
-        node_input_size = 10
-        edge_input_size = node_input_size
-
-        n1 = Node(np.random.randn(batch_size,node_input_size))
-        n2 = Node(np.random.randn(batch_size, node_input_size))
-        n3 = Node(np.random.randn(batch_size, node_input_size))
-        node_abs_vals = [np.abs(n.node_attr_tensor) for n in [n1,n2,n3]]
-
-        e12 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n1,node_to = n2)
-        e21 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n1)
-        e23 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n3)
-        edge_abs_vals = [np.abs(e.edge_tensor) for e in [e12,e21,e23]]
-
-        g1 = Graph([n1,n2,n3],[e12,e21,e23])
-
-        ## The non-graph independent version:
-        gi = False
-        graph_fcn = make_mlp_graphnet_functions(150, node_input_size = node_input_size, node_output_size = node_input_size, graph_indep=gi)
-        graph_fcn.update({"graph_independent" : gi})
-        gn = GraphNet(**graph_fcn )
-        res1 = gn.graph_eval(g1.copy(),eval_mode = "safe")
-        res2 = gn.graph_eval(g1.copy(), eval_mode = "batched")
-        error_nodes = np.max([np.linalg.norm(n1.node_attr_tensor - n2.node_attr_tensor) for n1, n2 in zip(res1.nodes, res2.nodes)])/np.min(node_abs_vals)
-        error_edges = np.max([np.linalg.norm(e1.edge_tensor - e2.edge_tensor) for e1,e2 in zip(res1.edges, res2.edges)])/np.min(edge_abs_vals)
-        #print(error_nodes, error_edges)
-        self.assertTrue(error_nodes < 1e-10)
-        self.assertTrue(error_edges < 1e-10)
-
-        ## The graph-independent version:
-        gi = True
-        graph_fcn = make_mlp_graphnet_functions(150, node_input_size = node_input_size, node_output_size = node_input_size, graph_indep=gi)
-        graph_fcn.update({"graph_independent" : gi})
-        gn = GraphNet(**graph_fcn )
-        res1 = gn.graph_eval(g1.copy(),eval_mode = "safe")
-
-        res2 = gn.graph_eval(g1.copy(), eval_mode = "batched")
-
-        error_nodes = np.max([np.linalg.norm(n1.node_attr_tensor - n2.node_attr_tensor) for n1, n2 in zip(res1.nodes, res2.nodes)])/np.min(node_abs_vals)
-        error_edges = np.max([np.linalg.norm(e1.edge_tensor - e2.edge_tensor) for e1,e2 in zip(res1.edges, res2.edges)])/np.min(edge_abs_vals)
-        #print(error_nodes, error_edges)
-        self.assertTrue(error_nodes < 1e-10)
-        self.assertTrue(error_edges < 1e-10)
-
-    def save_load_weights_ok(self):
-        graph_fcn = make_mlp_graphnet_functions(150, node_node_input_size = 10, node_output_size = 10, graph_indep=False)
-        gn = GraphNet(**graph_fc)
-        gn.save("/tmp/test_gn")
-        gn_loaded = GraphNet.make_from_path("/tmp/test_gn")
-        
-        self.assertTrue(np.all([np.sum(np.abs(w1 - w2))<1e-10 for w1,w2 in zip(gn.weights(),gn_loaded.weights())]))
-
-    def test_load_save_different_aggregators(self):
-        print("-"*20)
-        print("Loading and saving different aggregators test not implemented yet!")
-        print("-"*20)
-        NotImplemented
-        None
-
-    def test_graph_tuple_eval(self):
-        """
-        The graph tuples are graphs of different sizes batched to a single object,
-        to allow for more single-instruction multiple-data computation (batched computation).
-        This is the only evalution mode DeepMind's graphnets implement directly. 
-        This mode is much more computationally efficient.
-        This mode allows computation with unsorted segment sum aggregators.
-        """
-
-        graph_fcn = make_mlp_graphnet_functions(150, node_node_input_size = 2, node_output_size = 2, graph_indep=False)
-        gn = GraphNet(**graph_fc)
-
-        from ibk_gnns import GraphTuple, make_graph_tuple_from_graph_list # the current folder is the module.
-
-        batch_size = 1
-        node_input_size = 2
-        edge_input_size = 2
-        n1 = Node(np.random.randn(batch_size,node_input_size))
-        n2 = Node(np.random.randn(batch_size, node_input_size))
-        n3 = Node(np.random.randn(batch_size, node_input_size))
-        n4 = Node(np.random.randn(batch_size, node_input_size))
-        n5 = Node(np.random.randn(batch_size, node_input_size))
-
-        e12 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n1,node_to = n2)
-        e21 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n1)
-        e23 = Edge(np.random.randn(batch_size, edge_input_size),node_from = n2,node_to = n3)
-        e34 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n3, node_to = n4)
-        e45 = Edge(np.random.randn(batch_size, edge_input_size), node_from = n4, node_to = n5)
-
-        g1 = Graph([n1,n2],[e12])
-        g2 = Graph([n1,n2,n3,n4],[e12,e21,e23,e34])
-        g3 = Graph([n3, n4] , [e34])
-
-        old_graphs_list = [g1.copy(),g2.copy(),g3.copy()]
-        graph_tuple = make_graph_tuple_from_graph_list(old_graphs_list)
-        gn.graph_tuple_eval(graph_tuple)
-        
-        None
-
-if __name__ == "__main__":
-
-    from ibk_gnns import Node, Edge, Graph
-    import tensorflow as tf
-    import numpy as np
-    unittest.main(verbosity = 2)
-
diff --git a/ibk_gnns/utils_train.py b/ibk_gnns/utils_train.py
index 6ecdd08..23cc05f 100644
--- a/ibk_gnns/utils_train.py
+++ b/ibk_gnns/utils_train.py
@@ -1,6 +1,6 @@
 import numpy as np
 """
-Some utilities (mostly mirroring keras facilities) with custom behavior for training GNNs.
+Some utilities (mostly mirroring keras facilities).
 These were prefered to the keras analogues to have greater transparency in what is going on and also have full control of conditions/events etc.
 """
 class LossLogger:
diff --git a/test.py b/test.py
index 4c26a38..d223992 100644
--- a/test.py
+++ b/test.py
@@ -187,7 +187,6 @@ def test_graph_tuple_eval(self):
         gn.graph_tuple_eval(gt_copy)
         graphs_evaluated_separately = [gn.graph_eval(g_)  for g_ in old_graphs_list]
         graphs_evaluated_from_graph_tuple = [gt_copy.get_graph(i) for i in range(gt_copy.n_graphs)]
-        #import tensorflow as tf
         flatten_nodes = lambda x : tf.stack([x_.get_state() for x_ in x.nodes])
         flatten_edges = lambda x : tf.stack([x_.edge_tensor for x_ in x.edges])
         for g1,g2 in zip(graphs_evaluated_from_graph_tuple, graphs_evaluated_separately):