
v0.5.1
eliorc authored Nov 29, 2019
2 parents 8320f4b + 4be3d5a commit a499d13
Showing 8 changed files with 11 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -59,7 +59,7 @@ jobs:
   test-3.8:
     <<: *test-template
     docker:
-      - image: circleci/python:3.6
+      - image: circleci/python:3.8
   test-3.6:
     <<: *test-template
     docker:
2 changes: 1 addition & 1 deletion requirements/docs.txt
@@ -4,5 +4,5 @@ sphinxcontrib-httpdomain
 sphinx_rtd_theme
 pygments
 pygments-style-github
-tensorflow==2.0.0-rc0
+tensorflow==2.0.0
 tavolo
2 changes: 1 addition & 1 deletion requirements/test.txt
@@ -2,4 +2,4 @@ pytest
 pytest-cov
 twine
 codecov
-tensorflow==2.0.0-rc0
+tensorflow==2.0.0
2 changes: 1 addition & 1 deletion setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup

-VERSION = '0.5.0'
+VERSION = '0.5.1'

 setup(name='tavolo',
       version=VERSION,
2 changes: 1 addition & 1 deletion tavolo/__init__.py
@@ -1,5 +1,5 @@
 __name__ = 'tavolo'
-__version__ = '0.5.0'
+__version__ = '0.5.1'

 from . import embeddings
 from . import seq2vec
4 changes: 2 additions & 2 deletions tavolo/embeddings.py
@@ -89,7 +89,7 @@ def build(self, input_shape):
         self.positional_encoding[:, 0::2] = np.sin(self.positional_encoding[:, 0::2])
         self.positional_encoding[:, 1::2] = np.cos(self.positional_encoding[:, 1::2])

-        self.positional_encoding = self.add_variable(
+        self.positional_encoding = self.add_weight(
             'embedding_matrix',
             shape=self.positional_encoding.shape,
             initializer=tf.keras.initializers.Constant(self.positional_encoding),
@@ -335,7 +335,7 @@ class ContextualDynamicMetaEmbedding(tf.keras.layers.Layer):
     .. _`Dynamic Meta-Embeddings for Improved Sentence Representations`:
        https://arxiv.org/abs/1804.07983
     """

     def __init__(self,
                  embedding_matrices: List[tf.keras.layers.Embedding],
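For context: `add_variable` was deprecated on `tf.keras.layers.Layer` in TensorFlow 2.0 in favor of `add_weight`, which is what this hunk (and the matching one in seq2vec.py below) migrates to. A minimal, self-contained sketch of the same pattern — registering a precomputed sinusoidal table as a constant layer weight; the layer name, the `max_len` parameter, and the `trainable=False` flag are illustrative assumptions, not taken from this diff:

    import numpy as np
    import tensorflow as tf

    class SinusoidalEncoding(tf.keras.layers.Layer):
        """Hypothetical example layer -- not tavolo's implementation."""

        def __init__(self, max_len: int = 512, **kwargs):
            super().__init__(**kwargs)
            self.max_len = max_len

        def build(self, input_shape):
            d_model = int(input_shape[-1])
            # Angle table: pos / 10000^(2i / d_model)
            positions = np.arange(self.max_len)[:, np.newaxis]
            dims = np.arange(d_model)[np.newaxis, :]
            table = positions / np.power(10000., (2 * (dims // 2)) / d_model)
            table[:, 0::2] = np.sin(table[:, 0::2])  # sine on even indices
            table[:, 1::2] = np.cos(table[:, 1::2])  # cosine on odd indices
            # TF 2.0: add_weight replaces the deprecated add_variable
            self.encoding = self.add_weight('embedding_matrix',
                                            shape=table.shape,
                                            initializer=tf.keras.initializers.Constant(table),
                                            trainable=False)

        def call(self, inputs):
            seq_len = tf.shape(inputs)[1]
            return inputs + self.encoding[tf.newaxis, :seq_len, :]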
10 changes: 0 additions & 10 deletions tavolo/seq2seq.py
@@ -13,7 +13,6 @@ class MultiHeadedAttention(tf.keras.layers.Layer):
     - `n_heads` (``int``): Number of attention heads
     - `n_units` (``int``): Number of units (sum of units of all heads), defaults to the last dimension of the input
-    - `dropout_rate` (``float``): Rate of outputs to drop in the range [0, 1]
     - `causal` (``bool``): Use causality (make each time point in output dependent only on previous timepoints of input)
     - `name` (``str``): Layer name
@@ -82,7 +81,6 @@ class MultiHeadedAttention(tf.keras.layers.Layer):
     def __init__(self,
                  n_heads: int = 4,
                  n_units: Optional[int] = None,
-                 dropout_rate: float = 0.,
                  causal: bool = False,
                  name: str = 'multi_headed_attention',
                  **kwargs):
@@ -96,7 +94,6 @@ def __init__(self,
         :param n_heads: Number of attention heads
         :param n_units: Number of units (sum of units of all heads), defaults to the last dimension of the input
-        :param dropout_rate: Rate of outputs to drop in the range [0, 1]
         :param causal: Use causality (make each time point in output dependent only on previous timepoints of input)
         :param name: Layer name
         """
@@ -105,13 +102,11 @@ def __init__(self,
         self.n_heads = n_heads
         self.n_units = n_units
-        self.dropout_rate = dropout_rate
         self.causal = causal
         self.Q = None
         self.K = None
         self.V = None
         self.output_projection = None
-        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
         self.very_small_value = (-2 ** 32 + 1)  # Used for padding to avoid attending

     def build(self, input_shape):
@@ -199,10 +194,6 @@ def call(self, inputs,
         attended = self.attention([Q, V, K],
                                   mask=attention_mask)  # shape=(batch_size * n_heads, time_steps, n_units / n_heads)
-
-        # Dropout
-        attended = self.dropout(attended,
-                                training=training)  # shape=(batch_size * n_heads, time_steps, n_units / n_heads)

         # Restore original shape
         outputs = tf.concat(tf.split(attended, self.n_heads, axis=0),
                             axis=2)  # shape=(batch_size, time_steps, n_units)
@@ -216,7 +207,6 @@ def get_config(self):
         base_config = super().get_config()
         base_config['n_heads'] = self.n_heads
         base_config['n_units'] = self.n_units
-        base_config['dropout_rate'] = self.dropout_rate
         base_config['causal'] = self.causal

         return base_config
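Since `dropout_rate` is removed from both `__init__` and `get_config`, models that relied on it must now apply dropout outside the layer. A hedged sketch of post-0.5.1 usage — the self-attention call on a single input tensor and the shapes are assumptions based on the docstring above, not verified against the full file:

    import tensorflow as tf
    from tavolo.seq2seq import MultiHeadedAttention

    # 0.5.1 constructor: dropout_rate is no longer accepted
    mha = MultiHeadedAttention(n_heads=4, n_units=64, causal=False)

    inputs = tf.keras.Input(shape=(None, 64))  # (batch_size, time_steps, channels)
    attended = mha(inputs)                     # (batch_size, time_steps, n_units)
    # Dropout, formerly internal to the layer, is now applied explicitly
    attended = tf.keras.layers.Dropout(rate=0.1)(attended)
    model = tf.keras.Model(inputs=inputs, outputs=attended)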
8 changes: 4 additions & 4 deletions tavolo/seq2vec.py
@@ -77,10 +77,10 @@ def __init__(self, n_units: int,
                                        activation=tf.nn.tanh,
                                        dtype=self.dtype,
                                        name='omega')
-        self.u_omega = self.add_variable('u_omega',
-                                         shape=(self.n_units,),
-                                         initializer=tf.keras.initializers.RandomNormal(stddev=0.1),
-                                         dtype=self.dtype)
+        self.u_omega = self.add_weight('u_omega',
+                                       shape=(self.n_units,),
+                                       initializer=tf.keras.initializers.RandomNormal(stddev=0.1),
+                                       dtype=self.dtype)

     def compute_mask(self, inputs, mask=None):
         return mask
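This is the same `add_variable` → `add_weight` rename as in embeddings.py. For illustration, a minimal attention-pooling layer built around a `u_omega` context vector created this way — the scoring logic is a common formulation and an assumption here, not tavolo's exact code:

    import tensorflow as tf

    class AttentionPooling(tf.keras.layers.Layer):
        """Hypothetical example layer -- not tavolo's implementation."""

        def __init__(self, n_units: int, **kwargs):
            super().__init__(**kwargs)
            self.n_units = n_units

        def build(self, input_shape):
            # add_weight is the TF 2.0 replacement for add_variable
            self.u_omega = self.add_weight('u_omega',
                                           shape=(self.n_units,),
                                           initializer=tf.keras.initializers.RandomNormal(stddev=0.1),
                                           dtype=self.dtype)

        def call(self, inputs):  # inputs: (batch_size, time_steps, n_units)
            scores = tf.tensordot(inputs, self.u_omega, axes=[[2], [0]])     # (batch_size, time_steps)
            weights = tf.nn.softmax(scores, axis=-1)
            return tf.reduce_sum(inputs * weights[..., tf.newaxis], axis=1)  # (batch_size, n_units)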
