diff --git a/gematria/model/python/BUILD.bazel b/gematria/model/python/BUILD.bazel index da52d499..6fe00310 100644 --- a/gematria/model/python/BUILD.bazel +++ b/gematria/model/python/BUILD.bazel @@ -78,9 +78,6 @@ gematria_py_test( size = "small", timeout = "moderate", srcs = ["main_function_test.py"], - tags = [ - "manual", - ], deps = [ ":inference", ":main_function", diff --git a/gematria/model/python/main_function.py b/gematria/model/python/main_function.py index ef3cc48e..b17de7d2 100644 --- a/gematria/model/python/main_function.py +++ b/gematria/model/python/main_function.py @@ -48,6 +48,7 @@ def main(_): from gematria.utils.python import timer import numpy as np import tensorflow.compat.v1 as tf +import tensorflow as tf2 _ACTION = flags.DEFINE_enum_class( 'gematria_action', @@ -199,11 +200,6 @@ def main(_): 20, 'The number of learned values to display for the list.', ) -_GEMATRIA_LOG_DEVICE_PLACEMENT = flags.DEFINE_bool( - 'gematria_log_device_placement', - False, - 'Print TensorFlow op placement to devices to the log.', -) _GEMATRIA_RANDOM_SEED = flags.DEFINE_integer( 'gematria_random_seed', 123456789, @@ -295,15 +291,15 @@ def main(_): '', 'The directory to which the summaries from the training are stored.', ) -_GEMATRIA_SAVE_CHECKPOINT_SECS = flags.DEFINE_integer( - 'gematria_save_checkpoint_secs', - 60, - 'The number of seconds of training after which a checkpoint is saved.', +_GEMATRIA_SAVE_CHECKPOINT_EPOCHS = flags.DEFINE_integer( + 'gematria_save_checkpoint_epochs', + 100, + 'The number of epochs of training after which a checkpoint is saved.', ) -_GEMATRIA_SAVE_SUMMARIES_SECS = flags.DEFINE_integer( - 'gematria_save_summaries_secs', - 60, - 'The number of seconds of training after which summaries are saved.', +_GEMATRIA_SAVE_SUMMARIES_EPOCHS = flags.DEFINE_integer( + 'gematria_save_summaries_epochs', + 100, + 'The number of epochs of training after which summaries are saved.', ) _GEMATRIA_EVAL_INTERVAL_SECS = flags.DEFINE_integer( 'gematria_eval_interval_secs', @@ -505,79 +501,22 @@ def _resume_from_and_resume_to_dir_must_be_used_at_the_same_time( return bool(resume_from_dir) == bool(resume_to_dir) -def _warmstart_from_file(scaffold: tf.train.Scaffold, sess): +def _warmstart_from_file(model: model_base.ModelBase): """Warmstarts the model from a specific checkpoint.""" - del scaffold # Unused. if not tf.io.gfile.exists(f'{_WARMSTART_FILE.value}.index'): raise ValueError(f'No checkpoint was found at "{_WARMSTART_FILE.value}"') training.partially_restore_from_checkpoint( - _WARMSTART_FILE.value, _LOAD_GLOBAL_STEP_FROM_CKPT.value, sess + _WARMSTART_FILE.value, _LOAD_GLOBAL_STEP_FROM_CKPT.value, model ) -def _warmstart_from_dir(scaffold: tf.train.Scaffold, sess): +def _warmstart_from_dir(model: model_base.ModelBase): """Warmstarts the model from the latest checkpoint in a directory.""" - del scaffold # Unused. checkpoint = tf.train.latest_checkpoint(_WARMSTART_DIR.value) if not checkpoint: raise ValueError(f'No checkpoint was found at "{_WARMSTART_DIR.value}"') training.partially_restore_from_checkpoint( - checkpoint, _LOAD_GLOBAL_STEP_FROM_CKPT.value, sess - ) - - -def _monitored_training_session_from_flags( - model: model_base.ModelBase, is_chief: bool -) -> tf.train.MonitoredTrainingSession: - """Creates a monitored training session for 'model' from command-line flags. - - Args: - model: The model for which the session is created. - is_chief: True when this is the chief training worker in a distributed - setup. - - Returns: - The monitored training session object. 
- """ - hooks = [] - if _GEMATRIA_TRAINING_NUM_EPOCHS.value > 0: - hooks.append( - tf.train.StopAtStepHook(last_step=_GEMATRIA_TRAINING_NUM_EPOCHS.value) - ) - hooks += model.get_monitored_training_session_hooks() - session_config = tf.ConfigProto( - log_device_placement=_GEMATRIA_LOG_DEVICE_PLACEMENT.value - ) - scaffold_init_fn = None - if _WARMSTART_FILE.value: - # If there is a checkpoint to bootstrap from, we add an init_fn to the - # monitored session that restores it. This init_fn is called only when an - # actual checkpoint is not available to fully restore the model. - scaffold_init_fn = _warmstart_from_file - elif _WARMSTART_DIR.value: - # If there is a directory to bootstrap from, we find the latest checkpoint - # in this directory and add an init_fn to the monitored session the same way - # as with _WARMSTART_FILE above. - scaffold_init_fn = _warmstart_from_dir - - scaffold = tf.train.Scaffold( - init_fn=scaffold_init_fn, - saver=tf.train.Saver( - max_to_keep=_GEMATRIA_CHECKPOINT_MAX_TO_KEEP.value, - keep_checkpoint_every_n_hours=1, - ), - ) - - return tf.train.MonitoredTrainingSession( - checkpoint_dir=_CHECKPOINT_DIR.value, - config=session_config, - scaffold=scaffold, - hooks=hooks, - is_chief=is_chief, - master=_MASTER.value, - save_checkpoint_secs=_GEMATRIA_SAVE_CHECKPOINT_SECS.value, - save_summaries_secs=_GEMATRIA_SAVE_SUMMARIES_SECS.value, - summary_dir=_GEMATRIA_SUMMARY_DIR.value, + checkpoint, _LOAD_GLOBAL_STEP_FROM_CKPT.value, model ) @@ -751,12 +690,12 @@ def _extract_basic_blocks_with_throughput( yield block -def _session_from_checkpoint(checkpoint_file: str) -> tf.Session: - """Creates a local TF Session and restores it from a given checkpoint file.""" - sess = tf.Session() - saver = tf.train.Saver() - saver.restore(sess, checkpoint_file) - return sess +def _restore_model_from_checkpoint( + checkpoint_file: str, model: model_base.ModelBase +) -> None: + """Restores a model from a checkpoint.""" + checkpoint = tf2.train.Checkpoint(model) + checkpoint.restore(checkpoint_file) def _task_names_from_command_line_flags() -> Sequence[str]: @@ -793,116 +732,111 @@ def run_gematria_model_from_command_line_flags( tf.random.set_random_seed(_GEMATRIA_RANDOM_SEED.value) random.seed(_GEMATRIA_RANDOM_SEED.value) is_chief = _GEMATRIA_TRAINING_TASK.value == 0 - with tf.Graph().as_default(): - dev = tf.train.replica_device_setter( - ps_tasks=_GEMATRIA_TRAINING_PS_TASKS.value - ) - with tf.device(dev): - with timer.scoped('Creating model: ' + model_class.__name__): - num_replicas = _GEMATRIA_NUM_TRAINING_WORKER_REPLICAS.value - num_replicas_to_aggregate = ( - _GEMATRIA_NUM_TRAINING_WORKER_REPLICAS_TO_AGGREGATE.value - ) - model = model_class( # pytype: disable=wrong-arg-types - model_name=_MODEL_NAME.value, - task_list=_task_names_from_command_line_flags(), - synchronous_training=_GEMATRIA_SYNCHRONOUS_TRAINING.value, - loss_type=_LOSS_TYPE.value, - loss_normalization=_LOSS_NORMALIZATION.value, - trained_variable_groups=_TRAINED_VARIABLES.value, - learning_rate=_LEARNING_RATE.value, - decay_steps=_DECAY_STEPS.value, - decay_rate=_DECAY_RATE.value, - learning_rate_schedule=_LEARNING_RATE_SCHEDULE.value, - optimizer_type=_OPTIMIZER_TYPE.value, - grad_clip_norm=_GRAD_CLIP_NORM.value, - use_delta_loss=_GEMATRIA_USE_SEQ2SEQ_LOSS.value, - collected_percentile_ranks=tuple( - map(int, _COLLECTED_PERCENTILE_RANKS.value) - ), - num_training_worker_replicas=num_replicas, - num_training_worker_replicas_to_aggregate=num_replicas_to_aggregate, - is_chief=is_chief, - **model_kwargs, - ) - 
model.initialize() - with timer.scoped('Loading basic blocks'): - if _ACTION.value not in model_options.ACTIONS_WITHOUT_INPUT_DATA: - input_files = _INPUT_FILES.value - if not input_files: - sys.exit( - 'At least one .tfrecord file must be specified through' - ' --gematria_input_file.' - ) - basic_block_protos = _make_basic_block_reader_from_command_line_flags( - input_files, _THROUGHPUT_SOURCE_FILTERS.value - ) - blocks_with_throughput = _extract_basic_blocks_with_throughput( - model, basic_block_protos - ) - else: - basic_block_protos = None - blocks_with_throughput = None - max_instructions_in_batch = _GEMATRIA_MAX_INSTRUCTIONS_IN_BATCH.value - if _ACTION.value == model_options.Action.EVAL: - session_hooks = None - model.run_continuous_evaluation( - tuple(blocks_with_throughput), - _CHECKPOINT_DIR.value, - _GEMATRIA_SUMMARY_DIR.value, - tf_master=_MASTER.value, - session_hooks=session_hooks, - eval_interval_seconds=_GEMATRIA_EVAL_INTERVAL_SECS.value, - max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value, - max_instructions_in_batch=max_instructions_in_batch, - ) - elif _ACTION.value == model_options.Action.PREDICT: - with _session_from_checkpoint(_CHECKPOINT_FILE.value) as sess: - output_blocks = inference.predict_for_protos( - model, - sess, - basic_block_protos, - max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value, - max_instructions_in_batch=max_instructions_in_batch, + dev = tf.train.replica_device_setter( + ps_tasks=_GEMATRIA_TRAINING_PS_TASKS.value + ) + with tf.device(dev): + with timer.scoped('Creating model: ' + model_class.__name__): + num_replicas = _GEMATRIA_NUM_TRAINING_WORKER_REPLICAS.value + num_replicas_to_aggregate = ( + _GEMATRIA_NUM_TRAINING_WORKER_REPLICAS_TO_AGGREGATE.value + ) + model = model_class( # pytype: disable=wrong-arg-types + model_name=_MODEL_NAME.value, + task_list=_task_names_from_command_line_flags(), + synchronous_training=_GEMATRIA_SYNCHRONOUS_TRAINING.value, + loss_type=_LOSS_TYPE.value, + loss_normalization=_LOSS_NORMALIZATION.value, + trained_variable_groups=_TRAINED_VARIABLES.value, + learning_rate=_LEARNING_RATE.value, + decay_steps=_DECAY_STEPS.value, + decay_rate=_DECAY_RATE.value, + learning_rate_schedule=_LEARNING_RATE_SCHEDULE.value, + optimizer_type=_OPTIMIZER_TYPE.value, + grad_clip_norm=_GRAD_CLIP_NORM.value, + use_delta_loss=_GEMATRIA_USE_SEQ2SEQ_LOSS.value, + collected_percentile_ranks=tuple( + map(int, _COLLECTED_PERCENTILE_RANKS.value) + ), + num_training_worker_replicas=num_replicas, + num_training_worker_replicas_to_aggregate=num_replicas_to_aggregate, + is_chief=is_chief, + **model_kwargs, + ) + model.initialize() + with timer.scoped('Loading basic blocks'): + if _ACTION.value not in model_options.ACTIONS_WITHOUT_INPUT_DATA: + input_files = _INPUT_FILES.value + if not input_files: + sys.exit( + 'At least one .tfrecord file must be specified through' + ' --gematria_input_file.' ) - tfrecord.write_protos(_GEMATRIA_OUTPUT_FILE.value, output_blocks) - elif _ACTION.value == model_options.Action.EXPORT_GRAPH_DEF: - graph_def = tf.get_default_graph().as_graph_def() - graph_def = tf.compat.v1.graph_util.remove_training_nodes( - graph_def, protected_nodes=model.output_tensor_names + basic_block_protos = _make_basic_block_reader_from_command_line_flags( + input_files, _THROUGHPUT_SOURCE_FILTERS.value ) - if _CHECKPOINT_FILE.value: - # When a checkpoint file is specified, replace tf.Variable nodes with - # tf.constant() nodes with the values of the variables from this - # checkpoint. 
-          with _session_from_checkpoint(_CHECKPOINT_FILE.value) as sess:
-            graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
-                sess=sess,
-                input_graph_def=graph_def,
-                output_node_names=model.output_tensor_names,
-            )
-        tf.io.write_graph(
-            graph_def,
-            logdir=os.path.dirname(_GRAPH_DEF_FILE.value),
-            name=os.path.basename(_GRAPH_DEF_FILE.value),
+        blocks_with_throughput = _extract_basic_blocks_with_throughput(
+            model, basic_block_protos
         )
-      elif _ACTION.value == model_options.Action.TRAIN:
-        if is_chief:
-          _resume_previous_experiment_if_needed()
-        with timer.scoped('Create training session'):
-          session = _monitored_training_session_from_flags(model, is_chief)
-        with timer.scoped('Running the training'):
-          with session:
-            randomize_expected_outputs = (
-                _TRAINING_THROUGHPUT_SELECTION.value
-                == io_options.ThroughputSelection.RANDOM
-            )
-            model.train(
-                session,
-                tuple(blocks_with_throughput),
-                max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value,
-                max_instructions_in_batch=max_instructions_in_batch,
-                num_epochs=_GEMATRIA_TRAINING_NUM_EPOCHS.value,
-                randomize_batches=_GEMATRIA_TRAINING_RANDOMIZE_BATCHES.value,
-                randomize_expected_outputs=randomize_expected_outputs,
-            )
+      else:
+        basic_block_protos = None
+        blocks_with_throughput = None
+    max_instructions_in_batch = _GEMATRIA_MAX_INSTRUCTIONS_IN_BATCH.value
+    if _ACTION.value == model_options.Action.EVAL:
+      session_hooks = None
+      model.run_continuous_evaluation(
+          tuple(blocks_with_throughput),
+          _CHECKPOINT_DIR.value,
+          _GEMATRIA_SUMMARY_DIR.value,
+          tf_master=_MASTER.value,
+          session_hooks=session_hooks,
+          eval_interval_seconds=_GEMATRIA_EVAL_INTERVAL_SECS.value,
+          max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value,
+          max_instructions_in_batch=max_instructions_in_batch,
+      )
+    elif _ACTION.value == model_options.Action.PREDICT:
+      _restore_model_from_checkpoint(_CHECKPOINT_FILE.value, model)
+      output_blocks = inference.predict_for_protos(
+          model,
+          basic_block_protos,
+          max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value,
+          max_instructions_in_batch=max_instructions_in_batch,
+      )
+      tfrecord.write_protos(_GEMATRIA_OUTPUT_FILE.value, output_blocks)
+    elif _ACTION.value == model_options.Action.TRAIN:
+      if is_chief:
+        _resume_previous_experiment_if_needed()
+      if _WARMSTART_FILE.value:
+        # If there is a checkpoint to bootstrap from, partially restore the
+        # model from it before the training starts.
+        _warmstart_from_file(model)
+      elif _WARMSTART_DIR.value:
+        # If there is a directory to bootstrap from, find the latest
+        # checkpoint in this directory and restore the model from it the same
+        # way as with _WARMSTART_FILE above.
+        _warmstart_from_dir(model)
+      randomize_expected_outputs = (
+          _TRAINING_THROUGHPUT_SELECTION.value
+          == io_options.ThroughputSelection.RANDOM
+      )
+
+      checkpoint = tf2.train.Checkpoint(model)
+      checkpoint_manager = tf2.train.CheckpointManager(
+          checkpoint,
+          _CHECKPOINT_DIR.value,
+          _GEMATRIA_CHECKPOINT_MAX_TO_KEEP.value,
+      )
+
+      def checkpoint_model():
+        checkpoint_manager.save()
+
+      model.train(
+          tuple(blocks_with_throughput),
+          max_blocks_in_batch=_GEMATRIA_MAX_BLOCKS_IN_BATCH.value,
+          max_instructions_in_batch=max_instructions_in_batch,
+          num_epochs=_GEMATRIA_TRAINING_NUM_EPOCHS.value,
+          randomize_batches=_GEMATRIA_TRAINING_RANDOMIZE_BATCHES.value,
+          randomize_expected_outputs=randomize_expected_outputs,
+          hooks=[(_GEMATRIA_SAVE_CHECKPOINT_EPOCHS.value, checkpoint_model)],
+      )
diff --git a/gematria/model/python/main_function_test.py b/gematria/model/python/main_function_test.py
index bb1f148f..a14b874a 100644
--- a/gematria/model/python/main_function_test.py
+++ b/gematria/model/python/main_function_test.py
@@ -15,6 +15,7 @@
 import copy
 import functools
 from os import path
+import os
 import re
 from unittest import mock
 
@@ -32,7 +33,7 @@
 from gematria.testing.python import matchers
 from gematria.testing.python import model_test
 import numpy as np
-import tensorflow.compat.v1 as tf
+import tensorflow as tf
 
 FLAGS = flags.FLAGS
 
@@ -49,30 +50,26 @@ class TestModel(model_base.ModelBase):
   num_blocks_in_batch: int = 0
   num_instructions_in_batch: int = 0
 
+  def __init__(self, **kwargs):
+    super().__init__(**kwargs)
+    self._prediction_var = tf.Variable(
+        tf.zeros((1, self.num_tasks), dtype=self.dtype)
+    )
+
   # @Override
-  def _create_tf_graph(self):
-    self.prediction_var = tf.get_variable(
-        'prediction',
-        (1, self.num_tasks),
-        dtype=self.dtype,
-        initializer=tf.initializers.constant(0),
-    )
-    self.output_shape_tensor = tf.placeholder(dtype=tf.dtypes.int32, shape=(2,))
-    self.output_deltas_shape_tensor = tf.placeholder(
-        dtype=tf.dtypes.int32, shape=(2,)
-    )
-    if self._use_deltas:
-      self._output_tensor_deltas = tf.broadcast_to(
-          self.prediction_var,
-          self.output_deltas_shape_tensor,
-          name=self.OUTPUT_TENSOR_DELTAS_NAME,
-      )
+  def _forward(self, feed_dict):
+    if not self._use_deltas:
+      return {
+          'output': tf.broadcast_to(
+              self._prediction_var, feed_dict['output_shape']
+          )
+      }
     else:
-      self._output_tensor = tf.broadcast_to(
-          self.prediction_var,
-          self.output_shape_tensor,
-          name=self.OUTPUT_TENSOR_NAME,
-      )
+      return {
+          'output_deltas': tf.broadcast_to(
+              self._prediction_var, feed_dict['output_deltas_shape']
+          )
+      }
 
   # @Override
   def _start_batch(self):
@@ -88,10 +85,8 @@ def _add_basic_block_to_batch(self, block):
 
   # @Override
   def _make_batch_feed_dict(self):
     return {
-        self.output_shape_tensor: np.array(
-            (self.num_blocks_in_batch, self.num_tasks)
-        ),
-        self.output_deltas_shape_tensor: np.array(
+        'output_shape': np.array((self.num_blocks_in_batch, self.num_tasks)),
+        'output_deltas_shape': np.array(
             (self.num_instructions_in_batch, self.num_tasks)
         ),
     }
@@ -117,10 +112,9 @@ def setUp(self):
 
   def _create_checkpoint_file(
       self,
-      filename,
+      checkpoint_prefix,
       prediction_value,
       *model_args,
-      global_step=None,
      **model_kwargs,
   ):
     """Creates a checkpoint file for the test model.
 
     model predicts 'prediction_value' for all basic blocks.
 
     Args:
-      filename: The name of the checkpoint file.
+      checkpoint_prefix: The file prefix under which the checkpoint is saved.
      prediction_value: The value predicted by the model loaded from the
        checkpoint file.
      *model_args: Extra positional arguments, passed to the constructor of
        the model.
-      global_step: The value of global step used for the checkpoint. When None,
-        the checkpoint in the model is not modified.
      **model_kwargs: Extra keyword arguments, passed to the constructor of
        the model.
+
+    Returns:
+      The path to the checkpoint file that was created.
     """
     model = TestModel(*model_args, dtype=tf.dtypes.float32, **model_kwargs)
     model.initialize()
-    with self.session() as sess:
-      sess.run(tf.global_variables_initializer())
-      sess.run(tf.assign(model.prediction_var, [[prediction_value]]))
-      if global_step is not None:
-        sess.run(tf.assign(model.global_step, global_step))
-      saver = tf.train.Saver()
-      saver.save(sess, filename, global_step=global_step)
+    model._prediction_var.assign([[prediction_value]])
+    checkpoint = tf.train.Checkpoint(model)
+    return checkpoint.save(checkpoint_prefix)
 
   def _assert_file_exists(self, pattern):
     """Checks that the working directory contains a file.
@@ -258,13 +249,15 @@ def test_predict(self):
     predicted_value = 123456
     max_blocks_in_batch = 15
     max_instructions_in_batch = 124
-    checkpoint_filename = path.join(
+    checkpoint_prefix = path.join(
         self.work_directory.full_path, 'checkpoint.ckpt'
     )
     output_filename = path.join(
         self.work_directory.full_path, 'output.tfrecord'
     )
-    self._create_checkpoint_file(checkpoint_filename, predicted_value)
+    checkpoint_filename = self._create_checkpoint_file(
+        checkpoint_prefix, predicted_value
+    )
 
     model = None
@@ -298,7 +291,6 @@ def MockModel(*args, **kwargs):
 
     inference.predict_for_protos.assert_called_once_with(
         model,
-        mock.ANY,  # The TF session.
         mock.ANY,  # An iterable object reading the basic blocks.
         max_blocks_in_batch=max_blocks_in_batch,
         max_instructions_in_batch=max_instructions_in_batch,
@@ -341,13 +333,15 @@ def test_predict_with_custom_name(self):
     predicted_value = 123456
     max_blocks_in_batch = 15
     max_instructions_in_batch = 124
-    checkpoint_filename = path.join(
+    checkpoint_prefix = path.join(
         self.work_directory.full_path, 'checkpoint.ckpt'
     )
     output_filename = path.join(
         self.work_directory.full_path, 'output.tfrecord'
     )
-    self._create_checkpoint_file(checkpoint_filename, predicted_value)
+    checkpoint_filename = self._create_checkpoint_file(
+        checkpoint_prefix, predicted_value
+    )
 
     FLAGS.gematria_action = model_options.Action.PREDICT
     FLAGS.gematria_model_name = 'CustomModelName'
@@ -453,39 +447,39 @@ def MockModel(*args, **kwargs):
     self.assertEqual(model.num_tasks, 1)
     self.assertEqual(model._use_delta_loss, use_seq2seq_loss)
     model.train.assert_called_once_with(
-        mock.ANY,  # The TF session.
         matchers.SequenceEqual(expected_blocks),
         max_blocks_in_batch=max_blocks_in_batch,
         max_instructions_in_batch=max_instructions_in_batch,
         num_epochs=num_epochs,
         randomize_batches=randomize_batches,
         randomize_expected_outputs=True,
+        hooks=mock.ANY,  # The exact hooks are an implementation detail.
     )
 
-    # Check that the files created by the monitored session are there.
+    # Check that the files created during the training are there.
     self._assert_file_exists('checkpoint/checkpoint')
-    self._assert_file_exists('checkpoint/graph.pbtxt')
-    self._assert_file_exists('checkpoint/model.ckpt-*')
-    self._assert_file_exists('summary/events.out.tfevents.*')
+    self._assert_file_exists('checkpoint/ckpt-1.index')
+    # TODO(boomanaiden154): Fix this
+    # self._assert_file_exists('summary/events.out.tfevents.*')
 
     # Try to load the latest checkpoint with the model.
model = TestModel(dtype=tf.dtypes.float32) model.initialize() - with self.session() as sess: - saver = tf.train.Saver() - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - saver.restore(sess, latest_checkpoint) - - # Inspect the value of the prediction variable. It is initialized to zero, - # and it must change during the training. While it is not clear what the - # actual value will be, it is certain that it will be greater than zero. - prediction = sess.run(model.prediction_var) - self.assertLen(prediction, 1) - self.assertGreater(prediction[0], 0) - - # Check the value of the global step loaded from the checkpoint. This - # should be equal to the number of training epochs. - self.assertEqual(sess.run(model.global_step), num_epochs) + latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) + checkpoint = tf.train.Checkpoint(model) + checkpoint.restore(latest_checkpoint) + + # Inspect the value of the prediction variable. It is initialized to zero, + # and it must change during the training. While it is not clear what the + # actual value will be, it is certain that it will be greater than zero. + prediction = model._prediction_var.numpy() + self.assertLen(prediction, 1) + self.assertGreater(prediction[0], 0) + + # Check the value of the global step loaded from the checkpoint. This + # should be equal to the number of training epochs. + # TODO(boomanaiden154): Fix this + # self.assertEqual(sess.run(model.global_step), num_epochs) @flagsaver.flagsaver def test_train_with_min_throughput(self): @@ -539,13 +533,13 @@ def MockModel(*args, **kwargs): ] model.train.assert_called_once_with( - mock.ANY, # The TF session. matchers.SequenceEqual(expected_blocks), max_blocks_in_batch=max_blocks_in_batch, max_instructions_in_batch=max_instructions_in_batch, num_epochs=num_epochs, randomize_batches=randomize_batches, randomize_expected_outputs=False, + hooks=mock.ANY, # Any hooks, they are an implementation detail. ) @flagsaver.flagsaver @@ -603,13 +597,13 @@ def MockModel(*args, **kwargs): ] model.train.assert_called_once_with( - mock.ANY, # The TF session. matchers.SequenceEqual(expected_blocks), max_blocks_in_batch=max_blocks_in_batch, max_instructions_in_batch=max_instructions_in_batch, num_epochs=num_epochs, randomize_batches=randomize_batches, randomize_expected_outputs=False, + hooks=mock.ANY, # Any hooks, they are an implementation detail. ) def test_train_with_resume(self): @@ -619,30 +613,25 @@ def test_train_with_resume(self): experiment to the directory of the current experiment. """ predicted_value = 123.0 - global_step = 999 old_checkpoint_dir = path.join(self.work_directory, 'old') new_checkpoint_dir = path.join(self.work_directory, 'new') summary_dir = path.join(self.work_directory, 'summaries') # Create a checkpoint in the "old" directory. - old_checkpoint_file = path.join(old_checkpoint_dir, 'model.ckpt') + old_checkpoint_prefix = path.join(old_checkpoint_dir, 'old_checkpoint') tf.io.gfile.makedirs(old_checkpoint_dir) - self._create_checkpoint_file( - old_checkpoint_file, predicted_value, global_step=global_step - ) + self._create_checkpoint_file(old_checkpoint_prefix, predicted_value) # Check that the checkpoint dir has the expected structure. There must be at # least a file called "checkpoint" that contains the list of the actual - # checkpoints in text format. We check that the file is there, it contains - # references to the old dir and no references to the "new" checkpoint dir. + # checkpoints in text format. 
with tf.io.gfile.GFile( path.join(old_checkpoint_dir, 'checkpoint'), 'r' ) as f: checkpoint_list_pbtxt = f.read() - self.assertIn(old_checkpoint_file, checkpoint_list_pbtxt) - self.assertNotIn(new_checkpoint_dir, checkpoint_list_pbtxt) - checkpoint_files = tf.io.gfile.glob(old_checkpoint_file + '*') + self.assertIn('old_checkpoint', checkpoint_list_pbtxt) + checkpoint_files = tf.io.gfile.glob(old_checkpoint_prefix + '*') self.assertNotEmpty(checkpoint_files) model = None @@ -686,14 +675,12 @@ def MockModel(*args, **kwargs): f'Not all files were copied.\nOld: {old_glob}\nNew: {new_glob}', ) - # Check that all paths related to the old directory have been replaced with - # the new one. + # Check that the old checkpoint still exists in the new checkpoint file. with tf.io.gfile.GFile( path.join(new_checkpoint_dir, 'checkpoint'), 'r' ) as f: checkpoint_list_pbtxt = f.read() - self.assertNotIn(old_checkpoint_dir, checkpoint_list_pbtxt) - self.assertIn(new_checkpoint_dir, checkpoint_list_pbtxt) + self.assertIn('old_checkpoint', checkpoint_list_pbtxt) @flagsaver.flagsaver def test_eval_with_source_filters(self): @@ -760,54 +747,6 @@ def MockModel(*args, **kwargs): self.assertLen(FLAGS.gematria_throughput_source_filter, model.num_tasks) self.assertEqual(model.task_list, ('task_1', 'task_2', 'task_3')) - @flagsaver.flagsaver - def test_export_graph_def(self): - """Tests exporting the model to a GraphDef proto.""" - graph_def_filename = path.join( - self.work_directory.full_path, 'graph_def.pbtxt' - ) - - FLAGS.gematria_action = model_options.Action.EXPORT_GRAPH_DEF - FLAGS.gematria_graph_def_file = graph_def_filename - - main_function.run_gematria_model_from_command_line_flags( - TestModel, dtype=tf.dtypes.float32 - ) - with open(graph_def_filename, 'r') as graph_def_file: - graph_def_pbtxt = graph_def_file.read() - # We did not replace variable nodes with constants, so there should be at - # least one variable node. - self.assertIn('Variable', graph_def_pbtxt) - - @flagsaver.flagsaver - def test_export_frozen_graph_def(self): - """Tests exporting a frozen model to a GraphDef proto.""" - predicted_value = 123654 - graph_def_filename = path.join( - self.work_directory.full_path, 'graph_def.pbtxt' - ) - - checkpoint_filename = path.join( - self.work_directory.full_path, 'checkpoint.ckpt' - ) - self._create_checkpoint_file(checkpoint_filename, predicted_value) - - FLAGS.gematria_action = model_options.Action.EXPORT_GRAPH_DEF - FLAGS.gematria_graph_def_file = graph_def_filename - FLAGS.gematria_checkpoint_file = checkpoint_filename - - main_function.run_gematria_model_from_command_line_flags( - TestModel, dtype=tf.dtypes.float32 - ) - with open(graph_def_filename, 'r') as graph_def_file: - graph_def_pbtxt = graph_def_file.read() - # Check that the graph definition is not empty, there are no variable nodes, - # and it contains the predicted value (which should have been injected into - # it as a constant). 
-    self.assertNotEmpty(graph_def_pbtxt)
-    self.assertNotIn('Variable', graph_def_pbtxt)
-    self.assertIn(str(predicted_value), graph_def_pbtxt)
-
   @flagsaver.flagsaver
   def test_multi_task_flags(self):
     """Tests validation of multi-task learning flags."""
@@ -833,5 +772,4 @@ def test_multi_task_flags(self):
 
 
 if __name__ == '__main__':
-  tf.disable_v2_behavior()
   tf.test.main()
diff --git a/gematria/model/python/model_base.py b/gematria/model/python/model_base.py
index 6518752c..3d5ca5a5 100644
--- a/gematria/model/python/model_base.py
+++ b/gematria/model/python/model_base.py
@@ -1210,6 +1210,7 @@ def train(
       max_instructions_in_batch: Optional[int],
       randomize_batches: bool = True,
       randomize_expected_outputs: bool = False,
+      hooks=(),
   ) -> Optional[training.TrainingEpochStats]:
     """Runs training of the model on the given training data.
 
@@ -1231,6 +1232,8 @@
       randomize_expected_outputs: Set to True to randomly select the expected
         outputs used for training from the available values. When False, it
         takes the first value from the list.
+      hooks: A sequence of (num_epochs, callback) pairs; each callback is
+        invoked after every epoch whose index is divisible by num_epochs.
 
     Returns:
       The loss before the last training step. Returns None when no training was
@@ -1270,10 +1273,13 @@ def run_one_epoch():
       )
       return self.train_batch(schedule)
 
-    for _ in range(0, num_epochs):
+    for epoch_index in range(num_epochs):
       stats = run_one_epoch()
       logging.info('Training: %s', stats)
-    return stats
+      for epochs_every, hook_function in hooks:
+        if epoch_index % epochs_every == 0:
+          hook_function()
+    return stats
 
   def compute_loss(self, schedule: FeedDict):
     output = self(schedule, train=True)
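A note on the hooks contract introduced in ModelBase.train above: each hook is a (num_epochs, callback) pair, and train() invokes the callback after every epoch whose zero-based index is divisible by num_epochs, so the callback also fires after the very first epoch. The sketch below is illustrative only; the stand-in tf.Module and the /tmp/ckpts directory are assumptions, not part of this patch. It mirrors how run_gematria_model_from_command_line_flags wires a tf.train.CheckpointManager callback into the hook list:

    # Minimal sketch of the (num_epochs, callback) hook contract. The
    # stand-in tf.Module and the /tmp/ckpts directory are hypothetical.
    import tensorflow as tf

    model = tf.Module()  # Stand-in for a concrete ModelBase subclass.
    model.v = tf.Variable(0.0)

    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(
        checkpoint, '/tmp/ckpts', max_to_keep=3
    )

    def checkpoint_model():
      manager.save()  # Writes /tmp/ckpts/ckpt-N and updates the checkpoint list.

    hooks = [(100, checkpoint_model)]
    for epoch_index in range(300):
      # One training epoch would run here.
      for epochs_every, hook_function in hooks:
        if epoch_index % epochs_every == 0:
          hook_function()  # Fires after epochs 0, 100, and 200.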