From f47642702cdb3ee68f5579ccc08a8085c2819532 Mon Sep 17 00:00:00 2001
From: "Restyled.io" <commits@restyled.io>
Date: Thu, 19 Mar 2020 20:18:10 +0000
Subject: [PATCH 1/4] Restyled by autopep8

---
 consts.py          |  76 +++----
 network.py         | 341 +++++++++++++++----------------
 policy_training.py | 487 +++++++++++++++++++++++----------------------
 util.py            | 121 +++++------
 4 files changed, 518 insertions(+), 507 deletions(-)

diff --git a/consts.py b/consts.py
index b0eea0e..976de0a 100644
--- a/consts.py
+++ b/consts.py
@@ -22,37 +22,37 @@
 FOURS = []
 # Horizontal
 for row in range(HEIGHT):
-  for column in range(WIDTH - 3):
-    four = np.zeros([HEIGHT, WIDTH], bool)
-    for i in range(4):
-      four[row, column + i] = True
-    FOURS.append(four)
+    for column in range(WIDTH - 3):
+        four = np.zeros([HEIGHT, WIDTH], bool)
+        for i in range(4):
+            four[row, column + i] = True
+        FOURS.append(four)
 # Vertical
 for row in range(HEIGHT - 3):
-  for column in range(WIDTH):
-    four = np.zeros([HEIGHT, WIDTH], bool)
-    for i in range(4):
-      four[row + i, column] = True
-    FOURS.append(four)
+    for column in range(WIDTH):
+        four = np.zeros([HEIGHT, WIDTH], bool)
+        for i in range(4):
+            four[row + i, column] = True
+        FOURS.append(four)
 # Diagonal
 for row in range(HEIGHT - 3):
-  for column in range(WIDTH - 3):
-    four1 = np.zeros([HEIGHT, WIDTH], bool)
-    four2 = np.zeros([HEIGHT, WIDTH], bool)
-    for i in range(4):
-      four1[row + i, column + i] = True
-      four2[row + 3 - i, column + i] = True
-    FOURS.append(four1)
-    FOURS.append(four2)
+    for column in range(WIDTH - 3):
+        four1 = np.zeros([HEIGHT, WIDTH], bool)
+        four2 = np.zeros([HEIGHT, WIDTH], bool)
+        for i in range(4):
+            four1[row + i, column + i] = True
+            four2[row + 3 - i, column + i] = True
+        FOURS.append(four1)
+        FOURS.append(four2)
 FOURS = np.array(FOURS)
 
 DISK_FOURS = {}
 DISK_FOUR_COUNTS = np.zeros([HEIGHT, WIDTH], int)
 for row in range(HEIGHT):
-  for column in range(WIDTH):
-    disk_fours = [four for four in FOURS if four[row, column]]
-    DISK_FOURS[row, column] = disk_fours
-    DISK_FOUR_COUNTS[row, column] = len(disk_fours)
+    for column in range(WIDTH):
+        disk_fours = [four for four in FOURS if four[row, column]]
+        DISK_FOURS[row, column] = disk_fours
+        DISK_FOUR_COUNTS[row, column] = len(disk_fours)
 
 # Results
 RED_WIN = 1
@@ -68,21 +68,21 @@
 NEW_POSITION_HASH = np.uint64(0)
 DISK_HASHES = np.zeros([COLOURS, HEIGHT, WIDTH], np.uint64)
 for colour in range(COLOURS):
-  for row in range(HEIGHT):
-    disks_in_column = row ^ (row + 1)
-    yellow_disks = 2**(row + 3) if colour == YELLOW else 0
-    row_hash = disks_in_column | yellow_disks
-    for column in range(WIDTH):
-      row_column_hash = row_hash << (9 * column)
-      DISK_HASHES[colour, HEIGHT - row - 1, column] = row_column_hash
+    for row in range(HEIGHT):
+        disks_in_column = row ^ (row + 1)
+        yellow_disks = 2**(row + 3) if colour == YELLOW else 0
+        row_hash = disks_in_column | yellow_disks
+        for column in range(WIDTH):
+            row_column_hash = row_hash << (9 * column)
+            DISK_HASHES[colour, HEIGHT - row - 1, column] = row_column_hash
 
 if __name__ == '__main__':
-  print(FOURS[0])
-  print(DISK_FOURS[0, 0])
-  print(DISK_FOUR_COUNTS)
-  print(TILED_COLUMNS.reshape([HEIGHT, WIDTH]))
-  print(TILED_ROWS.reshape([HEIGHT, WIDTH]))
-  print(ROW_EDGE_DISTANCE.reshape([HEIGHT, WIDTH]))
-  print(COLUMN_EDGE_DISTANCE.reshape([HEIGHT, WIDTH]))
-  print(ODDS.reshape([HEIGHT, WIDTH]))
-  print(np.array(map(bin, DISK_HASHES.flatten())).reshape(DISK_HASHES.shape))
+    print(FOURS[0])
+    print(DISK_FOURS[0, 0])
+    print(DISK_FOUR_COUNTS)
+    print(TILED_COLUMNS.reshape([HEIGHT, WIDTH]))
+    print(TILED_ROWS.reshape([HEIGHT, WIDTH]))
+    print(ROW_EDGE_DISTANCE.reshape([HEIGHT, WIDTH]))
+    print(COLUMN_EDGE_DISTANCE.reshape([HEIGHT, WIDTH]))
+    print(ODDS.reshape([HEIGHT, WIDTH]))
+    print(np.array(map(bin, DISK_HASHES.flatten())).reshape(DISK_HASHES.shape))
diff --git a/network.py b/network.py
index 4fa6514..bc653ae 100644
--- a/network.py
+++ b/network.py
@@ -5,178 +5,181 @@
 
 
 class BaseNetwork(object):
-  def __init__(self, scope, use_symmetry):
-    self.scope = scope
-
-    with tf.name_scope('inputs'):
-      self.turn = tf.placeholder(tf.float32, shape=[None], name='turn')
-      tiled_turn = tf.tile(
-          tf.reshape(util.turn_win(self.turn), [-1, 1, 1, 1]),
-          [1, 2, HEIGHT, WIDTH])
-
-      self.disks = tf.placeholder(
-          tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='disks')
-
-      self.empty = tf.placeholder(
-          tf.float32, shape=[None, HEIGHT, WIDTH], name='empty')
-      empty = tf.expand_dims(self.empty, axis=1)
-
-      self.legal_moves = tf.placeholder(
-          tf.float32, shape=[None, HEIGHT, WIDTH], name='legal_moves')
-      legal_moves = tf.expand_dims(self.legal_moves, axis=1)
-
-      self.threats = tf.placeholder(
-          tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='threats')
-
-      constant_features = np.array(
-          [TILED_ROWS, ODDS, ROW_EDGE_DISTANCE, COLUMN_EDGE_DISTANCE],
-          dtype=np.float32).reshape([1, 4, HEIGHT, WIDTH])
-      batch_size = tf.shape(self.turn)[0]
-      tiled_constant_features = tf.tile(constant_features,
-                                        [batch_size, 1, 1, 1])
-
-      feature_planes = tf.concat(
-          [
-              tiled_turn, self.disks, empty, legal_moves, self.threats,
-              tiled_constant_features
-          ],
-          axis=1)
-
-      if use_symmetry:
-        # Interleave horizontally flipped position
-        feature_planes_shape = [-1] + feature_planes.shape.as_list()[1:]
-        flipped = tf.reverse(feature_planes, axis=[3])
-        feature_planes = tf.reshape(
-            tf.stack([feature_planes, flipped], axis=1), feature_planes_shape)
-
-    with tf.name_scope('conv_layers'):
-      if self.gpu_available():
-        data_format = 'channels_first'
-      else:
-        feature_planes = tf.transpose(feature_planes, [0, 2, 3, 1])
-        data_format = 'channels_last'
-
-      conv1 = tf.layers.conv2d(
-          feature_planes,
-          filters=32,
-          kernel_size=[4, 5],
-          padding='same',
-          data_format=data_format,
-          use_bias=False,
-          name='conv1')
-
-      conv2 = tf.layers.conv2d(
-          conv1,
-          filters=32,
-          kernel_size=[4, 5],
-          padding='same',
-          data_format=data_format,
-          activation=tf.nn.relu,
-          name='conv2')
-
-      conv3 = tf.layers.conv2d(
-          conv2,
-          filters=32,
-          kernel_size=[4, 5],
-          padding='same',
-          data_format=data_format,
-          activation=tf.nn.relu,
-          name='conv3')
-
-      final_conv = tf.layers.conv2d(
-          conv3,
-          filters=1,
-          kernel_size=[1, 1],
-          data_format=data_format,
-          name='final_conv')
-      disk_bias = tf.get_variable('disk_bias', shape=[TOTAL_DISKS])
-      self.conv_output = tf.add(
-          tf.contrib.layers.flatten(final_conv), disk_bias, name='conv_output')
-
-      self.conv_layers = [conv1, conv2, conv3, self.conv_output]
-
-  def gpu_available(self):
-    devices = device_lib.list_local_devices()
-    return len([d for d in devices if d.device_type == 'GPU']) > 0
-
-  @property
-  def variables(self):
-    # Add '/' to stop network-1 containing network-10 variables
-    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
-                             self.scope + '/')
-
-  def assign(self, other):
-    return [
-        tf.assign(other_var, self_var)
-        for self_var, other_var in zip(self.variables, other.variables)
-    ]
+    def __init__(self, scope, use_symmetry):
+        self.scope = scope
+
+        with tf.name_scope('inputs'):
+            self.turn = tf.placeholder(tf.float32, shape=[None], name='turn')
+            tiled_turn = tf.tile(
+                tf.reshape(util.turn_win(self.turn), [-1, 1, 1, 1]),
+                [1, 2, HEIGHT, WIDTH])
+
+            self.disks = tf.placeholder(
+                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='disks')
+
+            self.empty = tf.placeholder(
+                tf.float32, shape=[None, HEIGHT, WIDTH], name='empty')
+            empty = tf.expand_dims(self.empty, axis=1)
+
+            self.legal_moves = tf.placeholder(
+                tf.float32, shape=[None, HEIGHT, WIDTH], name='legal_moves')
+            legal_moves = tf.expand_dims(self.legal_moves, axis=1)
+
+            self.threats = tf.placeholder(
+                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='threats')
+
+            constant_features = np.array(
+                [TILED_ROWS, ODDS, ROW_EDGE_DISTANCE, COLUMN_EDGE_DISTANCE],
+                dtype=np.float32).reshape([1, 4, HEIGHT, WIDTH])
+            batch_size = tf.shape(self.turn)[0]
+            tiled_constant_features = tf.tile(constant_features,
+                                              [batch_size, 1, 1, 1])
+
+            feature_planes = tf.concat(
+                [
+                    tiled_turn, self.disks, empty, legal_moves, self.threats,
+                    tiled_constant_features
+                ],
+                axis=1)
+
+            if use_symmetry:
+                # Interleave horizontally flipped position
+                feature_planes_shape = [-1] + \
+                    feature_planes.shape.as_list()[1:]
+                flipped = tf.reverse(feature_planes, axis=[3])
+                feature_planes = tf.reshape(
+                    tf.stack([feature_planes, flipped], axis=1), feature_planes_shape)
+
+        with tf.name_scope('conv_layers'):
+            if self.gpu_available():
+                data_format = 'channels_first'
+            else:
+                feature_planes = tf.transpose(feature_planes, [0, 2, 3, 1])
+                data_format = 'channels_last'
+
+            conv1 = tf.layers.conv2d(
+                feature_planes,
+                filters=32,
+                kernel_size=[4, 5],
+                padding='same',
+                data_format=data_format,
+                use_bias=False,
+                name='conv1')
+
+            conv2 = tf.layers.conv2d(
+                conv1,
+                filters=32,
+                kernel_size=[4, 5],
+                padding='same',
+                data_format=data_format,
+                activation=tf.nn.relu,
+                name='conv2')
+
+            conv3 = tf.layers.conv2d(
+                conv2,
+                filters=32,
+                kernel_size=[4, 5],
+                padding='same',
+                data_format=data_format,
+                activation=tf.nn.relu,
+                name='conv3')
+
+            final_conv = tf.layers.conv2d(
+                conv3,
+                filters=1,
+                kernel_size=[1, 1],
+                data_format=data_format,
+                name='final_conv')
+            disk_bias = tf.get_variable('disk_bias', shape=[TOTAL_DISKS])
+            self.conv_output = tf.add(
+                tf.contrib.layers.flatten(final_conv), disk_bias, name='conv_output')
+
+            self.conv_layers = [conv1, conv2, conv3, self.conv_output]
+
+    def gpu_available(self):
+        devices = device_lib.list_local_devices()
+        return len([d for d in devices if d.device_type == 'GPU']) > 0
+
+    @property
+    def variables(self):
+        # Add '/' to stop network-1 containing network-10 variables
+        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
+                                 self.scope + '/')
+
+    def assign(self, other):
+        return [
+            tf.assign(other_var, self_var)
+            for self_var, other_var in zip(self.variables, other.variables)
+        ]
 
 
 class PolicyNetwork(BaseNetwork):
-  def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
-    with tf.variable_scope(scope, reuse=reuse):
-      super(PolicyNetwork, self).__init__(scope, use_symmetry)
-
-      with tf.name_scope('policy'):
-        self.temperature = tf.placeholder_with_default(
-            temperature, (), name='temperature')
-
-        disk_logits = tf.divide(
-            self.conv_output, self.temperature, name='disk_logits')
-
-        if use_symmetry:
-          # Calculate average of actual and horizontally flipped position
-          normal, flipped = tf.split(
-              tf.reshape(disk_logits, [-1, 2, HEIGHT, WIDTH]),
-              num_or_size_splits=2,
-              axis=1)
-          disk_logits = tf.reshape(
-              tf.reduce_mean(
-                  tf.concat([normal, tf.reverse(flipped, axis=[3])], axis=1),
-                  axis=1), [-1, TOTAL_DISKS])
-
-        # Make illegal moves impossible:
-        #   - Legal moves have positive logits
-        #   - Illegal moves have -ILLEGAL_PENALTY logits
-        legal_moves = tf.contrib.layers.flatten(self.legal_moves)
-        legal_disk_logits = (tf.nn.relu(disk_logits) * legal_moves +
-                             (legal_moves - 1) * ILLEGAL_PENALTY)
-
-        self.policy = tf.nn.softmax(legal_disk_logits, name='policy')
-        self.sample_move = tf.squeeze(
-            tf.multinomial(legal_disk_logits, 1) % WIDTH,
-            axis=1,
-            name='sample_move')
-
-        self.entropy = tf.reduce_sum(
-            self.policy * -tf.log(self.policy + EPSILON),  # Avoid Nans
-            axis=1,
-            name='entropy')
-
-        self.policy_layers = self.conv_layers + [
-            disk_logits, self.policy, self.entropy
-        ]
+    def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
+        with tf.variable_scope(scope, reuse=reuse):
+            super(PolicyNetwork, self).__init__(scope, use_symmetry)
+
+            with tf.name_scope('policy'):
+                self.temperature = tf.placeholder_with_default(
+                    temperature, (), name='temperature')
+
+                disk_logits = tf.divide(
+                    self.conv_output, self.temperature, name='disk_logits')
+
+                if use_symmetry:
+                    # Calculate average of actual and horizontally flipped position
+                    normal, flipped = tf.split(
+                        tf.reshape(disk_logits, [-1, 2, HEIGHT, WIDTH]),
+                        num_or_size_splits=2,
+                        axis=1)
+                    disk_logits = tf.reshape(
+                        tf.reduce_mean(
+                            tf.concat(
+                                [normal, tf.reverse(flipped, axis=[3])], axis=1),
+                            axis=1), [-1, TOTAL_DISKS])
+
+                # Make illegal moves impossible:
+                #   - Legal moves have positive logits
+                #   - Illegal moves have -ILLEGAL_PENALTY logits
+                legal_moves = tf.contrib.layers.flatten(self.legal_moves)
+                legal_disk_logits = (tf.nn.relu(disk_logits) * legal_moves +
+                                     (legal_moves - 1) * ILLEGAL_PENALTY)
+
+                self.policy = tf.nn.softmax(legal_disk_logits, name='policy')
+                self.sample_move = tf.squeeze(
+                    tf.multinomial(legal_disk_logits, 1) % WIDTH,
+                    axis=1,
+                    name='sample_move')
+
+                self.entropy = tf.reduce_sum(
+                    self.policy * -tf.log(self.policy + EPSILON),  # Avoid Nans
+                    axis=1,
+                    name='entropy')
+
+                self.policy_layers = self.conv_layers + [
+                    disk_logits, self.policy, self.entropy
+                ]
 
 
 class ValueNetwork(BaseNetwork):
-  def __init__(self, scope, use_symmetry=False):
-    with tf.variable_scope(scope):
-      super(ValueNetwork, self).__init__(scope, use_symmetry)
-
-      with tf.name_scope('value'):
-        fully_connected = tf.layers.dense(
-            self.conv_output,
-            units=64,
-            activation=tf.nn.relu,
-            name='fully_connected')
-
-        value = tf.layers.dense(fully_connected, 1, tf.tanh)
-
-        if use_symmetry:
-          # Calculate average of actual and horizontally flipped position
-          self.value = tf.reduce_mean(
-              tf.reshape(value, [-1, 2]), axis=1, name='value')
-        else:
-          self.value = tf.squeeze(value, axis=1, name='value')
-
-        self.value_layers = self.conv_layers + [fully_connected, self.value]
+    def __init__(self, scope, use_symmetry=False):
+        with tf.variable_scope(scope):
+            super(ValueNetwork, self).__init__(scope, use_symmetry)
+
+            with tf.name_scope('value'):
+                fully_connected = tf.layers.dense(
+                    self.conv_output,
+                    units=64,
+                    activation=tf.nn.relu,
+                    name='fully_connected')
+
+                value = tf.layers.dense(fully_connected, 1, tf.tanh)
+
+                if use_symmetry:
+                    # Calculate average of actual and horizontally flipped position
+                    self.value = tf.reduce_mean(
+                        tf.reshape(value, [-1, 2]), axis=1, name='value')
+                else:
+                    self.value = tf.squeeze(value, axis=1, name='value')
+
+                self.value_layers = self.conv_layers + \
+                    [fully_connected, self.value]
diff --git a/policy_training.py b/policy_training.py
index b8e74fc..f8f3a1c 100644
--- a/policy_training.py
+++ b/policy_training.py
@@ -17,258 +17,263 @@
 
 
 class PolicyTraining(object):
-  def __init__(self, config):
-    self.config = config
-    self.run_dir = util.run_directory(config)
-
-    self.session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
-        allow_growth=True)))
-
-    self.policy_network = PolicyNetwork('policy')
-    self.policy_player = PolicyPlayer(self.policy_network, self.session)
-    util.restore_or_initialize_network(self.session, self.run_dir,
-                                       self.policy_network)
-
-    # Train ops
-    self.create_train_op(self.policy_network)
-    self.writer = tf.summary.FileWriter(self.run_dir)
-    util.restore_or_initialize_scope(self.session, self.run_dir,
-                                     self.training_scope.name)
-
-    self.opponents = Opponents(
-        [RandomPlayer(),
-         RandomThreatPlayer(),
-         MaxThreatPlayer()])
-    self.opponents.restore_networks(self.session, self.run_dir)
-
-  def create_train_op(self, policy_network):
-    with tf.variable_scope('policy_training') as self.training_scope:
-      self.move = tf.placeholder(tf.int32, shape=[None], name='move')
-      self.result = tf.placeholder(tf.float32, shape=[None], name='result')
-
-      policy = tf.reshape(policy_network.policy, [-1, HEIGHT, WIDTH])
-      move = tf.expand_dims(tf.one_hot(self.move, WIDTH), axis=1)
-      turn = util.turn_win(policy_network.turn)
-      move_probability = tf.reduce_sum(policy * move, axis=[1, 2])
-
-      result_loss = -tf.reduce_mean(
-          tf.log(move_probability) * turn * self.result)
-      entropy_regularisation = (
-          -config.entropy * tf.reduce_mean(policy_network.entropy))
-      loss = result_loss + entropy_regularisation
-
-      optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
-      self.global_step = tf.contrib.framework.get_or_create_global_step()
-      self.train_op = optimizer.minimize(loss, self.global_step)
-
-      # Summary
-      tf.summary.scalar('loss', loss)
-      for var in policy_network.variables + policy_network.policy_layers:
-        tf.summary.histogram(var.name, var)
-      self.summary = tf.summary.merge_all()
-
-  def train(self):
-    for _ in range(self.config.batches):
-      opponent = self.opponents.choose_opponent()
-      games = self.play_games(opponent)
-      step, summary = self.train_games(opponent, games)
-      self.process_results(opponent, games, step, summary)
-
-      if self.opponents.all_beaten():
-        name = self.opponents.next_network_name()
-        print('All opponents beaten. Creating %s' % name)
-        self.create_new_opponent(name)
-
-      if step % 100 == 0:
+    def __init__(self, config):
+        self.config = config
+        self.run_dir = util.run_directory(config)
+
+        self.session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
+            allow_growth=True)))
+
+        self.policy_network = PolicyNetwork('policy')
+        self.policy_player = PolicyPlayer(self.policy_network, self.session)
+        util.restore_or_initialize_network(self.session, self.run_dir,
+                                           self.policy_network)
+
+        # Train ops
+        self.create_train_op(self.policy_network)
+        self.writer = tf.summary.FileWriter(self.run_dir)
+        util.restore_or_initialize_scope(self.session, self.run_dir,
+                                         self.training_scope.name)
+
+        self.opponents = Opponents(
+            [RandomPlayer(),
+             RandomThreatPlayer(),
+             MaxThreatPlayer()])
+        self.opponents.restore_networks(self.session, self.run_dir)
+
+    def create_train_op(self, policy_network):
+        with tf.variable_scope('policy_training') as self.training_scope:
+            self.move = tf.placeholder(tf.int32, shape=[None], name='move')
+            self.result = tf.placeholder(
+                tf.float32, shape=[None], name='result')
+
+            policy = tf.reshape(policy_network.policy, [-1, HEIGHT, WIDTH])
+            move = tf.expand_dims(tf.one_hot(self.move, WIDTH), axis=1)
+            turn = util.turn_win(policy_network.turn)
+            move_probability = tf.reduce_sum(policy * move, axis=[1, 2])
+
+            result_loss = -tf.reduce_mean(
+                tf.log(move_probability) * turn * self.result)
+            entropy_regularisation = (
+                -config.entropy * tf.reduce_mean(policy_network.entropy))
+            loss = result_loss + entropy_regularisation
+
+            optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
+            self.global_step = tf.contrib.framework.get_or_create_global_step()
+            self.train_op = optimizer.minimize(loss, self.global_step)
+
+            # Summary
+            tf.summary.scalar('loss', loss)
+            for var in policy_network.variables + policy_network.policy_layers:
+                tf.summary.histogram(var.name, var)
+            self.summary = tf.summary.merge_all()
+
+    def train(self):
+        for _ in range(self.config.batches):
+            opponent = self.opponents.choose_opponent()
+            games = self.play_games(opponent)
+            step, summary = self.train_games(opponent, games)
+            self.process_results(opponent, games, step, summary)
+
+            if self.opponents.all_beaten():
+                name = self.opponents.next_network_name()
+                print('All opponents beaten. Creating %s' % name)
+                self.create_new_opponent(name)
+
+            if step % 100 == 0:
+                self.save()
+
         self.save()
 
-    self.save()
-
-  def save(self):
-    util.save_network(self.session, self.run_dir, self.policy_network)
-    util.save_scope(self.session, self.run_dir, self.training_scope.name)
-    self.opponents.save_opponent_stats(self.run_dir)
-
-  def play_games(self, opponent):
-    # Create games
-    games = incomplete_games = [Game() for _ in range(self.config.batch_size)]
-
-    # Let opponent play first in half of the games
-    self.play_move(games[0:len(games) // 2], opponent)
-    player = self.policy_player
-
-    while incomplete_games:
-      self.play_move(incomplete_games, player)
-      player = self.policy_player if player != self.policy_player else opponent
-      incomplete_games = [
-          game for game in incomplete_games if not game.position.gameover()
-      ]
-
-    return games
-
-  def play_move(self, games, player):
-    positions = [game.position for game in games]
-    moves = player.play(positions)
-
-    for game, move in zip(games, moves):
-      game.move(move, player == self.policy_player)
-
-  def train_games(self, opponent, games):
-    turn, disks, empty, legal_moves, threats, moves, results = ([], [], [], [],
-                                                                [], [], [])
-    for game in games:
-      for position, move in game.policy_player_moves:
-        turn.append(position.turn)
-        disks.append(position.disks)
-        empty.append(position.empty)
-        legal_moves.append(position.legal_moves)
-        threats.append(position.threats)
-        moves.append(move)
-        results.append(game.result)
-
-    _, step, summary = self.session.run(
-        [self.train_op, self.global_step, self.summary], {
-            self.policy_network.turn: turn,
-            self.policy_network.disks: disks,
-            self.policy_network.empty: empty,
-            self.policy_network.legal_moves: legal_moves,
-            self.policy_network.threats: threats,
-            self.move: moves,
-            self.result: results
-        })
-
-    return step, summary
-
-  def process_results(self, opponent, games, step, summary):
-    win_rate = np.mean([game.policy_player_score for game in games])
-    average_moves = sum(len(game.moves) for game in games) / self.config.batch_size
-
-    opponent_summary = tf.Summary()
-    opponent_summary.value.add(
-        tag=self.training_scope.name + '/' + opponent.name + '/win_rate',
-        simple_value=win_rate)
-    opponent_summary.value.add(
-        tag=self.training_scope.name + '/' + opponent.name + '/moves',
-        simple_value=average_moves)
-
-    self.writer.add_summary(summary, step)
-    self.writer.add_summary(opponent_summary, step)
-
-    self.opponents.update_win_rate(opponent, win_rate)
-
-    print('Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves' %
-          (step, opponent.name, win_rate, self.opponents.win_rates[opponent],
-           average_moves))
-
-  def create_new_opponent(self, name):
-    # Create clone of policy_player
-    clone = PolicyNetwork(name)
-    self.session.run(self.policy_network.assign(clone))
-    util.save_network(self.session, self.run_dir, clone)
-    new_opponent = PolicyPlayer(clone, self.session)
-
-    self.opponents.decrease_win_rates()
-    self.opponents.add_opponent(new_opponent)
+    def save(self):
+        util.save_network(self.session, self.run_dir, self.policy_network)
+        util.save_scope(self.session, self.run_dir, self.training_scope.name)
+        self.opponents.save_opponent_stats(self.run_dir)
+
+    def play_games(self, opponent):
+        # Create games
+        games = incomplete_games = [Game()
+                                    for _ in range(self.config.batch_size)]
+
+        # Let opponent play first in half of the games
+        self.play_move(games[0:len(games) // 2], opponent)
+        player = self.policy_player
+
+        while incomplete_games:
+            self.play_move(incomplete_games, player)
+            player = self.policy_player if player != self.policy_player else opponent
+            incomplete_games = [
+                game for game in incomplete_games if not game.position.gameover()
+            ]
+
+        return games
+
+    def play_move(self, games, player):
+        positions = [game.position for game in games]
+        moves = player.play(positions)
+
+        for game, move in zip(games, moves):
+            game.move(move, player == self.policy_player)
+
+    def train_games(self, opponent, games):
+        turn, disks, empty, legal_moves, threats, moves, results = ([], [], [], [],
+                                                                    [], [], [])
+        for game in games:
+            for position, move in game.policy_player_moves:
+                turn.append(position.turn)
+                disks.append(position.disks)
+                empty.append(position.empty)
+                legal_moves.append(position.legal_moves)
+                threats.append(position.threats)
+                moves.append(move)
+                results.append(game.result)
+
+        _, step, summary = self.session.run(
+            [self.train_op, self.global_step, self.summary], {
+                self.policy_network.turn: turn,
+                self.policy_network.disks: disks,
+                self.policy_network.empty: empty,
+                self.policy_network.legal_moves: legal_moves,
+                self.policy_network.threats: threats,
+                self.move: moves,
+                self.result: results
+            })
+
+        return step, summary
+
+    def process_results(self, opponent, games, step, summary):
+        win_rate = np.mean([game.policy_player_score for game in games])
+        average_moves = sum(len(game.moves)
+                            for game in games) / self.config.batch_size
+
+        opponent_summary = tf.Summary()
+        opponent_summary.value.add(
+            tag=self.training_scope.name + '/' + opponent.name + '/win_rate',
+            simple_value=win_rate)
+        opponent_summary.value.add(
+            tag=self.training_scope.name + '/' + opponent.name + '/moves',
+            simple_value=average_moves)
+
+        self.writer.add_summary(summary, step)
+        self.writer.add_summary(opponent_summary, step)
+
+        self.opponents.update_win_rate(opponent, win_rate)
+
+        print('Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves' %
+              (step, opponent.name, win_rate, self.opponents.win_rates[opponent],
+               average_moves))
+
+    def create_new_opponent(self, name):
+        # Create clone of policy_player
+        clone = PolicyNetwork(name)
+        self.session.run(self.policy_network.assign(clone))
+        util.save_network(self.session, self.run_dir, clone)
+        new_opponent = PolicyPlayer(clone, self.session)
+
+        self.opponents.decrease_win_rates()
+        self.opponents.add_opponent(new_opponent)
 
 
 class Opponents(object):
-  def __init__(self, opponents):
-    self.win_rates = {}
-    for opponent in opponents:
-      self.add_opponent(opponent)
-
-  def add_opponent(self, opponent):
-    self.win_rates[opponent] = EPSILON
-
-  def decrease_win_rates(self):
-    # Decrease win rate so tough players must be beaten again
-    self.win_rates = {
-        opponent: max(2 * win_rate - 1, EPSILON)
-        for opponent, win_rate in self.win_rates.items()
-    }
-
-  def update_win_rate(self, opponent, win_rate):
-    # Win rate is a moving average
-    self.win_rates[opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1
-
-  def all_beaten(self):
-    result = True
-    for win_rate in self.win_rates.values():
-      result = result and win_rate > 0.7
-    return result
-
-  def choose_opponent(self):
-    # More difficult opponents are chosen more often
-    win_rates = np.maximum(list(self.win_rates.values()), 0.1)
-    probs = (1 / win_rates**2) - 1
-    normalised_probs = probs / probs.sum()
-    return np.random.choice(list(self.win_rates.keys()), p=normalised_probs)
-
-  def next_network_name(self):
-    network_opponents = len([
-        opponent for opponent in self.win_rates.keys()
-        if type(opponent) == PolicyPlayer
-    ])
-    return 'network-%d' % (network_opponents + 1)
-
-  def save_opponent_stats(self, run_dir):
-    with open(os.path.join(run_dir, 'opponents'), 'w') as f:
-      f.write('\n'.join([
-          opponent.name + ' ' + str(win_rate)
-          for opponent, win_rate in sorted(
-              self.win_rates.items(), key=lambda x: x[1])
-      ]))
-
-  def restore_networks(self, session, run_dir):
-    opponents_file = os.path.join(run_dir, 'opponents')
-    if os.path.exists(opponents_file):
-      with open(opponents_file) as f:
-        for line in f.readlines():
-          opponent_name, win_rate_string = line.strip().split()
-          win_rate = float(win_rate_string)
-          if opponent_name[:8] == 'network-':
-            print('Restoring %s' % opponent_name)
-            network = PolicyNetwork(opponent_name)
-            util.restore_network_or_fail(session, run_dir, network)
-            opponent = PolicyPlayer(network, session)
-            self.win_rates[opponent] = win_rate
-          else:
-            for opponent in self.win_rates.keys():
-              if opponent_name == opponent.name:
-                self.win_rates[opponent] = win_rate
+    def __init__(self, opponents):
+        self.win_rates = {}
+        for opponent in opponents:
+            self.add_opponent(opponent)
+
+    def add_opponent(self, opponent):
+        self.win_rates[opponent] = EPSILON
+
+    def decrease_win_rates(self):
+        # Decrease win rate so tough players must be beaten again
+        self.win_rates = {
+            opponent: max(2 * win_rate - 1, EPSILON)
+            for opponent, win_rate in self.win_rates.items()
+        }
+
+    def update_win_rate(self, opponent, win_rate):
+        # Win rate is a moving average
+        self.win_rates[opponent] = self.win_rates[opponent] * \
+            0.9 + win_rate * 0.1
+
+    def all_beaten(self):
+        result = True
+        for win_rate in self.win_rates.values():
+            result = result and win_rate > 0.7
+        return result
+
+    def choose_opponent(self):
+        # More difficult opponents are chosen more often
+        win_rates = np.maximum(list(self.win_rates.values()), 0.1)
+        probs = (1 / win_rates**2) - 1
+        normalised_probs = probs / probs.sum()
+        return np.random.choice(list(self.win_rates.keys()), p=normalised_probs)
+
+    def next_network_name(self):
+        network_opponents = len([
+            opponent for opponent in self.win_rates.keys()
+            if type(opponent) == PolicyPlayer
+        ])
+        return 'network-%d' % (network_opponents + 1)
+
+    def save_opponent_stats(self, run_dir):
+        with open(os.path.join(run_dir, 'opponents'), 'w') as f:
+            f.write('\n'.join([
+                opponent.name + ' ' + str(win_rate)
+                for opponent, win_rate in sorted(
+                    self.win_rates.items(), key=lambda x: x[1])
+            ]))
+
+    def restore_networks(self, session, run_dir):
+        opponents_file = os.path.join(run_dir, 'opponents')
+        if os.path.exists(opponents_file):
+            with open(opponents_file) as f:
+                for line in f.readlines():
+                    opponent_name, win_rate_string = line.strip().split()
+                    win_rate = float(win_rate_string)
+                    if opponent_name[:8] == 'network-':
+                        print('Restoring %s' % opponent_name)
+                        network = PolicyNetwork(opponent_name)
+                        util.restore_network_or_fail(session, run_dir, network)
+                        opponent = PolicyPlayer(network, session)
+                        self.win_rates[opponent] = win_rate
+                    else:
+                        for opponent in self.win_rates.keys():
+                            if opponent_name == opponent.name:
+                                self.win_rates[opponent] = win_rate
 
 
 class Game(object):
-  def __init__(self):
-    self.position = Position()
-    self.positions = [self.position]
-    self.moves = []
-    self.policy_player_moves = []
-    self.result = None
-
-    # Make it equally likely to train on red as yellow
-    if np.random.rand() < 0.5:
-      self.move(np.random.choice(self.position.legal_columns()))
-
-    # Setup a random position
-    while np.random.rand() < 0.75:
-      self.move(np.random.choice(self.position.legal_columns()))
-
-  def move(self, move, policy_player_turn=False):
-    if policy_player_turn:
-      self.policy_player_moves.append((self.position, move))
-    self.moves.append(move)
-    self.position = self.position.move(move)
-    self.positions.append(self.position)
-    if self.position.gameover():
-      self.result = self.position.result
-      self.policy_player_score = float(policy_player_turn) if self.result else 0.5
+    def __init__(self):
+        self.position = Position()
+        self.positions = [self.position]
+        self.moves = []
+        self.policy_player_moves = []
+        self.result = None
+
+        # Make it equally likely to train on red as yellow
+        if np.random.rand() < 0.5:
+            self.move(np.random.choice(self.position.legal_columns()))
+
+        # Setup a random position
+        while np.random.rand() < 0.75:
+            self.move(np.random.choice(self.position.legal_columns()))
+
+    def move(self, move, policy_player_turn=False):
+        if policy_player_turn:
+            self.policy_player_moves.append((self.position, move))
+        self.moves.append(move)
+        self.position = self.position.move(move)
+        self.positions.append(self.position)
+        if self.position.gameover():
+            self.result = self.position.result
+            self.policy_player_score = float(
+                policy_player_turn) if self.result else 0.5
 
 
 def main(_):
-  training = PolicyTraining(config)
-  training.train()
+    training = PolicyTraining(config)
+    training.train()
 
 
 if __name__ == '__main__':
-  tf.app.run()
+    tf.app.run()
diff --git a/util.py b/util.py
index 84b17f5..19a4743 100644
--- a/util.py
+++ b/util.py
@@ -6,86 +6,89 @@
 
 
 def run_directory(config):
-  def find_previous_run(dir):
-    if os.path.isdir(dir):
-      runs = [child[4:] for child in os.listdir(dir) if child[:4] == 'run_']
-      if runs:
-        return max(int(run) for run in runs)
+    def find_previous_run(dir):
+        if os.path.isdir(dir):
+            runs = [child[4:]
+                    for child in os.listdir(dir) if child[:4] == 'run_']
+            if runs:
+                return max(int(run) for run in runs)
 
-    return 0
+        return 0
 
-  if config.run_dir == 'latest':
-    parent_dir = 'runs/'
-    previous_run = find_previous_run(parent_dir)
-    run_dir = parent_dir + ('run_%d' % previous_run)
-  elif config.run_dir:
-    run_dir = config.run_dir
-  else:
-    parent_dir = 'runs/'
-    previous_run = find_previous_run(parent_dir)
-    run_dir = parent_dir + ('run_%d' % (previous_run + 1))
+    if config.run_dir == 'latest':
+        parent_dir = 'runs/'
+        previous_run = find_previous_run(parent_dir)
+        run_dir = parent_dir + ('run_%d' % previous_run)
+    elif config.run_dir:
+        run_dir = config.run_dir
+    else:
+        parent_dir = 'runs/'
+        previous_run = find_previous_run(parent_dir)
+        run_dir = parent_dir + ('run_%d' % (previous_run + 1))
 
-  if run_dir[-1] != '/':
-    run_dir += '/'
+    if run_dir[-1] != '/':
+        run_dir += '/'
 
-  if not os.path.isdir(run_dir):
-    os.makedirs(run_dir)
+    if not os.path.isdir(run_dir):
+        os.makedirs(run_dir)
 
-  print('Checkpoint and summary directory is %s' % run_dir)
+    print('Checkpoint and summary directory is %s' % run_dir)
 
-  return run_dir
+    return run_dir
 
 
 def turn_win(turn):
-  return turn * -2 + 1  # RED = +1, YELLOW = -1
+    return turn * -2 + 1  # RED = +1, YELLOW = -1
 
 
 def restore_or_initialize_scope(session, run_dir, scope):
-  variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
-  latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                 scope + '_checkpoint')
-  if latest_checkpoint:
-    tf.train.Saver(variables).restore(session, latest_checkpoint)
-    print('Restored %s scope from %s' % (scope, latest_checkpoint))
-  else:
-    session.run(tf.variables_initializer(variables))
-    print('Initialized %s scope' % scope)
+    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
+    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
+                                                   scope + '_checkpoint')
+    if latest_checkpoint:
+        tf.train.Saver(variables).restore(session, latest_checkpoint)
+        print('Restored %s scope from %s' % (scope, latest_checkpoint))
+    else:
+        session.run(tf.variables_initializer(variables))
+        print('Initialized %s scope' % scope)
 
 
 def save_scope(session, run_dir, scope):
-  os.makedirs(run_dir, exist_ok=True)
-  variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
-  tf.train.Saver(variables).save(
-      session,
-      os.path.join(run_dir, scope + '.ckpt'),
-      latest_filename=scope + '_checkpoint')
+    os.makedirs(run_dir, exist_ok=True)
+    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
+    tf.train.Saver(variables).save(
+        session,
+        os.path.join(run_dir, scope + '.ckpt'),
+        latest_filename=scope + '_checkpoint')
 
 
 def restore_or_initialize_network(session, run_dir, network):
-  latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                 network.scope + '_checkpoint')
-  if latest_checkpoint:
-    tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-    print('Restored %s network from %s' % (network.scope, latest_checkpoint))
-  else:
-    session.run(tf.variables_initializer(network.variables))
-    print('Initialized %s network' % network.scope)
+    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
+                                                   network.scope + '_checkpoint')
+    if latest_checkpoint:
+        tf.train.Saver(network.variables).restore(session, latest_checkpoint)
+        print('Restored %s network from %s' %
+              (network.scope, latest_checkpoint))
+    else:
+        session.run(tf.variables_initializer(network.variables))
+        print('Initialized %s network' % network.scope)
 
 
 def restore_network_or_fail(session, run_dir, network):
-  latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                 network.scope + '_checkpoint')
-  if latest_checkpoint:
-    tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-    print('Restored %s network from %s' % (network.scope, latest_checkpoint))
-  else:
-    raise Exception('Network checkpoint %s not found in %s' %
-                    (network.scope, run_dir))
+    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
+                                                   network.scope + '_checkpoint')
+    if latest_checkpoint:
+        tf.train.Saver(network.variables).restore(session, latest_checkpoint)
+        print('Restored %s network from %s' %
+              (network.scope, latest_checkpoint))
+    else:
+        raise Exception('Network checkpoint %s not found in %s' %
+                        (network.scope, run_dir))
 
 
 def save_network(session, run_dir, network):
-  os.makedirs(run_dir, exist_ok=True)
-  tf.train.Saver(network.variables).save(
-      session,
-      os.path.join(run_dir, network.scope + '.ckpt'),
-      latest_filename=network.scope + '_checkpoint')
+    os.makedirs(run_dir, exist_ok=True)
+    tf.train.Saver(network.variables).save(
+        session,
+        os.path.join(run_dir, network.scope + '.ckpt'),
+        latest_filename=network.scope + '_checkpoint')

From e0b5c11f6c297830b1978b275873518789d2f7d1 Mon Sep 17 00:00:00 2001
From: "Restyled.io" <commits@restyled.io>
Date: Thu, 19 Mar 2020 20:18:13 +0000
Subject: [PATCH 2/4] Restyled by black

---
 consts.py          |   7 +--
 network.py         | 126 +++++++++++++++++++++++---------------
 policy_training.py | 147 ++++++++++++++++++++++++++-------------------
 util.py            |  59 +++++++++---------
 4 files changed, 194 insertions(+), 145 deletions(-)

diff --git a/consts.py b/consts.py
index 976de0a..4b6b9cd 100644
--- a/consts.py
+++ b/consts.py
@@ -15,8 +15,7 @@
 TILED_COLUMNS = np.arange(TOTAL_DISKS) % WIDTH
 
 ROW_EDGE_DISTANCE = np.min([TILED_ROWS, np.flip(TILED_ROWS, axis=0)], axis=0)
-COLUMN_EDGE_DISTANCE = np.min(
-    [TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
+COLUMN_EDGE_DISTANCE = np.min([TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
 ODDS = TILED_ROWS % 2
 
 FOURS = []
@@ -70,13 +69,13 @@
 for colour in range(COLOURS):
     for row in range(HEIGHT):
         disks_in_column = row ^ (row + 1)
-        yellow_disks = 2**(row + 3) if colour == YELLOW else 0
+        yellow_disks = 2 ** (row + 3) if colour == YELLOW else 0
         row_hash = disks_in_column | yellow_disks
         for column in range(WIDTH):
             row_column_hash = row_hash << (9 * column)
             DISK_HASHES[colour, HEIGHT - row - 1, column] = row_column_hash
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     print(FOURS[0])
     print(DISK_FOURS[0, 0])
     print(DISK_FOUR_COUNTS)
diff --git a/network.py b/network.py
index bc653ae..02bec19 100644
--- a/network.py
+++ b/network.py
@@ -8,103 +8,117 @@ class BaseNetwork(object):
     def __init__(self, scope, use_symmetry):
         self.scope = scope
 
-        with tf.name_scope('inputs'):
-            self.turn = tf.placeholder(tf.float32, shape=[None], name='turn')
+        with tf.name_scope("inputs"):
+            self.turn = tf.placeholder(tf.float32, shape=[None], name="turn")
             tiled_turn = tf.tile(
                 tf.reshape(util.turn_win(self.turn), [-1, 1, 1, 1]),
-                [1, 2, HEIGHT, WIDTH])
+                [1, 2, HEIGHT, WIDTH],
+            )
 
             self.disks = tf.placeholder(
-                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='disks')
+                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="disks"
+            )
 
             self.empty = tf.placeholder(
-                tf.float32, shape=[None, HEIGHT, WIDTH], name='empty')
+                tf.float32, shape=[None, HEIGHT, WIDTH], name="empty"
+            )
             empty = tf.expand_dims(self.empty, axis=1)
 
             self.legal_moves = tf.placeholder(
-                tf.float32, shape=[None, HEIGHT, WIDTH], name='legal_moves')
+                tf.float32, shape=[None, HEIGHT, WIDTH], name="legal_moves"
+            )
             legal_moves = tf.expand_dims(self.legal_moves, axis=1)
 
             self.threats = tf.placeholder(
-                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name='threats')
+                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="threats"
+            )
 
             constant_features = np.array(
                 [TILED_ROWS, ODDS, ROW_EDGE_DISTANCE, COLUMN_EDGE_DISTANCE],
-                dtype=np.float32).reshape([1, 4, HEIGHT, WIDTH])
+                dtype=np.float32,
+            ).reshape([1, 4, HEIGHT, WIDTH])
             batch_size = tf.shape(self.turn)[0]
-            tiled_constant_features = tf.tile(constant_features,
-                                              [batch_size, 1, 1, 1])
+            tiled_constant_features = tf.tile(constant_features, [batch_size, 1, 1, 1])
 
             feature_planes = tf.concat(
                 [
-                    tiled_turn, self.disks, empty, legal_moves, self.threats,
-                    tiled_constant_features
+                    tiled_turn,
+                    self.disks,
+                    empty,
+                    legal_moves,
+                    self.threats,
+                    tiled_constant_features,
                 ],
-                axis=1)
+                axis=1,
+            )
 
             if use_symmetry:
                 # Interleave horizontally flipped position
-                feature_planes_shape = [-1] + \
-                    feature_planes.shape.as_list()[1:]
+                feature_planes_shape = [-1] + feature_planes.shape.as_list()[1:]
                 flipped = tf.reverse(feature_planes, axis=[3])
                 feature_planes = tf.reshape(
-                    tf.stack([feature_planes, flipped], axis=1), feature_planes_shape)
+                    tf.stack([feature_planes, flipped], axis=1), feature_planes_shape
+                )
 
-        with tf.name_scope('conv_layers'):
+        with tf.name_scope("conv_layers"):
             if self.gpu_available():
-                data_format = 'channels_first'
+                data_format = "channels_first"
             else:
                 feature_planes = tf.transpose(feature_planes, [0, 2, 3, 1])
-                data_format = 'channels_last'
+                data_format = "channels_last"
 
             conv1 = tf.layers.conv2d(
                 feature_planes,
                 filters=32,
                 kernel_size=[4, 5],
-                padding='same',
+                padding="same",
                 data_format=data_format,
                 use_bias=False,
-                name='conv1')
+                name="conv1",
+            )
 
             conv2 = tf.layers.conv2d(
                 conv1,
                 filters=32,
                 kernel_size=[4, 5],
-                padding='same',
+                padding="same",
                 data_format=data_format,
                 activation=tf.nn.relu,
-                name='conv2')
+                name="conv2",
+            )
 
             conv3 = tf.layers.conv2d(
                 conv2,
                 filters=32,
                 kernel_size=[4, 5],
-                padding='same',
+                padding="same",
                 data_format=data_format,
                 activation=tf.nn.relu,
-                name='conv3')
+                name="conv3",
+            )
 
             final_conv = tf.layers.conv2d(
                 conv3,
                 filters=1,
                 kernel_size=[1, 1],
                 data_format=data_format,
-                name='final_conv')
-            disk_bias = tf.get_variable('disk_bias', shape=[TOTAL_DISKS])
+                name="final_conv",
+            )
+            disk_bias = tf.get_variable("disk_bias", shape=[TOTAL_DISKS])
             self.conv_output = tf.add(
-                tf.contrib.layers.flatten(final_conv), disk_bias, name='conv_output')
+                tf.contrib.layers.flatten(final_conv), disk_bias, name="conv_output"
+            )
 
             self.conv_layers = [conv1, conv2, conv3, self.conv_output]
 
     def gpu_available(self):
         devices = device_lib.list_local_devices()
-        return len([d for d in devices if d.device_type == 'GPU']) > 0
+        return len([d for d in devices if d.device_type == "GPU"]) > 0
 
     @property
     def variables(self):
         # Add '/' to stop network-1 containing network-10 variables
-        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
-                                 self.scope + '/')
+        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope + "/")
 
     def assign(self, other):
         return [
@@ -118,45 +132,56 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
         with tf.variable_scope(scope, reuse=reuse):
             super(PolicyNetwork, self).__init__(scope, use_symmetry)
 
-            with tf.name_scope('policy'):
+            with tf.name_scope("policy"):
                 self.temperature = tf.placeholder_with_default(
-                    temperature, (), name='temperature')
+                    temperature, (), name="temperature"
+                )
 
                 disk_logits = tf.divide(
-                    self.conv_output, self.temperature, name='disk_logits')
+                    self.conv_output, self.temperature, name="disk_logits"
+                )
 
                 if use_symmetry:
                     # Calculate average of actual and horizontally flipped position
                     normal, flipped = tf.split(
                         tf.reshape(disk_logits, [-1, 2, HEIGHT, WIDTH]),
                         num_or_size_splits=2,
-                        axis=1)
+                        axis=1,
+                    )
                     disk_logits = tf.reshape(
                         tf.reduce_mean(
-                            tf.concat(
-                                [normal, tf.reverse(flipped, axis=[3])], axis=1),
-                            axis=1), [-1, TOTAL_DISKS])
+                            tf.concat([normal, tf.reverse(flipped, axis=[3])], axis=1),
+                            axis=1,
+                        ),
+                        [-1, TOTAL_DISKS],
+                    )
 
                 # Make illegal moves impossible:
                 #   - Legal moves have positive logits
                 #   - Illegal moves have -ILLEGAL_PENALTY logits
                 legal_moves = tf.contrib.layers.flatten(self.legal_moves)
-                legal_disk_logits = (tf.nn.relu(disk_logits) * legal_moves +
-                                     (legal_moves - 1) * ILLEGAL_PENALTY)
+                legal_disk_logits = (
+                    tf.nn.relu(disk_logits) * legal_moves
+                    + (legal_moves - 1) * ILLEGAL_PENALTY
+                )
 
-                self.policy = tf.nn.softmax(legal_disk_logits, name='policy')
+                self.policy = tf.nn.softmax(legal_disk_logits, name="policy")
                 self.sample_move = tf.squeeze(
                     tf.multinomial(legal_disk_logits, 1) % WIDTH,
                     axis=1,
-                    name='sample_move')
+                    name="sample_move",
+                )
 
                 self.entropy = tf.reduce_sum(
                     self.policy * -tf.log(self.policy + EPSILON),  # Avoid Nans
                     axis=1,
-                    name='entropy')
+                    name="entropy",
+                )
 
                 self.policy_layers = self.conv_layers + [
-                    disk_logits, self.policy, self.entropy
+                    disk_logits,
+                    self.policy,
+                    self.entropy,
                 ]
 
 
@@ -165,21 +190,22 @@ def __init__(self, scope, use_symmetry=False):
         with tf.variable_scope(scope):
             super(ValueNetwork, self).__init__(scope, use_symmetry)
 
-            with tf.name_scope('value'):
+            with tf.name_scope("value"):
                 fully_connected = tf.layers.dense(
                     self.conv_output,
                     units=64,
                     activation=tf.nn.relu,
-                    name='fully_connected')
+                    name="fully_connected",
+                )
 
                 value = tf.layers.dense(fully_connected, 1, tf.tanh)
 
                 if use_symmetry:
                     # Calculate average of actual and horizontally flipped position
                     self.value = tf.reduce_mean(
-                        tf.reshape(value, [-1, 2]), axis=1, name='value')
+                        tf.reshape(value, [-1, 2]), axis=1, name="value"
+                    )
                 else:
-                    self.value = tf.squeeze(value, axis=1, name='value')
+                    self.value = tf.squeeze(value, axis=1, name="value")
 
-                self.value_layers = self.conv_layers + \
-                    [fully_connected, self.value]
+                self.value_layers = self.conv_layers + [fully_connected, self.value]
diff --git a/policy_training.py b/policy_training.py
index f8f3a1c..d32bfa1 100644
--- a/policy_training.py
+++ b/policy_training.py
@@ -8,11 +8,11 @@
 import util
 
 flags = tf.app.flags
-flags.DEFINE_string('run_dir', None, 'Run directory')
-flags.DEFINE_integer('batch_size', 256, 'Batch size')
-flags.DEFINE_integer('batches', 10000, 'Number of batches')
-flags.DEFINE_float('entropy', 0.03, 'Entropy regularisation rate')
-flags.DEFINE_float('learning_rate', 0.001, 'Adam learning rate')
+flags.DEFINE_string("run_dir", None, "Run directory")
+flags.DEFINE_integer("batch_size", 256, "Batch size")
+flags.DEFINE_integer("batches", 10000, "Number of batches")
+flags.DEFINE_float("entropy", 0.03, "Entropy regularisation rate")
+flags.DEFINE_float("learning_rate", 0.001, "Adam learning rate")
 config = flags.FLAGS
 
 
@@ -21,41 +21,42 @@ def __init__(self, config):
         self.config = config
         self.run_dir = util.run_directory(config)
 
-        self.session = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
-            allow_growth=True)))
+        self.session = tf.Session(
+            config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
+        )
 
-        self.policy_network = PolicyNetwork('policy')
+        self.policy_network = PolicyNetwork("policy")
         self.policy_player = PolicyPlayer(self.policy_network, self.session)
-        util.restore_or_initialize_network(self.session, self.run_dir,
-                                           self.policy_network)
+        util.restore_or_initialize_network(
+            self.session, self.run_dir, self.policy_network
+        )
 
         # Train ops
         self.create_train_op(self.policy_network)
         self.writer = tf.summary.FileWriter(self.run_dir)
-        util.restore_or_initialize_scope(self.session, self.run_dir,
-                                         self.training_scope.name)
+        util.restore_or_initialize_scope(
+            self.session, self.run_dir, self.training_scope.name
+        )
 
         self.opponents = Opponents(
-            [RandomPlayer(),
-             RandomThreatPlayer(),
-             MaxThreatPlayer()])
+            [RandomPlayer(), RandomThreatPlayer(), MaxThreatPlayer()]
+        )
         self.opponents.restore_networks(self.session, self.run_dir)
 
     def create_train_op(self, policy_network):
-        with tf.variable_scope('policy_training') as self.training_scope:
-            self.move = tf.placeholder(tf.int32, shape=[None], name='move')
-            self.result = tf.placeholder(
-                tf.float32, shape=[None], name='result')
+        with tf.variable_scope("policy_training") as self.training_scope:
+            self.move = tf.placeholder(tf.int32, shape=[None], name="move")
+            self.result = tf.placeholder(tf.float32, shape=[None], name="result")
 
             policy = tf.reshape(policy_network.policy, [-1, HEIGHT, WIDTH])
             move = tf.expand_dims(tf.one_hot(self.move, WIDTH), axis=1)
             turn = util.turn_win(policy_network.turn)
             move_probability = tf.reduce_sum(policy * move, axis=[1, 2])
 
-            result_loss = -tf.reduce_mean(
-                tf.log(move_probability) * turn * self.result)
-            entropy_regularisation = (
-                -config.entropy * tf.reduce_mean(policy_network.entropy))
+            result_loss = -tf.reduce_mean(tf.log(move_probability) * turn * self.result)
+            entropy_regularisation = -config.entropy * tf.reduce_mean(
+                policy_network.entropy
+            )
             loss = result_loss + entropy_regularisation
 
             optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
@@ -63,7 +64,7 @@ def create_train_op(self, policy_network):
             self.train_op = optimizer.minimize(loss, self.global_step)
 
             # Summary
-            tf.summary.scalar('loss', loss)
+            tf.summary.scalar("loss", loss)
             for var in policy_network.variables + policy_network.policy_layers:
                 tf.summary.histogram(var.name, var)
             self.summary = tf.summary.merge_all()
@@ -77,7 +78,7 @@ def train(self):
 
             if self.opponents.all_beaten():
                 name = self.opponents.next_network_name()
-                print('All opponents beaten. Creating %s' % name)
+                print("All opponents beaten. Creating %s" % name)
                 self.create_new_opponent(name)
 
             if step % 100 == 0:
@@ -92,11 +93,10 @@ def save(self):
 
     def play_games(self, opponent):
         # Create games
-        games = incomplete_games = [Game()
-                                    for _ in range(self.config.batch_size)]
+        games = incomplete_games = [Game() for _ in range(self.config.batch_size)]
 
         # Let opponent play first in half of the games
-        self.play_move(games[0:len(games) // 2], opponent)
+        self.play_move(games[0 : len(games) // 2], opponent)
         player = self.policy_player
 
         while incomplete_games:
@@ -116,8 +116,15 @@ def play_move(self, games, player):
             game.move(move, player == self.policy_player)
 
     def train_games(self, opponent, games):
-        turn, disks, empty, legal_moves, threats, moves, results = ([], [], [], [],
-                                                                    [], [], [])
+        turn, disks, empty, legal_moves, threats, moves, results = (
+            [],
+            [],
+            [],
+            [],
+            [],
+            [],
+            [],
+        )
         for game in games:
             for position, move in game.policy_player_moves:
                 turn.append(position.turn)
@@ -129,39 +136,49 @@ def train_games(self, opponent, games):
                 results.append(game.result)
 
         _, step, summary = self.session.run(
-            [self.train_op, self.global_step, self.summary], {
+            [self.train_op, self.global_step, self.summary],
+            {
                 self.policy_network.turn: turn,
                 self.policy_network.disks: disks,
                 self.policy_network.empty: empty,
                 self.policy_network.legal_moves: legal_moves,
                 self.policy_network.threats: threats,
                 self.move: moves,
-                self.result: results
-            })
+                self.result: results,
+            },
+        )
 
         return step, summary
 
     def process_results(self, opponent, games, step, summary):
         win_rate = np.mean([game.policy_player_score for game in games])
-        average_moves = sum(len(game.moves)
-                            for game in games) / self.config.batch_size
+        average_moves = sum(len(game.moves) for game in games) / self.config.batch_size
 
         opponent_summary = tf.Summary()
         opponent_summary.value.add(
-            tag=self.training_scope.name + '/' + opponent.name + '/win_rate',
-            simple_value=win_rate)
+            tag=self.training_scope.name + "/" + opponent.name + "/win_rate",
+            simple_value=win_rate,
+        )
         opponent_summary.value.add(
-            tag=self.training_scope.name + '/' + opponent.name + '/moves',
-            simple_value=average_moves)
+            tag=self.training_scope.name + "/" + opponent.name + "/moves",
+            simple_value=average_moves,
+        )
 
         self.writer.add_summary(summary, step)
         self.writer.add_summary(opponent_summary, step)
 
         self.opponents.update_win_rate(opponent, win_rate)
 
-        print('Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves' %
-              (step, opponent.name, win_rate, self.opponents.win_rates[opponent],
-               average_moves))
+        print(
+            "Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves"
+            % (
+                step,
+                opponent.name,
+                win_rate,
+                self.opponents.win_rates[opponent],
+                average_moves,
+            )
+        )
 
     def create_new_opponent(self, name):
         # Create clone of policy_player
@@ -192,8 +209,7 @@ def decrease_win_rates(self):
 
     def update_win_rate(self, opponent, win_rate):
         # Win rate is a moving average
-        self.win_rates[opponent] = self.win_rates[opponent] * \
-            0.9 + win_rate * 0.1
+        self.win_rates[opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1
 
     def all_beaten(self):
         result = True
@@ -204,34 +220,42 @@ def all_beaten(self):
     def choose_opponent(self):
         # More difficult opponents are chosen more often
         win_rates = np.maximum(list(self.win_rates.values()), 0.1)
-        probs = (1 / win_rates**2) - 1
+        probs = (1 / win_rates ** 2) - 1
         normalised_probs = probs / probs.sum()
         return np.random.choice(list(self.win_rates.keys()), p=normalised_probs)
 
     def next_network_name(self):
-        network_opponents = len([
-            opponent for opponent in self.win_rates.keys()
-            if type(opponent) == PolicyPlayer
-        ])
-        return 'network-%d' % (network_opponents + 1)
+        network_opponents = len(
+            [
+                opponent
+                for opponent in self.win_rates.keys()
+                if type(opponent) == PolicyPlayer
+            ]
+        )
+        return "network-%d" % (network_opponents + 1)
 
     def save_opponent_stats(self, run_dir):
-        with open(os.path.join(run_dir, 'opponents'), 'w') as f:
-            f.write('\n'.join([
-                opponent.name + ' ' + str(win_rate)
-                for opponent, win_rate in sorted(
-                    self.win_rates.items(), key=lambda x: x[1])
-            ]))
+        with open(os.path.join(run_dir, "opponents"), "w") as f:
+            f.write(
+                "\n".join(
+                    [
+                        opponent.name + " " + str(win_rate)
+                        for opponent, win_rate in sorted(
+                            self.win_rates.items(), key=lambda x: x[1]
+                        )
+                    ]
+                )
+            )
 
     def restore_networks(self, session, run_dir):
-        opponents_file = os.path.join(run_dir, 'opponents')
+        opponents_file = os.path.join(run_dir, "opponents")
         if os.path.exists(opponents_file):
             with open(opponents_file) as f:
                 for line in f.readlines():
                     opponent_name, win_rate_string = line.strip().split()
                     win_rate = float(win_rate_string)
-                    if opponent_name[:8] == 'network-':
-                        print('Restoring %s' % opponent_name)
+                    if opponent_name[:8] == "network-":
+                        print("Restoring %s" % opponent_name)
                         network = PolicyNetwork(opponent_name)
                         util.restore_network_or_fail(session, run_dir, network)
                         opponent = PolicyPlayer(network, session)
@@ -266,8 +290,7 @@ def move(self, move, policy_player_turn=False):
         self.positions.append(self.position)
         if self.position.gameover():
             self.result = self.position.result
-            self.policy_player_score = float(
-                policy_player_turn) if self.result else 0.5
+            self.policy_player_score = float(policy_player_turn) if self.result else 0.5
 
 
 def main(_):
@@ -275,5 +298,5 @@ def main(_):
     training.train()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     tf.app.run()
diff --git a/util.py b/util.py
index 19a4743..2055268 100644
--- a/util.py
+++ b/util.py
@@ -8,31 +8,30 @@
 def run_directory(config):
     def find_previous_run(dir):
         if os.path.isdir(dir):
-            runs = [child[4:]
-                    for child in os.listdir(dir) if child[:4] == 'run_']
+            runs = [child[4:] for child in os.listdir(dir) if child[:4] == "run_"]
             if runs:
                 return max(int(run) for run in runs)
 
         return 0
 
-    if config.run_dir == 'latest':
-        parent_dir = 'runs/'
+    if config.run_dir == "latest":
+        parent_dir = "runs/"
         previous_run = find_previous_run(parent_dir)
-        run_dir = parent_dir + ('run_%d' % previous_run)
+        run_dir = parent_dir + ("run_%d" % previous_run)
     elif config.run_dir:
         run_dir = config.run_dir
     else:
-        parent_dir = 'runs/'
+        parent_dir = "runs/"
         previous_run = find_previous_run(parent_dir)
-        run_dir = parent_dir + ('run_%d' % (previous_run + 1))
+        run_dir = parent_dir + ("run_%d" % (previous_run + 1))
 
-    if run_dir[-1] != '/':
-        run_dir += '/'
+    if run_dir[-1] != "/":
+        run_dir += "/"
 
     if not os.path.isdir(run_dir):
         os.makedirs(run_dir)
 
-    print('Checkpoint and summary directory is %s' % run_dir)
+    print("Checkpoint and summary directory is %s" % run_dir)
 
     return run_dir
 
@@ -43,14 +42,13 @@ def turn_win(turn):
 
 def restore_or_initialize_scope(session, run_dir, scope):
     variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
-    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                   scope + '_checkpoint')
+    latest_checkpoint = tf.train.latest_checkpoint(run_dir, scope + "_checkpoint")
     if latest_checkpoint:
         tf.train.Saver(variables).restore(session, latest_checkpoint)
-        print('Restored %s scope from %s' % (scope, latest_checkpoint))
+        print("Restored %s scope from %s" % (scope, latest_checkpoint))
     else:
         session.run(tf.variables_initializer(variables))
-        print('Initialized %s scope' % scope)
+        print("Initialized %s scope" % scope)
 
 
 def save_scope(session, run_dir, scope):
@@ -58,37 +56,40 @@ def save_scope(session, run_dir, scope):
     variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
     tf.train.Saver(variables).save(
         session,
-        os.path.join(run_dir, scope + '.ckpt'),
-        latest_filename=scope + '_checkpoint')
+        os.path.join(run_dir, scope + ".ckpt"),
+        latest_filename=scope + "_checkpoint",
+    )
 
 
 def restore_or_initialize_network(session, run_dir, network):
-    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                   network.scope + '_checkpoint')
+    latest_checkpoint = tf.train.latest_checkpoint(
+        run_dir, network.scope + "_checkpoint"
+    )
     if latest_checkpoint:
         tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-        print('Restored %s network from %s' %
-              (network.scope, latest_checkpoint))
+        print("Restored %s network from %s" % (network.scope, latest_checkpoint))
     else:
         session.run(tf.variables_initializer(network.variables))
-        print('Initialized %s network' % network.scope)
+        print("Initialized %s network" % network.scope)
 
 
 def restore_network_or_fail(session, run_dir, network):
-    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
-                                                   network.scope + '_checkpoint')
+    latest_checkpoint = tf.train.latest_checkpoint(
+        run_dir, network.scope + "_checkpoint"
+    )
     if latest_checkpoint:
         tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-        print('Restored %s network from %s' %
-              (network.scope, latest_checkpoint))
+        print("Restored %s network from %s" % (network.scope, latest_checkpoint))
     else:
-        raise Exception('Network checkpoint %s not found in %s' %
-                        (network.scope, run_dir))
+        raise Exception(
+            "Network checkpoint %s not found in %s" % (network.scope, run_dir)
+        )
 
 
 def save_network(session, run_dir, network):
     os.makedirs(run_dir, exist_ok=True)
     tf.train.Saver(network.variables).save(
         session,
-        os.path.join(run_dir, network.scope + '.ckpt'),
-        latest_filename=network.scope + '_checkpoint')
+        os.path.join(run_dir, network.scope + ".ckpt"),
+        latest_filename=network.scope + "_checkpoint",
+    )

From e7ada8e216f3ff1f4ea1ecc42601c0f9fb112c1e Mon Sep 17 00:00:00 2001
From: "Restyled.io" <commits@restyled.io>
Date: Thu, 19 Mar 2020 20:18:14 +0000
Subject: [PATCH 3/4] Restyled by reorder-python-imports

---
 network.py         |  3 ++-
 policy_training.py | 10 ++++++----
 util.py            |  5 +++--
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/network.py b/network.py
index 02bec19..c027ad6 100644
--- a/network.py
+++ b/network.py
@@ -1,7 +1,8 @@
-from consts import *
 import tensorflow as tf
 from tensorflow.python.client import device_lib
+
 import util
+from consts import *
 
 
 class BaseNetwork(object):
diff --git a/policy_training.py b/policy_training.py
index d32bfa1..0e7967b 100644
--- a/policy_training.py
+++ b/policy_training.py
@@ -1,11 +1,13 @@
+import os
+
+import numpy as np
+import tensorflow as tf
+
+import util
 from consts import *
 from network import PolicyNetwork
-import numpy as np
-import os
 from players import *
 from position import Position
-import tensorflow as tf
-import util
 
 flags = tf.app.flags
 flags.DEFINE_string("run_dir", None, "Run directory")
diff --git a/util.py b/util.py
index 2055268..5799272 100644
--- a/util.py
+++ b/util.py
@@ -1,8 +1,9 @@
-from datetime import datetime
 import os
-import tensorflow as tf
 import threading
 import time
+from datetime import datetime
+
+import tensorflow as tf
 
 
 def run_directory(config):

From 1c0d91f3ca50651982aa4be9186120523ac16692 Mon Sep 17 00:00:00 2001
From: "Restyled.io" <commits@restyled.io>
Date: Thu, 19 Mar 2020 20:18:17 +0000
Subject: [PATCH 4/4] Restyled by yapf

---
 consts.py          |  5 ++-
 network.py         | 72 ++++++++++++++++++----------------
 policy_training.py | 96 ++++++++++++++++++++++------------------------
 util.py            | 24 ++++++------
 4 files changed, 100 insertions(+), 97 deletions(-)

diff --git a/consts.py b/consts.py
index 4b6b9cd..54a5522 100644
--- a/consts.py
+++ b/consts.py
@@ -15,7 +15,8 @@
 TILED_COLUMNS = np.arange(TOTAL_DISKS) % WIDTH
 
 ROW_EDGE_DISTANCE = np.min([TILED_ROWS, np.flip(TILED_ROWS, axis=0)], axis=0)
-COLUMN_EDGE_DISTANCE = np.min([TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
+COLUMN_EDGE_DISTANCE = np.min(
+    [TILED_COLUMNS, np.flip(TILED_COLUMNS, axis=0)], axis=0)
 ODDS = TILED_ROWS % 2
 
 FOURS = []
@@ -69,7 +70,7 @@
 for colour in range(COLOURS):
     for row in range(HEIGHT):
         disks_in_column = row ^ (row + 1)
-        yellow_disks = 2 ** (row + 3) if colour == YELLOW else 0
+        yellow_disks = 2**(row + 3) if colour == YELLOW else 0
         row_hash = disks_in_column | yellow_disks
         for column in range(WIDTH):
             row_column_hash = row_hash << (9 * column)
diff --git a/network.py b/network.py
index c027ad6..7f2e1e4 100644
--- a/network.py
+++ b/network.py
@@ -16,30 +16,31 @@ def __init__(self, scope, use_symmetry):
                 [1, 2, HEIGHT, WIDTH],
             )
 
-            self.disks = tf.placeholder(
-                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="disks"
-            )
+            self.disks = tf.placeholder(tf.float32,
+                                        shape=[None, 2, HEIGHT, WIDTH],
+                                        name="disks")
 
-            self.empty = tf.placeholder(
-                tf.float32, shape=[None, HEIGHT, WIDTH], name="empty"
-            )
+            self.empty = tf.placeholder(tf.float32,
+                                        shape=[None, HEIGHT, WIDTH],
+                                        name="empty")
             empty = tf.expand_dims(self.empty, axis=1)
 
-            self.legal_moves = tf.placeholder(
-                tf.float32, shape=[None, HEIGHT, WIDTH], name="legal_moves"
-            )
+            self.legal_moves = tf.placeholder(tf.float32,
+                                              shape=[None, HEIGHT, WIDTH],
+                                              name="legal_moves")
             legal_moves = tf.expand_dims(self.legal_moves, axis=1)
 
-            self.threats = tf.placeholder(
-                tf.float32, shape=[None, 2, HEIGHT, WIDTH], name="threats"
-            )
+            self.threats = tf.placeholder(tf.float32,
+                                          shape=[None, 2, HEIGHT, WIDTH],
+                                          name="threats")
 
             constant_features = np.array(
                 [TILED_ROWS, ODDS, ROW_EDGE_DISTANCE, COLUMN_EDGE_DISTANCE],
                 dtype=np.float32,
             ).reshape([1, 4, HEIGHT, WIDTH])
             batch_size = tf.shape(self.turn)[0]
-            tiled_constant_features = tf.tile(constant_features, [batch_size, 1, 1, 1])
+            tiled_constant_features = tf.tile(constant_features,
+                                              [batch_size, 1, 1, 1])
 
             feature_planes = tf.concat(
                 [
@@ -55,11 +56,12 @@ def __init__(self, scope, use_symmetry):
 
             if use_symmetry:
                 # Interleave horizontally flipped position
-                feature_planes_shape = [-1] + feature_planes.shape.as_list()[1:]
+                feature_planes_shape = [-1
+                                        ] + feature_planes.shape.as_list()[1:]
                 flipped = tf.reverse(feature_planes, axis=[3])
                 feature_planes = tf.reshape(
-                    tf.stack([feature_planes, flipped], axis=1), feature_planes_shape
-                )
+                    tf.stack([feature_planes, flipped], axis=1),
+                    feature_planes_shape)
 
         with tf.name_scope("conv_layers"):
             if self.gpu_available():
@@ -106,9 +108,9 @@ def __init__(self, scope, use_symmetry):
                 name="final_conv",
             )
             disk_bias = tf.get_variable("disk_bias", shape=[TOTAL_DISKS])
-            self.conv_output = tf.add(
-                tf.contrib.layers.flatten(final_conv), disk_bias, name="conv_output"
-            )
+            self.conv_output = tf.add(tf.contrib.layers.flatten(final_conv),
+                                      disk_bias,
+                                      name="conv_output")
 
             self.conv_layers = [conv1, conv2, conv3, self.conv_output]
 
@@ -119,7 +121,8 @@ def gpu_available(self):
     @property
     def variables(self):
         # Add '/' to stop network-1 containing network-10 variables
-        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope + "/")
+        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
+                                 self.scope + "/")
 
     def assign(self, other):
         return [
@@ -135,12 +138,11 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
 
             with tf.name_scope("policy"):
                 self.temperature = tf.placeholder_with_default(
-                    temperature, (), name="temperature"
-                )
+                    temperature, (), name="temperature")
 
-                disk_logits = tf.divide(
-                    self.conv_output, self.temperature, name="disk_logits"
-                )
+                disk_logits = tf.divide(self.conv_output,
+                                        self.temperature,
+                                        name="disk_logits")
 
                 if use_symmetry:
                     # Calculate average of actual and horizontally flipped position
@@ -151,7 +153,9 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
                     )
                     disk_logits = tf.reshape(
                         tf.reduce_mean(
-                            tf.concat([normal, tf.reverse(flipped, axis=[3])], axis=1),
+                            tf.concat(
+                                [normal, tf.reverse(flipped, axis=[3])],
+                                axis=1),
                             axis=1,
                         ),
                         [-1, TOTAL_DISKS],
@@ -161,10 +165,8 @@ def __init__(self, scope, temperature=1.0, reuse=None, use_symmetry=False):
                 #   - Legal moves have positive logits
                 #   - Illegal moves have -ILLEGAL_PENALTY logits
                 legal_moves = tf.contrib.layers.flatten(self.legal_moves)
-                legal_disk_logits = (
-                    tf.nn.relu(disk_logits) * legal_moves
-                    + (legal_moves - 1) * ILLEGAL_PENALTY
-                )
+                legal_disk_logits = (tf.nn.relu(disk_logits) * legal_moves +
+                                     (legal_moves - 1) * ILLEGAL_PENALTY)
 
                 self.policy = tf.nn.softmax(legal_disk_logits, name="policy")
                 self.sample_move = tf.squeeze(
@@ -203,10 +205,12 @@ def __init__(self, scope, use_symmetry=False):
 
                 if use_symmetry:
                     # Calculate average of actual and horizontally flipped position
-                    self.value = tf.reduce_mean(
-                        tf.reshape(value, [-1, 2]), axis=1, name="value"
-                    )
+                    self.value = tf.reduce_mean(tf.reshape(value, [-1, 2]),
+                                                axis=1,
+                                                name="value")
                 else:
                     self.value = tf.squeeze(value, axis=1, name="value")
 
-                self.value_layers = self.conv_layers + [fully_connected, self.value]
+                self.value_layers = self.conv_layers + [
+                    fully_connected, self.value
+                ]
diff --git a/policy_training.py b/policy_training.py
index 0e7967b..6f5b795 100644
--- a/policy_training.py
+++ b/policy_training.py
@@ -23,42 +23,42 @@ def __init__(self, config):
         self.config = config
         self.run_dir = util.run_directory(config)
 
-        self.session = tf.Session(
-            config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
-        )
+        self.session = tf.Session(config=tf.ConfigProto(
+            gpu_options=tf.GPUOptions(allow_growth=True)))
 
         self.policy_network = PolicyNetwork("policy")
         self.policy_player = PolicyPlayer(self.policy_network, self.session)
-        util.restore_or_initialize_network(
-            self.session, self.run_dir, self.policy_network
-        )
+        util.restore_or_initialize_network(self.session, self.run_dir,
+                                           self.policy_network)
 
         # Train ops
         self.create_train_op(self.policy_network)
         self.writer = tf.summary.FileWriter(self.run_dir)
-        util.restore_or_initialize_scope(
-            self.session, self.run_dir, self.training_scope.name
-        )
+        util.restore_or_initialize_scope(self.session, self.run_dir,
+                                         self.training_scope.name)
 
         self.opponents = Opponents(
-            [RandomPlayer(), RandomThreatPlayer(), MaxThreatPlayer()]
-        )
+            [RandomPlayer(),
+             RandomThreatPlayer(),
+             MaxThreatPlayer()])
         self.opponents.restore_networks(self.session, self.run_dir)
 
     def create_train_op(self, policy_network):
         with tf.variable_scope("policy_training") as self.training_scope:
             self.move = tf.placeholder(tf.int32, shape=[None], name="move")
-            self.result = tf.placeholder(tf.float32, shape=[None], name="result")
+            self.result = tf.placeholder(tf.float32,
+                                         shape=[None],
+                                         name="result")
 
             policy = tf.reshape(policy_network.policy, [-1, HEIGHT, WIDTH])
             move = tf.expand_dims(tf.one_hot(self.move, WIDTH), axis=1)
             turn = util.turn_win(policy_network.turn)
             move_probability = tf.reduce_sum(policy * move, axis=[1, 2])
 
-            result_loss = -tf.reduce_mean(tf.log(move_probability) * turn * self.result)
+            result_loss = -tf.reduce_mean(
+                tf.log(move_probability) * turn * self.result)
             entropy_regularisation = -config.entropy * tf.reduce_mean(
-                policy_network.entropy
-            )
+                policy_network.entropy)
             loss = result_loss + entropy_regularisation
 
             optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
@@ -95,17 +95,20 @@ def save(self):
 
     def play_games(self, opponent):
         # Create games
-        games = incomplete_games = [Game() for _ in range(self.config.batch_size)]
+        games = incomplete_games = [
+            Game() for _ in range(self.config.batch_size)
+        ]
 
         # Let opponent play first in half of the games
-        self.play_move(games[0 : len(games) // 2], opponent)
+        self.play_move(games[0:len(games) // 2], opponent)
         player = self.policy_player
 
         while incomplete_games:
             self.play_move(incomplete_games, player)
             player = self.policy_player if player != self.policy_player else opponent
             incomplete_games = [
-                game for game in incomplete_games if not game.position.gameover()
+                game for game in incomplete_games
+                if not game.position.gameover()
             ]
 
         return games
@@ -154,7 +157,8 @@ def train_games(self, opponent, games):
 
     def process_results(self, opponent, games, step, summary):
         win_rate = np.mean([game.policy_player_score for game in games])
-        average_moves = sum(len(game.moves) for game in games) / self.config.batch_size
+        average_moves = sum(len(game.moves)
+                            for game in games) / self.config.batch_size
 
         opponent_summary = tf.Summary()
         opponent_summary.value.add(
@@ -171,16 +175,13 @@ def process_results(self, opponent, games, step, summary):
 
         self.opponents.update_win_rate(opponent, win_rate)
 
-        print(
-            "Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves"
-            % (
-                step,
-                opponent.name,
-                win_rate,
-                self.opponents.win_rates[opponent],
-                average_moves,
-            )
-        )
+        print("Step %d. Opponent %s, win rate %.2f <%.2f>, %.2f moves" % (
+            step,
+            opponent.name,
+            win_rate,
+            self.opponents.win_rates[opponent],
+            average_moves,
+        ))
 
     def create_new_opponent(self, name):
         # Create clone of policy_player
@@ -211,7 +212,8 @@ def decrease_win_rates(self):
 
     def update_win_rate(self, opponent, win_rate):
         # Win rate is a moving average
-        self.win_rates[opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1
+        self.win_rates[
+            opponent] = self.win_rates[opponent] * 0.9 + win_rate * 0.1
 
     def all_beaten(self):
         result = True
@@ -222,32 +224,25 @@ def all_beaten(self):
     def choose_opponent(self):
         # More difficult opponents are chosen more often
         win_rates = np.maximum(list(self.win_rates.values()), 0.1)
-        probs = (1 / win_rates ** 2) - 1
+        probs = (1 / win_rates**2) - 1
         normalised_probs = probs / probs.sum()
-        return np.random.choice(list(self.win_rates.keys()), p=normalised_probs)
+        return np.random.choice(list(self.win_rates.keys()),
+                                p=normalised_probs)
 
     def next_network_name(self):
-        network_opponents = len(
-            [
-                opponent
-                for opponent in self.win_rates.keys()
-                if type(opponent) == PolicyPlayer
-            ]
-        )
+        network_opponents = len([
+            opponent for opponent in self.win_rates.keys()
+            if type(opponent) == PolicyPlayer
+        ])
         return "network-%d" % (network_opponents + 1)
 
     def save_opponent_stats(self, run_dir):
         with open(os.path.join(run_dir, "opponents"), "w") as f:
-            f.write(
-                "\n".join(
-                    [
-                        opponent.name + " " + str(win_rate)
-                        for opponent, win_rate in sorted(
-                            self.win_rates.items(), key=lambda x: x[1]
-                        )
-                    ]
-                )
-            )
+            f.write("\n".join([
+                opponent.name + " " + str(win_rate)
+                for opponent, win_rate in sorted(self.win_rates.items(),
+                                                 key=lambda x: x[1])
+            ]))
 
     def restore_networks(self, session, run_dir):
         opponents_file = os.path.join(run_dir, "opponents")
@@ -292,7 +287,8 @@ def move(self, move, policy_player_turn=False):
         self.positions.append(self.position)
         if self.position.gameover():
             self.result = self.position.result
-            self.policy_player_score = float(policy_player_turn) if self.result else 0.5
+            self.policy_player_score = float(
+                policy_player_turn) if self.result else 0.5
 
 
 def main(_):
diff --git a/util.py b/util.py
index 5799272..da4487d 100644
--- a/util.py
+++ b/util.py
@@ -9,7 +9,9 @@
 def run_directory(config):
     def find_previous_run(dir):
         if os.path.isdir(dir):
-            runs = [child[4:] for child in os.listdir(dir) if child[:4] == "run_"]
+            runs = [
+                child[4:] for child in os.listdir(dir) if child[:4] == "run_"
+            ]
             if runs:
                 return max(int(run) for run in runs)
 
@@ -43,7 +45,8 @@ def turn_win(turn):
 
 def restore_or_initialize_scope(session, run_dir, scope):
     variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
-    latest_checkpoint = tf.train.latest_checkpoint(run_dir, scope + "_checkpoint")
+    latest_checkpoint = tf.train.latest_checkpoint(run_dir,
+                                                   scope + "_checkpoint")
     if latest_checkpoint:
         tf.train.Saver(variables).restore(session, latest_checkpoint)
         print("Restored %s scope from %s" % (scope, latest_checkpoint))
@@ -64,11 +67,11 @@ def save_scope(session, run_dir, scope):
 
 def restore_or_initialize_network(session, run_dir, network):
     latest_checkpoint = tf.train.latest_checkpoint(
-        run_dir, network.scope + "_checkpoint"
-    )
+        run_dir, network.scope + "_checkpoint")
     if latest_checkpoint:
         tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-        print("Restored %s network from %s" % (network.scope, latest_checkpoint))
+        print("Restored %s network from %s" %
+              (network.scope, latest_checkpoint))
     else:
         session.run(tf.variables_initializer(network.variables))
         print("Initialized %s network" % network.scope)
@@ -76,15 +79,14 @@ def restore_or_initialize_network(session, run_dir, network):
 
 def restore_network_or_fail(session, run_dir, network):
     latest_checkpoint = tf.train.latest_checkpoint(
-        run_dir, network.scope + "_checkpoint"
-    )
+        run_dir, network.scope + "_checkpoint")
     if latest_checkpoint:
         tf.train.Saver(network.variables).restore(session, latest_checkpoint)
-        print("Restored %s network from %s" % (network.scope, latest_checkpoint))
+        print("Restored %s network from %s" %
+              (network.scope, latest_checkpoint))
     else:
-        raise Exception(
-            "Network checkpoint %s not found in %s" % (network.scope, run_dir)
-        )
+        raise Exception("Network checkpoint %s not found in %s" %
+                        (network.scope, run_dir))
 
 
 def save_network(session, run_dir, network):