From 6e51a76d32e587fb320aadfa57bb5a8710207511 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Fri, 15 Aug 2014 21:51:18 +0930
Subject: [PATCH 01/33] Evaluate empty, single-character and newline input.

Still need to handle mismatches, and re-syncing streams.
---
 evaluate.py                | 72 ++++++++++++++++++++++++++++++++++++++
 unit_test/evaluate_test.py | 63 +++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 evaluate.py
 create mode 100644 unit_test/evaluate_test.py

diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..c28746d
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+"""
+Module: evaluate.py
+Desc: Evaluate the ocr esults against the expected output and provide metrics on failures
+Author: Barrie Treloar
+Email: baerrach@gmail.com
+DATE: 13th Aug 2014
+
+  TODO
+"""
+
+import collections;
+
+class Evaluation:
+  def __init__(self):
+    self.success = None;
+    self.count = 0;
+    self.failures = collections.defaultdict(list);
+
+_newline = u"NL";
+
+def _read(stream):
+  """
+  Read a single unicode character from a stream and ignore windows \r characters by reading the next character.
+  \n is rewritten as NL so that mismatches are printable characters.
+  """
+  char = stream.read(1);
+  while u"\r" == char:
+    char = stream.read(1);
+
+  if u"\n" == char:
+    char = _newline;
+
+  return char;
+
+def _isnewline(char):
+  return _newline == char;
+
+def evaluate(actual, expected):
+  """
+  Evaluate the actual ocr results against the expected results and provide metrics on failures.
+
+  :param actual: io.TextIOBase of the actual ocr results
+  :param expected: io.TextIOBase of the expected ocr results
+  """
+  result = Evaluation();
+  line = 1;
+  column = 0;
+  while True:
+    expected_char = _read(expected);
+    actual_char = _read(actual);
+    if expected_char == "" or actual_char == "":
+      if result.success == None:
+        result.success = True;
+      break;
+
+    if _isnewline(expected_char) and _isnewline(actual_char):
+      line += 1;
+      column = 0;
+    else:
+      result.count += 1;
+      column += 1;
+
+    if expected_char != actual_char:
+      result.success = False;
+      result.failures[expected_char].append({ "actual" : actual_char, "line" : line, "position" : column});
+
+
+  return result;
+
+
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
new file mode 100644
index 0000000..d0f0c00
--- /dev/null
+++ b/unit_test/evaluate_test.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+import io;
+import evaluate;
+
+class TestEvaluate:
+
+  def test_empty(self):
+    actual = io.StringIO();
+    expected = io.StringIO();
+    result = evaluate.evaluate(actual, expected);
+    assert result.success;
+    assert result.count == 0;
+    assert result.failures == {};
+
+  def test_onecharacter(self):
+    actual = io.StringIO(u"し",);
+    expected = io.StringIO(u"し");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success;
+    assert result.count == 1;
+    assert result.failures == {};
+
+  def test_onecharacter_does_not_match(self):
+    actual = io.StringIO(u"あ");
+    expected = io.StringIO(u"し");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 1;
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "line" : 1, "position" : 1}] };
+
+  def test_endofline_unix_doesnot_increase_count(self):
+    actual = io.StringIO(u"\n");
+    expected = io.StringIO(u"\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success;
+    assert result.count == 0;
+    assert result.failures == {};
+
+  def test_endofline_windows_doesnot_increase_count(self):
+    actual = io.StringIO(u"\r\n");
+    expected = io.StringIO(u"\r\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success;
+    assert result.count == 0;
+    assert result.failures == {};
+
+  def test_endofline_mixed_unix_and_windows_doesnot_increase_count(self):
+    actual = io.StringIO(u"\n");
+    expected = io.StringIO(u"\r\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success;
+    assert result.count == 0;
+    assert result.failures == {};
+
+  def test_line_reported_in_failures(self):
+    actual = io.StringIO(u"\r\nあ");
+    expected = io.StringIO(u"\r\nし");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 1;
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "line" : 2, "position" : 1}] };
+

From 8d449ebee8f8dc3ad518ba6cbd3de9774220e217 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Sat, 16 Aug 2014 23:40:38 +0930
Subject: [PATCH 02/33] Handle EOF mismatch

---
 evaluate.py                | 75 +++++++++++++++++++++++++-------------
 unit_test/evaluate_test.py | 20 +++++++++-
 2 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index c28746d..7cce75e 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -18,24 +18,50 @@ def __init__(self):
     self.count = 0;
     self.failures = collections.defaultdict(list);
 
-_newline = u"NL";
-
-def _read(stream):
+class EvaluationStream():
   """
-  Read a single unicode character from a stream and ignore windows \r characters by reading the next character.
-  \n is rewritten as NL so that mismatches are printable characters.
+  Wrap an io.TextIOBase to provide Evaluation support.
+
+  :param stream: io.TextIOBase of the actual ocr results
   """
-  char = stream.read(1);
-  while u"\r" == char:
-    char = stream.read(1);
+  def __init__(self, stream):
+    self._stream = stream;
+    self._newline = u"NL";
+    self._eof = u"EOF";
+    self._line = 1;
+    self._position = 0;
+    self.count = 0;
+
+  def read(self):
+    """
+    As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
+    \n is rewritten as NL so that mismatches are printable characters.
+    end of file is rewritten as EOF for printability.
+    """
+    char = self._stream.read(1);
+    while u"\r" == char:
+      char = self._stream.read(1);
+
+    if u"" == char:
+      char = self._eof;
+    elif u"\n" == char:
+      char = self._newline;
+      self._line += 1;
+      self._position = 0;
+    else:
+      self._position += 1;
+      self.count += 1;
 
-  if u"\n" == char:
-    char = _newline;
+    return char;
 
-  return char;
+  def isnewline(self, char):
+    return self._newline == char;
 
-def _isnewline(char):
-  return _newline == char;
+  def iseof(self, char):
+    return self._eof == char;
+
+  def location(self):
+    return "{0:d}:{1:d}".format(self._line, self._position);
 
 def evaluate(actual, expected):
   """
@@ -45,28 +71,25 @@ def evaluate(actual, expected):
   :param expected: io.TextIOBase of the expected ocr results
   """
   result = Evaluation();
-  line = 1;
-  column = 0;
+  actual = EvaluationStream(actual);
+  expected = EvaluationStream(expected);
   while True:
-    expected_char = _read(expected);
-    actual_char = _read(actual);
-    if expected_char == "" or actual_char == "":
+    expected_char = expected.read();
+    actual_char = actual.read();
+    if expected.iseof(expected_char) and actual.iseof(actual_char):
       if result.success == None:
         result.success = True;
       break;
 
-    if _isnewline(expected_char) and _isnewline(actual_char):
-      line += 1;
-      column = 0;
-    else:
-      result.count += 1;
-      column += 1;
-
     if expected_char != actual_char:
       result.success = False;
-      result.failures[expected_char].append({ "actual" : actual_char, "line" : line, "position" : column});
+      result.failures[expected_char].append({ "actual" : actual_char, "actual_position" : actual.location(), "expected_position" : expected.location()});
 
+    if expected.iseof(expected_char):
+      result.success = False;
+      break;
 
+  result.count = expected.count;
   return result;
 
 
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index d0f0c00..5853b73 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -27,7 +27,7 @@ def test_onecharacter_does_not_match(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "line" : 1, "position" : 1}] };
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_position": "1:1", "expected_position": "1:1"}] };
 
   def test_endofline_unix_doesnot_increase_count(self):
     actual = io.StringIO(u"\n");
@@ -59,5 +59,21 @@ def test_line_reported_in_failures(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "line" : 2, "position" : 1}] };
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_position": "2:1", "expected_position": "2:1"}] };
+
+  def test_endoffile_mismatch_more_in_actual(self):
+    actual = io.StringIO(u"あ\r\nし");
+    expected = io.StringIO(u"あ\r\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 1;
+    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_position": "2:1", "expected_position": "2:0"}] };
+
+  def test_endoffile_mismatch_more_in_expected(self):
+    actual = io.StringIO(u"あ\r\n");
+    expected = io.StringIO(u"あ\r\nし");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 2;
+    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_position": "2:0", "expected_position": "2:1"}] };
 

From fa44facbbd2b3d95758fe87dfa42313a6e304ab1 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Sat, 16 Aug 2014 23:42:29 +0930
Subject: [PATCH 03/33] Rename failure's *_position keys to *_location

---
 evaluate.py                | 2 +-
 unit_test/evaluate_test.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 7cce75e..7a8758e 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -83,7 +83,7 @@ def evaluate(actual, expected):
 
     if expected_char != actual_char:
       result.success = False;
-      result.failures[expected_char].append({ "actual" : actual_char, "actual_position" : actual.location(), "expected_position" : expected.location()});
+      result.failures[expected_char].append({ "actual" : actual_char, "actual_location" : actual.location(), "expected_location" : expected.location()});
 
     if expected.iseof(expected_char):
       result.success = False;
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 5853b73..a892bcb 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -27,7 +27,7 @@ def test_onecharacter_does_not_match(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_position": "1:1", "expected_position": "1:1"}] };
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] };
 
   def test_endofline_unix_doesnot_increase_count(self):
     actual = io.StringIO(u"\n");
@@ -59,7 +59,7 @@ def test_line_reported_in_failures(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_position": "2:1", "expected_position": "2:1"}] };
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] };
 
   def test_endoffile_mismatch_more_in_actual(self):
     actual = io.StringIO(u"あ\r\nし");
@@ -67,7 +67,7 @@ def test_endoffile_mismatch_more_in_actual(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 1;
-    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_position": "2:1", "expected_position": "2:0"}] };
+    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] };
 
   def test_endoffile_mismatch_more_in_expected(self):
     actual = io.StringIO(u"あ\r\n");
@@ -75,5 +75,5 @@ def test_endoffile_mismatch_more_in_expected(self):
     result = evaluate.evaluate(actual, expected);
     assert result.success == False;
     assert result.count == 2;
-    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_position": "2:0", "expected_position": "2:1"}] };
+    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] };
 

From 9d20ea22c25669c8ee676258d092219d7379aa8d Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Sun, 17 Aug 2014 20:55:04 +0930
Subject: [PATCH 04/33] Refactor test names

---
 unit_test/evaluate_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index a892bcb..af496f8 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -13,7 +13,7 @@ def test_empty(self):
     assert result.count == 0;
     assert result.failures == {};
 
-  def test_onecharacter(self):
+  def test_one_character(self):
     actual = io.StringIO(u"し",);
     expected = io.StringIO(u"し");
     result = evaluate.evaluate(actual, expected);
@@ -21,7 +21,7 @@ def test_onecharacter(self):
     assert result.count == 1;
     assert result.failures == {};
 
-  def test_onecharacter_does_not_match(self):
+  def test_one_character_does_not_match(self):
     actual = io.StringIO(u"あ");
     expected = io.StringIO(u"し");
     result = evaluate.evaluate(actual, expected);
@@ -29,7 +29,7 @@ def test_onecharacter_does_not_match(self):
     assert result.count == 1;
     assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] };
 
-  def test_endofline_unix_doesnot_increase_count(self):
+  def test_endofline_unix_does_not_increase_count(self):
     actual = io.StringIO(u"\n");
     expected = io.StringIO(u"\n");
     result = evaluate.evaluate(actual, expected);
@@ -37,7 +37,7 @@ def test_endofline_unix_doesnot_increase_count(self):
     assert result.count == 0;
     assert result.failures == {};
 
-  def test_endofline_windows_doesnot_increase_count(self):
+  def test_endofline_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\r\n");
     expected = io.StringIO(u"\r\n");
     result = evaluate.evaluate(actual, expected);
@@ -45,7 +45,7 @@ def test_endofline_windows_doesnot_increase_count(self):
     assert result.count == 0;
     assert result.failures == {};
 
-  def test_endofline_mixed_unix_and_windows_doesnot_increase_count(self):
+  def test_endofline_mixed_unix_and_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\n");
     expected = io.StringIO(u"\r\n");
     result = evaluate.evaluate(actual, expected);

From b29da6d690288805bcc0be92eea76b3c6dc80ac3 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 18 Aug 2014 14:12:21 +0930
Subject: [PATCH 05/33] Implement stream resyncing

Look ahead in the stream to see whether it needs to be resynced in order
to get better evaluation results.
---
 evaluate.py                | 98 +++++++++++++++++++++++++++++++++-----
 unit_test/evaluate_test.py | 13 +++++
 2 files changed, 99 insertions(+), 12 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 7a8758e..5b52852 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -24,15 +24,27 @@ class EvaluationStream():
 
   :param stream: io.TextIOBase of the actual ocr results
   """
+
+  _newline = u"NL";
+  _eof = u"EOF";
+
+  @staticmethod
+  def isnewline(char):
+    return EvaluationStream._newline == char;
+
+  @staticmethod
+  def iseof(char):
+    return EvaluationStream._eof == char;
+
   def __init__(self, stream):
     self._stream = stream;
-    self._newline = u"NL";
-    self._eof = u"EOF";
     self._line = 1;
     self._position = 0;
     self.count = 0;
+    self._peek_buffer = collections.deque();
+    self._max_peek_lookahead = 2;
 
-  def read(self):
+  def _read_with_translations(self):
     """
     As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
     \n is rewritten as NL so that mismatches are printable characters.
@@ -43,9 +55,34 @@ def read(self):
       char = self._stream.read(1);
 
     if u"" == char:
-      char = self._eof;
+      char = EvaluationStream._eof;
     elif u"\n" == char:
-      char = self._newline;
+      char = EvaluationStream._newline;
+
+    return char
+
+  def _read_stream_or_peek_buffer(self):
+    if self._peek_buffer:
+      char = self._peek_buffer.popleft();
+    else:
+      char = self._read_with_translations();
+
+    return char;
+
+  def read(self):
+    """
+    As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
+    \n is rewritten as NL so that mismatches are printable characters.
+    end of file is rewritten as EOF for printability.
+
+    To support peek, an internal buffer is used and read from before re-reading from stream.
+    """
+
+    char = self._read_stream_or_peek_buffer();
+
+    if self.iseof(char):
+      pass; # EOF doesn't increment counts
+    elif self.isnewline(char):
       self._line += 1;
       self._position = 0;
     else:
@@ -54,15 +91,49 @@ def read(self):
 
     return char;
 
-  def isnewline(self, char):
-    return self._newline == char;
-
-  def iseof(self, char):
-    return self._eof == char;
-
   def location(self):
     return "{0:d}:{1:d}".format(self._line, self._position);
 
+  def peek(self, n):
+    """
+    Peek ahead n characters in the input stream and return that character
+    """
+
+    current_peek_chars_available = len(self._peek_buffer);
+    chars_needed = n - current_peek_chars_available;
+    for _ in range(chars_needed):
+      self._peek_buffer.append(self._read_with_translations());
+    result = self._peek_buffer[n-1];
+    return result;
+
+  def resync(self, current_char, tostream):
+    """
+    Lookahead on the stream to see if re-syncing is required.
+    If re-syncing is required the the extra characters will be consumed and returned appended to current_char
+
+    :param current_char: the current failing character
+    :param tostream: the evaluation stream to sync to
+    """
+    sync_to_char = tostream.peek(1);
+
+    if self.iseof(sync_to_char):
+      # Dont resync on EOF
+      return current_char;
+
+    resync_found_ahead_at = None;
+    for i in range(1, self._max_peek_lookahead+1):
+      candidate_sync_spot = self.peek(i);
+      if sync_to_char == candidate_sync_spot:
+        resync_found_ahead_at = i;
+
+    if resync_found_ahead_at:
+      while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
+        resync_found_ahead_at -= 1;
+        current_char += self.read();
+
+    return current_char;
+
+
 def evaluate(actual, expected):
   """
   Evaluate the actual ocr results against the expected results and provide metrics on failures.
@@ -83,7 +154,10 @@ def evaluate(actual, expected):
 
     if expected_char != actual_char:
       result.success = False;
-      result.failures[expected_char].append({ "actual" : actual_char, "actual_location" : actual.location(), "expected_location" : expected.location()});
+      failure_details = { "actual_location" : actual.location(), "expected_location" : expected.location()};
+      actual_char = actual.resync(actual_char, expected);
+      failure_details["actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
+      result.failures[expected_char].append(failure_details);
 
     if expected.iseof(expected_char):
       result.success = False;
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index af496f8..e06362d 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -77,3 +77,16 @@ def test_endoffile_mismatch_more_in_expected(self):
     assert result.count == 2;
     assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] };
 
+  def test_out_of_sync_stream(self):
+    actual = io.StringIO(u"ぃ　あし\r\n");
+    expected = io.StringIO(u"いあし\r\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 3;
+    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] };
+
+  def test_peek_when_empty(self):
+    stream = io.StringIO();
+    OUT = evaluate.EvaluationStream(stream);
+    assert evaluate.EvaluationStream.iseof(OUT.peek(1));
+    assert evaluate.EvaluationStream.iseof(OUT.peek(2));

From def1f1ea1606399c88f512c201cb57457a3ad344 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 18 Aug 2014 14:12:40 +0930
Subject: [PATCH 06/33] Ignore Eclipse private files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d2d6f36..470acfa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ nosetests.xml
 .mr.developer.cfg
 .project
 .pydevproject
+/.settings/

From b4f7f52b3b98b8f7c968125125f86ce7d66e5c15 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 18 Aug 2014 17:40:12 +0930
Subject: [PATCH 07/33] Test peek

---
 unit_test/evaluate_test.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index e06362d..6110ee5 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -88,5 +88,22 @@ def test_out_of_sync_stream(self):
   def test_peek_when_empty(self):
     stream = io.StringIO();
     OUT = evaluate.EvaluationStream(stream);
-    assert evaluate.EvaluationStream.iseof(OUT.peek(1));
-    assert evaluate.EvaluationStream.iseof(OUT.peek(2));
+    assert OUT.iseof(OUT.peek(1));
+    assert OUT.iseof(OUT.peek(2));
+
+  def test_peek(self):
+    stream = io.StringIO(u"いあし\r\n");
+    OUT = evaluate.EvaluationStream(stream);
+    assert u"い" == OUT.peek(1);
+    assert "1:0" == OUT.location();
+    assert u"あ" == OUT.peek(2);
+    assert "1:0" == OUT.location();
+    assert u"し" == OUT.peek(3);
+    assert "1:0" == OUT.location();
+    assert OUT.isnewline(OUT.peek(4));
+    assert "1:0" == OUT.location();
+    assert OUT.iseof(OUT.peek(5));
+    assert "1:0" == OUT.location();
+
+  def test_success_statistics(self):
+    assert False, "Implement: Every successful match should increment counter on that character. Should also check whether failed results ever success correctly."

From 3e9236e654d7839ae47c95ec29262cee75ba7fc1 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 18 Aug 2014 22:10:09 +0930
Subject: [PATCH 08/33] Add success tracking and percentages

---
 evaluate.py                | 15 +++++++++++++++
 unit_test/evaluate_test.py | 23 ++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/evaluate.py b/evaluate.py
index 5b52852..88f49e5 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -17,6 +17,17 @@ def __init__(self):
     self.success = None;
     self.count = 0;
     self.failures = collections.defaultdict(list);
+    self.successes = collections.defaultdict(list);
+
+  def percentages(self):
+    keys = set(self.successes.iterkeys()).union(self.failures.iterkeys());
+    result = {};
+    for key in keys:
+      failure_count = len(self.failures[key]) if key in self.failures else 0
+      success_count = len(self.successes[key]) if key in self.successes else 0;
+      result[key] = success_count / float( failure_count + success_count );
+
+    return result;
 
 class EvaluationStream():
   """
@@ -158,11 +169,15 @@ def evaluate(actual, expected):
       actual_char = actual.resync(actual_char, expected);
       failure_details["actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
       result.failures[expected_char].append(failure_details);
+    else:
+      if not expected.isnewline(expected_char):
+        result.successes[expected_char].append(expected.location());
 
     if expected.iseof(expected_char):
       result.success = False;
       break;
 
+
   result.count = expected.count;
   return result;
 
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 6110ee5..aef18ca 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -106,4 +106,25 @@ def test_peek(self):
     assert "1:0" == OUT.location();
 
   def test_success_statistics(self):
-    assert False, "Implement: Every successful match should increment counter on that character. Should also check whether failed results ever success correctly."
+    actual = io.StringIO(u"ぃ　あしろろる\r\n");
+    expected = io.StringIO(u"いあしるろる\r\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 6;
+    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
+                                u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
+                               };
+    assert result.successes == {
+                                u"あ" : ["1:2"],
+                                u"し" : ["1:3"],
+                                u"ろ" : ["1:5"],
+                                u"る" : ["1:6"]
+                                };
+    assert result.percentages() == {
+                                    u"い" : 0.0,
+                                    u"あ" : 1.0,
+                                    u"し" : 1.0,
+                                    u"る" : 0.5,
+                                    u"ろ" : 1.0
+                                   };
+

From 4b9df9392acffd0eb1752fcd2c2d21fc747383d7 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Sat, 23 Aug 2014 21:52:07 +0930
Subject: [PATCH 09/33] Resync on endofline mismatches

---
 evaluate.py                | 154 ++++++++++++++++++++++++++++++++-----
 unit_test/evaluate_test.py |  22 ++++++
 2 files changed, 157 insertions(+), 19 deletions(-)
 mode change 100644 => 100755 evaluate.py

diff --git a/evaluate.py b/evaluate.py
old mode 100644
new mode 100755
index 88f49e5..d564f89
--- a/evaluate.py
+++ b/evaluate.py
@@ -1,3 +1,4 @@
+#!/usr/bin/python
 # -*- coding: utf-8 -*-
 
 """
@@ -10,7 +11,16 @@
   TODO
 """
 
+import codecs;
 import collections;
+import argparse;
+import arg;
+import sys;
+import os;
+import json;
+import logging
+
+logger = logging.getLogger(__name__);
 
 class Evaluation:
   def __init__(self):
@@ -18,16 +28,51 @@ def __init__(self):
     self.count = 0;
     self.failures = collections.defaultdict(list);
     self.successes = collections.defaultdict(list);
+    self._percentages = None;
 
   def percentages(self):
-    keys = set(self.successes.iterkeys()).union(self.failures.iterkeys());
-    result = {};
-    for key in keys:
-      failure_count = len(self.failures[key]) if key in self.failures else 0
-      success_count = len(self.successes[key]) if key in self.successes else 0;
-      result[key] = success_count / float( failure_count + success_count );
-
-    return result;
+    if not self._percentages:
+      keys = set(self.successes.iterkeys()).union(self.failures.iterkeys());
+      self._percentages = {};
+      for key in keys:
+        failure_count = len(self.failures[key]) if key in self.failures else 0
+        success_count = len(self.successes[key]) if key in self.successes else 0;
+        self._percentages[key] = success_count / float( failure_count + success_count );
+
+    return self._percentages;
+
+  def overall(self):
+    values = self.percentages().values()
+    return sum(values)/len(values);
+
+  def __str__(self):
+    return unicode(self).encode('utf-8');
+
+  def __unicode__(self):
+    result = [];
+    result.append(u"success={0!s}".format(self.success));
+    result.append(u"count={0:d}".format(self.count));
+    result.append(u"failures={");
+    for key, value in self.failures.iteritems():
+      result.append(u"  '{0}' = {1},".format(key, unicode(value)));
+    result.append(u"}");
+    result.append(u"successes={");
+    for key, value in self.successes.iteritems():
+      result.append(u"  '{0}' = {1},".format(key, value));
+    result.append(u"}");
+    result.append(u"percentages={");
+    for key, value in self.percentages().iteritems():
+      result.append(u"  '{0}' = {1},".format(key, value));
+    result.append(u"}");
+    result.append(u"overall={0}".format(self.overall()));
+    return u"\n".join(result);
+
+  def summary(self):
+    result = [];
+    result.append(u"success={0!s}".format(self.success));
+    result.append(u"count={0:d}".format(self.count));
+    result.append(u"overall={0}".format(self.overall()));
+    return u"\n".join(result);
 
 class EvaluationStream():
   """
@@ -91,9 +136,9 @@ def read(self):
 
     char = self._read_stream_or_peek_buffer();
 
-    if self.iseof(char):
+    if EvaluationStream.iseof(char):
       pass; # EOF doesn't increment counts
-    elif self.isnewline(char):
+    elif EvaluationStream.isnewline(char):
       self._line += 1;
       self._position = 0;
     else:
@@ -103,7 +148,7 @@ def read(self):
     return char;
 
   def location(self):
-    return "{0:d}:{1:d}".format(self._line, self._position);
+    return u"{0:d}:{1:d}".format(self._line, self._position);
 
   def peek(self, n):
     """
@@ -127,7 +172,7 @@ def resync(self, current_char, tostream):
     """
     sync_to_char = tostream.peek(1);
 
-    if self.iseof(sync_to_char):
+    if EvaluationStream.iseof(sync_to_char):
       # Dont resync on EOF
       return current_char;
 
@@ -155,30 +200,101 @@ def evaluate(actual, expected):
   result = Evaluation();
   actual = EvaluationStream(actual);
   expected = EvaluationStream(expected);
+
+  if logger.isEnabledFor(logging.DEBUG):
+    sys.stdout.write("Debug Legend:\n");
+    sys.stdout.write("  . = matched\n");
+    sys.stdout.write("  X = failed\n");
+    sys.stdout.write("  s = skipped\n");
+
   while True:
     expected_char = expected.read();
     actual_char = actual.read();
-    if expected.iseof(expected_char) and actual.iseof(actual_char):
+    if EvaluationStream.iseof(expected_char) and EvaluationStream.iseof(actual_char):
       if result.success == None:
         result.success = True;
       break;
 
+    up_to_count = expected.count;
+
     if expected_char != actual_char:
       result.success = False;
-      failure_details = { "actual_location" : actual.location(), "expected_location" : expected.location()};
-      actual_char = actual.resync(actual_char, expected);
-      failure_details["actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
-      result.failures[expected_char].append(failure_details);
+      failure_details = { u"actual_location" : actual.location(), u"expected_location" : expected.location()};
+      if EvaluationStream.isnewline(expected_char):
+        # Resync other stream to the next newline
+        while not (EvaluationStream.isnewline(actual_char) or EvaluationStream.iseof(actual_char)):
+          failure_details = { u"actual" : actual_char, u"actual_location" : actual.location(), u"expected_location" : expected.location()};
+          result.failures[expected_char].append(failure_details);
+          actual_char = actual.read();
+      elif EvaluationStream.isnewline(actual_char):
+        # Resync other stream to the next newline
+        while not (EvaluationStream.isnewline(expected_char) or EvaluationStream.iseof(expected_char)):
+          failure_details = { u"actual" : actual_char, u"actual_location" : actual.location(), u"expected_location" : expected.location()};
+          result.failures[expected_char].append(failure_details);
+          expected_char = expected.read();
+      else:
+        actual_char = actual.resync(actual_char, expected);
+        failure_details[u"actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
+        result.failures[expected_char].append(failure_details);
+        if logger.isEnabledFor(logging.DEBUG):
+          sys.stdout.write("X");
+          if len(actual_char) > 1:
+            sys.stdout.write("s" * (len(actual_char)-1));
     else:
-      if not expected.isnewline(expected_char):
+      if not EvaluationStream.isnewline(expected_char):
         result.successes[expected_char].append(expected.location());
+        if logger.isEnabledFor(logging.DEBUG):
+          sys.stdout.write(".");
+      else:
+        if logger.isEnabledFor(logging.DEBUG):
+          sys.stdout.write("\n");
 
-    if expected.iseof(expected_char):
+    if EvaluationStream.iseof(expected_char):
       result.success = False;
       break;
 
 
+  sys.stdout.write("\n");
+  sys.stdout.flush();
   result.count = expected.count;
   return result;
 
+def main():
+  parser = argparse.ArgumentParser(description="Evaluate text against correct version.");
+  parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text");
+  parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version");
+  parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to");
+  parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing");
+
+  arg.value = parser.parse_args();
+  correct_file = arg.string_value("correct_file", default_value="correct.txt");
+  input_file = arg.string_value("input_file");
+  results_file = arg.string_value("results_file", default_value=input_file+"-results.txt");
+  if arg.boolean_value("debug"):
+    logging.getLogger().setLevel(logging.DEBUG);
+
+  if not os.path.isfile(input_file):
+    print("Input file '{0}' does not exist. Use -h option for help".format(input_file));
+    sys.exit(-1);
+
+  if not os.path.isfile(correct_file):
+    print("Correct file '{0}' does not exist. Use -h option for help".format(correct_file));
+    sys.exit(-1);
+
+  with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
+    result = evaluate(i, c);
+
+  with codecs.open(results_file, "wU", "utf-8") as w:
+    json.dump(result.__dict__, w, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
+
+  print(u"Summary of evaluation results:");
+  print(u"results={0}".format(results_file));
+  print(result.summary());
+
+
+if __name__ == "__main__":
+  logging.basicConfig(stream=sys.stderr, level=logging.INFO);
 
+  UTF8Writer = codecs.getwriter('utf8');
+  sys.stdout = UTF8Writer(sys.stdout);
+  main();
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index aef18ca..1bd89f1 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -85,6 +85,28 @@ def test_out_of_sync_stream(self):
     assert result.count == 3;
     assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] };
 
+  def test_out_of_sync_stream_actual_new_lined_early(self):
+    actual = io.StringIO(u"新しい\nしごと");
+    expected = io.StringIO(u"新しいむすこ\nしごと\n");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 9;
+    assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
+                                u"す" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
+                                u"こ" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
+                                };
+
+  def test_out_of_sync_stream_expected_new_lined_early(self):
+    actual = io.StringIO(u"新しいむすこ\nしごと\n");
+    expected = io.StringIO(u"新しい\nしごと");
+    result = evaluate.evaluate(actual, expected);
+    assert result.success == False;
+    assert result.count == 6;
+    assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
+                                         { "actual" : u"す", "actual_location": "1:5", "expected_location": "2:0"},
+                                         { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
+                              };
+
   def test_peek_when_empty(self):
     stream = io.StringIO();
     OUT = evaluate.EvaluationStream(stream);

From a3752f577df7fdb95fd43a18b127ce8a8c6ca759 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Sun, 24 Aug 2014 18:24:21 +0930
Subject: [PATCH 10/33] Refactor evaluate.evaluate into Evaluation object

---
 evaluate.py                | 151 +++++++++++++++++++------------------
 unit_test/evaluate_test.py |  45 +++++++----
 2 files changed, 107 insertions(+), 89 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index d564f89..f353bac 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -22,13 +22,87 @@
 
 logger = logging.getLogger(__name__);
 
+class IgnoreUnderscoreEncoder(json.JSONEncoder):
+    def default(self, obj):
+        attributes = {}
+        obj_dict = obj.__dict__
+        for key, value in obj_dict.iteritems():
+          if key.startswith(u'_'):
+              continue
+          attributes[key] = value
+        return attributes
+
 class Evaluation:
-  def __init__(self):
+  def __init__(self,expected_stream,actual_stream):
     self.success = None;
     self.count = 0;
     self.failures = collections.defaultdict(list);
     self.successes = collections.defaultdict(list);
     self._percentages = None;
+    self._actual = EvaluationStream(actual_stream);
+    self._expected = EvaluationStream(expected_stream);
+
+  def evaluate(self):
+    """
+    Evaluate the actual ocr results against the expected results and provide metrics on failures.
+    """
+
+    if logger.isEnabledFor(logging.DEBUG):
+      sys.stdout.write("Debug Legend:\n");
+      sys.stdout.write("  . = matched\n");
+      sys.stdout.write("  X = failed\n");
+      sys.stdout.write("  s = skipped\n");
+
+    while True:
+      expected_char = self._expected.read();
+      actual_char = self._actual.read();
+      if EvaluationStream.iseof(expected_char) and EvaluationStream.iseof(actual_char):
+        if self.success == None:
+          self.success = True;
+        break;
+
+      up_to_count = self._expected.count;
+
+      if expected_char != actual_char:
+        self.success = False;
+        failure_details = { u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
+        if EvaluationStream.isnewline(expected_char):
+          # Resync other stream to the next newline
+          while not (EvaluationStream.isnewline(actual_char) or EvaluationStream.iseof(actual_char)):
+            failure_details = { u"actual" : actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
+            self.failures[expected_char].append(failure_details);
+            actual_char = self._actual.read();
+        elif EvaluationStream.isnewline(actual_char):
+          # Resync other stream to the next newline
+          while not (EvaluationStream.isnewline(expected_char) or EvaluationStream.iseof(expected_char)):
+            failure_details = { u"actual" : actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
+            self.failures[expected_char].append(failure_details);
+            expected_char = self._expected.read();
+        else:
+          actual_char = self._actual.resync(actual_char, self._expected);
+          failure_details[u"actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
+          self.failures[expected_char].append(failure_details);
+          if logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write("X");
+            if len(actual_char) > 1:
+              sys.stdout.write("s" * (len(actual_char)-1));
+      else:
+        if not EvaluationStream.isnewline(expected_char):
+          self.successes[expected_char].append(self._expected.location());
+          if logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write(".");
+        else:
+          if logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write("\n");
+
+      if EvaluationStream.iseof(expected_char):
+        self.success = False;
+        break;
+
+    sys.stdout.write("\n");
+    sys.stdout.flush();
+    self.count = self._expected.count;
+    return self;
 
   def percentages(self):
     if not self._percentages:
@@ -189,76 +263,6 @@ def resync(self, current_char, tostream):
 
     return current_char;
 
-
-def evaluate(actual, expected):
-  """
-  Evaluate the actual ocr results against the expected results and provide metrics on failures.
-
-  :param actual: io.TextIOBase of the actual ocr results
-  :param expected: io.TextIOBase of the expected ocr results
-  """
-  result = Evaluation();
-  actual = EvaluationStream(actual);
-  expected = EvaluationStream(expected);
-
-  if logger.isEnabledFor(logging.DEBUG):
-    sys.stdout.write("Debug Legend:\n");
-    sys.stdout.write("  . = matched\n");
-    sys.stdout.write("  X = failed\n");
-    sys.stdout.write("  s = skipped\n");
-
-  while True:
-    expected_char = expected.read();
-    actual_char = actual.read();
-    if EvaluationStream.iseof(expected_char) and EvaluationStream.iseof(actual_char):
-      if result.success == None:
-        result.success = True;
-      break;
-
-    up_to_count = expected.count;
-
-    if expected_char != actual_char:
-      result.success = False;
-      failure_details = { u"actual_location" : actual.location(), u"expected_location" : expected.location()};
-      if EvaluationStream.isnewline(expected_char):
-        # Resync other stream to the next newline
-        while not (EvaluationStream.isnewline(actual_char) or EvaluationStream.iseof(actual_char)):
-          failure_details = { u"actual" : actual_char, u"actual_location" : actual.location(), u"expected_location" : expected.location()};
-          result.failures[expected_char].append(failure_details);
-          actual_char = actual.read();
-      elif EvaluationStream.isnewline(actual_char):
-        # Resync other stream to the next newline
-        while not (EvaluationStream.isnewline(expected_char) or EvaluationStream.iseof(expected_char)):
-          failure_details = { u"actual" : actual_char, u"actual_location" : actual.location(), u"expected_location" : expected.location()};
-          result.failures[expected_char].append(failure_details);
-          expected_char = expected.read();
-      else:
-        actual_char = actual.resync(actual_char, expected);
-        failure_details[u"actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
-        result.failures[expected_char].append(failure_details);
-        if logger.isEnabledFor(logging.DEBUG):
-          sys.stdout.write("X");
-          if len(actual_char) > 1:
-            sys.stdout.write("s" * (len(actual_char)-1));
-    else:
-      if not EvaluationStream.isnewline(expected_char):
-        result.successes[expected_char].append(expected.location());
-        if logger.isEnabledFor(logging.DEBUG):
-          sys.stdout.write(".");
-      else:
-        if logger.isEnabledFor(logging.DEBUG):
-          sys.stdout.write("\n");
-
-    if EvaluationStream.iseof(expected_char):
-      result.success = False;
-      break;
-
-
-  sys.stdout.write("\n");
-  sys.stdout.flush();
-  result.count = expected.count;
-  return result;
-
 def main():
   parser = argparse.ArgumentParser(description="Evaluate text against correct version.");
   parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text");
@@ -282,10 +286,11 @@ def main():
     sys.exit(-1);
 
   with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
-    result = evaluate(i, c);
+    result = Evaluation(c, i);
+    result.evaluate();
 
   with codecs.open(results_file, "wU", "utf-8") as w:
-    json.dump(result.__dict__, w, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
+    json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True);
 
   print(u"Summary of evaluation results:");
   print(u"results={0}".format(results_file));
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 1bd89f1..310aec4 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 
 import io;
-import evaluate;
+from evaluate import Evaluation, EvaluationStream;
 
 class TestEvaluate:
 
   def test_empty(self):
     actual = io.StringIO();
     expected = io.StringIO();
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success;
     assert result.count == 0;
     assert result.failures == {};
@@ -16,7 +17,8 @@ def test_empty(self):
   def test_one_character(self):
     actual = io.StringIO(u"し",);
     expected = io.StringIO(u"し");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success;
     assert result.count == 1;
     assert result.failures == {};
@@ -24,7 +26,8 @@ def test_one_character(self):
   def test_one_character_does_not_match(self):
     actual = io.StringIO(u"あ");
     expected = io.StringIO(u"し");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 1;
     assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] };
@@ -32,7 +35,8 @@ def test_one_character_does_not_match(self):
   def test_endofline_unix_does_not_increase_count(self):
     actual = io.StringIO(u"\n");
     expected = io.StringIO(u"\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success;
     assert result.count == 0;
     assert result.failures == {};
@@ -40,7 +44,8 @@ def test_endofline_unix_does_not_increase_count(self):
   def test_endofline_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\r\n");
     expected = io.StringIO(u"\r\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success;
     assert result.count == 0;
     assert result.failures == {};
@@ -48,7 +53,8 @@ def test_endofline_windows_does_not_increase_count(self):
   def test_endofline_mixed_unix_and_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\n");
     expected = io.StringIO(u"\r\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success;
     assert result.count == 0;
     assert result.failures == {};
@@ -56,7 +62,8 @@ def test_endofline_mixed_unix_and_windows_does_not_increase_count(self):
   def test_line_reported_in_failures(self):
     actual = io.StringIO(u"\r\nあ");
     expected = io.StringIO(u"\r\nし");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 1;
     assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] };
@@ -64,7 +71,8 @@ def test_line_reported_in_failures(self):
   def test_endoffile_mismatch_more_in_actual(self):
     actual = io.StringIO(u"あ\r\nし");
     expected = io.StringIO(u"あ\r\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 1;
     assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] };
@@ -72,7 +80,8 @@ def test_endoffile_mismatch_more_in_actual(self):
   def test_endoffile_mismatch_more_in_expected(self):
     actual = io.StringIO(u"あ\r\n");
     expected = io.StringIO(u"あ\r\nし");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 2;
     assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] };
@@ -80,7 +89,8 @@ def test_endoffile_mismatch_more_in_expected(self):
   def test_out_of_sync_stream(self):
     actual = io.StringIO(u"ぃ　あし\r\n");
     expected = io.StringIO(u"いあし\r\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 3;
     assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] };
@@ -88,7 +98,8 @@ def test_out_of_sync_stream(self):
   def test_out_of_sync_stream_actual_new_lined_early(self):
     actual = io.StringIO(u"新しい\nしごと");
     expected = io.StringIO(u"新しいむすこ\nしごと\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 9;
     assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
@@ -99,7 +110,8 @@ def test_out_of_sync_stream_actual_new_lined_early(self):
   def test_out_of_sync_stream_expected_new_lined_early(self):
     actual = io.StringIO(u"新しいむすこ\nしごと\n");
     expected = io.StringIO(u"新しい\nしごと");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 6;
     assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
@@ -109,13 +121,13 @@ def test_out_of_sync_stream_expected_new_lined_early(self):
 
   def test_peek_when_empty(self):
     stream = io.StringIO();
-    OUT = evaluate.EvaluationStream(stream);
+    OUT = EvaluationStream(stream);
     assert OUT.iseof(OUT.peek(1));
     assert OUT.iseof(OUT.peek(2));
 
   def test_peek(self):
     stream = io.StringIO(u"いあし\r\n");
-    OUT = evaluate.EvaluationStream(stream);
+    OUT = EvaluationStream(stream);
     assert u"い" == OUT.peek(1);
     assert "1:0" == OUT.location();
     assert u"あ" == OUT.peek(2);
@@ -130,7 +142,8 @@ def test_peek(self):
   def test_success_statistics(self):
     actual = io.StringIO(u"ぃ　あしろろる\r\n");
     expected = io.StringIO(u"いあしるろる\r\n");
-    result = evaluate.evaluate(actual, expected);
+    result = Evaluation(expected,actual);
+    result.evaluate();
     assert result.success == False;
     assert result.count == 6;
     assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],

From baf358d9e76094cde4af50a0453fc2533fcfcfdf Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 12:17:14 +0930
Subject: [PATCH 11/33] Refactor actual_char and expected_char to instance
 variables

---
 evaluate.py | 51 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index f353bac..a56ee10 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -41,6 +41,14 @@ def __init__(self,expected_stream,actual_stream):
     self._percentages = None;
     self._actual = EvaluationStream(actual_stream);
     self._expected = EvaluationStream(expected_stream);
+    self._actual_char = None;
+    self._expected_char = None;
+
+  def readFromExpected(self):
+    self._expected_char = self._expected.read();
+
+  def readFromActual(self):
+    self._actual_char = self._actual.read()
 
   def evaluate(self):
     """
@@ -54,53 +62,54 @@ def evaluate(self):
       sys.stdout.write("  s = skipped\n");
 
     while True:
-      expected_char = self._expected.read();
-      actual_char = self._actual.read();
-      if EvaluationStream.iseof(expected_char) and EvaluationStream.iseof(actual_char):
+      self.readFromExpected();
+      self.readFromActual();
+      if EvaluationStream.iseof(self._expected_char) and EvaluationStream.iseof(self._actual_char):
         if self.success == None:
           self.success = True;
         break;
 
       up_to_count = self._expected.count;
 
-      if expected_char != actual_char:
+      if self._expected_char != self._actual_char:
         self.success = False;
         failure_details = { u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
-        if EvaluationStream.isnewline(expected_char):
+        if EvaluationStream.isnewline(self._expected_char):
           # Resync other stream to the next newline
-          while not (EvaluationStream.isnewline(actual_char) or EvaluationStream.iseof(actual_char)):
-            failure_details = { u"actual" : actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
-            self.failures[expected_char].append(failure_details);
-            actual_char = self._actual.read();
-        elif EvaluationStream.isnewline(actual_char):
+          while not (EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char)):
+            failure_details = { u"actual" : self._actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
+            self.failures[self._expected_char].append(failure_details);
+            self._actual_char = self._actual.read();
+        elif EvaluationStream.isnewline(self._actual_char):
           # Resync other stream to the next newline
-          while not (EvaluationStream.isnewline(expected_char) or EvaluationStream.iseof(expected_char)):
-            failure_details = { u"actual" : actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
-            self.failures[expected_char].append(failure_details);
-            expected_char = self._expected.read();
+          while not (EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char)):
+            failure_details = { u"actual" : self._actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
+            self.failures[self._expected_char].append(failure_details);
+            self._expected_char = self._expected.read();
         else:
-          actual_char = self._actual.resync(actual_char, self._expected);
+          actual_char = self._actual.resync(self._actual_char, self._expected);
           failure_details[u"actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
-          self.failures[expected_char].append(failure_details);
+          self.failures[self._expected_char].append(failure_details);
           if logger.isEnabledFor(logging.DEBUG):
             sys.stdout.write("X");
             if len(actual_char) > 1:
               sys.stdout.write("s" * (len(actual_char)-1));
       else:
-        if not EvaluationStream.isnewline(expected_char):
-          self.successes[expected_char].append(self._expected.location());
+        if not EvaluationStream.isnewline(self._expected_char):
+          self.successes[self._expected_char].append(self._expected.location());
           if logger.isEnabledFor(logging.DEBUG):
             sys.stdout.write(".");
         else:
           if logger.isEnabledFor(logging.DEBUG):
             sys.stdout.write("\n");
 
-      if EvaluationStream.iseof(expected_char):
+      if EvaluationStream.iseof(self._expected_char):
         self.success = False;
         break;
 
-    sys.stdout.write("\n");
-    sys.stdout.flush();
+    if logger.isEnabledFor(logging.DEBUG):
+      sys.stdout.write("\n");
+      sys.stdout.flush();
     self.count = self._expected.count;
     return self;
 

From 809127ee47773b4e143c31495bb5470ef5936293 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 12:32:11 +0930
Subject: [PATCH 12/33] Refactor failure handling into methods

---
 evaluate.py | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index a56ee10..001a0fe 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -50,6 +50,19 @@ def readFromExpected(self):
   def readFromActual(self):
     self._actual_char = self._actual.read()
 
+  def markFailure(self, actual_location=None):
+    if not actual_location:
+      actual_location = self._actual.location();
+    failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()};
+    self.failures[self._expected_char].append(failure_details);
+    if logger.isEnabledFor(logging.DEBUG):
+      sys.stdout.write("X");
+      if len(self._actual_char) > 1:
+        sys.stdout.write("s" * (len(self._actual_char)-1));
+
+  def resyncActual(self):
+    self._actual_char = self._actual.resync(self._actual_char, self._expected);
+
   def evaluate(self):
     """
     Evaluate the actual ocr results against the expected results and provide metrics on failures.
@@ -73,27 +86,20 @@ def evaluate(self):
 
       if self._expected_char != self._actual_char:
         self.success = False;
-        failure_details = { u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
         if EvaluationStream.isnewline(self._expected_char):
           # Resync other stream to the next newline
           while not (EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char)):
-            failure_details = { u"actual" : self._actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
-            self.failures[self._expected_char].append(failure_details);
-            self._actual_char = self._actual.read();
+            self.markFailure();
+            self.readFromActual();
         elif EvaluationStream.isnewline(self._actual_char):
           # Resync other stream to the next newline
           while not (EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char)):
-            failure_details = { u"actual" : self._actual_char, u"actual_location" : self._actual.location(), u"expected_location" : self._expected.location()};
-            self.failures[self._expected_char].append(failure_details);
-            self._expected_char = self._expected.read();
+            self.markFailure();
+            self.readFromExpected();
         else:
-          actual_char = self._actual.resync(self._actual_char, self._expected);
-          failure_details[u"actual"] = actual_char; # resync'ing changes location to the end of the sync, and we want the beginning
-          self.failures[self._expected_char].append(failure_details);
-          if logger.isEnabledFor(logging.DEBUG):
-            sys.stdout.write("X");
-            if len(actual_char) > 1:
-              sys.stdout.write("s" * (len(actual_char)-1));
+          mark_failure_position = self._actual.location()
+          self.resyncActual();
+          self.markFailure(mark_failure_position);
       else:
         if not EvaluationStream.isnewline(self._expected_char):
           self.successes[self._expected_char].append(self._expected.location());

From e9b4e28cc7c429ad01c45696d069ad3d6510e835 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 12:44:13 +0930
Subject: [PATCH 13/33] Refactor resync from EvaluationStream into Evaluation

---
 evaluate.py | 50 +++++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 001a0fe..5f5645b 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -43,6 +43,7 @@ def __init__(self,expected_stream,actual_stream):
     self._expected = EvaluationStream(expected_stream);
     self._actual_char = None;
     self._expected_char = None;
+    self._max_peek_lookahead = 2;
 
   def readFromExpected(self):
     self._expected_char = self._expected.read();
@@ -61,7 +62,26 @@ def markFailure(self, actual_location=None):
         sys.stdout.write("s" * (len(self._actual_char)-1));
 
   def resyncActual(self):
-    self._actual_char = self._actual.resync(self._actual_char, self._expected);
+    """
+    Lookahead on the stream to see if re-syncing is required.
+    If re-syncing is required then the extra characters will be consumed and appended to self._actual_char
+    """
+    sync_to_char = self._expected.peek(1);
+
+    if EvaluationStream.iseof(sync_to_char):
+      # Dont resync on EOF
+      return;
+
+    resync_found_ahead_at = None;
+    for i in range(1, self._max_peek_lookahead+1):
+      candidate_sync_spot = self._actual.peek(i);
+      if sync_to_char == candidate_sync_spot:
+        resync_found_ahead_at = i;
+
+    if resync_found_ahead_at:
+      while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
+        resync_found_ahead_at -= 1;
+        self._actual_char += self._actual.read();
 
   def evaluate(self):
     """
@@ -187,7 +207,6 @@ def __init__(self, stream):
     self._position = 0;
     self.count = 0;
     self._peek_buffer = collections.deque();
-    self._max_peek_lookahead = 2;
 
   def _read_with_translations(self):
     """
@@ -251,33 +270,6 @@ def peek(self, n):
     result = self._peek_buffer[n-1];
     return result;
 
-  def resync(self, current_char, tostream):
-    """
-    Lookahead on the stream to see if re-syncing is required.
-    If re-syncing is required the the extra characters will be consumed and returned appended to current_char
-
-    :param current_char: the current failing character
-    :param tostream: the evaluation stream to sync to
-    """
-    sync_to_char = tostream.peek(1);
-
-    if EvaluationStream.iseof(sync_to_char):
-      # Dont resync on EOF
-      return current_char;
-
-    resync_found_ahead_at = None;
-    for i in range(1, self._max_peek_lookahead+1):
-      candidate_sync_spot = self.peek(i);
-      if sync_to_char == candidate_sync_spot:
-        resync_found_ahead_at = i;
-
-    if resync_found_ahead_at:
-      while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
-        resync_found_ahead_at -= 1;
-        current_char += self.read();
-
-    return current_char;
-
 def main():
   parser = argparse.ArgumentParser(description="Evaluate text against correct version.");
   parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text");

From 611657bda10bc9b27707618197cc1957e9a233c8 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 12:48:58 +0930
Subject: [PATCH 14/33] Remove semicolons

---
 evaluate.py                | 272 ++++++++++++++++++-------------------
 unit_test/evaluate_test.py | 222 +++++++++++++++---------------
 2 files changed, 247 insertions(+), 247 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 5f5645b..2c4c9cd 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -11,16 +11,16 @@
   TODO
 """
 
-import codecs;
-import collections;
-import argparse;
-import arg;
-import sys;
-import os;
-import json;
+import codecs
+import collections
+import argparse
+import arg
+import sys
+import os
+import json
 import logging
 
-logger = logging.getLogger(__name__);
+logger = logging.getLogger(__name__)
 
 class IgnoreUnderscoreEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -34,54 +34,54 @@ def default(self, obj):
 
 class Evaluation:
   def __init__(self,expected_stream,actual_stream):
-    self.success = None;
-    self.count = 0;
-    self.failures = collections.defaultdict(list);
-    self.successes = collections.defaultdict(list);
-    self._percentages = None;
-    self._actual = EvaluationStream(actual_stream);
-    self._expected = EvaluationStream(expected_stream);
-    self._actual_char = None;
-    self._expected_char = None;
-    self._max_peek_lookahead = 2;
+    self.success = None
+    self.count = 0
+    self.failures = collections.defaultdict(list)
+    self.successes = collections.defaultdict(list)
+    self._percentages = None
+    self._actual = EvaluationStream(actual_stream)
+    self._expected = EvaluationStream(expected_stream)
+    self._actual_char = None
+    self._expected_char = None
+    self._max_peek_lookahead = 2
 
   def readFromExpected(self):
-    self._expected_char = self._expected.read();
+    self._expected_char = self._expected.read()
 
   def readFromActual(self):
     self._actual_char = self._actual.read()
 
   def markFailure(self, actual_location=None):
     if not actual_location:
-      actual_location = self._actual.location();
-    failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()};
-    self.failures[self._expected_char].append(failure_details);
+      actual_location = self._actual.location()
+    failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()}
+    self.failures[self._expected_char].append(failure_details)
     if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("X");
+      sys.stdout.write("X")
       if len(self._actual_char) > 1:
-        sys.stdout.write("s" * (len(self._actual_char)-1));
+        sys.stdout.write("s" * (len(self._actual_char)-1))
 
   def resyncActual(self):
     """
     Lookahead on the stream to see if re-syncing is required.
     If re-syncing is required then the extra characters will be consumed and appended to self._actual_char
     """
-    sync_to_char = self._expected.peek(1);
+    sync_to_char = self._expected.peek(1)
 
     if EvaluationStream.iseof(sync_to_char):
       # Dont resync on EOF
-      return;
+      return
 
-    resync_found_ahead_at = None;
+    resync_found_ahead_at = None
     for i in range(1, self._max_peek_lookahead+1):
-      candidate_sync_spot = self._actual.peek(i);
+      candidate_sync_spot = self._actual.peek(i)
       if sync_to_char == candidate_sync_spot:
-        resync_found_ahead_at = i;
+        resync_found_ahead_at = i
 
     if resync_found_ahead_at:
       while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
-        resync_found_ahead_at -= 1;
-        self._actual_char += self._actual.read();
+        resync_found_ahead_at -= 1
+        self._actual_char += self._actual.read()
 
   def evaluate(self):
     """
@@ -89,99 +89,99 @@ def evaluate(self):
     """
 
     if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("Debug Legend:\n");
-      sys.stdout.write("  . = matched\n");
-      sys.stdout.write("  X = failed\n");
-      sys.stdout.write("  s = skipped\n");
+      sys.stdout.write("Debug Legend:\n")
+      sys.stdout.write("  . = matched\n")
+      sys.stdout.write("  X = failed\n")
+      sys.stdout.write("  s = skipped\n")
 
     while True:
-      self.readFromExpected();
-      self.readFromActual();
+      self.readFromExpected()
+      self.readFromActual()
       if EvaluationStream.iseof(self._expected_char) and EvaluationStream.iseof(self._actual_char):
         if self.success == None:
-          self.success = True;
-        break;
+          self.success = True
+        break
 
-      up_to_count = self._expected.count;
+      up_to_count = self._expected.count
 
       if self._expected_char != self._actual_char:
-        self.success = False;
+        self.success = False
         if EvaluationStream.isnewline(self._expected_char):
           # Resync other stream to the next newline
           while not (EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char)):
-            self.markFailure();
-            self.readFromActual();
+            self.markFailure()
+            self.readFromActual()
         elif EvaluationStream.isnewline(self._actual_char):
           # Resync other stream to the next newline
           while not (EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char)):
-            self.markFailure();
-            self.readFromExpected();
+            self.markFailure()
+            self.readFromExpected()
         else:
           mark_failure_position = self._actual.location()
-          self.resyncActual();
-          self.markFailure(mark_failure_position);
+          self.resyncActual()
+          self.markFailure(mark_failure_position)
       else:
         if not EvaluationStream.isnewline(self._expected_char):
-          self.successes[self._expected_char].append(self._expected.location());
+          self.successes[self._expected_char].append(self._expected.location())
           if logger.isEnabledFor(logging.DEBUG):
-            sys.stdout.write(".");
+            sys.stdout.write(".")
         else:
           if logger.isEnabledFor(logging.DEBUG):
-            sys.stdout.write("\n");
+            sys.stdout.write("\n")
 
       if EvaluationStream.iseof(self._expected_char):
-        self.success = False;
-        break;
+        self.success = False
+        break
 
     if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("\n");
-      sys.stdout.flush();
-    self.count = self._expected.count;
-    return self;
+      sys.stdout.write("\n")
+      sys.stdout.flush()
+    self.count = self._expected.count
+    return self
 
   def percentages(self):
     if not self._percentages:
-      keys = set(self.successes.iterkeys()).union(self.failures.iterkeys());
-      self._percentages = {};
+      keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
+      self._percentages = {}
       for key in keys:
         failure_count = len(self.failures[key]) if key in self.failures else 0
-        success_count = len(self.successes[key]) if key in self.successes else 0;
-        self._percentages[key] = success_count / float( failure_count + success_count );
+        success_count = len(self.successes[key]) if key in self.successes else 0
+        self._percentages[key] = success_count / float( failure_count + success_count )
 
-    return self._percentages;
+    return self._percentages
 
   def overall(self):
     values = self.percentages().values()
-    return sum(values)/len(values);
+    return sum(values)/len(values)
 
   def __str__(self):
-    return unicode(self).encode('utf-8');
+    return unicode(self).encode('utf-8')
 
   def __unicode__(self):
-    result = [];
-    result.append(u"success={0!s}".format(self.success));
-    result.append(u"count={0:d}".format(self.count));
-    result.append(u"failures={");
+    result = []
+    result.append(u"success={0!s}".format(self.success))
+    result.append(u"count={0:d}".format(self.count))
+    result.append(u"failures={")
     for key, value in self.failures.iteritems():
-      result.append(u"  '{0}' = {1},".format(key, unicode(value)));
-    result.append(u"}");
-    result.append(u"successes={");
+      result.append(u"  '{0}' = {1},".format(key, unicode(value)))
+    result.append(u"}")
+    result.append(u"successes={")
     for key, value in self.successes.iteritems():
-      result.append(u"  '{0}' = {1},".format(key, value));
-    result.append(u"}");
-    result.append(u"percentages={");
+      result.append(u"  '{0}' = {1},".format(key, value))
+    result.append(u"}")
+    result.append(u"percentages={")
     for key, value in self.percentages().iteritems():
-      result.append(u"  '{0}' = {1},".format(key, value));
-    result.append(u"}");
-    result.append(u"overall={0}".format(self.overall()));
-    return u"\n".join(result);
+      result.append(u"  '{0}' = {1},".format(key, value))
+    result.append(u"}")
+    result.append(u"overall={0}".format(self.overall()))
+    return u"\n".join(result)
 
   def summary(self):
-    result = [];
-    result.append(u"success={0!s}".format(self.success));
-    result.append(u"count={0:d}".format(self.count));
-    result.append(u"overall={0}".format(self.overall()));
-    return u"\n".join(result);
+    result = []
+    result.append(u"success={0!s}".format(self.success))
+    result.append(u"count={0:d}".format(self.count))
+    result.append(u"overall={0}".format(self.overall()))
+    return u"\n".join(result)
 
 class EvaluationStream():
   """
@@ -190,23 +190,23 @@ class EvaluationStream():
   :param stream: io.TextIOBase of the actual ocr results
   """
 
-  _newline = u"NL";
-  _eof = u"EOF";
+  _newline = u"NL"
+  _eof = u"EOF"
 
   @staticmethod
   def isnewline(char):
-    return EvaluationStream._newline == char;
+    return EvaluationStream._newline == char
 
   @staticmethod
   def iseof(char):
-    return EvaluationStream._eof == char;
+    return EvaluationStream._eof == char
 
   def __init__(self, stream):
-    self._stream = stream;
-    self._line = 1;
-    self._position = 0;
-    self.count = 0;
-    self._peek_buffer = collections.deque();
+    self._stream = stream
+    self._line = 1
+    self._position = 0
+    self.count = 0
+    self._peek_buffer = collections.deque()
 
   def _read_with_translations(self):
     """
@@ -214,24 +214,24 @@ def _read_with_translations(self):
     \n is rewritten as NL so that mismatches are printable characters.
     end of file is rewritten as EOF for printability.
     """
-    char = self._stream.read(1);
+    char = self._stream.read(1)
     while u"\r" == char:
-      char = self._stream.read(1);
+      char = self._stream.read(1)
 
     if u"" == char:
-      char = EvaluationStream._eof;
+      char = EvaluationStream._eof
     elif u"\n" == char:
-      char = EvaluationStream._newline;
+      char = EvaluationStream._newline
 
     return char
 
   def _read_stream_or_peek_buffer(self):
     if self._peek_buffer:
-      char = self._peek_buffer.popleft();
+      char = self._peek_buffer.popleft()
     else:
-      char = self._read_with_translations();
+      char = self._read_with_translations()
 
-    return char;
+    return char
 
   def read(self):
     """
@@ -242,71 +242,71 @@ def read(self):
     To support peek, an internal buffer is used and read from before re-reading from stream.
     """
 
-    char = self._read_stream_or_peek_buffer();
+    char = self._read_stream_or_peek_buffer()
 
     if EvaluationStream.iseof(char):
-      pass; # EOF doesn't increment counts
+      pass # EOF doesn't increment counts
     elif EvaluationStream.isnewline(char):
-      self._line += 1;
-      self._position = 0;
+      self._line += 1
+      self._position = 0
     else:
-      self._position += 1;
-      self.count += 1;
+      self._position += 1
+      self.count += 1
 
-    return char;
+    return char
 
   def location(self):
-    return u"{0:d}:{1:d}".format(self._line, self._position);
+    return u"{0:d}:{1:d}".format(self._line, self._position)
 
   def peek(self, n):
     """
     Peek ahead n characters in the input stream and return that character
     """
 
-    current_peek_chars_available = len(self._peek_buffer);
-    chars_needed = n - current_peek_chars_available;
+    current_peek_chars_available = len(self._peek_buffer)
+    chars_needed = n - current_peek_chars_available
     for _ in range(chars_needed):
-      self._peek_buffer.append(self._read_with_translations());
-    result = self._peek_buffer[n-1];
-    return result;
+      self._peek_buffer.append(self._read_with_translations())
+    result = self._peek_buffer[n-1]
+    return result
 
 def main():
-  parser = argparse.ArgumentParser(description="Evaluate text against correct version.");
-  parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text");
-  parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version");
-  parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to");
-  parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing");
-
-  arg.value = parser.parse_args();
-  correct_file = arg.string_value("correct_file", default_value="correct.txt");
-  input_file = arg.string_value("input_file");
-  results_file = arg.string_value("results_file", default_value=input_file+"-results.txt");
+  parser = argparse.ArgumentParser(description="Evaluate text against correct version.")
+  parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text")
+  parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version")
+  parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to")
+  parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing")
+
+  arg.value = parser.parse_args()
+  correct_file = arg.string_value("correct_file", default_value="correct.txt")
+  input_file = arg.string_value("input_file")
+  results_file = arg.string_value("results_file", default_value=input_file+"-results.txt")
   if arg.boolean_value("debug"):
-    logging.getLogger().setLevel(logging.DEBUG);
+    logging.getLogger().setLevel(logging.DEBUG)
 
   if not os.path.isfile(input_file):
-    print("Input file '{0}' does not exist. Use -h option for help".format(input_file));
-    sys.exit(-1);
+    print("Input file '{0}' does not exist. Use -h option for help".format(input_file))
+    sys.exit(-1)
 
   if not os.path.isfile(correct_file):
-    print("Correct file '{0}' does not exist. Use -h option for help".format(correct_file));
-    sys.exit(-1);
+    print("Correct file '{0}' does not exist. Use -h option for help".format(correct_file))
+    sys.exit(-1)
 
   with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
-    result = Evaluation(c, i);
-    result.evaluate();
+    result = Evaluation(c, i)
+    result.evaluate()
 
   with codecs.open(results_file, "wU", "utf-8") as w:
-    json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True);
+    json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
 
-  print(u"Summary of evaluation results:");
-  print(u"results={0}".format(results_file));
-  print(result.summary());
+  print(u"Summary of evaluation results:")
+  print(u"results={0}".format(results_file))
+  print(result.summary())
 
 
 if __name__ == "__main__":
-  logging.basicConfig(stream=sys.stderr, level=logging.INFO);
+  logging.basicConfig(stream=sys.stderr, level=logging.INFO)
 
-  UTF8Writer = codecs.getwriter('utf8');
-  sys.stdout = UTF8Writer(sys.stdout);
-  main();
+  UTF8Writer = codecs.getwriter('utf8')
+  sys.stdout = UTF8Writer(sys.stdout)
+  main()
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 310aec4..911f3cb 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -1,165 +1,165 @@
 # -*- coding: utf-8 -*-
 
-import io;
-from evaluate import Evaluation, EvaluationStream;
+import io
+from evaluate import Evaluation, EvaluationStream
 
 class TestEvaluate:
 
   def test_empty(self):
-    actual = io.StringIO();
-    expected = io.StringIO();
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success;
-    assert result.count == 0;
-    assert result.failures == {};
+    actual = io.StringIO()
+    expected = io.StringIO()
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success
+    assert result.count == 0
+    assert result.failures == {}
 
   def test_one_character(self):
-    actual = io.StringIO(u"し",);
-    expected = io.StringIO(u"し");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success;
-    assert result.count == 1;
-    assert result.failures == {};
+    actual = io.StringIO(u"し",)
+    expected = io.StringIO(u"し")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success
+    assert result.count == 1
+    assert result.failures == {}
 
   def test_one_character_does_not_match(self):
-    actual = io.StringIO(u"あ");
-    expected = io.StringIO(u"し");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] };
+    actual = io.StringIO(u"あ")
+    expected = io.StringIO(u"し")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 1
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] }
 
   def test_endofline_unix_does_not_increase_count(self):
-    actual = io.StringIO(u"\n");
-    expected = io.StringIO(u"\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success;
-    assert result.count == 0;
-    assert result.failures == {};
+    actual = io.StringIO(u"\n")
+    expected = io.StringIO(u"\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success
+    assert result.count == 0
+    assert result.failures == {}
 
   def test_endofline_windows_does_not_increase_count(self):
-    actual = io.StringIO(u"\r\n");
-    expected = io.StringIO(u"\r\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success;
-    assert result.count == 0;
-    assert result.failures == {};
+    actual = io.StringIO(u"\r\n")
+    expected = io.StringIO(u"\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success
+    assert result.count == 0
+    assert result.failures == {}
 
   def test_endofline_mixed_unix_and_windows_does_not_increase_count(self):
-    actual = io.StringIO(u"\n");
-    expected = io.StringIO(u"\r\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success;
-    assert result.count == 0;
-    assert result.failures == {};
+    actual = io.StringIO(u"\n")
+    expected = io.StringIO(u"\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success
+    assert result.count == 0
+    assert result.failures == {}
 
   def test_line_reported_in_failures(self):
-    actual = io.StringIO(u"\r\nあ");
-    expected = io.StringIO(u"\r\nし");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 1;
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] };
+    actual = io.StringIO(u"\r\nあ")
+    expected = io.StringIO(u"\r\nし")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 1
+    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] }
 
   def test_endoffile_mismatch_more_in_actual(self):
-    actual = io.StringIO(u"あ\r\nし");
-    expected = io.StringIO(u"あ\r\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 1;
-    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] };
+    actual = io.StringIO(u"あ\r\nし")
+    expected = io.StringIO(u"あ\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 1
+    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] }
 
   def test_endoffile_mismatch_more_in_expected(self):
-    actual = io.StringIO(u"あ\r\n");
-    expected = io.StringIO(u"あ\r\nし");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 2;
-    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] };
+    actual = io.StringIO(u"あ\r\n")
+    expected = io.StringIO(u"あ\r\nし")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 2
+    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
 
   def test_out_of_sync_stream(self):
-    actual = io.StringIO(u"ぃ　あし\r\n");
-    expected = io.StringIO(u"いあし\r\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 3;
-    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] };
+    actual = io.StringIO(u"ぃ　あし\r\n")
+    expected = io.StringIO(u"いあし\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] }
 
   def test_out_of_sync_stream_actual_new_lined_early(self):
-    actual = io.StringIO(u"新しい\nしごと");
-    expected = io.StringIO(u"新しいむすこ\nしごと\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 9;
+    actual = io.StringIO(u"新しい\nしごと")
+    expected = io.StringIO(u"新しいむすこ\nしごと\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 9
     assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
                                 u"す" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
                                 u"こ" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
-                                };
+                                }
 
   def test_out_of_sync_stream_expected_new_lined_early(self):
-    actual = io.StringIO(u"新しいむすこ\nしごと\n");
-    expected = io.StringIO(u"新しい\nしごと");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 6;
+    actual = io.StringIO(u"新しいむすこ\nしごと\n")
+    expected = io.StringIO(u"新しい\nしごと")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 6
     assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
                                          { "actual" : u"す", "actual_location": "1:5", "expected_location": "2:0"},
                                          { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
-                              };
+                              }
 
   def test_peek_when_empty(self):
-    stream = io.StringIO();
-    OUT = EvaluationStream(stream);
-    assert OUT.iseof(OUT.peek(1));
-    assert OUT.iseof(OUT.peek(2));
+    stream = io.StringIO()
+    OUT = EvaluationStream(stream)
+    assert OUT.iseof(OUT.peek(1))
+    assert OUT.iseof(OUT.peek(2))
 
   def test_peek(self):
-    stream = io.StringIO(u"いあし\r\n");
-    OUT = EvaluationStream(stream);
-    assert u"い" == OUT.peek(1);
-    assert "1:0" == OUT.location();
-    assert u"あ" == OUT.peek(2);
-    assert "1:0" == OUT.location();
-    assert u"し" == OUT.peek(3);
-    assert "1:0" == OUT.location();
-    assert OUT.isnewline(OUT.peek(4));
-    assert "1:0" == OUT.location();
-    assert OUT.iseof(OUT.peek(5));
-    assert "1:0" == OUT.location();
+    stream = io.StringIO(u"いあし\r\n")
+    OUT = EvaluationStream(stream)
+    assert u"い" == OUT.peek(1)
+    assert "1:0" == OUT.location()
+    assert u"あ" == OUT.peek(2)
+    assert "1:0" == OUT.location()
+    assert u"し" == OUT.peek(3)
+    assert "1:0" == OUT.location()
+    assert OUT.isnewline(OUT.peek(4))
+    assert "1:0" == OUT.location()
+    assert OUT.iseof(OUT.peek(5))
+    assert "1:0" == OUT.location()
 
   def test_success_statistics(self):
-    actual = io.StringIO(u"ぃ　あしろろる\r\n");
-    expected = io.StringIO(u"いあしるろる\r\n");
-    result = Evaluation(expected,actual);
-    result.evaluate();
-    assert result.success == False;
-    assert result.count == 6;
+    actual = io.StringIO(u"ぃ　あしろろる\r\n")
+    expected = io.StringIO(u"いあしるろる\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 6
     assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
                                 u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
-                               };
+                               }
     assert result.successes == {
                                 u"あ" : ["1:2"],
                                 u"し" : ["1:3"],
                                 u"ろ" : ["1:5"],
                                 u"る" : ["1:6"]
-                                };
+                                }
     assert result.percentages() == {
                                     u"い" : 0.0,
                                     u"あ" : 1.0,
                                     u"し" : 1.0,
                                     u"る" : 0.5,
                                     u"ろ" : 1.0
-                                   };
+                                   }
 

From 403443c2f6de681eacae64c81209a8445417e17e Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 13:19:40 +0930
Subject: [PATCH 15/33] Refactor handling match and mismatch to methods

---
 evaluate.py | 58 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2c4c9cd..daf6a4b 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -57,9 +57,15 @@ def markFailure(self, actual_location=None):
     failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()}
     self.failures[self._expected_char].append(failure_details)
     if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("X")
+      if EvaluationStream.iseof(self._expected_char):
+        sys.stdout.write("E")
+      elif EvaluationStream.iseof(self._actual_char):
+        sys.stdout.write("e")
+      else:
+        sys.stdout.write("X")
       if len(self._actual_char) > 1:
         sys.stdout.write("s" * (len(self._actual_char)-1))
+    logger.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
 
   def resyncActual(self):
     """
@@ -83,6 +89,31 @@ def resyncActual(self):
         resync_found_ahead_at -= 1
         self._actual_char += self._actual.read()
 
+
+  def handleMismatch(self):
+    self.success = False
+    if EvaluationStream.isnewline(self._expected_char): # Resync actual stream to the next newline
+      while not (EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char)): # stop at newline or EOF
+        self.markFailure()
+        self.readFromActual()
+    elif EvaluationStream.isnewline(self._actual_char): # Resync expected stream to the next newline
+      while not (EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char)): # stop at newline or EOF
+        self.markFailure()
+        self.readFromExpected()
+    else:
+      mark_failure_position = self._actual.location()
+      self.resyncActual()
+      self.markFailure(mark_failure_position)
+
+
+  def handleMatch(self):
+    if not EvaluationStream.isnewline(self._expected_char):
+      self.successes[self._expected_char].append(self._expected.location())
+      if logger.isEnabledFor(logging.DEBUG):
+        sys.stdout.write(".")
+    elif logger.isEnabledFor(logging.DEBUG):
+      sys.stdout.write("\n")
+
   def evaluate(self):
     """
     Evaluate the actual ocr results against the expected results and provide metrics on failures.
@@ -105,29 +136,9 @@ def evaluate(self):
       up_to_count = self._expected.count
 
       if self._expected_char != self._actual_char:
-        self.success = False
-        if EvaluationStream.isnewline(self._expected_char):
-          # Resync other stream to the next newline
-          while not (EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char)):
-            self.markFailure()
-            self.readFromActual()
-        elif EvaluationStream.isnewline(self._actual_char):
-          # Resync other stream to the next newline
-          while not (EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char)):
-            self.markFailure()
-            self.readFromExpected()
-        else:
-          mark_failure_position = self._actual.location()
-          self.resyncActual()
-          self.markFailure(mark_failure_position)
+        self.handleMismatch()
       else:
-        if not EvaluationStream.isnewline(self._expected_char):
-          self.successes[self._expected_char].append(self._expected.location())
-          if logger.isEnabledFor(logging.DEBUG):
-            sys.stdout.write(".")
-        else:
-          if logger.isEnabledFor(logging.DEBUG):
-            sys.stdout.write("\n")
+        self.handleMatch()
 
       if EvaluationStream.iseof(self._expected_char):
         self.success = False
@@ -136,6 +147,7 @@ def evaluate(self):
     if logger.isEnabledFor(logging.DEBUG):
       sys.stdout.write("\n")
       sys.stdout.flush()
+
     self.count = self._expected.count
     return self
 

From ebb645217458e086e8bf505a9f46caadd25105b9 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 13:39:39 +0930
Subject: [PATCH 16/33] Update debug legend

---
 evaluate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/evaluate.py b/evaluate.py
index daf6a4b..5ef6ef2 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -124,6 +124,8 @@ def evaluate(self):
       sys.stdout.write("  . = matched\n")
       sys.stdout.write("  X = failed\n")
       sys.stdout.write("  s = skipped\n")
+      sys.stdout.write("  E = End of File (expected)\n")
+      sys.stdout.write("  e = End of File (actual)\n")
 
     while True:
       self.readFromExpected()

From 6f00cf772ff75f973163f16b15402c8dd3848dd8 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 13:50:56 +0930
Subject: [PATCH 17/33] Fix EOF not stopping evaluation

---
 evaluate.py                | 5 +++--
 unit_test/evaluate_test.py | 1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 5ef6ef2..68577ec 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -93,11 +93,11 @@ def resyncActual(self):
   def handleMismatch(self):
     self.success = False
     if EvaluationStream.isnewline(self._expected_char): # Resync actual stream to the next newline
-      while not EvaluationStream.isnewline(self._actual_char) or EvaluationStream.iseof(self._actual_char):
+      while not EvaluationStream.isnewline(self._actual_char) and not EvaluationStream.iseof(self._actual_char):
         self.markFailure()
         self.readFromActual()
     elif EvaluationStream.isnewline(self._actual_char): # Resync expected stream to the next newline
-      while not EvaluationStream.isnewline(self._expected_char) or EvaluationStream.iseof(self._expected_char):
+      while not EvaluationStream.isnewline(self._expected_char) and not EvaluationStream.iseof(self._expected_char):
         self.markFailure()
         self.readFromExpected()
     else:
@@ -124,6 +124,7 @@ def evaluate(self):
       sys.stdout.write("  . = matched\n")
       sys.stdout.write("  X = failed\n")
       sys.stdout.write("  s = skipped\n")
+      sys.stdout.write("  _ = skipped extra whitespace\n")
       sys.stdout.write("  E = End of File (expected)\n")
       sys.stdout.write("  e = End of File (actual)\n")
 
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 911f3cb..33aca45 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -162,4 +162,3 @@ def test_success_statistics(self):
                                     u"る" : 0.5,
                                     u"ろ" : 1.0
                                    }
-

From d7ab9c66cf484e32111729c414cbc972fc4a962b Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 14:27:11 +0930
Subject: [PATCH 18/33] Handle extraneous space in stream

---
 evaluate.py                | 16 +++++++++++++++-
 unit_test/evaluate_test.py | 18 +++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 68577ec..6425fce 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -100,12 +100,14 @@ def handleMismatch(self):
       while not EvaluationStream.isnewline(self._expected_char) and not EvaluationStream.iseof(self._expected_char):
         self.markFailure()
         self.readFromExpected()
+    elif EvaluationStream.isspace(self._actual_char) and self._expected_char == self._actual.peek(1): # ignore whitespace if the next char matches
+        self.markFailure()
+        self._expected.push_back(self._expected_char)
     else:
       mark_failure_position = self._actual.location()
       self.resyncActual()
       self.markFailure(mark_failure_position)
 
-
   def handleMatch(self):
     if not EvaluationStream.isnewline(self._expected_char):
       self.successes[self._expected_char].append(self._expected.location())
@@ -216,6 +218,10 @@ def isnewline(char):
   def iseof(char):
     return EvaluationStream._eof == char
 
+  @staticmethod
+  def isspace(char):
+    return u" " == char
+
   def __init__(self, stream):
     self._stream = stream
     self._line = 1
@@ -285,6 +291,14 @@ def peek(self, n):
     result = self._peek_buffer[n-1]
     return result
 
+  def push_back(self, char):
+    assert not EvaluationStream.iseof(char)
+    assert not EvaluationStream.isnewline(char)
+    self._position -= 1
+    self.count -= 1
+
+    self._peek_buffer.appendleft(char)
+
 def main():
   parser = argparse.ArgumentParser(description="Evaluate text against correct version.")
   parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text")
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 33aca45..df8d571 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 
 import io
-from evaluate import Evaluation, EvaluationStream
+import json
+from evaluate import Evaluation, EvaluationStream, IgnoreUnderscoreEncoder
+import sys
 
 class TestEvaluate:
 
@@ -162,3 +164,17 @@ def test_success_statistics(self):
                                     u"る" : 0.5,
                                     u"ろ" : 1.0
                                    }
+  def test_extra_whitespace(self):
+    actual = io.StringIO(u"新 し い むすこ\nし ご と")
+    expected = io.StringIO(u"新しいむすこ\nしごと\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 9
+    json.dump(result.failures, sys.stdout, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '))
+    assert result.failures == { u"し" : [{ "actual" : u" ", "actual_location": "1:2", "expected_location": "1:2"}],
+                               u"い" : [{ "actual" : u" ", "actual_location": "1:4", "expected_location": "1:3"}],
+                               u"む" : [{ "actual" : u" ", "actual_location": "1:6", "expected_location": "1:4"}],
+                               u"ご" : [{ "actual" : u" ", "actual_location": "2:2", "expected_location": "2:2"}],
+                               u"と" : [{ "actual" : u" ", "actual_location": "2:4", "expected_location": "2:3"}],
+                               }

From 523261c95caae2d4e502bdffd48271fa3dd63469 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 20:11:43 +0930
Subject: [PATCH 19/33] Fix bug resync'ing two duplicate characters grabbed too
 many characters.

When the stream finds the first candidate sync spot it should stop.
If there were multiple candidate characters in a row, it would greedily
(and incorrectly) grab too many.
---
 evaluate.py                |  1 +
 unit_test/evaluate_test.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/evaluate.py b/evaluate.py
index 6425fce..f6878bd 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -83,6 +83,7 @@ def resyncActual(self):
       candidate_sync_spot = self._actual.peek(i)
       if sync_to_char == candidate_sync_spot:
         resync_found_ahead_at = i
+        break
 
     if resync_found_ahead_at:
       while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index df8d571..08f446e 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -88,6 +88,42 @@ def test_endoffile_mismatch_more_in_expected(self):
     assert result.count == 2
     assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
 
+  def test_mismatch_prior_to_endofline(self):
+    actual = io.StringIO(u"\"\nいあ")
+    expected = io.StringIO(u"。\nいあ")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+  def test_mismatch_prior_to_endofline_windows(self):
+    actual = io.StringIO(u"\"\r\nいあ")
+    expected = io.StringIO(u"。\r\nいあ")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+  def test_mismatch_prior_to_endofline_followed_by_another_endofline(self):
+    actual = io.StringIO(u"\"\n\nいあ")
+    expected = io.StringIO(u"。\n\nいあ")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+  def test_mismatch_prior_to_endofline_followed_by_another_endofline_windows(self):
+    actual = io.StringIO(u"\"\r\n\r\nいあ")
+    expected = io.StringIO(u"。\r\n\r\nいあ")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
   def test_out_of_sync_stream(self):
     actual = io.StringIO(u"ぃ　あし\r\n")
     expected = io.StringIO(u"いあし\r\n")
@@ -164,6 +200,7 @@ def test_success_statistics(self):
                                     u"る" : 0.5,
                                     u"ろ" : 1.0
                                    }
+
   def test_extra_whitespace(self):
     actual = io.StringIO(u"新 し い むすこ\nし ご と")
     expected = io.StringIO(u"新しいむすこ\nしごと\n")

From 6d65e4d3aa212cbdf0b283a4bcd93b30c4ad02b8 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 22:33:18 +0930
Subject: [PATCH 20/33] Ensure sync doesn't reach past end of line

---
 evaluate.py                |  2 ++
 unit_test/evaluate_test.py | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/evaluate.py b/evaluate.py
index f6878bd..e7ddc64 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -61,6 +61,8 @@ def markFailure(self, actual_location=None):
         sys.stdout.write("E")
       elif EvaluationStream.iseof(self._actual_char):
         sys.stdout.write("e")
+      elif EvaluationStream.isspace(self._actual_char):
+        sys.stdout.write("_")
       else:
         sys.stdout.write("X")
       if len(self._actual_char) > 1:
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 08f446e..7abc776 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -124,7 +124,7 @@ def test_mismatch_prior_to_endofline_followed_by_another_endofline_windows(self)
     assert result.count == 3
     assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def test_out_of_sync_stream(self):
+  def skip_test_out_of_sync_stream(self):
     actual = io.StringIO(u"ぃ　あし\r\n")
     expected = io.StringIO(u"いあし\r\n")
     result = Evaluation(expected,actual)
@@ -157,6 +157,18 @@ def test_out_of_sync_stream_expected_new_lined_early(self):
                                          { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
                               }
 
+  def test_out_of_sync_stream_doesnt_sync_past_endofline(self):
+    actual =   io.StringIO(u"新しいむすあ\nこしごと\n")
+    expected = io.StringIO(u"新しいむすこ\nしごと\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 9
+    assert result.failures == { u"こ" : [{ "actual" : u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
+                                u"し" : [{ "actual" : u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
+                                }
+
+
   def test_peek_when_empty(self):
     stream = io.StringIO()
     OUT = EvaluationStream(stream)

From 4e93052f0293e91e4ae3b4b7dae6482d6523279e Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 22:45:19 +0930
Subject: [PATCH 21/33] Look 3 deep for resync.

Fixed debug of end of lines.
---
 evaluate.py                |  9 ++++++---
 unit_test/evaluate_test.py | 27 ++++++++++++++++++---------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index e7ddc64..2ab36f7 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -43,7 +43,7 @@ def __init__(self,expected_stream,actual_stream):
     self._expected = EvaluationStream(expected_stream)
     self._actual_char = None
     self._expected_char = None
-    self._max_peek_lookahead = 2
+    self._max_peek_lookahead = 3
 
   def readFromExpected(self):
     self._expected_char = self._expected.read()
@@ -61,12 +61,14 @@ def markFailure(self, actual_location=None):
         sys.stdout.write("E")
       elif EvaluationStream.iseof(self._actual_char):
         sys.stdout.write("e")
+      elif EvaluationStream.isnewline(self._expected_char) or EvaluationStream.isnewline(self._actual_char):
+        sys.stdout.write("$")
       elif EvaluationStream.isspace(self._actual_char):
         sys.stdout.write("_")
       else:
         sys.stdout.write("X")
-      if len(self._actual_char) > 1:
-        sys.stdout.write("s" * (len(self._actual_char)-1))
+        if len(self._actual_char) > 1:
+          sys.stdout.write("s" * (len(self._actual_char)-1))
     logger.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
 
   def resyncActual(self):
@@ -130,6 +132,7 @@ def evaluate(self):
       sys.stdout.write("  X = failed\n")
       sys.stdout.write("  s = skipped\n")
       sys.stdout.write("  _ = skipped extra whitespace\n")
+      sys.stdout.write("  $ = End of Line (expected or actual)")
       sys.stdout.write("  E = End of File (expected)\n")
       sys.stdout.write("  e = End of File (actual)\n")
 
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 7abc776..bac5de0 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -34,7 +34,7 @@ def test_one_character_does_not_match(self):
     assert result.count == 1
     assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def test_endofline_unix_does_not_increase_count(self):
+  def test_newline_unix_does_not_increase_count(self):
     actual = io.StringIO(u"\n")
     expected = io.StringIO(u"\n")
     result = Evaluation(expected,actual)
@@ -43,7 +43,7 @@ def test_endofline_unix_does_not_increase_count(self):
     assert result.count == 0
     assert result.failures == {}
 
-  def test_endofline_windows_does_not_increase_count(self):
+  def test_newline_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\r\n")
     expected = io.StringIO(u"\r\n")
     result = Evaluation(expected,actual)
@@ -52,7 +52,7 @@ def test_endofline_windows_does_not_increase_count(self):
     assert result.count == 0
     assert result.failures == {}
 
-  def test_endofline_mixed_unix_and_windows_does_not_increase_count(self):
+  def test_newline_mixed_unix_and_windows_does_not_increase_count(self):
     actual = io.StringIO(u"\n")
     expected = io.StringIO(u"\r\n")
     result = Evaluation(expected,actual)
@@ -88,7 +88,7 @@ def test_endoffile_mismatch_more_in_expected(self):
     assert result.count == 2
     assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
 
-  def test_mismatch_prior_to_endofline(self):
+  def test_mismatch_prior_to_newline(self):
     actual = io.StringIO(u"\"\nいあ")
     expected = io.StringIO(u"。\nいあ")
     result = Evaluation(expected,actual)
@@ -97,7 +97,7 @@ def test_mismatch_prior_to_endofline(self):
     assert result.count == 3
     assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def test_mismatch_prior_to_endofline_windows(self):
+  def test_mismatch_prior_to_newline_windows(self):
     actual = io.StringIO(u"\"\r\nいあ")
     expected = io.StringIO(u"。\r\nいあ")
     result = Evaluation(expected,actual)
@@ -106,7 +106,7 @@ def test_mismatch_prior_to_endofline_windows(self):
     assert result.count == 3
     assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def test_mismatch_prior_to_endofline_followed_by_another_endofline(self):
+  def test_mismatch_prior_to_newline_followed_by_another_newline(self):
     actual = io.StringIO(u"\"\n\nいあ")
     expected = io.StringIO(u"。\n\nいあ")
     result = Evaluation(expected,actual)
@@ -115,7 +115,7 @@ def test_mismatch_prior_to_endofline_followed_by_another_endofline(self):
     assert result.count == 3
     assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def test_mismatch_prior_to_endofline_followed_by_another_endofline_windows(self):
+  def test_mismatch_prior_to_newline_followed_by_another_newline_windows(self):
     actual = io.StringIO(u"\"\r\n\r\nいあ")
     expected = io.StringIO(u"。\r\n\r\nいあ")
     result = Evaluation(expected,actual)
@@ -124,7 +124,7 @@ def test_mismatch_prior_to_endofline_followed_by_another_endofline_windows(self)
     assert result.count == 3
     assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
 
-  def skip_test_out_of_sync_stream(self):
+  def test_out_of_sync_stream(self):
     actual = io.StringIO(u"ぃ　あし\r\n")
     expected = io.StringIO(u"いあし\r\n")
     result = Evaluation(expected,actual)
@@ -133,6 +133,15 @@ def skip_test_out_of_sync_stream(self):
     assert result.count == 3
     assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] }
 
+  def test_out_of_sync_stream_two_deep(self):
+    actual = io.StringIO(u"ぃ　'あし\r\n")
+    expected = io.StringIO(u"いあし\r\n")
+    result = Evaluation(expected,actual)
+    result.evaluate()
+    assert result.success == False
+    assert result.count == 3
+    assert result.failures == { u"い" : [{ "actual" : u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}] }
+
   def test_out_of_sync_stream_actual_new_lined_early(self):
     actual = io.StringIO(u"新しい\nしごと")
     expected = io.StringIO(u"新しいむすこ\nしごと\n")
@@ -157,7 +166,7 @@ def test_out_of_sync_stream_expected_new_lined_early(self):
                                          { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
                               }
 
-  def test_out_of_sync_stream_doesnt_sync_past_endofline(self):
+  def test_out_of_sync_stream_doesnt_sync_past_newline(self):
     actual =   io.StringIO(u"新しいむすあ\nこしごと\n")
     expected = io.StringIO(u"新しいむすこ\nしごと\n")
     result = Evaluation(expected,actual)

From fca798c06b90da878e9c5d3a29323168953df5f2 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 22:49:53 +0930
Subject: [PATCH 22/33] Add trace option to print mismatches as they occur.

---
 evaluate.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2ab36f7..c758808 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -21,6 +21,8 @@
 import logging
 
 logger = logging.getLogger(__name__)
+trace = logging.getLogger("trace")
+trace.setLevel(logging.INFO)
 
 class IgnoreUnderscoreEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -69,7 +71,7 @@ def markFailure(self, actual_location=None):
         sys.stdout.write("X")
         if len(self._actual_char) > 1:
           sys.stdout.write("s" * (len(self._actual_char)-1))
-    logger.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
+      trace.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
 
   def resyncActual(self):
     """
@@ -311,13 +313,16 @@ def main():
   parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version")
   parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to")
   parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing")
+  parser.add_argument("-t", "--trace", action="store_true", help="Print out mismatches as they occur. Also enables debug")
 
   arg.value = parser.parse_args()
   correct_file = arg.string_value("correct_file", default_value="correct.txt")
   input_file = arg.string_value("input_file")
   results_file = arg.string_value("results_file", default_value=input_file+"-results.txt")
-  if arg.boolean_value("debug"):
+  if arg.boolean_value("debug") or arg.boolean_value("trace"):
     logging.getLogger().setLevel(logging.DEBUG)
+  if arg.boolean_value("trace"):
+    trace.setLevel(logging.DEBUG)
 
   if not os.path.isfile(input_file):
     print("Input file '{0}' does not exist. Use -h option for help".format(input_file))

From 474ecc29058b8cf4322ffb15eeeb88161e23cf81 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Mon, 25 Aug 2014 23:19:26 +0930
Subject: [PATCH 23/33] Make percentages properties for json dumping

---
 evaluate.py                | 30 ++++++++++++++----------------
 unit_test/evaluate_test.py | 16 ++++++++++------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index c758808..2327ec0 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -40,7 +40,8 @@ def __init__(self,expected_stream,actual_stream):
     self.count = 0
     self.failures = collections.defaultdict(list)
     self.successes = collections.defaultdict(list)
-    self._percentages = None
+    self.percentages = None
+    self.percentage_overall = None
     self._actual = EvaluationStream(actual_stream)
     self._expected = EvaluationStream(expected_stream)
     self._actual_char = None
@@ -116,8 +117,8 @@ def handleMismatch(self):
       self.markFailure(mark_failure_position)
 
   def handleMatch(self):
+    self.successes[self._expected_char].append({"expected_location":self._expected.location(),"actual_location":self._actual.location()})
     if not EvaluationStream.isnewline(self._expected_char):
-      self.successes[self._expected_char].append(self._expected.location())
       if logger.isEnabledFor(logging.DEBUG):
         sys.stdout.write(".")
     elif logger.isEnabledFor(logging.DEBUG):
@@ -164,20 +165,16 @@ def evaluate(self):
     self.count = self._expected.count
     return self
 
-  def percentages(self):
-    if not self._percentages:
-      keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
-      self._percentages = {}
-      for key in keys:
-        failure_count = len(self.failures[key]) if key in self.failures else 0
-        success_count = len(self.successes[key]) if key in self.successes else 0
-        self._percentages[key] = success_count / float( failure_count + success_count )
+  def calculate_percentages(self):
+    keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
+    self.percentages = {}
+    for key in keys:
+      failure_count = len(self.failures[key]) if key in self.failures else 0
+      success_count = len(self.successes[key]) if key in self.successes else 0
+      self.percentages[key] = success_count / float( failure_count + success_count )
 
-    return self._percentages
-
-  def overall(self):
-    values = self.percentages().values()
-    return sum(values)/len(values)
+    values = self.percentages.values()
+    self.percentage_overall = sum(values)/len(values)
 
   def __str__(self):
     return unicode(self).encode('utf-8')
@@ -205,7 +202,7 @@ def summary(self):
     result = []
     result.append(u"success={0!s}".format(self.success))
     result.append(u"count={0:d}".format(self.count))
-    result.append(u"overall={0}".format(self.overall()))
+    result.append(u"overall={0}".format(self.percentage_overall))
     return u"\n".join(result)
 
 class EvaluationStream():
@@ -335,6 +332,7 @@ def main():
   with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
     result = Evaluation(c, i)
     result.evaluate()
+    result.calculate_percentages()
 
   with codecs.open(results_file, "wU", "utf-8") as w:
     json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index bac5de0..580d362 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -209,18 +209,22 @@ def test_success_statistics(self):
                                 u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
                                }
     assert result.successes == {
-                                u"あ" : ["1:2"],
-                                u"し" : ["1:3"],
-                                u"ろ" : ["1:5"],
-                                u"る" : ["1:6"]
+                                u"あ" : [{'actual_location': '1:3', 'expected_location': '1:2'}],
+                                u"し" : [{'actual_location': '1:4', 'expected_location': '1:3'}],
+                                u"ろ" : [{'actual_location': '1:6', 'expected_location': '1:5'}],
+                                u"る" : [{'actual_location': '1:7', 'expected_location': '1:6'}],
+                                u"NL" : [{'actual_location': '2:0', 'expected_location': '2:0'}]
                                 }
-    assert result.percentages() == {
+    result.calculate_percentages()
+    assert result.percentages == {
                                     u"い" : 0.0,
                                     u"あ" : 1.0,
                                     u"し" : 1.0,
                                     u"る" : 0.5,
-                                    u"ろ" : 1.0
+                                    u"ろ" : 1.0,
+                                    u"NL" : 1.0
                                    }
+    assert result.percentage_overall == 0.75
 
   def test_extra_whitespace(self):
     actual = io.StringIO(u"新 し い むすこ\nし ご と")

From fe805de4b5474e30bf0b007a7acbb0190ef6f532 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Tue, 26 Aug 2014 22:37:11 +0930
Subject: [PATCH 24/33] Fix indentation

---
 evaluate.py | 350 ++++++++++++++++++++++++++--------------------------
 1 file changed, 174 insertions(+), 176 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2327ec0..73b1c86 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -7,8 +7,6 @@
 Author: Barrie Treloar
 Email: baerrach@gmail.com
 DATE: 13th Aug 2014
-
-  TODO
 """
 
 import codecs
@@ -29,181 +27,181 @@ def default(self, obj):
         attributes = {}
         obj_dict = obj.__dict__
         for key, value in obj_dict.iteritems():
-          if key.startswith(u'_'):
-              continue
-          attributes[key] = value
+            if key.startswith(u'_'):
+                continue
+            attributes[key] = value
         return attributes
 
 class Evaluation:
-  def __init__(self,expected_stream,actual_stream):
-    self.success = None
-    self.count = 0
-    self.failures = collections.defaultdict(list)
-    self.successes = collections.defaultdict(list)
-    self.percentages = None
-    self.percentage_overall = None
-    self._actual = EvaluationStream(actual_stream)
-    self._expected = EvaluationStream(expected_stream)
-    self._actual_char = None
-    self._expected_char = None
-    self._max_peek_lookahead = 3
-
-  def readFromExpected(self):
-    self._expected_char = self._expected.read()
-
-  def readFromActual(self):
-    self._actual_char = self._actual.read()
-
-  def markFailure(self, actual_location=None):
-    if not actual_location:
-      actual_location = self._actual.location()
-    failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()}
-    self.failures[self._expected_char].append(failure_details)
-    if logger.isEnabledFor(logging.DEBUG):
-      if EvaluationStream.iseof(self._expected_char):
-        sys.stdout.write("E")
-      elif EvaluationStream.iseof(self._actual_char):
-        sys.stdout.write("e")
-      elif EvaluationStream.isnewline(self._expected_char) or EvaluationStream.isnewline(self._actual_char):
-        sys.stdout.write("$")
-      elif EvaluationStream.isspace(self._actual_char):
-        sys.stdout.write("_")
-      else:
-        sys.stdout.write("X")
-        if len(self._actual_char) > 1:
-          sys.stdout.write("s" * (len(self._actual_char)-1))
-      trace.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
-
-  def resyncActual(self):
-    """
-    Lookahead on the stream to see if re-syncing is required.
-    If re-syncing is required then the extra characters will be consumed and appended to self._actual_char
-    """
-    sync_to_char = self._expected.peek(1)
-
-    if EvaluationStream.iseof(sync_to_char):
-      # Dont resync on EOF
-      return
-
-    resync_found_ahead_at = None
-    for i in range(1, self._max_peek_lookahead+1):
-      candidate_sync_spot = self._actual.peek(i)
-      if sync_to_char == candidate_sync_spot:
-        resync_found_ahead_at = i
-        break
-
-    if resync_found_ahead_at:
-      while (resync_found_ahead_at > 1): # capture up to (but not including) the resync character
-        resync_found_ahead_at -= 1
-        self._actual_char += self._actual.read()
-
-
-  def handleMismatch(self):
-    self.success = False
-    if EvaluationStream.isnewline(self._expected_char): # Resync actual stream to the next newline
-      while not EvaluationStream.isnewline(self._actual_char) and not EvaluationStream.iseof(self._actual_char):
-        self.markFailure()
-        self.readFromActual()
-    elif EvaluationStream.isnewline(self._actual_char): # Resync expected stream to the next newline
-      while not EvaluationStream.isnewline(self._expected_char) and not EvaluationStream.iseof(self._expected_char):
-        self.markFailure()
-        self.readFromExpected()
-    elif EvaluationStream.isspace(self._actual_char) and self._expected_char == self._actual.peek(1): # ignore whitespace if the next char matches
-        self.markFailure()
-        self._expected.push_back(self._expected_char)
-    else:
-      mark_failure_position = self._actual.location()
-      self.resyncActual()
-      self.markFailure(mark_failure_position)
-
-  def handleMatch(self):
-    self.successes[self._expected_char].append({"expected_location":self._expected.location(),"actual_location":self._actual.location()})
-    if not EvaluationStream.isnewline(self._expected_char):
-      if logger.isEnabledFor(logging.DEBUG):
-        sys.stdout.write(".")
-    elif logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("\n")
-
-  def evaluate(self):
-    """
-    Evaluate the actual ocr results against the expected results and provide metrics on failures.
-    """
-
-    if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("Debug Legend:\n")
-      sys.stdout.write("  . = matched\n")
-      sys.stdout.write("  X = failed\n")
-      sys.stdout.write("  s = skipped\n")
-      sys.stdout.write("  _ = skipped extra whitespace\n")
-      sys.stdout.write("  $ = End of Line (expected or actual)")
-      sys.stdout.write("  E = End of File (expected)\n")
-      sys.stdout.write("  e = End of File (actual)\n")
-
-    while True:
-      self.readFromExpected()
-      self.readFromActual()
-      if EvaluationStream.iseof(self._expected_char) and EvaluationStream.iseof(self._actual_char):
-        if self.success == None:
-          self.success = True
-        break
-
-      up_to_count = self._expected.count
-
-      if self._expected_char != self._actual_char:
-        self.handleMismatch()
-      else:
-        self.handleMatch()
-
-      if EvaluationStream.iseof(self._expected_char):
+    def __init__(self, expected_stream, actual_stream):
+        self.success = None
+        self.count = 0
+        self.failures = collections.defaultdict(list)
+        self.successes = collections.defaultdict(list)
+        self.percentages = None
+        self.percentage_overall = None
+        self._actual = EvaluationStream(actual_stream)
+        self._expected = EvaluationStream(expected_stream)
+        self._actual_char = None
+        self._expected_char = None
+        self._max_peek_lookahead = 3
+
+    def readFromExpected(self):
+        self._expected_char = self._expected.read()
+
+    def readFromActual(self):
+        self._actual_char = self._actual.read()
+
+    def markFailure(self, actual_location=None):
+        if not actual_location:
+            actual_location = self._actual.location()
+        failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()}
+        self.failures[self._expected_char].append(failure_details)
+        if logger.isEnabledFor(logging.DEBUG):
+            if EvaluationStream.iseof(self._expected_char):
+                sys.stdout.write("E")
+            elif EvaluationStream.iseof(self._actual_char):
+                sys.stdout.write("e")
+            elif EvaluationStream.isnewline(self._expected_char) or EvaluationStream.isnewline(self._actual_char):
+                sys.stdout.write("$")
+            elif EvaluationStream.isspace(self._actual_char):
+                sys.stdout.write("_")
+            else:
+                sys.stdout.write("X")
+                if len(self._actual_char) > 1:
+                    sys.stdout.write("s" * (len(self._actual_char) - 1))
+                trace.debug(u"expected='{0}' actual='{1}' expected_location={2} actual_location={3}".format(self._expected_char, self._actual_char, self._expected.location(), self._actual.location()))
+
+    def resyncActual(self):
+        """
+        Lookahead on the stream to see if re-syncing is required.
+        If re-syncing is required then the extra characters will be consumed and appended to self._actual_char
+        """
+        sync_to_char = self._expected.peek(1)
+
+        if EvaluationStream.iseof(sync_to_char):
+            # Dont resync on EOF
+            return
+
+        resync_found_ahead_at = None
+        for i in range(1, self._max_peek_lookahead + 1):
+            candidate_sync_spot = self._actual.peek(i)
+            if sync_to_char == candidate_sync_spot:
+                resync_found_ahead_at = i
+                break
+
+        if resync_found_ahead_at:
+            while (resync_found_ahead_at > 1):  # capture up to (but not including) the resync character
+                resync_found_ahead_at -= 1
+                self._actual_char += self._actual.read()
+
+
+    def handleMismatch(self):
         self.success = False
-        break
-
-    if logger.isEnabledFor(logging.DEBUG):
-      sys.stdout.write("\n")
-      sys.stdout.flush()
-
-    self.count = self._expected.count
-    return self
-
-  def calculate_percentages(self):
-    keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
-    self.percentages = {}
-    for key in keys:
-      failure_count = len(self.failures[key]) if key in self.failures else 0
-      success_count = len(self.successes[key]) if key in self.successes else 0
-      self.percentages[key] = success_count / float( failure_count + success_count )
-
-    values = self.percentages.values()
-    self.percentage_overall = sum(values)/len(values)
-
-  def __str__(self):
-    return unicode(self).encode('utf-8')
-
-  def __unicode__(self):
-    result = []
-    result.append(u"success={0!s}".format(self.success))
-    result.append(u"count={0:d}".format(self.count))
-    result.append(u"failures={")
-    for key, value in self.failures.iteritems():
-      result.append(u"  '{0}' = {1},".format(key, unicode(value)))
-    result.append(u"}")
-    result.append(u"successes={")
-    for key, value in self.successes.iteritems():
-      result.append(u"  '{0}' = {1},".format(key, value))
-    result.append(u"}")
-    result.append(u"percentages={")
-    for key, value in self.percentages().iteritems():
-      result.append(u"  '{0}' = {1},".format(key, value))
-    result.append(u"}")
-    result.append(u"overall={0}".format(self.overall()))
-    return u"\n".join(result)
-
-  def summary(self):
-    result = []
-    result.append(u"success={0!s}".format(self.success))
-    result.append(u"count={0:d}".format(self.count))
-    result.append(u"overall={0}".format(self.percentage_overall))
-    return u"\n".join(result)
+        if EvaluationStream.isnewline(self._expected_char):  # Resync actual stream to the next newline
+            while not EvaluationStream.isnewline(self._actual_char) and not EvaluationStream.iseof(self._actual_char):
+                self.markFailure()
+                self.readFromActual()
+        elif EvaluationStream.isnewline(self._actual_char):  # Resync expected stream to the next newline
+            while not EvaluationStream.isnewline(self._expected_char) and not EvaluationStream.iseof(self._expected_char):
+                self.markFailure()
+                self.readFromExpected()
+        elif EvaluationStream.isspace(self._actual_char) and self._expected_char == self._actual.peek(1):  # ignore whitespace if the next char matches
+            self.markFailure()
+            self._expected.push_back(self._expected_char)
+        else:
+            mark_failure_position = self._actual.location()
+            self.resyncActual()
+            self.markFailure(mark_failure_position)
+
+    def handleMatch(self):
+        self.successes[self._expected_char].append({"expected_location":self._expected.location(), "actual_location":self._actual.location()})
+        if not EvaluationStream.isnewline(self._expected_char):
+            if logger.isEnabledFor(logging.DEBUG):
+                sys.stdout.write(".")
+        elif logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write("\n")
+
+    def evaluate(self):
+        """
+        Evaluate the actual ocr results against the expected results and provide metrics on failures.
+        """
+
+        if logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write("Debug Legend:\n")
+            sys.stdout.write("  . = matched\n")
+            sys.stdout.write("  X = failed\n")
+            sys.stdout.write("  s = skipped\n")
+            sys.stdout.write("  _ = skipped extra whitespace\n")
+            sys.stdout.write("  $ = End of Line (expected or actual)")
+            sys.stdout.write("  E = End of File (expected)\n")
+            sys.stdout.write("  e = End of File (actual)\n")
+
+        while True:
+            self.readFromExpected()
+            self.readFromActual()
+            if EvaluationStream.iseof(self._expected_char) and EvaluationStream.iseof(self._actual_char):
+                if self.success == None:
+                    self.success = True
+                break
+
+            up_to_count = self._expected.count
+
+            if self._expected_char != self._actual_char:
+                self.handleMismatch()
+            else:
+                self.handleMatch()
+
+            if EvaluationStream.iseof(self._expected_char):
+                self.success = False
+                break
+
+        if logger.isEnabledFor(logging.DEBUG):
+            sys.stdout.write("\n")
+            sys.stdout.flush()
+
+        self.count = self._expected.count
+        return self
+
+    def calculate_percentages(self):
+        keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
+        self.percentages = {}
+        for key in keys:
+            failure_count = len(self.failures[key]) if key in self.failures else 0
+            success_count = len(self.successes[key]) if key in self.successes else 0
+            self.percentages[key] = success_count / float(failure_count + success_count)
+
+        values = self.percentages.values()
+        self.percentage_overall = sum(values) / len(values)
+
+    def __str__(self):
+        return unicode(self).encode('utf-8')
+
+    def __unicode__(self):
+        result = []
+        result.append(u"success={0!s}".format(self.success))
+        result.append(u"count={0:d}".format(self.count))
+        result.append(u"failures={")
+        for key, value in self.failures.iteritems():
+            result.append(u"  '{0}' = {1},".format(key, unicode(value)))
+        result.append(u"}")
+        result.append(u"successes={")
+        for key, value in self.successes.iteritems():
+            result.append(u"  '{0}' = {1},".format(key, value))
+        result.append(u"}")
+        result.append(u"percentages={")
+        for key, value in self.percentages().iteritems():
+            result.append(u"  '{0}' = {1},".format(key, value))
+        result.append(u"}")
+        result.append(u"overall={0}".format(self.overall()))
+        return u"\n".join(result)
+
+    def summary(self):
+        result = []
+        result.append(u"success={0!s}".format(self.success))
+        result.append(u"count={0:d}".format(self.count))
+        result.append(u"overall={0}".format(self.percentage_overall))
+        return u"\n".join(result)
 
 class EvaluationStream():
   """
@@ -271,7 +269,7 @@ def read(self):
     char = self._read_stream_or_peek_buffer()
 
     if EvaluationStream.iseof(char):
-      pass # EOF doesn't increment counts
+      pass  # EOF doesn't increment counts
     elif EvaluationStream.isnewline(char):
       self._line += 1
       self._position = 0
@@ -293,7 +291,7 @@ def peek(self, n):
     chars_needed = n - current_peek_chars_available
     for _ in range(chars_needed):
       self._peek_buffer.append(self._read_with_translations())
-    result = self._peek_buffer[n-1]
+    result = self._peek_buffer[n - 1]
     return result
 
   def push_back(self, char):
@@ -315,7 +313,7 @@ def main():
   arg.value = parser.parse_args()
   correct_file = arg.string_value("correct_file", default_value="correct.txt")
   input_file = arg.string_value("input_file")
-  results_file = arg.string_value("results_file", default_value=input_file+"-results.txt")
+  results_file = arg.string_value("results_file", default_value=input_file + "-results.txt")
   if arg.boolean_value("debug") or arg.boolean_value("trace"):
     logging.getLogger().setLevel(logging.DEBUG)
   if arg.boolean_value("trace"):

From 502eebbe339f6c185db1416e68b55d5d26ff2c04 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 20:47:25 +0930
Subject: [PATCH 25/33] Fix indentation to be 4 spaces

---
 evaluate.py                | 230 +++++++++---------
 unit_test/evaluate_test.py | 463 ++++++++++++++++++-------------------
 2 files changed, 346 insertions(+), 347 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 73b1c86..2b00d80 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -204,145 +204,145 @@ def summary(self):
         return u"\n".join(result)
 
 class EvaluationStream():
-  """
-  Wrap an io.TextIOBase to provide Evaluation support.
+    """
+    Wrap an io.TextIOBase to provide Evaluation support.
 
-  :param stream: io.TextIOBase of the actual ocr results
-  """
+    :param stream: io.TextIOBase of the actual ocr results
+    """
 
-  _newline = u"NL"
-  _eof = u"EOF"
+    _newline = u"NL"
+    _eof = u"EOF"
 
-  @staticmethod
-  def isnewline(char):
-    return EvaluationStream._newline == char
+    @staticmethod
+    def isnewline(char):
+        return EvaluationStream._newline == char
 
-  @staticmethod
-  def iseof(char):
-    return EvaluationStream._eof == char
+    @staticmethod
+    def iseof(char):
+        return EvaluationStream._eof == char
 
-  @staticmethod
-  def isspace(char):
-    return u" " == char
+    @staticmethod
+    def isspace(char):
+        return u" " == char
 
-  def __init__(self, stream):
-    self._stream = stream
-    self._line = 1
-    self._position = 0
-    self.count = 0
-    self._peek_buffer = collections.deque()
+    def __init__(self, stream):
+        self._stream = stream
+        self._line = 1
+        self._position = 0
+        self.count = 0
+        self._peek_buffer = collections.deque()
 
-  def _read_with_translations(self):
-    """
-    As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
-    \n is rewritten as NL so that mismatches are printable characters.
-    end of file is rewritten as EOF for printability.
-    """
-    char = self._stream.read(1)
-    while u"\r" == char:
-      char = self._stream.read(1)
+    def _read_with_translations(self):
+        """
+        As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
+        \n is rewritten as NL so that mismatches are printable characters.
+        end of file is rewritten as EOF for printability.
+        """
+        char = self._stream.read(1)
+        while u"\r" == char:
+            char = self._stream.read(1)
 
-    if u"" == char:
-      char = EvaluationStream._eof
-    elif u"\n" == char:
-      char = EvaluationStream._newline
+        if u"" == char:
+            char = EvaluationStream._eof
+        elif u"\n" == char:
+            char = EvaluationStream._newline
 
-    return char
+        return char
 
-  def _read_stream_or_peek_buffer(self):
-    if self._peek_buffer:
-      char = self._peek_buffer.popleft()
-    else:
-      char = self._read_with_translations()
+    def _read_stream_or_peek_buffer(self):
+        if self._peek_buffer:
+            char = self._peek_buffer.popleft()
+        else:
+            char = self._read_with_translations()
 
-    return char
+        return char
 
-  def read(self):
-    """
-    As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
-    \n is rewritten as NL so that mismatches are printable characters.
-    end of file is rewritten as EOF for printability.
+    def read(self):
+        """
+        As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
+        \n is rewritten as NL so that mismatches are printable characters.
+        end of file is rewritten as EOF for printability.
 
-    To support peek, an internal buffer is used and read from before re-reading from stream.
-    """
+        To support peek, an internal buffer is used and read from before re-reading from stream.
+        """
 
-    char = self._read_stream_or_peek_buffer()
+        char = self._read_stream_or_peek_buffer()
 
-    if EvaluationStream.iseof(char):
-      pass  # EOF doesn't increment counts
-    elif EvaluationStream.isnewline(char):
-      self._line += 1
-      self._position = 0
-    else:
-      self._position += 1
-      self.count += 1
+        if EvaluationStream.iseof(char):
+            pass    # EOF doesn't increment counts
+        elif EvaluationStream.isnewline(char):
+            self._line += 1
+            self._position = 0
+        else:
+            self._position += 1
+            self.count += 1
 
-    return char
+        return char
 
-  def location(self):
-    return u"{0:d}:{1:d}".format(self._line, self._position)
+    def location(self):
+        return u"{0:d}:{1:d}".format(self._line, self._position)
 
-  def peek(self, n):
-    """
-    Peek ahead n characters in the input stream and return that character
-    """
+    def peek(self, n):
+        """
+        Peek ahead n characters in the input stream and return that character
+        """
 
-    current_peek_chars_available = len(self._peek_buffer)
-    chars_needed = n - current_peek_chars_available
-    for _ in range(chars_needed):
-      self._peek_buffer.append(self._read_with_translations())
-    result = self._peek_buffer[n - 1]
-    return result
+        current_peek_chars_available = len(self._peek_buffer)
+        chars_needed = n - current_peek_chars_available
+        for _ in range(chars_needed):
+            self._peek_buffer.append(self._read_with_translations())
+        result = self._peek_buffer[n - 1]
+        return result
 
-  def push_back(self, char):
-    assert not EvaluationStream.iseof(char)
-    assert not EvaluationStream.isnewline(char)
-    self._position -= 1
-    self.count -= 1
+    def push_back(self, char):
+        assert not EvaluationStream.iseof(char)
+        assert not EvaluationStream.isnewline(char)
+        self._position -= 1
+        self.count -= 1
 
-    self._peek_buffer.appendleft(char)
+        self._peek_buffer.appendleft(char)
 
 def main():
-  parser = argparse.ArgumentParser(description="Evaluate text against correct version.")
-  parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text")
-  parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version")
-  parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to")
-  parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing")
-  parser.add_argument("-t", "--trace", action="store_true", help="Print out mismatches as they occur. Also enables debug")
-
-  arg.value = parser.parse_args()
-  correct_file = arg.string_value("correct_file", default_value="correct.txt")
-  input_file = arg.string_value("input_file")
-  results_file = arg.string_value("results_file", default_value=input_file + "-results.txt")
-  if arg.boolean_value("debug") or arg.boolean_value("trace"):
-    logging.getLogger().setLevel(logging.DEBUG)
-  if arg.boolean_value("trace"):
-    trace.setLevel(logging.DEBUG)
-
-  if not os.path.isfile(input_file):
-    print("Input file '{0}' does not exist. Use -h option for help".format(input_file))
-    sys.exit(-1)
-
-  if not os.path.isfile(correct_file):
-    print("Correct file '{0}' does not exist. Use -h option for help".format(correct_file))
-    sys.exit(-1)
-
-  with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
-    result = Evaluation(c, i)
-    result.evaluate()
-    result.calculate_percentages()
-
-  with codecs.open(results_file, "wU", "utf-8") as w:
-    json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
-
-  print(u"Summary of evaluation results:")
-  print(u"results={0}".format(results_file))
-  print(result.summary())
+    parser = argparse.ArgumentParser(description="Evaluate text against correct version.")
+    parser.add_argument("-c", "--correct", dest="correct_file", help="File containing the correct text")
+    parser.add_argument("-i", "--input", dest="input_file", required=True, help="File containing the text to compare against the correct version")
+    parser.add_argument("-r", "--results", dest="results_file", help="File to write evaluation results to")
+    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug tracing")
+    parser.add_argument("-t", "--trace", action="store_true", help="Print out mismatches as they occur. Also enables debug")
+
+    arg.value = parser.parse_args()
+    correct_file = arg.string_value("correct_file", default_value="correct.txt")
+    input_file = arg.string_value("input_file")
+    results_file = arg.string_value("results_file", default_value=input_file + "-results.txt")
+    if arg.boolean_value("debug") or arg.boolean_value("trace"):
+        logging.getLogger().setLevel(logging.DEBUG)
+    if arg.boolean_value("trace"):
+        trace.setLevel(logging.DEBUG)
+
+    if not os.path.isfile(input_file):
+        print("Input file '{0}' does not exist. Use -h option for help".format(input_file))
+        sys.exit(-1)
+
+    if not os.path.isfile(correct_file):
+        print("Correct file '{0}' does not exist. Use -h option for help".format(correct_file))
+        sys.exit(-1)
+
+    with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
+        result = Evaluation(c, i)
+        result.evaluate()
+        result.calculate_percentages()
+
+    with codecs.open(results_file, "wU", "utf-8") as w:
+        json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
+
+    print(u"Summary of evaluation results:")
+    print(u"results={0}".format(results_file))
+    print(result.summary())
 
 
 if __name__ == "__main__":
-  logging.basicConfig(stream=sys.stderr, level=logging.INFO)
+    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
 
-  UTF8Writer = codecs.getwriter('utf8')
-  sys.stdout = UTF8Writer(sys.stdout)
-  main()
+    UTF8Writer = codecs.getwriter('utf8')
+    sys.stdout = UTF8Writer(sys.stdout)
+    main()
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 580d362..9ace977 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -7,236 +7,235 @@
 
 class TestEvaluate:
 
-  def test_empty(self):
-    actual = io.StringIO()
-    expected = io.StringIO()
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success
-    assert result.count == 0
-    assert result.failures == {}
-
-  def test_one_character(self):
-    actual = io.StringIO(u"し",)
-    expected = io.StringIO(u"し")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success
-    assert result.count == 1
-    assert result.failures == {}
-
-  def test_one_character_does_not_match(self):
-    actual = io.StringIO(u"あ")
-    expected = io.StringIO(u"し")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 1
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_newline_unix_does_not_increase_count(self):
-    actual = io.StringIO(u"\n")
-    expected = io.StringIO(u"\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success
-    assert result.count == 0
-    assert result.failures == {}
-
-  def test_newline_windows_does_not_increase_count(self):
-    actual = io.StringIO(u"\r\n")
-    expected = io.StringIO(u"\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success
-    assert result.count == 0
-    assert result.failures == {}
-
-  def test_newline_mixed_unix_and_windows_does_not_increase_count(self):
-    actual = io.StringIO(u"\n")
-    expected = io.StringIO(u"\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success
-    assert result.count == 0
-    assert result.failures == {}
-
-  def test_line_reported_in_failures(self):
-    actual = io.StringIO(u"\r\nあ")
-    expected = io.StringIO(u"\r\nし")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 1
-    assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] }
-
-  def test_endoffile_mismatch_more_in_actual(self):
-    actual = io.StringIO(u"あ\r\nし")
-    expected = io.StringIO(u"あ\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 1
-    assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] }
-
-  def test_endoffile_mismatch_more_in_expected(self):
-    actual = io.StringIO(u"あ\r\n")
-    expected = io.StringIO(u"あ\r\nし")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 2
-    assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
-
-  def test_mismatch_prior_to_newline(self):
-    actual = io.StringIO(u"\"\nいあ")
-    expected = io.StringIO(u"。\nいあ")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_mismatch_prior_to_newline_windows(self):
-    actual = io.StringIO(u"\"\r\nいあ")
-    expected = io.StringIO(u"。\r\nいあ")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_mismatch_prior_to_newline_followed_by_another_newline(self):
-    actual = io.StringIO(u"\"\n\nいあ")
-    expected = io.StringIO(u"。\n\nいあ")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_mismatch_prior_to_newline_followed_by_another_newline_windows(self):
-    actual = io.StringIO(u"\"\r\n\r\nいあ")
-    expected = io.StringIO(u"。\r\n\r\nいあ")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_out_of_sync_stream(self):
-    actual = io.StringIO(u"ぃ　あし\r\n")
-    expected = io.StringIO(u"いあし\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_out_of_sync_stream_two_deep(self):
-    actual = io.StringIO(u"ぃ　'あし\r\n")
-    expected = io.StringIO(u"いあし\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 3
-    assert result.failures == { u"い" : [{ "actual" : u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}] }
-
-  def test_out_of_sync_stream_actual_new_lined_early(self):
-    actual = io.StringIO(u"新しい\nしごと")
-    expected = io.StringIO(u"新しいむすこ\nしごと\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 9
-    assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
-                                u"す" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
-                                u"こ" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
-                                }
-
-  def test_out_of_sync_stream_expected_new_lined_early(self):
-    actual = io.StringIO(u"新しいむすこ\nしごと\n")
-    expected = io.StringIO(u"新しい\nしごと")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 6
-    assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
-                                         { "actual" : u"す", "actual_location": "1:5", "expected_location": "2:0"},
-                                         { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
-                              }
-
-  def test_out_of_sync_stream_doesnt_sync_past_newline(self):
-    actual =   io.StringIO(u"新しいむすあ\nこしごと\n")
-    expected = io.StringIO(u"新しいむすこ\nしごと\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 9
-    assert result.failures == { u"こ" : [{ "actual" : u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
-                                u"し" : [{ "actual" : u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
-                                }
-
-
-  def test_peek_when_empty(self):
-    stream = io.StringIO()
-    OUT = EvaluationStream(stream)
-    assert OUT.iseof(OUT.peek(1))
-    assert OUT.iseof(OUT.peek(2))
-
-  def test_peek(self):
-    stream = io.StringIO(u"いあし\r\n")
-    OUT = EvaluationStream(stream)
-    assert u"い" == OUT.peek(1)
-    assert "1:0" == OUT.location()
-    assert u"あ" == OUT.peek(2)
-    assert "1:0" == OUT.location()
-    assert u"し" == OUT.peek(3)
-    assert "1:0" == OUT.location()
-    assert OUT.isnewline(OUT.peek(4))
-    assert "1:0" == OUT.location()
-    assert OUT.iseof(OUT.peek(5))
-    assert "1:0" == OUT.location()
-
-  def test_success_statistics(self):
-    actual = io.StringIO(u"ぃ　あしろろる\r\n")
-    expected = io.StringIO(u"いあしるろる\r\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 6
-    assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
-                                u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
-                               }
-    assert result.successes == {
-                                u"あ" : [{'actual_location': '1:3', 'expected_location': '1:2'}],
-                                u"し" : [{'actual_location': '1:4', 'expected_location': '1:3'}],
-                                u"ろ" : [{'actual_location': '1:6', 'expected_location': '1:5'}],
-                                u"る" : [{'actual_location': '1:7', 'expected_location': '1:6'}],
-                                u"NL" : [{'actual_location': '2:0', 'expected_location': '2:0'}]
-                                }
-    result.calculate_percentages()
-    assert result.percentages == {
-                                    u"い" : 0.0,
-                                    u"あ" : 1.0,
-                                    u"し" : 1.0,
-                                    u"る" : 0.5,
-                                    u"ろ" : 1.0,
-                                    u"NL" : 1.0
+    def test_empty(self):
+        actual = io.StringIO()
+        expected = io.StringIO()
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success
+        assert result.count == 0
+        assert result.failures == {}
+
+    def test_one_character(self):
+        actual = io.StringIO(u"し",)
+        expected = io.StringIO(u"し")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success
+        assert result.count == 1
+        assert result.failures == {}
+
+    def test_one_character_does_not_match(self):
+        actual = io.StringIO(u"あ")
+        expected = io.StringIO(u"し")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 1
+        assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_newline_unix_does_not_increase_count(self):
+        actual = io.StringIO(u"\n")
+        expected = io.StringIO(u"\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success
+        assert result.count == 0
+        assert result.failures == {}
+
+    def test_newline_windows_does_not_increase_count(self):
+        actual = io.StringIO(u"\r\n")
+        expected = io.StringIO(u"\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success
+        assert result.count == 0
+        assert result.failures == {}
+
+    def test_newline_mixed_unix_and_windows_does_not_increase_count(self):
+        actual = io.StringIO(u"\n")
+        expected = io.StringIO(u"\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success
+        assert result.count == 0
+        assert result.failures == {}
+
+    def test_line_reported_in_failures(self):
+        actual = io.StringIO(u"\r\nあ")
+        expected = io.StringIO(u"\r\nし")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 1
+        assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] }
+
+    def test_endoffile_mismatch_more_in_actual(self):
+        actual = io.StringIO(u"あ\r\nし")
+        expected = io.StringIO(u"あ\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 1
+        assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] }
+
+    def test_endoffile_mismatch_more_in_expected(self):
+        actual = io.StringIO(u"あ\r\n")
+        expected = io.StringIO(u"あ\r\nし")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 2
+        assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
+
+    def test_mismatch_prior_to_newline(self):
+        actual = io.StringIO(u"\"\nいあ")
+        expected = io.StringIO(u"。\nいあ")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_mismatch_prior_to_newline_windows(self):
+        actual = io.StringIO(u"\"\r\nいあ")
+        expected = io.StringIO(u"。\r\nいあ")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_mismatch_prior_to_newline_followed_by_another_newline(self):
+        actual = io.StringIO(u"\"\n\nいあ")
+        expected = io.StringIO(u"。\n\nいあ")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_mismatch_prior_to_newline_followed_by_another_newline_windows(self):
+        actual = io.StringIO(u"\"\r\n\r\nいあ")
+        expected = io.StringIO(u"。\r\n\r\nいあ")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_out_of_sync_stream(self):
+        actual = io.StringIO(u"ぃ　あし\r\n")
+        expected = io.StringIO(u"いあし\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_out_of_sync_stream_two_deep(self):
+        actual = io.StringIO(u"ぃ　'あし\r\n")
+        expected = io.StringIO(u"いあし\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 3
+        assert result.failures == { u"い" : [{ "actual" : u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}] }
+
+    def test_out_of_sync_stream_actual_new_lined_early(self):
+        actual = io.StringIO(u"新しい\nしごと")
+        expected = io.StringIO(u"新しいむすこ\nしごと\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 9
+        assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
+                                    u"す" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
+                                    u"こ" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
+                                    }
+
+    def test_out_of_sync_stream_expected_new_lined_early(self):
+        actual = io.StringIO(u"新しいむすこ\nしごと\n")
+        expected = io.StringIO(u"新しい\nしごと")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 6
+        assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
+                                             { "actual" : u"す", "actual_location": "1:5", "expected_location": "2:0"},
+                                             { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
                                    }
-    assert result.percentage_overall == 0.75
-
-  def test_extra_whitespace(self):
-    actual = io.StringIO(u"新 し い むすこ\nし ご と")
-    expected = io.StringIO(u"新しいむすこ\nしごと\n")
-    result = Evaluation(expected,actual)
-    result.evaluate()
-    assert result.success == False
-    assert result.count == 9
-    json.dump(result.failures, sys.stdout, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '))
-    assert result.failures == { u"し" : [{ "actual" : u" ", "actual_location": "1:2", "expected_location": "1:2"}],
-                               u"い" : [{ "actual" : u" ", "actual_location": "1:4", "expected_location": "1:3"}],
-                               u"む" : [{ "actual" : u" ", "actual_location": "1:6", "expected_location": "1:4"}],
-                               u"ご" : [{ "actual" : u" ", "actual_location": "2:2", "expected_location": "2:2"}],
-                               u"と" : [{ "actual" : u" ", "actual_location": "2:4", "expected_location": "2:3"}],
-                               }
+
+    def test_out_of_sync_stream_doesnt_sync_past_newline(self):
+        actual = io.StringIO(u"新しいむすあ\nこしごと\n")
+        expected = io.StringIO(u"新しいむすこ\nしごと\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 9
+        assert result.failures == { u"こ" : [{ "actual" : u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
+                                    u"し" : [{ "actual" : u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
+                                    }
+
+    def test_peek_when_empty(self):
+        stream = io.StringIO()
+        OUT = EvaluationStream(stream)
+        assert OUT.iseof(OUT.peek(1))
+        assert OUT.iseof(OUT.peek(2))
+
+    def test_peek(self):
+        stream = io.StringIO(u"いあし\r\n")
+        OUT = EvaluationStream(stream)
+        assert u"い" == OUT.peek(1)
+        assert "1:0" == OUT.location()
+        assert u"あ" == OUT.peek(2)
+        assert "1:0" == OUT.location()
+        assert u"し" == OUT.peek(3)
+        assert "1:0" == OUT.location()
+        assert OUT.isnewline(OUT.peek(4))
+        assert "1:0" == OUT.location()
+        assert OUT.iseof(OUT.peek(5))
+        assert "1:0" == OUT.location()
+
+    def test_success_statistics(self):
+        actual = io.StringIO(u"ぃ　あしろろる\r\n")
+        expected = io.StringIO(u"いあしるろる\r\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 6
+        assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
+                                    u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
+                                    }
+        assert result.successes == {
+                                    u"あ" : [{'actual_location': '1:3', 'expected_location': '1:2'}],
+                                    u"し" : [{'actual_location': '1:4', 'expected_location': '1:3'}],
+                                    u"ろ" : [{'actual_location': '1:6', 'expected_location': '1:5'}],
+                                    u"る" : [{'actual_location': '1:7', 'expected_location': '1:6'}],
+                                    u"NL" : [{'actual_location': '2:0', 'expected_location': '2:0'}]
+                                    }
+        result.calculate_percentages()
+        print result.percentages
+        assert result.percentages == {
+                                      u"い" : 0.0,
+                                      u"し" : 1.0,
+                                      u"る" : 0.5,
+                                      u"ろ" : 1.0,
+                                      u"NL" : 1.0
+                                      }
+        assert result.percentage_overall == 0.75
+
+    def test_extra_whitespace(self):
+        actual = io.StringIO(u"新 し い むすこ\nし ご と")
+        expected = io.StringIO(u"新しいむすこ\nしごと\n")
+        result = Evaluation(expected,actual)
+        result.evaluate()
+        assert result.success == False
+        assert result.count == 9
+        json.dump(result.failures, sys.stdout, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '))
+        assert result.failures == { u"し" : [{ "actual" : u" ", "actual_location": "1:2", "expected_location": "1:2"}],
+                                    u"い" : [{ "actual" : u" ", "actual_location": "1:4", "expected_location": "1:3"}],
+                                    u"む" : [{ "actual" : u" ", "actual_location": "1:6", "expected_location": "1:4"}],
+                                    u"ご" : [{ "actual" : u" ", "actual_location": "2:2", "expected_location": "2:2"}],
+                                    u"と" : [{ "actual" : u" ", "actual_location": "2:4", "expected_location": "2:3"}],
+                                    }

From 57780a2e2a62cbdc74ad60bd8fa97b8787dc9402 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 20:49:26 +0930
Subject: [PATCH 26/33] Fix test_success_statistics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

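The expected percentages dict in test_success_statistics was missing the
あ entry; it was dropped when the tests were re-indented to four spaces.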
---
 unit_test/evaluate_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 9ace977..cc7dc37 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -217,6 +217,7 @@ def test_success_statistics(self):
         result.calculate_percentages()
         print result.percentages
         assert result.percentages == {
+                                      u"あ" : 1.0,
                                       u"い" : 0.0,
                                       u"し" : 1.0,
                                       u"る" : 0.5,

From da22eb0ca2feceed536c7a9be4b6db62182e59f3 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 20:52:24 +0930
Subject: [PATCH 27/33] Refactor calculate_percentages as private and call in
 evaluate

---
 evaluate.py                | 8 +++++---
 unit_test/evaluate_test.py | 1 -
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2b00d80..502a025 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -161,9 +161,11 @@ def evaluate(self):
             sys.stdout.flush()
 
         self.count = self._expected.count
+
+        self._calculate_percentages()
         return self
 
-    def calculate_percentages(self):
+    def _calculate_percentages(self):
         keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
         self.percentages = {}
         for key in keys:
@@ -172,7 +174,8 @@ def calculate_percentages(self):
             self.percentages[key] = success_count / float(failure_count + success_count)
 
         values = self.percentages.values()
-        self.percentage_overall = sum(values) / len(values)
+        if values:
+            self.percentage_overall = sum(values) / len(values)
 
     def __str__(self):
         return unicode(self).encode('utf-8')
@@ -330,7 +333,6 @@ def main():
     with codecs.open(correct_file, "rU", "utf-8") as c, codecs.open(input_file, "rU", "utf-8") as i:
         result = Evaluation(c, i)
         result.evaluate()
-        result.calculate_percentages()
 
     with codecs.open(results_file, "wU", "utf-8") as w:
         json.dump(result, w, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '), sort_keys=True)
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index cc7dc37..81240fb 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -214,7 +214,6 @@ def test_success_statistics(self):
                                     u"る" : [{'actual_location': '1:7', 'expected_location': '1:6'}],
                                     u"NL" : [{'actual_location': '2:0', 'expected_location': '2:0'}]
                                     }
-        result.calculate_percentages()
         print result.percentages
         assert result.percentages == {
                                       u"あ" : 1.0,

From 19ed35ad36577f192f0200f0b23672d38c81ab48 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 21:14:52 +0930
Subject: [PATCH 28/33] Fix whitespace in expressions

---
 unit_test/evaluate_test.py | 84 ++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 81240fb..657ad36 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -32,7 +32,7 @@ def test_one_character_does_not_match(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 1
-        assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"し": [{"actual": u"あ", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_newline_unix_does_not_increase_count(self):
         actual = io.StringIO(u"\n")
@@ -68,7 +68,7 @@ def test_line_reported_in_failures(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 1
-        assert result.failures == { u"し" : [{ "actual" : u"あ", "actual_location": "2:1", "expected_location": "2:1"}] }
+        assert result.failures == {u"し": [{"actual": u"あ", "actual_location": "2:1", "expected_location": "2:1"}]}
 
     def test_endoffile_mismatch_more_in_actual(self):
         actual = io.StringIO(u"あ\r\nし")
@@ -77,7 +77,7 @@ def test_endoffile_mismatch_more_in_actual(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 1
-        assert result.failures == { u"EOF" : [{ "actual" : u"し", "actual_location": "2:1", "expected_location": "2:0"}] }
+        assert result.failures == {u"EOF": [{"actual": u"し", "actual_location": "2:1", "expected_location": "2:0"}]}
 
     def test_endoffile_mismatch_more_in_expected(self):
         actual = io.StringIO(u"あ\r\n")
@@ -86,7 +86,7 @@ def test_endoffile_mismatch_more_in_expected(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 2
-        assert result.failures == { u"し" : [{ "actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
+        assert result.failures == { u"し": [{"actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
 
     def test_mismatch_prior_to_newline(self):
         actual = io.StringIO(u"\"\nいあ")
@@ -95,7 +95,7 @@ def test_mismatch_prior_to_newline(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"。": [{"actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_mismatch_prior_to_newline_windows(self):
         actual = io.StringIO(u"\"\r\nいあ")
@@ -104,7 +104,7 @@ def test_mismatch_prior_to_newline_windows(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"。": [{"actual": u"\"", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_mismatch_prior_to_newline_followed_by_another_newline(self):
         actual = io.StringIO(u"\"\n\nいあ")
@@ -113,7 +113,7 @@ def test_mismatch_prior_to_newline_followed_by_another_newline(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"。": [{"actual": u"\"", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_mismatch_prior_to_newline_followed_by_another_newline_windows(self):
         actual = io.StringIO(u"\"\r\n\r\nいあ")
@@ -122,7 +122,7 @@ def test_mismatch_prior_to_newline_followed_by_another_newline_windows(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"。" : [{ "actual" : u"\"", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"。": [{"actual": u"\"", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_out_of_sync_stream(self):
         actual = io.StringIO(u"ぃ　あし\r\n")
@@ -131,7 +131,7 @@ def test_out_of_sync_stream(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"い": [{ "actual": u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_out_of_sync_stream_two_deep(self):
         actual = io.StringIO(u"ぃ　'あし\r\n")
@@ -140,7 +140,7 @@ def test_out_of_sync_stream_two_deep(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 3
-        assert result.failures == { u"い" : [{ "actual" : u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}] }
+        assert result.failures == {u"い": [{"actual": u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_out_of_sync_stream_actual_new_lined_early(self):
         actual = io.StringIO(u"新しい\nしごと")
@@ -149,10 +149,11 @@ def test_out_of_sync_stream_actual_new_lined_early(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 9
-        assert result.failures == { u"む" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
-                                    u"す" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
-                                    u"こ" : [{ "actual" : u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
-                                    }
+        assert result.failures == {
+                                   u"む" : [{"actual": u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
+                                   u"す" : [{"actual": u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
+                                   u"こ" : [{"actual": u"NL", "actual_location": "2:0", "expected_location": "1:6"}],
+                                   }
 
     def test_out_of_sync_stream_expected_new_lined_early(self):
         actual = io.StringIO(u"新しいむすこ\nしごと\n")
@@ -161,9 +162,9 @@ def test_out_of_sync_stream_expected_new_lined_early(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 6
-        assert result.failures == { u"NL" : [{ "actual" : u"む", "actual_location": "1:4", "expected_location": "2:0"},
-                                             { "actual" : u"す", "actual_location": "1:5", "expected_location": "2:0"},
-                                             { "actual" : u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
+        assert result.failures == {u"NL": [{"actual": u"む", "actual_location": "1:4", "expected_location": "2:0"},
+                                           {"actual": u"す", "actual_location": "1:5", "expected_location": "2:0"},
+                                           {"actual": u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
                                    }
 
     def test_out_of_sync_stream_doesnt_sync_past_newline(self):
@@ -173,9 +174,10 @@ def test_out_of_sync_stream_doesnt_sync_past_newline(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 9
-        assert result.failures == { u"こ" : [{ "actual" : u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
-                                    u"し" : [{ "actual" : u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
-                                    }
+        assert result.failures == {
+                                   u"こ": [{"actual": u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
+                                   u"し": [{"actual": u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
+                                   }
 
     def test_peek_when_empty(self):
         stream = io.StringIO()
@@ -204,24 +206,25 @@ def test_success_statistics(self):
         result.evaluate()
         assert result.success == False
         assert result.count == 6
-        assert result.failures == { u"い" : [{ "actual" : u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
-                                    u"る" : [{ "actual" : u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
-                                    }
+        assert result.failures == {
+                                   u"い": [{"actual": u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
+                                   u"る": [{"actual": u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
+                                   }
         assert result.successes == {
-                                    u"あ" : [{'actual_location': '1:3', 'expected_location': '1:2'}],
-                                    u"し" : [{'actual_location': '1:4', 'expected_location': '1:3'}],
-                                    u"ろ" : [{'actual_location': '1:6', 'expected_location': '1:5'}],
-                                    u"る" : [{'actual_location': '1:7', 'expected_location': '1:6'}],
-                                    u"NL" : [{'actual_location': '2:0', 'expected_location': '2:0'}]
+                                    u"あ": [{'actual_location': '1:3', 'expected_location': '1:2'}],
+                                    u"し": [{'actual_location': '1:4', 'expected_location': '1:3'}],
+                                    u"ろ": [{'actual_location': '1:6', 'expected_location': '1:5'}],
+                                    u"る": [{'actual_location': '1:7', 'expected_location': '1:6'}],
+                                    u"NL": [{'actual_location': '2:0', 'expected_location': '2:0'}]
                                     }
         print result.percentages
         assert result.percentages == {
-                                      u"あ" : 1.0,
-                                      u"い" : 0.0,
-                                      u"し" : 1.0,
-                                      u"る" : 0.5,
-                                      u"ろ" : 1.0,
-                                      u"NL" : 1.0
+                                      u"あ": 1.0,
+                                      u"い": 0.0,
+                                      u"し": 1.0,
+                                      u"る": 0.5,
+                                      u"ろ": 1.0,
+                                      u"NL": 1.0
                                       }
         assert result.percentage_overall == 0.75
 
@@ -233,9 +236,10 @@ def test_extra_whitespace(self):
         assert result.success == False
         assert result.count == 9
         json.dump(result.failures, sys.stdout, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '))
-        assert result.failures == { u"し" : [{ "actual" : u" ", "actual_location": "1:2", "expected_location": "1:2"}],
-                                    u"い" : [{ "actual" : u" ", "actual_location": "1:4", "expected_location": "1:3"}],
-                                    u"む" : [{ "actual" : u" ", "actual_location": "1:6", "expected_location": "1:4"}],
-                                    u"ご" : [{ "actual" : u" ", "actual_location": "2:2", "expected_location": "2:2"}],
-                                    u"と" : [{ "actual" : u" ", "actual_location": "2:4", "expected_location": "2:3"}],
-                                    }
+        assert result.failures == {
+                                   u"し": [{"actual": u" ", "actual_location": "1:2", "expected_location": "1:2"}],
+                                   u"い": [{"actual": u" ", "actual_location": "1:4", "expected_location": "1:3"}],
+                                   u"む": [{"actual": u" ", "actual_location": "1:6", "expected_location": "1:4"}],
+                                   u"ご": [{"actual": u" ", "actual_location": "2:2", "expected_location": "2:2"}],
+                                   u"と": [{"actual": u" ", "actual_location": "2:4", "expected_location": "2:3"}],
+                                   }

From fffa10b8c3de5082084405c1d9a968f518027941 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 21:18:42 +0930
Subject: [PATCH 29/33] Fix print left over from debugging

---
 unit_test/evaluate_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 657ad36..fc0ba58 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -217,7 +217,6 @@ def test_success_statistics(self):
                                     u"る": [{'actual_location': '1:7', 'expected_location': '1:6'}],
                                     u"NL": [{'actual_location': '2:0', 'expected_location': '2:0'}]
                                     }
-        print result.percentages
         assert result.percentages == {
                                       u"あ": 1.0,
                                       u"い": 0.0,

From 82d170f09c7fcbbd0fe06840534baa3aac06de7d Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 22:09:52 +0930
Subject: [PATCH 30/33] Document classes/methods

---
 evaluate.py | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 110 insertions(+), 4 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 502a025..d33da2a 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -3,7 +3,7 @@
 
 """
 Module: evaluate.py
-Desc: Evaluate the ocr esults against the expected output and provide metrics on failures
+Desc: Evaluate the ocr results against the expected output and provide metrics on failures
 Author: Barrie Treloar
 Email: baerrach@gmail.com
 DATE: 13th Aug 2014
@@ -23,6 +23,14 @@
 trace.setLevel(logging.INFO)
 
 class IgnoreUnderscoreEncoder(json.JSONEncoder):
+
+    """
+    This JSON Encoder ignores any keys that start with an underscore.
+
+    Python uses underscore to indicate a private field. This JSON Encoder will ignore these
+    private fields and return the public version of the data.
+    """
+
     def default(self, obj):
         attributes = {}
         obj_dict = obj.__dict__
@@ -33,7 +41,43 @@ def default(self, obj):
         return attributes
 
 class Evaluation:
+
+    u"""
+    Evaluation takes an expected stream and an actual stream and evaluates them to determine how closely they match.
+
+    self.success = True if they match completely, False otherwise.
+    self.count = The count of the characters read from the actual stream
+    self.failures = A dictionary of the failures, keyed by the failed character.
+                    The value is a list of dictionaries that describe the failure locations.
+                    e.g.
+                    {
+                     u"い": [{"actual": u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
+                     u"る": [{"actual": u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
+                     }
+    self.successes = A dictionary of the successes, keyed by the success character.
+                     The value is a list of dictionaries that describe the success locations.
+                     e.g.
+                     {
+                      u"あ": [{'actual_location': '1:3', 'expected_location': '1:2'}],
+                      u"し": [{'actual_location': '1:4', 'expected_location': '1:3'}]
+                      }
+    self.percentages = A dictionary of characters and their percentages of successful matches.
+    self.percentage_overall = A percentage of the successes over the entire stream.
+    """
+
     def __init__(self, expected_stream, actual_stream):
+        """
+        Setup the Evaluation state to evaluate the two provided streams.
+
+        Internally the streams are wrapped in EvaluationStream objects to provide additional features needed
+        to track the stream; line and column positions, universal newline handling, peeking.
+
+        :param expected_stream: Contains the expected text
+        :type expected_stream: io.TextIOBase
+        :param actual_stream: Contains the actual text
+        :type actual_stream: io.TextIOBase
+        """
+
         self.success = None
         self.count = 0
         self.failures = collections.defaultdict(list)
@@ -47,12 +91,20 @@ def __init__(self, expected_stream, actual_stream):
         self._max_peek_lookahead = 3
 
     def readFromExpected(self):
+        """Read from the expected stream and store the character read"""
         self._expected_char = self._expected.read()
 
     def readFromActual(self):
+        """Read from the actual stream and store the character read"""
         self._actual_char = self._actual.read()
 
     def markFailure(self, actual_location=None):
+        """
+        Mark the current character as a failure.
+
+        :param actual_location: The location where the failure occurred. If not provided the current location of the actual stream is used.
+        """
+
         if not actual_location:
             actual_location = self._actual.location()
         failure_details = {u"actual":self._actual_char, u"actual_location":actual_location, u"expected_location":self._expected.location()}
@@ -75,8 +127,17 @@ def markFailure(self, actual_location=None):
     def resyncActual(self):
         """
         Lookahead on the stream to see if re-syncing is required.
-        If re-syncing is required then the extra characters will be consumed and appended to self._actual_char
+        If re-syncing is required then the extra characters will be consumed and appended to self._actual_char.
+        If characters are consumed then the position in the stream will also change. If you need to know the original
+        position prior to resyncing then store the location prior to invoking this method.
+
+        TODO: Add confidence by continuing past the resync point and determining how many characters still match.
+              Note: For Japanese this isn't so useful since a kanji is an entire word and OCR may be failing
+              on every other character and reducing confidence.
+
+        TODO: Resync should stop at newlines. Tests indicate that this is not currently a problem.
         """
+
         sync_to_char = self._expected.peek(1)
 
         if EvaluationStream.iseof(sync_to_char):
@@ -95,8 +156,21 @@ def resyncActual(self):
                 resync_found_ahead_at -= 1
                 self._actual_char += self._actual.read()
 
-
     def handleMismatch(self):
+        """
+        Handle a mismatch of the streams.
+
+        This will mark the self.success as False indicating that the evaluation was not successful.
+
+        If a newline is encountered on either stream then the other stream is consumed until a newline is found
+        or the end of file is reached.
+
+        If actual char is a space then peek ahead to see if that character matches what was expected.
+        The space is marked as a failure and the expected char pushed back onto the stream to get back in sync.
+
+        Otherwise the actual location is marked and a resync is attempted before marking the failure.
+        """
+
         self.success = False
         if EvaluationStream.isnewline(self._expected_char):  # Resync actual stream to the next newline
             while not EvaluationStream.isnewline(self._actual_char) and not EvaluationStream.iseof(self._actual_char):
@@ -115,6 +189,10 @@ def handleMismatch(self):
             self.markFailure(mark_failure_position)
 
     def handleMatch(self):
+        """
+        Handle a match of the streams.
+        """
+
         self.successes[self._expected_char].append({"expected_location":self._expected.location(), "actual_location":self._actual.location()})
         if not EvaluationStream.isnewline(self._expected_char):
             if logger.isEnabledFor(logging.DEBUG):
@@ -166,6 +244,10 @@ def evaluate(self):
         return self
 
     def _calculate_percentages(self):
+        """
+        Calculate the percentages of successes to failures.
+        """
+
         keys = set(self.successes.iterkeys()).union(self.failures.iterkeys())
         self.percentages = {}
         for key in keys:
@@ -200,17 +282,23 @@ def __unicode__(self):
         return u"\n".join(result)
 
     def summary(self):
+        """
+        Provide a summary version of __unicode__
+        """
+
         result = []
         result.append(u"success={0!s}".format(self.success))
         result.append(u"count={0:d}".format(self.count))
         result.append(u"overall={0}".format(self.percentage_overall))
         return u"\n".join(result)
 
+
 class EvaluationStream():
+
     """
     Wrap an io.TextIOBase to provide Evaluation support.
 
-    :param stream: io.TextIOBase of the actual ocr results
+    self.count = How many characters have been read from the stream.
     """
 
     _newline = u"NL"
@@ -218,14 +306,17 @@ class EvaluationStream():
 
     @staticmethod
     def isnewline(char):
+        """Check whether the char is a newline"""
         return EvaluationStream._newline == char
 
     @staticmethod
     def iseof(char):
+        """Check whether the char is the end of file"""
         return EvaluationStream._eof == char
 
     @staticmethod
     def isspace(char):
+        """Check whether the character is a space"""
         return u" " == char
 
     def __init__(self, stream):
@@ -240,7 +331,10 @@ def _read_with_translations(self):
         As per io.TextIOBase.read(1), but also ignore windows \r characters by reading the next character.
         \n is rewritten as NL so that mismatches are printable characters.
         end of file is rewritten as EOF for printability.
+
+        Use self._read_stream_or_peek_buffer instead of this function directly.
         """
+
         char = self._stream.read(1)
         while u"\r" == char:
             char = self._stream.read(1)
@@ -253,6 +347,10 @@ def _read_with_translations(self):
         return char
 
     def _read_stream_or_peek_buffer(self):
+        """
+        Reads a character from the peek buffer, if there is anything on it, or else directly from the stream.
+        """
+
         if self._peek_buffer:
             char = self._peek_buffer.popleft()
         else:
@@ -267,6 +365,8 @@ def read(self):
         end of file is rewritten as EOF for printability.
 
         To support peek, an internal buffer is used and read from before re-reading from stream.
+
+        Internal counters are incremented to track the current line and position, see self.location()
         """
 
         char = self._read_stream_or_peek_buffer()
@@ -283,6 +383,7 @@ def read(self):
         return char
 
     def location(self):
+        """Return a string description of the stream's location, in the form of <line>:<position>"""
         return u"{0:d}:{1:d}".format(self._line, self._position)
 
     def peek(self, n):
@@ -298,6 +399,11 @@ def peek(self, n):
         return result
 
     def push_back(self, char):
+        """
+        Push the provided character back onto the head of the stream.
+
+        Newline and EOF are not supported.
+        """
         assert not EvaluationStream.iseof(char)
         assert not EvaluationStream.isnewline(char)
         self._position -= 1

From aa6c61cba7ce2540ece7d28d5770116d15fa996d Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 22:10:32 +0930
Subject: [PATCH 31/33] Organise imports

---
 evaluate.py                | 9 +++++----
 unit_test/evaluate_test.py | 3 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index d33da2a..2f70f5a 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -9,14 +9,15 @@
 DATE: 13th Aug 2014
 """
 
+import argparse
 import codecs
 import collections
-import argparse
-import arg
-import sys
-import os
 import json
 import logging
+import os
+import sys
+
+import arg
 
 logger = logging.getLogger(__name__)
 trace = logging.getLogger("trace")
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index fc0ba58..87f51d4 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -2,9 +2,10 @@
 
 import io
 import json
-from evaluate import Evaluation, EvaluationStream, IgnoreUnderscoreEncoder
 import sys
 
+from evaluate import Evaluation, EvaluationStream, IgnoreUnderscoreEncoder
+
 class TestEvaluate:
 
     def test_empty(self):

From b358686a835e8ac2494f8e22bd6cee4c0aa88731 Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 22:11:13 +0930
Subject: [PATCH 32/33] Rename mark_failure_position as mark_failure_location

---
 evaluate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2f70f5a..2a18432 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -185,9 +185,9 @@ def handleMismatch(self):
             self.markFailure()
             self._expected.push_back(self._expected_char)
         else:
-            mark_failure_position = self._actual.location()
+            mark_failure_location = self._actual.location()
             self.resyncActual()
-            self.markFailure(mark_failure_position)
+            self.markFailure(mark_failure_location)
 
     def handleMatch(self):
         """

From 9f4d56c4d419849177f49278a677482a27a9749a Mon Sep 17 00:00:00 2001
From: Barrie Treloar <baerrach@gmail.com>
Date: Wed, 27 Aug 2014 22:15:22 +0930
Subject: [PATCH 33/33] Count should be actual characters read, not expected

---
 evaluate.py                |  2 +-
 unit_test/evaluate_test.py | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index 2a18432..c39572c 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -239,7 +239,7 @@ def evaluate(self):
             sys.stdout.write("\n")
             sys.stdout.flush()
 
-        self.count = self._expected.count
+        self.count = self._actual.count
 
         self._calculate_percentages()
         return self
diff --git a/unit_test/evaluate_test.py b/unit_test/evaluate_test.py
index 87f51d4..985d988 100644
--- a/unit_test/evaluate_test.py
+++ b/unit_test/evaluate_test.py
@@ -77,7 +77,7 @@ def test_endoffile_mismatch_more_in_actual(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 1
+        assert result.count == 2
         assert result.failures == {u"EOF": [{"actual": u"し", "actual_location": "2:1", "expected_location": "2:0"}]}
 
     def test_endoffile_mismatch_more_in_expected(self):
@@ -86,7 +86,7 @@ def test_endoffile_mismatch_more_in_expected(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 2
+        assert result.count == 1
         assert result.failures == { u"し": [{"actual" : u"EOF", "actual_location": "2:0", "expected_location": "2:1"}] }
 
     def test_mismatch_prior_to_newline(self):
@@ -131,7 +131,7 @@ def test_out_of_sync_stream(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 3
+        assert result.count == 4
         assert result.failures == {u"い": [{ "actual": u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_out_of_sync_stream_two_deep(self):
@@ -140,7 +140,7 @@ def test_out_of_sync_stream_two_deep(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 3
+        assert result.count == 5
         assert result.failures == {u"い": [{"actual": u"ぃ　'", "actual_location": "1:1", "expected_location": "1:1"}]}
 
     def test_out_of_sync_stream_actual_new_lined_early(self):
@@ -149,7 +149,7 @@ def test_out_of_sync_stream_actual_new_lined_early(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 9
+        assert result.count == 6
         assert result.failures == {
                                    u"む" : [{"actual": u"NL", "actual_location": "2:0", "expected_location": "1:4"}],
                                    u"す" : [{"actual": u"NL", "actual_location": "2:0", "expected_location": "1:5"}],
@@ -162,7 +162,7 @@ def test_out_of_sync_stream_expected_new_lined_early(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 6
+        assert result.count == 9
         assert result.failures == {u"NL": [{"actual": u"む", "actual_location": "1:4", "expected_location": "2:0"},
                                            {"actual": u"す", "actual_location": "1:5", "expected_location": "2:0"},
                                            {"actual": u"こ", "actual_location": "1:6", "expected_location": "2:0"}]
@@ -174,7 +174,7 @@ def test_out_of_sync_stream_doesnt_sync_past_newline(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 9
+        assert result.count == 10
         assert result.failures == {
                                    u"こ": [{"actual": u"あ", "actual_location": "1:6", "expected_location": "1:6"}],
                                    u"し": [{"actual": u"こし", "actual_location": "2:1", "expected_location": "2:1"}]
@@ -206,7 +206,7 @@ def test_success_statistics(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 6
+        assert result.count == 7
         assert result.failures == {
                                    u"い": [{"actual": u"ぃ　", "actual_location": "1:1", "expected_location": "1:1"}],
                                    u"る": [{"actual": u"ろ", "actual_location" : "1:5", "expected_location": "1:4"}]
@@ -234,7 +234,7 @@ def test_extra_whitespace(self):
         result = Evaluation(expected,actual)
         result.evaluate()
         assert result.success == False
-        assert result.count == 9
+        assert result.count == 14
         json.dump(result.failures, sys.stdout, cls=IgnoreUnderscoreEncoder, ensure_ascii=False, indent=2, separators=(',', ': '))
         assert result.failures == {
                                    u"し": [{"actual": u" ", "actual_location": "1:2", "expected_location": "1:2"}],