diff --git a/examples/grep_speed.py b/examples/grep_speed.py
index 16b4d7bc..8325d2bc 100644
--- a/examples/grep_speed.py
+++ b/examples/grep_speed.py
@@ -6,9 +6,14 @@
_, filename, limit = sys.argv
-with open(filename) as fh:
- for line in fh:
- for _ in range(int(limit)):
- if re.search(r'y', line):
- print(line)
+def grep(regex, filename):
+ with open(filename) as fh:
+ for line in fh:
+ if re.search(regex, line):
+ print(line, end='')
+i = int(limit)
+
+while i:
+ i-=1
+ grep('y', filename)
diff --git a/examples/grep_speed.sh b/examples/grep_speed.sh
index 9d946dde..2ced1a2a 100644
--- a/examples/grep_speed.sh
+++ b/examples/grep_speed.sh
@@ -1,7 +1,13 @@
+#!/bin/bash
+
+if (($# != 2)); then
+ echo "$0" 'FILENAME LIMIT' >&2
+ exit 1
+fi
+
filename=$1
limit=$2
-for ((i=1;i<=$limit;i++));
-do
- grep y $filename
+for ((i=limit; i; --i)); do
+ grep y "$filename"
done
diff --git a/examples/grep_speed_open_once.py b/examples/grep_speed_open_once.py
new file mode 100644
index 00000000..1c089f3d
--- /dev/null
+++ b/examples/grep_speed_open_once.py
@@ -0,0 +1,20 @@
+import sys
+import re
+
+if len(sys.argv) != 3:
+ exit(f"{sys.argv[0]} FILENAME LIMIT")
+
+_, filename, limit = sys.argv
+
+def grep(regex, fh):
+ for line in fh:
+ if re.search(regex, line):
+ print(line, end='')
+
+i = int(limit)
+
+with open(filename) as fh:
+ while i:
+ i-=1
+ grep('y', fh)
+ fh.seek(0)
diff --git a/examples/grep_speed_optimized.py b/examples/grep_speed_optimized.py
new file mode 100644
index 00000000..041b7924
--- /dev/null
+++ b/examples/grep_speed_optimized.py
@@ -0,0 +1,21 @@
+import sys
+import re
+
+if len(sys.argv) != 3:
+ exit(f"{sys.argv[0]} FILENAME LIMIT")
+
+_, filename, limit = sys.argv
+
+def grep(regex, fh):
+ for line in fh:
+ if regex.search(line):
+ print(line, end='')
+
+i = int(limit)
+
+y = re.compile('y')
+with open(filename) as fh:
+ while i:
+ i-=1
+ grep(y, fh)
+ fh.seek(0)
diff --git a/examples/grep_speed_oxo.py b/examples/grep_speed_oxo.py
index 5c3644cc..9b4d3d4a 100644
--- a/examples/grep_speed_oxo.py
+++ b/examples/grep_speed_oxo.py
@@ -6,9 +6,16 @@
_, filename, limit = sys.argv
-with open(filename) as fh:
+def grep(regex, fh):
for line in fh:
- for _ in range(int(limit)):
- if re.search(r'(.)y\1', line):
- print(line)
+ if regex.search(line):
+ print(line, end='')
+
+i = int(limit)
+y = re.compile(r'(.)y\1')
+with open(filename) as fh:
+ while i:
+ i-=1
+ grep(y, fh)
+ fh.seek(0)
diff --git a/examples/grep_speed_oxo.sh b/examples/grep_speed_oxo.sh
index e92fc2d4..95b0207a 100644
--- a/examples/grep_speed_oxo.sh
+++ b/examples/grep_speed_oxo.sh
@@ -1,7 +1,13 @@
+#!/bin/bash
+
+if (($# != 2)); then
+ echo "$0" 'FILENAME LIMIT' >&2
+ exit 1
+fi
+
filename=$1
limit=$2
-for ((i=1;i<=$limit;i++));
-do
- grep '\(.\)y\1' $filename
+for ((i=limit; i; --i)); do
+ grep '\(.\)y\1' "$filename"
done
diff --git a/examples/grep_speed_oxo_unoptimized.py b/examples/grep_speed_oxo_unoptimized.py
new file mode 100644
index 00000000..1d75fe7c
--- /dev/null
+++ b/examples/grep_speed_oxo_unoptimized.py
@@ -0,0 +1,19 @@
+import sys
+import re
+
+if len(sys.argv) != 3:
+ exit(f"{sys.argv[0]} FILENAME LIMIT")
+
+_, filename, limit = sys.argv
+
+def grep(regex, filename):
+ with open(filename) as fh:
+ for line in fh:
+ if re.search(regex, line):
+ print(line, end='')
+
+i = int(limit)
+
+while i:
+ i-=1
+ grep(r'(.)y\1', filename)
diff --git a/sites/en/pages/compare-the-speed-of-grep-with-python-regex.txt b/sites/en/pages/compare-the-speed-of-grep-with-python-regex.txt
index 9c62679e..fe210fea 100644
--- a/sites/en/pages/compare-the-speed-of-grep-with-python-regex.txt
+++ b/sites/en/pages/compare-the-speed-of-grep-with-python-regex.txt
@@ -9,7 +9,7 @@
=abstract start
-At one of my client we had a Bash script that grepped a huge log file 20 times in order to generate a report.
+One of my clients had a Bash script that grepped a huge log file 20 times in order to generate a report.
It created a lot of load on the server as grep was reading the entire file 20 times.
As we were converting our Shell scripts to Python anyway I thought I could rewrite it in Python and go over the file
@@ -31,21 +31,34 @@ We can run it like this, indicating the name of the file we would like to create
the number of rows and the length of rows.
-python create-big-file.py FILENAME NUMBER-OF-ROWS LENGTH-OF-ROWS
+$ python create-big-file.py FILENAME NUMBER-OF-ROWS LENGTH-OF-ROWS
+
+
+For example:
+
+
+$ python create-big-file.py a.txt 100000 50
It will create a file full of the character "x", with a single "y" somewhere.
+
+$ wc a.txt
+ 1000000 1000000 51000000 a.txt
+$ grep y a.txt
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyx
+
+
I think this is going to be good enough for our simple example.
-python create-big-file.py a.txt 100000 50
+$ time bash examples/grep_speed.sh a.txt 20 >/dev/null
+
+real 0m0.355s
+user 0m0.238s
+sys 0m0.097s
-Verify the file:
-$ wc a.txt
- 1000000 1000000 51000000 a.txt
-
+$ time python examples/grep_speed.py a.txt 20 >/dev/null
-
-# grep y a.txt
-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyx
+real 0m9.897s
+user 0m9.772s
+sys 0m0.120s
+So, grep is roughly 28 times faster than Python; what if we optimize the Python code by only opening the file once?
+
+
-$ time bash examples/grep_speed.sh a.txt 20
+$ time python examples/grep_speed_open_once.py a.txt 20 >/dev/null
-real 0m0.227s
-user 0m0.055s
-sys 0m0.172s
+real 0m9.712s
+user 0m9.625s
+sys 0m0.082s
+What if we optimize the regular expression by compiling it only once?
+
+
-$ time python examples/grep_speed.py a.txt 20
+$ time python examples/grep_speed_optimized.py a.txt 20 >/dev/null
-real 0m9.509s
-user 0m9.477s
-sys 0m0.032s
+real 0m2.198s
+user 0m2.121s
+sys 0m0.075s
-
-grep is about 50 times faster than Python even though grep had to read the file 20 time while Python only read it once.
+By pre-compiling the regular expression, the Python code is now about 4.5x faster than the unoptimized Python code; however, grep is still about 6 times faster than Python, even though grep must start afresh on each iteration.
-grep '\(.\)y\1' a.txt
+$ grep '\(.\)y\1' a.txt
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyx
-$ time bash examples/grep_speed_oxo.sh a.txt 20
+$ time bash examples/grep_speed_oxo.sh a.txt 20 >/dev/null
-real 0m0.196s
-user 0m0.035s
-sys 0m0.161s
+real 0m0.413s
+user 0m0.297s
+sys 0m0.097s
-$ time python examples/grep_speed_oxo.py a.txt 20
+$ time python examples/grep_speed_oxo.py a.txt 20 >/dev/null
-real 0m25.067s
-user 0m24.972s
-sys 0m0.016s
+real 0m12.724s
+user 0m12.589s
+sys 0m0.128s
-The speed of grep did not change, but Python became even slower. This time grep is more than a 100 times faster than Python.
+The speed of grep did not change appreciably, but the Python code became much slower; this time, grep is more than 30 times faster than Python, despite using some explicit optimizations in the Python code. How does the unoptimized code fare?
+
+
+$ time python examples/grep_speed_oxo_unoptimized.py a.txt 20 >/dev/null
+
+real 0m23.448s
+user 0m23.319s
+sys 0m0.114s
+
$ grep -V
-grep (GNU grep) 3.4
+grep (GNU grep) 3.3