From 5dd6db30d18c914b88e16d44ffebb1de78017e86 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sat, 9 Feb 2019 03:02:19 +0100
Subject: [PATCH 01/27] Add `LowLevelConnection`

This is a first draft that implements this class. It is responsible
to obtain connections to the DB and make sure the expected tables
exist.
---
 pynance/database.py      | 57 ++++++++++++++++++++++++++++++++++++++++
 pynance/database_test.py | 46 ++++++++++++++++++++++++++++++++
 unittests.py             |  4 ++-
 3 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 pynance/database.py
 create mode 100644 pynance/database_test.py

diff --git a/pynance/database.py b/pynance/database.py
new file mode 100644
index 0000000..f8a7bae
--- /dev/null
+++ b/pynance/database.py
@@ -0,0 +1,57 @@
+import sqlite3
+
+class LowLevelConnection(object):
+    """
+    Class that handles low-level database connection. Should be used in with-statements.
+    """
+
+
+    # Schema evolution should be handled later once it is needed
+    SUPPORTED_SCHEMA_VERSIONS = [1]
+
+    TABLE_SCHEMA_VERSION = 'schema'
+    TABLE_TRANSACTIONS = 'transactions'
+    TABLE_TRANSACTIONS_FIELDS = [
+        'id INTEGER PRIMARY KEY',
+        'imported_at INTEGER', # unix timestamp
+        'date TEXT', # format: YYYY-MM-DD
+        'sender_account TEXT', 
+        'receiver_account TEXT',
+        'text TEXT',
+        'amount REAL',
+        'total_balance REAL',
+        'currency TEXT',
+        'category TEXT',
+        'tags TEXT'
+    ]
+
+    def __init__(self, schema_version, db_file_name):
+        """
+        Parameters:
+         * `schema_version`: Integer denoting the schema version.
+         * `db_file_name`: This DB file will be created if it does not yet exist.
+        """
+        assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS
+        self.db_file_name = db_file_name
+
+        with sqlite3.connect(self.db_file_name) as conn:
+            cursor = conn.cursor()
+            cursor.execute('BEGIN TRANSACTION')
+
+            cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+            cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+
+            cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
+                LowLevelConnection.TABLE_TRANSACTIONS,
+                ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS)
+            ))
+
+            cursor.execute('COMMIT')
+            conn.commit()
+    
+    def __enter__(self):
+        self.conn = sqlite3.connect(self.db_file_name)
+        return self.conn
+    
+    def __exit__(self, _1, _2, _3):
+        self.conn.close()
\ No newline at end of file
diff --git a/pynance/database_test.py b/pynance/database_test.py
new file mode 100644
index 0000000..ed2bfbe
--- /dev/null
+++ b/pynance/database_test.py
@@ -0,0 +1,46 @@
+import unittest
+import os.path
+from tempfile import TemporaryDirectory
+
+from pynance.database import LowLevelConnection
+
+class LowLevelConnectionTestCase(unittest.TestCase):
+    def test_creates_database_file_if_not_exists(self):
+        with TemporaryDirectory() as tmp_dir:
+            db_file = os.path.join(tmp_dir, 'test.db')
+            self.assertFalse(os.path.exists(db_file))
+            with LowLevelConnection(1, db_file) as _:
+                pass
+            self.assertTrue(os.path.exists(db_file))
+    
+    def test_opens_connection(self):
+        with TemporaryDirectory() as tmp_dir:
+            with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                self.assertIsNotNone(conn)
+    
+    def test_creates_expected_tables(self):
+        with TemporaryDirectory() as tmp_dir:
+            with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                cursor = conn.cursor()
+                tables = set(map(
+                    lambda x: x[0],
+                    cursor.execute('select name from sqlite_master where type="table"').fetchall()
+                ))
+                self.assertEqual(
+                    tables,
+                    set([LowLevelConnection.TABLE_SCHEMA_VERSION,LowLevelConnection.TABLE_TRANSACTIONS
+                ]))
+                self.assertEqual(
+                    [(1,)],
+                    cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall()
+                )
+
+
+
+def test_suite():
+    "return the test suite"
+    suite = unittest.TestSuite()
+    suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists'))
+    suite.addTest(LowLevelConnectionTestCase('test_opens_connection'))
+    suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables'))
+    return suite
\ No newline at end of file
diff --git a/unittests.py b/unittests.py
index ad89cad..b86eb81 100644
--- a/unittests.py
+++ b/unittests.py
@@ -6,6 +6,7 @@
 import pynance.dummy_test
 import pynance.textimporter_test
 import pynance.dash_viz.plot_flow_test
+import pynance.database_test
 
 def doc_test_suite():
     "Returns the testsuite doctests for all modules. Please don't forget to add new modules here."
@@ -32,6 +33,7 @@ def test_suite():
     suite.addTests(pynance.dummy_test.test_suite())
     suite.addTests(pynance.textimporter_test.test_suite())
     suite.addTests(pynance.dash_viz.plot_flow_test.test_suite())
+    suite.addTests(pynance.database_test.test_suite())
 
     suite.addTest(doc_test_suite())
 
@@ -46,4 +48,4 @@ def run_all_unit_tests():
 if __name__ == "__main__":
     import sys
     all_tests_ok = run_all_unit_tests()
-    sys.exit(not all_tests_ok)
\ No newline at end of file
+    sys.exit(not all_tests_ok)

From 4585a32a5a4f29cf58c5aa7a5c44703687545961 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sat, 9 Feb 2019 03:15:30 +0100
Subject: [PATCH 02/27] Make tests involving temp directories compatible with
 Python 2

Python 2 does not have tempfile.TemporaryDirectory
---
 pynance/database_test.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pynance/database_test.py b/pynance/database_test.py
index ed2bfbe..e0f8ab1 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -1,9 +1,18 @@
 import unittest
 import os.path
-from tempfile import TemporaryDirectory
+import shutil
+from tempfile import mkdtemp
 
 from pynance.database import LowLevelConnection
 
+class TemporaryDirectory(object):
+    def __enter__(self):
+        self.dir = mkdtemp()
+        return self.dir
+    
+    def __exit__(self, _1, _2, _3):
+        shutil.rmtree(self.dir)
+
 class LowLevelConnectionTestCase(unittest.TestCase):
     def test_creates_database_file_if_not_exists(self):
         with TemporaryDirectory() as tmp_dir:

From 37b37a30ddca4e5db42f6dcec3c765566c7495b9 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 10 Feb 2019 15:45:10 +0100
Subject: [PATCH 03/27] Add class InsertTable

This class duplicates a Pandas DataFrame into a temporary table
inside a sqlite database. The table is disposed once it is not needed
any more.
---
 pynance/database.py      | 61 ++++++++++++++++++++++++++++++++---
 pynance/database_test.py | 68 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 6 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index f8a7bae..349014c 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -2,17 +2,17 @@
 
 class LowLevelConnection(object):
     """
-    Class that handles low-level database connection. Should be used in with-statements.
+    Class that handles low-level database connection. Makes sure the expected table strucutre exists.
+    Should be used in with-statements. 
     """
 
-
     # Schema evolution should be handled later once it is needed
     SUPPORTED_SCHEMA_VERSIONS = [1]
 
     TABLE_SCHEMA_VERSION = 'schema'
     TABLE_TRANSACTIONS = 'transactions'
+    TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY'
     TABLE_TRANSACTIONS_FIELDS = [
-        'id INTEGER PRIMARY KEY',
         'imported_at INTEGER', # unix timestamp
         'date TEXT', # format: YYYY-MM-DD
         'sender_account TEXT', 
@@ -43,8 +43,11 @@ def __init__(self, schema_version, db_file_name):
 
             cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
                 LowLevelConnection.TABLE_TRANSACTIONS,
-                ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS)
+                ', '.join(
+                    [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS
+                )
             ))
+            cursor.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
 
             cursor.execute('COMMIT')
             conn.commit()
@@ -54,4 +57,52 @@ def __enter__(self):
         return self.conn
     
     def __exit__(self, _1, _2, _3):
-        self.conn.close()
\ No newline at end of file
+        self.conn.close()
+
+
+class InsertTable(object):
+    """
+    This class makes sure that a DataFrame is inserted into a temporary table of a sqlite databases.
+    It also makes sure that the temporary table is created in a safe way and disposed afterwards. For
+    this purpuse, instances of this class should be used in with statements.
+    """
+    
+    @staticmethod
+    def create_temp_table(conn):
+        """Creates temporary table suitable for inserting the DataFrame and returns its name."""
+
+        cursor = conn.cursor()
+        i, table_name, go_on = 0, '', True
+
+        while go_on:
+            go_on = False
+            table_name = 'insert_df_{}'.format(i)
+            try:
+                cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( table_name, ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS)))
+            except sqlite3.OperationalError:
+                go_on = True
+                i += 1
+        
+        return 'temp', table_name
+
+    def __init__(self, conn, data_frame):
+        "uses conn, fetches everything from 'data_frame' into a temporary table"
+        
+        self.conn = conn
+        self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table(conn)
+        data_frame.to_sql(
+            name=self.temp_table_name, 
+            schema=self.temp_table_schema,
+            index=False,
+            con=conn, 
+            chunksize=5000
+        )
+    
+    def __enter__(self):
+        return (self.temp_table_schema, self.temp_table_name)
+    
+    def __exit__(self, _1, _2, _3):
+        "Make sure the table is gone."
+        self.conn.cursor().execute('DROP TABLE {}.{}'.format(
+            self.temp_table_schema, self.temp_table_name
+        ))
\ No newline at end of file
diff --git a/pynance/database_test.py b/pynance/database_test.py
index e0f8ab1..13637ba 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -2,8 +2,10 @@
 import os.path
 import shutil
 from tempfile import mkdtemp
+import sqlite3
 
-from pynance.database import LowLevelConnection
+from pynance.database import LowLevelConnection, InsertTable
+from pynance.textimporter import read_csv, SupportedCsvTypes
 
 class TemporaryDirectory(object):
     def __enter__(self):
@@ -44,12 +46,76 @@ def test_creates_expected_tables(self):
                     cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall()
                 )
 
+class InsertTableTestCase(unittest.TestCase):
+
+    def test_create_temp_table_table_exists(self):
+        with TemporaryDirectory() as tmp_dir:
+            with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                table_schema, table_name = InsertTable.create_temp_table(conn)
+                # Fails if and only if table does not exist
+                conn.cursor().execute('select count(*) from {}.{}'.format(table_schema, table_name))
+    
+    def test_create_temp_table_choses_other_table_if_exists(self):
+        with TemporaryDirectory() as tmp_dir:
+            with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                conn.cursor().execute('CREATE TEMPORARY TABLE insert_df_0 (id INT)')
+                table_schema, table_name = InsertTable.create_temp_table(conn)
+                self.assertEqual(table_schema, 'temp')
+                self.assertEqual(table_name, 'insert_df_1', 'expected table creation to fail exactly the first time')
+    
+    def test_it_removes_the_temporary_table(self):
+        test_data_frame = read_csv(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
+        # TODO: get rid of the 'drop' here
+        test_data_frame = test_data_frame.drop(['origin'], axis=1)
+        with TemporaryDirectory() as tmp_dir:
+            with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                insert_table_with_schema = ''
+
+                def check_if_table_exists():
+                    conn.cursor().execute('select count(*) from {}'.format(insert_table_with_schema))
+
+                with InsertTable(conn, test_data_frame) as insert_table:
+                    insert_table_with_schema = '{}.{}'.format(insert_table[0], insert_table[1])
+                    check_if_table_exists()
+                
+                self.assertRaises(sqlite3.OperationalError, check_if_table_exists)
+
+    def test_it_works_with_dataframes_from_text_importer(self):
+        def run_test(csv_file, df_format):
+            # Get the DataFrame
+            self.assertTrue(os.path.isfile(csv_file))
+            # TODO: Investigate what origin is good for and if we want to include it as column
+            # in the database as well.
+            data_frame = read_csv(csv_file, df_format).drop(['origin'], axis=1)
+            self.assertTrue(len(data_frame.index) > 0)
+    
+            # Load it into the InserTable and test this
+            with TemporaryDirectory() as tmp_dir:
+                with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
+                    with InsertTable(conn, data_frame) as insert_table:
+
+                        data_frame_size = len(data_frame.index)
+                        database_rows = conn.cursor() \
+                            .execute('SELECT count(*) FROM {}.{}'.format(insert_table[0], insert_table[1])).fetchall()[0][0]
+
+                        self.assertEqual(data_frame_size, database_rows, 'not all (or more?) rows written to database')
+
+        run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
+        run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa)
 
 
 def test_suite():
     "return the test suite"
+
     suite = unittest.TestSuite()
+
     suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists'))
     suite.addTest(LowLevelConnectionTestCase('test_opens_connection'))
     suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables'))
+
+    suite.addTest(InsertTableTestCase('test_create_temp_table_table_exists'))
+    suite.addTest(InsertTableTestCase('test_create_temp_table_choses_other_table_if_exists'))
+    suite.addTest(InsertTableTestCase('test_it_removes_the_temporary_table'))
+    suite.addTest(InsertTableTestCase('test_it_works_with_dataframes_from_text_importer'))
+
     return suite
\ No newline at end of file

From d0b6214568d4384796778e949a3471ca383ffd23 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sat, 16 Feb 2019 00:01:25 +0100
Subject: [PATCH 04/27] Remove `dummy_test` from `unittest.py`

---
 unittests.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unittests.py b/unittests.py
index ad0faaf..359240e 100644
--- a/unittests.py
+++ b/unittests.py
@@ -4,7 +4,6 @@
 import unittest
 import doctest
 
-import pynance.dummy_test
 import pynance.textimporter_test
 import pynance.dash_viz.plot_flow_test
 import pynance.database_test

From 0cd15c000b29e11cd96cbf40633265a1774ddc1e Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 08:00:01 +0100
Subject: [PATCH 05/27] Adapt imports in `database_test.py`

---
 pynance/database_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pynance/database_test.py b/pynance/database_test.py
index 13637ba..74ee4a3 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -5,7 +5,8 @@
 import sqlite3
 
 from pynance.database import LowLevelConnection, InsertTable
-from pynance.textimporter import read_csv, SupportedCsvTypes
+from pynance.textimporter import read_csv
+from pynance.dkb import SupportedCsvTypes
 
 class TemporaryDirectory(object):
     def __enter__(self):

From 656ac1f8e3260b2a3bea663188ceb27668fe6359 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 08:34:48 +0100
Subject: [PATCH 06/27] Use DEFERRED isolation level in `LowLevelConnection`

This is the default in Python 3 anyways, Python 2 uses a different
default that forces transactions to be committed immediatelly.
---
 pynance/database.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index 349014c..7f01121 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -25,6 +25,18 @@ class LowLevelConnection(object):
         'tags TEXT'
     ]
 
+    def _get_db_conn(self):
+        """
+        Get the connection to the sqlite database. We use the 'DEFERRED' isolation level. This
+        is the default in Python 3 anyways, in Python 2 the default is autocommit mode. The DEFERRED
+        isolation level seems appropriate in this case. See also
+        * https://www.sqlite.org/lang_transaction.html
+        """
+        return sqlite3.connect(
+            self.db_file_name,
+            isolation_level = 'DEFERRED' 
+        )
+
     def __init__(self, schema_version, db_file_name):
         """
         Parameters:
@@ -34,9 +46,9 @@ def __init__(self, schema_version, db_file_name):
         assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS
         self.db_file_name = db_file_name
 
-        with sqlite3.connect(self.db_file_name) as conn:
+        with self._get_db_conn() as conn:
             cursor = conn.cursor()
-            cursor.execute('BEGIN TRANSACTION')
+            cursor.execute('BEGIN')
 
             cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
             cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
@@ -53,7 +65,7 @@ def __init__(self, schema_version, db_file_name):
             conn.commit()
     
     def __enter__(self):
-        self.conn = sqlite3.connect(self.db_file_name)
+        self.conn = self._get_db_conn()
         return self.conn
     
     def __exit__(self, _1, _2, _3):

From 4709f40ba3c9bcb75a5c0ed0732326c046f2f8b0 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 12:10:10 +0100
Subject: [PATCH 07/27] Ingore *.pyc

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 639a9e9..4662532 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ __pycache__
 /.vscode
 docs/graphs/png/*.png
 .doit.db.*
-pynance/*.pyc
+*.pyc
 .pytest_cache
 /.hypothesis
 .coverage

From 8db7fbf0e8b3e23de1902c1fcebc51d18899a639 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 12:11:03 +0100
Subject: [PATCH 08/27] Setup database layout with connection as context
 manager

This makes it safe in Python 2 as well
---
 pynance/database.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index 7f01121..17a0864 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -46,23 +46,18 @@ def __init__(self, schema_version, db_file_name):
         assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS
         self.db_file_name = db_file_name
 
-        with self._get_db_conn() as conn:
-            cursor = conn.cursor()
-            cursor.execute('BEGIN')
+        connection = self._get_db_conn()
+        with connection:
+            connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+            connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
 
-            cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
-            cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
-
-            cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
+            connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
                 LowLevelConnection.TABLE_TRANSACTIONS,
                 ', '.join(
                     [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS
                 )
             ))
-            cursor.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
-
-            cursor.execute('COMMIT')
-            conn.commit()
+            connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
     
     def __enter__(self):
         self.conn = self._get_db_conn()

From 9030725cc38c9859cfc9fb6e325d847193dc7eb9 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 12:17:51 +0100
Subject: [PATCH 09/27] Let `unittest` generate the database_test testsuite

---
 pynance/database_test.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/pynance/database_test.py b/pynance/database_test.py
index 74ee4a3..dc4e353 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -108,15 +108,7 @@ def run_test(csv_file, df_format):
 def test_suite():
     "return the test suite"
 
-    suite = unittest.TestSuite()
-
-    suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists'))
-    suite.addTest(LowLevelConnectionTestCase('test_opens_connection'))
-    suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables'))
-
-    suite.addTest(InsertTableTestCase('test_create_temp_table_table_exists'))
-    suite.addTest(InsertTableTestCase('test_create_temp_table_choses_other_table_if_exists'))
-    suite.addTest(InsertTableTestCase('test_it_removes_the_temporary_table'))
-    suite.addTest(InsertTableTestCase('test_it_works_with_dataframes_from_text_importer'))
+    suite = unittest.makeSuite(LowLevelConnectionTestCase)
+    suite.addTests(unittest.makeSuite(InsertTableTestCase))
 
     return suite
\ No newline at end of file

From 6619cc9af5260fd3369ef7121e7b172bc94a580c Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 15:22:16 +0100
Subject: [PATCH 10/27] Remove reference to `dummy_test`

---
 unittests.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unittests.py b/unittests.py
index 359240e..715a9ae 100644
--- a/unittests.py
+++ b/unittests.py
@@ -40,7 +40,6 @@ def add_doctests_for_module(package):
 
 def test_suite():
     suite = unittest.TestSuite()
-    suite.addTests(pynance.dummy_test.test_suite())
     suite.addTests(pynance.textimporter_test.test_suite())
     suite.addTests(pynance.dash_viz.plot_flow_test.test_suite())
     suite.addTests(pynance.database_test.test_suite())

From 35cfacbbb78bcaa98339ba0f0e1e80d04770b710 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 15:23:31 +0100
Subject: [PATCH 11/27] Ensure that `LowLevelConnection.__init__` is idempotent

The version before failed when it is executed on the same database
twice. Added test to ensure this behaviour is tested and fixed the
bug.
---
 pynance/database.py      | 33 ++++++++++++++++++++++++---------
 pynance/database_test.py | 11 +++++++++++
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index 17a0864..c835e60 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -1,5 +1,18 @@
 import sqlite3
 
+
+    
+def exists_table(conn, table_name):
+    """
+    Returns True if and only if 'table_name' is an existing table.
+    """
+
+    result = conn.execute(
+        'select count(*) from sqlite_master where type="table" and name="{}"'.format(table_name)
+    ).fetchall()
+    return result[0][0] == 1
+        
+
 class LowLevelConnection(object):
     """
     Class that handles low-level database connection. Makes sure the expected table strucutre exists.
@@ -48,16 +61,18 @@ def __init__(self, schema_version, db_file_name):
 
         connection = self._get_db_conn()
         with connection:
-            connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
-            connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+            if not exists_table(connection, LowLevelConnection.TABLE_SCHEMA_VERSION):
+                connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+                connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
 
-            connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
-                LowLevelConnection.TABLE_TRANSACTIONS,
-                ', '.join(
-                    [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS
-                )
-            ))
-            connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
+            if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS):
+                connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
+                    LowLevelConnection.TABLE_TRANSACTIONS,
+                    ', '.join(
+                        [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS
+                    )
+                ))
+                connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
     
     def __enter__(self):
         self.conn = self._get_db_conn()
diff --git a/pynance/database_test.py b/pynance/database_test.py
index dc4e353..4a44ba4 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -46,6 +46,17 @@ def test_creates_expected_tables(self):
                     [(1,)],
                     cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall()
                 )
+    
+    def test_works_on_same_database_twice(self):
+        with TemporaryDirectory() as tmp_dir:
+            db_name = os.path.join(tmp_dir, 'test.db')
+            with LowLevelConnection(1, db_name) as _:
+                pass
+            with LowLevelConnection(1, db_name) as conn:
+                result = conn \
+                    .execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) \
+                    .fetchall()
+                self.assertEqual(1, result[0][0])
 
 class InsertTableTestCase(unittest.TestCase):
 

From f6f80a853041d736036f1585e81eb14d688c4095 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Sun, 17 Feb 2019 15:42:18 +0100
Subject: [PATCH 12/27] Factor out COLUMNS to `definitions.py`

---
 pynance/definitions.py  | 20 ++++++++++++++++++++
 pynance/textimporter.py | 21 +++------------------
 2 files changed, 23 insertions(+), 18 deletions(-)
 create mode 100644 pynance/definitions.py

diff --git a/pynance/definitions.py b/pynance/definitions.py
new file mode 100644
index 0000000..a8e0c7b
--- /dev/null
+++ b/pynance/definitions.py
@@ -0,0 +1,20 @@
+"""
+This module contains common definitions that are shared across other pynance
+modules.
+"""
+
+import numpy as np
+
+# see issue #5 and #6
+# use numpy types for numbers, because that's what pandas likes
+COLUMNS = {
+    "date": np.datetime64,
+    "sender_account": str,
+    "receiver_account": str,
+    "text": str,
+    "amount": np.float64,
+    "total_balance": np.float64,
+    "currency": str,
+    "category": str,
+    "tags": str,
+    "origin": str}
\ No newline at end of file
diff --git a/pynance/textimporter.py b/pynance/textimporter.py
index 77f5e25..92a4cd7 100644
--- a/pynance/textimporter.py
+++ b/pynance/textimporter.py
@@ -4,6 +4,8 @@
 import pandas as pd
 import numpy as np
 
+from .definitions import COLUMNS
+
 
 def read_csv(filepath_or_buffer, description):
     """
@@ -197,21 +199,4 @@ class UnsupportedCsvFormatException(IOError):
         An error that occurs, if the importer is asked to read a CSV file with
         a setting that does not fit the actual file
     """
-    pass
-
-
-# STATIC DEFINITIONS below this line ################
-
-# see issue #5 and #6
-# use numpy types for numbers, because that's what pandas likes
-COLUMNS = {
-    "date": np.datetime64,
-    "sender_account": str,
-    "receiver_account": str,
-    "text": str,
-    "amount": np.float64,
-    "total_balance": np.float64,
-    "currency": str,
-    "category": str,
-    "tags": str,
-    "origin": str}
+    pass
\ No newline at end of file

From 044fea6c26c0acebba46e82194a98ad5709d4cac Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Mon, 25 Feb 2019 20:51:32 +0100
Subject: [PATCH 13/27] Get definition of columns for InsertTable from
 definitions.COLUMNS

---
 pynance/database.py      | 51 ++++++++++++++++++++++++++--------------
 pynance/database_test.py | 17 +++++++++++++-
 2 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index c835e60..d8fffb2 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -1,7 +1,8 @@
 import sqlite3
+import numpy as np
+from .definitions import COLUMNS
 
 
-    
 def exists_table(conn, table_name):
     """
     Returns True if and only if 'table_name' is an existing table.
@@ -12,6 +13,30 @@ def exists_table(conn, table_name):
     ).fetchall()
     return result[0][0] == 1
         
+def generate_sqlite_columns_definitions():
+    """
+    Converts definitions.COLUMNS into the column definitions of a sqlite table. By column definitions,
+    we mean the part of a CREATE TABLE statement that defines the columns:
+      
+      CREATE TABLE my_table_name (<column definitions here>)
+    
+    Returns the column definitions as string
+    """
+
+    type_lookup_dict = {
+        str: 'TEXT',
+        np.datetime64: 'TEXT',
+        np.float64: 'REAL'
+    }
+
+    def name_type_to_string(x):
+        # print('foooo: {}'.format(x))
+        col_name, col_type = x
+        if col_type not in type_lookup_dict:
+            raise ValueError("Don't know which sqlite type '{}' is".format(col_type))
+        return '{} {}'.format(col_name, type_lookup_dict[col_type])        
+
+    return ', '.join(map(name_type_to_string, COLUMNS.items()))
 
 class LowLevelConnection(object):
     """
@@ -25,18 +50,6 @@ class LowLevelConnection(object):
     TABLE_SCHEMA_VERSION = 'schema'
     TABLE_TRANSACTIONS = 'transactions'
     TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY'
-    TABLE_TRANSACTIONS_FIELDS = [
-        'imported_at INTEGER', # unix timestamp
-        'date TEXT', # format: YYYY-MM-DD
-        'sender_account TEXT', 
-        'receiver_account TEXT',
-        'text TEXT',
-        'amount REAL',
-        'total_balance REAL',
-        'currency TEXT',
-        'category TEXT',
-        'tags TEXT'
-    ]
 
     def _get_db_conn(self):
         """
@@ -66,11 +79,10 @@ def __init__(self, schema_version, db_file_name):
                 connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
 
             if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS):
-                connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
+                connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format(
                     LowLevelConnection.TABLE_TRANSACTIONS,
-                    ', '.join(
-                        [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS
-                    )
+                    LowLevelConnection.TABLE_TRANSACTIONS_ID,
+                    generate_sqlite_columns_definitions()
                 ))
                 connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
     
@@ -100,7 +112,10 @@ def create_temp_table(conn):
             go_on = False
             table_name = 'insert_df_{}'.format(i)
             try:
-                cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( table_name, ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS)))
+                cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( 
+                    table_name, 
+                    generate_sqlite_columns_definitions()
+                ))
             except sqlite3.OperationalError:
                 go_on = True
                 i += 1
diff --git a/pynance/database_test.py b/pynance/database_test.py
index 4a44ba4..eedbdd8 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -4,7 +4,8 @@
 from tempfile import mkdtemp
 import sqlite3
 
-from pynance.database import LowLevelConnection, InsertTable
+from pynance.database import generate_sqlite_columns_definitions, \
+    LowLevelConnection, InsertTable
 from pynance.textimporter import read_csv
 from pynance.dkb import SupportedCsvTypes
 
@@ -16,6 +17,20 @@ def __enter__(self):
     def __exit__(self, _1, _2, _3):
         shutil.rmtree(self.dir)
 
+class ColumnsDefinitionsTestCase(unittest.TestCase):
+    def test_it_produces_valid_string(self):
+        result = generate_sqlite_columns_definitions()
+        self.assertEqual(type(result), str)
+        self.assertTrue(len(result) > 0)
+    
+    def test_it_produces_valid_sql_types(self):
+        with TemporaryDirectory() as tmp_dir:
+            conn = sqlite3.connect(os.path.join(tmp_dir, 'test.db'))
+            with conn:
+                column_definitions = generate_sqlite_columns_definitions()
+                conn.execute('CREATE TABLE test ({})'.format(column_definitions))
+
+
 class LowLevelConnectionTestCase(unittest.TestCase):
     def test_creates_database_file_if_not_exists(self):
         with TemporaryDirectory() as tmp_dir:

From ede1b3a70f1bedca66e4089d0105543f8031f02a Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Mon, 25 Feb 2019 20:52:14 +0100
Subject: [PATCH 14/27] Remove database_test.test_suite

This function is obsolete since tests are discovered by pytest
---
 pynance/database_test.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/pynance/database_test.py b/pynance/database_test.py
index eedbdd8..ecbf186 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -129,12 +129,3 @@ def run_test(csv_file, df_format):
 
         run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
         run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa)
-
-
-def test_suite():
-    "return the test suite"
-
-    suite = unittest.makeSuite(LowLevelConnectionTestCase)
-    suite.addTests(unittest.makeSuite(InsertTableTestCase))
-
-    return suite
\ No newline at end of file

From fb713456353ae2c3e8039b5f6e5756bebd1ef7a9 Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Mon, 4 Mar 2019 21:46:18 +0100
Subject: [PATCH 15/27] Add test strategies for generating transactions
 dataframes

---
 pynance/test_stragegies/transactions.py      | 68 ++++++++++++++++++++
 pynance/test_stragegies/transactions_test.py | 34 ++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 pynance/test_stragegies/transactions.py
 create mode 100644 pynance/test_stragegies/transactions_test.py

diff --git a/pynance/test_stragegies/transactions.py b/pynance/test_stragegies/transactions.py
new file mode 100644
index 0000000..b5df0da
--- /dev/null
+++ b/pynance/test_stragegies/transactions.py
@@ -0,0 +1,68 @@
+"""
+Contains transaction test strategies.
+"""
+
+import hypothesis.strategies as st 
+import datetime
+import pandas as pd
+import numpy as np
+
+KNOWN_CURRENCIES = ['EUR', 'USD']
+ALPHABET = list(map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß'))
+
+@st.composite
+def single_transaction(draw, min_date=None, max_date=None):
+    if not min_date:
+        min_date = datetime.date(1000,1,1)
+    if not max_date:
+        max_date = datetime.date(9999,12,31)
+
+    date = np.datetime64(draw(st.dates(min_value=min_date, max_value=max_date)))
+    sender_account = draw(st.text(alphabet=ALPHABET))
+    receiver_account = str(draw(st.text(alphabet=ALPHABET)))
+    text = str(draw(st.text(alphabet=ALPHABET)))
+    amount = draw(st.floats(min_value=0.01, max_value=10000000))
+    total_balance = draw(st.floats(min_value=0.01, max_value=10000000))
+    currency = str(draw(st.sampled_from(KNOWN_CURRENCIES)))
+    category = str(draw(st.text(alphabet=ALPHABET)))
+    tags = str(draw(st.text(alphabet=ALPHABET)))
+    origin = str(draw(st.text(alphabet=ALPHABET)))
+
+    return (date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin)
+
+@st.composite
+def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
+    elements = draw(st.lists(
+        single_transaction(min_date=min_date, max_date=max_date),
+         min_size=min_size,
+         max_size=max_size
+    ))
+
+    dates, sender_accounts, receiver_accounts, texts, amounts, total_balances, currencies, \
+        categories, tagss, origins = [],[],[],[],[],[],[],[],[],[]
+
+    for date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin in elements:
+        dates.append(date)
+        sender_accounts.append(sender_account)
+        receiver_accounts.append(receiver_account)
+        texts.append(text)
+        amounts.append(amount)
+        total_balances.append(total_balance)
+        currencies.append(currency)
+        categories.append(category)
+        tagss.append(tags)
+        origins.append(origin)
+
+
+    return pd.DataFrame({
+        'date': dates,
+        'sender_account': sender_accounts,
+        'receiver_account': receiver_accounts,
+        'text': texts,
+        'amount': amounts,
+        'total_balance': total_balances,
+        'currency': currencies,
+        'category': categories,
+        'tags': tagss,
+        'origin': origins
+    })
\ No newline at end of file
diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/test_stragegies/transactions_test.py
new file mode 100644
index 0000000..cd67607
--- /dev/null
+++ b/pynance/test_stragegies/transactions_test.py
@@ -0,0 +1,34 @@
+import unittest
+import transactions as t
+from hypothesis import given
+import numpy as np
+from datetime import date
+
+from pynance.definitions import COLUMNS
+
+class DataframeTestCase(unittest.TestCase):
+
+    @given(df=t.dataframe(min_size=1, max_size=1))
+    def test_has_expected_columns(self, df):
+        types = dict(df.dtypes)
+        self.assertEqual(len(types), len(COLUMNS))
+        for col in COLUMNS:
+            self.assertTrue(col in types)
+    
+    @given(df=t.dataframe(min_size = 1, min_date=date(2000,1,1)))
+    def test_respects_min_date(self, df):
+        remaining = df['date'][df['date'] < date(2000,1,1)]
+        self.assertEqual(remaining.size, 0)
+
+    @given(df=t.dataframe(min_size = 1, max_date=date(2000,1,1)))
+    def test_respects_max_date(self, df):
+        remaining = df['date'][df['date'] > date(2000,1,1)]
+        self.assertEqual(remaining.size, 0)
+
+    @given(df=t.dataframe(min_size = 10))
+    def test_respects_min_size(self, df):
+        self.assertTrue(df.size >= 10)
+
+    @given(df=t.dataframe(max_size = 10))
+    def test_respects_max_size(self):
+        self.assertTrue(df.size <= 10)
\ No newline at end of file

From df2ee629d945e74165540468d6e42bdbb522d3cf Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Tue, 5 Mar 2019 18:23:09 +0100
Subject: [PATCH 16/27] Fix bug and improve performance of transactions
 strategy

---
 pynance/test_stragegies/transactions.py      | 31 +++++++++-----------
 pynance/test_stragegies/transactions_test.py |  8 ++---
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/pynance/test_stragegies/transactions.py b/pynance/test_stragegies/transactions.py
index b5df0da..a3e0844 100644
--- a/pynance/test_stragegies/transactions.py
+++ b/pynance/test_stragegies/transactions.py
@@ -12,26 +12,24 @@
 
 @st.composite
 def single_transaction(draw, min_date=None, max_date=None):
-    if not min_date:
-        min_date = datetime.date(1000,1,1)
-    if not max_date:
-        max_date = datetime.date(9999,12,31)
 
-    date = np.datetime64(draw(st.dates(min_value=min_date, max_value=max_date)))
-    sender_account = draw(st.text(alphabet=ALPHABET))
-    receiver_account = str(draw(st.text(alphabet=ALPHABET)))
-    text = str(draw(st.text(alphabet=ALPHABET)))
-    amount = draw(st.floats(min_value=0.01, max_value=10000000))
-    total_balance = draw(st.floats(min_value=0.01, max_value=10000000))
-    currency = str(draw(st.sampled_from(KNOWN_CURRENCIES)))
-    category = str(draw(st.text(alphabet=ALPHABET)))
-    tags = str(draw(st.text(alphabet=ALPHABET)))
-    origin = str(draw(st.text(alphabet=ALPHABET)))
+    # As a performance optimization, we don't generate each column individually and reuse
+    # already generated values. If we don't do this, test generation is too slow and
+    # Hypothesis' HealthChecks make the tests fail
+    d = draw(st.dates(min_value=min_date, max_value=max_date))
+    text = draw(st.text(alphabet=ALPHABET))
+    floats = draw(st.floats(min_value=0.01, max_value=10000000))
+    currency = draw(st.sampled_from(KNOWN_CURRENCIES))
 
-    return (date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin)
+    return (d, text, text, text, floats, floats, currency, text, text, text)
 
 @st.composite
 def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
+    if not min_date:
+        min_date = datetime.date(1000,1,1)
+    if not max_date:
+        max_date = datetime.date(9999,12,31)
+
     elements = draw(st.lists(
         single_transaction(min_date=min_date, max_date=max_date),
          min_size=min_size,
@@ -64,5 +62,4 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
         'currency': currencies,
         'category': categories,
         'tags': tagss,
-        'origin': origins
-    })
\ No newline at end of file
+        'origin': origins })
\ No newline at end of file
diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/test_stragegies/transactions_test.py
index cd67607..c62e9df 100644
--- a/pynance/test_stragegies/transactions_test.py
+++ b/pynance/test_stragegies/transactions_test.py
@@ -27,8 +27,8 @@ def test_respects_max_date(self, df):
 
     @given(df=t.dataframe(min_size = 10))
     def test_respects_min_size(self, df):
-        self.assertTrue(df.size >= 10)
+        self.assertGreaterEqual(len(df), 10)
 
-    @given(df=t.dataframe(max_size = 10))
-    def test_respects_max_size(self):
-        self.assertTrue(df.size <= 10)
\ No newline at end of file
+    @given(t.dataframe(max_size = 10))
+    def test_respects_max_size(self, df):
+        self.assertLessEqual(len(df), 10)
\ No newline at end of file

From f73abc8cc658345c8ec6cc4753d3b6c7a874eb9d Mon Sep 17 00:00:00 2001
From: David Nies <david.nies@posteo.de>
Date: Tue, 12 Mar 2019 23:02:50 +0100
Subject: [PATCH 17/27] Add stub for StorageClass and its tests

---
 pynance/database.py      | 33 +++++++++++++++++++++++++++++++--
 pynance/database_test.py | 27 +++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index d8fffb2..9e4768f 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -1,3 +1,7 @@
+"""
+Explain the classes briefly. Elaborate on Storage
+"""
+
 import sqlite3
 import numpy as np
 from .definitions import COLUMNS
@@ -30,7 +34,6 @@ def generate_sqlite_columns_definitions():
     }
 
     def name_type_to_string(x):
-        # print('foooo: {}'.format(x))
         col_name, col_type = x
         if col_type not in type_lookup_dict:
             raise ValueError("Don't know which sqlite type '{}' is".format(col_type))
@@ -142,4 +145,30 @@ def __exit__(self, _1, _2, _3):
         "Make sure the table is gone."
         self.conn.cursor().execute('DROP TABLE {}.{}'.format(
             self.temp_table_schema, self.temp_table_name
-        ))
\ No newline at end of file
+        ))
+
+
+class Storage(object):
+    
+    def __init__(self, db_file):
+        pass
+    
+    @classmethod
+    def validate_dataframe_shape(cls, data_frame):
+        """
+        asserts that the correct columns are present. Tollerates that additional columns are present
+        """
+        pass
+    
+    def append_dataframe(self, data_frame):
+        """
+        asserts that the shape of the dataframe is correct
+        returns the part of the dataframe that is new. This part has also an ID column
+        """
+        pass
+    
+    def load_dataframe(self):
+        """
+        loads from db. contains ID column
+        """
+        pass
\ No newline at end of file
diff --git a/pynance/database_test.py b/pynance/database_test.py
index ecbf186..f06ba37 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -129,3 +129,30 @@ def run_test(csv_file, df_format):
 
         run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
         run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa)
+
+
+class StorageTestCase(unittest.TestCase):
+
+    def test_validate_dataframe_shape_complains_when_columns_are_missing(self):
+        "Assertion when columns are missing"
+        pass
+
+    def test_validate_dataframe_shape_accepts_aditional_columns(self):
+        "Does not compain when aditional columns are present"
+        pass
+    
+    def test_append_dataframe_rejects_invalid_dataframes(self):
+        pass
+
+    def test_append_dataframe_returns_new_parts_with_id(self):
+        pass
+
+    def test_append_dataframe_returned_ids_are_the_same_as_in_load_dataframe(self):
+        pass
+
+    def test_append_dataframe_duplicats_are_left_out(self):
+        pass
+    
+    def test_load_dataframe_works_with_new_storage_instance(self):
+        "implies new conn etc..."
+        pass
\ No newline at end of file

From cc635473b86b7216d83b04b3030a9aab793ef52e Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sat, 16 Mar 2019 00:06:54 +0100
Subject: [PATCH 18/27] basic tests for storage facade

---
 pynance/storage_test.py | 121 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 pynance/storage_test.py

diff --git a/pynance/storage_test.py b/pynance/storage_test.py
new file mode 100644
index 0000000..c1dc80a
--- /dev/null
+++ b/pynance/storage_test.py
@@ -0,0 +1,121 @@
+import unittest
+import os
+
+import numpy as np
+import pandas as pd
+from pandas.testing import assert_frame_equal
+
+from .database import Storage
+from .textimporter import read_csv
+from .dkb import SupportedCsvTypes
+from .definitions import COLUMNS
+
+
+class StorageTestCase(unittest.TestCase):
+    def _read_dummy_file_dkbcash_small(self):
+        dummyfile_dkbcash_small = os.path.join("pynance",
+                                               "test_data",
+                                               "dkb_cash_sample.csv")
+        assert os.path.isfile(dummyfile_dkbcash_small)
+
+        return read_csv(dummyfile_dkbcash_small,
+                        SupportedCsvTypes.DKBCash)
+
+    def _read_dummy_file_dkbvisa_small(self):
+        dummyfile_dkbvisa_small = os.path.join("pynance",
+                                               "test_data",
+                                               "dkb_visa_sample.csv")
+        assert os.path.isfile(dummyfile_dkbvisa_small)
+
+        return read_csv(dummyfile_dkbvisa_small,
+                        SupportedCsvTypes.DKBVisa)
+
+    def _assert_frame_relevant_columns_equal(self, df1, df2):
+        assert_frame_equal(df1[COLUMNS], df2[COLUMNS])
+
+    def _delete_temp_db_file(self):
+        if os.path.exists(self.db_file):
+            os.remove(self.db_file)
+
+    def setUp(self):
+        self.db_file = os.path.join("test_data", "test.sqlite")
+
+    def test_init_storage(self):
+        storage = Storage(self.db_file)
+        assert storage is not None
+
+    def test_init_storage_creates_file(self):
+        # delete file to make sure starting from scratch
+        self._delete_temp_db_file()
+
+        _ = Storage(self.db_file)
+        assert os.path.exists(self.db_file)
+
+    def test_append_dataframe_dkb_cash_small(self):
+        # delete file to make sure starting from scratch
+        self._delete_temp_db_file()
+
+        storage = Storage(self.db_file)
+        df = self._read_dummy_file_dkbcash_small()
+        newdf = storage.append_dataframe(df)
+
+        self._assert_frame_relevant_columns_equal(df, newdf)
+
+    def test_append_dataframe_dkb_cash_and_visa(self):
+        # delete file to make sure starting from scratch
+        self._delete_temp_db_file()
+
+        storage = Storage(self.db_file)
+        df_cash = self._read_dummy_file_dkbcash_small()
+        df_visa = self._read_dummy_file_dkbcash_small()
+
+        storage.append_dataframe(df_cash)
+        storage.append_dataframe(df_visa)
+
+        df_loaded = storage.load_dataframe()
+
+        df_expected = df_cash.append(df_visa).sort_values(by="date",
+                                                          ascending=False)
+
+        self._assert_frame_relevant_columns_equal(df_loaded, df_expected)
+
+    def test_load_dataframe(self):
+        # delete file to make sure starting from scratch
+        self._delete_temp_db_file()
+
+        storage = Storage(self.db_file)
+        df = self._read_dummy_file_dkbcash_small()
+        newdf = storage.append_dataframe(df)
+        loaded_df = storage.load_dataframe()
+
+        self._assert_frame_relevant_columns_equal(df, loaded_df)
+
+    def test_append_dataframe_ignores_duplicates(self):
+        # delete file to make sure starting from scratch
+        self._delete_temp_db_file()
+
+        storage = Storage(self.db_file)
+        df = self._read_dummy_file_dkbcash_small()
+
+        # appending twice
+        newdf = storage.append_dataframe(df)
+        newdf2 = storage.append_dataframe(df)
+
+        loaded_df = storage.load_dataframe()
+
+        self._assert_frame_relevant_columns_equal(df, loaded_df)
+
+    def test_append_invalid_dataframe_fails(self):
+        random_df = pd.DataFrame(np.random.randn(100, 2),
+                                 columns=['colA', 'colB'])
+
+        storage = Storage(self.db_file)
+
+        def append_invalid():
+            return storage.append_dataframe(random_df)
+
+        self.assertRaises(Exception, append_invalid)
+
+    def tearDown(self):
+        # remove temporary db file
+        self._delete_temp_db_file()

From 2bc286d326891430967ef7118f5e3b7b8642cbac Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Wed, 22 May 2019 23:00:53 +0200
Subject: [PATCH 19/27] requirements dev

---
 requirements-dev.txt | 21 +++++++++++++++++++++
 requirements.txt     | 15 +--------------
 2 files changed, 22 insertions(+), 14 deletions(-)
 create mode 100644 requirements-dev.txt

diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..07bbe3e
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,21 @@
+pandas
+numpy
+dash
+dash-core-components
+dash-html-components
+
+hypothesis
+codecov
+
+# The latest version of doit supporting Python 2 is 0.29.0
+doit==0.29.0; python_version < '3.0'
+doit; python_version >= '3.0'
+attrs>17.4
+
+pytest==4.0.0; python_version < '3.0'
+pytest-cov==2.6.0; python_version < '3.0'
+pytest==4.2.0; python_version >= '3.0'
+pytest-cov==2.6.1; python_version >= '3.0'
+
+pylint
+pep8
diff --git a/requirements.txt b/requirements.txt
index 8da95b6..f01f8f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,17 +2,4 @@ pandas
 numpy
 dash
 dash-core-components
-dash-html-components
-
-hypothesis
-codecov
-
-# The latest version of doit supporting Python 2 is 0.29.0
-doit==0.29.0; python_version < '3.0'
-doit; python_version >= '3.0'
-attrs>17.4
-
-pytest==4.0.0; python_version < '3.0'
-pytest-cov==2.6.0; python_version < '3.0'
-pytest==4.2.0; python_version >= '3.0'
-pytest-cov==2.6.1; python_version >= '3.0'
+dash-html-components
\ No newline at end of file

From d08aa56df1445f333ad3e47443a2fb96b862c51a Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Thu, 23 May 2019 00:22:11 +0200
Subject: [PATCH 20/27] hash functions for dataframe

---
 pynance/dataframe_util.py                        | 13 +++++++++++++
 pynance/dataframe_util_test.py                   | 16 ++++++++++++++++
 pynance/{test_stragegies => }/transactions.py    |  0
 .../{test_stragegies => }/transactions_test.py   |  0
 4 files changed, 29 insertions(+)
 create mode 100644 pynance/dataframe_util.py
 create mode 100644 pynance/dataframe_util_test.py
 rename pynance/{test_stragegies => }/transactions.py (100%)
 rename pynance/{test_stragegies => }/transactions_test.py (100%)

diff --git a/pynance/dataframe_util.py b/pynance/dataframe_util.py
new file mode 100644
index 0000000..b10b701
--- /dev/null
+++ b/pynance/dataframe_util.py
@@ -0,0 +1,13 @@
+from .definitions import COLUMNS
+from hashlib import md5
+
+
+def hash_row(row):
+    h = md5()
+    for value in row:
+        h.update(bytes(str(value), encoding='utf8'))
+    return h.hexdigest()
+
+
+def create_id_hash(new_df):
+    return new_df.apply(hash_row, axis=1)
diff --git a/pynance/dataframe_util_test.py b/pynance/dataframe_util_test.py
new file mode 100644
index 0000000..f302124
--- /dev/null
+++ b/pynance/dataframe_util_test.py
@@ -0,0 +1,16 @@
+import unittest
+from hypothesis import given
+from datetime import date
+
+from .transactions import dataframe
+from .dataframe_util import hash_row, create_id_hash
+
+
+class DataframeUtilTestcase(unittest.TestCase):
+
+    @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1)))
+    def test_hash_row(self, df):
+        for i, row in df.iterrows():
+            hash_result = hash_row(row)
+            self.assertEqual(type(hash_result), str)
+            self.assertEqual(type(hash_result), str)
diff --git a/pynance/test_stragegies/transactions.py b/pynance/transactions.py
similarity index 100%
rename from pynance/test_stragegies/transactions.py
rename to pynance/transactions.py
diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/transactions_test.py
similarity index 100%
rename from pynance/test_stragegies/transactions_test.py
rename to pynance/transactions_test.py

From 533cb9bd4b77f92b42f846f363d8734183c61173 Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Thu, 23 May 2019 00:27:13 +0200
Subject: [PATCH 21/27] extended dataframe hash util tests

---
 pynance/dataframe_util_test.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pynance/dataframe_util_test.py b/pynance/dataframe_util_test.py
index f302124..a24a32e 100644
--- a/pynance/dataframe_util_test.py
+++ b/pynance/dataframe_util_test.py
@@ -13,4 +13,13 @@ def test_hash_row(self, df):
         for i, row in df.iterrows():
             hash_result = hash_row(row)
             self.assertEqual(type(hash_result), str)
-            self.assertEqual(type(hash_result), str)
+            self.assertEqual(len(hash_result), 32)
+
+    @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1)))
+    def test_create_id_hash(self, df):
+        result_hash_column = create_id_hash(df)
+        self.assertEqual(len(result_hash_column), len(df))
+
+        for item in result_hash_column:
+            self.assertEqual(type(item), str)
+            self.assertEqual(len(item), 32)

From 663c329a185c45c3be3784f87177b56eb82afe2f Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Thu, 23 May 2019 00:32:40 +0200
Subject: [PATCH 22/27] started implementing storage with additional hash
 column for duplicate detection

---
 pynance/database.py     | 78 +++++++++++++++++++++++++----------------
 pynance/definitions.py  | 15 ++++----
 pynance/textimporter.py |  5 +--
 requirements-dev.txt    |  1 +
 4 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index 9e4768f..197b6a6 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -16,14 +16,15 @@ def exists_table(conn, table_name):
         'select count(*) from sqlite_master where type="table" and name="{}"'.format(table_name)
     ).fetchall()
     return result[0][0] == 1
-        
+
+
 def generate_sqlite_columns_definitions():
     """
     Converts definitions.COLUMNS into the column definitions of a sqlite table. By column definitions,
     we mean the part of a CREATE TABLE statement that defines the columns:
-      
+
       CREATE TABLE my_table_name (<column definitions here>)
-    
+
     Returns the column definitions as string
     """
 
@@ -36,15 +37,17 @@ def generate_sqlite_columns_definitions():
     def name_type_to_string(x):
         col_name, col_type = x
         if col_type not in type_lookup_dict:
-            raise ValueError("Don't know which sqlite type '{}' is".format(col_type))
-        return '{} {}'.format(col_name, type_lookup_dict[col_type])        
+            raise ValueError(
+                "Don't know which sqlite type '{}' is".format(col_type))
+        return '{} {}'.format(col_name, type_lookup_dict[col_type])
 
     return ', '.join(map(name_type_to_string, COLUMNS.items()))
 
+
 class LowLevelConnection(object):
     """
     Class that handles low-level database connection. Makes sure the expected table strucutre exists.
-    Should be used in with-statements. 
+    Should be used in with-statements.
     """
 
     # Schema evolution should be handled later once it is needed
@@ -63,7 +66,7 @@ def _get_db_conn(self):
         """
         return sqlite3.connect(
             self.db_file_name,
-            isolation_level = 'DEFERRED' 
+            isolation_level='DEFERRED'
         )
 
     def __init__(self, schema_version, db_file_name):
@@ -78,8 +81,10 @@ def __init__(self, schema_version, db_file_name):
         connection = self._get_db_conn()
         with connection:
             if not exists_table(connection, LowLevelConnection.TABLE_SCHEMA_VERSION):
-                connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
-                connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION))
+                connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(
+                    LowLevelConnection.TABLE_SCHEMA_VERSION))
+                connection.execute('INSERT INTO {} VALUES (1)'.format(
+                    LowLevelConnection.TABLE_SCHEMA_VERSION))
 
             if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS):
                 connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format(
@@ -87,12 +92,13 @@ def __init__(self, schema_version, db_file_name):
                     LowLevelConnection.TABLE_TRANSACTIONS_ID,
                     generate_sqlite_columns_definitions()
                 ))
-                connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
-    
+                connection.execute('CREATE INDEX date_index ON {} ({})'.format(
+                    LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
+
     def __enter__(self):
         self.conn = self._get_db_conn()
         return self.conn
-    
+
     def __exit__(self, _1, _2, _3):
         self.conn.close()
 
@@ -103,7 +109,7 @@ class InsertTable(object):
     It also makes sure that the temporary table is created in a safe way and disposed afterwards. For
     this purpuse, instances of this class should be used in with statements.
     """
-    
+
     @staticmethod
     def create_temp_table(conn):
         """Creates temporary table suitable for inserting the DataFrame and returns its name."""
@@ -115,32 +121,33 @@ def create_temp_table(conn):
             go_on = False
             table_name = 'insert_df_{}'.format(i)
             try:
-                cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( 
-                    table_name, 
+                cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format(
+                    table_name,
                     generate_sqlite_columns_definitions()
                 ))
             except sqlite3.OperationalError:
                 go_on = True
                 i += 1
-        
+
         return 'temp', table_name
 
     def __init__(self, conn, data_frame):
         "uses conn, fetches everything from 'data_frame' into a temporary table"
-        
+
         self.conn = conn
-        self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table(conn)
+        self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table(
+            conn)
         data_frame.to_sql(
-            name=self.temp_table_name, 
+            name=self.temp_table_name,
             schema=self.temp_table_schema,
             index=False,
-            con=conn, 
+            con=conn,
             chunksize=5000
         )
-    
+
     def __enter__(self):
         return (self.temp_table_schema, self.temp_table_name)
-    
+
     def __exit__(self, _1, _2, _3):
         "Make sure the table is gone."
         self.conn.cursor().execute('DROP TABLE {}.{}'.format(
@@ -149,26 +156,37 @@ def __exit__(self, _1, _2, _3):
 
 
 class Storage(object):
-    
+
     def __init__(self, db_file):
-        pass
-    
+        self.db_file = db_file
+
     @classmethod
     def validate_dataframe_shape(cls, data_frame):
         """
-        asserts that the correct columns are present. Tollerates that additional columns are present
+        asserts that the correct columns are present. 
+        Tolerates that additional columns are present
         """
         pass
-    
+
     def append_dataframe(self, data_frame):
         """
         asserts that the shape of the dataframe is correct
         returns the part of the dataframe that is new. This part has also an ID column
         """
-        pass
-    
+        if not self.validate_dataframe_shape(data_frame):
+            raise Exception('Invalid dataframe')
+
+        with LowLevelConnection(1, self.db_file) as conn:
+            with InsertTable(conn, data_frame) as insert_table:
+                # add existing data to insert_table
+                with conn:
+                    conn.cursor().execute('INSERT INTO %s ')
+                # but only non-duplicates
+                # replace existing table by insert_table
+                pass
+
     def load_dataframe(self):
         """
         loads from db. contains ID column
         """
-        pass
\ No newline at end of file
+        pass
diff --git a/pynance/definitions.py b/pynance/definitions.py
index a8e0c7b..9b97d27 100644
--- a/pynance/definitions.py
+++ b/pynance/definitions.py
@@ -5,9 +5,7 @@
 
 import numpy as np
 
-# see issue #5 and #6
-# use numpy types for numbers, because that's what pandas likes
-COLUMNS = {
+IMMUTABLE_COLUMNS = {
     "date": np.datetime64,
     "sender_account": str,
     "receiver_account": str,
@@ -15,6 +13,11 @@
     "amount": np.float64,
     "total_balance": np.float64,
     "currency": str,
-    "category": str,
-    "tags": str,
-    "origin": str}
\ No newline at end of file
+    "origin": str
+}
+
+# see issue #5 and #6
+# use numpy types for numbers, because that's what pandas likes
+COLUMNS = dict(id=str,
+               category=str,
+               tags=str, **IMMUTABLE_COLUMNS)
diff --git a/pynance/textimporter.py b/pynance/textimporter.py
index 92a4cd7..9cf06a1 100644
--- a/pynance/textimporter.py
+++ b/pynance/textimporter.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from .definitions import COLUMNS
+from .dataframe_util import create_id_hash
 
 
 def read_csv(filepath_or_buffer, description):
@@ -81,7 +82,7 @@ def read_csv(filepath_or_buffer, description):
     amounts = new_df['amount'].values
     new_df['total_balance'] = amounts_to_balances(amounts,
                                                   final_total_balance)
-
+    new_df['id'] = create_id_hash(new_df)
     return new_df
 
 
@@ -199,4 +200,4 @@ class UnsupportedCsvFormatException(IOError):
         An error that occurs, if the importer is asked to read a CSV file with
         a setting that does not fit the actual file
     """
-    pass
\ No newline at end of file
+    pass
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 07bbe3e..eecf161 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -19,3 +19,4 @@ pytest-cov==2.6.1; python_version >= '3.0'
 
 pylint
 pep8
+autopep8
\ No newline at end of file

From c2ac19f47b5a55ac88be50b49b8b7fc142bc253a Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sun, 26 May 2019 22:04:06 +0200
Subject: [PATCH 23/27] updated dash response tests

---
 pynance/dash_viz/plot_flow_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pynance/dash_viz/plot_flow_test.py b/pynance/dash_viz/plot_flow_test.py
index deb4906..d130556 100644
--- a/pynance/dash_viz/plot_flow_test.py
+++ b/pynance/dash_viz/plot_flow_test.py
@@ -32,7 +32,7 @@ def test_onselect_csvtype(self):
 
         for expected, selected in zip(onselect_response, dropdown_values):
             response = onselect_csvtype(selected)
-            response_dict = json.loads(response.data.decode())
+            response_dict = json.loads(response)  # .data.decode())
             is_enabled = not response_dict["response"]["props"]["disabled"]
 
             self.assertEqual(expected, is_enabled)
@@ -140,7 +140,7 @@ def test_update_output(self):
         bytestr = self._read_sample_file_like_uploaded()
 
         response = update_output(bytestr, "DKBCash")
-        response_dict = json.loads(response.data.decode())
+        response_dict = json.loads(response)  # .data.decode())
 
         res_charts = response_dict["response"]["props"]["figure"]["data"]
 

From 13894666864837d1508572133f5b2f6cd5586bda Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sun, 26 May 2019 22:05:06 +0200
Subject: [PATCH 24/27] removed py27 backwards compatibility for temp dir

---
 pynance/database_test.py | 73 +++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/pynance/database_test.py b/pynance/database_test.py
index f06ba37..b3bce49 100644
--- a/pynance/database_test.py
+++ b/pynance/database_test.py
@@ -1,7 +1,7 @@
 import unittest
 import os.path
 import shutil
-from tempfile import mkdtemp
+from tempfile import TemporaryDirectory, TemporaryFile
 import sqlite3
 
 from pynance.database import generate_sqlite_columns_definitions, \
@@ -9,26 +9,21 @@
 from pynance.textimporter import read_csv
 from pynance.dkb import SupportedCsvTypes
 
-class TemporaryDirectory(object):
-    def __enter__(self):
-        self.dir = mkdtemp()
-        return self.dir
-    
-    def __exit__(self, _1, _2, _3):
-        shutil.rmtree(self.dir)
 
 class ColumnsDefinitionsTestCase(unittest.TestCase):
     def test_it_produces_valid_string(self):
         result = generate_sqlite_columns_definitions()
         self.assertEqual(type(result), str)
         self.assertTrue(len(result) > 0)
-    
+
     def test_it_produces_valid_sql_types(self):
         with TemporaryDirectory() as tmp_dir:
-            conn = sqlite3.connect(os.path.join(tmp_dir, 'test.db'))
-            with conn:
-                column_definitions = generate_sqlite_columns_definitions()
-                conn.execute('CREATE TABLE test ({})'.format(column_definitions))
+            tmp_file = os.path.join(tmp_dir, 'test.db')
+            conn = sqlite3.connect(tmp_file)
+            column_definitions = generate_sqlite_columns_definitions()
+            query = 'CREATE TABLE test ({})'.format(column_definitions)
+            conn.execute(query)
+            conn.close()
 
 
 class LowLevelConnectionTestCase(unittest.TestCase):
@@ -39,29 +34,31 @@ def test_creates_database_file_if_not_exists(self):
             with LowLevelConnection(1, db_file) as _:
                 pass
             self.assertTrue(os.path.exists(db_file))
-    
+
     def test_opens_connection(self):
         with TemporaryDirectory() as tmp_dir:
             with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
                 self.assertIsNotNone(conn)
-    
+
     def test_creates_expected_tables(self):
         with TemporaryDirectory() as tmp_dir:
             with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
                 cursor = conn.cursor()
                 tables = set(map(
                     lambda x: x[0],
-                    cursor.execute('select name from sqlite_master where type="table"').fetchall()
+                    cursor.execute(
+                        'select name from sqlite_master where type="table"').fetchall()
                 ))
                 self.assertEqual(
                     tables,
-                    set([LowLevelConnection.TABLE_SCHEMA_VERSION,LowLevelConnection.TABLE_TRANSACTIONS
-                ]))
+                    set([LowLevelConnection.TABLE_SCHEMA_VERSION, LowLevelConnection.TABLE_TRANSACTIONS
+                         ]))
                 self.assertEqual(
                     [(1,)],
-                    cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall()
+                    cursor.execute(
+                        'select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall()
                 )
-    
+
     def test_works_on_same_database_twice(self):
         with TemporaryDirectory() as tmp_dir:
             db_name = os.path.join(tmp_dir, 'test.db')
@@ -73,6 +70,7 @@ def test_works_on_same_database_twice(self):
                     .fetchall()
                 self.assertEqual(1, result[0][0])
 
+
 class InsertTableTestCase(unittest.TestCase):
 
     def test_create_temp_table_table_exists(self):
@@ -81,17 +79,19 @@ def test_create_temp_table_table_exists(self):
                 table_schema, table_name = InsertTable.create_temp_table(conn)
                 # Fails if and only if table does not exist
                 conn.cursor().execute('select count(*) from {}.{}'.format(table_schema, table_name))
-    
+
     def test_create_temp_table_choses_other_table_if_exists(self):
         with TemporaryDirectory() as tmp_dir:
             with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
                 conn.cursor().execute('CREATE TEMPORARY TABLE insert_df_0 (id INT)')
                 table_schema, table_name = InsertTable.create_temp_table(conn)
                 self.assertEqual(table_schema, 'temp')
-                self.assertEqual(table_name, 'insert_df_1', 'expected table creation to fail exactly the first time')
-    
+                self.assertEqual(
+                    table_name, 'insert_df_1', 'expected table creation to fail exactly the first time')
+
     def test_it_removes_the_temporary_table(self):
-        test_data_frame = read_csv(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
+        test_data_frame = read_csv(os.path.join(
+            'pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
         # TODO: get rid of the 'drop' here
         test_data_frame = test_data_frame.drop(['origin'], axis=1)
         with TemporaryDirectory() as tmp_dir:
@@ -102,10 +102,12 @@ def check_if_table_exists():
                     conn.cursor().execute('select count(*) from {}'.format(insert_table_with_schema))
 
                 with InsertTable(conn, test_data_frame) as insert_table:
-                    insert_table_with_schema = '{}.{}'.format(insert_table[0], insert_table[1])
+                    insert_table_with_schema = '{}.{}'.format(
+                        insert_table[0], insert_table[1])
                     check_if_table_exists()
-                
-                self.assertRaises(sqlite3.OperationalError, check_if_table_exists)
+
+                self.assertRaises(sqlite3.OperationalError,
+                                  check_if_table_exists)
 
     def test_it_works_with_dataframes_from_text_importer(self):
         def run_test(csv_file, df_format):
@@ -115,7 +117,7 @@ def run_test(csv_file, df_format):
             # in the database as well.
             data_frame = read_csv(csv_file, df_format).drop(['origin'], axis=1)
             self.assertTrue(len(data_frame.index) > 0)
-    
+
             # Load it into the InserTable and test this
             with TemporaryDirectory() as tmp_dir:
                 with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn:
@@ -125,10 +127,13 @@ def run_test(csv_file, df_format):
                         database_rows = conn.cursor() \
                             .execute('SELECT count(*) FROM {}.{}'.format(insert_table[0], insert_table[1])).fetchall()[0][0]
 
-                        self.assertEqual(data_frame_size, database_rows, 'not all (or more?) rows written to database')
+                        self.assertEqual(
+                            data_frame_size, database_rows, 'not all (or more?) rows written to database')
 
-        run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
-        run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa)
+        run_test(os.path.join('pynance', 'test_data',
+                              'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash)
+        run_test(os.path.join('pynance', 'test_data',
+                              'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa)
 
 
 class StorageTestCase(unittest.TestCase):
@@ -140,7 +145,7 @@ def test_validate_dataframe_shape_complains_when_columns_are_missing(self):
     def test_validate_dataframe_shape_accepts_aditional_columns(self):
         "Does not compain when aditional columns are present"
         pass
-    
+
     def test_append_dataframe_rejects_invalid_dataframes(self):
         pass
 
@@ -152,7 +157,7 @@ def test_append_dataframe_returned_ids_are_the_same_as_in_load_dataframe(self):
 
     def test_append_dataframe_duplicats_are_left_out(self):
         pass
-    
+
     def test_load_dataframe_works_with_new_storage_instance(self):
         "implies new conn etc..."
-        pass
\ No newline at end of file
+        pass

From ae801bca949aa9bdd3982564da3128f8775623ce Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sun, 26 May 2019 22:05:43 +0200
Subject: [PATCH 25/27] rename id to row_key as primary key

---
 pynance/database.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pynance/database.py b/pynance/database.py
index 197b6a6..8273a4c 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -55,7 +55,7 @@ class LowLevelConnection(object):
 
     TABLE_SCHEMA_VERSION = 'schema'
     TABLE_TRANSACTIONS = 'transactions'
-    TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY'
+    TABLE_TRANSACTIONS_ID = 'row_key INTEGER PRIMARY KEY'
 
     def _get_db_conn(self):
         """

From 74cb304d7c5f61e413baf4fde41cb4de65865c6f Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sun, 26 May 2019 22:06:16 +0200
Subject: [PATCH 26/27] added id column in strategy dataframes

---
 pynance/transactions.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/pynance/transactions.py b/pynance/transactions.py
index a3e0844..cd3eb34 100644
--- a/pynance/transactions.py
+++ b/pynance/transactions.py
@@ -2,13 +2,17 @@
 Contains transaction test strategies.
 """
 
-import hypothesis.strategies as st 
+import hypothesis.strategies as st
 import datetime
 import pandas as pd
 import numpy as np
 
+from .dataframe_util import create_id_hash
+
 KNOWN_CURRENCIES = ['EUR', 'USD']
-ALPHABET = list(map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß'))
+ALPHABET = list(
+    map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß'))
+
 
 @st.composite
 def single_transaction(draw, min_date=None, max_date=None):
@@ -23,21 +27,22 @@ def single_transaction(draw, min_date=None, max_date=None):
 
     return (d, text, text, text, floats, floats, currency, text, text, text)
 
+
 @st.composite
 def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
     if not min_date:
-        min_date = datetime.date(1000,1,1)
+        min_date = datetime.date(1000, 1, 1)
     if not max_date:
-        max_date = datetime.date(9999,12,31)
+        max_date = datetime.date(9999, 12, 31)
 
     elements = draw(st.lists(
         single_transaction(min_date=min_date, max_date=max_date),
-         min_size=min_size,
-         max_size=max_size
+        min_size=min_size,
+        max_size=max_size
     ))
 
     dates, sender_accounts, receiver_accounts, texts, amounts, total_balances, currencies, \
-        categories, tagss, origins = [],[],[],[],[],[],[],[],[],[]
+        categories, tagss, origins = [], [], [], [], [], [], [], [], [], []
 
     for date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin in elements:
         dates.append(date)
@@ -51,8 +56,7 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
         tagss.append(tags)
         origins.append(origin)
 
-
-    return pd.DataFrame({
+    result_frame = pd.DataFrame({
         'date': dates,
         'sender_account': sender_accounts,
         'receiver_account': receiver_accounts,
@@ -62,4 +66,10 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None):
         'currency': currencies,
         'category': categories,
         'tags': tagss,
-        'origin': origins })
\ No newline at end of file
+        'origin': origins})
+
+    hash_column = create_id_hash(result_frame)
+
+    result_frame['id'] = hash_column
+
+    return result_frame

From f8cac63a4def4218444ea4f673269d94d72f3566 Mon Sep 17 00:00:00 2001
From: Fabian Meyer <fabian.meyer@mailbox.org>
Date: Sun, 26 May 2019 23:01:18 +0200
Subject: [PATCH 27/27] fixed tests for database, id as key column

---
 pynance/database.py          | 26 +++++++++++++++++++-------
 pynance/storage_test.py      | 27 +++++++--------------------
 pynance/transactions_test.py | 21 +++++++++++----------
 3 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/pynance/database.py b/pynance/database.py
index 8273a4c..615d1c8 100644
--- a/pynance/database.py
+++ b/pynance/database.py
@@ -55,7 +55,7 @@ class LowLevelConnection(object):
 
     TABLE_SCHEMA_VERSION = 'schema'
     TABLE_TRANSACTIONS = 'transactions'
-    TABLE_TRANSACTIONS_ID = 'row_key INTEGER PRIMARY KEY'
+    ID_COLUMN = 'id'
 
     def _get_db_conn(self):
         """
@@ -87,13 +87,14 @@ def __init__(self, schema_version, db_file_name):
                     LowLevelConnection.TABLE_SCHEMA_VERSION))
 
             if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS):
-                connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format(
+                connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format(
                     LowLevelConnection.TABLE_TRANSACTIONS,
-                    LowLevelConnection.TABLE_TRANSACTIONS_ID,
                     generate_sqlite_columns_definitions()
                 ))
                 connection.execute('CREATE INDEX date_index ON {} ({})'.format(
                     LowLevelConnection.TABLE_TRANSACTIONS, 'date'))
+                connection.execute('CREATE INDEX id ON {} ({})'.format(
+                    LowLevelConnection.TABLE_TRANSACTIONS, LowLevelConnection.ID_COLUMN))
 
     def __enter__(self):
         self.conn = self._get_db_conn()
@@ -163,10 +164,10 @@ def __init__(self, db_file):
     @classmethod
     def validate_dataframe_shape(cls, data_frame):
         """
-        asserts that the correct columns are present. 
+        asserts that the correct columns are present.
         Tolerates that additional columns are present
         """
-        pass
+        return True
 
     def append_dataframe(self, data_frame):
         """
@@ -180,10 +181,21 @@ def append_dataframe(self, data_frame):
             with InsertTable(conn, data_frame) as insert_table:
                 # add existing data to insert_table
                 with conn:
-                    conn.cursor().execute('INSERT INTO %s ')
+                    column_keys = COLUMNS.keys()
+                    columns_str = ','.join(column_keys)
+                    conn.cursor().execute(
+                        '''
+                        INSERT INTO %s
+                        SELECT %s
+                        FROM %s
+                        ON CONFLICT (%s) DO NOTHING
+                        ''' % (insert_table,
+                               columns_str,
+                               LowLevelConnection.TABLE_TRANSACTIONS,
+                               LowLevelConnection.ID_COLUMN))
+                conn.close()
                 # but only non-duplicates
                 # replace existing table by insert_table
-                pass
 
     def load_dataframe(self):
         """
diff --git a/pynance/storage_test.py b/pynance/storage_test.py
index c1dc80a..562662c 100644
--- a/pynance/storage_test.py
+++ b/pynance/storage_test.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pandas as pd
 from pandas.testing import assert_frame_equal
+from tempfile import TemporaryDirectory
+
 
 from .database import Storage
 from .textimporter import read_csv
@@ -33,27 +35,19 @@ def _read_dummy_file_dkbvisa_small(self):
     def _assert_frame_relevant_columns_equal(self, df1, df2):
         assert_frame_equal(df1[COLUMNS], df2[COLUMNS])
 
-    def _delete_temp_db_file(self):
-        if os.path.exists(self.db_file):
-            os.remove(self.db_file)
-
     def setUp(self):
-        self.db_file = os.path.join("test_data", "test.sqlite")
+        self.tempdir = TemporaryDirectory()
+        self.db_file = os.path.join(self.tempdir.name, "test.sqlite")
+
+    def tearDown(self):
+        self.tempdir.cleanup()
 
     def test_init_storage(self):
         storage = Storage(self.db_file)
         assert storage is not None
 
-    def test_init_storage_creates_file(self):
-        # delete file to make sure starting from scratch
-        self._delete_temp_db_file()
-
-        _ = Storage(self.db_file)
-        assert os.path.exists(self.db_file)
-
     def test_append_dataframe_dkb_cash_small(self):
         # delete file to make sure starting from scratch
-        self._delete_temp_db_file()
 
         storage = Storage(self.db_file)
         df = self._read_dummy_file_dkbcash_small()
@@ -63,7 +57,6 @@ def test_append_dataframe_dkb_cash_small(self):
 
     def test_append_dataframe_dkb_cash_and_visa(self):
         # delete file to make sure starting from scratch
-        self._delete_temp_db_file()
 
         storage = Storage(self.db_file)
         df_cash = self._read_dummy_file_dkbcash_small()
@@ -81,7 +74,6 @@ def test_append_dataframe_dkb_cash_and_visa(self):
 
     def test_load_dataframe(self):
         # delete file to make sure starting from scratch
-        self._delete_temp_db_file()
 
         storage = Storage(self.db_file)
         df = self._read_dummy_file_dkbcash_small()
@@ -92,7 +84,6 @@ def test_load_dataframe(self):
 
     def test_append_dataframe_ignores_duplicates(self):
         # delete file to make sure starting from scratch
-        self._delete_temp_db_file()
 
         storage = Storage(self.db_file)
         df = self._read_dummy_file_dkbcash_small()
@@ -115,7 +106,3 @@ def append_invalid():
             return storage.append_dataframe(random_df)
 
         self.assertRaises(Exception, append_invalid)
-
-    def tearDown(self):
-        # remove temporary db file
-        self._delete_temp_db_file()
diff --git a/pynance/transactions_test.py b/pynance/transactions_test.py
index c62e9df..1709b56 100644
--- a/pynance/transactions_test.py
+++ b/pynance/transactions_test.py
@@ -1,34 +1,35 @@
 import unittest
-import transactions as t
+from .transactions import dataframe
 from hypothesis import given
 import numpy as np
 from datetime import date
 
 from pynance.definitions import COLUMNS
 
+
 class DataframeTestCase(unittest.TestCase):
 
-    @given(df=t.dataframe(min_size=1, max_size=1))
+    @given(df=dataframe(min_size=1, max_size=1))
     def test_has_expected_columns(self, df):
         types = dict(df.dtypes)
         self.assertEqual(len(types), len(COLUMNS))
         for col in COLUMNS:
             self.assertTrue(col in types)
-    
-    @given(df=t.dataframe(min_size = 1, min_date=date(2000,1,1)))
+
+    @given(df=dataframe(min_size=1, min_date=date(2000, 1, 1)))
     def test_respects_min_date(self, df):
-        remaining = df['date'][df['date'] < date(2000,1,1)]
+        remaining = df['date'][df['date'] < date(2000, 1, 1)]
         self.assertEqual(remaining.size, 0)
 
-    @given(df=t.dataframe(min_size = 1, max_date=date(2000,1,1)))
+    @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1)))
     def test_respects_max_date(self, df):
-        remaining = df['date'][df['date'] > date(2000,1,1)]
+        remaining = df['date'][df['date'] > date(2000, 1, 1)]
         self.assertEqual(remaining.size, 0)
 
-    @given(df=t.dataframe(min_size = 10))
+    @given(df=dataframe(min_size=10))
     def test_respects_min_size(self, df):
         self.assertGreaterEqual(len(df), 10)
 
-    @given(t.dataframe(max_size = 10))
+    @given(dataframe(max_size=10))
     def test_respects_max_size(self, df):
-        self.assertLessEqual(len(df), 10)
\ No newline at end of file
+        self.assertLessEqual(len(df), 10)