From 5dd6db30d18c914b88e16d44ffebb1de78017e86 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sat, 9 Feb 2019 03:02:19 +0100 Subject: [PATCH 01/27] Add `LowLevelConnection` This is a first draft that implements this class. It is responsible to obtain connections to the DB and make sure the expected tables exist. --- pynance/database.py | 57 ++++++++++++++++++++++++++++++++++++++++ pynance/database_test.py | 46 ++++++++++++++++++++++++++++++++ unittests.py | 4 ++- 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 pynance/database.py create mode 100644 pynance/database_test.py diff --git a/pynance/database.py b/pynance/database.py new file mode 100644 index 0000000..f8a7bae --- /dev/null +++ b/pynance/database.py @@ -0,0 +1,57 @@ +import sqlite3 + +class LowLevelConnection(object): + """ + Class that handles low-level database connection. Should be used in with-statements. + """ + + + # Schema evolution should be handled later once it is needed + SUPPORTED_SCHEMA_VERSIONS = [1] + + TABLE_SCHEMA_VERSION = 'schema' + TABLE_TRANSACTIONS = 'transactions' + TABLE_TRANSACTIONS_FIELDS = [ + 'id INTEGER PRIMARY KEY', + 'imported_at INTEGER', # unix timestamp + 'date TEXT', # format: YYYY-MM-DD + 'sender_account TEXT', + 'receiver_account TEXT', + 'text TEXT', + 'amount REAL', + 'total_balance REAL', + 'currency TEXT', + 'category TEXT', + 'tags TEXT' + ] + + def __init__(self, schema_version, db_file_name): + """ + Parameters: + * `schema_version`: Integer denoting the schema version. + * `db_file_name`: This DB file will be created if it does not yet exist. 
+ """ + assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS + self.db_file_name = db_file_name + + with sqlite3.connect(self.db_file_name) as conn: + cursor = conn.cursor() + cursor.execute('BEGIN TRANSACTION') + + cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + + cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( + LowLevelConnection.TABLE_TRANSACTIONS, + ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS) + )) + + cursor.execute('COMMIT') + conn.commit() + + def __enter__(self): + self.conn = sqlite3.connect(self.db_file_name) + return self.conn + + def __exit__(self, _1, _2, _3): + self.conn.close() \ No newline at end of file diff --git a/pynance/database_test.py b/pynance/database_test.py new file mode 100644 index 0000000..ed2bfbe --- /dev/null +++ b/pynance/database_test.py @@ -0,0 +1,46 @@ +import unittest +import os.path +from tempfile import TemporaryDirectory + +from pynance.database import LowLevelConnection + +class LowLevelConnectionTestCase(unittest.TestCase): + def test_creates_database_file_if_not_exists(self): + with TemporaryDirectory() as tmp_dir: + db_file = os.path.join(tmp_dir, 'test.db') + self.assertFalse(os.path.exists(db_file)) + with LowLevelConnection(1, db_file) as _: + pass + self.assertTrue(os.path.exists(db_file)) + + def test_opens_connection(self): + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: + self.assertIsNotNone(conn) + + def test_creates_expected_tables(self): + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: + cursor = conn.cursor() + tables = set(map( + lambda x: x[0], + cursor.execute('select name from sqlite_master where type="table"').fetchall() + )) + self.assertEqual( + tables, + 
set([LowLevelConnection.TABLE_SCHEMA_VERSION,LowLevelConnection.TABLE_TRANSACTIONS + ])) + self.assertEqual( + [(1,)], + cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall() + ) + + + +def test_suite(): + "return the test suite" + suite = unittest.TestSuite() + suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists')) + suite.addTest(LowLevelConnectionTestCase('test_opens_connection')) + suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables')) + return suite \ No newline at end of file diff --git a/unittests.py b/unittests.py index ad89cad..b86eb81 100644 --- a/unittests.py +++ b/unittests.py @@ -6,6 +6,7 @@ import pynance.dummy_test import pynance.textimporter_test import pynance.dash_viz.plot_flow_test +import pynance.database_test def doc_test_suite(): "Returns the testsuite doctests for all modules. Please don't forget to add new modules here." @@ -32,6 +33,7 @@ def test_suite(): suite.addTests(pynance.dummy_test.test_suite()) suite.addTests(pynance.textimporter_test.test_suite()) suite.addTests(pynance.dash_viz.plot_flow_test.test_suite()) + suite.addTests(pynance.database_test.test_suite()) suite.addTest(doc_test_suite()) @@ -46,4 +48,4 @@ def run_all_unit_tests(): if __name__ == "__main__": import sys all_tests_ok = run_all_unit_tests() - sys.exit(not all_tests_ok) \ No newline at end of file + sys.exit(not all_tests_ok) From 4585a32a5a4f29cf58c5aa7a5c44703687545961 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sat, 9 Feb 2019 03:15:30 +0100 Subject: [PATCH 02/27] Make tests involving temp directories compatible with Python 2 Python 2 does not have tempfile.TemporaryDirectory --- pynance/database_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pynance/database_test.py b/pynance/database_test.py index ed2bfbe..e0f8ab1 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -1,9 +1,18 @@ import unittest import 
os.path -from tempfile import TemporaryDirectory +import shutil +from tempfile import mkdtemp from pynance.database import LowLevelConnection +class TemporaryDirectory(object): + def __enter__(self): + self.dir = mkdtemp() + return self.dir + + def __exit__(self, _1, _2, _3): + shutil.rmtree(self.dir) + class LowLevelConnectionTestCase(unittest.TestCase): def test_creates_database_file_if_not_exists(self): with TemporaryDirectory() as tmp_dir: From 37b37a30ddca4e5db42f6dcec3c765566c7495b9 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 10 Feb 2019 15:45:10 +0100 Subject: [PATCH 03/27] Add class InsertTable This class duplicates a Pandas DataFrame into a temporary table inside a sqlite database. The table is disposed once it is not needed any more. --- pynance/database.py | 61 ++++++++++++++++++++++++++++++++--- pynance/database_test.py | 68 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index f8a7bae..349014c 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -2,17 +2,17 @@ class LowLevelConnection(object): """ - Class that handles low-level database connection. Should be used in with-statements. + Class that handles low-level database connection. Makes sure the expected table strucutre exists. + Should be used in with-statements. 
""" - # Schema evolution should be handled later once it is needed SUPPORTED_SCHEMA_VERSIONS = [1] TABLE_SCHEMA_VERSION = 'schema' TABLE_TRANSACTIONS = 'transactions' + TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY' TABLE_TRANSACTIONS_FIELDS = [ - 'id INTEGER PRIMARY KEY', 'imported_at INTEGER', # unix timestamp 'date TEXT', # format: YYYY-MM-DD 'sender_account TEXT', @@ -43,8 +43,11 @@ def __init__(self, schema_version, db_file_name): cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( LowLevelConnection.TABLE_TRANSACTIONS, - ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS) + ', '.join( + [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS + ) )) + cursor.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) cursor.execute('COMMIT') conn.commit() @@ -54,4 +57,52 @@ def __enter__(self): return self.conn def __exit__(self, _1, _2, _3): - self.conn.close() \ No newline at end of file + self.conn.close() + + +class InsertTable(object): + """ + This class makes sure that a DataFrame is inserted into a temporary table of a sqlite databases. + It also makes sure that the temporary table is created in a safe way and disposed afterwards. For + this purpuse, instances of this class should be used in with statements. 
+ """ + + @staticmethod + def create_temp_table(conn): + """Creates temporary table suitable for inserting the DataFrame and returns its name.""" + + cursor = conn.cursor() + i, table_name, go_on = 0, '', True + + while go_on: + go_on = False + table_name = 'insert_df_{}'.format(i) + try: + cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( table_name, ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS))) + except sqlite3.OperationalError: + go_on = True + i += 1 + + return 'temp', table_name + + def __init__(self, conn, data_frame): + "uses conn, fetches everything from 'data_frame' into a temporary table" + + self.conn = conn + self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table(conn) + data_frame.to_sql( + name=self.temp_table_name, + schema=self.temp_table_schema, + index=False, + con=conn, + chunksize=5000 + ) + + def __enter__(self): + return (self.temp_table_schema, self.temp_table_name) + + def __exit__(self, _1, _2, _3): + "Make sure the table is gone." 
+ self.conn.cursor().execute('DROP TABLE {}.{}'.format( + self.temp_table_schema, self.temp_table_name + )) \ No newline at end of file diff --git a/pynance/database_test.py b/pynance/database_test.py index e0f8ab1..13637ba 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -2,8 +2,10 @@ import os.path import shutil from tempfile import mkdtemp +import sqlite3 -from pynance.database import LowLevelConnection +from pynance.database import LowLevelConnection, InsertTable +from pynance.textimporter import read_csv, SupportedCsvTypes class TemporaryDirectory(object): def __enter__(self): @@ -44,12 +46,76 @@ def test_creates_expected_tables(self): cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall() ) +class InsertTableTestCase(unittest.TestCase): + + def test_create_temp_table_table_exists(self): + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: + table_schema, table_name = InsertTable.create_temp_table(conn) + # Fails if and only if table does not exist + conn.cursor().execute('select count(*) from {}.{}'.format(table_schema, table_name)) + + def test_create_temp_table_choses_other_table_if_exists(self): + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: + conn.cursor().execute('CREATE TEMPORARY TABLE insert_df_0 (id INT)') + table_schema, table_name = InsertTable.create_temp_table(conn) + self.assertEqual(table_schema, 'temp') + self.assertEqual(table_name, 'insert_df_1', 'expected table creation to fail exactly the first time') + + def test_it_removes_the_temporary_table(self): + test_data_frame = read_csv(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) + # TODO: get rid of the 'drop' here + test_data_frame = test_data_frame.drop(['origin'], axis=1) + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 
'test.db')) as conn: + insert_table_with_schema = '' + + def check_if_table_exists(): + conn.cursor().execute('select count(*) from {}'.format(insert_table_with_schema)) + + with InsertTable(conn, test_data_frame) as insert_table: + insert_table_with_schema = '{}.{}'.format(insert_table[0], insert_table[1]) + check_if_table_exists() + + self.assertRaises(sqlite3.OperationalError, check_if_table_exists) + + def test_it_works_with_dataframes_from_text_importer(self): + def run_test(csv_file, df_format): + # Get the DataFrame + self.assertTrue(os.path.isfile(csv_file)) + # TODO: Investigate what origin is good for and if we want to include it as column + # in the database as well. + data_frame = read_csv(csv_file, df_format).drop(['origin'], axis=1) + self.assertTrue(len(data_frame.index) > 0) + + # Load it into the InserTable and test this + with TemporaryDirectory() as tmp_dir: + with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: + with InsertTable(conn, data_frame) as insert_table: + + data_frame_size = len(data_frame.index) + database_rows = conn.cursor() \ + .execute('SELECT count(*) FROM {}.{}'.format(insert_table[0], insert_table[1])).fetchall()[0][0] + + self.assertEqual(data_frame_size, database_rows, 'not all (or more?) 
rows written to database') + + run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) + run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa) def test_suite(): "return the test suite" + suite = unittest.TestSuite() + suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists')) suite.addTest(LowLevelConnectionTestCase('test_opens_connection')) suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables')) + + suite.addTest(InsertTableTestCase('test_create_temp_table_table_exists')) + suite.addTest(InsertTableTestCase('test_create_temp_table_choses_other_table_if_exists')) + suite.addTest(InsertTableTestCase('test_it_removes_the_temporary_table')) + suite.addTest(InsertTableTestCase('test_it_works_with_dataframes_from_text_importer')) + return suite \ No newline at end of file From d0b6214568d4384796778e949a3471ca383ffd23 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sat, 16 Feb 2019 00:01:25 +0100 Subject: [PATCH 04/27] Remove `dummy_test` from `unittest.py` --- unittests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unittests.py b/unittests.py index ad0faaf..359240e 100644 --- a/unittests.py +++ b/unittests.py @@ -4,7 +4,6 @@ import unittest import doctest -import pynance.dummy_test import pynance.textimporter_test import pynance.dash_viz.plot_flow_test import pynance.database_test From 0cd15c000b29e11cd96cbf40633265a1774ddc1e Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 08:00:01 +0100 Subject: [PATCH 05/27] Adapt imports in `database_test.py` --- pynance/database_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pynance/database_test.py b/pynance/database_test.py index 13637ba..74ee4a3 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -5,7 +5,8 @@ import sqlite3 from pynance.database import LowLevelConnection, InsertTable -from pynance.textimporter import read_csv, 
SupportedCsvTypes +from pynance.textimporter import read_csv +from pynance.dkb import SupportedCsvTypes class TemporaryDirectory(object): def __enter__(self): From 656ac1f8e3260b2a3bea663188ceb27668fe6359 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 08:34:48 +0100 Subject: [PATCH 06/27] Use DEFERRED isolation level in `LowLevelConnection` This is the default in Python 3 anyway; Python 2 uses a different default that forces transactions to be committed immediately. --- pynance/database.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index 349014c..7f01121 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -25,6 +25,18 @@ class LowLevelConnection(object): 'tags TEXT' ] + def _get_db_conn(self): + """ + Get the connection to the sqlite database. We use the 'DEFERRED' isolation level. This + is the default in Python 3 anyways, in Python 2 the default is autocommit mode. The DEFERRED + isolation level seems appropriate in this case. 
See also + * https://www.sqlite.org/lang_transaction.html + """ + return sqlite3.connect( + self.db_file_name, + isolation_level = 'DEFERRED' + ) + def __init__(self, schema_version, db_file_name): """ Parameters: @@ -34,9 +46,9 @@ def __init__(self, schema_version, db_file_name): assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS self.db_file_name = db_file_name - with sqlite3.connect(self.db_file_name) as conn: + with self._get_db_conn() as conn: cursor = conn.cursor() - cursor.execute('BEGIN TRANSACTION') + cursor.execute('BEGIN') cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) @@ -53,7 +65,7 @@ def __init__(self, schema_version, db_file_name): conn.commit() def __enter__(self): - self.conn = sqlite3.connect(self.db_file_name) + self.conn = self._get_db_conn() return self.conn def __exit__(self, _1, _2, _3): From 4709f40ba3c9bcb75a5c0ed0732326c046f2f8b0 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 12:10:10 +0100 Subject: [PATCH 07/27] Ignore *.pyc --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 639a9e9..4662532 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__ /.vscode docs/graphs/png/*.png .doit.db.* -pynance/*.pyc +*.pyc .pytest_cache /.hypothesis .coverage From 8db7fbf0e8b3e23de1902c1fcebc51d18899a639 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 12:11:03 +0100 Subject: [PATCH 08/27] Set up database layout with connection as context manager This makes it safe in Python 2 as well --- pynance/database.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index 7f01121..17a0864 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -46,23 +46,18 @@ def __init__(self, schema_version, 
db_file_name): assert schema_version in LowLevelConnection.SUPPORTED_SCHEMA_VERSIONS self.db_file_name = db_file_name - with self._get_db_conn() as conn: - cursor = conn.cursor() - cursor.execute('BEGIN') + connection = self._get_db_conn() + with connection: + connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - cursor.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - cursor.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - - cursor.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( + connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( LowLevelConnection.TABLE_TRANSACTIONS, ', '.join( [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS ) )) - cursor.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) - - cursor.execute('COMMIT') - conn.commit() + connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) def __enter__(self): self.conn = self._get_db_conn() From 9030725cc38c9859cfc9fb6e325d847193dc7eb9 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 12:17:51 +0100 Subject: [PATCH 09/27] Let `unittest` generate the database_test testsuite --- pynance/database_test.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pynance/database_test.py b/pynance/database_test.py index 74ee4a3..dc4e353 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -108,15 +108,7 @@ def run_test(csv_file, df_format): def test_suite(): "return the test suite" - suite = unittest.TestSuite() - - suite.addTest(LowLevelConnectionTestCase('test_creates_database_file_if_not_exists')) - 
suite.addTest(LowLevelConnectionTestCase('test_opens_connection')) - suite.addTest(LowLevelConnectionTestCase('test_creates_expected_tables')) - - suite.addTest(InsertTableTestCase('test_create_temp_table_table_exists')) - suite.addTest(InsertTableTestCase('test_create_temp_table_choses_other_table_if_exists')) - suite.addTest(InsertTableTestCase('test_it_removes_the_temporary_table')) - suite.addTest(InsertTableTestCase('test_it_works_with_dataframes_from_text_importer')) + suite = unittest.makeSuite(LowLevelConnectionTestCase) + suite.addTests(unittest.makeSuite(InsertTableTestCase)) return suite \ No newline at end of file From 6619cc9af5260fd3369ef7121e7b172bc94a580c Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 15:22:16 +0100 Subject: [PATCH 10/27] Remove reference to `dummy_test` --- unittests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unittests.py b/unittests.py index 359240e..715a9ae 100644 --- a/unittests.py +++ b/unittests.py @@ -40,7 +40,6 @@ def add_doctests_for_module(package): def test_suite(): suite = unittest.TestSuite() - suite.addTests(pynance.dummy_test.test_suite()) suite.addTests(pynance.textimporter_test.test_suite()) suite.addTests(pynance.dash_viz.plot_flow_test.test_suite()) suite.addTests(pynance.database_test.test_suite()) From 35cfacbbb78bcaa98339ba0f0e1e80d04770b710 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 15:23:31 +0100 Subject: [PATCH 11/27] Ensure that `LowLevelConnection.__init__` is idempotent The previous version failed when it was executed on the same database twice. Added a test covering this behaviour and fixed the bug. 
--- pynance/database.py | 33 ++++++++++++++++++++++++--------- pynance/database_test.py | 11 +++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index 17a0864..c835e60 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -1,5 +1,18 @@ import sqlite3 + + +def exists_table(conn, table_name): + """ + Returns True if and only if 'table_name' is an existing table. + """ + + result = conn.execute( + 'select count(*) from sqlite_master where type="table" and name="{}"'.format(table_name) + ).fetchall() + return result[0][0] == 1 + + class LowLevelConnection(object): """ Class that handles low-level database connection. Makes sure the expected table strucutre exists. @@ -48,16 +61,18 @@ def __init__(self, schema_version, db_file_name): connection = self._get_db_conn() with connection: - connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + if not exists_table(connection, LowLevelConnection.TABLE_SCHEMA_VERSION): + connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( - LowLevelConnection.TABLE_TRANSACTIONS, - ', '.join( - [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS - ) - )) - connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) + if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS): + connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( + LowLevelConnection.TABLE_TRANSACTIONS, + ', '.join( + [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS + ) + 
)) + connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) def __enter__(self): self.conn = self._get_db_conn() diff --git a/pynance/database_test.py b/pynance/database_test.py index dc4e353..4a44ba4 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -46,6 +46,17 @@ def test_creates_expected_tables(self): [(1,)], cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall() ) + + def test_works_on_same_database_twice(self): + with TemporaryDirectory() as tmp_dir: + db_name = os.path.join(tmp_dir, 'test.db') + with LowLevelConnection(1, db_name) as _: + pass + with LowLevelConnection(1, db_name) as conn: + result = conn \ + .execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) \ + .fetchall() + self.assertEqual(1, result[0][0]) class InsertTableTestCase(unittest.TestCase): From f6f80a853041d736036f1585e81eb14d688c4095 Mon Sep 17 00:00:00 2001 From: David Nies Date: Sun, 17 Feb 2019 15:42:18 +0100 Subject: [PATCH 12/27] Factor out COLUMNS to `definitions.py` --- pynance/definitions.py | 20 ++++++++++++++++++++ pynance/textimporter.py | 21 +++------------------ 2 files changed, 23 insertions(+), 18 deletions(-) create mode 100644 pynance/definitions.py diff --git a/pynance/definitions.py b/pynance/definitions.py new file mode 100644 index 0000000..a8e0c7b --- /dev/null +++ b/pynance/definitions.py @@ -0,0 +1,20 @@ +""" +This module contains common definitions that are shared across other pynance +modules. 
+""" + +import numpy as np + +# see issue #5 and #6 +# use numpy types for numbers, because that's what pandas likes +COLUMNS = { + "date": np.datetime64, + "sender_account": str, + "receiver_account": str, + "text": str, + "amount": np.float64, + "total_balance": np.float64, + "currency": str, + "category": str, + "tags": str, + "origin": str} \ No newline at end of file diff --git a/pynance/textimporter.py b/pynance/textimporter.py index 77f5e25..92a4cd7 100644 --- a/pynance/textimporter.py +++ b/pynance/textimporter.py @@ -4,6 +4,8 @@ import pandas as pd import numpy as np +from .definitions import COLUMNS + def read_csv(filepath_or_buffer, description): """ @@ -197,21 +199,4 @@ class UnsupportedCsvFormatException(IOError): An error that occurs, if the importer is asked to read a CSV file with a setting that does not fit the actual file """ - pass - - -# STATIC DEFINITIONS below this line ################ - -# see issue #5 and #6 -# use numpy types for numbers, because that's what pandas likes -COLUMNS = { - "date": np.datetime64, - "sender_account": str, - "receiver_account": str, - "text": str, - "amount": np.float64, - "total_balance": np.float64, - "currency": str, - "category": str, - "tags": str, - "origin": str} + pass \ No newline at end of file From 044fea6c26c0acebba46e82194a98ad5709d4cac Mon Sep 17 00:00:00 2001 From: David Nies Date: Mon, 25 Feb 2019 20:51:32 +0100 Subject: [PATCH 13/27] Get definition of columns for InsertTable from definitions.COLUMNS --- pynance/database.py | 51 ++++++++++++++++++++++++++-------------- pynance/database_test.py | 17 +++++++++++++- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index c835e60..d8fffb2 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -1,7 +1,8 @@ import sqlite3 +import numpy as np +from .definitions import COLUMNS - def exists_table(conn, table_name): """ Returns True if and only if 'table_name' is an existing table. 
@@ -12,6 +13,30 @@ def exists_table(conn, table_name): ).fetchall() return result[0][0] == 1 +def generate_sqlite_columns_definitions(): + """ + Converts definitions.COLUMNS into the column definitions of a sqlite table. By column definitions, + we mean the part of a CREATE TABLE statement that defines the columns: + + CREATE TABLE my_table_name () + + Returns the column definitions as string + """ + + type_lookup_dict = { + str: 'TEXT', + np.datetime64: 'TEXT', + np.float64: 'REAL' + } + + def name_type_to_string(x): + # print('foooo: {}'.format(x)) + col_name, col_type = x + if col_type not in type_lookup_dict: + raise ValueError("Don't know which sqlite type '{}' is".format(col_type)) + return '{} {}'.format(col_name, type_lookup_dict[col_type]) + + return ', '.join(map(name_type_to_string, COLUMNS.items())) class LowLevelConnection(object): """ @@ -25,18 +50,6 @@ class LowLevelConnection(object): TABLE_SCHEMA_VERSION = 'schema' TABLE_TRANSACTIONS = 'transactions' TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY' - TABLE_TRANSACTIONS_FIELDS = [ - 'imported_at INTEGER', # unix timestamp - 'date TEXT', # format: YYYY-MM-DD - 'sender_account TEXT', - 'receiver_account TEXT', - 'text TEXT', - 'amount REAL', - 'total_balance REAL', - 'currency TEXT', - 'category TEXT', - 'tags TEXT' - ] def _get_db_conn(self): """ @@ -66,11 +79,10 @@ def __init__(self, schema_version, db_file_name): connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS): - connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( + connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format( LowLevelConnection.TABLE_TRANSACTIONS, - ', '.join( - [LowLevelConnection.TABLE_TRANSACTIONS_ID] + LowLevelConnection.TABLE_TRANSACTIONS_FIELDS - ) + LowLevelConnection.TABLE_TRANSACTIONS_ID, + generate_sqlite_columns_definitions() )) connection.execute('CREATE INDEX date_index ON {} 
({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) @@ -100,7 +112,10 @@ def create_temp_table(conn): go_on = False table_name = 'insert_df_{}'.format(i) try: - cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( table_name, ', '.join(LowLevelConnection.TABLE_TRANSACTIONS_FIELDS))) + cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( + table_name, + generate_sqlite_columns_definitions() + )) except sqlite3.OperationalError: go_on = True i += 1 diff --git a/pynance/database_test.py b/pynance/database_test.py index 4a44ba4..eedbdd8 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -4,7 +4,8 @@ from tempfile import mkdtemp import sqlite3 -from pynance.database import LowLevelConnection, InsertTable +from pynance.database import generate_sqlite_columns_definitions, \ + LowLevelConnection, InsertTable from pynance.textimporter import read_csv from pynance.dkb import SupportedCsvTypes @@ -16,6 +17,20 @@ def __enter__(self): def __exit__(self, _1, _2, _3): shutil.rmtree(self.dir) +class ColumnsDefinitionsTestCase(unittest.TestCase): + def test_it_produces_valid_string(self): + result = generate_sqlite_columns_definitions() + self.assertEqual(type(result), str) + self.assertTrue(len(result) > 0) + + def test_it_produces_valid_sql_types(self): + with TemporaryDirectory() as tmp_dir: + conn = sqlite3.connect(os.path.join(tmp_dir, 'test.db')) + with conn: + column_definitions = generate_sqlite_columns_definitions() + conn.execute('CREATE TABLE test ({})'.format(column_definitions)) + + class LowLevelConnectionTestCase(unittest.TestCase): def test_creates_database_file_if_not_exists(self): with TemporaryDirectory() as tmp_dir: From ede1b3a70f1bedca66e4089d0105543f8031f02a Mon Sep 17 00:00:00 2001 From: David Nies Date: Mon, 25 Feb 2019 20:52:14 +0100 Subject: [PATCH 14/27] Remove database_test.test_suite This function is obsolete since tests are discovered by pytest --- pynance/database_test.py | 9 --------- 1 file changed, 9 
deletions(-) diff --git a/pynance/database_test.py b/pynance/database_test.py index eedbdd8..ecbf186 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -129,12 +129,3 @@ def run_test(csv_file, df_format): run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa) - - -def test_suite(): - "return the test suite" - - suite = unittest.makeSuite(LowLevelConnectionTestCase) - suite.addTests(unittest.makeSuite(InsertTableTestCase)) - - return suite \ No newline at end of file From fb713456353ae2c3e8039b5f6e5756bebd1ef7a9 Mon Sep 17 00:00:00 2001 From: David Nies Date: Mon, 4 Mar 2019 21:46:18 +0100 Subject: [PATCH 15/27] Add test strategies for generating transactions dataframes --- pynance/test_stragegies/transactions.py | 68 ++++++++++++++++++++ pynance/test_stragegies/transactions_test.py | 34 ++++++++++ 2 files changed, 102 insertions(+) create mode 100644 pynance/test_stragegies/transactions.py create mode 100644 pynance/test_stragegies/transactions_test.py diff --git a/pynance/test_stragegies/transactions.py b/pynance/test_stragegies/transactions.py new file mode 100644 index 0000000..b5df0da --- /dev/null +++ b/pynance/test_stragegies/transactions.py @@ -0,0 +1,68 @@ +""" +Contains transaction test strategies. 
+""" + +import hypothesis.strategies as st +import datetime +import pandas as pd +import numpy as np + +KNOWN_CURRENCIES = ['EUR', 'USD'] +ALPHABET = list(map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß')) + +@st.composite +def single_transaction(draw, min_date=None, max_date=None): + if not min_date: + min_date = datetime.date(1000,1,1) + if not max_date: + max_date = datetime.date(9999,12,31) + + date = np.datetime64(draw(st.dates(min_value=min_date, max_value=max_date))) + sender_account = draw(st.text(alphabet=ALPHABET)) + receiver_account = str(draw(st.text(alphabet=ALPHABET))) + text = str(draw(st.text(alphabet=ALPHABET))) + amount = draw(st.floats(min_value=0.01, max_value=10000000)) + total_balance = draw(st.floats(min_value=0.01, max_value=10000000)) + currency = str(draw(st.sampled_from(KNOWN_CURRENCIES))) + category = str(draw(st.text(alphabet=ALPHABET))) + tags = str(draw(st.text(alphabet=ALPHABET))) + origin = str(draw(st.text(alphabet=ALPHABET))) + + return (date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin) + +@st.composite +def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): + elements = draw(st.lists( + single_transaction(min_date=min_date, max_date=max_date), + min_size=min_size, + max_size=max_size + )) + + dates, sender_accounts, receiver_accounts, texts, amounts, total_balances, currencies, \ + categories, tagss, origins = [],[],[],[],[],[],[],[],[],[] + + for date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin in elements: + dates.append(date) + sender_accounts.append(sender_account) + receiver_accounts.append(receiver_account) + texts.append(text) + amounts.append(amount) + total_balances.append(total_balance) + currencies.append(currency) + categories.append(category) + tagss.append(tags) + origins.append(origin) + + + return pd.DataFrame({ + 'date': dates, + 'sender_account': 
sender_accounts, + 'receiver_account': receiver_accounts, + 'text': texts, + 'amount': amounts, + 'total_balance': total_balances, + 'currency': currencies, + 'category': categories, + 'tags': tagss, + 'origin': origins + }) \ No newline at end of file diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/test_stragegies/transactions_test.py new file mode 100644 index 0000000..cd67607 --- /dev/null +++ b/pynance/test_stragegies/transactions_test.py @@ -0,0 +1,34 @@ +import unittest +import transactions as t +from hypothesis import given +import numpy as np +from datetime import date + +from pynance.definitions import COLUMNS + +class DataframeTestCase(unittest.TestCase): + + @given(df=t.dataframe(min_size=1, max_size=1)) + def test_has_expected_columns(self, df): + types = dict(df.dtypes) + self.assertEqual(len(types), len(COLUMNS)) + for col in COLUMNS: + self.assertTrue(col in types) + + @given(df=t.dataframe(min_size = 1, min_date=date(2000,1,1))) + def test_respects_min_date(self, df): + remaining = df['date'][df['date'] < date(2000,1,1)] + self.assertEqual(remaining.size, 0) + + @given(df=t.dataframe(min_size = 1, max_date=date(2000,1,1))) + def test_respects_max_date(self, df): + remaining = df['date'][df['date'] > date(2000,1,1)] + self.assertEqual(remaining.size, 0) + + @given(df=t.dataframe(min_size = 10)) + def test_respects_min_size(self, df): + self.assertTrue(df.size >= 10) + + @given(df=t.dataframe(max_size = 10)) + def test_respects_max_size(self): + self.assertTrue(df.size <= 10) \ No newline at end of file From df2ee629d945e74165540468d6e42bdbb522d3cf Mon Sep 17 00:00:00 2001 From: David Nies Date: Tue, 5 Mar 2019 18:23:09 +0100 Subject: [PATCH 16/27] Fix bug and improve performance of transactions strategy --- pynance/test_stragegies/transactions.py | 31 +++++++++----------- pynance/test_stragegies/transactions_test.py | 8 ++--- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pynance/test_stragegies/transactions.py 
b/pynance/test_stragegies/transactions.py index b5df0da..a3e0844 100644 --- a/pynance/test_stragegies/transactions.py +++ b/pynance/test_stragegies/transactions.py @@ -12,26 +12,24 @@ @st.composite def single_transaction(draw, min_date=None, max_date=None): - if not min_date: - min_date = datetime.date(1000,1,1) - if not max_date: - max_date = datetime.date(9999,12,31) - date = np.datetime64(draw(st.dates(min_value=min_date, max_value=max_date))) - sender_account = draw(st.text(alphabet=ALPHABET)) - receiver_account = str(draw(st.text(alphabet=ALPHABET))) - text = str(draw(st.text(alphabet=ALPHABET))) - amount = draw(st.floats(min_value=0.01, max_value=10000000)) - total_balance = draw(st.floats(min_value=0.01, max_value=10000000)) - currency = str(draw(st.sampled_from(KNOWN_CURRENCIES))) - category = str(draw(st.text(alphabet=ALPHABET))) - tags = str(draw(st.text(alphabet=ALPHABET))) - origin = str(draw(st.text(alphabet=ALPHABET))) + # As a performance optimization, we don't generate each column individually and reuse + # already generated values. 
If we don't do this, test generation is too slow and + # Hypothesis' HealthChecks make the tests fail + d = draw(st.dates(min_value=min_date, max_value=max_date)) + text = draw(st.text(alphabet=ALPHABET)) + floats = draw(st.floats(min_value=0.01, max_value=10000000)) + currency = draw(st.sampled_from(KNOWN_CURRENCIES)) - return (date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin) + return (d, text, text, text, floats, floats, currency, text, text, text) @st.composite def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): + if not min_date: + min_date = datetime.date(1000,1,1) + if not max_date: + max_date = datetime.date(9999,12,31) + elements = draw(st.lists( single_transaction(min_date=min_date, max_date=max_date), min_size=min_size, @@ -64,5 +62,4 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): 'currency': currencies, 'category': categories, 'tags': tagss, - 'origin': origins - }) \ No newline at end of file + 'origin': origins }) \ No newline at end of file diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/test_stragegies/transactions_test.py index cd67607..c62e9df 100644 --- a/pynance/test_stragegies/transactions_test.py +++ b/pynance/test_stragegies/transactions_test.py @@ -27,8 +27,8 @@ def test_respects_max_date(self, df): @given(df=t.dataframe(min_size = 10)) def test_respects_min_size(self, df): - self.assertTrue(df.size >= 10) + self.assertGreaterEqual(len(df), 10) - @given(df=t.dataframe(max_size = 10)) - def test_respects_max_size(self): - self.assertTrue(df.size <= 10) \ No newline at end of file + @given(t.dataframe(max_size = 10)) + def test_respects_max_size(self, df): + self.assertLessEqual(len(df), 10) \ No newline at end of file From f73abc8cc658345c8ec6cc4753d3b6c7a874eb9d Mon Sep 17 00:00:00 2001 From: David Nies Date: Tue, 12 Mar 2019 23:02:50 +0100 Subject: [PATCH 17/27] Add stub for StorageClass and its tests --- 
pynance/database.py | 33 +++++++++++++++++++++++++++++++-- pynance/database_test.py | 27 +++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index d8fffb2..9e4768f 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -1,3 +1,7 @@ +""" +Explain the classes briefly. Elaborate on Storage +""" + import sqlite3 import numpy as np from .definitions import COLUMNS @@ -30,7 +34,6 @@ def generate_sqlite_columns_definitions(): } def name_type_to_string(x): - # print('foooo: {}'.format(x)) col_name, col_type = x if col_type not in type_lookup_dict: raise ValueError("Don't know which sqlite type '{}' is".format(col_type)) @@ -142,4 +145,30 @@ def __exit__(self, _1, _2, _3): "Make sure the table is gone." self.conn.cursor().execute('DROP TABLE {}.{}'.format( self.temp_table_schema, self.temp_table_name - )) \ No newline at end of file + )) + + +class Storage(object): + + def __init__(self, db_file): + pass + + @classmethod + def validate_dataframe_shape(cls, data_frame): + """ + asserts that the correct columns are present. Tollerates that additional columns are present + """ + pass + + def append_dataframe(self, data_frame): + """ + asserts that the shape of the dataframe is correct + returns the part of the dataframe that is new. This part has also an ID column + """ + pass + + def load_dataframe(self): + """ + loads from db. 
contains ID column + """ + pass \ No newline at end of file diff --git a/pynance/database_test.py b/pynance/database_test.py index ecbf186..f06ba37 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -129,3 +129,30 @@ def run_test(csv_file, df_format): run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa) + + +class StorageTestCase(unittest.TestCase): + + def test_validate_dataframe_shape_complains_when_columns_are_missing(self): + "Assertion when columns are missing" + pass + + def test_validate_dataframe_shape_accepts_aditional_columns(self): + "Does not compain when aditional columns are present" + pass + + def test_append_dataframe_rejects_invalid_dataframes(self): + pass + + def test_append_dataframe_returns_new_parts_with_id(self): + pass + + def test_append_dataframe_returned_ids_are_the_same_as_in_load_dataframe(self): + pass + + def test_append_dataframe_duplicats_are_left_out(self): + pass + + def test_load_dataframe_works_with_new_storage_instance(self): + "implies new conn etc..." 
+ pass \ No newline at end of file From cc635473b86b7216d83b04b3030a9aab793ef52e Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sat, 16 Mar 2019 00:06:54 +0100 Subject: [PATCH 18/27] basic tests for storage facade --- pynance/storage_test.py | 121 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 pynance/storage_test.py diff --git a/pynance/storage_test.py b/pynance/storage_test.py new file mode 100644 index 0000000..c1dc80a --- /dev/null +++ b/pynance/storage_test.py @@ -0,0 +1,121 @@ +import unittest +import os + +import numpy as np +import pandas as pd +from pandas.testing import assert_frame_equal + +from .database import Storage +from .textimporter import read_csv +from .dkb import SupportedCsvTypes +from .definitions import COLUMNS + + +class StorageTestCase(unittest.TestCase): + def _read_dummy_file_dkbcash_small(self): + dummyfile_dkbcash_small = os.path.join("pynance", + "test_data", + "dkb_cash_sample.csv") + assert os.path.isfile(dummyfile_dkbcash_small) + + return read_csv(dummyfile_dkbcash_small, + SupportedCsvTypes.DKBCash) + + def _read_dummy_file_dkbvisa_small(self): + dummyfile_dkbvisa_small = os.path.join("pynance", + "test_data", + "dkb_visa_sample.csv") + assert os.path.isfile(dummyfile_dkbvisa_small) + + return read_csv(dummyfile_dkbvisa_small, + SupportedCsvTypes.DKBVisa) + + def _assert_frame_relevant_columns_equal(self, df1, df2): + assert_frame_equal(df1[COLUMNS], df2[COLUMNS]) + + def _delete_temp_db_file(self): + if os.path.exists(self.db_file): + os.remove(self.db_file) + + def setUp(self): + self.db_file = os.path.join("test_data", "test.sqlite") + + def test_init_storage(self): + storage = Storage(self.db_file) + assert storage is not None + + def test_init_storage_creates_file(self): + # delete file to make sure starting from scratch + self._delete_temp_db_file() + + _ = Storage(self.db_file) + assert os.path.exists(self.db_file) + + def test_append_dataframe_dkb_cash_small(self): + # 
delete file to make sure starting from scratch + self._delete_temp_db_file() + + storage = Storage(self.db_file) + df = self._read_dummy_file_dkbcash_small() + newdf = storage.append_dataframe(df) + + self._assert_frame_relevant_columns_equal(df, newdf) + + def test_append_dataframe_dkb_cash_and_visa(self): + # delete file to make sure starting from scratch + self._delete_temp_db_file() + + storage = Storage(self.db_file) + df_cash = self._read_dummy_file_dkbcash_small() + df_visa = self._read_dummy_file_dkbcash_small() + + storage.append_dataframe(df_cash) + storage.append_dataframe(df_visa) + + df_loaded = storage.load_dataframe() + + df_expected = df_cash.append(df_visa).sort_values(by="date", + ascending=False) + + self._assert_frame_relevant_columns_equal(df_loaded, df_expected) + + def test_load_dataframe(self): + # delete file to make sure starting from scratch + self._delete_temp_db_file() + + storage = Storage(self.db_file) + df = self._read_dummy_file_dkbcash_small() + newdf = storage.append_dataframe(df) + loaded_df = storage.load_dataframe() + + self._assert_frame_relevant_columns_equal(df, loaded_df) + + def test_append_dataframe_ignores_duplicates(self): + # delete file to make sure starting from scratch + self._delete_temp_db_file() + + storage = Storage(self.db_file) + df = self._read_dummy_file_dkbcash_small() + + # appending twice + newdf = storage.append_dataframe(df) + newdf2 = storage.append_dataframe(df) + + loaded_df = storage.load_dataframe() + + self._assert_frame_relevant_columns_equal(df, loaded_df) + + def test_append_invalid_dataframe_fails(self): + random_df = pd.DataFrame(np.random.randn(100, 2), + columns=['colA', 'colB']) + + storage = Storage(self.db_file) + + def append_invalid(): + return storage.append_dataframe(random_df) + + self.assertRaises(Exception, append_invalid) + + def tearDown(self): + # remove temporary db file + self._delete_temp_db_file() From 2bc286d326891430967ef7118f5e3b7b8642cbac Mon Sep 17 00:00:00 2001 From: 
Fabian Meyer Date: Wed, 22 May 2019 23:00:53 +0200 Subject: [PATCH 19/27] requirements dev --- requirements-dev.txt | 21 +++++++++++++++++++++ requirements.txt | 15 +-------------- 2 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..07bbe3e --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,21 @@ +pandas +numpy +dash +dash-core-components +dash-html-components + +hypothesis +codecov + +# The latest version of doit supporting Python 2 is 0.29.0 +doit==0.29.0; python_version < '3.0' +doit; python_version >= '3.0' +attrs>17.4 + +pytest==4.0.0; python_version < '3.0' +pytest-cov==2.6.0; python_version < '3.0' +pytest==4.2.0; python_version >= '3.0' +pytest-cov==2.6.1; python_version >= '3.0' + +pylint +pep8 diff --git a/requirements.txt b/requirements.txt index 8da95b6..f01f8f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,17 +2,4 @@ pandas numpy dash dash-core-components -dash-html-components - -hypothesis -codecov - -# The latest version of doit supporting Python 2 is 0.29.0 -doit==0.29.0; python_version < '3.0' -doit; python_version >= '3.0' -attrs>17.4 - -pytest==4.0.0; python_version < '3.0' -pytest-cov==2.6.0; python_version < '3.0' -pytest==4.2.0; python_version >= '3.0' -pytest-cov==2.6.1; python_version >= '3.0' +dash-html-components \ No newline at end of file From d08aa56df1445f333ad3e47443a2fb96b862c51a Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Thu, 23 May 2019 00:22:11 +0200 Subject: [PATCH 20/27] hash functions for dataframe --- pynance/dataframe_util.py | 13 +++++++++++++ pynance/dataframe_util_test.py | 16 ++++++++++++++++ pynance/{test_stragegies => }/transactions.py | 0 .../{test_stragegies => }/transactions_test.py | 0 4 files changed, 29 insertions(+) create mode 100644 pynance/dataframe_util.py create mode 100644 pynance/dataframe_util_test.py rename pynance/{test_stragegies => }/transactions.py 
(100%) rename pynance/{test_stragegies => }/transactions_test.py (100%) diff --git a/pynance/dataframe_util.py b/pynance/dataframe_util.py new file mode 100644 index 0000000..b10b701 --- /dev/null +++ b/pynance/dataframe_util.py @@ -0,0 +1,13 @@ +from .definitions import COLUMNS +from hashlib import md5 + + +def hash_row(row): + h = md5() + for value in row: + h.update(bytes(str(value), encoding='utf8')) + return h.hexdigest() + + +def create_id_hash(new_df): + return new_df.apply(hash_row, axis=1) diff --git a/pynance/dataframe_util_test.py b/pynance/dataframe_util_test.py new file mode 100644 index 0000000..f302124 --- /dev/null +++ b/pynance/dataframe_util_test.py @@ -0,0 +1,16 @@ +import unittest +from hypothesis import given +from datetime import date + +from .transactions import dataframe +from .dataframe_util import hash_row, create_id_hash + + +class DataframeUtilTestcase(unittest.TestCase): + + @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1))) + def test_hash_row(self, df): + for i, row in df.iterrows(): + hash_result = hash_row(row) + self.assertEqual(type(hash_result), str) + self.assertEqual(type(hash_result), str) diff --git a/pynance/test_stragegies/transactions.py b/pynance/transactions.py similarity index 100% rename from pynance/test_stragegies/transactions.py rename to pynance/transactions.py diff --git a/pynance/test_stragegies/transactions_test.py b/pynance/transactions_test.py similarity index 100% rename from pynance/test_stragegies/transactions_test.py rename to pynance/transactions_test.py From 533cb9bd4b77f92b42f846f363d8734183c61173 Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Thu, 23 May 2019 00:27:13 +0200 Subject: [PATCH 21/27] extended dataframe hash util tests --- pynance/dataframe_util_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pynance/dataframe_util_test.py b/pynance/dataframe_util_test.py index f302124..a24a32e 100644 --- a/pynance/dataframe_util_test.py +++ 
b/pynance/dataframe_util_test.py @@ -13,4 +13,13 @@ def test_hash_row(self, df): for i, row in df.iterrows(): hash_result = hash_row(row) self.assertEqual(type(hash_result), str) - self.assertEqual(type(hash_result), str) + self.assertEqual(len(hash_result), 32) + + @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1))) + def test_create_id_hash(self, df): + result_hash_column = create_id_hash(df) + self.assertEqual(len(result_hash_column), len(df)) + + for item in result_hash_column: + self.assertEqual(type(item), str) + self.assertEqual(len(item), 32) From 663c329a185c45c3be3784f87177b56eb82afe2f Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Thu, 23 May 2019 00:32:40 +0200 Subject: [PATCH 22/27] started implementing storage with additional hash column for duplicate detection --- pynance/database.py | 78 +++++++++++++++++++++++++---------------- pynance/definitions.py | 15 ++++---- pynance/textimporter.py | 5 +-- requirements-dev.txt | 1 + 4 files changed, 61 insertions(+), 38 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index 9e4768f..197b6a6 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -16,14 +16,15 @@ def exists_table(conn, table_name): 'select count(*) from sqlite_master where type="table" and name="{}"'.format(table_name) ).fetchall() return result[0][0] == 1 - + + def generate_sqlite_columns_definitions(): """ Converts definitions.COLUMNS into the column definitions of a sqlite table. 
By column definitions, we mean the part of a CREATE TABLE statement that defines the columns: - + CREATE TABLE my_table_name () - + Returns the column definitions as string """ @@ -36,15 +37,17 @@ def generate_sqlite_columns_definitions(): def name_type_to_string(x): col_name, col_type = x if col_type not in type_lookup_dict: - raise ValueError("Don't know which sqlite type '{}' is".format(col_type)) - return '{} {}'.format(col_name, type_lookup_dict[col_type]) + raise ValueError( + "Don't know which sqlite type '{}' is".format(col_type)) + return '{} {}'.format(col_name, type_lookup_dict[col_type]) return ', '.join(map(name_type_to_string, COLUMNS.items())) + class LowLevelConnection(object): """ Class that handles low-level database connection. Makes sure the expected table strucutre exists. - Should be used in with-statements. + Should be used in with-statements. """ # Schema evolution should be handled later once it is needed @@ -63,7 +66,7 @@ def _get_db_conn(self): """ return sqlite3.connect( self.db_file_name, - isolation_level = 'DEFERRED' + isolation_level='DEFERRED' ) def __init__(self, schema_version, db_file_name): @@ -78,8 +81,10 @@ def __init__(self, schema_version, db_file_name): connection = self._get_db_conn() with connection: if not exists_table(connection, LowLevelConnection.TABLE_SCHEMA_VERSION): - connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) - connection.execute('INSERT INTO {} VALUES (1)'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)) + connection.execute('CREATE TABLE IF NOT EXISTS {} (version INTEGER)'.format( + LowLevelConnection.TABLE_SCHEMA_VERSION)) + connection.execute('INSERT INTO {} VALUES (1)'.format( + LowLevelConnection.TABLE_SCHEMA_VERSION)) if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS): connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format( @@ -87,12 +92,13 @@ def __init__(self, schema_version, db_file_name): 
LowLevelConnection.TABLE_TRANSACTIONS_ID, generate_sqlite_columns_definitions() )) - connection.execute('CREATE INDEX date_index ON {} ({})'.format(LowLevelConnection.TABLE_TRANSACTIONS, 'date')) - + connection.execute('CREATE INDEX date_index ON {} ({})'.format( + LowLevelConnection.TABLE_TRANSACTIONS, 'date')) + def __enter__(self): self.conn = self._get_db_conn() return self.conn - + def __exit__(self, _1, _2, _3): self.conn.close() @@ -103,7 +109,7 @@ class InsertTable(object): It also makes sure that the temporary table is created in a safe way and disposed afterwards. For this purpuse, instances of this class should be used in with statements. """ - + @staticmethod def create_temp_table(conn): """Creates temporary table suitable for inserting the DataFrame and returns its name.""" @@ -115,32 +121,33 @@ def create_temp_table(conn): go_on = False table_name = 'insert_df_{}'.format(i) try: - cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( - table_name, + cursor.execute('CREATE TEMPORARY TABLE {} ({})'.format( + table_name, generate_sqlite_columns_definitions() )) except sqlite3.OperationalError: go_on = True i += 1 - + return 'temp', table_name def __init__(self, conn, data_frame): "uses conn, fetches everything from 'data_frame' into a temporary table" - + self.conn = conn - self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table(conn) + self.temp_table_schema, self.temp_table_name = InsertTable.create_temp_table( + conn) data_frame.to_sql( - name=self.temp_table_name, + name=self.temp_table_name, schema=self.temp_table_schema, index=False, - con=conn, + con=conn, chunksize=5000 ) - + def __enter__(self): return (self.temp_table_schema, self.temp_table_name) - + def __exit__(self, _1, _2, _3): "Make sure the table is gone." 
self.conn.cursor().execute('DROP TABLE {}.{}'.format( @@ -149,26 +156,37 @@ def __exit__(self, _1, _2, _3): class Storage(object): - + def __init__(self, db_file): - pass - + self.db_file = db_file + @classmethod def validate_dataframe_shape(cls, data_frame): """ - asserts that the correct columns are present. Tollerates that additional columns are present + asserts that the correct columns are present. + Tolerates that additional columns are present """ pass - + def append_dataframe(self, data_frame): """ asserts that the shape of the dataframe is correct returns the part of the dataframe that is new. This part has also an ID column """ - pass - + if not self.validate_dataframe_shape(data_frame): + raise Exception('Invalid dataframe') + + with LowLevelConnection(1, self.db_file) as conn: + with InsertTable(conn, data_frame) as insert_table: + # add existing data to insert_table + with conn: + conn.cursor().execute('INSERT INTO %s ') + # but only non-duplicates + # replace existing table by insert_table + pass + def load_dataframe(self): """ loads from db. 
contains ID column """ - pass \ No newline at end of file + pass diff --git a/pynance/definitions.py b/pynance/definitions.py index a8e0c7b..9b97d27 100644 --- a/pynance/definitions.py +++ b/pynance/definitions.py @@ -5,9 +5,7 @@ import numpy as np -# see issue #5 and #6 -# use numpy types for numbers, because that's what pandas likes -COLUMNS = { +IMMUTABLE_COLUMNS = { "date": np.datetime64, "sender_account": str, "receiver_account": str, @@ -15,6 +13,11 @@ "amount": np.float64, "total_balance": np.float64, "currency": str, - "category": str, - "tags": str, - "origin": str} \ No newline at end of file + "origin": str +} + +# see issue #5 and #6 +# use numpy types for numbers, because that's what pandas likes +COLUMNS = dict(id=str, + category=str, + tags=str, **IMMUTABLE_COLUMNS) diff --git a/pynance/textimporter.py b/pynance/textimporter.py index 92a4cd7..9cf06a1 100644 --- a/pynance/textimporter.py +++ b/pynance/textimporter.py @@ -5,6 +5,7 @@ import numpy as np from .definitions import COLUMNS +from .dataframe_util import create_id_hash def read_csv(filepath_or_buffer, description): @@ -81,7 +82,7 @@ def read_csv(filepath_or_buffer, description): amounts = new_df['amount'].values new_df['total_balance'] = amounts_to_balances(amounts, final_total_balance) - + new_df['id'] = create_id_hash(new_df) return new_df @@ -199,4 +200,4 @@ class UnsupportedCsvFormatException(IOError): An error that occurs, if the importer is asked to read a CSV file with a setting that does not fit the actual file """ - pass \ No newline at end of file + pass diff --git a/requirements-dev.txt b/requirements-dev.txt index 07bbe3e..eecf161 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -19,3 +19,4 @@ pytest-cov==2.6.1; python_version >= '3.0' pylint pep8 +autopep8 \ No newline at end of file From c2ac19f47b5a55ac88be50b49b8b7fc142bc253a Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sun, 26 May 2019 22:04:06 +0200 Subject: [PATCH 23/27] updated dash response tests --- 
pynance/dash_viz/plot_flow_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pynance/dash_viz/plot_flow_test.py b/pynance/dash_viz/plot_flow_test.py index deb4906..d130556 100644 --- a/pynance/dash_viz/plot_flow_test.py +++ b/pynance/dash_viz/plot_flow_test.py @@ -32,7 +32,7 @@ def test_onselect_csvtype(self): for expected, selected in zip(onselect_response, dropdown_values): response = onselect_csvtype(selected) - response_dict = json.loads(response.data.decode()) + response_dict = json.loads(response) # .data.decode()) is_enabled = not response_dict["response"]["props"]["disabled"] self.assertEqual(expected, is_enabled) @@ -140,7 +140,7 @@ def test_update_output(self): bytestr = self._read_sample_file_like_uploaded() response = update_output(bytestr, "DKBCash") - response_dict = json.loads(response.data.decode()) + response_dict = json.loads(response) # .data.decode()) res_charts = response_dict["response"]["props"]["figure"]["data"] From 13894666864837d1508572133f5b2f6cd5586bda Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sun, 26 May 2019 22:05:06 +0200 Subject: [PATCH 24/27] removed py27 backwards compatibility for temp dir --- pynance/database_test.py | 73 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/pynance/database_test.py b/pynance/database_test.py index f06ba37..b3bce49 100644 --- a/pynance/database_test.py +++ b/pynance/database_test.py @@ -1,7 +1,7 @@ import unittest import os.path import shutil -from tempfile import mkdtemp +from tempfile import TemporaryDirectory, TemporaryFile import sqlite3 from pynance.database import generate_sqlite_columns_definitions, \ @@ -9,26 +9,21 @@ from pynance.textimporter import read_csv from pynance.dkb import SupportedCsvTypes -class TemporaryDirectory(object): - def __enter__(self): - self.dir = mkdtemp() - return self.dir - - def __exit__(self, _1, _2, _3): - shutil.rmtree(self.dir) class 
ColumnsDefinitionsTestCase(unittest.TestCase): def test_it_produces_valid_string(self): result = generate_sqlite_columns_definitions() self.assertEqual(type(result), str) self.assertTrue(len(result) > 0) - + def test_it_produces_valid_sql_types(self): with TemporaryDirectory() as tmp_dir: - conn = sqlite3.connect(os.path.join(tmp_dir, 'test.db')) - with conn: - column_definitions = generate_sqlite_columns_definitions() - conn.execute('CREATE TABLE test ({})'.format(column_definitions)) + tmp_file = os.path.join(tmp_dir, 'test.db') + conn = sqlite3.connect(tmp_file) + column_definitions = generate_sqlite_columns_definitions() + query = 'CREATE TABLE test ({})'.format(column_definitions) + conn.execute(query) + conn.close() class LowLevelConnectionTestCase(unittest.TestCase): @@ -39,29 +34,31 @@ def test_creates_database_file_if_not_exists(self): with LowLevelConnection(1, db_file) as _: pass self.assertTrue(os.path.exists(db_file)) - + def test_opens_connection(self): with TemporaryDirectory() as tmp_dir: with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: self.assertIsNotNone(conn) - + def test_creates_expected_tables(self): with TemporaryDirectory() as tmp_dir: with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: cursor = conn.cursor() tables = set(map( lambda x: x[0], - cursor.execute('select name from sqlite_master where type="table"').fetchall() + cursor.execute( + 'select name from sqlite_master where type="table"').fetchall() )) self.assertEqual( tables, - set([LowLevelConnection.TABLE_SCHEMA_VERSION,LowLevelConnection.TABLE_TRANSACTIONS - ])) + set([LowLevelConnection.TABLE_SCHEMA_VERSION, LowLevelConnection.TABLE_TRANSACTIONS + ])) self.assertEqual( [(1,)], - cursor.execute('select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall() + cursor.execute( + 'select count(*) from {}'.format(LowLevelConnection.TABLE_SCHEMA_VERSION)).fetchall() ) - + def test_works_on_same_database_twice(self): with 
TemporaryDirectory() as tmp_dir: db_name = os.path.join(tmp_dir, 'test.db') @@ -73,6 +70,7 @@ def test_works_on_same_database_twice(self): .fetchall() self.assertEqual(1, result[0][0]) + class InsertTableTestCase(unittest.TestCase): def test_create_temp_table_table_exists(self): @@ -81,17 +79,19 @@ def test_create_temp_table_table_exists(self): table_schema, table_name = InsertTable.create_temp_table(conn) # Fails if and only if table does not exist conn.cursor().execute('select count(*) from {}.{}'.format(table_schema, table_name)) - + def test_create_temp_table_choses_other_table_if_exists(self): with TemporaryDirectory() as tmp_dir: with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: conn.cursor().execute('CREATE TEMPORARY TABLE insert_df_0 (id INT)') table_schema, table_name = InsertTable.create_temp_table(conn) self.assertEqual(table_schema, 'temp') - self.assertEqual(table_name, 'insert_df_1', 'expected table creation to fail exactly the first time') - + self.assertEqual( + table_name, 'insert_df_1', 'expected table creation to fail exactly the first time') + def test_it_removes_the_temporary_table(self): - test_data_frame = read_csv(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) + test_data_frame = read_csv(os.path.join( + 'pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) # TODO: get rid of the 'drop' here test_data_frame = test_data_frame.drop(['origin'], axis=1) with TemporaryDirectory() as tmp_dir: @@ -102,10 +102,12 @@ def check_if_table_exists(): conn.cursor().execute('select count(*) from {}'.format(insert_table_with_schema)) with InsertTable(conn, test_data_frame) as insert_table: - insert_table_with_schema = '{}.{}'.format(insert_table[0], insert_table[1]) + insert_table_with_schema = '{}.{}'.format( + insert_table[0], insert_table[1]) check_if_table_exists() - - self.assertRaises(sqlite3.OperationalError, check_if_table_exists) + + 
self.assertRaises(sqlite3.OperationalError, + check_if_table_exists) def test_it_works_with_dataframes_from_text_importer(self): def run_test(csv_file, df_format): @@ -115,7 +117,7 @@ def run_test(csv_file, df_format): # in the database as well. data_frame = read_csv(csv_file, df_format).drop(['origin'], axis=1) self.assertTrue(len(data_frame.index) > 0) - + # Load it into the InserTable and test this with TemporaryDirectory() as tmp_dir: with LowLevelConnection(1, os.path.join(tmp_dir, 'test.db')) as conn: @@ -125,10 +127,13 @@ def run_test(csv_file, df_format): database_rows = conn.cursor() \ .execute('SELECT count(*) FROM {}.{}'.format(insert_table[0], insert_table[1])).fetchall()[0][0] - self.assertEqual(data_frame_size, database_rows, 'not all (or more?) rows written to database') + self.assertEqual( + data_frame_size, database_rows, 'not all (or more?) rows written to database') - run_test(os.path.join('pynance', 'test_data', 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) - run_test(os.path.join('pynance', 'test_data', 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa) + run_test(os.path.join('pynance', 'test_data', + 'dkb_cash_sample.csv'), SupportedCsvTypes.DKBCash) + run_test(os.path.join('pynance', 'test_data', + 'dkb_visa_sample.csv'), SupportedCsvTypes.DKBVisa) class StorageTestCase(unittest.TestCase): @@ -140,7 +145,7 @@ def test_validate_dataframe_shape_complains_when_columns_are_missing(self): def test_validate_dataframe_shape_accepts_aditional_columns(self): "Does not compain when aditional columns are present" pass - + def test_append_dataframe_rejects_invalid_dataframes(self): pass @@ -152,7 +157,7 @@ def test_append_dataframe_returned_ids_are_the_same_as_in_load_dataframe(self): def test_append_dataframe_duplicats_are_left_out(self): pass - + def test_load_dataframe_works_with_new_storage_instance(self): "implies new conn etc..." 
- pass \ No newline at end of file + pass From ae801bca949aa9bdd3982564da3128f8775623ce Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sun, 26 May 2019 22:05:43 +0200 Subject: [PATCH 25/27] rename id to row_key as primary key --- pynance/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pynance/database.py b/pynance/database.py index 197b6a6..8273a4c 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -55,7 +55,7 @@ class LowLevelConnection(object): TABLE_SCHEMA_VERSION = 'schema' TABLE_TRANSACTIONS = 'transactions' - TABLE_TRANSACTIONS_ID = 'id INTEGER PRIMARY KEY' + TABLE_TRANSACTIONS_ID = 'row_key INTEGER PRIMARY KEY' def _get_db_conn(self): """ From 74cb304d7c5f61e413baf4fde41cb4de65865c6f Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sun, 26 May 2019 22:06:16 +0200 Subject: [PATCH 26/27] added id column in strategy dataframes --- pynance/transactions.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pynance/transactions.py b/pynance/transactions.py index a3e0844..cd3eb34 100644 --- a/pynance/transactions.py +++ b/pynance/transactions.py @@ -2,13 +2,17 @@ Contains transaction test strategies. 
""" -import hypothesis.strategies as st +import hypothesis.strategies as st import datetime import pandas as pd import numpy as np +from .dataframe_util import create_id_hash + KNOWN_CURRENCIES = ['EUR', 'USD'] -ALPHABET = list(map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß')) +ALPHABET = list( + map(str, 'abcdefghijklmnopqrstuvwzyz ABCDEFGHIJKLMNOPQRSTUVWZYZ0123456789äüöß')) + @st.composite def single_transaction(draw, min_date=None, max_date=None): @@ -23,21 +27,22 @@ def single_transaction(draw, min_date=None, max_date=None): return (d, text, text, text, floats, floats, currency, text, text, text) + @st.composite def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): if not min_date: - min_date = datetime.date(1000,1,1) + min_date = datetime.date(1000, 1, 1) if not max_date: - max_date = datetime.date(9999,12,31) + max_date = datetime.date(9999, 12, 31) elements = draw(st.lists( single_transaction(min_date=min_date, max_date=max_date), - min_size=min_size, - max_size=max_size + min_size=min_size, + max_size=max_size )) dates, sender_accounts, receiver_accounts, texts, amounts, total_balances, currencies, \ - categories, tagss, origins = [],[],[],[],[],[],[],[],[],[] + categories, tagss, origins = [], [], [], [], [], [], [], [], [], [] for date, sender_account, receiver_account, text, amount, total_balance, currency, category, tags, origin in elements: dates.append(date) @@ -51,8 +56,7 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): tagss.append(tags) origins.append(origin) - - return pd.DataFrame({ + result_frame = pd.DataFrame({ 'date': dates, 'sender_account': sender_accounts, 'receiver_account': receiver_accounts, @@ -62,4 +66,10 @@ def dataframe(draw, min_size=0, max_size=None, min_date=None, max_date=None): 'currency': currencies, 'category': categories, 'tags': tagss, - 'origin': origins }) \ No newline at end of file + 'origin': origins}) + + hash_column = 
create_id_hash(result_frame) + + result_frame['id'] = hash_column + + return result_frame From f8cac63a4def4218444ea4f673269d94d72f3566 Mon Sep 17 00:00:00 2001 From: Fabian Meyer Date: Sun, 26 May 2019 23:01:18 +0200 Subject: [PATCH 27/27] fixed tests for database, id as key column --- pynance/database.py | 26 +++++++++++++++++++------- pynance/storage_test.py | 27 +++++++-------------------- pynance/transactions_test.py | 21 +++++++++++---------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/pynance/database.py b/pynance/database.py index 8273a4c..615d1c8 100644 --- a/pynance/database.py +++ b/pynance/database.py @@ -55,7 +55,7 @@ class LowLevelConnection(object): TABLE_SCHEMA_VERSION = 'schema' TABLE_TRANSACTIONS = 'transactions' - TABLE_TRANSACTIONS_ID = 'row_key INTEGER PRIMARY KEY' + ID_COLUMN = 'id' def _get_db_conn(self): """ @@ -87,13 +87,14 @@ def __init__(self, schema_version, db_file_name): LowLevelConnection.TABLE_SCHEMA_VERSION)) if not exists_table(connection, LowLevelConnection.TABLE_TRANSACTIONS): - connection.execute('CREATE TABLE IF NOT EXISTS {} ({}, {})'.format( + connection.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format( LowLevelConnection.TABLE_TRANSACTIONS, - LowLevelConnection.TABLE_TRANSACTIONS_ID, generate_sqlite_columns_definitions() )) connection.execute('CREATE INDEX date_index ON {} ({})'.format( LowLevelConnection.TABLE_TRANSACTIONS, 'date')) + connection.execute('CREATE INDEX id ON {} ({})'.format( + LowLevelConnection.TABLE_TRANSACTIONS, LowLevelConnection.ID_COLUMN)) def __enter__(self): self.conn = self._get_db_conn() @@ -163,10 +164,10 @@ def __init__(self, db_file): @classmethod def validate_dataframe_shape(cls, data_frame): """ - asserts that the correct columns are present. + asserts that the correct columns are present. 
Tolerates that additional columns are present """ - pass + return True def append_dataframe(self, data_frame): """ @@ -180,10 +181,21 @@ def append_dataframe(self, data_frame): with InsertTable(conn, data_frame) as insert_table: # add existing data to insert_table with conn: - conn.cursor().execute('INSERT INTO %s ') + column_keys = COLUMNS.keys() + columns_str = ','.join(column_keys) + conn.cursor().execute( + ''' + INSERT INTO %s + SELECT %s + FROM %s + ON CONFLICT (%s) DO NOTHING + ''' % (insert_table, + columns_str, + LowLevelConnection.TABLE_TRANSACTIONS, + LowLevelConnection.ID_COLUMN)) + conn.close() # but only non-duplicates # replace existing table by insert_table - pass def load_dataframe(self): """ diff --git a/pynance/storage_test.py b/pynance/storage_test.py index c1dc80a..562662c 100644 --- a/pynance/storage_test.py +++ b/pynance/storage_test.py @@ -4,6 +4,8 @@ import numpy as np import pandas as pd from pandas.testing import assert_frame_equal +from tempfile import TemporaryDirectory + from .database import Storage from .textimporter import read_csv @@ -33,27 +35,19 @@ def _read_dummy_file_dkbvisa_small(self): def _assert_frame_relevant_columns_equal(self, df1, df2): assert_frame_equal(df1[COLUMNS], df2[COLUMNS]) - def _delete_temp_db_file(self): - if os.path.exists(self.db_file): - os.remove(self.db_file) - def setUp(self): - self.db_file = os.path.join("test_data", "test.sqlite") + self.tempdir = TemporaryDirectory() + self.db_file = os.path.join(self.tempdir.name, "test.sqlite") + + def tearDown(self): + self.tempdir.cleanup() def test_init_storage(self): storage = Storage(self.db_file) assert storage is not None - def test_init_storage_creates_file(self): - # delete file to make sure starting from scratch - self._delete_temp_db_file() - - _ = Storage(self.db_file) - assert os.path.exists(self.db_file) - def test_append_dataframe_dkb_cash_small(self): # delete file to make sure starting from scratch - self._delete_temp_db_file() storage = 
Storage(self.db_file) df = self._read_dummy_file_dkbcash_small() @@ -63,7 +57,6 @@ def test_append_dataframe_dkb_cash_small(self): def test_append_dataframe_dkb_cash_and_visa(self): # delete file to make sure starting from scratch - self._delete_temp_db_file() storage = Storage(self.db_file) df_cash = self._read_dummy_file_dkbcash_small() @@ -81,7 +74,6 @@ def test_append_dataframe_dkb_cash_and_visa(self): def test_load_dataframe(self): # delete file to make sure starting from scratch - self._delete_temp_db_file() storage = Storage(self.db_file) df = self._read_dummy_file_dkbcash_small() @@ -92,7 +84,6 @@ def test_load_dataframe(self): def test_append_dataframe_ignores_duplicates(self): # delete file to make sure starting from scratch - self._delete_temp_db_file() storage = Storage(self.db_file) df = self._read_dummy_file_dkbcash_small() @@ -115,7 +106,3 @@ def append_invalid(): return storage.append_dataframe(random_df) self.assertRaises(Exception, append_invalid) - - def tearDown(self): - # remove temporary db file - self._delete_temp_db_file() diff --git a/pynance/transactions_test.py b/pynance/transactions_test.py index c62e9df..1709b56 100644 --- a/pynance/transactions_test.py +++ b/pynance/transactions_test.py @@ -1,34 +1,35 @@ import unittest -import transactions as t +from .transactions import dataframe from hypothesis import given import numpy as np from datetime import date from pynance.definitions import COLUMNS + class DataframeTestCase(unittest.TestCase): - @given(df=t.dataframe(min_size=1, max_size=1)) + @given(df=dataframe(min_size=1, max_size=1)) def test_has_expected_columns(self, df): types = dict(df.dtypes) self.assertEqual(len(types), len(COLUMNS)) for col in COLUMNS: self.assertTrue(col in types) - - @given(df=t.dataframe(min_size = 1, min_date=date(2000,1,1))) + + @given(df=dataframe(min_size=1, min_date=date(2000, 1, 1))) def test_respects_min_date(self, df): - remaining = df['date'][df['date'] < date(2000,1,1)] + remaining = 
df['date'][df['date'] < date(2000, 1, 1)] self.assertEqual(remaining.size, 0) - @given(df=t.dataframe(min_size = 1, max_date=date(2000,1,1))) + @given(df=dataframe(min_size=1, max_date=date(2000, 1, 1))) def test_respects_max_date(self, df): - remaining = df['date'][df['date'] > date(2000,1,1)] + remaining = df['date'][df['date'] > date(2000, 1, 1)] self.assertEqual(remaining.size, 0) - @given(df=t.dataframe(min_size = 10)) + @given(df=dataframe(min_size=10)) def test_respects_min_size(self, df): self.assertGreaterEqual(len(df), 10) - @given(t.dataframe(max_size = 10)) + @given(dataframe(max_size=10)) def test_respects_max_size(self, df): - self.assertLessEqual(len(df), 10) \ No newline at end of file + self.assertLessEqual(len(df), 10)