From adc8ce269ce907b7df3d92f7f78ce472804f36a7 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Tue, 15 Jan 2019 01:13:07 +0300 Subject: [PATCH 1/9] updated sdic to not print anything and use the logging module, ran black --- README.md | 25 ++++++---- sdic/constants.py | 2 +- sdic/main.py | 120 +++++++++++++++++++++++++--------------------- 3 files changed, 82 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 98e3868..22abd84 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This works for simple constraints: 1. It's easy to implement 1. It's cheap for the database to check on every change -But for more complex constraints that you'd like to set, it'd be either very +But for more complex conditions, it'd be either very expensinve to check on every write, or even impossible to write as a constraint. @@ -87,21 +87,28 @@ doctor ordered. ## Install as a cron -If you want to get an email every night to give you a list of all the soft -constraints that have been broken during the last day, just add it to you -crontab. We like to have it run daily, so we can fix any bug generating bad -data before it becomes a real problem. +Run sdic from cron as often as you like, we like to have it run daily. Example crontab: ``` -MAILTO="dba@acme.com" @daily sdic live ``` -`dba@acme.com` is the email that will get the soft constraints broken every -day. Make sure your local MTA is well configured on your system. You can test -it by doing `date | mail -s test dba@acme.com`. +## Monitoring + +sdic uses Python's [`logging`](https://docs.python.org/2.7/library/logging.html) +module to log any output in dot separated hierarchical fashion. + +Any general sdic message would look like this: +``` +Jan 11 00:10:19 sdic.enforce_fullname: enforce_fullname.sql successfully ran in 0.029 sec +``` + +In Papertrail `sdic.enforce_fullname` will be treated as a +[program attribute](https://help.papertrailapp.com/kb/how-it-works/search-syntax/#attributes). 
+An alert can be created with the search keyword `program:sdic.enforce_fullname` +to trigger anytime the checker encounters a violated constraint. ## Databases supported diff --git a/sdic/constants.py b/sdic/constants.py index ff09527..7289076 100644 --- a/sdic/constants.py +++ b/sdic/constants.py @@ -1 +1 @@ -VERSION = u'0.2.1' +VERSION = u'0.2.2' diff --git a/sdic/main.py b/sdic/main.py index d468236..b59a5a9 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -21,10 +21,10 @@ import sys import os import fnmatch +import logging import prettytable import ConfigParser import time -import syslog from sqlalchemy import create_engine from sqlalchemy import text @@ -33,13 +33,13 @@ from docopt import docopt from constants import VERSION -CONFIG_SERVERS = 'servers.ini' +CONFIG_SERVERS = "servers.ini" +log = logging.getLogger("sdic") -def error(message): - """Print an error message and exit the script""" - print "Error:", message - exit(1) + +class DuplicateColumnNames(Exception): + pass def get_query_files(directory): @@ -52,7 +52,7 @@ def get_query_files(directory): files = [] for found_file in os.listdir(directory): - if fnmatch.fnmatch(found_file, '*.sql'): + if fnmatch.fnmatch(found_file, "*.sql"): files.append(found_file) return files @@ -67,44 +67,50 @@ def launch_queries(directory, server): Returns: Bool value of whether we get query output or not """ - query_folder = os.path.join(directory, server['name']) + query_folder = os.path.join(directory, server["name"]) files = get_query_files(query_folder) produced_output = False for filename in files: - query_filename = os.path.join(directory, server['name'], filename) + query_log = logging.getLogger("sdic.{}".format(filename[:3])) + query_filename = os.path.join(directory, server["name"], filename) output = None - with open(query_filename, 'r') as opened_file: + with open(query_filename, "r") as opened_file: query = opened_file.read() start_time = time.time() try: output = get_query_output(server, query) - except 
DBAPIError: - print "The following SQL query got interrupted:" - print query - print + except DBAPIError as e: + query_log.exception( + "The following SQL query got interrupted: {}".format(query) + ) continue + except DuplicateColumnNames as e: + query_log.exception( + "Caught an error with PrettyTable while trying to format the output of: {}".format( + query + ) + ) + query_time = round(time.time() - start_time, 3) - syslog.syslog('{} successfully ran in {} sec.'.format(filename, - query_time)) + query_log.info( + "{} successfully ran in {} sec.".format(filename, query_time) + ) if output: produced_output = True # Announce that this query has results - print "-----===== /!\ INCOMING BAD DATA /!\ =====-----" - print - print "Server: {}".format(server['name']) - print "File: {}".format(filename) - print - # Display the raw query - print "SQL Query:" - print query - - # Display the results of the query - print output - print + query_log.error( + "-----===== /!\ INCOMING BAD DATA /!\ =====-----", + "\n", + "Server: {}".format(server["name"]), + "File: {}".format(filename), + "\n", + "SQL Query:\n{}".format(query), + output, + ) return produced_output @@ -120,7 +126,7 @@ def get_query_output(server, query): Returns: (str) or None """ - db_url = server['db_url'] + db_url = server["db_url"] # start sqlalchemy engine engine = create_engine(db_url) @@ -135,14 +141,28 @@ def get_query_output(server, query): titles = [] for desc in result.keys(): titles.append(desc) - table = prettytable.PrettyTable(titles) + + try: + table = prettytable.PrettyTable(titles) + except Exception as e: + # PrettyTable raises a generic Exception error for duplicate field names + # This is most likely because of a problem with the query. + # This should propogate the error up to the query_log so that the developer + # can see it. 
+ if e == "Field names must be unique!": + raise DuplicateColumnNames( + "PrettyTable crashed while trying to format row names" + ) + else: + log.exception("A different general exception from PrettyTable.", e) + return None # Fill the table for row in rows: arr = [] for item in row: if isinstance(item, str): - item = unicode(item, 'utf8', 'ignore') + item = unicode(item, "utf8", "ignore") arr.append(item) table.add_row(arr) @@ -163,11 +183,11 @@ def get_servers_from_config(directory): config = ConfigParser.RawConfigParser() config.read(os.path.join(directory, CONFIG_SERVERS)) - valid_config_items = ['db_url'] + valid_config_items = ["db_url"] servers = [] for section in config.sections(): - server = {'name': section} + server = {"name": section} for (item_name, item_value) in config.items(section): if item_name in valid_config_items: server[item_name] = item_value @@ -177,41 +197,31 @@ def get_servers_from_config(directory): def main(): - args = docopt(__doc__, - version="sdic {}".format(VERSION)) + args = docopt(__doc__, version="sdic {}".format(VERSION)) # Check that the given directory exists - if not isdir(args['']): - error("The folder {} does not exist".format(args[''])) + if not isdir(args[""]): + raise IOError("The folder {} does not exist".format(args[""])) # Try to get the config of the servers we are gonna use - servers = get_servers_from_config(args['']) + servers = args[""] or get_servers_from_config(args[""]) # Check that we are not already running program_name = os.path.basename(sys.argv[0]) lock = FileLock("/tmp/{}.lock".format(program_name)) if lock.is_locked(): - error("{} is already running. Delete {} if it's a mistake.".format( - program_name, lock.path)) + raise RuntimeError( + "{} is already running. 
Delete {} if it's a mistake.".format( + program_name, lock.path + ) + ) # Everything's ok, run the main program with lock: - syslog.openlog('sdic') - has_output = False - if not args['']: - for server in servers: - if launch_queries(args[''], server): - has_output = True - else: - for server in servers: - if server['name'] == args['']: - if launch_queries(args[''], server): - has_output = True - if has_output: - return 1 - - syslog.closelog() + for server in servers: + if launch_queries(args[""], server): + return 1 if __name__ == "__main__": From 3f1e49ff96d043c421d876f294d1a348c7105041 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Tue, 15 Jan 2019 01:23:18 +0300 Subject: [PATCH 2/9] change the mdl rules --- .circleci/config.yml | 2 +- README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index beac387..aab33ba 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ jobs: - image: rsrchboy/mdl steps: - checkout - - run: mdl . + - run: mdl --style ./.circleci/.mdlrc . flake8: docker: diff --git a/README.md b/README.md index 22abd84..beb4582 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ sdic uses Python's [`logging`](https://docs.python.org/2.7/library/logging.html) module to log any output in dot separated hierarchical fashion. 
Any general sdic message would look like this: + ``` Jan 11 00:10:19 sdic.enforce_fullname: enforce_fullname.sql successfully ran in 0.029 sec ``` From 3a335ca1d10ffeb40614b19cf6f23a447be46dff Mon Sep 17 00:00:00 2001 From: Dimitry Date: Tue, 15 Jan 2019 01:24:27 +0300 Subject: [PATCH 3/9] added mdlrc --- .circleci/.mdlrc | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .circleci/.mdlrc diff --git a/.circleci/.mdlrc b/.circleci/.mdlrc new file mode 100644 index 0000000..9bff191 --- /dev/null +++ b/.circleci/.mdlrc @@ -0,0 +1,2 @@ +all +rule 'MD013', :code_blocks => false From f9889c17ff13f83115e5233be2418e8298fba72c Mon Sep 17 00:00:00 2001 From: Dimitry Date: Tue, 15 Jan 2019 01:28:08 +0300 Subject: [PATCH 4/9] added black --- .circleci/config.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index aab33ba..5802ec2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,17 +8,16 @@ jobs: - checkout - run: mdl --style ./.circleci/.mdlrc . - flake8: + black: docker: - - image: python:2.7 + - image: 667005031541.dkr.ecr.us-west-1.amazonaws.com/black:18.9b0 steps: - - run: pip install flake8 - checkout - - run: flake8 . + - run: pyfmt workflows: version: 2 sdic: jobs: - mdl - - flake8 + - black From a4b360f16a45bf0d4506bfafa91f23f666ac7200 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Tue, 15 Jan 2019 01:39:50 +0300 Subject: [PATCH 5/9] added mdl language to codeblocks --- README.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index beb4582..9c768e1 100644 --- a/README.md +++ b/README.md @@ -89,12 +89,6 @@ doctor ordered. Run sdic from cron as often as you like, we like to have it run daily. -Example crontab: - -``` -@daily sdic live -``` - ## Monitoring sdic uses Python's [`logging`](https://docs.python.org/2.7/library/logging.html) @@ -102,7 +96,7 @@ module to log any output in dot separated hierarchical fashion. 
Any general sdic message would look like this: -``` +```console Jan 11 00:10:19 sdic.enforce_fullname: enforce_fullname.sql successfully ran in 0.029 sec ``` @@ -145,7 +139,7 @@ only run one of them, an optional `server` argument can be passed as well: If a query produces an output, it will look something like this: -``` +```console -----===== /!\ INCOMING BAD DATA /!\ =====----- Server: big-database From 9a4e30aa64e4f9aeddf56cd787d5213459358062 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Thu, 17 Jan 2019 03:52:30 +0300 Subject: [PATCH 6/9] wip --- sdic/main.py | 53 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/sdic/main.py b/sdic/main.py index b59a5a9..f6745e9 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -11,21 +11,25 @@ See for more info Usage: - sdic [] + sdic [options] [ [--server_url=]] [] Options: -h --help Show this screen. + --output_location= Where to send output [default: stdout] + --server_url= Optionally pass the db_url for the server. 
+ """ -from os.path import isdir -from lockfile import FileLock +from os.path import isfile import sys -import os +from os import walk import fnmatch import logging -import prettytable -import ConfigParser import time +import ConfigParser +from lockfile import FileLock +import prettytable + from sqlalchemy import create_engine from sqlalchemy import text from sqlalchemy.exc import DBAPIError @@ -42,6 +46,7 @@ class DuplicateColumnNames(Exception): pass +# Deprecate def get_query_files(directory): """ Get the list of filenames of SQL files found in the specified folder @@ -52,8 +57,7 @@ def get_query_files(directory): files = [] for found_file in os.listdir(directory): - if fnmatch.fnmatch(found_file, "*.sql"): - files.append(found_file) + files.append(found_file) return files @@ -72,6 +76,7 @@ def launch_queries(directory, server): produced_output = False for filename in files: + # TODO use os.path.splitext query_log = logging.getLogger("sdic.{}".format(filename[:3])) query_filename = os.path.join(directory, server["name"], filename) output = None @@ -199,15 +204,23 @@ def get_servers_from_config(directory): def main(): args = docopt(__doc__, version="sdic {}".format(VERSION)) - # Check that the given directory exists + # 1 if directory, then run on each file in the directory if not isdir(args[""]): raise IOError("The folder {} does not exist".format(args[""])) - # Try to get the config of the servers we are gonna use - servers = args[""] or get_servers_from_config(args[""]) + # create a closure here + launch_query = get_connections_from_config( + args["directory"], args[""], args["--server_url"] + ) + program_name = os.path.basename(sys.argv[0]) + + # TODO move this error into above function + if not server_config: + raise RuntimeError( + "{} cannot run without a server config file.".format(program_name) + ) # Check that we are not already running - program_name = os.path.basename(sys.argv[0]) lock = FileLock("/tmp/{}.lock".format(program_name)) if 
lock.is_locked(): raise RuntimeError( @@ -218,10 +231,18 @@ def main(): # Everything's ok, run the main program with lock: - has_output = False - for server in servers: - if launch_queries(args[""], server): - return 1 + + # Try to get the config of the servers we are gonna use + if args[""]: + launch_query(servers[args[""], args[""]) + + else: + for root_dir, dirs, files in os.walk(args[""]): + server_name = os.path.basename(os.path.normpath(root_dir)) + + for found_files in files: + if fnmatch.fnmatch(found_file, "*.sql"): + launch_query(server_name, found_file) if __name__ == "__main__": From bf03fa5fb5fde65d23665a65ec4f2b18a22b7ec7 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Fri, 18 Jan 2019 03:01:47 +0300 Subject: [PATCH 7/9] wip --- sdic/main.py | 169 ++++++++++++++++++++------------------------------- 1 file changed, 66 insertions(+), 103 deletions(-) diff --git a/sdic/main.py b/sdic/main.py index f6745e9..e1bed88 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -46,80 +46,6 @@ class DuplicateColumnNames(Exception): pass -# Deprecate -def get_query_files(directory): - """ - Get the list of filenames of SQL files found in the specified folder - - Params: directory string - Returns: Array - """ - files = [] - - for found_file in os.listdir(directory): - files.append(found_file) - - return files - - -def launch_queries(directory, server): - """ - Launch the queries found in the specified folder - - Param directory string Folder containing the SQL files - Param server dict describing a server - - Returns: Bool value of whether we get query output or not - """ - query_folder = os.path.join(directory, server["name"]) - files = get_query_files(query_folder) - produced_output = False - - for filename in files: - # TODO use os.path.splitext - query_log = logging.getLogger("sdic.{}".format(filename[:3])) - query_filename = os.path.join(directory, server["name"], filename) - output = None - with open(query_filename, "r") as opened_file: - query = opened_file.read() - - 
start_time = time.time() - try: - output = get_query_output(server, query) - except DBAPIError as e: - query_log.exception( - "The following SQL query got interrupted: {}".format(query) - ) - continue - except DuplicateColumnNames as e: - query_log.exception( - "Caught an error with PrettyTable while trying to format the output of: {}".format( - query - ) - ) - - query_time = round(time.time() - start_time, 3) - - query_log.info( - "{} successfully ran in {} sec.".format(filename, query_time) - ) - if output: - produced_output = True - - # Announce that this query has results - query_log.error( - "-----===== /!\ INCOMING BAD DATA /!\ =====-----", - "\n", - "Server: {}".format(server["name"]), - "File: {}".format(filename), - "\n", - "SQL Query:\n{}".format(query), - output, - ) - - return produced_output - - def get_query_output(server, query): """ Launch a query and display the output in a pretty text table @@ -134,8 +60,6 @@ def get_query_output(server, query): db_url = server["db_url"] # start sqlalchemy engine - engine = create_engine(db_url) - conn = engine.connect() result = conn.execute(text(query)) rows = result.fetchall() @@ -172,54 +96,93 @@ def get_query_output(server, query): table.add_row(arr) - conn.close() - result.close() - return table -def get_servers_from_config(directory): +def get_servers_from_config(directory, server=None, server_url=None, output="stdout"): """ Get the configuration of all the servers in the config file param directory string Folder containing the servers.ini file return List of servers dictionnaries """ - config = ConfigParser.RawConfigParser() - config.read(os.path.join(directory, CONFIG_SERVERS)) + assert os.path.exists(directory, "The folder {} does not exist".format(directory)) + + # TODO determine if this is necessary + assert output in ("stdout","syslog"), "Invalid value for --output_location" + + servers = {} + + if server_url: + engine = create_engine(db_url) + servers[section] = engine.connect() + else: + config = 
ConfigParser.RawConfigParser() + config.read(os.path.join(directory, CONFIG_SERVERS)) + + valid_config_items = ["db_url"] + + for server_name in config.sections(): + if not server or (server and server_name == server): + for (item_name, item_value) in config.items(section): + if item_name is "db_url": + engine = create_engine(db_url) + servers[section] = engine.connect() + + assert servers, "Could not find any server URLs in the server.ini or --server_url." + + def launch_query(server_name, query_filename): + """ + """ + query_log = logging.getLogger("sdic.{}".format(query_filename)) + output = None + with open(query_filename, "r") as opened_file: + query = opened_file.read() + + start_time = time.time() + try: + output = get_query_output(server, query) + except DBAPIError as e: + query_log.exception( + "The following SQL query got interrupted: {}".format(query) + ) + continue + except DuplicateColumnNames as e: + query_log.exception( + "Caught an error with PrettyTable while trying to format the output of: {}".format( + query + ) + ) - valid_config_items = ["db_url"] + query_time = round(time.time() - start_time, 3) - servers = [] - for section in config.sections(): - server = {"name": section} - for (item_name, item_value) in config.items(section): - if item_name in valid_config_items: - server[item_name] = item_value - servers.append(server) + query_log.info( + "{} successfully ran in {} sec.".format(filename, query_time) + ) - return servers + if output: + # Announce that this query has results + query_log.error( + "-----===== /!\ INCOMING BAD DATA /!\ =====-----", + "\n", + "Server: {}".format(server["name"]), + "File: {}".format(filename), + "\n", + "SQL Query:\n{}".format(query), + output, + ) def main(): args = docopt(__doc__, version="sdic {}".format(VERSION)) # 1 if directory, then run on each file in the directory - if not isdir(args[""]): - raise IOError("The folder {} does not exist".format(args[""])) - # create a closure here launch_query = 
get_connections_from_config( - args["directory"], args[""], args["--server_url"] + args["directory"], server=args[""], server_url=args["--server_url"] ) program_name = os.path.basename(sys.argv[0]) - # TODO move this error into above function - if not server_config: - raise RuntimeError( - "{} cannot run without a server config file.".format(program_name) - ) - # Check that we are not already running lock = FileLock("/tmp/{}.lock".format(program_name)) if lock.is_locked(): @@ -234,12 +197,12 @@ def main(): # Try to get the config of the servers we are gonna use if args[""]: - launch_query(servers[args[""], args[""]) + # TODO + launch_query(servers[args[""]], args[""]) else: for root_dir, dirs, files in os.walk(args[""]): server_name = os.path.basename(os.path.normpath(root_dir)) - for found_files in files: if fnmatch.fnmatch(found_file, "*.sql"): launch_query(server_name, found_file) From faa1ce3a1e303ec7beb4230caf7a4c3292a8f326 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Fri, 18 Jan 2019 03:20:55 +0300 Subject: [PATCH 8/9] wip --- sdic/main.py | 86 +++++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/sdic/main.py b/sdic/main.py index e1bed88..a851eef 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -41,12 +41,7 @@ log = logging.getLogger("sdic") - -class DuplicateColumnNames(Exception): - pass - - -def get_query_output(server, query): +def format_query_result(result): """ Launch a query and display the output in a pretty text table @@ -57,34 +52,19 @@ def get_query_output(server, query): Returns: (str) or None """ - db_url = server["db_url"] - - # start sqlalchemy engine - result = conn.execute(text(query)) - rows = result.fetchall() - table = None if result.rowcount > 0: # Get the column titles - titles = [] - for desc in result.keys(): - titles.append(desc) + titles = result.keys() try: table = prettytable.PrettyTable(titles) except Exception as e: # PrettyTable raises a generic Exception error for 
duplicate field names - # This is most likely because of a problem with the query. - # This should propogate the error up to the query_log so that the developer - # can see it. - if e == "Field names must be unique!": - raise DuplicateColumnNames( - "PrettyTable crashed while trying to format row names" - ) - else: - log.exception("A different general exception from PrettyTable.", e) - return None + # Duplicate field names are likely caused by the query. + log.exception("PrettyTable crashed while trying to format row names.", e) + return None # Fill the table for row in rows: @@ -109,7 +89,7 @@ def get_servers_from_config(directory, server=None, server_url=None, output="std assert os.path.exists(directory, "The folder {} does not exist".format(directory)) # TODO determine if this is necessary - assert output in ("stdout","syslog"), "Invalid value for --output_location" + assert output in ("stdout", "syslog"), "Invalid value for --output_location" servers = {} @@ -123,7 +103,7 @@ def get_servers_from_config(directory, server=None, server_url=None, output="std valid_config_items = ["db_url"] for server_name in config.sections(): - if not server or (server and server_name == server): + if not server or (server_name is server): for (item_name, item_value) in config.items(section): if item_name is "db_url": engine = create_engine(db_url) @@ -134,25 +114,25 @@ def get_servers_from_config(directory, server=None, server_url=None, output="std def launch_query(server_name, query_filename): """ """ - query_log = logging.getLogger("sdic.{}".format(query_filename)) + query_log = logging.getLogger( + "sdic.{}".format(os.path.splitext(query_filename)) + ) + query_full_path = os.path.join(directory, server, query_filename) output = None - with open(query_filename, "r") as opened_file: + + with open(query_full_path, "r") as opened_file: query = opened_file.read() start_time = time.time() + try: - output = get_query_output(server, query) + result = 
servers[server_name].execute(query) + rows = result.fetchall() except DBAPIError as e: query_log.exception( "The following SQL query got interrupted: {}".format(query) ) - continue - except DuplicateColumnNames as e: - query_log.exception( - "Caught an error with PrettyTable while trying to format the output of: {}".format( - query - ) - ) + return query_time = round(time.time() - start_time, 3) @@ -160,17 +140,22 @@ def launch_query(server_name, query_filename): "{} successfully ran in {} sec.".format(filename, query_time) ) + output = format_query_output(result) + if output: # Announce that this query has results - query_log.error( - "-----===== /!\ INCOMING BAD DATA /!\ =====-----", - "\n", - "Server: {}".format(server["name"]), - "File: {}".format(filename), - "\n", - "SQL Query:\n{}".format(query), - output, - ) + if output is "syslog": + query_log.error(output) + elif output is "stdout": + print( + "-----===== /!\ INCOMING BAD DATA /!\ =====-----", + "\n", + "Server: {}".format(server_name), + "File: {}".format(filename), + "\n", + "SQL Query:\n{}".format(query), + output, + ) def main(): @@ -194,18 +179,17 @@ def main(): # Everything's ok, run the main program with lock: - # Try to get the config of the servers we are gonna use if args[""]: # TODO launch_query(servers[args[""]], args[""]) - else: for root_dir, dirs, files in os.walk(args[""]): server_name = os.path.basename(os.path.normpath(root_dir)) - for found_files in files: - if fnmatch.fnmatch(found_file, "*.sql"): - launch_query(server_name, found_file) + if not args[""] or (args[""] is server_name): + for found_files in files: + if fnmatch.fnmatch(found_file, "*.sql"): + launch_query(server_name, found_file) if __name__ == "__main__": From e5a47662c732b6e8c6dd67f37d58ed3125784e46 Mon Sep 17 00:00:00 2001 From: Dimitry Date: Mon, 28 Jan 2019 23:55:29 +0300 Subject: [PATCH 9/9] correctly runs through sdic, needs cleanup and tests --- sdic/main.py | 81 
+++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/sdic/main.py b/sdic/main.py index a851eef..56f386b 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -19,11 +19,11 @@ --server_url= Optionally pass the db_url for the server. """ -from os.path import isfile import sys -from os import walk import fnmatch import logging +from os import walk +from os import path import time import ConfigParser @@ -35,12 +35,14 @@ from sqlalchemy.exc import DBAPIError from docopt import docopt -from constants import VERSION +#from constants import VERSION +VERSION = 1 CONFIG_SERVERS = "servers.ini" log = logging.getLogger("sdic") + def format_query_result(result): """ Launch a query and display the output in a pretty text table @@ -79,45 +81,67 @@ def format_query_result(result): return table -def get_servers_from_config(directory, server=None, server_url=None, output="stdout"): +def get_connections_from_config( + directory, server=None, server_url=None, output="stdout" +): """ - Get the configuration of all the servers in the config file + Returns a function for running queries on each database server. Establishes a + connection to each server as needed. - param directory string Folder containing the servers.ini file - return List of servers dictionnaries + Args: + directory (str): Folder containing the servers.ini file and queries. + server (str, optional): Database server name, should also be folder in + directory. Defaults to None. + server_url (str, optional): Database server URL. Defaults to None. + output (str, optional): Where to redirect output, only options are "stdout" and + "syslog". Defaults to "stdout." + Returns: + function: Will run a query on a corresponding server. 
""" - assert os.path.exists(directory, "The folder {} does not exist".format(directory)) + assert path.exists(directory), "The folder {} does not exist".format(directory) # TODO determine if this is necessary assert output in ("stdout", "syslog"), "Invalid value for --output_location" servers = {} + print servers if server_url: engine = create_engine(db_url) - servers[section] = engine.connect() + servers[server] = engine.connect() else: config = ConfigParser.RawConfigParser() - config.read(os.path.join(directory, CONFIG_SERVERS)) + config.read(path.join(directory, CONFIG_SERVERS)) valid_config_items = ["db_url"] for server_name in config.sections(): - if not server or (server_name is server): - for (item_name, item_value) in config.items(section): - if item_name is "db_url": - engine = create_engine(db_url) - servers[section] = engine.connect() + print servers + if not server or (server_name == server): + for (item_name, item_value) in config.items(server_name): + print item_name + print item_value + + if item_name == "db_url": + engine = create_engine(item_value) + servers[server_name] = engine.connect() + print "done" assert servers, "Could not find any server URLs in the server.ini or --server_url." def launch_query(server_name, query_filename): """ + Run query on specific server. + + Args: + server_name (str): database server name. + query_filename (str): name of the file for running queries. 
""" query_log = logging.getLogger( - "sdic.{}".format(os.path.splitext(query_filename)) + "sdic.{}".format(path.splitext(query_filename)) ) - query_full_path = os.path.join(directory, server, query_filename) + print server, query_filename + query_full_path = path.join(directory, server_name, query_filename) output = None with open(query_full_path, "r") as opened_file: @@ -126,6 +150,7 @@ def launch_query(server_name, query_filename): start_time = time.time() try: + print "inside try" result = servers[server_name].execute(query) rows = result.fetchall() except DBAPIError as e: @@ -137,10 +162,10 @@ def launch_query(server_name, query_filename): query_time = round(time.time() - start_time, 3) query_log.info( - "{} successfully ran in {} sec.".format(filename, query_time) + "{} successfully ran in {} sec.".format(query_filename, query_time) ) - output = format_query_output(result) + output = format_query_result(result) if output: # Announce that this query has results @@ -156,6 +181,7 @@ def launch_query(server_name, query_filename): "SQL Query:\n{}".format(query), output, ) + return launch_query def main(): @@ -164,9 +190,9 @@ def main(): # 1 if directory, then run on each file in the directory # create a closure here launch_query = get_connections_from_config( - args["directory"], server=args[""], server_url=args["--server_url"] + args[""], server=args[""], server_url=args["--server_url"] ) - program_name = os.path.basename(sys.argv[0]) + program_name = path.basename(sys.argv[0]) # Check that we are not already running lock = FileLock("/tmp/{}.lock".format(program_name)) @@ -182,12 +208,15 @@ def main(): # Try to get the config of the servers we are gonna use if args[""]: # TODO - launch_query(servers[args[""]], args[""]) + print "query" + launch_query(args[""], args[""]) else: - for root_dir, dirs, files in os.walk(args[""]): - server_name = os.path.basename(os.path.normpath(root_dir)) - if not args[""] or (args[""] is server_name): - for found_files in files: + 
for root_dir, dirs, files in walk(args[""]): + server_name = path.basename(path.normpath(root_dir)) + print root_dir + print server_name + if not args[""] or (args[""] == server_name): + for found_file in files: if fnmatch.fnmatch(found_file, "*.sql"): launch_query(server_name, found_file)