diff --git a/api/api.py b/api/api.py index f0506ad45..e71605ad6 100644 --- a/api/api.py +++ b/api/api.py @@ -7,6 +7,7 @@ from .handlers.containerhandler import ContainerHandler from .handlers.dataexplorerhandler import DataExplorerHandler from .handlers.devicehandler import DeviceHandler +from .handlers.filetypehandler import FileType from .handlers.grouphandler import GroupHandler from .handlers.listhandler import FileListHandler, NotesListHandler, PermissionsListHandler, TagsListHandler from .handlers.refererhandler import AnalysesHandler @@ -43,6 +44,9 @@ # Filename 'fname': '[^/]+', + # File type name + 'ftypename': '[^/]+', + # Note ID 'nid': '[0-9a-f]{24}', @@ -77,9 +81,11 @@ def prefix(path, routes): # System configuration - route('/config', Config, m=['GET']), - route('/config.js', Config, h='get_js', m=['GET']), - route('/version', Version, m=['GET']), + route('/config', Config, m=['GET']), + route('/config.js', Config, h='get_js', m=['GET']), + route('/version', Version, m=['GET']), + route('/filetype', FileType, m=['GET', 'POST']), + route('/filetype/<_id:{ftypename}>', FileType, m=['DELETE']), # General-purpose upload & download diff --git a/api/config.py b/api/config.py index 8bc5375bc..2306cbc94 100644 --- a/api/config.py +++ b/api/config.py @@ -8,7 +8,7 @@ import elasticsearch from . 
import util -from .dao.dbutil import try_replace_one +from .dao.dbutil import try_replace_one, try_update_one logging.basicConfig( format='%(asctime)s %(name)16.16s %(filename)24.24s %(lineno)5d:%(levelname)4.4s %(message)s', @@ -161,6 +161,7 @@ def apply_env_variables(config): 'container.json', 'device.json', 'file.json', + 'filetype.json', 'file-update.json', 'group-new.json', 'group-update.json', @@ -226,6 +227,7 @@ def create_or_recreate_ttl_index(coll_name, index_name, ttl): def initialize_db(): log.info('Initializing database, creating indexes') + # TODO review all indexes db.users.create_index('api_key.key') db.projects.create_index([('gid', 1), ('name', 1)]) @@ -251,7 +253,12 @@ def initialize_db(): create_or_recreate_ttl_index('downloads', 'timestamp', 60) now = datetime.datetime.utcnow() - db.groups.update_one({'_id': 'unknown'}, {'$setOnInsert': { 'created': now, 'modified': now, 'label': 'Unknown', 'permissions': []}}, upsert=True) + try_update_one(db, + 'groups', {'_id': 'unknown'}, + {'$setOnInsert': {'created': now, 'modified': now, 'label': 'Unknown', 'permissions': []}}, + upsert=True) + + log.info('Initializing database, creating indexes ....DONE') def get_config(): global __last_update, __config, __config_persisted #pylint: disable=global-statement diff --git a/api/dao/dbutil.py b/api/dao/dbutil.py index 9d3db91c0..266dc3e4f 100644 --- a/api/dao/dbutil.py +++ b/api/dao/dbutil.py @@ -4,6 +4,7 @@ from pymongo.errors import DuplicateKeyError from ..web.errors import APIStorageException + def try_replace_one(db, coll_name, query, update, upsert=False): """ Mongo does not see replace w/ upsert as an atomic action: @@ -39,3 +40,18 @@ def fault_tolerant_replace_one(db, coll_name, query, update, upsert=False): time.sleep(random.uniform(0.01,0.05)) raise APIStorageException('Unable to replace object.') + + +def try_update_one(db, coll_name, query, update, upsert=False): + """ + Mongo does not see replace w/ upsert as an atomic action: + 
https://jira.mongodb.org/browse/SERVER-14322 + + This function will try an update_one operation, returning the result and if the operation succeeded. + """ + try: + result = db[coll_name].update_one(query, update, upsert=upsert) + except DuplicateKeyError: + return None, False + else: + return result, True diff --git a/api/files.py b/api/files.py index 0585b5993..b5596e2bd 100644 --- a/api/files.py +++ b/api/files.py @@ -1,9 +1,9 @@ -import os import cgi -import json -import shutil -import hashlib import collections +import hashlib +import os +import re +import shutil from . import util from . import config @@ -150,18 +150,15 @@ def get_hash(self): # File extension --> scitran file type detection hueristics. # Listed in precendence order. -with open(os.path.join(os.path.dirname(__file__), 'filetypes.json')) as fd: - TYPE_MAP = json.load(fd) - -KNOWN_FILETYPES = {ext: filetype for filetype, extensions in TYPE_MAP.iteritems() for ext in extensions} def guess_type_from_filename(filename): - particles = filename.split('.')[1:] - extentions = ['.'
+ '.'.join(particles[i:]) for i in range(len(particles))] - for ext in extentions: - filetype = KNOWN_FILETYPES.get(ext.lower()) - if filetype: - break - else: - filetype = None + filetype = None + m_length = 0 + cursor = config.db.filetypes.find({}) + + for document in cursor: + m = re.search(document['regex'], filename) + if m and m_length < len(m.group(0)): + filetype, m_length = document['_id'], len(m.group(0)) + return filetype diff --git a/api/filetypes.json b/api/filetypes.json deleted file mode 100644 index aeef59564..000000000 --- a/api/filetypes.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "bval": [ ".bval", ".bvals" ], - "bvec": [ ".bvec", ".bvecs" ], - "dicom": [ ".dcm", ".dcm.zip", ".dicom.zip" ], - "eeg": [ ".eeg.zip" ], - "gephysio": [ ".gephysio.zip" ], - "ismrmrd": [ ".h5", ".hdf5" ], - "MATLAB data": [ ".mat" ], - "MGH data": [ ".mgh", ".mgz", ".mgh.gz" ], - "nifti": [ ".nii.gz", ".nii" ], - "parrec": [ ".parrec.zip", ".par-rec.zip" ], - "pfile": [ ".7.gz", ".7", ".7.zip" ], - "PsychoPy data": [ ".psydat" ], - "qa": [ ".qa.png", ".qa.json", ".qa.html" ], - - "archive": [ ".zip", ".tbz2", ".tar.gz", ".tbz", ".tar.bz2", ".tgz", ".tar", ".txz", ".tar.xz" ], - "document": [ ".docx", ".doc" ], - "image": [ ".jpg", ".tif", ".jpeg", ".gif", ".bmp", ".png", ".tiff" ], - "markup": [ ".html", ".htm", ".xml" ], - "markdown": [ ".md", ".markdown" ], - "log": [ ".log" ], - "pdf": [ ".pdf" ], - "presentation": [ ".ppt", ".pptx" ], - "source code": [ ".c", ".py", ".cpp", ".js", ".m", ".json", ".java", ".php", ".css", ".toml", ".yaml", ".yml" ], - "spreadsheet": [ ".xls", ".xlsx" ], - "tabular data": [ ".csv.gz", ".csv" ], - "text": [ ".txt" ], - "video": [ ".mpeg", ".mpg", ".mov", ".mp4", ".m4v", ".mts" ] -} diff --git a/api/handlers/filetypehandler.py b/api/handlers/filetypehandler.py new file mode 100644 index 000000000..257eb8d4c --- /dev/null +++ b/api/handlers/filetypehandler.py @@ -0,0 +1,42 @@ +import re + +from ..
import config +from ..auth import require_admin, require_login +from ..validators import validate_data, InputValidationException +from ..web import base + + +class FileType(base.RequestHandler): + + @require_login + def get(self): + """Get file types""" + return config.db.filetypes.find() + + @require_admin + def post(self): + """ + Insert or replace a file type. Required fields: '_id' and 'regex' where the '_id' is the unique name of + the file type and 'regex' is a regular expression which is used to figure out the file type from the file name. + """ + payload = self.request.json_body + validate_data(payload, 'filetype.json', 'input', 'POST') + try: + re.compile(payload['regex']) + except re.error: + raise InputValidationException('Invalid regular expression') + result = config.db.filetypes.replace_one({'_id': payload['_id']}, payload, upsert=True) + if result.acknowledged: + _id = result.upserted_id if result.upserted_id else payload['_id'] + return {'_id': _id} + else: + self.abort(404, 'File type {} not updated'.format(payload['_id'])) + + @require_admin + def delete(self, _id): + """Delete a file type""" + result = config.db.filetypes.delete_one({'_id': _id}) + if result.deleted_count: + return {'deleted': result.deleted_count} + else: + self.abort(404, 'File type {} not removed'.format(_id)) diff --git a/bin/database.py b/bin/database.py index 57672b556..9dcd148c0 100755 --- a/bin/database.py +++ b/bin/database.py @@ -15,6 +15,7 @@ from api import config from api import util +from api import files as files_module from api.dao import containerutil from api.dao.containerstorage import ProjectStorage from api.jobs.jobs import Job @@ -22,7 +23,7 @@ from api.types import Origin from api.jobs import batch -CURRENT_DATABASE_VERSION = 40 # An int that is bumped when a new schema change is made +CURRENT_DATABASE_VERSION = 41 # An int that is bumped when a new schema change is made def get_db_version(): @@ -1301,6 +1302,100 @@ def upgrade_to_40(): cursor = 
config.db.acquisitions.find({'timestamp':{'$type':'string'}}) process_cursor(cursor, upgrade_to_40_closure) + +def upgrade_to_41_closure(cont, context): + """ + Re-type files based on new filetypes stored in mongo collection + """ + + # passing filetypes rather than using util function to speed upgrade and skip db lookup + filetypes = context['filetypes'] + cont_name = context['cont_name'] + + files = cont.get('files', []) + + for f in files: + + new_type = None + m_length = 0 + + for document in filetypes: + m = re.search(document['regex'], f['name']) + if m and m_length < len(m.group(0)): + new_type, m_length = document['_id'], len(m.group(0)) + if new_type is not None: + f['type'] = new_type + + config.db[cont_name].update_one({'_id': cont['_id']}, {'$set': {'files': files}}) + + return True + + +def upgrade_to_41(): + """ + Load initial filetypes into mongo, retype existing files + """ + + # It was decided an initial load of filetypes here for existing users was + # easiest way to move those users forward. Future changes a site's + # filetypes will happen through the API endpoints as expected + filetypes = [ + { "_id": "BVAL", "regex": "\\.(bval|bvals)$" }, + { "_id": "BVEC", "regex": "\\.(bvec|bvecs)$" }, + { "_id": "DICOM", "regex": "\\.(dcm|dcm\\.zip|dicom\\.zip)$" }, + { "_id": "EFile", "regex": "^E.*P.*\\.7$" }, + { "_id": "GE Physio", "regex": "\\.gephysio\\.zip$" }, + { "_id": "MGH Data", "regex": "\\.(mgh|mgz|mgh\\.gz)$" }, + { "_id": "NIfTI", "regex": "\\.(nii\\.gz|nii)$" }, + { "_id": "PAR/REC", "regex": "\\.(parrec\\.zip|par-rec\\.zip)$" }, + { "_id": "PFile Header", "regex": "\\.(7\\.hdr)$" }, + { "_id": "PFile", "regex": "\\.(7\\.gz|7|7\\.zip)$" }, + + { "_id": "EEG", "regex": "\\.eeg\\.zip$" }, + + { "_id": "QC", "regex": "\\.(q[ac]\\.png|q[ac]\\.json|q[ac]\\.html)$" }, + + { "_id": "MATLAB Data", "regex": "\\.mat$" }, + { "_id": "PsychoPy Data", "regex": "\\.psydat$" }, + + { "_id": "C/C++", "regex": "\\.(c|cpp)$" }, + { "_id": "CSS", "regex": "\\.css$" }, + { "_id": 
"HDF5", "regex": "\\.(h5|hdf5)$" }, + { "_id": "HTML", "regex": "\\.(html|htm)$" }, + { "_id": "JSON", "regex": "\\.json$" }, + { "_id": "Java", "regex": "\\.java$" }, + { "_id": "JavaScript", "regex": "\\.js$" }, + { "_id": "Jupyter", "regex": "\\.ipynb$" }, + { "_id": "MATLAB", "regex": "\\.(m|mex|mlx)$" }, + { "_id": "Markdown", "regex": "\\.(md|markdown)$" }, + { "_id": "PHP", "regex": "\\.php$" }, + { "_id": "Plain Text", "regex": "\\.txt$" }, + { "_id": "Python", "regex": "\\.py$" }, + { "_id": "TOML", "regex": "\\.toml$" }, + { "_id": "XML", "regex": "\\.xml$" }, + { "_id": "YAML", "regex": "\\.(yaml|yml)$" }, + + { "_id": "Archive", "regex": "\\.(zip|tbz2|tar\\.gz|tbz|tar\\.bz2|tgz|tar|txz|tar\\.xz)$" }, + { "_id": "Audio", "regex": "\\.(mp3|wav|wave)$" }, + { "_id": "Document", "regex": "\\.(docx|doc)$" }, + { "_id": "Image", "regex": "\\.(jpg|tif|jpeg|gif|bmp|png|tiff)$" }, + { "_id": "Log", "regex": "\\.log$" }, + { "_id": "PDF", "regex": "\\.pdf$" }, + { "_id": "Presentation", "regex": "\\.(ppt|pptx)$" }, + { "_id": "Spreadsheet", "regex": "\\.(xls|xlsx)$" }, + { "_id": "Tabular Data", "regex": "\\.([ct]sv\\.gz|[ct]sv)$" }, + { "_id": "Video", "regex": "\\.(mpeg|mpg|mov|mp4|m4v|mts)$" } + ] + + for ft in filetypes: + config.db.filetypes.replace_one({'_id': ft['_id']}, ft, upsert=True) + + for cont_name in ['projects', 'sessions', 'acquisitions', 'analyses', 'collections']: + + # Find all containers that have at least one file + cursor = config.db[cont_name].find({'files': { '$gt': [] }}) + process_cursor(cursor, upgrade_to_41_closure, context={'filetypes': filetypes, 'cont_name': cont_name}) + ### ### BEGIN RESERVED UPGRADE SECTION ### diff --git a/bin/load_users_drone_secret.py b/bin/load_drone_secret.py similarity index 91% rename from bin/load_users_drone_secret.py rename to bin/load_drone_secret.py index 471a73669..9f170e1fd 100755 --- a/bin/load_users_drone_secret.py +++ b/bin/load_drone_secret.py @@ -62,9 +62,9 @@ def 
_upsert_permission(request_session, api_url, permission_doc, group_id): full_permission_url = "{0}/{1}".format(base_permission_url, permission_doc['_id']) return request_session.put(full_permission_url, json=permission_doc) -def users(filepath, api_url, http_headers, insecure): +def bootstrap(filepath, api_url, http_headers, insecure): """ - Upserts the users/groups/permissions defined in filepath parameter. + Upserts the users/groups/permissions/file types defined in filepath parameter. Raises: requests.HTTPError: Upsert failed. @@ -95,7 +95,7 @@ def users(filepath, api_url, http_headers, insecure): log.info('bootstrapping projects...') for p in input_data.get('projects', []): - r = rs.post(api_url + '/projects?inherit=true' , json=p) + r = rs.post(api_url + '/projects?inherit=true', json=p) r.raise_for_status() project_id = r.json()['_id'] @@ -111,6 +111,11 @@ def users(filepath, api_url, http_headers, insecure): r = rs.post(api_url + '/projects/' + project_id + '/rules', json=rule) r.raise_for_status() + log.info('bootstrapping file types...') + for f in input_data.get('filetypes', []): + r = rs.post(api_url + '/filetype', json=f) + r.raise_for_status() + log.info('bootstrapping complete') @@ -134,7 +139,7 @@ def users(filepath, api_url, http_headers, insecure): # TODO: extend this to support oauth tokens try: - users(args.json, args.url, http_headers, args.insecure) + bootstrap(args.json, args.url, http_headers, args.insecure) except requests.HTTPError as ex: log.error(ex) log.error("request_body={0}".format(ex.response.request.body)) diff --git a/bootstrap.sample.json b/bootstrap.sample.json index e85943b67..85709b432 100644 --- a/bootstrap.sample.json +++ b/bootstrap.sample.json @@ -25,5 +25,52 @@ "_id": "local", "type": "engine" } + ], + "filetypes": [ + { "_id": "BVAL", "regex": "\\.(bval|bvals)$" }, + { "_id": "BVEC", "regex": "\\.(bvec|bvecs)$" }, + { "_id": "DICOM", "regex": "\\.(dcm|dcm\\.zip|dicom\\.zip)$" }, + { "_id": "EFile", "regex": "^E.*P.*\\.7$" 
}, + { "_id": "GE Physio", "regex": "\\.gephysio\\.zip$" }, + { "_id": "MGH Data", "regex": "\\.(mgh|mgz|mgh\\.gz)$" }, + { "_id": "NIfTI", "regex": "\\.(nii\\.gz|nii)$" }, + { "_id": "PAR/REC", "regex": "\\.(parrec\\.zip|par-rec\\.zip)$" }, + { "_id": "PFile Header", "regex": "\\.(7\\.hdr)$" }, + { "_id": "PFile", "regex": "\\.(7\\.gz|7|7\\.zip)$" }, + + { "_id": "EEG", "regex": "\\.eeg\\.zip$" }, + + { "_id": "QC", "regex": "\\.(q[ac]\\.png|q[ac]\\.json|q[ac]\\.html)$" }, + + { "_id": "MATLAB Data", "regex": "\\.mat$" }, + { "_id": "PsychoPy Data", "regex": "\\.psydat$" }, + + { "_id": "C/C++", "regex": "\\.(c|cpp)$" }, + { "_id": "CSS", "regex": "\\.css$" }, + { "_id": "HDF5", "regex": "\\.(h5|hdf5)$" }, + { "_id": "HTML", "regex": "\\.(html|htm)$" }, + { "_id": "JSON", "regex": "\\.json$" }, + { "_id": "Java", "regex": "\\.java$" }, + { "_id": "JavaScript", "regex": "\\.js$" }, + { "_id": "Jupyter", "regex": "\\.ipynb$" }, + { "_id": "MATLAB", "regex": "\\.(m|mex|mlx)$" }, + { "_id": "Markdown", "regex": "\\.(md|markdown)$" }, + { "_id": "PHP", "regex": "\\.php$" }, + { "_id": "Plain Text", "regex": "\\.txt$" }, + { "_id": "Python", "regex": "\\.py$" }, + { "_id": "TOML", "regex": "\\.toml$" }, + { "_id": "XML", "regex": "\\.xml$" }, + { "_id": "YAML", "regex": "\\.(yaml|yml)$" }, + + { "_id": "Archive", "regex": "\\.(zip|tbz2|tar\\.gz|tbz|tar\\.bz2|tgz|tar|txz|tar\\.xz)$" }, + { "_id": "Audio", "regex": "\\.(mp3|wav|wave)$" }, + { "_id": "Document", "regex": "\\.(docx|doc)$" }, + { "_id": "Image", "regex": "\\.(jpg|tif|jpeg|gif|bmp|png|tiff)$" }, + { "_id": "Log", "regex": "\\.log$" }, + { "_id": "PDF", "regex": "\\.pdf$" }, + { "_id": "Presentation", "regex": "\\.(ppt|pptx)$" }, + { "_id": "Spreadsheet", "regex": "\\.(xls|xlsx)$" }, + { "_id": "Tabular Data", "regex": "\\.([ct]sv\\.gz|[ct]sv)$" }, + { "_id": "Video", "regex": "\\.(mpeg|mpg|mov|mp4|m4v|mts)$" } ] } diff --git a/docker/README.md b/docker/README.md index cd98f3cf9..18bf1dc9f 100644 --- 
a/docker/README.md +++ b/docker/README.md @@ -44,7 +44,7 @@ preserving their contents across container instances. --rm \ -v /dev/bali.prod/docker/uwsgi/bootstrap-dev.json:/accounts.json \ scitran-core \ - /var/scitran/code/api/docker/bootstrap-accounts.sh \ + /var/scitran/code/api/docker/bootstrap-defaults.sh \ /accounts.json diff --git a/docker/bootstrap-accounts.sh b/docker/bootstrap-accounts.sh deleted file mode 100755 index e8aab4c1b..000000000 --- a/docker/bootstrap-accounts.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -set -e -set -x - -echo "IN BOOTSTRAP ACCOUNTS" - -( - -# Parse input parameters... -# -# bootstrap account file -bootstrap_user_file=${1:-'/var/scitran/code/api/bootstrap.json.sample'} - - -# Move to API folder for relative path assumptions later on -# -cd /var/scitran/code/api - -# Export PYTHONPATH for python script later on. -# -export PYTHONPATH=. - - -# Bootstrap Users -./bin/load_users_drone_secret.py --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${SCITRAN_SITE_API_URL}" "${bootstrap_user_file}" - - -) diff --git a/docker/bootstrap-defaults.sh b/docker/bootstrap-defaults.sh new file mode 100755 index 000000000..9d1823bac --- /dev/null +++ b/docker/bootstrap-defaults.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -e +set -x + +echo "IN BOOTSTRAP DEFAULTS" + +( + +# Parse input parameters... +# +# bootstrap file +bootstrap_file=${1:-'/var/scitran/code/api/bootstrap.sample.json'} + + +# Move to API folder for relative path assumptions later on +# +cd /var/scitran/code/api + +# Export PYTHONPATH for python script later on. +# +export PYTHONPATH=. 
+ + +# Bootstrap users and file types +./bin/load_drone_secret.py --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${SCITRAN_SITE_API_URL}" "${bootstrap_file}" + + +) diff --git a/raml/schemas/input/filetype.json b/raml/schemas/input/filetype.json new file mode 100644 index 000000000..258d9d926 --- /dev/null +++ b/raml/schemas/input/filetype.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "File type", + "type": "object", + "properties": { + "_id": { + "title": "ID", + "type": "string" + }, + "regex": { + "title": "Regular Expression", + "type": "string" + } + }, + "additionalProperties": false, + "required":["_id", "regex"] +} diff --git a/tests/integration_tests/abao/load_fixture.py b/tests/integration_tests/abao/load_fixture.py index 32c036bdc..1d4113a8a 100644 --- a/tests/integration_tests/abao/load_fixture.py +++ b/tests/integration_tests/abao/load_fixture.py @@ -56,6 +56,12 @@ def main(): r = as_root.post('/groups', json={'_id': 'test-group'}) assert r.ok + # create file types + r = as_root.post('/filetype', json={'_id': 'dicom', 'regex': '\.(dcm|dcm\.zip|dicom\.zip)$'}) + assert r.ok + r = as_root.post('/filetype', json={'_id': 'text', 'regex': '\.txt$'}) + assert r.ok + # upload file to test-project-1/test-session-1/test-acquisition-1 # depends on 'create test-group' r = as_root.post('/upload/label', files={ diff --git a/tests/integration_tests/python/conftest.py b/tests/integration_tests/python/conftest.py index 7cd2593d5..509303c4f 100644 --- a/tests/integration_tests/python/conftest.py +++ b/tests/integration_tests/python/conftest.py @@ -32,6 +32,13 @@ def bootstrap_users(as_drone): return data_builder +@pytest.fixture(scope='session', autouse=True) +def bootstrap_filetypes(as_admin): + """Create file types""" + as_admin.post('/filetype', json={'_id': 'tabular data', 'regex': '\.(csv\.gz|csv)$'}) + as_admin.post('/filetype', json={'_id': 'text', 'regex': '\.txt$'}) + + @pytest.fixture(scope='session') def 
as_drone(): """Return requests session with drone access""" diff --git a/tests/integration_tests/python/test_files.py b/tests/integration_tests/python/test_files.py new file mode 100644 index 000000000..c382fcdcd --- /dev/null +++ b/tests/integration_tests/python/test_files.py @@ -0,0 +1,65 @@ +from api import files + + +def test_extension(as_admin): + r = as_admin.post('/filetype', json={'_id': 'pdf', 'regex': '\.pdf$'}) + assert r.ok + assert files.guess_type_from_filename('example.pdf') == 'pdf' + + +def test_multi_extension(as_admin): + r = as_admin.post('/filetype', + json={'_id': 'archive', + 'regex': '\.zip$'}) + assert r.ok + r = as_admin.post('/filetype', json={'_id': 'gephysio', 'regex': '\.gephysio\.zip$'}) + assert r.ok + assert files.guess_type_from_filename('example.zip') == 'archive' + assert files.guess_type_from_filename('example.gephysio.zip') == 'gephysio' + + +def test_nifti(as_admin): + r = as_admin.post('/filetype', json={'_id': 'nifti', 'regex': '\.(nii\.gz|nii)$'}) + assert r.ok + assert files.guess_type_from_filename('example.nii') == 'nifti' + assert files.guess_type_from_filename('example.nii.gz') == 'nifti' + assert files.guess_type_from_filename('example.nii.x.gz') == None + + +def test_qa(as_admin): + r = as_admin.post('/filetype', json={'_id': 'image', 'regex': '\.png$'}) + assert r.ok + r = as_admin.post('/filetype', json={'_id': 'qa', 'regex': '\.qa\.png$'}) + assert r.ok + assert files.guess_type_from_filename('example.png') == 'image' + assert files.guess_type_from_filename('example.qa.png') == 'qa' + assert files.guess_type_from_filename('example.qa') == None + assert files.guess_type_from_filename('example.qa.png.unknown') == None + + +def test_unknown(): + assert files.guess_type_from_filename('example.unknown') == None + + +def test_get_insert_delete(as_admin): + r = as_admin.get('/filetype') + assert r.ok + r = as_admin.post('/filetype', json={'_id': 'new', 'regex': '\.new$'}) + assert r.ok + assert 
files.guess_type_from_filename('example.new') == 'new' + r = as_admin.post('/filetype', json={'_id': 'new', 'regex': '\.new2$'}) + assert r.ok + assert files.guess_type_from_filename('example.new') == None + assert files.guess_type_from_filename('example.new2') == 'new' + r = as_admin.delete('/filetype/new') + assert r.ok + + +def test_insert_delete_abort(as_admin): + r = as_admin.delete('/filetype/notexists') + assert r.status_code == 404 + + +def test_invalid_regex(as_admin): + r = as_admin.post('/filetype', json={'_id': 'invalid', 'regex': '\\'}) + assert r.status_code == 400 diff --git a/tests/unit_tests/python/test_files.py b/tests/unit_tests/python/test_files.py deleted file mode 100644 index 01b977787..000000000 --- a/tests/unit_tests/python/test_files.py +++ /dev/null @@ -1,25 +0,0 @@ - -import pytest -from api import files - - -def test_extension(): - assert files.guess_type_from_filename('example.pdf') == 'pdf' - -def test_multi_extension(): - assert files.guess_type_from_filename('example.zip') == 'archive' - assert files.guess_type_from_filename('example.gephysio.zip') == 'gephysio' - -def test_nifti(): - assert files.guess_type_from_filename('example.nii') == 'nifti' - assert files.guess_type_from_filename('example.nii.gz') == 'nifti' - assert files.guess_type_from_filename('example.nii.x.gz') == None - -def test_qa(): - assert files.guess_type_from_filename('example.png') == 'image' - assert files.guess_type_from_filename('example.qa.png') == 'qa' - assert files.guess_type_from_filename('example.qa') == None - assert files.guess_type_from_filename('example.qa.png.unknown') == None - -def test_unknown(): - assert files.guess_type_from_filename('example.unknown') == None