diff --git a/.dockerignore b/.dockerignore index 877b7bb5d..29f71d1b1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,3 +5,5 @@ build incoming docker *.zip +logs +backups diff --git a/Makefile b/Makefile index 0aec8dc5f..5f03e5f28 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ SECRETS_DIR=${BUILD_DIR}/secrets DB_PASSWORD_PATH=${SECRETS_DIR}/db_password PGPASS_PATH=${SECRETS_DIR}/.pgpass SECRET_KEY_PATH=${SECRETS_DIR}/django_secret_key -EXT_SECRETS=hcaptcha_secret github_client_secret orcid_client_secret discourse_api_key discourse_sso_secret mail_api_key datacite_api_password youtube_api_key +EXT_SECRETS=hcaptcha_secret github_client_secret orcid_client_secret discourse_api_key discourse_sso_secret mail_api_key datacite_api_password youtube_api_key github_integration_app_private_key github_integration_app_webhook_secret GENERATED_SECRETS=$(DB_PASSWORD_PATH) $(PGPASS_PATH) $(SECRET_KEY_PATH) ENVREPLACE := deploy/scripts/envreplace @@ -32,7 +32,7 @@ include .env .PHONY: build build: docker-compose.yml secrets $(DOCKER_SHARED_DIR) - docker compose build --pull -q + docker compose --progress=plain build --pull --parallel $(BORG_REPO_PATH): wget -c ${BORG_REPO_URL} -P ${BUILD_DIR} @@ -136,7 +136,7 @@ clean_deploy: clean .PHONY: test test: build - docker compose run --rm server /code/deploy/test.sh + docker compose run --rm server /code/deploy/test.sh $(TEST_ARGS) # e2e testing setup @@ -157,3 +157,7 @@ e2e: docker-compose.yml secrets $(DOCKER_SHARED_DIR) $(E2E_REPO_PATH) docker compose -f docker-compose.yml -f e2e.yml exec server bash -c "\ inv borg.restore --force && \ inv prepare" + +.PHONY: gen-secret +gen-secret: + docker compose run --rm server python -c "from django.core.management.utils import get_random_secret_key; print(get_random_secret_key())" diff --git a/base.yml b/base.yml index 616c2828f..469f87b4a 100644 --- a/base.yml +++ b/base.yml @@ -67,6 +67,8 @@ services: - django_secret_key - github_client_secret - orcid_client_secret + - 
github_integration_app_private_key + - github_integration_app_webhook_secret - hcaptcha_secret - mail_api_key - youtube_api_key @@ -99,6 +101,10 @@ secrets: file: ./build/secrets/django_secret_key github_client_secret: file: ./build/secrets/github_client_secret + github_integration_app_private_key: + file: ./build/secrets/github_integration_app_private_key + github_integration_app_webhook_secret: + file: ./build/secrets/github_integration_app_webhook_secret hcaptcha_secret: file: ./build/secrets/hcaptcha_secret mail_api_key: diff --git a/deploy/conf/.env.template b/deploy/conf/.env.template index 8f65f1272..deee75f5e 100644 --- a/deploy/conf/.env.template +++ b/deploy/conf/.env.template @@ -45,6 +45,12 @@ DATACITE_DRY_RUN="true" # allowed values: "true" or "false" # youtube api settings YOUTUBE_CHANNEL_ID= +# github integration app +GITHUB_INTEGRATION_APP_ID= +GITHUB_INTEGRATION_APP_NAME= +GITHUB_INTEGRATION_APP_INSTALLATION_ID= +GITHUB_MODEL_LIBRARY_ORG_NAME= + # test TEST_USER_ID=10000000 TEST_USERNAME=__test_user__ diff --git a/django/Dockerfile b/django/Dockerfile index 6412dcf1b..bdd87a2fc 100644 --- a/django/Dockerfile +++ b/django/Dockerfile @@ -10,12 +10,13 @@ ENV PATH "${VIRTUAL_ENV}/bin:$PATH" RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache RUN --mount=type=cache,target=/var/lib/apt,sharing=locked \ --mount=type=cache,target=/var/cache/apt,sharing=locked \ + DEBIAN_FRONTEND=noninteractive \ sed -i "s|archive.ubuntu.com|${UBUNTU_MIRROR}|" /etc/apt/sources.list \ - && apt-get update \ - && apt-get install -y postgresql-common --no-install-recommends \ + && apt-get -q update \ + && apt-get -q install -y postgresql-common --no-install-recommends \ && yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ + && apt-get -q update \ + && apt-get -q install -y --no-install-recommends \ 
autopostgresqlbackup \ binutils \ borgbackup \ diff --git a/django/core/models.py b/django/core/models.py index 2c394d0fe..163f40b03 100644 --- a/django/core/models.py +++ b/django/core/models.py @@ -457,7 +457,6 @@ def orcid_url(self): def avatar_url(self): if self.picture: return self.picture.get_rendition("fill-150x150").url - return None @property def github_url(self): @@ -466,11 +465,16 @@ def github_url(self): """ return self.get_social_account_profile_url("github") + @property + def github_username(self): + github_account = self.get_social_account("github") + if github_account: + return github_account.extra_data.get("login") + def get_social_account_profile_url(self, provider_name): social_acct = self.get_social_account(provider_name) if social_acct: return social_acct.get_profile_url() - return None def get_social_account(self, provider_name): return self.user.socialaccount_set.filter(provider=provider_name).first() diff --git a/django/core/settings/defaults.py b/django/core/settings/defaults.py index b69678a34..d26f37af4 100644 --- a/django/core/settings/defaults.py +++ b/django/core/settings/defaults.py @@ -553,6 +553,20 @@ def set_environment(env: Environment): GITHUB_CLIENT_ID = os.getenv("GITHUB_CLIENT_ID", "") GITHUB_CLIENT_SECRET = read_secret("github_client_secret") +GITHUB_INTEGRATION_APP_ID = os.getenv("GITHUB_INTEGRATION_APP_ID", "") +GITHUB_INTEGRATION_APP_NAME = os.getenv("GITHUB_INTEGRATION_APP_NAME", "") +GITHUB_INTEGRATION_APP_PRIVATE_KEY = read_secret("github_integration_app_private_key") +GITHUB_INTEGRATION_APP_INSTALLATION_ID = int( + os.getenv("GITHUB_INTEGRATION_APP_INSTALLATION_ID") or 0 +) +GITHUB_INTEGRATION_APP_WEBHOOK_SECRET = read_secret( + "github_integration_app_webhook_secret" +) +GITHUB_MODEL_LIBRARY_ORG_NAME = os.getenv("GITHUB_MODEL_LIBRARY_ORG_NAME", "") +GITHUB_INDIVIDUAL_FILE_SIZE_LIMIT = int( + os.getenv("GITHUB_INDIVIDUAL_FILE_SIZE_LIMIT") or 100 * 1024 * 1024 +) + TEST_BASIC_AUTH_PASSWORD = 
os.getenv("TEST_BASIC_AUTH_PASSWORD", "test password") TEST_USER_ID = os.getenv("TEST_USER_ID", 1000000) TEST_USERNAME = os.getenv("TEST_USERNAME", "__test_user__") diff --git a/django/core/tests/base.py b/django/core/tests/base.py index 7d8852a21..721842f48 100644 --- a/django/core/tests/base.py +++ b/django/core/tests/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod +from pathlib import Path import logging -import os import shlex import shutil import subprocess @@ -179,7 +179,7 @@ def initialize_test_shared_folders(): settings.BACKUP_ROOT, settings.MEDIA_ROOT, ]: - os.makedirs(d, exist_ok=True) + Path(d).mkdir(parents=True, exist_ok=True) subprocess.run( shlex.split("borg init --encryption=none {}".format(settings.BORG_ROOT)), @@ -187,5 +187,13 @@ def initialize_test_shared_folders(): ) +def clear_test_shared_folder(dir=settings.REPOSITORY_ROOT): + for fs in Path(dir).iterdir(): + if fs.is_dir(): + shutil.rmtree(fs, ignore_errors=True) + elif fs.is_file(): + fs.unlink() + + def destroy_test_shared_folders(): - shutil.rmtree(settings.SHARE_DIR, ignore_errors=True) + shutil.rmtree(Path(settings.SHARE_DIR), ignore_errors=True) diff --git a/django/curator/fs.py b/django/curator/fs.py index 5d8eec0f8..6a197fda2 100644 --- a/django/curator/fs.py +++ b/django/curator/fs.py @@ -8,6 +8,8 @@ def fsck(queryset): results = OrderedDict() for release in queryset: + if release.is_imported: + continue rfsc = CodebaseReleaseFileConsistencyChecker(release) errors = rfsc.check() if errors: diff --git a/django/curator/invoke_tasks/borg.py b/django/curator/invoke_tasks/borg.py index 6a880d93e..ff09ce569 100644 --- a/django/curator/invoke_tasks/borg.py +++ b/django/curator/invoke_tasks/borg.py @@ -1,5 +1,4 @@ -import os -import pathlib +from pathlib import Path import shutil import tempfile @@ -9,13 +8,14 @@ from . 
import database as db from core.utils import confirm -DEFAULT_LIBRARY_BASENAME = os.path.basename(settings.LIBRARY_ROOT) -DEFAULT_MEDIA_BASENAME = os.path.basename(settings.MEDIA_ROOT) +DEFAULT_LIBRARY_BASENAME = Path(settings.LIBRARY_ROOT).name +DEFAULT_MEDIA_BASENAME = Path(settings.MEDIA_ROOT).name +DEFAULT_REPOSITORY_BASENAME = Path(settings.REPOSITORY_ROOT).name @task(aliases=["init"]) def initialize_repo(ctx): - if not os.path.exists(settings.BORG_ROOT): + if not Path(settings.BORG_ROOT).exists(): ctx.run(f"borg init --encryption=none {settings.BORG_ROOT}", echo=True) @@ -46,61 +46,85 @@ def backup(ctx): # Borg recognizes {now} as the current timestamp # http://borgbackup.readthedocs.io/en/stable/usage/help.html#borg-help-placeholders archive = "{utcnow}" - library = os.path.relpath(settings.LIBRARY_ROOT, share) - media = os.path.relpath(settings.MEDIA_ROOT, share) - database = os.path.relpath(os.path.join(settings.BACKUP_ROOT, "latest"), share) + share_path = Path(share) + library_root = Path(settings.LIBRARY_ROOT) + media_root = Path(settings.MEDIA_ROOT) + repository_root = Path(settings.REPOSITORY_ROOT) + backup_latest_root = Path(settings.BACKUP_ROOT) / "latest" + + library = library_root.relative_to(share_path) + media = media_root.relative_to(share_path) + repository = repository_root.relative_to(share_path) + database = backup_latest_root.relative_to(share_path) error_msgs = [] - for p in ( - settings.LIBRARY_ROOT, - settings.MEDIA_ROOT, - os.path.join(settings.BACKUP_ROOT, "latest"), - ): - if not os.path.exists(p): + for p in (library_root, media_root, repository_root, backup_latest_root): + if not p.exists(): error_msgs.append(f"Path {p} does not exist.") if error_msgs: raise IOError("Create archive failed. 
{}".format(" ".join(error_msgs))) with ctx.cd(share): ctx.run( - f'borg create --stats --compression lz4 {repo}::"{archive}" {library} {media} {database}', + f'borg create --stats --compression lz4 {repo}::"{archive}" {library} {media} {repository} {database}', echo=True, env=environment(), ) def delete_latest_uncompressed_backup( - src_library=DEFAULT_LIBRARY_BASENAME, src_media=DEFAULT_MEDIA_BASENAME + src_library=DEFAULT_LIBRARY_BASENAME, + src_media=DEFAULT_MEDIA_BASENAME, + src_repository=DEFAULT_REPOSITORY_BASENAME, ): - latest_dest_library = os.path.join(settings.PREVIOUS_SHARE_ROOT, src_library) - latest_dest_media = os.path.join(settings.PREVIOUS_SHARE_ROOT, src_media) + previous = Path(settings.PREVIOUS_SHARE_ROOT) + latest_dest_library = previous / src_library + latest_dest_media = previous / src_media + latest_dest_repository = previous / src_repository shutil.rmtree(latest_dest_library, ignore_errors=True) shutil.rmtree(latest_dest_media, ignore_errors=True) + shutil.rmtree(latest_dest_repository, ignore_errors=True) def rotate_library_and_media_files( working_directory, src_library=DEFAULT_LIBRARY_BASENAME, src_media=DEFAULT_MEDIA_BASENAME, + src_repository=DEFAULT_REPOSITORY_BASENAME, ): """ - Rotate the current library and media files + Rotate the current library, media, and repository files - Current library and media files are moved to the '.latest' folder in case of problems during the restore process. + Current library, media, and repository files are moved to the '.latest' folder in case of problems during the restore process. 
Files in .latest can be deleted if the restore was successful """ - print("rotating library and media files") - delete_latest_uncompressed_backup(src_library=src_library, src_media=src_media) - - os.makedirs(settings.PREVIOUS_SHARE_ROOT, exist_ok=True) - if os.path.exists(settings.LIBRARY_ROOT): - shutil.move(settings.LIBRARY_ROOT, settings.PREVIOUS_SHARE_ROOT) - if os.path.exists(settings.MEDIA_ROOT): - shutil.move(settings.MEDIA_ROOT, settings.PREVIOUS_SHARE_ROOT) + print("rotating library, media, and repository files") + delete_latest_uncompressed_backup( + src_library=src_library, src_media=src_media, src_repository=src_repository + ) - shutil.move(os.path.join(working_directory, src_library), settings.SHARE_DIR) - shutil.move(os.path.join(working_directory, src_media), settings.SHARE_DIR) + previous = Path(settings.PREVIOUS_SHARE_ROOT) + share_root = Path(settings.SHARE_DIR) + library_root = Path(settings.LIBRARY_ROOT) + media_root = Path(settings.MEDIA_ROOT) + repository_root = Path(settings.REPOSITORY_ROOT) + working_root = Path(working_directory) + + previous.mkdir(exist_ok=True) + if library_root.exists(): + shutil.move(library_root, previous) + if media_root.exists(): + shutil.move(media_root, previous) + if repository_root.exists(): + shutil.move(repository_root, previous) + + shutil.move(working_root / src_library, share_root) + shutil.move(working_root / src_media, share_root) + # repository dir may not exist in older backups + src_repository_path = working_root / src_repository + if src_repository_path.exists(): + shutil.move(src_repository_path, share_root) def environment(): @@ -115,11 +139,8 @@ def _restore_files(working_directory): def _restore_database(ctx, working_directory, target_database): - dumpfile_dir = pathlib.Path( - os.path.join( - working_directory, os.path.basename(settings.BACKUP_ROOT), "latest" - ) - ) + backup_root_name = Path(settings.BACKUP_ROOT).name + dumpfile_dir = Path(working_directory) / backup_root_name / "latest" if not 
dumpfile_dir.exists(): raise IOError("dumpfile_dir {} not found".format(dumpfile_dir)) dumpfile = str(list(dumpfile_dir.glob("comsesnet*"))[0]) diff --git a/django/curator/tests/test_dump_restore.py b/django/curator/tests/test_dump_restore.py index 0d232ebb9..b7b71a39b 100644 --- a/django/curator/tests/test_dump_restore.py +++ b/django/curator/tests/test_dump_restore.py @@ -19,7 +19,7 @@ from core.tests.base import EventFactory, JobFactory from library.fs import import_archive from library.models import Codebase -from library.tests.base import CodebaseFactory +from library.tests.base import CodebaseFactory, TEST_SAMPLES_DIR logger = logging.getLogger(__name__) @@ -51,7 +51,7 @@ def setUp(self): fs_api = self.release.get_fs_api() import_archive( codebase_release=self.release, - nested_code_folder_name="library/tests/archives/nestedcode", + nested_code_folder_name=TEST_SAMPLES_DIR / "archives" / "nestedcode", fs_api=fs_api, ) diff --git a/django/deploy/test.sh b/django/deploy/test.sh index 2bd47f701..770f9f7fe 100755 --- a/django/deploy/test.sh +++ b/django/deploy/test.sh @@ -8,4 +8,10 @@ initdb() { env DJANGO_SETTINGS_MODULE="core.settings.test" invoke db.init } initdb -exec env DJANGO_SETTINGS_MODULE="core.settings.test" invoke collectstatic test --tests="$@" --coverage \ No newline at end of file + +if [ "$#" -gt 0 ]; then + TEST_SELECTOR="$*" + exec env DJANGO_SETTINGS_MODULE="core.settings.test" invoke collectstatic test --tests="$TEST_SELECTOR" --coverage +fi + +exec env DJANGO_SETTINGS_MODULE="core.settings.test" invoke collectstatic test --coverage \ No newline at end of file diff --git a/django/library/fs.py b/django/library/fs.py index bbd13f6b5..a0a28715f 100644 --- a/django/library/fs.py +++ b/django/library/fs.py @@ -1,3 +1,7 @@ +from abc import ABC, abstractmethod +import json +import requests +import yaml import logging import mimetypes import os @@ -5,11 +9,14 @@ import shutil import tarfile import zipfile +import filecmp +from packaging.version 
import Version from enum import Enum from functools import total_ordering from pathlib import Path from tempfile import TemporaryDirectory -from typing import Optional +from typing import Callable, Optional +from git import Actor, GitCommandError, InvalidGitRepositoryError, Repo import bagit import rarfile @@ -17,6 +24,7 @@ from django.core.files.storage import FileSystemStorage from django.core.files.uploadedfile import File from django.urls import reverse +from django.utils import timezone from rest_framework.exceptions import ValidationError from core import fs @@ -37,13 +45,14 @@ class StagingDirectories(Enum): aip = 3 -class FileCategoryDirectories(Enum): +class FileCategories(Enum): code = 1 data = 2 docs = 3 media = 4 originals = 5 results = 6 + metadata = 7 @total_ordering @@ -72,25 +81,25 @@ def downgrade(self, minimum=0): ACCEPT_ALL_REGEX = re.compile(r".*") MIMETYPE_MATCHER = { - FileCategoryDirectories.code: ACCEPT_ALL_REGEX, - FileCategoryDirectories.data: ACCEPT_ALL_REGEX, - FileCategoryDirectories.docs: re.compile( + FileCategories.code: ACCEPT_ALL_REGEX, + FileCategories.data: ACCEPT_ALL_REGEX, + FileCategories.docs: re.compile( r"text/markdown|application/pdf|text/plain|text/x-rtf|application/vnd.oasis.opendocument.text" ), - FileCategoryDirectories.media: re.compile(r"image/.*|video/.*"), - FileCategoryDirectories.originals: ACCEPT_ALL_REGEX, - FileCategoryDirectories.results: ACCEPT_ALL_REGEX, + FileCategories.media: re.compile(r"image/.*|video/.*"), + FileCategories.originals: ACCEPT_ALL_REGEX, + FileCategories.results: ACCEPT_ALL_REGEX, } -def get_category(name) -> FileCategoryDirectories: +def get_category(name) -> FileCategories: category_name = Path(name).parts[0] try: - return FileCategoryDirectories[category_name] + return FileCategories[category_name] except KeyError: raise ValidationError( "Target folder name {} invalid. 
Must be one of {}".format( - category_name, list(d.name for d in FileCategoryDirectories) + category_name, list(d.name for d in FileCategories) ) ) @@ -222,7 +231,7 @@ def validate(self): msgs.append(self.validate_file(filename, content)) return msgs - def list(self, category: Optional[FileCategoryDirectories] = None, absolute=False): + def list(self, category: Optional[FileCategories] = None, absolute=False): path = Path(self.location) if category is not None: path = path.joinpath(category.name) @@ -265,7 +274,7 @@ def log_save(self, name, content): msgs.append(self.error(e)) return msgs - def clear_category(self, category: FileCategoryDirectories): + def clear_category(self, category: FileCategories): shutil.rmtree(os.path.join(self.location, category.name), ignore_errors=True) def clear(self): @@ -284,7 +293,7 @@ def log_delete(self, name): class CodebaseReleaseOriginalStorage(CodebaseReleaseStorage): stage = StagingDirectories.originals - def get_existing_archive_name(self, category: FileCategoryDirectories): + def get_existing_archive_name(self, category: FileCategories): for p in self.list(category): if p.is_file() and fs.is_archive(p): return str(p) @@ -340,17 +349,15 @@ def make_bag(self, metadata): class CodebaseReleaseAipStorage(CodebaseReleaseStorage): """Places files from the sip folder into aip""" + stage = StagingDirectories.aip + def import_sip(self, sip_storage: CodebaseReleaseSipStorage): shutil.copytree(sip_storage.location, self.location) -class CodebaseReleaseFsApi: +class BaseCodebaseReleaseFsApi(ABC): """ - Interface to maintain files associated with a codebase - - FIXME: This is not currently protected against concurrent file access but only the submitter can edit files - associated with a codebase release at the moment. 
Will need to implement file locks if/when this assumption fails to - hold + Base interface to maintain files associated with a codebase release """ def __init__( @@ -460,7 +467,7 @@ def get_stage_storage(self, stage: StagingDirectories): else: raise ValueError(f"StageDirectories values {stage} not valid") - def get_sip_list_url(self, category: FileCategoryDirectories): + def get_sip_list_url(self, category: FileCategories): return reverse( "library:codebaserelease-sip-files-list", kwargs={ @@ -470,7 +477,7 @@ def get_sip_list_url(self, category: FileCategoryDirectories): }, ) - def get_originals_list_url(self, category: FileCategoryDirectories): + def get_originals_list_url(self, category: FileCategories): return reverse( "library:codebaserelease-original-files-list", kwargs={ @@ -480,7 +487,7 @@ def get_originals_list_url(self, category: FileCategoryDirectories): }, ) - def get_absolute_url(self, category: FileCategoryDirectories, relpath: Path): + def get_absolute_url(self, category: FileCategories, relpath: Path): return reverse( "library:codebaserelease-original-files-detail", kwargs={ @@ -512,7 +519,10 @@ def initialize( mimetype_mismatch_message_level=MessageLevels.error, bagit_info=None, ): - fs_api = CodebaseReleaseFsApi( + """Initialize a new FS Api instance for a codebase release, including creating + the SIP directory and bagging the contents if it does not already exist + """ + fs_api = cls( codebase_release, system_file_presence_message_level=system_file_presence_message_level, mimetype_mismatch_message_level=mimetype_mismatch_message_level, @@ -564,9 +574,6 @@ def create_or_update_license(self, force=False): return False def build_published_archive(self, force=False): - """ - FIXME: some of this should be moved to an async processing task. 
- """ self.create_or_update_codemeta(force=force) self.create_or_update_citation_cff(force=force) self.create_or_update_license(force=force) @@ -607,19 +614,100 @@ def archive_size(self): def review_archive_size(self): return self.review_archivepath.stat().st_size - def clear_category(self, category: FileCategoryDirectories): - originals_storage = self.get_originals_storage() - originals_storage.clear_category(category) - sip_storage = self.get_sip_storage() - sip_storage.clear_category(category) + @abstractmethod + def list(self, stage: StagingDirectories, category: Optional[FileCategories]): + pass + + @abstractmethod + def list_sip_contents(self, path=None) -> dict: + pass + + @abstractmethod + def check_category_file_exists(self, category: FileCategories) -> bool: + """returns True if at least one file with the given category exists + in the sip storage, False otherwise + """ + pass + + def get_or_create_sip_bag(self, bagit_info=None): + sip_dir = str(self.sip_dir) + logger.info("creating bagit metadata at %s", sip_dir) + bag = fs.make_bag(sip_dir, bagit_info) + bag.save(manifests=True) + return bag + + def build_aip(self, sip_dir: Optional[str] = None): + logger.info("building aip") + if sip_dir is None: + sip_dir = str(self.sip_dir) + shutil.rmtree(str(self.aip_dir), ignore_errors=True) + shutil.copytree(sip_dir, str(self.aip_dir)) - def list( - self, stage: StagingDirectories, category: Optional[FileCategoryDirectories] + def build_archive_at_dest(self, dest): + logger.info("building archive") + self.build_aip() + if self.aip_contents_dir.exists(): + with zipfile.ZipFile(dest, "w") as archive: + for root_path, dirs, file_paths in os.walk(str(self.aip_contents_dir)): + for file_path in file_paths: + path = Path(root_path, file_path) + archive.write( + str(path), + arcname=str(path.relative_to(self.aip_contents_dir)), + ) + logger.info("building archive succeeded") + return True + else: + logger.error("building archive failed - no aip directory") + return 
False + + def build_archive(self, force=False): + if not self.archivepath.exists() or force: + self.build_archive_at_dest(dest=str(self.archivepath)) + + def create_or_update_metadata_files(self, force=False): + self.create_or_update_codemeta(force=force) + self.create_or_update_citation_cff(force=force) + self.create_or_update_license(force=force) + + def rebuild_metadata(self): + self.create_or_update_metadata_files(force=True) + # only rebuild the archive package if it already exists + if self.aip_dir.exists(): + self.build_archive(force=True) + + +class CodebaseReleaseFsApi(BaseCodebaseReleaseFsApi): + """ + File system API for managing a non-imported (regular, directly uploaded) codebase release. + + NOTE: This is not currently protected against concurrent file access but only the submitter can edit files + associated with a codebase release at the moment. Will need to implement file locks if/when this assumption fails to + hold + """ + + def __init__( + self, + codebase_release, + system_file_presence_message_level=MessageLevels.error, + mimetype_mismatch_message_level=MessageLevels.error, ): + if codebase_release.is_imported: + raise ValueError("CodebaseRelease must be a non-imported release") + super().__init__( + codebase_release, + system_file_presence_message_level, + mimetype_mismatch_message_level, + ) + + def list(self, stage, category): stage_storage = self.get_stage_storage(stage) return [str(p) for p in stage_storage.list(category)] def list_sip_contents(self, path=None): + """recursively build a tree representing the SIP contents. 
+ Each node includes a label (file name), path (relative to sip contents), and category + """ if path is None: path = self.sip_contents_dir name = "archive-project-root" @@ -630,41 +718,42 @@ def list_sip_contents(self, path=None): if p.is_dir(): contents["contents"].append(self.list_sip_contents(p)) else: - contents["contents"].append({"label": p.name}) + try: + rel_parent = p.parent.relative_to(self.sip_contents_dir) + category_str = ( + str(rel_parent) + if rel_parent != Path(".") + else FileCategories.metadata.name + ) + except ValueError: + # parent is not a subdirectory of sip_contents_dir + category_str = FileCategories.metadata.name + contents["contents"].append( + { + "label": p.name, + "path": str(p.relative_to(self.sip_contents_dir)), + "category": category_str, + } + ) return contents + def check_category_file_exists(self, category): + sip_storage = self.get_sip_storage() + category_dir_exists = sip_storage.exists(category.name) + category_dir_list = list(sip_storage.list(category)) + return category_dir_exists and bool(category_dir_list) + def retrieve( self, stage: StagingDirectories, - category: FileCategoryDirectories, + category: FileCategories, relpath: Path, ): stage_storage = self.get_stage_storage(stage) relpath = Path(category.name, relpath) return stage_storage.open(str(relpath)) - def delete(self, category: FileCategoryDirectories, relpath: Path): - originals_storage = self.get_originals_storage() - sip_storage = self.get_sip_storage() - relpath = Path(category.name, relpath) - logs = MessageGroup() - if originals_storage.is_archive_directory(category): - self.clear_category(category) - else: - if not originals_storage.exists(str(relpath)): - logs.append( - create_fs_message( - f"No file at path {relpath} to delete", - StagingDirectories.originals, - MessageLevels.error, - ) - ) - return logs - logs.append(sip_storage.log_delete(str(relpath))) - logs.append(originals_storage.log_delete(str(relpath))) - return logs - - def _add_to_sip(self, 
name, content, category: FileCategoryDirectories): + def _add_to_sip(self, name, content, category: FileCategories): sip_storage = self.get_sip_storage() filename = self.originals_dir.joinpath(name) if fs.is_archive(name): @@ -673,20 +762,40 @@ def _add_to_sip(self, name, content, category: FileCategoryDirectories): else: return sip_storage.log_save(name=name, content=content) - def add_category(self, category: FileCategoryDirectories, src): - logger.info("adding category %s", category.name) - originals_storage = self.get_originals_storage() + def build_sip(self) -> MessageGroup: + logger.info("building sip") + originals_storage = self.get_originals_storage(self.originals_dir) + sip_storage = self.get_sip_storage() + sip_storage.clear() + msgs = self._create_msg_group() - for dirpath, dirnames, filenames in os.walk(src): - for filename in filenames: - filename = os.path.join(dirpath, filename) - name = os.path.join(category.name, str(Path(filename).relative_to(src))) - logger.debug("adding file %s", name) - with open(filename, "rb") as content: - msgs.append(originals_storage.log_save(name, content)) + for name in originals_storage.list(): + path = self.originals_dir.joinpath(name) + logger.debug("adding file: %s", path.relative_to(self.originals_dir)) + category = get_category(Path(name).parts[0]) + with File(path.open("rb")) as f: + msgs.append( + self._add_to_sip(name=str(name), content=f, category=category) + ) + + return msgs + + def rebuild(self) -> MessageGroup: + """rebuild the submission package and archive if it already exists""" + msgs = self.build_sip() + self.create_or_update_metadata_files(force=True) + # only rebuild the archive package if it already exists + if self.aip_dir.exists(): + self.build_archive(force=True) return msgs - def add(self, category: FileCategoryDirectories, content, name=None): + def clear_category(self, category: FileCategories): + originals_storage = self.get_originals_storage() + originals_storage.clear_category(category) + 
sip_storage = self.get_sip_storage() + sip_storage.clear_category(category) + + def add(self, category: FileCategories, content, name=None): if name is None: name = os.path.join(category.name, content.name) else: @@ -712,7 +821,7 @@ def copy_originals(self, source_release): self.identifier, ) source_fs_api = source_release.get_fs_api() - for category in FileCategoryDirectories: + for category in FileCategories: source_files = source_fs_api.list(StagingDirectories.originals, category) for relpath in source_files: with source_fs_api.retrieve( @@ -720,78 +829,606 @@ def copy_originals(self, source_release): ) as file_content: self.add(category, file_content, name=relpath) - def get_or_create_sip_bag(self, bagit_info=None): - sip_dir = str(self.sip_dir) - logger.info("creating bagit metadata at %s", sip_dir) - bag = fs.make_bag(sip_dir, bagit_info) - bag.save(manifests=True) - return bag + def delete(self, category: FileCategories, relpath: Path): + originals_storage = self.get_originals_storage() + sip_storage = self.get_sip_storage() + relpath = Path(category.name, relpath) + logs = MessageGroup() + if originals_storage.is_archive_directory(category): + self.clear_category(category) + else: + if not originals_storage.exists(str(relpath)): + logs.append( + create_fs_message( + f"No file at path {relpath} to delete", + StagingDirectories.originals, + MessageLevels.error, + ) + ) + return logs + logs.append(sip_storage.log_delete(str(relpath))) + logs.append(originals_storage.log_delete(str(relpath))) + return logs - def build_sip(self, originals_dir: Optional[str] = None): - logger.info("building sip") - if originals_dir is None: - originals_dir = self.originals_dir - originals_storage = self.get_originals_storage(originals_dir) + +class CategoryManifestManager: + def __init__(self, imported_release_sync_state): + self.imported_release_sync_state = imported_release_sync_state + + @property + def data(self) -> dict: + return 
self.imported_release_sync_state.category_manifest + + def build(self, file_list: list[Path]): + """generate a manifest from scratch from a list of files (normally sip.list()). + This overwrites the existing manifest + """ + manifest = {} + for name in file_list: + manifest[str(name)] = self._guess_file_category(name) + self.update(manifest) + + def _guess_file_category(self, name: Path) -> str: + """return an appropriate category name for a file based on its extension. + currently defaults to code for all files except pdfs, which can be reasonably assumed to be docs + """ + if ( + name.suffix == ".pdf" + or name.suffix == ".docx" + or name.suffix == ".doc" + or name.suffix == ".md" + ): + return FileCategories.docs.name + return FileCategories.code.name + + def update(self, manifest): + """save the manifest to the imported release package""" + self.imported_release_sync_state.category_manifest = manifest + self.imported_release_sync_state.save() + + def update_file_category(self, name, category: FileCategories): + manifest = self.data + if name not in manifest: + raise ValueError(f"file {name} not in manifest") + manifest[name] = category.name + self.update(manifest) + + def remove_file(self, name): + manifest = self.data + del manifest[name] + self.update(manifest) + + def add_file(self, name, category: FileCategories = FileCategories.code): + manifest = self.data + manifest[name] = category.name + self.update(manifest) + + def fix_from_list(self, file_list: list[Path]): + """update the manifest to match the file list. 
This will add any files in the file list that are not in the + manifest, and remove any files in the manifest that are not in the file list + """ + manifest = self.data + file_list_keys: set[str] = set() + for name in file_list: + key = str(name) + file_list_keys.add(key) + if key not in manifest: + manifest[key] = self._guess_file_category(name) + for key in list(manifest.keys()): + if key not in file_list_keys: + del manifest[key] + self.update(manifest) + + +class ImportedCodebaseReleaseFsApi(BaseCodebaseReleaseFsApi): + """ + File system API for managing an imported (i.e. from a GitHub release) codebase release. + + NOTE: This is not currently protected against concurrent file access but only the submitter can edit files + associated with a codebase release at the moment. Will need to implement file locks if/when this assumption fails to + hold + """ + + def __init__( + self, + codebase_release, + system_file_presence_message_level=MessageLevels.error, + mimetype_mismatch_message_level=MessageLevels.error, + ): + self.imported_release_sync_state = codebase_release.imported_release_sync_state + if not self.imported_release_sync_state: + raise ValueError("CodebaseRelease must be an imported release") + super().__init__( + codebase_release, + system_file_presence_message_level, + mimetype_mismatch_message_level, + ) + self.imported_release_sync_state = codebase_release.imported_release_sync_state + self.manifest = CategoryManifestManager(self.imported_release_sync_state) + + def list(self, stage=StagingDirectories.sip, category=None): + if category is not None: + return [ + str(relpath) + for relpath, cat in self.manifest.data.items() + if cat == category.name + ] + else: + return list(self.manifest.data.keys()) + + def list_sip_contents(self, path=None): + """recursively build a tree representing the SIP contents. 
def download_archive(
    self, download_url: str, installation_token: str, timeout: float = 60
) -> Path:
    """Download a release package archive from a remote URL and place it in
    the originals stage directory, replacing whatever was stored there before.

    The file name comes from the response's content-disposition header when it
    carries one, otherwise it falls back to "<tag_name>.zip". In both cases the
    name is reduced to a single path component so that neither a remote header
    nor a tag containing "/" can direct the write outside of (or into a missing
    subdirectory of) the originals directory.

    :param download_url: URL of the archive to download
    :param installation_token: token sent as a Bearer Authorization header
    :param timeout: seconds to wait on the connection/each read before giving up
    :return: path of the downloaded archive
    :raises requests.HTTPError: if the server responds with an error status
    """

    def single_component(raw: str) -> str:
        # drop surrounding quotes, treat both separator styles as separators
        # and keep only the last component
        candidate = raw.strip().strip("\"'").replace("\\", "/").rsplit("/", 1)[-1]
        return "" if candidate in ("", ".", "..") else candidate

    originals_storage = self.get_originals_storage()
    os.makedirs(originals_storage.location, exist_ok=True)
    originals_storage.clear()
    headers = {
        "Authorization": f"Bearer {installation_token}",
    }
    # the context manager releases the streamed connection even if writing fails
    with requests.get(
        download_url, headers=headers, stream=True, timeout=timeout
    ) as response:
        response.raise_for_status()
        filename = ""
        cd = response.headers.get("content-disposition")
        if cd and "filename=" in cd:
            # stop at ";" so that trailing header parameters are not swallowed
            match = re.search(r"filename=([^;]+)", cd)
            if match:
                filename = single_component(match.group(1))
        if not filename:
            tag_name = self.imported_release_sync_state.tag_name
            fallback = f"{tag_name}.zip".replace("/", "-")
            filename = single_component(fallback) or "archive.zip"
        file_path = Path(originals_storage.location) / filename
        with file_path.open("wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    logger.info(f"downloaded imported release archive to {file_path}")
    return file_path
+ + returns a tuple of dicts representing extracted metadata from known metadata files found in the archive, + currently: (codemeta.json, CITATION.cff) + + NOTE: currently only supports zip archives + """ + if download_url is None: + download_url = self.imported_release_sync_state.download_url + archive_path = self.download_archive(download_url, installation_token) + self.extract_to_sip(archive_path) + sip_contents = list(self.get_sip_storage().list()) + self.manifest.build(sip_contents) + return self._extract_metadata_files(sip_contents) + + def _extract_metadata_files(self, sip_contents) -> tuple[dict, dict]: + """searches the extracted archive for known metadata files and returns their contents + + returns a tuple of dicts, currently: (codemeta.json, CITATION.cff) + """ + + def find_file(file_list, target: str) -> Path | None: + """ + search for a target file in the provided list of paths. + target is case-insensitive + """ + # check files in the root first + for f in file_list: + if len(f.parts) == 1 and f.name.lower() == target.lower(): + return f + for f in file_list: + if f.name.lower() == target.lower(): + return f + return None + + codemeta_path = find_file(sip_contents, "codemeta.json") + cff_path = find_file(sip_contents, "CITATION.cff") + codemeta = None + cff = None + + if codemeta_path: + try: + with self.get_sip_storage().open(str(codemeta_path), mode="r") as f: + file_content = f.read() + parsed = json.loads(file_content) + codemeta = parsed if isinstance(parsed, dict) else None + except Exception: + codemeta = None + + if cff_path: + try: + with self.get_sip_storage().open(str(cff_path), mode="r") as f: + file_content = f.read() + parsed = yaml.safe_load(file_content) + cff = parsed if isinstance(parsed, dict) else None + except Exception: + cff = None + + return codemeta, cff + + +class CodebaseGitRepositoryApi: + """ + Manage a (local) git repository mirror of a codebase + """ + + FILE_SIZE_LIMIT = settings.GITHUB_INDIVIDUAL_FILE_SIZE_LIMIT + 
def initialize(self, should_exist=False):
    """
    initialize the git repository or connect to an existing one, the resulting
    repository object is stored on self.repo

    :param should_exist: if True, raise an error if the repository does not exist
    :raises RuntimeError: if the repository is required but missing, or if opening
        it fails for any other reason (the original error is chained as the cause)
    """
    if not self.repo_dir.exists():
        if should_exist:
            raise RuntimeError(f"Repository {self.repo_dir} does not exist")
        self.repo_dir.mkdir(parents=True)
    try:
        self.repo = Repo(self.repo_dir)
    except InvalidGitRepositoryError as e:
        # the directory exists but is not a git repository (yet)
        if should_exist:
            raise RuntimeError(f"Repository {self.repo_dir} does not exist") from e
        self.repo = Repo.init(
            self.repo_dir, initial_branch=self.DEFAULT_BRANCH_NAME
        )
    except Exception as e:
        logger.exception(e)
        # keep the underlying error attached so the failure can be diagnosed
        raise RuntimeError("Failed to initialize git repository") from e
def add_release_files(self, release):
    """
    copy over submission package files for a release to the working tree of the git repo
    starting from a clean directory by removing all files except .git/

    all copied files are staged with a single index update instead of one
    index write per file
    """
    release_fs_api: CodebaseReleaseFsApi = release.get_fs_api()
    sip_storage = release_fs_api.get_sip_storage()
    self.clear_existing_files()
    # copy over files from the sip storage, remembering their repo-relative paths
    # FIXME: consider moving this copy all operation to the CodebaseReleaseStorage class
    staged_paths = []
    for file in sip_storage.list(absolute=True):
        rel_path = file.relative_to(sip_storage.location)
        dest_path = self.repo_dir / rel_path
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(file, dest_path)
        staged_paths.append(str(rel_path))
    # nothing to stage for an empty submission package
    if staged_paths:
        self.repo.index.add(staged_paths)
def commit_release(self, release):
    """
    commit whatever is staged for the release and tag the new commit with the
    release version number, call this only after all release files were added

    returns a tuple of (commit, tag_name)
    """

    def coauthor_trailer(release_contributor):
        person = release_contributor.contributor
        # prefer the co-author's github noreply address, otherwise leave it blank
        if person.user and person.user.member_profile.github_username:
            address = f"{person.user.member_profile.github_username}@users.noreply.github.com"
        else:
            address = ""
        return f"\nCo-authored-by: {person.name} <{address}>"

    # the commit always lands on main and the release branch is created afterwards,
    # on the very first commit main does not exist yet so there is nothing to check out
    main_exists = self.DEFAULT_BRANCH_NAME in self.repo.heads
    if main_exists:
        self.checkout_main()

    message = f"Release {release.version_number}\n\n{release.release_notes.raw}\n"
    message += "".join(
        coauthor_trailer(rc) for rc in release.coauthor_release_contributors
    )
    new_commit = self.repo.index.commit(
        message=message,
        committer=self.committer,
        author=self.author,
        author_date=release.last_published_on,
    )
    version_tag = release.version_number
    self.repo.create_tag(version_tag)
    return new_commit, version_tag
def append_releases(self, releases=None) -> Repo:
    """
    append new releases on top of the existing history of the git repository

    every release must have a higher version than the latest mirrored release.
    Use this only when no release was removed or modified since the last build,
    those cases require rewriting history which this method never does

    :param releases: list of releases to append, if None (or empty), all
        unmirrored releases will be appended
    :raises ValueError: if any release is not higher than the latest mirrored one
    """
    self.check_file_sizes(self.codebase)
    # default to the internal public releases that have no build state yet
    pending = releases or self.codebase.releases_without_git_ref_sync_state()
    if not pending:
        # nothing to do, hand back the repo as it is
        return Repo(self.repo_dir)
    self.initialize(should_exist=True)
    latest_built_state = self.codebase.latest_release_git_ref_sync_state()
    if latest_built_state is not None:
        # strictly appending: nothing may sort at or below the mirrored head
        mirrored_version = Version(latest_built_state.release.version_number)
        if any(
            Version(release.version_number) <= mirrored_version
            for release in pending
        ):
            raise ValueError(
                "Releases must be higher than the latest mirrored release to append"
            )
    # oldest first: add files, commit and branch each release in version order
    for release in sorted(pending, key=lambda r: Version(r.version_number)):
        self.build_release_refs(release)
    self.checkout_main(update_main_git_ref_sync_state=True)
    return Repo(self.repo_dir)
def dirs_equal(self, dir1: Path, dir2: Path, ignore=[".git"]):
    """
    check if two directories are equal by recursively comparing their contents
    excluding the names in the ignore list (default is just .git)

    the ignore list applies at every level of the tree, and entries that share a
    name but differ in type (e.g. a file on one side and a directory on the
    other) make the directories unequal

    this will likely go unused in favor of a more efficient method for checking if a
    release mirror (commit) is up to date

    :param dir1: first directory
    :param dir2: second directory
    :param ignore: names to skip, handed to filecmp.dircmp (never mutated here)
    :return: True if both trees match, False otherwise
    """
    dir1 = Path(dir1)
    dir2 = Path(dir2)
    comparison = filecmp.dircmp(dir1, dir2, ignore=ignore)
    if (
        comparison.left_only
        or comparison.right_only
        or comparison.diff_files
        or comparison.funny_files
        # same name on both sides but of a different type (or not stat-able)
        or comparison.common_funny
    ):
        return False
    # pass the ignore list down so nested directories honor it as well
    return all(
        self.dirs_equal(dir1 / subdir, dir2 / subdir, ignore=ignore)
        for subdir in comparison.common_dirs
    )
def extract_zip_without_top_dir(zip_path: Path, extract_to: Path):
    """extract a zip archive to a directory, removing the top-level directory

    The top-level directory is only stripped when every entry of the archive
    lives under one and the same first path component (as in GitHub zipballs);
    otherwise the entries are extracted with their paths unchanged. Only that
    single leading component is removed, deeper nesting is preserved.

    :param zip_path: path of the zip archive
    :param extract_to: directory to extract into
    :raises ValueError: if an entry contains a ".." component and could therefore
        be written outside of extract_to; nothing is extracted in that case
    """
    extract_to = Path(extract_to)
    with zipfile.ZipFile(zip_path, "r") as z:
        # zip entries always use "/" as separator; dropping empty and "."
        # components also neutralizes absolute paths
        entries = []
        for member in z.infolist():
            parts = [p for p in member.filename.split("/") if p not in ("", ".")]
            if ".." in parts:
                raise ValueError(
                    f"Archive contains an unsafe path: {member.filename}"
                )
            if parts:
                entries.append((member, parts))
        # compare whole path components (not characters) to find a shared top-level
        # directory; a lone top-level *file* is not a directory to strip
        top_levels = {parts[0] for _, parts in entries}
        strip_top_dir = len(top_levels) == 1 and all(
            member.is_dir() or len(parts) > 1 for member, parts in entries
        )
        for member, parts in entries:
            relative_parts = parts[1:] if strip_top_dir else parts
            if not relative_parts:  # skip top-level dir
                continue
            target_path = extract_to.joinpath(*relative_parts)
            if member.is_dir():
                target_path.mkdir(parents=True, exist_ok=True)
            else:
                target_path.parent.mkdir(parents=True, exist_ok=True)
                # stream instead of loading the whole member into memory
                with z.open(member) as src, target_path.open("wb") as dest:
                    shutil.copyfileobj(src, dest)
def validate_format(self):
    """check the repository name against the naming rules, raising a ValueError
    that describes the first rule that is violated
    """
    name = self.repo_name
    # each rule is (is_violated, message); rules are evaluated lazily and in order
    rules = (
        (
            lambda: re.fullmatch(r"[A-Za-z0-9_.-]+", name) is None,
            "The repository name can only contain ASCII letters, digits, and the characters ., -, and _",
        ),
        (
            lambda: len(name) < 1 or len(name) > 100,
            "Repository name is too long (maximum is 100 characters)",
        ),
        (
            lambda: name.endswith(".git"),
            "Repository name cannot end with '.git'",
        ),
        (
            lambda: "github" in name,
            "Repository name cannot contain 'github'",
        ),
    )
    for is_violated, message in rules:
        if is_violated():
            raise ValueError(message)
def _check_installation_access(
    self, installation: GithubIntegrationAppInstallation
) -> None:
    """check that the GitHub app installation has access to the repository

    :param installation: the app installation whose github_login owns the repository
    :raises ValueError: if the lookup of the repository installation fails, the
        underlying GithubException is chained as the cause
    """
    auth = Auth.AppAuth(
        settings.GITHUB_INTEGRATION_APP_ID,
        settings.GITHUB_INTEGRATION_APP_PRIVATE_KEY,
    )
    integration = GithubIntegration(auth=auth)
    try:
        # try to get the installation for this specific repository
        # if the installation has access, this will succeed
        integration.get_repo_installation(installation.github_login, self.repo_name)
    except GithubException as e:
        # keep the API error attached so the real cause is not lost
        raise ValueError(
            f"The CoMSES Integration GitHub app does not have access to the repository at https://github.com/{installation.github_login}/{self.repo_name}. "
            "Use the 'Manage permissions' link above to grant access to this repository (or all repositories)."
        ) from e
+ ) + + +class GitHubApi: + """Functionality for interacting with a remote Github repository + and Github API + """ + + def __init__( + self, + codebase: Codebase, + remote: CodebaseGitRemote, + local_repo: GitRepo, + ): + self.codebase = codebase + self.remote = remote + self.local_repo = local_repo + self._github_repo = None + + @property + def repo_owner(self): + return self.remote.owner + + @property + def repo_name(self): + return self.remote.repo_name + + @property + def is_user_repo(self): + return self.remote.is_user_repo + + @property + def github_repo(self) -> GithubRepo: + if not self._github_repo: + full_name = f"{self.repo_owner}/{self.repo_name}" + self._github_repo = self.get_existing_repo( + self.installation_access_token, + full_name, + ) + return self._github_repo + + @property + def installation_access_token(self): + token = self.get_installation_access_token_for_remote(self.remote) + if not token: + raise ValueError("Unable to acquire installation token") + return token + + @staticmethod + def get_installation_access_token_for_remote( + remote: CodebaseGitRemote, + ) -> str | None: + """Return an installation access token appropriate for the given remote.""" + if remote.is_user_repo: + return GitHubApi.get_user_installation_access_token( + getattr(remote, "installation", None) + ) + return GitHubApi.get_org_installation_access_token() + + @staticmethod + def get_release_raw_for_remote( + remote: CodebaseGitRemote, github_release_id: str | int + ) -> dict: + """Fetch a GitHub release raw dict for owner/repo of the remote.""" + token = GitHubApi.get_installation_access_token_for_remote(remote) + if not token: + raise ValueError("Unable to acquire installation token") + gh = Github(token) + full_name = f"{remote.owner}/{remote.repo_name}" + repo = gh.get_repo(full_name) + gh_release = repo.get_release(int(github_release_id)) + return getattr(gh_release, "raw_data", {}) or {} + + @staticmethod + def get_repo_raw_for_remote(remote: CodebaseGitRemote) 
-> dict: + """Fetch a GitHub repository raw dict for owner/repo of the remote.""" + token = GitHubApi.get_installation_access_token_for_remote(remote) + if not token: + raise ValueError("Unable to acquire installation token") + gh = Github(token) + full_name = f"{remote.owner}/{remote.repo_name}" + repo = gh.get_repo(full_name) + return getattr(repo, "raw_data", {}) or {} + + @staticmethod + def get_user_installation_access_token( + installation: GithubIntegrationAppInstallation | None, + ) -> str | None: + if not installation: + return None + auth = Auth.AppAuth( + settings.GITHUB_INTEGRATION_APP_ID, + settings.GITHUB_INTEGRATION_APP_PRIVATE_KEY, + ) + integration = GithubIntegration(auth=auth) + installation_auth = integration.get_access_token(installation.installation_id) + return installation_auth.token + + @classmethod + def get_org_installation_access_token(cls) -> str: + cached_token = cache.get(INSTALLATION_ACCESS_TOKEN_REDIS_KEY) + if cached_token: + return cached_token + return cls.refresh_org_installation_access_token() + + @staticmethod + def refresh_org_installation_access_token() -> str: + """retrieve a new installation access token for the Github app installed + on the central CoMSES model library organization account and cache it for future use + """ + auth = Auth.AppAuth( + settings.GITHUB_INTEGRATION_APP_ID, + settings.GITHUB_INTEGRATION_APP_PRIVATE_KEY, + ) + integration = GithubIntegration(auth=auth) + installation_auth = integration.get_access_token( + settings.GITHUB_INTEGRATION_APP_INSTALLATION_ID + ) + token = installation_auth.token + seconds_until_expiration = ( + installation_auth.expires_at - timezone.now() + ).total_seconds() + # cache the token for 1 minute less than the expiration time + cache.set( + INSTALLATION_ACCESS_TOKEN_REDIS_KEY, + token, + seconds_until_expiration - 60, + ) + return token + + def get_or_create_repo(self) -> GithubRepo: + """get or create the Github repository for a user or organization""" + try: + return 
self.github_repo + except ValueError: + if self.is_user_repo: + raise ValueError("User-owned repositories must be created beforehand") + else: + self._github_repo = self._create_org_repo() + return self._github_repo + + @staticmethod + def get_existing_repo(access_token: str, full_name: str) -> GithubRepo: + """attempt to get an existing repository for the authenticated user or organization""" + github = Github(access_token) + try: + return github.get_repo(full_name) + except UnknownObjectException as exc: + raise ValueError( + f"Github repository https://github.com/{full_name} does not exist or is private" + ) from exc + + def _create_org_repo(self): + """create a new repository in the CoMSES model library organization + + this function requires the `repo` scope for the installation access token + """ + token = self.installation_access_token + github = Github(token) + org = github.get_organization(settings.GITHUB_MODEL_LIBRARY_ORG_NAME) + repo = org.create_repo( + name=self.repo_name, + description=f"Mirror of {self.codebase.permanent_url}", + ) + return repo + + def push_release( + self, + local_repo: GitRepo, + release: CodebaseRelease, + branch_name: str | None = None, + tag_name: str | None = None, + ) -> tuple[str, str]: + """push only a single release branch and its tag to the remote. 
+ + returns (pushed_commit_sha, summary_str) + """ + token = self.installation_access_token + push_url = f"https://x-access-token:{token}@github.com/{self.github_repo.full_name}.git" + if "origin" not in local_repo.remotes: + local_repo.create_remote("origin", push_url) + else: + local_repo.remotes["origin"].set_url(push_url) + remote = local_repo.remote(name="origin") + + # determine refs to push + branch_name = branch_name or ( + f"{CodebaseGitRepositoryApi.RELEASE_BRANCH_PREFIX}{release.version_number}" + ) + tag_name = tag_name or release.version_number + repo = local_repo + try: + release_branch = repo.heads[branch_name] + except Exception: + raise ValueError(f"missing local branch for release: {branch_name}") + try: + tag_ref = next((t for t in repo.tags if t.name == tag_name), None) + except Exception: + tag_ref = None + + # push refs + summaries: list[str] = [] + success_mask = PushInfo.NEW_HEAD | PushInfo.FAST_FORWARD | PushInfo.UP_TO_DATE + + def _summarize(push_results, label_prefix: str): + for info in push_results: + if info: + if info.flags & success_mask: + summaries.append(f"{label_prefix}: successfully pushed") + else: + summaries.append(f"{label_prefix}: did not push") + + # push release branch + _summarize(remote.push(branch_name), f"branch ({branch_name})") + # push tag (exactly one) but skip logging until gitpython flag behavior is clearer + if tag_ref: + refspec = f"refs/tags/{tag_name}:refs/tags/{tag_name}" + remote.push(refspec) + else: + summaries.append(f"tag ({tag_name}): not found locally") + timestamp = f"[{timezone.now().isoformat()}]:\n" + if not summaries: + summaries.append("no refs pushed") + return (release_branch.commit.hexsha, timestamp + "\n".join(summaries)) + + def push_main(self, local_repo: GitRepo) -> tuple[str, str]: + """push the main branch to the remote if it exists locally""" + token = self.installation_access_token + push_url = f"https://x-access-token:{token}@github.com/{self.github_repo.full_name}.git" + if 
"origin" not in local_repo.remotes: + local_repo.create_remote("origin", push_url) + else: + local_repo.remotes["origin"].set_url(push_url) + remote = local_repo.remote(name="origin") + repo = local_repo + try: + main_branch = repo.heads[CodebaseGitRepositoryApi.DEFAULT_BRANCH_NAME] + except (IndexError, AttributeError): + main_branch = None + if main_branch is None: + return ("", f"[{timezone.now().isoformat()}]: main not found locally") + success_mask = PushInfo.NEW_HEAD | PushInfo.FAST_FORWARD | PushInfo.UP_TO_DATE + summaries: list[str] = [] + for info in remote.push(CodebaseGitRepositoryApi.DEFAULT_BRANCH_NAME): + if info: + if info.flags & success_mask: + summaries.append("main: successfully pushed") + else: + summaries.append("main: did not push") + if not summaries: + summaries.append("main: no refs pushed") + commit_sha = main_branch.commit.hexsha + return (commit_sha, f"[{timezone.now().isoformat()}]:\n" + "\n".join(summaries)) + + def create_release_for_tag(self, local_repo: GitRepo, tag_name: str): + """create a GitHub release for a single tag if it does not already exist""" + try: + self.github_repo.get_release(tag_name) + return + except UnknownObjectException: + pass + + try: + tag = next((t for t in local_repo.tags if t.name == tag_name), None) + message = tag.commit.message if tag else "" + except (AttributeError, ValueError, TypeError): + message = "" + + self.github_repo.create_git_release( + tag_name, + name=tag_name, + message=message or "", + draft=False, + prerelease=False, + ) + + +def _coerce_release_datetime(value) -> datetime: + default_dt = datetime.min.replace(tzinfo=UTC) + if not value: + return default_dt + if isinstance(value, datetime): + if timezone.is_aware(value): + return value + return value.replace(tzinfo=UTC) + if isinstance(value, str): + try: + parsed = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return default_dt + if timezone.is_aware(parsed): + return parsed + return 
parsed.replace(tzinfo=UTC) + return default_dt + + +def _serialize_github_release_listing_item(release) -> tuple[dict, dict]: + """Serialize a PyGithub release object into the listing payload shape.""" + raw = getattr(release, "raw_data", {}) or {} + release_id = str(raw.get("id") or getattr(release, "id", "")) + tag = raw.get("tag_name") or getattr(release, "tag_name", "") or "" + name = ( + raw.get("name") + or getattr(release, "title", None) + or getattr(release, "name", "") + or "" + ) + version = extract_semver(tag or name or "") + data = { + "id": release_id, + "name": name, + "tag_name": tag, + "html_url": raw.get("html_url") or getattr(release, "html_url", ""), + "zipball_url": raw.get("zipball_url") or getattr(release, "zipball_url", ""), + "draft": bool(raw.get("draft", getattr(release, "draft", False))), + "prerelease": bool( + raw.get("prerelease", getattr(release, "prerelease", False)) + ), + "created_at": raw.get("created_at") or getattr(release, "created_at", None), + "published_at": raw.get("published_at") + or getattr(release, "published_at", None), + "has_semantic_versioning": bool(version), + "version": version or "", + } + return data, raw + + +def list_github_releases_for_remote(remote: CodebaseGitRemote) -> list[dict]: + """list releases from the connected GitHub repository for the given remote + + returns a list of minimal release dicts with keys: id, name, tag_name, html_url, + zipball_url, draft, prerelease, created_at, published_at + + includes a `created_by_integration` flag when the release was created (pushed) by the integration app + """ + token = GitHubApi.get_installation_access_token_for_remote(remote) + if not token: + return [] + + full_name = f"{remote.owner}/{remote.repo_name}" + gh = Github(token) + repo = gh.get_repo(full_name) + releases = repo.get_releases() + + results: list[dict] = [] + for r in releases: + data, raw = _serialize_github_release_listing_item(r) + release_id = data["id"] + # annotate whether this release has 
already been imported for this remote + imported_state = ( + ImportedReleaseSyncState.objects.filter( + remote=remote, github_release_id=release_id + ) + .order_by("-last_modified") + .first() + ) + # create or update ImportedReleaseSyncState jobs for user-created releases + # only update pending (not started) jobs + # this is the single point at which we create ImportedReleaseSyncStates + if _is_release_created_by_integration(raw, remote): + data["created_by_integration"] = True + else: + try: + if imported_state is None or ( + imported_state.status == ImportedReleaseSyncState.Status.PENDING + ): + imported_state = ImportedReleaseSyncState.for_github_release( + remote, raw + ) + except Exception as e: + logger.warning( + "failed to upsert imported sync state for %s: %s", release_id, e + ) + if imported_state: + data["imported_sync_state"] = ImportedReleaseSyncStateSerializer( + imported_state + ).data + + results.append(data) + + # order by published_at + results.sort( + key=lambda d: _coerce_release_datetime(d.get("published_at")), + reverse=True, + ) + return results + + +def _is_release_created_by_integration( + gh_release_raw: dict, remote: CodebaseGitRemote +) -> bool: + """skip releases created by the integration app""" + author = gh_release_raw.get("author") or {} + if not isinstance(author, dict): + return False + author_login = str(author.get("login", "")).lower() + return slugify(settings.GITHUB_INTEGRATION_APP_NAME) in author_login + + +def extract_semver(value) -> str | None: + """extract semantic version (X.Y.Z) from a string, tolerant to a leading 'v' + + keep this shared between importer and listing so behavior is consistent. 
+ """ + if not isinstance(value, str): + return None + if len(value) > 1024: # prevent expensive/malicious inputs + return None + match = re.search(r"v?(\d+\.\d+\.\d+)", value) + return match.group(1) if match else None + + +class GitHubReleaseImporter: + def __init__(self, remote: CodebaseGitRemote, github_release_id: str | int): + """Initialize the importer with the remote and GitHub release id. + + The GitHub release metadata must already be cached on an ImportedReleaseSyncState + created beforehand + """ + self.remote = remote + self.codebase = remote.codebase + self.github_release_id = str(github_release_id) + # load cached release metadata from sync state created earlier + self.sync_state = ImportedReleaseSyncState.objects.filter( + remote=self.remote, github_release_id=self.github_release_id + ).first() + if not self.sync_state: + raise ValueError( + "Missing ImportedReleaseSyncState: sync state must be created beforehand" + ) + self._reimporting = False + + if not self.sync_state.download_url: + raise ValueError("No zipball found in the github release") + + @property + def installation_token(self): + installation = self.codebase.submitter.github_integration_app_installation + return GitHubApi.get_user_installation_access_token(installation) + + def import_or_reimport(self, custom_version: str | None = None) -> bool: + + try: + # find any existing release tied to this GitHub release + existing_release = self.codebase.releases.filter( + codebase=self.codebase, + imported_release_sync_state__github_release_id=self.github_release_id, + ).first() + if existing_release: + if existing_release.status in [ + CodebaseRelease.Status.UNPUBLISHED, + CodebaseRelease.Status.UNDER_REVIEW, + CodebaseRelease.Status.DRAFT, + ]: + # reimport if editable + self._reimporting = True + return self.reimport_release(existing_release) + else: + # error if not editable + raise ValueError("Published releases cannot be reimported") + else: + # otherwise import as a brand new release + 
return self.import_new_release(custom_version=custom_version) + except Exception as e: + logger.exception( + f"Error importing GitHub release with id {self.github_release_id}): {e}" + ) + return self.log_failure("An unexpected error occurred") + + def import_new_release(self, custom_version: str | None = None) -> bool: + # make sure the release doesn't already exist as imported release + if self.codebase.releases.filter( + imported_release_sync_state__github_release_id=self.github_release_id + ).exists(): + return self.log_failure("Release already exists") + + # determine version number, make sure it doesn't already exist + version_number = self.extract_semver(self.sync_state.tag_name or "") + if not version_number: + version_number = self.extract_semver(self.sync_state.display_name or "") + if not version_number and custom_version: + version_number = custom_version + if not version_number: + return self.log_failure( + "Missing a semantic version number (X.X.X) in the release tag or name" + ) + if self.codebase.releases.filter(version_number=version_number).exists(): + return self.log_failure( + f"Release with version {version_number} already exists" + ) + + # create a new imported codebase release and link to existing sync state + with transaction.atomic(): + release = CodebaseRelease.objects.create( + codebase=self.codebase, + submitter=self.codebase.submitter, + status=CodebaseRelease.Status.UNPUBLISHED, + share_uuid=uuid.uuid4(), + version_number=version_number, + imported_release_sync_state=self.sync_state, + ) + # add submitter as a release contributor automatically + contributor, created = Contributor.from_user(self.codebase.submitter) + release.add_contributor(contributor) + + return self._import_package_and_metadata(release) + + def reimport_release(self, release) -> bool: + # refresh cached metadata + gh_release_raw = GitHubApi.get_release_raw_for_remote( + self.remote, self.github_release_id + ) + # the same download url indicates nothing actually 
changed, skip import + if self.sync_state.download_url == gh_release_raw.get("zipball_url"): + return self.log_success("Attempted reimport, no changes detected") + self.sync_state = ImportedReleaseSyncState.for_github_release( + self.remote, gh_release_raw + ) + return self._import_package_and_metadata(release) + + def _resolve_tags(self, tag_names: list[str]) -> list: + """ + Resolve a list of tag names to a list of Tag objects or strings + if the tag does not exist + """ + resolved_tags = [] + for tag_name in tag_names: + tag = Tag.objects.filter(name__iexact=tag_name).first() + if tag: + resolved_tags.append(tag) + else: + resolved_tags.append(tag_name) + return resolved_tags + + def _create_release_languages( + self, release: CodebaseRelease, language_names: list[str] + ) -> None: + # assign programming languages to the release + # since these come from github, we assume they are real languages and + # always create them if they don't already exist + for name in language_names: + if not isinstance(name, str): + continue + normalized_name = name.strip() + if not normalized_name: + continue + programming_language, _created = ( + ProgrammingLanguage.objects.get_or_create_by_name(normalized_name) + ) + ReleaseLanguage.objects.get_or_create( + release=release, programming_language=programming_language + ) + + def _import_package_and_metadata(self, release) -> bool: + # import the release package + fs_api = release.get_fs_api() + codemeta, cff = fs_api.import_release_package(self.installation_token) + + # extract metadata from the release package and save it to the release + gh_repo_raw = GitHubApi.get_repo_raw_for_remote(self.remote) + release_fields = ReleaseMetadataConverter( + codemeta, cff, gh_repo_raw, self.sync_state.extra_data or {} + ).convert() + license_spdx_id = release_fields.pop("license_spdx_id", None) + platforms = release_fields.pop("platforms", []) + programming_languages = release_fields.pop("programming_languages", []) + for key, value in 
release_fields.items(): + setattr(release, key, value) + license = License.objects.filter(name=license_spdx_id).first() + if license: + release.license = license + if platforms: + release.platform_tags.add(*self._resolve_tags(platforms)) + if programming_languages: + self._create_release_languages(release, programming_languages) + release.save() + + return self.log_success() + + def extract_semver(self, value) -> str | None: + return extract_semver(value) + + def log_failure(self, message: str): + self.sync_state.log_failure(message) + return False + + def log_success(self, message: str | None = None): + display = self.sync_state.display_name or self.sync_state.tag_name + if not message: + message = f"Successfully {'re-' if self._reimporting else ''}imported release {display}" + self.sync_state.log_success(message) + return True diff --git a/django/library/jinja2/library/codebases/edit.jinja b/django/library/jinja2/library/codebases/edit.jinja index 305c5079c..6d19df366 100644 --- a/django/library/jinja2/library/codebases/edit.jinja +++ b/django/library/jinja2/library/codebases/edit.jinja @@ -12,7 +12,12 @@ Let's walk through the steps needed to generate a citable software archive that broadly follows the FAIR Principles for Research Software.

-
+{% with github_config=settings("library.GitHubIntegrationConfiguration", use_default_site=True) %} +
+
+{% endwith %} {% endblock %} {% block js %} diff --git a/django/library/jinja2/library/codebases/git.jinja b/django/library/jinja2/library/codebases/git.jinja new file mode 100644 index 000000000..aa15bd41d --- /dev/null +++ b/django/library/jinja2/library/codebases/git.jinja @@ -0,0 +1,38 @@ +{% extends "base.jinja" %} +{% from "common.jinja" import breadcrumb %} + +{% set github_config=settings("library.GitHubIntegrationConfiguration", use_default_site=True) %} + +{% block title %}GitHub Integration{% endblock %} + +{% block introduction %} +

GitHub Integration{% if github_config.is_beta %} Beta{% endif + %}

+{% endblock %} + +{% block content %} +{{ breadcrumb([ +{'url': url('library:codebase-list'), 'text': 'Computational Model Library' }, +{'url': url('library:codebase-detail', codebase.identifier), 'text': codebase.title|truncate(68) }, +{'text': 'GitHub' }, +]) }} +

+ Connect a GitHub repository with {{ codebase.title }} +

+

+ The CoMSES Model Library GitHub integration allows you to transfer releases of your model to + or from a GitHub repository by connecting an existing repository or by having one created for you. +

+

+ Learn more about how it works +

+
+
+{% endblock %} + +{% block js %} +{{ vite_asset("apps/github_config.ts") }} +{% endblock %} \ No newline at end of file diff --git a/django/library/jinja2/library/codebases/macros.jinja b/django/library/jinja2/library/codebases/macros.jinja index 03bc6fb06..83a5999d2 100644 --- a/django/library/jinja2/library/codebases/macros.jinja +++ b/django/library/jinja2/library/codebases/macros.jinja @@ -109,3 +109,9 @@ {% endmacro %} + +{% macro imported_release_indicator(release) %} + {% if release.is_imported %} + + {% endif %} +{% endmacro %} diff --git a/django/library/jinja2/library/codebases/releases/edit.jinja b/django/library/jinja2/library/codebases/releases/edit.jinja index 7b3d38afc..7b1a11f67 100644 --- a/django/library/jinja2/library/codebases/releases/edit.jinja +++ b/django/library/jinja2/library/codebases/releases/edit.jinja @@ -15,6 +15,7 @@ data-review-status="{{ release.get_review().get_status_display() if release.get_review() else 'Unreviewed' }}" data-is-live="{{ release.live }}" data-can-edit-originals="{{ release.can_edit_originals }}" + data-is-imported="{{ release.is_imported }}" > {% endif %} diff --git a/django/library/jinja2/library/codebases/releases/retrieve.jinja b/django/library/jinja2/library/codebases/releases/retrieve.jinja index 5922f78e5..4d52044a5 100644 --- a/django/library/jinja2/library/codebases/releases/retrieve.jinja +++ b/django/library/jinja2/library/codebases/releases/retrieve.jinja @@ -1,6 +1,7 @@ {% extends "sidebar_layout.jinja" %} {% from "common.jinja" import breadcrumb, embed_discourse_comments, share_card, search_tag_href, member_profile_href, render_ogp_tags, alert_if_spam, mark_spam_confirm_modal %} {% from "library/review/includes/macros.jinja" import confirm_change_closed_modal %} +{% from "library/codebases/macros.jinja" import imported_release_indicator %} {% set open_code_badge_png_url = request.build_absolute_uri(static("images/icons/open-code-badge.png")) %} {% set open_code_badge_svg_url = 
request.build_absolute_uri(static("images/icons/open-code-badge.svg")) %} @@ -102,7 +103,9 @@ {% else %}
The release you are viewing is currently unpublished.
{% endif %} -

{{ codebase.title }} {{ release.version_number }}

+

+ {{ codebase.title }} {{ release.version_number }} +

Submitted by @@ -289,6 +292,43 @@ {% endif %} {% endwith %} + +{% with remote = codebase.active_git_remote %} + {% with github_config=settings("library.GitHubIntegrationConfiguration", use_default_site=True) %} + {% if github_config.can_use_github_integration(user) and (has_change_perm or remote) %} + + {% endif %} + {% endwith %} +{% endwith %} +