Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions birdday/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# K-point convergence tracker (Materials)

> Ideal candidate: scientists skilled in Density Functional Theory and proficient in python.

# Overview

The aim of this task is to create a python package that implements automatic convergence tracking mechanism for a materials simulations engine. The convergence is tracked with respect to the k-point sampling inside a reciprocal cell of a crystalline compound.

# Requirements

1. automatically find the dimensions of a k-point mesh that satisfy a certain criterion for total energy (eg. total energy is converged within dE = 0.01meV)
1. the code shall be written in a way that can facilitate easy addition of convergence wrt other characteristics extracted from simulations (forces, pressures, phonon frequencies etc)
1. the code shall support VASP or Quantum ESPRESSO

# Expectations

- correctly find k-point mesh that satisfies total energy convergence parameters for a set of 10 materials, starting from Si2, as simplest, to a 10-20-atom supercell of your choice
- modular and object-oriented implementation
- commit early and often - at least once per 24 hours

# Timeline

We leave exact timing to the candidate. Must fit within 5 days total.

# User story

As a user of this software I can start it passing:

- path to input data (eg. pw.in / POSCAR, INCAR, KPOINTS) and
- kinetic energy cutoff

as parameters and get the k-point dimensions (eg. 5 5 5).

# Notes

- create an account at exabyte.io and use it for the calculation purposes
- suggested modeling engine: Quantum ESPRESSO
40 changes: 40 additions & 0 deletions birdday/examples/kpoint_conv_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from exabyte_api_client.endpoints.jobs import JobEndpoints
from exabyte_api_client.endpoints.projects import ProjectEndpoints
from exabyte_api_client.endpoints.materials import MaterialEndpoints
from exabyte_api_client.endpoints.workflows import WorkflowEndpoints

from kpoint.kpoint import ConvTracker

# Account credentials.
# Replace the placeholder values below with your respective account ID and auth token.
# NOTE: the original script referenced an undefined name ACCOUNT_ID further down,
# which raised a NameError; defining the constants once here fixes that.
ACCOUNT_ID = 'ACCOUNT_ID'
AUTH_TOKEN = 'AUTH_TOKEN'

# Initialize the endpoints.
ENDPOINT_ARGS = ['platform.exabyte.io', 443, ACCOUNT_ID, AUTH_TOKEN, '2018-10-01', True]
job_endpoints = JobEndpoints(*ENDPOINT_ARGS)
project_endpoints = ProjectEndpoints(*ENDPOINT_ARGS)
material_endpoints = MaterialEndpoints(*ENDPOINT_ARGS)
workflow_endpoints = WorkflowEndpoints(*ENDPOINT_ARGS)

# Get Owner ID, Project ID, (Default) Material ID, and Workflow ID.
# Replace "KPOINT_WORKFLOW" with your respective workflow name.
owner_id = ACCOUNT_ID
project_id = project_endpoints.list({"isDefault": True, "owner._id": ACCOUNT_ID})[0]["_id"]
material_id = material_endpoints.list({"isDefault": True, "owner._id": ACCOUNT_ID})[0]["_id"]
workflow_id = workflow_endpoints.list({"name": "KPOINT_WORKFLOW", "owner._id": ACCOUNT_ID})[0]["_id"]

# Set compute parameters.
# Can replace debug queue (D) with "OR" if running into memory issues.
PPN = "1"
QUEUE = "D"
NODES = "1"
TIME_LIMIT = "01:00:00"
CLUSTER = "cluster-001"


# Generate config file.
# Note that "job_name" is replaced by "job_name_prefix" when using run method.
compute = job_endpoints.get_compute(CLUSTER, PPN, NODES, QUEUE, TIME_LIMIT)
config = job_endpoints.get_config([material_id], workflow_id, project_id, owner_id, "job_name", compute)


# Create Tracker Class and Run
tracker = ConvTracker(config, job_endpoints)
tracker.run(max_iter=20, job_set_name="KPoint", job_name_prefix="kpoint")
106 changes: 106 additions & 0 deletions birdday/kpoint/kpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import re
import urllib.request
from utils.generic import wait_for_jobs_to_finish


class ConvTracker:
    """
    Class used to create, submit, and manage jobs for generating a converged KPoint mesh.

    Args:
        config (dict): Exabyte API config.
        job_endpoints (JobEndpoints): Exabyte API endpoint.
    kwargs:
        cutoff (float): Desired energy cutoff in eV.
        energy (list): Total energy values. Can be used as a pseudo-restart to convergence.

    Attributes:
        config (dict): Exabyte API config.
        job_endpoints (JobEndpoints): Exabyte API endpoint.
        cutoff (float): Desired energy cutoff in eV.
        energy (list): List of energy values used to check for convergence.
    """

    def __init__(self, config, job_endpoints, cutoff=1e-5, energy=None):
        self.config = config
        self.job_endpoints = job_endpoints
        self.cutoff = cutoff  # Units = eV
        # A mutable default argument (energy=[]) would be shared across all
        # instances; use None as the sentinel and create a fresh list instead.
        # An existing array of energies can be passed in to continue a job set.
        self.energy = [] if energy is None else energy

    def create_submit_job(self, kp, jobs_set=None, job_name_prefix="kpoint"):
        """
        Creates and submits a given job.

        Args:
            kp (int): Value of kpoints. Also used to generate job name.

        kwargs:
            jobs_set (str): ID of job set.
            job_name_prefix (str): Name of job prepended to kpoint value.

        Returns:
            str: ID of the created job.
        """
        job_name = {"name": f"{job_name_prefix}_{kp}"}
        self.config.update(job_name)
        job = self.job_endpoints.create(self.config)

        if jobs_set is not None:
            self.job_endpoints.move_to_set(job["_id"], "", jobs_set["_id"])

        # Update K-Point Values.
        # This is not an ideal way to set kpoints, but the built-in convergence tool
        # did not work as expected, and adjusting the workflow did not update render.
        job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"] = job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"].replace("K_POINTS automatic\n10 10 10 0 0 0", f"K_POINTS automatic\n{kp} {kp} {kp} 0 0 0")
        self.job_endpoints.update(job["_id"], job)
        self.job_endpoints.submit(job['_id'])

        return job["_id"]

    def parse_output(self, job_id):
        """
        Read total energy from the results file (pw_scf.out).

        Args:
            job_id (str): ID of job to get results from.

        Returns:
            float: Total energy in eV (converted from Rydberg).
        """
        files = self.job_endpoints.list_files(job_id)
        output_file = [file for file in files if file["name"] == 'pw_scf.out'][0]
        server_response = urllib.request.urlopen(output_file['signedUrl'])
        output_file_bytes = server_response.read()
        output_file = output_file_bytes.decode(encoding="UTF-8")
        output_as_array = output_file.split("\n")
        # QE prints the converged total energy on a line starting with "! total energy";
        # the second-to-last whitespace-separated token is the value in Ry.
        total_energy_ry = float(re.split(" +", [row for row in output_as_array if "! total energy" in row][0])[-2])
        total_energy_ev = total_energy_ry * 13.6056980659  # 1 Ry = 13.6056980659 eV

        return total_energy_ev

    def check_convergence(self):
        """
        Check if energy convergence reached.

        Returns:
            bool: True if the last two energies differ by no more than the cutoff.
        """
        if len(self.energy) < 2:
            return False
        else:
            return abs(self.energy[-1] - self.energy[-2]) <= self.cutoff

    def run(self, kp_initial=1, max_iter=20, job_set_name=None, job_name_prefix="kpoint"):
        """
        Manages job submission and checks for convergence.

        kwargs:
            kp_initial (int): Sets initial kpoint values.
            max_iter (int): Number of times to iterate before exiting.
            job_set_name (str): Name given to set of jobs.
            job_name_prefix (str): Name of job prepended to kpoint value.
        """
        if job_set_name is not None:
            jobs_set = self.job_endpoints.create_set({"name": job_set_name, "projectId": self.config["_project"]["_id"], "owner": {"_id": self.config["owner"]["_id"]}})
        else:
            # Bug fix: the original assigned `job_set = None` here, leaving
            # `jobs_set` undefined and raising NameError in the loop below.
            jobs_set = None

        for kp in range(kp_initial, max_iter + kp_initial):
            print(f"KPoints = {kp}")
            job_id = self.create_submit_job(kp, jobs_set=jobs_set, job_name_prefix=job_name_prefix)
            wait_for_jobs_to_finish(self.job_endpoints, [job_id], poll_interval=10)
            total_energy = self.parse_output(job_id)
            self.energy.extend([total_energy])

            if self.check_convergence():
                break
15 changes: 15 additions & 0 deletions birdday/tests/test_kpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest
from kpoint.kpoint import ConvTracker

def test_check_convergence():
    """Verify check_convergence around the cutoff boundary (below, at, above)."""
    # Below convergence limit: difference 1e-6 < cutoff 1e-5.
    # Use plain truthiness asserts rather than `== True`/`== False` (PEP 8 / E712).
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-6])
    assert test_tracker.check_convergence()

    # At convergence limit: difference equal to cutoff still counts as converged.
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-5])
    assert test_tracker.check_convergence()

    # Above convergence limit: difference 1e-4 > cutoff 1e-5.
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-4])
    assert not test_tracker.check_convergence()
48 changes: 48 additions & 0 deletions birdday/utils/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# This module defines a set of common functions which are used in other examples.
import time
import datetime
from tabulate import tabulate


def get_jobs_statuses_by_ids(endpoint, job_ids):
    """
    Fetch the status field for each of the given jobs.

    Args:
        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
        job_ids (list): list of job IDs to get the status for

    Returns:
        list: list of job statuses
    """
    # Query only the status field for the requested job IDs.
    matched_jobs = endpoint.list({"_id": {"$in": job_ids}}, {"fields": {"status": 1}})
    statuses = []
    for entry in matched_jobs:
        statuses.append(entry["status"])
    return statuses


def wait_for_jobs_to_finish(endpoint, job_ids, poll_interval=10):
    """
    Block until every job has left a pending state, printing a status table each poll.

    A job is considered finished once its status is none of "pre-submission",
    "submitted", or "active".

    Args:
        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
        job_ids (list): list of job IDs to wait for
        poll_interval (int): poll interval for job information in seconds. Defaults to 10.
    """
    pending_states = ("pre-submission", "submitted", "active")
    print("Wait for jobs to finish, poll interval: {0} sec".format(poll_interval))
    while True:
        current_statuses = get_jobs_statuses_by_ids(endpoint, job_ids)

        # Summarize how many jobs are in each state of interest.
        summary_row = [
            datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'),
            current_statuses.count("submitted"),
            current_statuses.count("active"),
            current_statuses.count("finished"),
            current_statuses.count("error"),
        ]
        table_headers = ["TIME", "SUBMITTED-JOBS", "ACTIVE-JOBS", "FINISHED-JOBS", "ERRORED-JOBS"]
        print(tabulate([summary_row], table_headers, tablefmt='grid', stralign='center'))

        # Stop once no job remains in a pending state; otherwise sleep and re-poll.
        if not any(status in pending_states for status in current_statuses):
            break
        time.sleep(poll_interval)