diff --git a/birdday/README.md b/birdday/README.md
new file mode 100644
index 00000000..cbd01009
--- /dev/null
+++ b/birdday/README.md
@@ -0,0 +1,37 @@
+# K-point convergence tracker (Materials)
+
+> Ideal candidate: scientists skilled in Density Functional Theory and proficient in python.
+
+# Overview
+
+The aim of this task is to create a python package that implements an automatic convergence tracking mechanism for a materials simulations engine. The convergence is tracked with respect to the k-point sampling inside a reciprocal cell of a crystalline compound.
+
+# Requirements
+
+1. automatically find the dimensions of a k-point mesh that satisfy a certain criterion for total energy (eg. total energy is converged within dE = 0.01meV)
+1. the code shall be written in a way that can facilitate easy addition of convergence wrt other characteristics extracted from simulations (forces, pressures, phonon frequencies etc)
+1. the code shall support VASP or Quantum ESPRESSO
+
+# Expectations
+
+- correctly find a k-point mesh that satisfies total energy convergence parameters for a set of 10 materials, starting from Si2, as simplest, to a 10-20-atom supercell of your choice
+- modular and object-oriented implementation
+- commit early and often - at least once per 24 hours
+
+# Timeline
+
+We leave exact timing to the candidate. Must fit within 5 days total.
+
+# User story
+
+As a user of this software I can start it passing:
+
+- path to input data (eg. pw.in / POSCAR, INCAR, KPOINTS) and
+- kinetic energy cutoff
+
+as parameters and get the k-point dimensions (eg. 5 5 5).
+
+# Notes
+
+- create an account at exabyte.io and use it for the calculation purposes
+- suggested modeling engine: Quantum ESPRESSO
diff --git a/birdday/examples/kpoint_conv_example.py b/birdday/examples/kpoint_conv_example.py
new file mode 100644
index 00000000..c8e1fb51
--- /dev/null
+++ b/birdday/examples/kpoint_conv_example.py
@@ -0,0 +1,40 @@
+from exabyte_api_client.endpoints.jobs import JobEndpoints
+from exabyte_api_client.endpoints.projects import ProjectEndpoints
+from exabyte_api_client.endpoints.materials import MaterialEndpoints
+from exabyte_api_client.endpoints.workflows import WorkflowEndpoints
+
+from kpoint.kpoint import ConvTracker
+
+# Initialize the endpoints
+# Replace 'ACCOUNT_ID' and 'AUTH_TOKEN' with your respective tokens.
+ENDPOINT_ARGS = ['platform.exabyte.io', 443, 'ACCOUNT_ID', 'AUTH_TOKEN', '2018-10-01', True]
+job_endpoints = JobEndpoints(*ENDPOINT_ARGS)
+project_endpoints = ProjectEndpoints(*ENDPOINT_ARGS)
+material_endpoints = MaterialEndpoints(*ENDPOINT_ARGS)
+workflow_endpoints = WorkflowEndpoints(*ENDPOINT_ARGS)
+
+# Get Owner ID, Project ID, (Default) Material ID, and Workflow ID
+# Replace "KPOINT_WORKFLOW" with your respective workflow name.
+owner_id = "ACCOUNT_ID"  # Same account ID string as in ENDPOINT_ARGS (was a bare undefined name: NameError).
+project_id = project_endpoints.list({"isDefault": True, "owner._id": owner_id})[0]["_id"]
+material_id = material_endpoints.list({"isDefault": True, "owner._id": owner_id})[0]["_id"]
+workflow_id = workflow_endpoints.list({"name": "KPOINT_WORKFLOW", "owner._id": owner_id})[0]["_id"]
+
+# Set compute parameters.
+# Can replace debug queue (D) with "OR" if running into memory issues.
+PPN = "1"
+QUEUE = "D"
+NODES = "1"
+TIME_LIMIT = "01:00:00"
+CLUSTER = "cluster-001"
+
+
+# Generate config file.
+# Note that "job_name" is replaced by "job_name_prefix" when using run method.
+compute = job_endpoints.get_compute(CLUSTER, PPN, NODES, QUEUE, TIME_LIMIT)
+config = job_endpoints.get_config([material_id], workflow_id, project_id, owner_id, "job_name", compute)
+
+
+# Create Tracker Class and Run
+tracker = ConvTracker(config, job_endpoints)
+tracker.run(max_iter=20, job_set_name="KPoint", job_name_prefix="kpoint")
diff --git a/birdday/kpoint/kpoint.py b/birdday/kpoint/kpoint.py
new file mode 100644
index 00000000..13c8663e
--- /dev/null
+++ b/birdday/kpoint/kpoint.py
@@ -0,0 +1,106 @@
+import re
+import urllib.request
+from utils.generic import wait_for_jobs_to_finish
+
+
+class ConvTracker:
+    """
+    Class used to create, submit, and manage jobs for generating a converged KPoint mesh.
+
+    Args:
+        config (dict): Exabyte API config.
+        job_endpoints (JobEndpoints): Exabyte API endpoint.
+        kwargs:
+            cutoff (float): Convergence threshold for the total energy, in eV.
+            energy (list): Total energy values. Can be used as a pseudo-restart to convergence.
+
+    Attributes:
+        config (dict): Exabyte API config.
+        job_endpoints (JobEndpoints): Exabyte API endpoint.
+        cutoff (float): Convergence threshold for the total energy, in eV.
+        energy (list): List of energy values used to check for convergence.
+    """
+
+    def __init__(self, config, job_endpoints, cutoff=1e-5, energy=None):
+        self.config = config
+        self.job_endpoints = job_endpoints
+        self.cutoff = cutoff  # Units = eV
+        self.energy = energy if energy is not None else []  # None default avoids a shared mutable list; pass a list to continue a job set.
+
+    def create_submit_job(self, kp, jobs_set=None, job_name_prefix="kpoint"):
+        """
+        Creates and submits a given job.
+
+        Args:
+            kp (int): Value of kpoints. Also used to generate job name.
+
+        kwargs:
+            jobs_set (dict): Job set document the new job is moved into; None skips the move.
+            job_name_prefix (str): Name of job prepended to kpoint value.
+        """
+        job_name = {"name": f"{job_name_prefix}_{kp}"}
+        self.config.update(job_name)
+        job = self.job_endpoints.create(self.config)
+
+        if jobs_set is not None:
+            self.job_endpoints.move_to_set(job["_id"], "", jobs_set["_id"])
+
+        # Update K-Point Values
+        # This is not an ideal way to set kpoints, but the built in convergence tool did not work as expected, and adjusting the workflow did not update render.
+        job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"] = job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"].replace("K_POINTS automatic\n10 10 10 0 0 0", f"K_POINTS automatic\n{kp} {kp} {kp} 0 0 0")
+        self.job_endpoints.update(job["_id"], job)
+        self.job_endpoints.submit(job["_id"])
+
+        return job["_id"]
+
+    def parse_output(self, job_id):
+        """
+        Read the total energy (converted Ry -> eV) from the pw_scf.out results file.
+
+        Args:
+            job_id (str): ID of job to get results from.
+        """
+        files = self.job_endpoints.list_files(job_id)
+        output_file = [file for file in files if file["name"] == 'pw_scf.out'][0]
+        server_response = urllib.request.urlopen(output_file['signedUrl'])
+        output_file_bytes = server_response.read()
+        output_file = output_file_bytes.decode(encoding="UTF-8")
+        output_as_array = output_file.split("\n")
+        total_energy_ry = float(re.split(" +", [row for row in output_as_array if "! total energy" in row][0])[-2])
+        total_energy_ev = total_energy_ry * 13.6056980659  # 1 Ry = 13.6056980659 eV
+
+        return total_energy_ev
+
+    def check_convergence(self):
+        """
+        Check if the last two total energies differ by no more than the cutoff.
+        """
+        if len(self.energy) < 2:
+            return False
+        else:
+            return abs(self.energy[-1] - self.energy[-2]) <= self.cutoff
+
+    def run(self, kp_initial=1, max_iter=20, job_set_name=None, job_name_prefix="kpoint"):
+        """
+        Manages job submission and checks for convergence.
+
+        kwargs:
+            kp_initial (int): Sets initial kpoint values.
+            max_iter (int): Number of times to iterate before exiting.
+            job_set_name (str): Name given to set of jobs.
+            job_name_prefix (str): Name of job prepended to kpoint value.
+        """
+        if job_set_name is not None:
+            jobs_set = self.job_endpoints.create_set({"name": job_set_name, "projectId": self.config["_project"]["_id"], "owner": {"_id": self.config["owner"]["_id"]}})
+        else:
+            jobs_set = None  # Fixed: was misspelled "job_set", leaving "jobs_set" undefined below.
+
+        for kp in range(kp_initial, max_iter+kp_initial):
+            print(f"KPoints = {kp}")
+            job_id = self.create_submit_job(kp, jobs_set=jobs_set, job_name_prefix=job_name_prefix)
+            wait_for_jobs_to_finish(self.job_endpoints, [job_id], poll_interval=10)
+            total_energy = self.parse_output(job_id)
+            self.energy.extend([total_energy])
+
+            if self.check_convergence():
+                break
diff --git a/birdday/tests/test_kpoint.py b/birdday/tests/test_kpoint.py
new file mode 100644
index 00000000..733967d9
--- /dev/null
+++ b/birdday/tests/test_kpoint.py
@@ -0,0 +1,15 @@
+import pytest
+from kpoint.kpoint import ConvTracker
+
+def test_check_convergence():
+    # Below convergence limit
+    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-6])
+    assert test_tracker.check_convergence() == True
+
+    # At convergence limit
+    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-5])
+    assert test_tracker.check_convergence() == True
+
+    # Above convergence limit
+    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-4])
+    assert test_tracker.check_convergence() == False
diff --git a/birdday/utils/generic.py b/birdday/utils/generic.py
new file mode 100644
index 00000000..22cd8305
--- /dev/null
+++ b/birdday/utils/generic.py
@@ -0,0 +1,48 @@
+# This module defines a set of common functions which are used in other examples.
+import time
+import datetime
+from tabulate import tabulate
+
+
+def get_jobs_statuses_by_ids(endpoint, job_ids):
+    """
+    Gets jobs statuses by their IDs.
+
+    Args:
+        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
+        job_ids (list): list of job IDs to get the status for
+
+    Returns:
+        list: list of job statuses
+    """
+    jobs = endpoint.list({"_id": {"$in": job_ids}}, {"fields": {"status": 1}})
+    return [job["status"] for job in jobs]
+
+
+def wait_for_jobs_to_finish(endpoint, job_ids, poll_interval=10):
+    """
+    Waits for jobs to finish and prints their statuses.
+    A job is considered finished if it is not in "pre-submission", "submitted", or "active" status.
+
+    Args:
+        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
+        job_ids (list): list of job IDs to wait for
+        poll_interval (int): poll interval for job information in seconds. Defaults to 10.
+    """
+    print("Wait for jobs to finish, poll interval: {0} sec".format(poll_interval))
+    while True:
+        statuses = get_jobs_statuses_by_ids(endpoint, job_ids)
+
+        errored_jobs = len([status for status in statuses if status == "error"])
+        active_jobs = len([status for status in statuses if status == "active"])
+        finished_jobs = len([status for status in statuses if status == "finished"])
+        submitted_jobs = len([status for status in statuses if status == "submitted"])
+
+        headers = ["TIME", "SUBMITTED-JOBS", "ACTIVE-JOBS", "FINISHED-JOBS", "ERRORED-JOBS"]
+        now = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+        row = [now, submitted_jobs, active_jobs, finished_jobs, errored_jobs]
+        print(tabulate([row], headers, tablefmt='grid', stralign='center'))
+
+        if all([status not in ["pre-submission", "submitted", "active"] for status in statuses]):
+            break
+        time.sleep(poll_interval)