Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions birdday/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# K-point convergence tracker (Materials)

> Ideal candidate: scientists skilled in Density Functional Theory and proficient in python.

# Overview

The aim of this task is to create a python package that implements automatic convergence tracking mechanism for a materials simulations engine. The convergence is tracked with respect to the k-point sampling inside a reciprocal cell of a crystalline compound.

# Requirements

1. automatically find the dimensions of a k-point mesh that satisfy a certain criterion for total energy (eg. total energy is converged within dE = 0.01meV)
1. the code shall be written in a way that can facilitate easy addition of convergence wrt other characteristics extracted from simulations (forces, pressures, phonon frequencies etc)
1. the code shall support VASP or Quantum ESPRESSO

# Expectations

- correctly find k-point mesh that satisfies total energy convergence parameters for a set of 10 materials, starting from Si2, as simplest, to a 10-20-atom supercell of your choice
- modular and object-oriented implementation
- commit early and often - at least once per 24 hours

# Timeline

We leave exact timing to the candidate. Must fit within 5 days total.

# User story

As a user of this software I can start it passing:

- path to input data (eg. pw.in / POSCAR, INCAR, KPOINTS) and
- kinetic energy cutoff

as parameters and get the k-point dimensions (eg. 5 5 5).

# Notes

- create an account at exabyte.io and use it for the calculation purposes
- suggested modeling engine: Quantum ESPRESSO
40 changes: 40 additions & 0 deletions birdday/examples/kpoint_conv_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from exabyte_api_client.endpoints.jobs import JobEndpoints
from exabyte_api_client.endpoints.projects import ProjectEndpoints
from exabyte_api_client.endpoints.materials import MaterialEndpoints
from exabyte_api_client.endpoints.workflows import WorkflowEndpoints

from kpoint.kpoint import ConvTracker

# Account credentials.
# Replace the placeholder values below with your respective account ID and auth token.
# NOTE: the original script referenced an undefined name ACCOUNT_ID further down,
# which raised a NameError; defining the constants once here fixes that.
ACCOUNT_ID = 'ACCOUNT_ID'
AUTH_TOKEN = 'AUTH_TOKEN'

# Initialize the endpoints.
ENDPOINT_ARGS = ['platform.exabyte.io', 443, ACCOUNT_ID, AUTH_TOKEN, '2018-10-01', True]
job_endpoints = JobEndpoints(*ENDPOINT_ARGS)
project_endpoints = ProjectEndpoints(*ENDPOINT_ARGS)
material_endpoints = MaterialEndpoints(*ENDPOINT_ARGS)
workflow_endpoints = WorkflowEndpoints(*ENDPOINT_ARGS)

# Get Owner ID, Project ID, (Default) Material ID, and Workflow ID.
# Replace "KPOINT_WORKFLOW" with your respective workflow name.
owner_id = ACCOUNT_ID
project_id = project_endpoints.list({"isDefault": True, "owner._id": ACCOUNT_ID})[0]["_id"]
material_id = material_endpoints.list({"isDefault": True, "owner._id": ACCOUNT_ID})[0]["_id"]
workflow_id = workflow_endpoints.list({"name": "KPOINT_WORKFLOW", "owner._id": ACCOUNT_ID})[0]["_id"]

# Set compute parameters.
# Can replace debug queue (D) with "OR" if running into memory issues.
PPN = "1"
QUEUE = "D"
NODES = "1"
TIME_LIMIT = "01:00:00"
CLUSTER = "cluster-001"


# Generate config file.
# Note that "job_name" is replaced by "job_name_prefix" when using run method.
compute = job_endpoints.get_compute(CLUSTER, PPN, NODES, QUEUE, TIME_LIMIT)
config = job_endpoints.get_config([material_id], workflow_id, project_id, owner_id, "job_name", compute)


# Create Tracker Class and Run
tracker = ConvTracker(config, job_endpoints)
tracker.run(max_iter=20, job_set_name="KPoint", job_name_prefix="kpoint")
106 changes: 106 additions & 0 deletions birdday/kpoint/kpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import re
import urllib.request
from utils.generic import wait_for_jobs_to_finish


class ConvTracker:
    """
    Class used to create, submit, and manage jobs for generating a converged KPoint mesh.

    Args:
        config (dict): Exabyte API config.
        job_endpoints (JobEndpoints): Exabyte API endpoint.
    kwargs:
        cutoff (float): Desired energy cutoff in eV.
        energy (list): Total energy values. Can be used as a pseudo-restart to convergence.

    Attributes:
        config (dict): Exabyte API config.
        job_endpoints (JobEndpoints): Exabyte API endpoint.
        cutoff (float): Desired energy cutoff in eV.
        energy (list): List of energy values used to check for convergence.
    """

    def __init__(self, config, job_endpoints, cutoff=1e-5, energy=None):
        self.config = config
        self.job_endpoints = job_endpoints
        self.cutoff = cutoff  # Units = eV
        # A mutable default argument (energy=[]) would be shared across all
        # instances; use None as the sentinel and create a fresh list instead.
        # An existing array of energies can be passed in to continue a job set.
        self.energy = [] if energy is None else energy

    def create_submit_job(self, kp, jobs_set=None, job_name_prefix="kpoint"):
        """
        Creates and submits a given job.

        Args:
            kp (int): Value of kpoints. Also used to generate job name.

        kwargs:
            jobs_set (str): ID of job set.
            job_name_prefix (str): Name of job prepended to kpoint value.

        Returns:
            str: ID of the created job.
        """
        job_name = {"name": f"{job_name_prefix}_{kp}"}
        self.config.update(job_name)
        job = self.job_endpoints.create(self.config)

        if jobs_set is not None:
            self.job_endpoints.move_to_set(job["_id"], "", jobs_set["_id"])

        # Update K-Point Values.
        # This is not an ideal way to set kpoints, but the built-in convergence tool
        # did not work as expected, and adjusting the workflow did not update render.
        job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"] = job["workflow"]["subworkflows"][0]["units"][0]["input"][0]["rendered"].replace("K_POINTS automatic\n10 10 10 0 0 0", f"K_POINTS automatic\n{kp} {kp} {kp} 0 0 0")
        self.job_endpoints.update(job["_id"], job)
        self.job_endpoints.submit(job['_id'])

        return job["_id"]

    def parse_output(self, job_id):
        """
        Read total energy from the results file (pw_scf.out).

        Args:
            job_id (str): ID of job to get results from.

        Returns:
            float: Total energy in eV (converted from Rydberg).
        """
        files = self.job_endpoints.list_files(job_id)
        output_file = [file for file in files if file["name"] == 'pw_scf.out'][0]
        server_response = urllib.request.urlopen(output_file['signedUrl'])
        output_file_bytes = server_response.read()
        output_file = output_file_bytes.decode(encoding="UTF-8")
        output_as_array = output_file.split("\n")
        # QE prints the converged total energy on a line starting with "! total energy";
        # the second-to-last whitespace-separated token is the value in Ry.
        total_energy_ry = float(re.split(" +", [row for row in output_as_array if "! total energy" in row][0])[-2])
        total_energy_ev = total_energy_ry * 13.6056980659  # 1 Ry = 13.6056980659 eV

        return total_energy_ev

    def check_convergence(self):
        """
        Check if energy convergence reached.

        Returns:
            bool: True if the last two energies differ by no more than the cutoff.
        """
        if len(self.energy) < 2:
            return False
        else:
            return abs(self.energy[-1] - self.energy[-2]) <= self.cutoff

    def run(self, kp_initial=1, max_iter=20, job_set_name=None, job_name_prefix="kpoint"):
        """
        Manages job submission and checks for convergence.

        kwargs:
            kp_initial (int): Sets initial kpoint values.
            max_iter (int): Number of times to iterate before exiting.
            job_set_name (str): Name given to set of jobs.
            job_name_prefix (str): Name of job prepended to kpoint value.
        """
        if job_set_name is not None:
            jobs_set = self.job_endpoints.create_set({"name": job_set_name, "projectId": self.config["_project"]["_id"], "owner": {"_id": self.config["owner"]["_id"]}})
        else:
            # Bug fix: the original assigned `job_set = None` here, leaving
            # `jobs_set` undefined and raising NameError in the loop below.
            jobs_set = None

        for kp in range(kp_initial, max_iter + kp_initial):
            print(f"KPoints = {kp}")
            job_id = self.create_submit_job(kp, jobs_set=jobs_set, job_name_prefix=job_name_prefix)
            wait_for_jobs_to_finish(self.job_endpoints, [job_id], poll_interval=10)
            total_energy = self.parse_output(job_id)
            self.energy.extend([total_energy])

            if self.check_convergence():
                break
15 changes: 15 additions & 0 deletions birdday/tests/test_kpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest
from kpoint.kpoint import ConvTracker

def test_check_convergence():
    """Verify check_convergence around the cutoff boundary (below, at, above)."""
    # Below convergence limit: difference 1e-6 < cutoff 1e-5.
    # Use plain truthiness asserts rather than `== True`/`== False` (PEP 8 / E712).
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-6])
    assert test_tracker.check_convergence()

    # At convergence limit: difference equal to cutoff still counts as converged.
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-5])
    assert test_tracker.check_convergence()

    # Above convergence limit: difference 1e-4 > cutoff 1e-5.
    test_tracker = ConvTracker("placeholder_config", "placeholder_job_endpoint", cutoff=1e-5, energy=[5, 5+1e-4])
    assert not test_tracker.check_convergence()
48 changes: 48 additions & 0 deletions birdday/utils/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# This module defines a set of common functions which are used in other examples.
import time
import datetime
from tabulate import tabulate


def get_jobs_statuses_by_ids(endpoint, job_ids):
    """
    Fetch the status field for each of the given jobs.

    Args:
        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
        job_ids (list): list of job IDs to get the status for

    Returns:
        list: list of job statuses
    """
    # Query only the status field for the requested job IDs.
    matched_jobs = endpoint.list({"_id": {"$in": job_ids}}, {"fields": {"status": 1}})
    statuses = []
    for entry in matched_jobs:
        statuses.append(entry["status"])
    return statuses


def wait_for_jobs_to_finish(endpoint, job_ids, poll_interval=10):
    """
    Block until every job has left a pending state, printing a status table each poll.

    A job is considered finished once its status is none of "pre-submission",
    "submitted", or "active".

    Args:
        endpoint (endpoints.jobs.JobEndpoints): an instance of JobEndpoints class
        job_ids (list): list of job IDs to wait for
        poll_interval (int): poll interval for job information in seconds. Defaults to 10.
    """
    pending_states = ("pre-submission", "submitted", "active")
    print("Wait for jobs to finish, poll interval: {0} sec".format(poll_interval))
    while True:
        current_statuses = get_jobs_statuses_by_ids(endpoint, job_ids)

        # Summarize how many jobs are in each state of interest.
        summary_row = [
            datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'),
            current_statuses.count("submitted"),
            current_statuses.count("active"),
            current_statuses.count("finished"),
            current_statuses.count("error"),
        ]
        table_headers = ["TIME", "SUBMITTED-JOBS", "ACTIVE-JOBS", "FINISHED-JOBS", "ERRORED-JOBS"]
        print(tabulate([summary_row], table_headers, tablefmt='grid', stralign='center'))

        # Stop once no job remains in a pending state; otherwise sleep and re-poll.
        if not any(status in pending_states for status in current_statuses):
            break
        time.sleep(poll_interval)