Skip to content

Commit

Permalink
Having robust tests for tricky caching cases of bands and phonon freq…
Browse files Browse the repository at this point in the history
…uecies
  • Loading branch information
unkcpz committed Jun 10, 2024
1 parent f313e9b commit 94f2623
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 6 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ filterwarnings = [
'ignore:Creating AiiDA configuration folder.*:UserWarning',
'ignore:Object of type .* not in session, .* operation along .* will not proceed:sqlalchemy.exc.SAWarning',
'ignore:The `Code` class is deprecated.*:aiida.common.warnings.AiidaDeprecationWarning',
'ignore:`CalcJobNode.*` is deprecated.*:aiida.common.warnings.AiidaDeprecationWarning',
]
markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
Expand Down
4 changes: 2 additions & 2 deletions src/aiida_sssp_workflow/workflows/convergence/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ def is_valid_convergence_configuration(value, _=None):

def is_valid_cutoff_list(cutoff_list, _=None):
"""Check the cutoff list is a list of tuples and the cutoffs are increasing"""
if not all(isinstance(cutoff, tuple) for cutoff in cutoff_list):
return "cutoff_list must be a list of tuples"
if not all(isinstance(cutoff, (tuple, list)) for cutoff in cutoff_list):
return "cutoff_list must be a list of tuples or list."
if not all(
cutoff_list[i][0] < cutoff_list[i + 1][0] for i in range(len(cutoff_list) - 1)
):
Expand Down
3 changes: 0 additions & 3 deletions src/aiida_sssp_workflow/workflows/evaluate/_bands.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,6 @@ def inspect_bands(self):
"""
workchain = self.ctx.workchain_bands

if workchain.is_finished:
self._disable_cache(workchain)

if not workchain.is_finished_ok:
self.logger.warning(
f"PwBandsWorkChain for bands evaluation failed with exit status {workchain.exit_status}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def inspect_bands(self):
is_cleaned = self.ctx.current_folder.base.extras.get("cleaned", False)
if is_cleaned:
self.logger.warning(
f"PhBaseWorkChain failed because the remote folder is empty with exit status {workchain.exit_status}, invalid the caching of the node and re-run scf calculation."
f"PwBaseWorkChain failed because the remote folder is empty with exit status {workchain.exit_status}, invalid the caching of the node and re-run scf calculation."
)
# invalid the caching of the node and re-run scf calculation
workchain_scf = self.ctx.workchain_scf
Expand Down
267 changes: 267 additions & 0 deletions tests/workflows/convergence/test_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
import pytest

from aiida.plugins import DataFactory, WorkflowFactory
from aiida.engine import ProcessBuilder, run_get_node
from aiida.manage.caching import enable_caching


UpfData = DataFactory("pseudo.upf")


@pytest.mark.slow
@pytest.mark.usefixtures("aiida_profile_clean")
def test_caching_bands(
pseudo_path,
code_generator,
):
"""Test caching is working for the first pw calculation of bands.
Test if the remote_path was empty bands and phonon_frequencies workflow will manage
to rerun the first preparing pw.x calculation."""
_ConvergenceBandsWorkChain = WorkflowFactory("sssp_workflow.convergence.bands")

# The caching should turned on also for the first prepareing run
# Otherwise, the scf calculation inside band workflow has two duplicate nodes which has same uuid
# but are both valid cached source. This cause caching race condition.
with enable_caching(identifier="aiida.calculations:quantumespresso.*"):
bands_builder: ProcessBuilder = _ConvergenceBandsWorkChain.get_builder(
pseudo=pseudo_path("Al"),
protocol="test",
cutoff_list=[(20, 80), (30, 120)],
configuration="DC",
code=code_generator("pw"),
clean_workdir=True,
)

# Running a bands convergence workflow first and check that SCF is not from cached calcjob
_, source_node = run_get_node(bands_builder)

# check the first scf of reference
# The pw calculation
source_ref_wf = [
p
for p in source_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
source_scf_calcjob_node = source_ref_wf.called[1].called[0].called[1]
assert (
source_scf_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)

# check the first bands of reference was cached
# The pw calculation
source_band_calcjob_node = source_ref_wf.called[1].called[1].called[0]
assert (
source_band_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)

# Run again and check it is using caching
_, cached_node = run_get_node(bands_builder)
cached_ref_wf = [
p
for p in cached_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
cached_scf_calcjob_node = cached_ref_wf.called[1].called[0].called[1]

assert (
cached_scf_calcjob_node.base.extras.get("_aiida_cached_from", None)
== source_scf_calcjob_node.uuid
)
assert not cached_scf_calcjob_node.base.caching.is_valid_cache

cached_band_calcjob_node = cached_ref_wf.called[1].called[1].called[0]

assert (
cached_band_calcjob_node.base.extras.get("_aiida_cached_from", None)
== source_band_calcjob_node.uuid
)


@pytest.mark.slow
@pytest.mark.usefixtures("aiida_profile_clean")
def test_caching_phonon_frequencies(
pseudo_path,
code_generator,
):
"""Test caching is working for the first pw calculation of phonon_frequencies.
Test if the remote_path was empty phonon_frequencies workflow will manage
to rerun the first preparing pw.x calculation."""
_ConvergencePhononFrequenciessWorkChain = WorkflowFactory(
"sssp_workflow.convergence.phonon_frequencies"
)

# The caching should turned on also for the first prepareing run
# Otherwise, the scf calculation inside band workflow has two duplicate nodes which has same uuid
# but are both valid cached source. This cause caching race condition.
with enable_caching(identifier="aiida.calculations:quantumespresso.*"):
phonon_frequencies_builder: ProcessBuilder = (
_ConvergencePhononFrequenciessWorkChain.get_builder(
pseudo=pseudo_path("Al"),
protocol="test",
cutoff_list=[(20, 80), (30, 120)],
configuration="DC",
pw_code=code_generator("pw"),
ph_code=code_generator("ph"),
clean_workdir=True,
)
)

# Running a phonon frequencies convergence workflow first and check that SCF is not from cached calcjob
_, source_node = run_get_node(phonon_frequencies_builder)

# check the first scf of reference
# The pw calculation
source_ref_wf = [
p
for p in source_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
source_scf_calcjob_node = source_ref_wf.called[0].called[1]
assert (
source_scf_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)

# The ph calculation
source_ph_calcjob_node = source_ref_wf.called[1].called[0]
assert (
source_ph_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)

# Run again and check it is using caching
_, cached_node = run_get_node(phonon_frequencies_builder)
cached_ref_wf = [
p
for p in cached_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
cached_scf_calcjob_node = cached_ref_wf.called[0].called[1]

assert (
cached_scf_calcjob_node.base.extras.get("_aiida_cached_from", None)
== source_scf_calcjob_node.uuid
)
assert not cached_scf_calcjob_node.base.caching.is_valid_cache

# Run again and check it is using caching
cached_ph_calcjob_node = cached_ref_wf.called[1].called[0]
assert (
cached_ph_calcjob_node.base.extras.get("_aiida_cached_from", None)
== source_ph_calcjob_node.uuid
)


@pytest.mark.slow
@pytest.mark.usefixtures("aiida_profile_clean")
def test_caching_bands_rerun_pw_prepare(
pseudo_path,
code_generator,
):
"""Test caching is working for the first pw calculation of bands.
After the first run, I manually make the bands calculation invalid cache so it can rerun with an empty remote
The test check that the scf will rerun if the remote not exist."""
_ConvergenceBandsWorkChain = WorkflowFactory("sssp_workflow.convergence.bands")

# The caching should turned on also for the first prepareing run
# Otherwise, the scf calculation inside band workflow has two duplicate nodes which has same uuid
# but are both valid cached source. This cause caching race condition.
with enable_caching(identifier="aiida.calculations:quantumespresso.*"):
bands_builder: ProcessBuilder = _ConvergenceBandsWorkChain.get_builder(
pseudo=pseudo_path("Al"),
protocol="test",
cutoff_list=[(20, 80), (30, 120)],
configuration="DC",
code=code_generator("pw"),
clean_workdir=True,
)

# Running a bands convergence workflow first and check that SCF is not from cached calcjob
_, source_node = run_get_node(bands_builder)

# Make the source band calculation invalid cache
source_ref_wf = [
p
for p in source_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
source_band_calcjob_node = source_ref_wf.called[1].called[1].called[0]
assert source_band_calcjob_node.is_valid_cache

source_band_calcjob_node.is_valid_cache = False

# Run again and check it is using caching
_, cached_node = run_get_node(bands_builder)

# Check the band work chain finished okay
cached_ref_wf = [
p
for p in cached_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
assert cached_ref_wf.called[1].is_finished_ok

cached_band_calcjob_node = cached_ref_wf.called[1].called[1].called[0]
assert (
cached_band_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)
assert cached_band_calcjob_node.exit_code.status == 305


@pytest.mark.slow
@pytest.mark.usefixtures("aiida_profile_clean")
def test_caching_phonon_frequencies_rerun_pw_prepare(
pseudo_path,
code_generator,
):
"""Test caching is working for the first pw calculation of phonon_frequencies.
Test if the remote_path was empty phonon_frequencies workflow will manage
to rerun the first preparing pw.x calculation."""
_ConvergencePhononFrequenciessWorkChain = WorkflowFactory(
"sssp_workflow.convergence.phonon_frequencies"
)

# The caching should turned on also for the first prepareing run
# Otherwise, the scf calculation inside band workflow has two duplicate nodes which has same uuid
# but are both valid cached source. This cause caching race condition.
with enable_caching(identifier="aiida.calculations:quantumespresso.*"):
phonon_frequencies_builder: ProcessBuilder = (
_ConvergencePhononFrequenciessWorkChain.get_builder(
pseudo=pseudo_path("Al"),
protocol="test",
cutoff_list=[(20, 80), (30, 120)],
configuration="DC",
pw_code=code_generator("pw"),
ph_code=code_generator("ph"),
clean_workdir=True,
)
)

# Running a phonon_frequencies convergence workflow first and check that SCF is not from cached calcjob
_, source_node = run_get_node(phonon_frequencies_builder)

# Make the source ph calculation invalid cache
source_ref_wf = [
p
for p in source_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
source_ph_calcjob_node = source_ref_wf.called[1].called[0]
assert source_ph_calcjob_node.is_valid_cache

source_ph_calcjob_node.is_valid_cache = False

# Run again and check it is using caching
_, cached_node = run_get_node(phonon_frequencies_builder)

cached_ref_wf = [
p
for p in cached_node.called
if p.base.extras.get("wavefunction_cutoff", None) == 30
][0]
# Check the ph from rerun pw is finished okay
assert cached_ref_wf.is_finished_ok

cached_ph_calcjob_node = cached_ref_wf.called[1].called[0]
assert (
cached_ph_calcjob_node.base.extras.get("_aiida_cached_from", None) is None
)
assert cached_ph_calcjob_node.exit_code.status == 312

0 comments on commit 94f2623

Please sign in to comment.