Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: muD calculator #292

Merged
merged 5 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions news/muD_calculator.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
**Added:**

* function to compute muD from a given z-scan file

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
1 change: 1 addition & 0 deletions requirements/conda.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
numpy
scipy
1 change: 1 addition & 0 deletions requirements/pip.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
numpy
scipy
122 changes: 104 additions & 18 deletions src/diffpy/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,11 @@
from copy import copy
from pathlib import Path

import numpy as np
from scipy.optimize import dual_annealing
from scipy.signal import convolve

def clean_dict(obj):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like this function is not used anywhere else.. so I deleted it

"""Remove keys from the dictionary where the corresponding value is None.

Parameters
----------
obj: dict
The dictionary to clean. If None, initialize as an empty dictionary.

Returns
-------
dict:
The cleaned dictionary with keys removed where the value is None.
"""
obj = obj if obj is not None else {}
for key, value in copy(obj).items():
if not value:
del obj[key]
return obj
from diffpy.utils.parsers.loaddata import loadData


def _stringify(obj):
Expand Down Expand Up @@ -206,3 +192,103 @@ def get_package_info(package_names, metadata=None):
pkg_info.update({package: importlib.metadata.version(package)})
metadata["package_info"] = pkg_info
return metadata


def _top_hat(z, half_slit_width):
"""Create a top-hat function, return 1.0 for values within the specified
slit width and 0 otherwise."""
return np.where((z >= -half_slit_width) & (z <= half_slit_width), 1.0, 0.0)


def _model_function(z, diameter, z0, I0, mud, slope):
"""
Compute the model function with the following steps:
1. Let dz = z-z0, so that dz is centered at 0
2. Compute length l that is the effective length for computing intensity I = I0 * e^{-mu * l}:
- For dz within the capillary diameter, l is the chord length of the circle at position dz
- For dz outside this range, l = 0
3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * z
"""
min_radius = -diameter / 2
max_radius = diameter / 2
dz = z - z0
length = np.piecewise(
dz,
[dz < min_radius, (min_radius <= dz) & (dz <= max_radius), dz > max_radius],
[0, lambda dz: 2 * np.sqrt((diameter / 2) ** 2 - dz**2), 0],
)
return (I0 - slope * z) * np.exp(-mud / diameter * length)


def _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope):
"""Extend z values and I values for padding (so that we don't have tails in
convolution), then perform convolution (note that the convolved I values
are the same as modeled I values if slit width is close to 0)"""
n_points = len(z)
z_left_pad = np.linspace(z.min() - n_points * (z[1] - z[0]), z.min(), n_points)
z_right_pad = np.linspace(z.max(), z.max() + n_points * (z[1] - z[0]), n_points)
z_extended = np.concatenate([z_left_pad, z, z_right_pad])
I_extended = _model_function(z_extended, diameter, z0, I0, mud, slope)
kernel = _top_hat(z_extended - z_extended.mean(), half_slit_width)
I_convolved = I_extended # this takes care of the case where slit width is close to 0
if kernel.sum() != 0:
kernel /= kernel.sum()
I_convolved = convolve(I_extended, kernel, mode="same")
padding_length = len(z_left_pad)
return I_convolved[padding_length:-padding_length]


def _objective_function(params, z, observed_data):
"""Compute the objective function for fitting a model to the
observed/experimental data by minimizing the sum of squared residuals
between the observed data and the convolved model data."""
diameter, half_slit_width, z0, I0, mud, slope = params
convolved_model_data = _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope)
residuals = observed_data - convolved_model_data
return np.sum(residuals**2)


def _compute_single_mud(z_data, I_data):
"""Perform dual annealing optimization and extract the parameters."""
bounds = [
(1e-5, z_data.max() - z_data.min()), # diameter: [small positive value, upper bound]
(0, (z_data.max() - z_data.min()) / 2), # half slit width: [0, upper bound]
(z_data.min(), z_data.max()), # z0: [min z, max z]
(1e-5, I_data.max()), # I0: [small positive value, max observed intensity]
(1e-5, 20), # muD: [small positive value, upper bound]
(-100000, 100000), # slope: [lower bound, upper bound]
]
result = dual_annealing(_objective_function, bounds, args=(z_data, I_data))
diameter, half_slit_width, z0, I0, mud, slope = result.x
convolved_fitted_signal = _extend_z_and_convolve(z_data, diameter, half_slit_width, z0, I0, mud, slope)
residuals = I_data - convolved_fitted_signal
rmse = np.sqrt(np.mean(residuals**2))
return mud, rmse


def compute_mud(filepath):
"""Compute the best-fit mu*D value from a z-scan file, removing the sample
holder effect.

This function loads z-scan data and fits it to a model
that convolves a top-hat function with I = I0 * e^{-mu * l}.
The fitting procedure is run multiple times, and we return the best-fit parameters based on the lowest rmse.

The full mathematical details are described in the paper:
An ad hoc Absorption Correction for Reliable Pair-Distribution Functions from Low Energy x-ray Sources,
Yucong Chen, Till Schertenleib, Andrew Yang, Pascal Schouwink, Wendy L. Queen and Simon J. L. Billinge,
in preparation.

Parameters
----------
filepath : str
The path to the z-scan file.

Returns
-------
mu*D : float
The best-fit mu*D value.
"""
z_data, I_data = loadData(filepath, unpack=True)
best_mud, _ = min((_compute_single_mud(z_data, I_data) for _ in range(20)), key=lambda pair: pair[1])
return best_mud
25 changes: 24 additions & 1 deletion tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
import os
from pathlib import Path

import numpy as np
import pytest

from diffpy.utils.tools import check_and_build_global_config, get_package_info, get_user_info
from diffpy.utils.tools import (
_extend_z_and_convolve,
check_and_build_global_config,
compute_mud,
get_package_info,
get_user_info,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -160,3 +167,19 @@ def test_get_package_info(monkeypatch, inputs, expected):
)
actual_metadata = get_package_info(inputs[0], metadata=inputs[1])
assert actual_metadata == expected


def test_compute_mud(tmp_path):
diameter, slit_width, z0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0
z_data = np.linspace(-1, 1, 50)
convolved_I_data = _extend_z_and_convolve(z_data, diameter, slit_width, z0, I0, mud, slope)

directory = Path(tmp_path)
file = directory / "testfile"
with open(file, "w") as f:
for x, I in zip(z_data, convolved_I_data):
f.write(f"{x}\t{I}\n")

expected_mud = 3
actual_mud = compute_mud(file)
assert actual_mud == pytest.approx(expected_mud, rel=1e-4, abs=1e-3)
Loading