diff --git a/news/muD_calculator.rst b/news/muD_calculator.rst new file mode 100644 index 00000000..a6731760 --- /dev/null +++ b/news/muD_calculator.rst @@ -0,0 +1,24 @@ +**Added:** + +* Function that can be used to compute muD (absorption coefficient) from a file containing an absorption profile + from a line-scan through the sample + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* diff --git a/requirements/conda.txt b/requirements/conda.txt index 24ce15ab..6bad1038 100644 --- a/requirements/conda.txt +++ b/requirements/conda.txt @@ -1 +1,2 @@ numpy +scipy diff --git a/requirements/pip.txt b/requirements/pip.txt index 24ce15ab..6bad1038 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -1 +1,2 @@ numpy +scipy diff --git a/src/diffpy/utils/tools.py b/src/diffpy/utils/tools.py index b7a4c47d..040e62ea 100644 --- a/src/diffpy/utils/tools.py +++ b/src/diffpy/utils/tools.py @@ -3,25 +3,11 @@ from copy import copy from pathlib import Path +import numpy as np +from scipy.optimize import dual_annealing +from scipy.signal import convolve -def clean_dict(obj): - """Remove keys from the dictionary where the corresponding value is None. - - Parameters - ---------- - obj: dict - The dictionary to clean. If None, initialize as an empty dictionary. - - Returns - ------- - dict: - The cleaned dictionary with keys removed where the value is None. - """ - obj = obj if obj is not None else {} - for key, value in copy(obj).items(): - if not value: - del obj[key] - return obj +from diffpy.utils.parsers.loaddata import loadData def _stringify(obj): @@ -206,3 +192,103 @@ def get_package_info(package_names, metadata=None): pkg_info.update({package: importlib.metadata.version(package)}) metadata["package_info"] = pkg_info return metadata + + +def _top_hat(z, half_slit_width): + """Create a top-hat function, return 1.0 for values within the specified + slit width and 0 otherwise.""" + return np.where((z >= -half_slit_width) & (z <= half_slit_width), 1.0, 0.0) + + +def _model_function(z, diameter, z0, I0, mud, slope): + """ + Compute the model function with the following steps: + 1. Let dz = z-z0, so that dz is centered at 0 + 2. Compute length l that is the effective length for computing intensity I = I0 * e^{-mu * l}: + - For dz within the capillary diameter, l is the chord length of the circle at position dz + - For dz outside this range, l = 0 + 3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * z + """ + min_radius = -diameter / 2 + max_radius = diameter / 2 + dz = z - z0 + length = np.piecewise( + dz, + [dz < min_radius, (min_radius <= dz) & (dz <= max_radius), dz > max_radius], + [0, lambda dz: 2 * np.sqrt((diameter / 2) ** 2 - dz**2), 0], + ) + return (I0 - slope * z) * np.exp(-mud / diameter * length) + + +def _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope): + """Extend z values and I values for padding (so that we don't have tails in + convolution), then perform convolution (note that the convolved I values + are the same as modeled I values if slit width is close to 0)""" + n_points = len(z) + z_left_pad = np.linspace(z.min() - n_points * (z[1] - z[0]), z.min(), n_points) + z_right_pad = np.linspace(z.max(), z.max() + n_points * (z[1] - z[0]), n_points) + z_extended = np.concatenate([z_left_pad, z, z_right_pad]) + I_extended = _model_function(z_extended, diameter, z0, I0, mud, slope) + kernel = _top_hat(z_extended - z_extended.mean(), half_slit_width) + I_convolved = I_extended # this takes care of the case where slit width is close to 0 + if kernel.sum() != 0: + kernel /= kernel.sum() + I_convolved = convolve(I_extended, kernel, mode="same") + padding_length = len(z_left_pad) + return I_convolved[padding_length:-padding_length] + + +def _objective_function(params, z, observed_data): + """Compute the objective function for fitting a model to the + observed/experimental data by minimizing the sum of squared residuals + between the observed data and the convolved model data.""" + diameter, half_slit_width, z0, I0, mud, slope = params + convolved_model_data = _extend_z_and_convolve(z, diameter, half_slit_width, z0, I0, mud, slope) + residuals = observed_data - convolved_model_data + return np.sum(residuals**2) + + +def _compute_single_mud(z_data, I_data): + """Perform dual annealing optimization and extract the parameters.""" + bounds = [ + (1e-5, z_data.max() - z_data.min()), # diameter: [small positive value, upper bound] + (0, (z_data.max() - z_data.min()) / 2), # half slit width: [0, upper bound] + (z_data.min(), z_data.max()), # z0: [min z, max z] + (1e-5, I_data.max()), # I0: [small positive value, max observed intensity] + (1e-5, 20), # muD: [small positive value, upper bound] + (-100000, 100000), # slope: [lower bound, upper bound] + ] + result = dual_annealing(_objective_function, bounds, args=(z_data, I_data)) + diameter, half_slit_width, z0, I0, mud, slope = result.x + convolved_fitted_signal = _extend_z_and_convolve(z_data, diameter, half_slit_width, z0, I0, mud, slope) + residuals = I_data - convolved_fitted_signal + rmse = np.sqrt(np.mean(residuals**2)) + return mud, rmse + + +def compute_mud(filepath): + """Compute the best-fit mu*D value from a z-scan file, removing the sample + holder effect. + + This function loads z-scan data and fits it to a model + that convolves a top-hat function with I = I0 * e^{-mu * l}. + The fitting procedure is run multiple times, and we return the best-fit parameters based on the lowest rmse. + + The full mathematical details are described in the paper: + An ad hoc Absorption Correction for Reliable Pair-Distribution Functions from Low Energy x-ray Sources, + Yucong Chen, Till Schertenleib, Andrew Yang, Pascal Schouwink, Wendy L. Queen and Simon J. L. Billinge, + in preparation. + + Parameters + ---------- + filepath : str + The path to the z-scan file. + + Returns + ------- + mu*D : float + The best-fit mu*D value. + """ + z_data, I_data = loadData(filepath, unpack=True) + best_mud, _ = min((_compute_single_mud(z_data, I_data) for _ in range(20)), key=lambda pair: pair[1]) + return best_mud diff --git a/tests/test_tools.py b/tests/test_tools.py index 3808537d..fa8b3c32 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -3,9 +3,16 @@ import os from pathlib import Path +import numpy as np import pytest -from diffpy.utils.tools import check_and_build_global_config, get_package_info, get_user_info +from diffpy.utils.tools import ( + _extend_z_and_convolve, + check_and_build_global_config, + compute_mud, + get_package_info, + get_user_info, +) @pytest.mark.parametrize( @@ -160,3 +167,19 @@ def test_get_package_info(monkeypatch, inputs, expected): ) actual_metadata = get_package_info(inputs[0], metadata=inputs[1]) assert actual_metadata == expected + + +def test_compute_mud(tmp_path): + diameter, slit_width, z0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0 + z_data = np.linspace(-1, 1, 50) + convolved_I_data = _extend_z_and_convolve(z_data, diameter, slit_width, z0, I0, mud, slope) + + directory = Path(tmp_path) + file = directory / "testfile" + with open(file, "w") as f: + for x, I in zip(z_data, convolved_I_data): + f.write(f"{x}\t{I}\n") + + expected_mud = 3 + actual_mud = compute_mud(file) + assert actual_mud == pytest.approx(expected_mud, rel=1e-4, abs=1e-3)