Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix YamlHelper for thread safety in multi-threaded environments. #2163

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
49 changes: 44 additions & 5 deletions soda/core/soda/common/yaml_helper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import threading

from ruamel.yaml import YAML, StringIO


Expand All @@ -14,16 +16,53 @@ def to_yaml_str(yaml_object) -> str:


class YamlHelper:
    """
    A helper class to serialize and deserialize objects to and from YAML format.

    Every thread lazily receives its own YAML parser/dumper instance (kept in a
    ``threading.local``), so no YAML instance is ever shared between threads.

    Usage:
        YamlHelper.to_yaml(yaml_object)
        YamlHelper.from_yaml(yaml_string)
    """

    # Per-thread storage; the ``yaml`` attribute is created on first use in each thread.
    _thread_local = threading.local()

    @classmethod
    def _get_yaml(cls):
        """
        Return the YAML instance belonging to the current thread.

        If the current thread has no instance yet, one is created with
        ``create_yaml()`` and cached on the thread-local storage.

        :return: a YAML instance specific to the current thread.
        """
        if not hasattr(cls._thread_local, "yaml"):
            cls._thread_local.yaml = create_yaml()
        return cls._thread_local.yaml

    @classmethod
    def to_yaml(cls, yaml_object) -> str:
        """
        Serialize a Python object into a YAML formatted string.

        :param object yaml_object: the Python object to serialize.
        :return: the YAML formatted string, or an empty string if the input is None.
        """
        if yaml_object is None:
            return ""
        with StringIO() as stream:
            cls._get_yaml().dump(yaml_object, stream)
            # Read the buffer before the ``with`` block closes the stream.
            return stream.getvalue()

    @classmethod
    def from_yaml(cls, yaml_str) -> object:
        """
        Deserialize YAML content into a Python object.

        :param yaml_str: the YAML formatted string to deserialize. Inputs that are
            not ``str``/``bytes`` are handed to the YAML loader unchanged.
        :return: the deserialized Python object, or None if the input is None or
            a blank string.
        """
        if yaml_str is None:
            return None
        # Only text-like inputs can be checked for blankness; calling ``strip`` on
        # anything else (e.g. a stream) would raise AttributeError.
        if isinstance(yaml_str, (str, bytes)) and not yaml_str.strip():
            return None
        return cls._get_yaml().load(yaml_str)
Loading