diff --git a/src/ucs/sys/sys.c b/src/ucs/sys/sys.c
index 069b8ac5c61d..42c3d3a24e9d 100644
--- a/src/ucs/sys/sys.c
+++ b/src/ucs/sys/sys.c
@@ -17,8 +17,11 @@
 #include
 #include
 #include
+#include
 #include
 
+#include
+#include
 #include
 #include
 #include
@@ -38,6 +41,23 @@
 /* Default huge page size is 2 MBytes */
 #define UCS_DEFAULT_MEM_FREE       640000
 #define UCS_PROCESS_SMAPS_FILE     "/proc/self/smaps"
+#define UCS_PROCESS_NS_DIR         "/proc/self/ns"
+
+
+/* Namespace file names under UCS_PROCESS_NS_DIR, indexed by
+ * ucs_sys_get_ns_name_t, and the inode numbers cached for them by
+ * ucs_sys_get_ns() (0 until resolved, or if stat() fails) */
+static struct {
+    const char *name;
+    ino_t      ino;
+} ucs_sys_namespace_name[] = {
+    [UCS_SYS_NS_IPC]  = {.name = "ipc",  .ino = 0},
+    [UCS_SYS_NS_MNT]  = {.name = "mnt",  .ino = 0},
+    [UCS_SYS_NS_NET]  = {.name = "net",  .ino = 0},
+    [UCS_SYS_NS_PID]  = {.name = "pid",  .ino = 0},
+    [UCS_SYS_NS_USER] = {.name = "user", .ino = 0},
+    [UCS_SYS_NS_UTS]  = {.name = "uts",  .ino = 0}
+};
 
 
 const char *ucs_get_tmpdir()
@@ -1158,3 +1178,34 @@ void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src)
         }
     }
 }
+
+ino_t ucs_sys_get_ns(ucs_sys_get_ns_name_t name)
+{
+    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
+    char filename[MAXPATHLEN];
+    int res;
+    struct stat st;
+    ucs_sys_get_ns_name_t ns;
+
+    /* the cast rejects negative values too if the enum type is signed */
+    if ((unsigned)name >= UCS_SYS_NS_LAST) {
+        return 0;
+    }
+
+    /* resolve all namespace inodes on the first call; entries whose stat()
+     * fails keep their initial value 0 */
+    UCS_INIT_ONCE(&init_once) {
+        for (ns = UCS_SYS_NS_IPC; ns < UCS_SYS_NS_LAST; ns++) {
+            snprintf(filename, sizeof(filename), "%s/%s", UCS_PROCESS_NS_DIR,
+                     ucs_sys_namespace_name[ns].name);
+
+            res = stat(filename, &st);
+            if (res == 0) {
+                ucs_sys_namespace_name[ns].ino = st.st_ino;
+            }
+        }
+    }
+
+    return ucs_sys_namespace_name[name].ino;
+}
+
diff --git a/src/ucs/sys/sys.h b/src/ucs/sys/sys.h
index 9629d552e604..7f44cf5f1e81 100644
--- a/src/ucs/sys/sys.h
+++ b/src/ucs/sys/sys.h
@@ -67,6 +67,20 @@ BEGIN_C_DECLS
 
 /** @file sys.h */
 
+/**
+ * Namespace kinds which can be passed to @ref ucs_sys_get_ns
+ */
+typedef enum {
+    UCS_SYS_NS_IPC,
+    UCS_SYS_NS_MNT,
+    UCS_SYS_NS_NET,
+    UCS_SYS_NS_PID,
+    UCS_SYS_NS_USER,
+    UCS_SYS_NS_UTS,
+    UCS_SYS_NS_LAST
+} ucs_sys_get_ns_name_t;
+
+
 /**
  * @return TMPDIR environment variable if set. Otherwise, return "/tmp".
  */
@@ -413,6 +427,17 @@ int ucs_sys_getaffinity(ucs_sys_cpuset_t *cpuset);
  */
 void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src);
 
+/**
+ * Get namespace id for resource.
+ *
+ * @param [in]  name        Resource name to get namespace
+ *
+ * @return namespace value, or 0 if @a name is out of range or the namespace
+ *         could not be queried (e.g. namespaces are not supported)
+ */
+ino_t ucs_sys_get_ns(ucs_sys_get_ns_name_t name);
+
+
 END_C_DECLS
 
 #endif
diff --git a/src/uct/sm/base/sm_iface.c b/src/uct/sm/base/sm_iface.c
index f4fdd0d65ce4..24286f69e7ff 100644
--- a/src/uct/sm/base/sm_iface.c
+++ b/src/uct/sm/base/sm_iface.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 
 
 ucs_config_field_t uct_sm_iface_config_table[] = {
@@ -38,17 +39,71 @@ uct_sm_base_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_
                                        num_tl_devices_p);
 }
 
+
+/*
+ * Get the system id: the boot_id GUID folded to 64 bits, or machine_guid if
+ * boot_id cannot be read or parsed. The value is calculated on the first
+ * call and cached.
+ */
+static uint64_t uct_sm_iface_get_system_id()
+{
+    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
+    static uint64_t system_id        = 0;
+    const char *boot_id_filename     = "/proc/sys/kernel/random/boot_id";
+    const char *guid_scan_fmt        = "%x-%4hx-%4hx-%4hx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx";
+    char guid_str[256];
+    ssize_t size;
+    int res;
+
+    union {
+        struct {
+            uint32_t v1;
+            uint16_t v2;
+            uint16_t v3;
+            uint16_t v4;
+            uint8_t  v5[6];
+        };
+        struct {
+            uint64_t low_dword;
+            uint64_t hi_dword;
+        };
+    } UCS_S_PACKED guid;
+
+    UCS_INIT_ONCE(&init_once) {
+        res  = 0;
+        size = ucs_read_file_str(guid_str, sizeof(guid_str), 1,
+                                 "%s", boot_id_filename);
+        if (size > 0) {
+            res = sscanf(guid_str, guid_scan_fmt,
+                         ucs_unaligned_ptr(&guid.v1),
+                         ucs_unaligned_ptr(&guid.v2),
+                         ucs_unaligned_ptr(&guid.v3),
+                         ucs_unaligned_ptr(&guid.v4),
+                         &guid.v5[0], &guid.v5[1], &guid.v5[2],
+                         &guid.v5[3], &guid.v5[4], &guid.v5[5]);
+        }
+
+        if (res == 10) { /* 10 values should be scanned */
+            system_id = guid.hi_dword ^ guid.low_dword;
+        } else {
+            system_id = ucs_machine_guid();
+        }
+    }
+
+    return system_id;
+}
+
 ucs_status_t uct_sm_iface_get_device_address(uct_iface_t *tl_iface,
                                              uct_device_addr_t *addr)
 {
-    *(uint64_t*)addr = ucs_machine_guid();
+    *(uint64_t*)addr = uct_sm_iface_get_system_id();
     return UCS_OK;
 }
 
 int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr,
                               const uct_iface_addr_t *iface_addr)
 {
-    return ucs_machine_guid() == *(const uint64_t*)dev_addr;
+    return uct_sm_iface_get_system_id() == *(const uint64_t*)dev_addr;
 }
 
 ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags)
diff --git a/src/uct/sm/mm/base/mm_iface.c b/src/uct/sm/mm/base/mm_iface.c
index cafdc852af99..404eb57763c6 100644
--- a/src/uct/sm/mm/base/mm_iface.c
+++ b/src/uct/sm/mm/base/mm_iface.c
@@ -312,6 +312,42 @@ static ucs_status_t uct_mm_iface_event_fd_arm(uct_iface_h tl_iface,
     }
 }
 
+/*
+ * Check whether a remote mm interface can be reached: its device address has
+ * to match the local system id, and the IPC namespace stored in its FIFO
+ * control structure has to be equal to the one of the calling process.
+ */
+static int uct_mm_iface_is_reachable(const uct_iface_h tl_iface,
+                                     const uct_device_addr_t *dev_addr,
+                                     const uct_iface_addr_t *iface_addr)
+{
+    uct_mm_iface_t      *iface = ucs_derived_of(tl_iface,
+                                                uct_mm_iface_t);
+    uct_mm_iface_addr_t *addr  = (void*)iface_addr;
+    uct_mm_remote_seg_t remote_seg;
+    ucs_status_t status;
+    uint64_t ipc_ns;
+
+    /* a segment id and a namespace inode number are meaningful only within
+     * one system, so check the device address before trying to attach */
+    if (!uct_sm_iface_is_reachable(tl_iface, dev_addr, iface_addr)) {
+        return 0;
+    }
+
+    status = uct_mm_iface_mapper_call(iface, mem_attach,
+                                      addr->fifo_seg_id,
+                                      sizeof(uct_mm_fifo_ctl_t),
+                                      addr + 1, &remote_seg);
+    if (status != UCS_OK) {
+        return 0;
+    }
+
+    ipc_ns = ((uct_mm_fifo_ctl_t*)remote_seg.address)->ipc_ns;
+    uct_mm_iface_mapper_call(iface, mem_detach, &remote_seg);
+
+    return ipc_ns == ucs_sys_get_ns(UCS_SYS_NS_IPC);
+}
+
 static UCS_CLASS_DECLARE_DELETE_FUNC(uct_mm_iface_t, uct_iface_t);
 
 static uct_iface_ops_t uct_mm_iface_ops = {
@@ -343,7 +379,7 @@ static uct_iface_ops_t uct_mm_iface_ops = {
     .iface_query              = uct_mm_iface_query,
     .iface_get_device_address = uct_sm_iface_get_device_address,
     .iface_get_address        = uct_mm_iface_get_address,
-    .iface_is_reachable       = uct_sm_iface_is_reachable
+    .iface_is_reachable       = uct_mm_iface_is_reachable
 };
 
 static void uct_mm_iface_recv_desc_init(uct_iface_h tl_iface, void *obj,
@@ -538,9 +574,10 @@ static UCS_CLASS_INIT_FUNC(uct_mm_iface_t, uct_md_h md, uct_worker_h worker,
     uct_mm_iface_set_fifo_ptrs(self->recv_fifo_mem.address,
                                &self->recv_fifo_ctl, &self->recv_fifo_elems);
 
-    self->recv_fifo_ctl->head = 0;
-    self->recv_fifo_ctl->tail = 0;
-    self->read_index          = 0;
+    self->recv_fifo_ctl->head   = 0;
+    self->recv_fifo_ctl->tail   = 0;
+    self->recv_fifo_ctl->ipc_ns = ucs_sys_get_ns(UCS_SYS_NS_IPC);
+    self->read_index            = 0;
 
     /* create a unix file descriptor to receive event notifications */
     status = uct_mm_iface_create_signal_fd(self);
diff --git a/src/uct/sm/mm/base/mm_iface.h b/src/uct/sm/mm/base/mm_iface.h
index b008562e9af2..ea62c1c29a21 100644
--- a/src/uct/sm/mm/base/mm_iface.h
+++ b/src/uct/sm/mm/base/mm_iface.h
@@ -88,6 +88,10 @@ typedef struct uct_mm_fifo_ctl {
 
     /* 2nd cacheline */
     volatile uint64_t tail;           /* How much was consumed */
+
+    /* IPC namespace inode number of the process which created the FIFO
+     * (fixed-width type, since other processes attach to this structure) */
+    uint64_t          ipc_ns;
 } UCS_S_PACKED UCS_V_ALIGNED(UCS_SYS_CACHE_LINE_SIZE) uct_mm_fifo_ctl_t;
 
 