Skip to content

Commit

Permalink
MM: try to connect to the remote iface to check reachability
Browse files Browse the repository at this point in the history
- try to connect to remote iface to check if it is reachable
- use boot_id guid to identify same host
  • Loading branch information
Sergey Oblomov committed Nov 30, 2019
1 parent 24889d1 commit 39fb657
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 6 deletions.
45 changes: 45 additions & 0 deletions src/ucs/sys/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
#include <ucs/sys/sys.h>
#include <ucs/debug/log.h>
#include <ucs/time/time.h>
#include <ucs/type/init_once.h>
#include <ucm/util/sys.h>

#include <unistd.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/shm.h>
#include <sys/mman.h>
Expand All @@ -38,6 +41,20 @@
/* Default huge page size is 2 MBytes */
#define UCS_DEFAULT_MEM_FREE 640000
#define UCS_PROCESS_SMAPS_FILE "/proc/self/smaps"
#define UCS_PROCESS_NS_DIR "/proc/self/ns"


/*
 * Namespace lookup table, indexed by ucs_sys_get_ns_name_t.
 *
 * 'name' is the entry name below UCS_PROCESS_NS_DIR. 'ino' caches the inode
 * number of that entry; it starts as 0 and is filled in by ucs_sys_get_ns().
 *
 * The storage-class specifier is placed first: putting 'static' after the
 * type specifier is an obsolescent form of declaration.
 */
static struct {
    const char *name;
    ino_t      ino;
} ucs_sys_namespace_name[] = {
    [UCS_SYS_NS_IPC]  = {.name = "ipc",  .ino = 0},
    [UCS_SYS_NS_MNT]  = {.name = "mnt",  .ino = 0},
    [UCS_SYS_NS_NET]  = {.name = "net",  .ino = 0},
    [UCS_SYS_NS_PID]  = {.name = "pid",  .ino = 0},
    [UCS_SYS_NS_USER] = {.name = "user", .ino = 0},
    [UCS_SYS_NS_UTS]  = {.name = "uts",  .ino = 0}
};


const char *ucs_get_tmpdir()
Expand Down Expand Up @@ -1158,3 +1175,31 @@ void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src)
}
}
}

/*
 * Return the cached inode number of the namespace entry selected by 'name'.
 *
 * On the first call with a valid 'name', every entry of
 * ucs_sys_namespace_name is stat()-ed under UCS_PROCESS_NS_DIR and its inode
 * number is stored in the table. Entries whose stat() fails keep the value 0.
 * An out-of-range 'name' yields 0 without touching the table.
 */
ino_t ucs_sys_get_ns(ucs_sys_get_ns_name_t name)
{
    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
    ucs_sys_get_ns_name_t idx;
    struct stat ns_stat;
    char ns_path[MAXPATHLEN];

    /* Reject anything that is not a real namespace kind */
    if (!(name < UCS_SYS_NS_LAST)) {
        return 0;
    }

    UCS_INIT_ONCE(&init_once) {
        idx = UCS_SYS_NS_IPC;
        while (idx < UCS_SYS_NS_LAST) {
            snprintf(ns_path, sizeof(ns_path), "%s/%s", UCS_PROCESS_NS_DIR,
                     ucs_sys_namespace_name[idx].name);

            /* A failed stat() leaves the table entry at its initial 0 */
            if (stat(ns_path, &ns_stat) == 0) {
                ucs_sys_namespace_name[idx].ino = ns_stat.st_ino;
            }

            idx++;
        }
    }

    return ucs_sys_namespace_name[name].ino;
}

21 changes: 21 additions & 0 deletions src/ucs/sys/sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ BEGIN_C_DECLS
/** @file sys.h */


/**
 * Namespace kinds that can be passed to ucs_sys_get_ns().
 *
 * The values are consecutive and start at 0. UCS_SYS_NS_LAST is the number of
 * namespace kinds, not a namespace kind itself.
 */
typedef enum {
    UCS_SYS_NS_IPC = 0, /* "ipc" namespace */
    UCS_SYS_NS_MNT,     /* "mnt" namespace */
    UCS_SYS_NS_NET,     /* "net" namespace */
    UCS_SYS_NS_PID,     /* "pid" namespace */
    UCS_SYS_NS_USER,    /* "user" namespace */
    UCS_SYS_NS_UTS,     /* "uts" namespace */
    UCS_SYS_NS_LAST     /* number of namespace kinds */
} ucs_sys_get_ns_name_t;


/**
* @return TMPDIR environment variable if set. Otherwise, return "/tmp".
*/
Expand Down Expand Up @@ -413,6 +424,16 @@ int ucs_sys_getaffinity(ucs_sys_cpuset_t *cpuset);
*/
void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src);

/**
 * Get the namespace id of the calling process for the given namespace kind.
 *
 * The id is the inode number of the matching entry under /proc/self/ns
 * (for example /proc/self/ns/ipc for UCS_SYS_NS_IPC). The entries are looked
 * up once, on the first call with a valid @a name, and cached afterwards.
 *
 * @param [in]  name  Namespace kind to query.
 *
 * @return Inode number of the namespace entry, or 0 if @a name is not a valid
 *         namespace kind or the entry could not be stat()-ed (for example
 *         when namespaces are not supported).
 */
ino_t ucs_sys_get_ns(ucs_sys_get_ns_name_t name);


END_C_DECLS

#endif
59 changes: 57 additions & 2 deletions src/uct/sm/base/sm_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <ucs/sys/string.h>
#include <ucs/sys/sys.h>
#include <ucs/arch/cpu.h>
#include <ucs/type/init_once.h>


ucs_config_field_t uct_sm_iface_config_table[] = {
Expand All @@ -38,17 +39,71 @@ uct_sm_base_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_
num_tl_devices_p);
}


/* read boot_id GUID or use machine_guid */
static uint64_t uct_sm_iface_get_system_id()
{
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
static uint64_t system_id = 0;
const char *boot_id_filename = "/proc/sys/kernel/random/boot_id";
const char *guid_scan_fmt = "%x-%4hx-%4hx-%4hx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx";
char guid_str[256];
ssize_t size;
int res;

union {
struct {
uint32_t v1;
uint16_t v2;
uint16_t v3;
uint16_t v4;
uint8_t v5[6];
};
struct {
uint64_t low_dword;
uint64_t hi_dword;
};
} UCS_S_PACKED guid;

UCS_INIT_ONCE(&init_once) {
size = ucs_read_file_str(guid_str, sizeof(guid_str), 1,
"%s", boot_id_filename);
if (size <= 0) {
goto use_machine_guid;
}

res = sscanf(guid_str, guid_scan_fmt,
ucs_unaligned_ptr(&guid.v1),
ucs_unaligned_ptr(&guid.v2),
ucs_unaligned_ptr(&guid.v3),
ucs_unaligned_ptr(&guid.v4),
&guid.v5[0], &guid.v5[1], &guid.v5[2],
&guid.v5[3], &guid.v5[4], &guid.v5[5]);
if (res != 10) { /* 10 values should be scanned */
goto use_machine_guid;
}

system_id = guid.hi_dword ^ guid.low_dword;
continue;

use_machine_guid:
system_id = ucs_machine_guid();
}

return system_id;
}

/*
 * Store the local system id, as returned by uct_sm_iface_get_system_id(), in
 * 'addr' as a 64-bit value.
 *
 * @param [in]  tl_iface  Interface handle (not used).
 * @param [out] addr      Filled with the 64-bit system id.
 *
 * @return UCS_OK always.
 */
ucs_status_t uct_sm_iface_get_device_address(uct_iface_t *tl_iface,
                                             uct_device_addr_t *addr)
{
    /* The address is written exactly once; an earlier store of
     * ucs_machine_guid() was immediately overwritten and has been dropped. */
    *(uint64_t*)addr = uct_sm_iface_get_system_id();
    return UCS_OK;
}

/*
 * Check whether a remote device address refers to the local system.
 *
 * The device address published by uct_sm_iface_get_device_address() is the
 * value of uct_sm_iface_get_system_id(), so the comparison has to use that
 * same id and not ucs_machine_guid().
 *
 * @param [in] tl_iface    Interface handle (not used).
 * @param [in] dev_addr    Remote device address, read as a 64-bit system id.
 * @param [in] iface_addr  Remote interface address (not used).
 *
 * @return Non-zero if the remote system id equals the local one, 0 otherwise.
 */
int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr,
                              const uct_iface_addr_t *iface_addr)
{
    return uct_sm_iface_get_system_id() == *(const uint64_t*)dev_addr;
}

ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags)
Expand Down
34 changes: 30 additions & 4 deletions src/uct/sm/mm/base/mm_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,31 @@ static ucs_status_t uct_mm_iface_event_fd_arm(uct_iface_h tl_iface,
}
}

/*
 * Check whether a remote MM interface can be reached from this process.
 *
 * The remote side is reachable when:
 *  1. its device address matches the local system, as decided by
 *     uct_sm_iface_is_reachable(), and
 *  2. its FIFO control segment can be attached, and
 *  3. the IPC namespace id stored in that segment equals the local one.
 *
 * The device address is checked first so that no attach is attempted with a
 * segment id that comes from a different system; such an id could match an
 * unrelated local segment.
 *
 * @param [in] tl_iface    Local interface.
 * @param [in] dev_addr    Remote device address.
 * @param [in] iface_addr  Remote interface address: a uct_mm_iface_addr_t
 *                         followed by data that is passed to mem_attach.
 *
 * @return Non-zero if the remote interface is reachable, 0 otherwise.
 */
static int uct_mm_iface_is_reachable(const uct_iface_h tl_iface,
                                     const uct_device_addr_t *dev_addr,
                                     const uct_iface_addr_t *iface_addr)
{
    uct_mm_iface_t *iface     = ucs_derived_of(tl_iface,
                                               uct_mm_iface_t);
    uct_mm_iface_addr_t *addr = (void*)iface_addr;
    uct_mm_remote_seg_t remote_seg;
    ucs_status_t status;
    ino_t ipc_ns;

    /* Peers on another system can never share memory with us */
    if (!uct_sm_iface_is_reachable(tl_iface, dev_addr, iface_addr)) {
        return 0;
    }

    /* Attach only the control part of the remote FIFO */
    status = uct_mm_iface_mapper_call(iface, mem_attach,
                                      addr->fifo_seg_id,
                                      sizeof(uct_mm_fifo_ctl_t),
                                      addr + 1, &remote_seg);
    if (status != UCS_OK) {
        return 0;
    }

    /* Copy the value out before the segment is detached */
    ipc_ns = ((uct_mm_fifo_ctl_t*)remote_seg.address)->ipc_ns;
    uct_mm_iface_mapper_call(iface, mem_detach, &remote_seg);

    return ipc_ns == ucs_sys_get_ns(UCS_SYS_NS_IPC);
}

static UCS_CLASS_DECLARE_DELETE_FUNC(uct_mm_iface_t, uct_iface_t);

static uct_iface_ops_t uct_mm_iface_ops = {
Expand Down Expand Up @@ -343,7 +368,7 @@ static uct_iface_ops_t uct_mm_iface_ops = {
.iface_query = uct_mm_iface_query,
.iface_get_device_address = uct_sm_iface_get_device_address,
.iface_get_address = uct_mm_iface_get_address,
.iface_is_reachable = uct_sm_iface_is_reachable
.iface_is_reachable = uct_mm_iface_is_reachable
};

static void uct_mm_iface_recv_desc_init(uct_iface_h tl_iface, void *obj,
Expand Down Expand Up @@ -538,9 +563,10 @@ static UCS_CLASS_INIT_FUNC(uct_mm_iface_t, uct_md_h md, uct_worker_h worker,

uct_mm_iface_set_fifo_ptrs(self->recv_fifo_mem.address,
&self->recv_fifo_ctl, &self->recv_fifo_elems);
self->recv_fifo_ctl->head = 0;
self->recv_fifo_ctl->tail = 0;
self->read_index = 0;
self->recv_fifo_ctl->head = 0;
self->recv_fifo_ctl->tail = 0;
self->recv_fifo_ctl->ipc_ns = ucs_sys_get_ns(UCS_SYS_NS_IPC);
self->read_index = 0;

/* create a unix file descriptor to receive event notifications */
status = uct_mm_iface_create_signal_fd(self);
Expand Down
3 changes: 3 additions & 0 deletions src/uct/sm/mm/base/mm_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ typedef struct uct_mm_fifo_ctl {

/* 2nd cacheline */
volatile uint64_t tail; /* How much was consumed */

/* namespace info */
ino_t ipc_ns;
} UCS_S_PACKED UCS_V_ALIGNED(UCS_SYS_CACHE_LINE_SIZE) uct_mm_fifo_ctl_t;


Expand Down

0 comments on commit 39fb657

Please sign in to comment.