Skip to content

Commit

Permalink
MM: use boot_id + ipc namespace to test reachable iface
Browse files Browse the repository at this point in the history
- use namespace ID to evaluate reachable iface
- used IPC namespace for sysV & knem ifaces
- IPC + PID namespaces for posix + cma ifaces
  • Loading branch information
Sergey Oblomov committed Dec 8, 2019
1 parent 0dc670d commit d222b72
Show file tree
Hide file tree
Showing 13 changed files with 335 additions and 22 deletions.
96 changes: 96 additions & 0 deletions src/ucs/sys/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
#include <ucs/sys/sys.h>
#include <ucs/debug/log.h>
#include <ucs/time/time.h>
#include <ucs/type/init_once.h>
#include <ucm/util/sys.h>

#include <unistd.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/shm.h>
#include <sys/mman.h>
Expand All @@ -38,6 +41,24 @@
/* Default huge page size is 2 MBytes */
#define UCS_DEFAULT_MEM_FREE 640000
#define UCS_PROCESS_SMAPS_FILE "/proc/self/smaps"
#define UCS_PROCESS_NS_DIR "/proc/self/ns"
#define UCS_PROCESS_BOOTID_FILE "/proc/sys/kernel/random/boot_id"
#define UCS_PROCESS_BOOTID_FMT "%x-%4hx-%4hx-%4hx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx"
#define UCS_PROCESS_NS_FIRST 0xF0000000U
#define UCS_PROCESS_NS_NET_DFLT 0xF0000080U


/*
 * Per-namespace-type information, indexed by ucs_sys_namespace_type_t:
 *  - name: entry name under UCS_PROCESS_NS_DIR
 *  - dflt: ID returned by ucs_sys_get_ns() when the entry cannot be stat()'ed,
 *          and the value ucs_sys_ns_is_default() compares against
 *
 * The table is read-only, and the storage-class specifier is placed first
 * (placing it after the type specifier is an obsolescent feature of C).
 */
static const struct {
    const char   *name;
    ucs_sys_ns_t dflt;
} ucs_sys_namespace_info[] = {
    [UCS_SYS_NS_TYPE_IPC]  = {.name = "ipc",  .dflt = UCS_PROCESS_NS_FIRST - 1},
    [UCS_SYS_NS_TYPE_MNT]  = {.name = "mnt",  .dflt = UCS_PROCESS_NS_FIRST - 0},
    [UCS_SYS_NS_TYPE_NET]  = {.name = "net",  .dflt = UCS_PROCESS_NS_NET_DFLT},
    [UCS_SYS_NS_TYPE_PID]  = {.name = "pid",  .dflt = UCS_PROCESS_NS_FIRST - 4},
    [UCS_SYS_NS_TYPE_USER] = {.name = "user", .dflt = UCS_PROCESS_NS_FIRST - 3},
    [UCS_SYS_NS_TYPE_UTS]  = {.name = "uts",  .dflt = UCS_PROCESS_NS_FIRST - 2}
};


const char *ucs_get_tmpdir()
Expand Down Expand Up @@ -1158,3 +1179,78 @@ void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src)
}
}
}

/*
 * Return the ID of the namespace of type @a ns for the calling process.
 *
 * The ID is the inode number of the "<UCS_PROCESS_NS_DIR>/<name>" entry.
 * If that entry cannot be stat()'ed, the built-in default ID of the namespace
 * type is returned instead. An out-of-range @a ns yields 0.
 */
ucs_sys_ns_t ucs_sys_get_ns(ucs_sys_namespace_type_t ns)
{
    char ns_path[MAXPATHLEN];
    struct stat ns_stat;

    if (ns >= UCS_SYS_NS_TYPE_LAST) {
        return 0;
    }

    snprintf(ns_path, sizeof(ns_path), "%s/%s", UCS_PROCESS_NS_DIR,
             ucs_sys_namespace_info[ns].name);

    if (stat(ns_path, &ns_stat) != 0) {
        /* namespace entry is not accessible - fall back to the built-in ID */
        return ucs_sys_namespace_info[ns].dflt;
    }

    return (ucs_sys_ns_t)ns_stat.st_ino;
}

/*
 * Check whether the calling process is in the default namespace of type @a ns.
 *
 * @return 1 if the ID reported by ucs_sys_get_ns() equals the built-in default
 *         ID of this namespace type (this is also the case when the namespace
 *         entry cannot be stat()'ed, since ucs_sys_get_ns() then falls back to
 *         the default ID); 0 otherwise, including when @a ns is not a valid
 *         namespace type.
 */
int ucs_sys_ns_is_default(ucs_sys_namespace_type_t ns)
{
    /* guard the table lookup below: ucs_sys_get_ns() rejects out-of-range
     * types, but indexing ucs_sys_namespace_info with them would read past
     * the end of the array */
    if (ns >= UCS_SYS_NS_TYPE_LAST) {
        return 0;
    }

    return ucs_sys_get_ns(ns) == ucs_sys_namespace_info[ns].dflt;
}

/*
 * Return the boot ID as two 64-bit words.
 *
 * The boot ID string is read from UCS_PROCESS_BOOTID_FILE and parsed with
 * UCS_PROCESS_BOOTID_FMT inside a UCS_INIT_ONCE section; the parsed value and
 * the resulting status are kept in static storage and reused by later calls.
 *
 * Packing of the 10 parsed fields:
 *   low  = field 1 (bits 0..31) | field 2 (bits 32..47) | field 3 (bits 48..63)
 *   high = field 4 (bits 0..15) | fields 5..10, one byte each, from bit 16 up
 *
 * @a high and @a low are written only when the returned status is UCS_OK.
 */
ucs_status_t ucs_sys_get_boot_id(uint64_t *high, uint64_t *low)
{
    static struct {
        uint64_t high;
        uint64_t low;
    } boot_id = {0, 0};

    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
    static ucs_status_t status       = UCS_ERR_IO_ERROR;
    char text[256];
    ssize_t nread;
    uint32_t grp1;
    uint16_t grp2;
    uint16_t grp3;
    uint16_t grp4;
    uint8_t tail[6];
    int num_scanned;
    int idx;

    UCS_INIT_ONCE(&init_once) {
        nread = ucs_read_file_str(text, sizeof(text), 1,
                                  "%s", UCS_PROCESS_BOOTID_FILE);
        if (nread <= 0) {
            continue; /* jump out of INIT_ONCE section */
        }

        num_scanned = sscanf(text, UCS_PROCESS_BOOTID_FMT,
                             &grp1, &grp2, &grp3, &grp4,
                             &tail[0], &tail[1], &tail[2],
                             &tail[3], &tail[4], &tail[5]);
        if (num_scanned != 10) {
            continue; /* all 10 values must be scanned, keep error status */
        }

        boot_id.low  = ((uint64_t)grp1) | ((uint64_t)grp2 << 32) |
                       ((uint64_t)grp3 << 48);
        boot_id.high = grp4;
        for (idx = 0; idx < ucs_array_size(tail); idx++) {
            boot_id.high |= (uint64_t)tail[idx] << (16 + (idx * 8));
        }
        status = UCS_OK;
    }

    if (status != UCS_OK) {
        return status;
    }

    *high = boot_id.high;
    *low  = boot_id.low;
    return status;
}
45 changes: 45 additions & 0 deletions src/ucs/sys/sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ BEGIN_C_DECLS
/** @file sys.h */


/* Namespace identifier; ucs_sys_get_ns() fills it from the inode number of the
 * corresponding namespace entry */
typedef ino_t ucs_sys_ns_t;


/* namespace type used in @ref ucs_sys_get_ns and @ref ucs_sys_ns_is_default */
typedef enum {
UCS_SYS_NS_TYPE_IPC, /* "ipc" namespace */
UCS_SYS_NS_TYPE_MNT, /* "mnt" namespace */
UCS_SYS_NS_TYPE_NET, /* "net" namespace */
UCS_SYS_NS_TYPE_PID, /* "pid" namespace */
UCS_SYS_NS_TYPE_USER, /* "user" namespace */
UCS_SYS_NS_TYPE_UTS, /* "uts" namespace */
UCS_SYS_NS_TYPE_LAST /* number of namespace types, not a valid type itself */
} ucs_sys_namespace_type_t;


/**
* @return TMPDIR environment variable if set. Otherwise, return "/tmp".
*/
Expand Down Expand Up @@ -413,6 +428,36 @@ int ucs_sys_getaffinity(ucs_sys_cpuset_t *cpuset);
*/
void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src);

/**
 * Get namespace id for resource.
 *
 * @param [in] name Namespace to get value
 *
 * @return ID of the namespace the calling process belongs to; the built-in
 *         default ID of this namespace type if the namespace entry can not be
 *         accessed; 0 if @a name is not a valid namespace type
 */
ucs_sys_ns_t ucs_sys_get_ns(ucs_sys_namespace_type_t name);


/**
 * Check if namespace is namespace of host system.
 *
 * @param [in] name Namespace to evaluate
 *
 * @return 1 if the namespace ID reported by @ref ucs_sys_get_ns equals the
 *         built-in default ID of this namespace type (which is also the case
 *         when the namespace entry can not be accessed), 0 in other cases
 */
int ucs_sys_ns_is_default(ucs_sys_namespace_type_t name);


/**
 * Get 128-bit boot ID value.
 *
 * @param [out] high Pointer to high 64 bit of 128 boot ID
 * @param [out] low Pointer to low 64 bit of 128 boot ID
 *
 * @note @a high and @a low are written only when UCS_OK is returned.
 *
 * @return UCS_OK or error in case of failure.
 */
ucs_status_t ucs_sys_get_boot_id(uint64_t *high, uint64_t *low);

END_C_DECLS

#endif
81 changes: 76 additions & 5 deletions src/uct/sm/base/sm_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@
#include <ucs/sys/string.h>
#include <ucs/sys/sys.h>
#include <ucs/arch/cpu.h>
#include <ucs/type/init_once.h>


/* Flag stored in the 'id' field of the device address: when set, the address
 * is of the extended form and carries the IPC namespace ID */
#define UCS_SM_IFACE_ADDR_FLAG_EXT UCS_BIT(63)


/* Base device address: system ID with UCS_SM_IFACE_ADDR_FLAG_EXT masked out
 * (the flag is OR'ed back in for extended addresses) */
typedef struct {
uint64_t id;
} ucs_sm_iface_base_device_addr_t;

/* Extended device address, used when the process is not in the default IPC
 * namespace: base address followed by the IPC namespace ID */
typedef struct {
ucs_sm_iface_base_device_addr_t super;
ucs_sys_ns_t ipc_ns;
} ucs_sm_iface_ext_device_addr_t;


ucs_config_field_t uct_sm_iface_config_table[] = {
Expand All @@ -38,17 +52,67 @@ uct_sm_base_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_
num_tl_devices_p);
}

ucs_status_t uct_sm_iface_get_device_address(uct_iface_t *tl_iface,
uct_device_addr_t *addr)

/*
 * Return a 64-bit identifier of the running system: the XOR of the two 64-bit
 * halves of the boot ID when ucs_sys_get_boot_id() succeeds, otherwise the
 * value of ucs_machine_guid().
 */
static uint64_t uct_sm_iface_get_system_id()
{
/* NOTE(review): 'addr' is not declared in this function; the statement below
 * looks like a leftover of the previous uct_sm_iface_get_device_address()
 * body - confirm and drop it */
*(uint64_t*)addr = ucs_machine_guid();
uint64_t high;
uint64_t low;
ucs_status_t status;

status = ucs_sys_get_boot_id(&high, &low);
if (status == UCS_OK) {
/* fold the 128-bit boot ID into 64 bits */
return high ^ low;
}

/* boot ID is not available - fall back to the machine GUID */
return ucs_machine_guid();
}

/*
 * Fill the shared-memory device address of this process.
 *
 * The 'id' field gets the system ID with the UCS_SM_IFACE_ADDR_FLAG_EXT bit
 * cleared. If the process is not in the default IPC namespace, the flag is set
 * and the IPC namespace ID is stored in the extended part of the address;
 * otherwise only the base part is written.
 *
 * Always returns UCS_OK.
 */
ucs_status_t UCS_F_NOOPTIMIZE /* GCC failed to compile it in release mode */
uct_sm_iface_get_device_address(uct_iface_t *tl_iface, uct_device_addr_t *addr)
{
    ucs_sm_iface_ext_device_addr_t *sm_addr = (void*)addr;

    /* keep the flag bit free, it is not part of the system ID */
    sm_addr->super.id = uct_sm_iface_get_system_id() & ~UCS_SM_IFACE_ADDR_FLAG_EXT;

    if (ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_IPC)) {
        /* default IPC namespace: base address only */
        return UCS_OK;
    }

    sm_addr->super.id |= UCS_SM_IFACE_ADDR_FLAG_EXT;
    sm_addr->ipc_ns    = ucs_sys_get_ns(UCS_SYS_NS_TYPE_IPC);

    return UCS_OK;
}

int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr,
/*
 * Check whether the remote device address refers to a process that shares the
 * system ID (and, for extended addresses, the IPC namespace) with this process.
 *
 * The local address is built with uct_sm_iface_get_device_address() and
 * compared with the remote one. iface_addr is not used.
 *
 * Returns 1 if reachable, 0 otherwise (including failure to build the local
 * address).
 */
int uct_sm_iface_is_reachable(const uct_iface_h tl_iface,
const uct_device_addr_t *dev_addr,
const uct_iface_addr_t *iface_addr)
{
/* NOTE(review): this early return precedes the declarations and makes the
 * rest of the body unreachable; it looks like a leftover of the previous
 * implementation - confirm and drop it */
return ucs_machine_guid() == *(const uint64_t*)dev_addr;
ucs_sm_iface_ext_device_addr_t *ext_addr = (void*)dev_addr;
ucs_sm_iface_ext_device_addr_t my_addr = {};
ucs_status_t status;

status = uct_sm_iface_get_device_address(tl_iface,
(uct_device_addr_t*)&my_addr);
if (status != UCS_OK) {
ucs_error("failed to get device address");
return 0;
}

/* do not merge these evaluations into a single 'if' due
 * to a clang compilation warning */
/* check if both processes are on the same host and
 * both of them are in the root (or both in a non-root) IPC namespace:
 * the 'id' field includes the UCS_SM_IFACE_ADDR_FLAG_EXT bit */
if (ext_addr->super.id != my_addr.super.id) {
return 0;
}

if (!(ext_addr->super.id & UCS_SM_IFACE_ADDR_FLAG_EXT)) {
return 1; /* both processes are in root namespace */
}

/* ok, we are in a non-root IPC namespace - return 1 if the IDs of
 * the namespaces are the same */
return ext_addr->ipc_ns == my_addr.ipc_ns;
}

ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags)
Expand All @@ -65,6 +129,13 @@ ucs_status_t uct_sm_ep_fence(uct_ep_t *tl_ep, unsigned flags)
return UCS_OK;
}

/*
 * Return the length of the shared-memory device address of this process:
 * the base address size when the process is in the default IPC namespace,
 * the extended address size otherwise.
 */
size_t uct_sm_iface_get_device_addr_len()
{
    if (ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_IPC)) {
        return sizeof(ucs_sm_iface_base_device_addr_t);
    }

    return sizeof(ucs_sm_iface_ext_device_addr_t);
}

UCS_CLASS_INIT_FUNC(uct_sm_iface_t, uct_iface_ops_t *ops, uct_md_h md,
uct_worker_h worker, const uct_iface_params_t *params,
const uct_iface_config_t *tl_config)
Expand Down
3 changes: 2 additions & 1 deletion src/uct/sm/base/sm_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include <ucs/sys/iovec.h>


#define UCT_SM_IFACE_DEVICE_ADDR_LEN sizeof(uint64_t)
#define UCT_SM_MAX_IOV 16
#define UCT_SM_DEVICE_NAME "memory"

Expand Down Expand Up @@ -45,6 +44,8 @@ int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_

ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags);

size_t uct_sm_iface_get_device_addr_len();

ucs_status_t uct_sm_ep_fence(uct_ep_t *tl_ep, unsigned flags);

static UCS_F_ALWAYS_INLINE size_t uct_sm_get_max_iov() {
Expand Down
3 changes: 2 additions & 1 deletion src/uct/sm/cma/cma_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ static UCS_CLASS_INIT_FUNC(uct_cma_ep_t, const uct_ep_params_t *params)
"UCT_EP_PARAM_FIELD_IFACE_ADDR and UCT_EP_PARAM_FIELD_DEV_ADDR are not defined");

UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super);
self->remote_pid = *(const pid_t*)params->iface_addr;
self->remote_pid = *(const pid_t*)params->iface_addr &
~UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
return UCS_OK;
}

Expand Down
46 changes: 42 additions & 4 deletions src/uct/sm/cma/cma_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
#include <ucs/sys/string.h>


/* Base CMA address: process ID, optionally OR'ed with
 * UCT_CMA_IFACE_ADDR_FLAG_PID_NS.
 * NOTE(review): despite the "device_addr" in the name, this layout is written
 * by uct_cma_iface_get_address(), i.e. it is used as the iface address */
typedef struct {
pid_t id;
} ucs_cma_iface_base_device_addr_t;

/* Extended CMA address, used when the process is not in the default PID
 * namespace: base address followed by the PID namespace ID */
typedef struct {
ucs_cma_iface_base_device_addr_t super;
ucs_sys_ns_t pid_ns;
} ucs_cma_iface_ext_device_addr_t;


static ucs_config_field_t uct_cma_iface_config_table[] = {
{"", "ALLOC=huge,thp,mmap,heap;BW=11145MBs", NULL,
ucs_offsetof(uct_cma_iface_config_t, super),
Expand All @@ -23,7 +33,15 @@ static ucs_config_field_t uct_cma_iface_config_table[] = {
/*
 * Fill the CMA iface address of this process.
 *
 * The 'id' field gets the process ID. If the process is not in the default PID
 * namespace, UCT_CMA_IFACE_ADDR_FLAG_PID_NS is OR'ed into 'id' and the PID
 * namespace ID is stored in the extended part of the address.
 *
 * Always returns UCS_OK.
 */
static ucs_status_t uct_cma_iface_get_address(uct_iface_t *tl_iface,
uct_iface_addr_t *addr)
{
/* NOTE(review): this store is superseded by the assignment to
 * iface_addr->super.id below (same leading field of the address); it looks
 * like a leftover of the previous implementation - confirm and drop it */
*(pid_t*)addr = getpid();
ucs_cma_iface_ext_device_addr_t *iface_addr = (void*)addr;

/* the flag bit must not overlap with the bits of the PID value */
ucs_assert(!(getpid() & UCT_CMA_IFACE_ADDR_FLAG_PID_NS));

iface_addr->super.id = getpid();
if (!ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID)) {
/* non-default PID namespace: mark the address as extended */
iface_addr->super.id |= UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
iface_addr->pid_ns = ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID);
}
return UCS_OK;
}

Expand Down Expand Up @@ -51,8 +69,10 @@ static ucs_status_t uct_cma_iface_query(uct_iface_h tl_iface,
iface_attr->cap.am.opt_zcopy_align = 1;
iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align;

iface_attr->iface_addr_len = sizeof(pid_t);
iface_attr->device_addr_len = UCT_SM_IFACE_DEVICE_ADDR_LEN;
iface_attr->iface_addr_len = ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID) ?
sizeof(ucs_cma_iface_base_device_addr_t) :
sizeof(ucs_cma_iface_ext_device_addr_t);
iface_attr->device_addr_len = uct_sm_iface_get_device_addr_len();
iface_attr->ep_addr_len = 0;
iface_attr->max_conn_priv = 0;
iface_attr->cap.flags = UCT_IFACE_FLAG_GET_ZCOPY |
Expand All @@ -68,6 +88,24 @@ static ucs_status_t uct_cma_iface_query(uct_iface_h tl_iface,
return UCS_OK;
}

/*
 * Check whether a remote CMA iface is reachable from this process.
 *
 * The generic shared-memory check (uct_sm_iface_is_reachable) must pass first.
 * Then the PID namespaces must match:
 *  - if the remote address carries UCT_CMA_IFACE_ADDR_FLAG_PID_NS, the local
 *    PID namespace ID must be equal to the one stored in the remote address;
 *  - otherwise, the local process must be in the default PID namespace.
 *
 * Returns non-zero if reachable, 0 otherwise.
 */
static int
uct_cma_iface_is_reachable(const uct_iface_h tl_iface,
                           const uct_device_addr_t *dev_addr,
                           const uct_iface_addr_t *tl_iface_addr)
{
    ucs_cma_iface_ext_device_addr_t *remote_addr = (void*)tl_iface_addr;

    if (!uct_sm_iface_is_reachable(tl_iface, dev_addr, tl_iface_addr)) {
        return 0;
    }

    if (!(remote_addr->super.id & UCT_CMA_IFACE_ADDR_FLAG_PID_NS)) {
        /* remote is in the default PID namespace - so must be we */
        return ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID);
    }

    /* remote is in a non-default PID namespace - compare namespace IDs */
    return ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID) == remote_addr->pid_ns;
}

static UCS_CLASS_DECLARE_DELETE_FUNC(uct_cma_iface_t, uct_iface_t);

static uct_iface_ops_t uct_cma_iface_ops = {
Expand All @@ -88,7 +126,7 @@ static uct_iface_ops_t uct_cma_iface_ops = {
.iface_query = uct_cma_iface_query,
.iface_get_address = uct_cma_iface_get_address,
.iface_get_device_address = uct_sm_iface_get_device_address,
.iface_is_reachable = uct_sm_iface_is_reachable
.iface_is_reachable = uct_cma_iface_is_reachable
};

static UCS_CLASS_INIT_FUNC(uct_cma_iface_t, uct_md_h md, uct_worker_h worker,
Expand Down
Loading

0 comments on commit d222b72

Please sign in to comment.