From 85da98b5ef003ee4ad5244f6087608febdcb4247 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Tue, 10 Oct 2023 18:20:42 +0200 Subject: [PATCH 1/2] Improve performance of xattr hashing Make check is really slow, and a lot of it is in compute_erofs_shared_xattrs() when doing hash comparisons. Various improvements are don: * Estimate of the initial hash-table size to avoid resizes * Compare the xattr values before the xattr key names (as the kay names are often the same). * Don't divide by n_buckets each characted in hash_memory. This drops a mkcomposefs --from-file run from 5 seconds to 1 second. Signed-off-by: Alexander Larsson --- libcomposefs/lcfs-writer-erofs.c | 13 ++++++++----- libcomposefs/lcfs-writer.c | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/libcomposefs/lcfs-writer-erofs.c b/libcomposefs/lcfs-writer-erofs.c index 2f4bfd34..62055338 100644 --- a/libcomposefs/lcfs-writer-erofs.c +++ b/libcomposefs/lcfs-writer-erofs.c @@ -230,13 +230,13 @@ static bool xattrs_ht_comparator(const void *d1, const void *d2) const struct hasher_xattr_s *v1 = d1; const struct hasher_xattr_s *v2 = d2; - if (strcmp(v1->xattr->key, v2->xattr->key) != 0) + if (v1->xattr->value_len != v2->xattr->value_len) return false; - if (v1->xattr->value_len != v2->xattr->value_len) + if (memcmp(v1->xattr->value, v2->xattr->value, v1->xattr->value_len) != 0) return false; - return memcmp(v1->xattr->value, v2->xattr->value, v1->xattr->value_len) == 0; + return strcmp(v1->xattr->key, v2->xattr->key) == 0; } /* Sort alphabetically by key and value to get some canonical order */ @@ -328,9 +328,12 @@ static int compute_erofs_shared_xattrs(struct lcfs_ctx_s *ctx) size_t n_xattrs; uint64_t xattr_offset; - /* Find the use count for each xattr key/value in use */ + size_t n_files = 0; + for (node = ctx->root; node != NULL; node = node->next) + n_files++; - xattr_hash = hash_initialize(0, NULL, xattrs_ht_hasher, + /* Find the use count for each xattr key/value in use */ + xattr_hash = hash_initialize(n_files, NULL, xattrs_ht_hasher, xattrs_ht_comparator, free); if (xattr_hash == NULL) { return -1; diff --git a/libcomposefs/lcfs-writer.c b/libcomposefs/lcfs-writer.c index 9dcab379..679c7cf0 100644 --- a/libcomposefs/lcfs-writer.c +++ b/libcomposefs/lcfs-writer.c @@ -67,9 +67,9 @@ size_t hash_memory(const char *string, size_t len, size_t n_buckets) size_t i, value = 0; for (i = 0; i < len; i++) { - value = (value * 31 + string[i]) % n_buckets; + value = (value * 31 + string[i]); } - return value; + return value % n_buckets; } static struct lcfs_ctx_s *lcfs_new_ctx(struct lcfs_node_s *root, From 8cd12a18d73fcaf2174b3ab99a6d958b9c838979 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Wed, 11 Oct 2023 11:00:52 +0200 Subject: [PATCH 2/2] writer: Don't grow children array by one each time Each time a child is added we grow by 1, which causes a lot of reallocs when adding children. Growing capacity by doubling saves us about 10% of the time of a mkcomposefs --from-image run. Signed-off-by: Alexander Larsson --- libcomposefs/lcfs-internal.h | 1 + libcomposefs/lcfs-writer.c | 28 +++++++++++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/libcomposefs/lcfs-internal.h b/libcomposefs/lcfs-internal.h index 3c57203b..f8cebe18 100644 --- a/libcomposefs/lcfs-internal.h +++ b/libcomposefs/lcfs-internal.h @@ -140,6 +140,7 @@ struct lcfs_node_s { struct lcfs_node_s *parent; struct lcfs_node_s **children; /* Owns refs */ + size_t children_capacity; size_t children_size; /* Used to create hard links. */ diff --git a/libcomposefs/lcfs-writer.c b/libcomposefs/lcfs-writer.c index 679c7cf0..7a6567b3 100644 --- a/libcomposefs/lcfs-writer.c +++ b/libcomposefs/lcfs-writer.c @@ -906,7 +906,7 @@ int lcfs_node_add_child(struct lcfs_node_s *parent, struct lcfs_node_s *child, const char *name) { struct lcfs_node_s **new_children; - size_t new_size; + size_t new_capacity; char *name_copy; if ((parent->inode.st_mode & S_IFMT) != S_IFDIR) { @@ -936,20 +936,26 @@ int lcfs_node_add_child(struct lcfs_node_s *parent, struct lcfs_node_s *child, return -1; } - new_size = parent->children_size + 1; + if (parent->children_capacity == parent->children_size) { + if (parent->children_size == 0) + new_capacity = 16; + else + new_capacity = parent->children_capacity * 2; - new_children = reallocarray(parent->children, sizeof(*parent->children), - new_size); - if (new_children == NULL) { - errno = ENOMEM; - free(name_copy); - return -1; - } + new_children = reallocarray(parent->children, + sizeof(*parent->children), new_capacity); + if (new_children == NULL) { + errno = ENOMEM; + free(name_copy); + return -1; + } - parent->children = new_children; + parent->children = new_children; + parent->children_capacity = new_capacity; + } parent->children[parent->children_size] = child; - parent->children_size = new_size; + parent->children_size += 1; child->parent = parent; child->name = name_copy;