From d5f2203a8f732cabb07c2cb06421886b87385982 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Tue, 7 Jan 2025 14:03:00 -0500 Subject: [PATCH] improve regexp matching --- include/ada/url_pattern-inl.h | 16 ++++++++------ src/url_pattern.cpp | 39 ++++++++++++++++++++-------------- tests/wpt_urlpattern_tests.cpp | 6 +++--- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/include/ada/url_pattern-inl.h b/include/ada/url_pattern-inl.h index 020786e7c..02e7e085a 100644 --- a/include/ada/url_pattern-inl.h +++ b/include/ada/url_pattern-inl.h @@ -46,16 +46,18 @@ url_pattern_component::create_component_match_result( // Optimization: Let's reserve the size. result.groups.reserve(exec_result.size() - 1); - // Let index be 1. + // Let index be 0. // While index is less than Get(execResult, "length"): - for (size_t index = 1; index < exec_result.size(); index++) { - // Let name be component’s group name list[index − 1]. + for (size_t index = 0; index < exec_result.size() - 1; index++) { + // Let name be component’s group name list[index]. // Let value be Get(execResult, ToString(index)). // Set groups[name] to value. - result.groups.insert({ - group_name_list[index - 1], - exec_result[index].str(), - }); + if (auto str = exec_result[index].str(); !str.empty()) { + result.groups.insert({ + group_name_list[index], + str, + }); + } } return result; } diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp index 9e1003118..d4ae3aad1 100644 --- a/src/url_pattern.cpp +++ b/src/url_pattern.cpp @@ -681,53 +681,60 @@ result> url_pattern::match( } } + auto regex_flags = std::regex_constants::match_any; + // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol // component's regular expression, protocol). std::smatch protocol_exec_result_value; - auto protocol_exec_result = std::regex_match( - protocol, protocol_exec_result_value, protocol_component.regexp); + auto protocol_exec_result = + std::regex_match(protocol, protocol_exec_result_value, + protocol_component.regexp, regex_flags); // Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username // component's regular expression, username). std::smatch username_exec_result_value; - auto username_exec_result = std::regex_match( - username, username_exec_result_value, username_component.regexp); + auto username_exec_result = + std::regex_match(username, username_exec_result_value, + username_component.regexp, regex_flags); // Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password // component's regular expression, password). std::smatch password_exec_result_value; - auto password_exec_result = std::regex_match( - password, password_exec_result_value, password_component.regexp); + auto password_exec_result = + std::regex_match(password, password_exec_result_value, + password_component.regexp, regex_flags); // Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname // component's regular expression, hostname). std::smatch hostname_exec_result_value; - auto hostname_exec_result = std::regex_match( - hostname, hostname_exec_result_value, hostname_component.regexp); + auto hostname_exec_result = + std::regex_match(hostname, hostname_exec_result_value, + hostname_component.regexp, regex_flags); // Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's // regular expression, port). std::smatch port_exec_result_value; - auto port_exec_result = - std::regex_match(port, port_exec_result_value, port_component.regexp); + auto port_exec_result = std::regex_match(port, port_exec_result_value, + port_component.regexp, regex_flags); // Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname // component's regular expression, pathname). std::smatch pathname_exec_result_value; - auto pathname_exec_result = std::regex_match( - pathname, pathname_exec_result_value, pathname_component.regexp); + auto pathname_exec_result = + std::regex_match(pathname, pathname_exec_result_value, + pathname_component.regexp, regex_flags); // Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's // regular expression, search). std::smatch search_exec_result_value; - auto search_exec_result = std::regex_match(search, search_exec_result_value, - search_component.regexp); + auto search_exec_result = std::regex_match( + search, search_exec_result_value, search_component.regexp, regex_flags); // Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's // regular expression, hash). std::smatch hash_exec_result_value; - auto hash_exec_result = - std::regex_match(hash, hash_exec_result_value, hash_component.regexp); + auto hash_exec_result = std::regex_match(hash, hash_exec_result_value, + hash_component.regexp, regex_flags); // If protocolExecResult, usernameExecResult, passwordExecResult, // hostnameExecResult, portExecResult, pathnameExecResult, searchExecResult, diff --git a/tests/wpt_urlpattern_tests.cpp b/tests/wpt_urlpattern_tests.cpp index e005c4f7b..0a5a33d90 100644 --- a/tests/wpt_urlpattern_tests.cpp +++ b/tests/wpt_urlpattern_tests.cpp @@ -304,11 +304,11 @@ ada::url_pattern_component_result parse_component_result( ondemand::object groups; EXPECT_FALSE(element.value().get_object().get(groups)); for (auto group : groups) { - std::string_view group_key(group.key().value().raw()); + auto group_key = group.escaped_key().value(); std::string_view group_value; - EXPECT_FALSE(group.value().get(group_value)); + EXPECT_FALSE(group.value().get_string(group_value)); result.groups.insert_or_assign(std::string(group_key), - std::string(group_value)); + group_value); } } }