Skip to content

Commit

Permalink
ICU-22979 Support inverse rule for [] span in RBNF
Browse files Browse the repository at this point in the history
  • Loading branch information
grhoten committed Jan 7, 2025
1 parent a8d9f47 commit a8e7728
Show file tree
Hide file tree
Showing 11 changed files with 706 additions and 483 deletions.
8 changes: 4 additions & 4 deletions icu4c/source/i18n/nfrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions,

UnicodeString& description = descriptions[index]; // !!! make sure index is valid

if (description.length() == 0) {
if (description.isEmpty()) {
// throw new IllegalArgumentException("Empty rule set description");
status = U_PARSE_ERROR;
return;
Expand All @@ -177,16 +177,16 @@ NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions,
name.setTo(UNICODE_STRING_SIMPLE("%default"));
}

if (description.length() == 0) {
if (description.isEmpty()) {
// throw new IllegalArgumentException("Empty rule set description");
status = U_PARSE_ERROR;
}

fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;

if ( name.endsWith(gNoparse,8) ) {
if (name.endsWith(gNoparse, 8)) {
fIsParseable = false;
name.truncate(name.length()-8); // remove the @noparse from the name
name.truncate(name.length() - 8); // remove the @noparse from the name
}

// all of the other members of NFRuleSet are initialized
Expand Down
19 changes: 15 additions & 4 deletions icu4c/source/i18n/nfrule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ NFRule::~NFRule()

static const char16_t gLeftBracket = 0x005b;
static const char16_t gRightBracket = 0x005d;
static const char16_t gVerticalLine = 0x007C;
static const char16_t gColon = 0x003a;
static const char16_t gZero = 0x0030;
static const char16_t gNine = 0x0039;
Expand Down Expand Up @@ -146,6 +147,7 @@ NFRule::makeRules(UnicodeString& description,
// then it's really shorthand for two rules (with one exception)
LocalPointer<NFRule> rule2;
UnicodeString sbuf;
int32_t orElseOp = description.indexOf(gVerticalLine);

// we'll actually only split the rule into two rules if its
// base value is an even multiple of its divisor (or it's one
Expand Down Expand Up @@ -193,9 +195,13 @@ NFRule::makeRules(UnicodeString& description,
rule2->radix = rule1->radix;
rule2->exponent = rule1->exponent;

// rule2's rule text omits the stuff in brackets: initialize
// its rule text and substitutions accordingly
// By default, rule2's rule text omits the stuff in brackets.
// If the description contains a |, rule2 instead keeps the text
// between the | and the closing bracket.
// Initialize its rule text and substitutions accordingly.
sbuf.append(description, 0, brack1);
if (orElseOp >= 0) {
sbuf.append(description, orElseOp + 1, brack2 - orElseOp - 1);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
Expand All @@ -206,7 +212,12 @@ NFRule::makeRules(UnicodeString& description,
// the brackets themselves: initialize _its_ rule text and
// substitutions accordingly
sbuf.setTo(description, 0, brack1);
sbuf.append(description, brack1 + 1, brack2 - brack1 - 1);
if (orElseOp >= 0) {
sbuf.append(description, brack1 + 1, orElseOp - brack1 - 1);
}
else {
sbuf.append(description, brack1 + 1, brack2 - brack1 - 1);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
Expand Down Expand Up @@ -404,7 +415,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
// finally, if the rule body begins with an apostrophe, strip it off
// (this is generally used to put whitespace at the beginning of
// a rule's rule text)
if (description.length() > 0 && description.charAt(0) == gTick) {
if (!description.isEmpty() && description.charAt(0) == gTick) {
description.removeBetween(0, 1);
}

Expand Down
27 changes: 13 additions & 14 deletions icu4c/source/i18n/rbnf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1568,12 +1568,12 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali

// divide up the descriptions into individual rule-set descriptions
// and store them in a temporary array. At each step, we also
// new up a rule set, but all this does is initialize its name
// create a rule set, but all this does is initialize its name
// and remove it from its description. We can't actually parse
// the rest of the descriptions and finish initializing everything
// because we have to know the names and locations of all the rule
// sets before we can actually set everything up
if(!numRuleSets) {
if (!numRuleSets) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
Expand Down Expand Up @@ -1616,9 +1616,9 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
// last public rule set, no matter what the localization data says.
initDefaultRuleSet();

// finally, we can go back through the temporary descriptions
// list and finish setting up the substructure (and we throw
// away the temporary descriptions as we go)
// Now that we know all the rule names, we can go back through
// the temporary descriptions list and finish setting up the substructure
// (and we throw away the temporary descriptions as we go)
{
for (int i = 0; i < numRuleSets; i++) {
fRuleSets[i]->parseRules(ruleSetDescriptions[i], status);
Expand Down Expand Up @@ -1706,10 +1706,13 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
UnicodeString result;

int start = 0;
while (start != -1 && start < description.length()) {
// seek to the first non-whitespace character...
UChar ch;
while (start < description.length()) {
// Seek to the first non-whitespace character...
// If the first non-whitespace character is a semicolon, skip it and continue
while (start < description.length()
&& PatternProps::isWhiteSpace(description.charAt(start))) {
&& (PatternProps::isWhiteSpace(ch = description.charAt(start)) || ch == gSemiColon))
{
++start;
}

Expand All @@ -1720,20 +1723,16 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
// or if we don't find a semicolon, just copy the rest of
// the string into the result
result.append(description, start, description.length() - start);
start = -1;
break;
}
else if (p < description.length()) {
result.append(description, start, p + 1 - start);
start = p + 1;
}

// when we get here, we've seeked off the end of the string, and
// when we get here from the else, we've seeked off the end of the string, and
// we terminate the loop (we continue until *start* is -1 rather
// than until *p* is -1, because otherwise we'd miss the last
// rule in the description)
else {
start = -1;
}
}

description.setTo(result);
Expand Down
Loading

0 comments on commit a8e7728

Please sign in to comment.