From 657e29116e004fbff9565c0ea68dcbda842db1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Tue, 3 Sep 2024 12:00:02 +0200 Subject: [PATCH 1/4] stringparser: optimize object size --- pym/bob/stringparser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pym/bob/stringparser.py b/pym/bob/stringparser.py index defe9ef13..5e9dae06d 100644 --- a/pym/bob/stringparser.py +++ b/pym/bob/stringparser.py @@ -33,6 +33,8 @@ def isTrue(val): class StringParser: """Utility class for complex string parsing/manipulation""" + __slots__ = ('env', 'funs', 'funArgs', 'nounset', 'text', 'index', 'end') + def __init__(self, env, funs, funArgs, nounset): self.env = env self.funs = funs From 290e40c600c1d4e179e6fd1d571c34a2488ccbf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Tue, 3 Sep 2024 13:08:30 +0200 Subject: [PATCH 2/4] stringparser: document some functions --- pym/bob/stringparser.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pym/bob/stringparser.py b/pym/bob/stringparser.py index 5e9dae06d..f14cb01ac 100644 --- a/pym/bob/stringparser.py +++ b/pym/bob/stringparser.py @@ -88,6 +88,7 @@ def nextToken(self, extra=None): return "".join(tok) def getRestOfName(self): + """Get remainder of bare variable name""" ret = '' i = self.index while i < self.end: @@ -100,6 +101,7 @@ def getRestOfName(self): return ret def getSingleQuoted(self): + """Get remainder of single quoted string.""" i = self.index while i < self.end: if self.text[i] == "'": @@ -112,6 +114,15 @@ def getSingleQuoted(self): return ret def getString(self, delim=[None], keep=False): + """Interpret as string from current parsing position. + + Do any necessary substitutions until either the string ends or hits one + of the additional delimiters. + + :param delim: Additional delimiter characters where parsing should stop + :param keep: Keep the additional delimiter if hit. By default the + delimier is swallowed. + """ s = [] tok = self.nextToken(delim) while tok not in delim: @@ -141,6 +152,7 @@ def getString(self, delim=[None], keep=False): return "".join(s) def getVariable(self): + """Substitute variable at current position.""" # get variable name varName = self.getString([':', '-', '+', '}'], True) @@ -175,6 +187,10 @@ def getVariable(self): raise ParseError("Unterminated variable: " + str(op)) def getBareVariable(self, varName): + """Substitute base variable at current position. + + :param varName: Initial character of variable name + """ varName += self.getRestOfName() varValue = self.env.get(varName) if varValue is None: @@ -185,6 +201,7 @@ def getBareVariable(self, varName): return varValue def getCommand(self): + """Substitute string function at current position.""" words = [] delim = [",", ")"] while True: From 675fb6cb1aa83eb11ac13ff0520fa05702f345fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Tue, 3 Sep 2024 13:09:13 +0200 Subject: [PATCH 3/4] stringparser: skip over unused substitution parts Variable substitutions can have a default value ("${VAR-default}") or an alternate value ("${VAR+alternate}"). The "default" and "alternate" parts can itself be comprised of variable/function substitutions. Now, if the default/alternate part is unused, it should be skipped altogether. So far we always substituted them, even if they were unused. This is undesired because it causes constructs like ${VAR:+${VAR}} to fail which contradicts POSIX shell substitution behaviour which we try to follow. Rectify this by suppressing substitution in unused parts and just skip them. --- pym/bob/stringparser.py | 44 ++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/pym/bob/stringparser.py b/pym/bob/stringparser.py index f14cb01ac..c6858b088 100644 --- a/pym/bob/stringparser.py +++ b/pym/bob/stringparser.py @@ -113,7 +113,7 @@ def getSingleQuoted(self): self.index = i+1 return ret - def getString(self, delim=[None], keep=False): + def getString(self, delim=[None], keep=False, subst=True): """Interpret as string from current parsing position. Do any necessary substitutions until either the string ends or hits one @@ -122,22 +122,24 @@ def getString(self, delim=[None], keep=False): :param delim: Additional delimiter characters where parsing should stop :param keep: Keep the additional delimiter if hit. By default the delimier is swallowed. + :param subst: Do variable or command substitution. If false, skip over + such substitutions. """ s = [] tok = self.nextToken(delim) while tok not in delim: if tok == '"': - s.append(self.getString(['"'])) + s.append(self.getString(['"'], False, subst)) elif tok == '\'': s.append(self.getSingleQuoted()) elif tok == '$': tok = self.nextChar() if tok == '{': - s.append(self.getVariable()) + s.append(self.getVariable(subst)) elif tok == '(': - s.append(self.getCommand()) + s.append(self.getCommand(subst)) elif tok in NAME_START: - s.append(self.getBareVariable(tok)) + s.append(self.getBareVariable(tok, subst)) else: raise ParseError("Invalid $-subsitituion") elif tok == None: @@ -151,10 +153,13 @@ def getString(self, delim=[None], keep=False): if keep: self.index -= 1 return "".join(s) - def getVariable(self): - """Substitute variable at current position.""" + def getVariable(self, subst): + """Substitute variable at current position. + + :param subst: Bail out if substitution fails? + """ # get variable name - varName = self.getString([':', '-', '+', '}'], True) + varName = self.getString([':', '-', '+', '}'], True, subst) # process? op = self.nextChar() @@ -165,20 +170,20 @@ def getVariable(self): op = self.nextChar() if op == '-': - default = self.getString(['}']) + default = self.getString(['}'], False, subst and unset) if unset: return default else: return self.env[varName] elif op == '+': - alternate = self.getString(['}']) + alternate = self.getString(['}'], False, subst and not unset) if unset: return "" else: return alternate elif op == '}': if varName not in self.env: - if self.nounset: + if subst and self.nounset: raise ParseError("Unset variable: " + varName) else: return "" @@ -186,30 +191,37 @@ def getVariable(self): else: raise ParseError("Unterminated variable: " + str(op)) - def getBareVariable(self, varName): + def getBareVariable(self, varName, subst): """Substitute base variable at current position. :param varName: Initial character of variable name + :param subst: Bail out if substitution fails? """ varName += self.getRestOfName() varValue = self.env.get(varName) if varValue is None: - if self.nounset: + if subst and self.nounset: raise ParseError("Unset variable: " + varName) return "" else: return varValue - def getCommand(self): - """Substitute string function at current position.""" + def getCommand(self, subst): + """Substitute string function at current position. + + :param subst: Actually call function or just skip? + """ words = [] delim = [",", ")"] while True: - word = self.getString(delim, True) + word = self.getString(delim, True, subst) words.append(word) end = self.nextChar() if end == ")": break + if not subst: + return "" + if len(words) < 1: raise ParseError("Expected function name") cmd = words[0] From 363df79e1ec3e676f8fdd3b7fbb02164e33266e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kl=C3=B6tzke?= Date: Tue, 3 Sep 2024 13:19:03 +0200 Subject: [PATCH 4/4] test: add string parser tests for unused substitutions --- test/unit/test_input_stringparser.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/unit/test_input_stringparser.py b/test/unit/test_input_stringparser.py index acf6d5e98..3bdce440f 100644 --- a/test/unit/test_input_stringparser.py +++ b/test/unit/test_input_stringparser.py @@ -119,6 +119,26 @@ def testFails(self): self.assertRaises(ParseError, u.parse, "$1") self.assertRaises(ParseError, u.parse, "$%%") + def testSkipUnused(self): + """Unused branches must not be substituted. + + Syntax error must still be detected, though. + """ + + self.assertEqual(self.p.parse("${asdf:-$unset}"), "qwer") + self.assertEqual(self.p.parse("${asdf:-${unset}}"), "qwer") + self.assertEqual(self.p.parse("${asdf:-${${double-unset}}}"), "qwer") + self.assertEqual(self.p.parse("${asdf:-$(unknown)}"), "qwer") + self.assertEqual(self.p.parse("${asdf:-$($fn,$unset)}"), "qwer") + self.assertRaises(ParseError, self.p.parse, "${asdf:-$($fn}") + + self.assertEqual(self.p.parse("${unset:+$unset}"), "") + self.assertEqual(self.p.parse("${unset:+${unset}}"), "") + self.assertEqual(self.p.parse("${unset:+${${double-unset}}}"), "") + self.assertEqual(self.p.parse("${unset:+$(unknown)}"), "") + self.assertEqual(self.p.parse("${unset:+$($fn,$unset)}"), "") + self.assertRaises(ParseError, self.p.parse, "${unset:+${${double-unset}}") + class TestStringFunctions(TestCase): def testEqual(self):