From cde1c9355c4e619779ddd969a3c50e83a78eeadf Mon Sep 17 00:00:00 2001 From: Ralf Hubert Date: Wed, 1 Jan 2025 10:14:50 +0100 Subject: [PATCH] url-scm: store downloaded files in .bob-download directory Separate the downloaded files from the extracted files by downloading them into a special `.bob-download` directory. Also the canary is generated there. The Gzip and XZ-Extractor always extract the files into the directory of the compressed file. Therefore the compressed file is copied into the workspace-directory first. By removing `-k` the compressed files are no longer kept. To trigger an attic move of old workspaces, version information is added to the url-scm spec. --- pym/bob/scm/url.py | 51 ++++++++++++++----- .../extractors/recipes/extract_test.yaml | 2 +- test/black-box/extractors/run.sh | 15 ++++++ .../stable-variant-ids/specs/checkouts.txt | 12 ++--- test/unit/test_input_urlscm.py | 4 +- 5 files changed, 63 insertions(+), 21 deletions(-) diff --git a/pym/bob/scm/url.py b/pym/bob/scm/url.py index 54bf8f0f7..67634c297 100644 --- a/pym/bob/scm/url.py +++ b/pym/bob/scm/url.py @@ -164,8 +164,8 @@ def __init__(self, dir, file, strip): self.strip = strip async def _extract(self, cmds, invoker): - destination = invoker.joinPath(self.dir, self.file) - canary = invoker.joinPath(self.dir, "." 
+ self.file + ".extracted") + destination = invoker.joinPath(self.dir, ".bob-download", self.file) + canary = invoker.joinPath(self.dir, ".bob-download", self.file + ".extracted") if isYounger(destination, canary): for cmd in cmds: if shutil.which(cmd[0]) is None: continue @@ -192,10 +192,11 @@ def __init__(self, dir, file, strip): async def extract(self, invoker): cmds = [] if isWin32 and self.strip == 0: - cmds.append(["python", "-m", "tarfile", "-e", self.file]) + cmds.append(["python", "-m", "tarfile", "-e", + os.path.join(".bob-download", self.file)]) cmd = ["tar", "-x", "--no-same-owner", "--no-same-permissions", - "-f", self.file] + "-f", os.path.join(".bob-download", self.file)] if self.strip > 0: cmd.append("--strip-components={}".format(self.strip)) cmds.append(cmd) @@ -212,9 +213,10 @@ def __init__(self, dir, file, strip): async def extract(self, invoker): cmds = [] if isWin32: - cmds.append(["python", "-m", "zipfile", "-e", self.file, "."]) + cmds.append(["python", "-m", "zipfile", + "-e", os.path.join(".bob-download", self.file), "."]) - cmds.append(["unzip", "-o", self.file]) + cmds.append(["unzip", "-o", os.path.join(".bob-download", self.file)]) await super()._extract(cmds, invoker) @@ -225,9 +227,14 @@ def __init__(self, dir, file, strip): raise BuildError("Extractor does not support 'stripComponents'!") async def extract(self, invoker): - cmds = [["gunzip", "-kf", self.file]] + # gunzip extracts the file at the location of the input file. 
Copy the + # downloaded file to the workspace directory prior to uncompressing it + shutil.copyfile(invoker.joinPath(self.dir, ".bob-download", self.file), + invoker.joinPath(self.dir, self.file)) + cmds = [["gunzip", "-f", self.file]] await super()._extract(cmds, invoker) + class XZExtractor(Extractor): def __init__(self, dir, file, strip): super().__init__(dir, file, strip) @@ -235,9 +242,12 @@ def __init__(self, dir, file, strip): raise BuildError("Extractor does not support 'stripComponents'!") async def extract(self, invoker): - cmds = [["unxz", "-kf", self.file]] + shutil.copyfile(invoker.joinPath(self.dir, ".bob-download", self.file), + invoker.joinPath(self.dir, self.file)) + cmds = [["unxz", "-f", self.file]] await super()._extract(cmds, invoker) + class SevenZipExtractor(Extractor): def __init__(self, dir, file, strip): super().__init__(dir, file, strip) @@ -245,7 +255,7 @@ def __init__(self, dir, file, strip): raise BuildError("Extractor does not support 'stripComponents'!") async def extract(self, invoker): - cmds = [["7z", "x", "-y", self.file]] + cmds = [["7z", "x", "-y", os.path.join(".bob-download", self.file)]] await super()._extract(cmds, invoker) @@ -314,6 +324,8 @@ class UrlScm(Scm): "zip" : ZipExtractor, } + VERSION = 1 + def __init__(self, spec, overrides=[], stripUser=None, preMirrors=[], fallbackMirrors=[], defaultFileMode=None): super().__init__(spec, overrides) @@ -354,6 +366,8 @@ def __init__(self, spec, overrides=[], stripUser=None, self.__fallbackMirrorsUrls = spec.get("fallbackMirrors") self.__fallbackMirrorsUpload = spec.get("__fallbackMirrorsUpload") self.__fileMode = spec.get("fileMode", 0o600 if defaultFileMode else None) + self.__version = { "valid" : True if spec.get("__version") is not None else False, + "version" : spec.get("__version", UrlScm.VERSION) } def getProperties(self, isJenkins, pretty=False): ret = super().getProperties(isJenkins) @@ -374,6 +388,7 @@ def getProperties(self, isJenkins, pretty=False): 'fallbackMirrors' 
: self.__getFallbackMirrorsUrls(), '__fallbackMirrorsUpload' : self.__getFallbackMirrorsUpload(), 'fileMode' : dumpMode(self.__fileMode) if pretty else self.__fileMode, + '__version' : self.__version, }) return ret @@ -423,7 +438,7 @@ def _download(self, url, destination, mode): headers["User-Agent"] = "BobBuildTool/{}".format(BOB_VERSION) context = None if self.__sslVerify else sslNoVerifyContext() if os.path.isfile(destination) and (url.scheme in ["http", "https"]): - # Try to avoid download if possible + # Try to avoid download if possible headers["If-Modified-Since"] = time2HTTPDate(os.stat(destination).st_mtime) tmpFileName = None @@ -596,6 +611,9 @@ async def _put(self, invoker, workspaceFile, source, url): invoker.fail("Upload not supported for URL scheme: " + url.scheme) def canSwitch(self, oldScm): + if not self.__version.get("valid"): + return False + diff = self._diffSpec(oldScm) if "scm" in diff: return False @@ -630,7 +648,12 @@ async def switch(self, invoker, oldScm): async def invoke(self, invoker): os.makedirs(invoker.joinPath(self.__dir), exist_ok=True) workspaceFile = os.path.join(self.__dir, self.__fn) + extractor = self.__getExtractor() + destination = invoker.joinPath(self.__dir, self.__fn) + if extractor is not None: + os.makedirs(invoker.joinPath(self.__dir, ".bob-download"), exist_ok=True) + destination = invoker.joinPath(self.__dir, ".bob-download", self.__fn) # Download only if necessary if not self.isDeterministic() or not os.path.isfile(destination): @@ -679,7 +702,6 @@ async def invoke(self, invoker): await self._put(invoker, workspaceFile, destination, url) # Run optional extractors - extractor = self.__getExtractor() if extractor is not None: await extractor.extract(invoker) @@ -688,7 +710,9 @@ def asDigestScript(self): The format is "digest dir extract" if a SHA checksum was specified. Otherwise it is "url dir extract". A "s#" is appended if leading paths - are stripped where # is the number of stripped elements. 
+ are stripped where # is the number of stripped elements. Also appended + is "m" if fileMode is set. "v" tracks the urlscm + directory layout used. """ if self.__stripUser: filt = removeUserFromUrl @@ -698,7 +722,8 @@ def asDigestScript(self): self.__digestSha1 or filt(self.__url) ) + " " + posixpath.join(self.__dir, self.__fn) + " " + str(self.__extract) + \ ( " s{}".format(self.__strip) if self.__strip > 0 else "" ) + \ - ( " m{}".format(self.__fileMode) if self.__fileMode is not None else "") + ( " m{}".format(self.__fileMode) if self.__fileMode is not None else "") + \ + " v{}".format(UrlScm.VERSION) def getDirectory(self): return self.__dir diff --git a/test/black-box/extractors/recipes/extract_test.yaml b/test/black-box/extractors/recipes/extract_test.yaml index 9b8b28fc9..5fc54b671 100644 --- a/test/black-box/extractors/recipes/extract_test.yaml +++ b/test/black-box/extractors/recipes/extract_test.yaml @@ -30,7 +30,7 @@ buildScript: | 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b test.dat EOF SHA256_FILE=$(pwd)/test.sha256 - for d in $(find $1/ -mindepth 1 -type d); do + for d in $(find $1/ -mindepth 1 -not -name ".bob-download" -type d); do pushd $d sha256sum -c ${SHA256_FILE} popd diff --git a/test/black-box/extractors/run.sh b/test/black-box/extractors/run.sh index 3b03c3397..2e60a71b9 100755 --- a/test/black-box/extractors/run.sh +++ b/test/black-box/extractors/run.sh @@ -15,3 +15,18 @@ fi # Build and fetch result path run_bob dev -DINPUT_FILES="${INPUT}" -DIS_POSIX="$IS_POSIX" extract_test +check_files() { + expect_not_exist dev/src/extract_test/1/workspace/$1/test.${2:-$1}.extracted + expect_not_exist dev/src/extract_test/1/workspace/$1/test.${2:-$1} + expect_exist dev/src/extract_test/1/workspace/$1/.bob-download/test.${2:-$1}.extracted + expect_exist dev/src/extract_test/1/workspace/$1/.bob-download/test.${2:-$1} +} + +check_files "tar" "tgz" +check_files "zip" + +if is_posix; then + check_files "gzip" "dat.gz" + check_files "xz" 
"dat.xz" + check_files "7z" +fi diff --git a/test/black-box/stable-variant-ids/specs/checkouts.txt b/test/black-box/stable-variant-ids/specs/checkouts.txt index c8070515f..7f8ccd3b9 100644 --- a/test/black-box/stable-variant-ids/specs/checkouts.txt +++ b/test/black-box/stable-variant-ids/specs/checkouts.txt @@ -1,10 +1,10 @@ -root-checkouts dist d804f0667f9e2fb87404cdd7c4fe2344fb4f0edd +root-checkouts dist 6358cf7ffb1d25ffa79494293761bc98e88611a7 digestScript: 9718449ab965b52c8c40ce526f7b2c86b6d25b22 args: - 4b130acf8326e94c035bd0d57dcceaa217aa6df4 -root-checkouts build 4b130acf8326e94c035bd0d57dcceaa217aa6df4 + d64294f089e5c6b698a3efeec913ffe5b7d56170 +root-checkouts build d64294f089e5c6b698a3efeec913ffe5b7d56170 digestScript: 9718449ab965b52c8c40ce526f7b2c86b6d25b22 args: - b28343a344644eb72b59144b1ff4a3b4b0e04140 -root-checkouts src b28343a344644eb72b59144b1ff4a3b4b0e04140 - digestScript: 6a72ba2e852fa28c7fa5e36067665cbd9b8a3bf2 + 7ba4c1bff1d403c032924ceb62a87c2baf74c03d +root-checkouts src 7ba4c1bff1d403c032924ceb62a87c2baf74c03d + digestScript: a24b2b1ba87cb382869730f308151da07b25e1b4 diff --git a/test/unit/test_input_urlscm.py b/test/unit/test_input_urlscm.py index 96d1867c1..c6f3ead4f 100644 --- a/test/unit/test_input_urlscm.py +++ b/test/unit/test_input_urlscm.py @@ -65,6 +65,7 @@ def createUrlScm(self, spec = {}, preMirrors=[], fallbackMirrors=[], 'url' : self.url, 'recipe' : "foo.yaml#0", '__source' : "Recipe foo", + '__version' : UrlScm.VERSION, } s.update(spec) return UrlScm(s, preMirrors=preMirrors, fallbackMirrors=fallbackMirrors, @@ -862,7 +863,8 @@ def testSingleFile(self): with tempfile.TemporaryDirectory() as workspace: scm = self.createUrlScm() self.invokeScm(workspace, scm) - self.assertTrue(os.path.exists(os.path.join(workspace, "test.txt." + ext))) + self.assertTrue(os.path.exists(os.path.join(workspace, ".bob-download", + "test.txt." + ext))) self.assertFileMd5(os.path.join(workspace, "test.txt"), "d3b07384d113edec49eaa6238ad5ff00")