Skip to content

Commit

Permalink
url-scm: store downloaded files in .bob-download directory
Browse files Browse the repository at this point in the history
Separate the downloaded files from the extracted files by downloading
them into a special `.bob-download` directory. The canary is also
generated there. The Gzip and XZ extractors always extract the files into
the directory of the compressed file. Therefore the compressed file is
copied into the workspace directory first. By removing `-k`, the
compressed files are no longer kept.

To trigger an attic move of old workspaces, version information is added
to the url-scm spec.
  • Loading branch information
rhubert committed Jan 1, 2025
1 parent 8628f33 commit cde1c93
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 21 deletions.
51 changes: 38 additions & 13 deletions pym/bob/scm/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def __init__(self, dir, file, strip):
self.strip = strip

async def _extract(self, cmds, invoker):
destination = invoker.joinPath(self.dir, self.file)
canary = invoker.joinPath(self.dir, "." + self.file + ".extracted")
destination = invoker.joinPath(self.dir, ".bob-download", self.file)
canary = invoker.joinPath(self.dir, ".bob-download", self.file + ".extracted")
if isYounger(destination, canary):
for cmd in cmds:
if shutil.which(cmd[0]) is None: continue
Expand All @@ -192,10 +192,11 @@ def __init__(self, dir, file, strip):
async def extract(self, invoker):
cmds = []
if isWin32 and self.strip == 0:
cmds.append(["python", "-m", "tarfile", "-e", self.file])
cmds.append(["python", "-m", "tarfile", "-e",
os.path.join(".bob-download", self.file)])

cmd = ["tar", "-x", "--no-same-owner", "--no-same-permissions",
"-f", self.file]
"-f", os.path.join(".bob-download", self.file)]
if self.strip > 0:
cmd.append("--strip-components={}".format(self.strip))
cmds.append(cmd)
Expand All @@ -212,9 +213,10 @@ def __init__(self, dir, file, strip):
async def extract(self, invoker):
cmds = []
if isWin32:
cmds.append(["python", "-m", "zipfile", "-e", self.file, "."])
cmds.append(["python", "-m", "zipfile",
"-e", os.path.join(".bob-download", self.file), "."])

cmds.append(["unzip", "-o", self.file])
cmds.append(["unzip", "-o", os.path.join(".bob-download", self.file)])
await super()._extract(cmds, invoker)


Expand All @@ -225,27 +227,35 @@ def __init__(self, dir, file, strip):
raise BuildError("Extractor does not support 'stripComponents'!")

async def extract(self, invoker):
cmds = [["gunzip", "-kf", self.file]]
# gunzip extracts the file at the location of the input file. Copy the
# downloaded file to the workspace directory prior to uncompressing it
shutil.copyfile(invoker.joinPath(self.dir, ".bob-download", self.file),
invoker.joinPath(self.dir, self.file))
cmds = [["gunzip", "-f", self.file]]
await super()._extract(cmds, invoker)


class XZExtractor(Extractor):
def __init__(self, dir, file, strip):
super().__init__(dir, file, strip)
if strip != 0:
raise BuildError("Extractor does not support 'stripComponents'!")

async def extract(self, invoker):
cmds = [["unxz", "-kf", self.file]]
shutil.copyfile(invoker.joinPath(self.dir, ".bob-download", self.file),
invoker.joinPath(self.dir, self.file))
cmds = [["unxz", "-f", self.file]]
await super()._extract(cmds, invoker)


class SevenZipExtractor(Extractor):
def __init__(self, dir, file, strip):
super().__init__(dir, file, strip)
if strip != 0:
raise BuildError("Extractor does not support 'stripComponents'!")

async def extract(self, invoker):
cmds = [["7z", "x", "-y", self.file]]
cmds = [["7z", "x", "-y", os.path.join(".bob-download", self.file)]]
await super()._extract(cmds, invoker)


Expand Down Expand Up @@ -314,6 +324,8 @@ class UrlScm(Scm):
"zip" : ZipExtractor,
}

VERSION = 1

def __init__(self, spec, overrides=[], stripUser=None,
preMirrors=[], fallbackMirrors=[], defaultFileMode=None):
super().__init__(spec, overrides)
Expand Down Expand Up @@ -354,6 +366,8 @@ def __init__(self, spec, overrides=[], stripUser=None,
self.__fallbackMirrorsUrls = spec.get("fallbackMirrors")
self.__fallbackMirrorsUpload = spec.get("__fallbackMirrorsUpload")
self.__fileMode = spec.get("fileMode", 0o600 if defaultFileMode else None)
self.__version = { "valid" : True if spec.get("__version") is not None else False,
"version" : spec.get("__version", UrlScm.VERSION) }

def getProperties(self, isJenkins, pretty=False):
ret = super().getProperties(isJenkins)
Expand All @@ -374,6 +388,7 @@ def getProperties(self, isJenkins, pretty=False):
'fallbackMirrors' : self.__getFallbackMirrorsUrls(),
'__fallbackMirrorsUpload' : self.__getFallbackMirrorsUpload(),
'fileMode' : dumpMode(self.__fileMode) if pretty else self.__fileMode,
'__version' : self.__version,
})
return ret

Expand Down Expand Up @@ -423,7 +438,7 @@ def _download(self, url, destination, mode):
headers["User-Agent"] = "BobBuildTool/{}".format(BOB_VERSION)
context = None if self.__sslVerify else sslNoVerifyContext()
if os.path.isfile(destination) and (url.scheme in ["http", "https"]):
# Try to avoid download if possible
# Try to avoid download if possible
headers["If-Modified-Since"] = time2HTTPDate(os.stat(destination).st_mtime)

tmpFileName = None
Expand Down Expand Up @@ -596,6 +611,9 @@ async def _put(self, invoker, workspaceFile, source, url):
invoker.fail("Upload not supported for URL scheme: " + url.scheme)

def canSwitch(self, oldScm):
if not self.__version.get("valid"):
return False

diff = self._diffSpec(oldScm)
if "scm" in diff:
return False
Expand Down Expand Up @@ -630,7 +648,12 @@ async def switch(self, invoker, oldScm):
async def invoke(self, invoker):
os.makedirs(invoker.joinPath(self.__dir), exist_ok=True)
workspaceFile = os.path.join(self.__dir, self.__fn)
extractor = self.__getExtractor()

destination = invoker.joinPath(self.__dir, self.__fn)
if extractor is not None:
os.makedirs(invoker.joinPath(self.__dir, ".bob-download"), exist_ok=True)
destination = invoker.joinPath(self.__dir, ".bob-download", self.__fn)

# Download only if necessary
if not self.isDeterministic() or not os.path.isfile(destination):
Expand Down Expand Up @@ -679,7 +702,6 @@ async def invoke(self, invoker):
await self._put(invoker, workspaceFile, destination, url)

# Run optional extractors
extractor = self.__getExtractor()
if extractor is not None:
await extractor.extract(invoker)

Expand All @@ -688,7 +710,9 @@ def asDigestScript(self):
The format is "digest dir extract" if a SHA checksum was specified.
Otherwise it is "url dir extract". A "s#" is appended if leading paths
are stripped where # is the number of stripped elements.
are stripped where # is the number of stripped elements. Also appended
is "m<fileMode>" if fileMode is set. "v<versionNumber>" tracks the urlscm
directory layout used.
"""
if self.__stripUser:
filt = removeUserFromUrl
Expand All @@ -698,7 +722,8 @@ def asDigestScript(self):
self.__digestSha1 or filt(self.__url)
) + " " + posixpath.join(self.__dir, self.__fn) + " " + str(self.__extract) + \
( " s{}".format(self.__strip) if self.__strip > 0 else "" ) + \
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "")
( " m{}".format(self.__fileMode) if self.__fileMode is not None else "") + \
" v{}".format(UrlScm.VERSION)

def getDirectory(self):
return self.__dir
Expand Down
2 changes: 1 addition & 1 deletion test/black-box/extractors/recipes/extract_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ buildScript: |
6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b test.dat
EOF
SHA256_FILE=$(pwd)/test.sha256
for d in $(find $1/ -mindepth 1 -type d); do
for d in $(find $1/ -mindepth 1 -not -name ".bob-download" -type d); do
pushd $d
sha256sum -c ${SHA256_FILE}
popd
Expand Down
15 changes: 15 additions & 0 deletions test/black-box/extractors/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,18 @@ fi
# Build and fetch result path
run_bob dev -DINPUT_FILES="${INPUT}" -DIS_POSIX="$IS_POSIX" extract_test

check_files() {
expect_not_exist dev/src/extract_test/1/workspace/$1/test.${2:-$1}.extracted
expect_not_exist dev/src/extract_test/1/workspace/$1/test.${2:-$1}
expect_exist dev/src/extract_test/1/workspace/$1/.bob-download/test.${2:-$1}.extracted
expect_exist dev/src/extract_test/1/workspace/$1/.bob-download/test.${2:-$1}
}

check_files "tar" "tgz"
check_files "zip"

if is_posix; then
check_files "gzip" "dat.gz"
check_files "xz" "dat.xz"
check_files "7z"
fi
12 changes: 6 additions & 6 deletions test/black-box/stable-variant-ids/specs/checkouts.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
root-checkouts dist d804f0667f9e2fb87404cdd7c4fe2344fb4f0edd
root-checkouts dist 6358cf7ffb1d25ffa79494293761bc98e88611a7
digestScript: 9718449ab965b52c8c40ce526f7b2c86b6d25b22
args:
4b130acf8326e94c035bd0d57dcceaa217aa6df4
root-checkouts build 4b130acf8326e94c035bd0d57dcceaa217aa6df4
d64294f089e5c6b698a3efeec913ffe5b7d56170
root-checkouts build d64294f089e5c6b698a3efeec913ffe5b7d56170
digestScript: 9718449ab965b52c8c40ce526f7b2c86b6d25b22
args:
b28343a344644eb72b59144b1ff4a3b4b0e04140
root-checkouts src b28343a344644eb72b59144b1ff4a3b4b0e04140
digestScript: 6a72ba2e852fa28c7fa5e36067665cbd9b8a3bf2
7ba4c1bff1d403c032924ceb62a87c2baf74c03d
root-checkouts src 7ba4c1bff1d403c032924ceb62a87c2baf74c03d
digestScript: a24b2b1ba87cb382869730f308151da07b25e1b4
4 changes: 3 additions & 1 deletion test/unit/test_input_urlscm.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def createUrlScm(self, spec = {}, preMirrors=[], fallbackMirrors=[],
'url' : self.url,
'recipe' : "foo.yaml#0",
'__source' : "Recipe foo",
'__version' : UrlScm.VERSION,
}
s.update(spec)
return UrlScm(s, preMirrors=preMirrors, fallbackMirrors=fallbackMirrors,
Expand Down Expand Up @@ -862,7 +863,8 @@ def testSingleFile(self):
with tempfile.TemporaryDirectory() as workspace:
scm = self.createUrlScm()
self.invokeScm(workspace, scm)
self.assertTrue(os.path.exists(os.path.join(workspace, "test.txt." + ext)))
self.assertTrue(os.path.exists(os.path.join(workspace, ".bob-download",
"test.txt." + ext)))
self.assertFileMd5(os.path.join(workspace, "test.txt"),
"d3b07384d113edec49eaa6238ad5ff00")

Expand Down

0 comments on commit cde1c93

Please sign in to comment.