From d4e46fe23152756aebd96f69cc4fc4b87bf5f7a0 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Mon, 1 Apr 2024 05:11:30 +0900 Subject: [PATCH] Containerfile: new parser Close #3970 Signed-off-by: Masatake YAMATO --- Tmain/list-map-patterns.d/stdout-expected.txt | 6 +- Tmain/list-roles.d/stdout-expected.txt | 2 + Units/parser-containerfile.r/env.d/args.ctags | 3 + .../env.d/expected.tags | 16 + .../env.d/input.container | 18 + .../parser-containerfile.r/multi.d/args.ctags | 4 + .../multi.d/expected.tags | 17 + .../multi.d/input.containerfile | 22 + main/parsers_p.h | 1 + parsers/containerfile.c | 439 ++++++++++++++++++ source.mak | 1 + win32/ctags_vs2013.vcxproj | 1 + win32/ctags_vs2013.vcxproj.filters | 3 + 13 files changed, 530 insertions(+), 3 deletions(-) create mode 100644 Units/parser-containerfile.r/env.d/args.ctags create mode 100644 Units/parser-containerfile.r/env.d/expected.tags create mode 100644 Units/parser-containerfile.r/env.d/input.container create mode 100644 Units/parser-containerfile.r/multi.d/args.ctags create mode 100644 Units/parser-containerfile.r/multi.d/expected.tags create mode 100644 Units/parser-containerfile.r/multi.d/input.containerfile create mode 100644 parsers/containerfile.c diff --git a/Tmain/list-map-patterns.d/stdout-expected.txt b/Tmain/list-map-patterns.d/stdout-expected.txt index 3de89a3cf1..fd84a07bb4 100644 --- a/Tmain/list-map-patterns.d/stdout-expected.txt +++ b/Tmain/list-map-patterns.d/stdout-expected.txt @@ -1,7 +1,7 @@ ## all|grep LdScript -#LANGUAGE PATTERN -LdScript *.lds.S -LdScript ld.script +#LANGUAGE PATTERN +LdScript *.lds.S +LdScript ld.script ## LdScript #PATTERN *.lds.S diff --git a/Tmain/list-roles.d/stdout-expected.txt b/Tmain/list-roles.d/stdout-expected.txt index e40d88ea5c..2a51434f59 100644 --- a/Tmain/list-roles.d/stdout-expected.txt +++ b/Tmain/list-roles.d/stdout-expected.txt @@ -49,6 +49,7 @@ CUDA h/header system on system header Cobol S/sourcefile copied on copied in source file CobolFree 
S/sourcefile copied on copied in source file CobolVariable S/sourcefile copied on copied in source file +Containerfile i/image base on referenced as a base image in FROM directive DTD e/element attOwner on attributes owner DTD p/parameterEntity condition on conditions DTD p/parameterEntity elementName on element names @@ -192,6 +193,7 @@ CUDA h/header system on system header Cobol S/sourcefile copied on copied in source file CobolFree S/sourcefile copied on copied in source file CobolVariable S/sourcefile copied on copied in source file +Containerfile i/image base on referenced as a base image in FROM directive DTD e/element attOwner on attributes owner DTD p/parameterEntity condition on conditions DTD p/parameterEntity elementName on element names diff --git a/Units/parser-containerfile.r/env.d/args.ctags b/Units/parser-containerfile.r/env.d/args.ctags new file mode 100644 index 0000000000..1c3015ff14 --- /dev/null +++ b/Units/parser-containerfile.r/env.d/args.ctags @@ -0,0 +1,3 @@ +--sort=no +--language-force=Containerfile +--fields=+in diff --git a/Units/parser-containerfile.r/env.d/expected.tags b/Units/parser-containerfile.r/env.d/expected.tags new file mode 100644 index 0000000000..c521b15623 --- /dev/null +++ b/Units/parser-containerfile.r/env.d/expected.tags @@ -0,0 +1,16 @@ +img27634bbb0100 input.container /^FROM ubi8:latest$/;" i line:2 inherits:ubi8:latest +MY_NAME input.container /^ENV MY_NAME="John Doe"$/;" e line:4 image:img27634bbb0100 +MY_DOG input.container /^ENV MY_DOG=Rex\\ The\\ Dog$/;" e line:5 image:img27634bbb0100 +MY_CAT input.container /^ENV MY_CAT=fluffy$/;" e line:6 image:img27634bbb0100 +MY_X input.container /^ENV MY_X=fluffy MY_Y="a" MY_Z=b$/;" e line:7 image:img27634bbb0100 +MY_Y input.container /^ENV MY_X=fluffy MY_Y="a" MY_Z=b$/;" e line:7 image:img27634bbb0100 +MY_Z input.container /^ENV MY_X=fluffy MY_Y="a" MY_Z=b$/;" e line:7 image:img27634bbb0100 +MY_A input.container /^ENV MY_A=fluffy \\$/;" e line:8 image:img27634bbb0100 +MY_B 
input.container /^MY_B="a" \\$/;" e line:9 image:img27634bbb0100 +MY_C input.container /^MY_C=b$/;" e line:10 image:img27634bbb0100 +YOUR_A input.container /^ENV YOUR_A "a"$/;" e line:12 image:img27634bbb0100 +YOUR_B input.container /^ENV YOUR_B "b" \\$/;" e line:13 image:img27634bbb0100 +YOUR_C input.container /^ YOUR_C "c" \\$/;" e line:14 image:img27634bbb0100 +YOUR_D input.container /^ YOUR_D \\$/;" e line:15 image:img27634bbb0100 +MY_HOUSE input.container /^ENV MY_HOUSE "a" MY_FAMILY "a"$/;" e line:18 image:img27634bbb0100 +MY_FAMILY input.container /^ENV MY_HOUSE "a" MY_FAMILY "a"$/;" e line:18 image:img27634bbb0100 diff --git a/Units/parser-containerfile.r/env.d/input.container b/Units/parser-containerfile.r/env.d/input.container new file mode 100644 index 0000000000..a16b1ac0af --- /dev/null +++ b/Units/parser-containerfile.r/env.d/input.container @@ -0,0 +1,18 @@ +# https://docs.docker.jp/engine/reference/builder.html#env +FROM ubi8:latest + +ENV MY_NAME="John Doe" +ENV MY_DOG=Rex\ The\ Dog +ENV MY_CAT=fluffy +ENV MY_X=fluffy MY_Y="a" MY_Z=b +ENV MY_A=fluffy \ +MY_B="a" \ +MY_C=b + +ENV YOUR_A "a" +ENV YOUR_B "b" \ + YOUR_C "c" \ + YOUR_D \ + YOUR_E + +ENV MY_HOUSE "a" MY_FAMILY "a" diff --git a/Units/parser-containerfile.r/multi.d/args.ctags b/Units/parser-containerfile.r/multi.d/args.ctags new file mode 100644 index 0000000000..92f0d4d84a --- /dev/null +++ b/Units/parser-containerfile.r/multi.d/args.ctags @@ -0,0 +1,4 @@ +--sort=no +--language-force=Containerfile +--fields=+inrl +--extras=+r diff --git a/Units/parser-containerfile.r/multi.d/expected.tags b/Units/parser-containerfile.r/multi.d/expected.tags new file mode 100644 index 0000000000..8a957522e9 --- /dev/null +++ b/Units/parser-containerfile.r/multi.d/expected.tags @@ -0,0 +1,17 @@ +X input.containerfile /^#define X /;" d line:3 language:CPreProcessor file: roles:def +busybox:latest input.containerfile /^FROM busybox:latest AS builder$/;" i line:5 language:Containerfile roles:base +builder 
input.containerfile /^FROM busybox:latest AS builder$/;" i line:5 language:Containerfile inherits:busybox:latest roles:def +BUILD_LOGLEVEL input.containerfile /^ENV 'BUILD'"_LOGLEVEL"="5" A=1$/;" e line:6 language:Containerfile image:builder roles:def +A input.containerfile /^ENV 'BUILD'"_LOGLEVEL"="5" A=1$/;" e line:6 language:Containerfile image:builder roles:def +OPENSHIFT_BUILD_NAME input.containerfile /^ENV "OPENSHIFT_BUILD_NAME"="mydockertest-1" "OPENSHIFT_BUILD_NAMESPACE"="default"$/;" e line:9 language:Containerfile image:builder roles:def +OPENSHIFT_BUILD_NAMESPACE input.containerfile /^ENV "OPENSHIFT_BUILD_NAME"="mydockertest-1" "OPENSHIFT_BUILD_NAMESPACE"="default"$/;" e line:9 language:Containerfile image:builder roles:def +io.openshift.build.name input.containerfile /^LABEL "io.openshift.build.name"="mydockertest-1" "io.openshift.build.namespace"="default"$/;" l line:10 language:Containerfile image:builder roles:def +io.openshift.build.namespace input.containerfile /^LABEL "io.openshift.build.name"="mydockertest-1" "io.openshift.build.namespace"="default"$/;" l line:10 language:Containerfile image:builder roles:def +Y input.containerfile /^#define Y /;" d line:12 language:CPreProcessor file: roles:def +builder input.containerfile /^FROM builder$/;" i line:14 language:Containerfile roles:base +img7a86bc1d0100 input.containerfile /^FROM builder$/;" i line:14 language:Containerfile inherits:builder roles:def +BUILD_LOGLEVEL input.containerfile /^ENV "BUILD_LOGLEVEL"="5"$/;" e line:15 language:Containerfile image:img7a86bc1d0100 roles:def +Z input.containerfile /^#define Z /;" d line:18 language:CPreProcessor file: roles:def +builder input.containerfile /^FROM builder$/;" i line:20 language:Containerfile roles:base +img7a86bc1d0200 input.containerfile /^FROM builder$/;" i line:20 language:Containerfile inherits:builder roles:def +BUILD_LOGLEVEL input.containerfile /^ENV "BUILD_LOGLEVEL"="5"$/;" e line:21 language:Containerfile image:img7a86bc1d0200 
roles:def diff --git a/Units/parser-containerfile.r/multi.d/input.containerfile b/Units/parser-containerfile.r/multi.d/input.containerfile new file mode 100644 index 0000000000..3618196185 --- /dev/null +++ b/Units/parser-containerfile.r/multi.d/input.containerfile @@ -0,0 +1,22 @@ +# Based on https://github.com/containers/buildah/blob/main/tests/bud/multi-stage-builds/Dockerfile.extended + +#define X 1 + +FROM busybox:latest AS builder +ENV 'BUILD'"_LOGLEVEL"="5" A=1 +RUN touch /tmp/preCommit +ENTRYPOINT /bin/sleep 600 +ENV "OPENSHIFT_BUILD_NAME"="mydockertest-1" "OPENSHIFT_BUILD_NAMESPACE"="default" +LABEL "io.openshift.build.name"="mydockertest-1" "io.openshift.build.namespace"="default" + +#define Y 1 + +FROM builder +ENV "BUILD_LOGLEVEL"="5" +RUN touch /tmp/postCommit + +#define Z 1 + +FROM builder +ENV "BUILD_LOGLEVEL"="5" +RUN echo "$BUILD_LOGLEVEL" diff --git a/main/parsers_p.h b/main/parsers_p.h index 3dfd9e8bbc..83003c5f38 100644 --- a/main/parsers_p.h +++ b/main/parsers_p.h @@ -84,6 +84,7 @@ CobolParser, \ CobolFreeParser, \ CobolVariableParser, \ + ContainerfileParser, \ CUDAParser, \ DParser, \ DiffParser, \ diff --git a/parsers/containerfile.c b/parsers/containerfile.c new file mode 100644 index 0000000000..a9ef9f65c7 --- /dev/null +++ b/parsers/containerfile.c @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2024, Red Hat, Inc. + * Copyright (c) 2024, Masatake YAMATO + * + * Author: Masatake YAMATO + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ * USA.
+ *
+ * Reference:
+ * - https://docs.docker.com/engine/reference/builder/
+ * - https://github.com/containers/common/blob/main/docs/Containerfile.5.md
+ * - https://github.com/containers/podman/blob/main/docs/source/markdown/podman-build.1.md.in
+ */
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"  /* must always come first */
+
+#include "kind.h"
+#include "parse.h"
+#include "keyword.h"
+#include "tokeninfo.h"
+#include "field.h"
+#include "read.h"
+#include "entry.h"
+
+#include "cpreprocessor.h"
+
+/*
+ *   DATA DEFINITIONS
+ */
+
+/* Kind indices.  The order must match the entries of ContainerfileKinds. */
+typedef enum {
+	K_IMAGE,
+	K_ARG,
+	K_ENV,
+	K_LABEL,
+} containerfileKind;
+
+/* Role indices for K_IMAGE.  The order must match ContainerfileImageRoles. */
+typedef enum {
+	R_IMAGE_BASE,
+} containerfileImageRole;
+
+static roleDefinition ContainerfileImageRoles [] = {
+	{ true, "base", "referenced as a base image in FROM directive" },
+};
+
+static kindDefinition ContainerfileKinds [] = {
+	{ true, 'i', "image", "images referred or defined with FROM directive",
+	  .referenceOnly = false, ATTACH_ROLES (ContainerfileImageRoles) },
+	{ true, 'a', "arg", "objects defined with ARG directive", },
+	{ true, 'e', "env", "objects defined with ENV directive", },
+	{ true, 'l', "label", "objects defined with LABEL directive", },
+};
+
+/* Directives (and the AS connective) the parser reacts to. */
+typedef enum eKeywordId {
+	KEYWORD_FROM,
+	KEYWORD_AS,
+	KEYWORD_ARG,
+	KEYWORD_ENV,
+	KEYWORD_LABEL,
+} keywordId;
+
+static const keywordTable ContainerfileKeywordTable [] = {
+	{ "FROM",  KEYWORD_FROM  },
+	{ "AS",    KEYWORD_AS    },
+	{ "ARG",   KEYWORD_ARG   },
+	{ "ENV",   KEYWORD_ENV   },
+	{ "LABEL", KEYWORD_LABEL },
+};
+
+/* Token types produced by readToken(). */
+enum eTokenType {
+	TOKEN_EOF,
+	TOKEN_NEWLINE,
+	TOKEN_KEYWORD,
+	TOKEN_ID,
+	TOKEN_EQ,
+	TOKEN_UNDEFINED,
+};
+
+/* Language index of this parser; assigned in initialize(). */
+static langType Lang_containerfile;
+
+/*
+ *   Function declarations
+ */
+
+static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED);
+
+/*
+ *   Function definitions
+ */
+
+static struct tokenInfoClass containerfileTokenInfoClass = {
+	.nPreAlloc        = 1,
+	.typeForUndefined = TOKEN_UNDEFINED,
+	.keywordNone      = KEYWORD_NONE,
+	.typeForKeyword   = TOKEN_KEYWORD,
+	.typeForEOF       = TOKEN_EOF,
+	.read             = readToken,
+};
+
+/*
+ * Append the rest of a word to TOKEN.
+ *
+ * Reading stops at EOF, at a blank (space, tab, CR, form feed; the blank
+ * is consumed), at a newline or at '=' (both are pushed back so that the
+ * caller sees them as the next token).
+ *
+ * When cppGetc() returns CPP_STRING_SYMBOL or CPP_CHAR_SYMBOL, the text
+ * given by cppGetLastCharOrStringContents() is appended, so adjacent
+ * quoted and unquoted pieces such as 'BUILD'"_LOGLEVEL" form one word.
+ * A backslash makes the character following it part of the word.
+ */
+static void readId(tokenInfo *const token)
+{
+	while (true)
+	{
+		int c = cppGetc ();
+
+		switch (c)
+		{
+		case EOF:
+			return;
+
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\f':
+			return;
+
+		case '\n':
+			cppUngetc (c);
+			return;
+
+		case CPP_STRING_SYMBOL:
+		case CPP_CHAR_SYMBOL:
+			tokenCat (token, cppGetLastCharOrStringContents());
+			break;
+
+		case '\\':
+			c = cppGetc ();
+			if (c == EOF)
+			{
+				/* broken input */
+				return;
+			}
+			if (c == CPP_STRING_SYMBOL || c == CPP_CHAR_SYMBOL)
+			{
+				/* broken input */
+				tokenCat (token, cppGetLastCharOrStringContents());
+			}
+			else
+				tokenPutc (token, c);
+			break;
+
+		case '=':
+			cppUngetc (c);
+			return;
+
+		default:
+			tokenPutc (token, c);
+			break;
+		}
+	}
+}
+
+/*
+ * Read the next token into TOKEN (the .read callback of
+ * containerfileTokenInfoClass).
+ *
+ * Leading blanks (space, tab, form feed, CR) are skipped, then the line
+ * number and file position are recorded.  The token becomes one of:
+ *   TOKEN_EOF      at end of input,
+ *   TOKEN_NEWLINE  for '\n',
+ *   TOKEN_EQ       for '=',
+ *   TOKEN_ID       for anything else; the word is completed by readId().
+ * A TOKEN_ID whose text is found by lookupCaseKeyword() in this
+ * language's keyword table is turned into TOKEN_KEYWORD.
+ */
+static void readToken (tokenInfo *const token, void *data CTAGS_ATTR_UNUSED)
+{
+	token->type = TOKEN_UNDEFINED;
+	token->keyword = KEYWORD_NONE;
+	vStringClear (token->string);
+
+	int c;
+
+	do
+		c = cppGetc ();
+	while (c == ' ' || c == '\t' || c == '\f' || c == '\r');
+
+	token->lineNumber   = getInputLineNumber ();
+	token->filePosition = getInputFilePosition ();
+
+	switch (c)
+	{
+	case EOF:
+		token->type = TOKEN_EOF;
+		break;
+	case '\n':
+		token->type = TOKEN_NEWLINE;
+		tokenPutc (token, '\n');
+		break;
+	case CPP_STRING_SYMBOL:
+	case CPP_CHAR_SYMBOL:
+		tokenCat (token, cppGetLastCharOrStringContents());
+		token->type = TOKEN_ID;
+		readId (token);
+		break;
+	case '\\':
+		c = cppGetc ();
+		if (c == EOF)
+		{
+			token->type = TOKEN_EOF;
+			break;
+		}
+		if (c == CPP_STRING_SYMBOL || c == CPP_CHAR_SYMBOL)
+		{
+			/* broken input */
+			tokenCat (token, cppGetLastCharOrStringContents());
+		}
+		else
+			tokenPutc (token, c);
+		token->type = TOKEN_ID;
+		readId (token);
+		break;
+	case '=':
+		token->type = TOKEN_EQ;
+		tokenPutc (token, c);
+		break;
+	default:
+		tokenPutc (token, c);
+		token->type = TOKEN_ID;
+		readId (token);
+		break;
+	}
+
+	if (tokenIsType(token, ID))
+	{
+		token->keyword = lookupCaseKeyword (tokenString (token), Lang_containerfile);
+		if (token->keyword != KEYWORD_NONE)
+			token->type = TOKEN_KEYWORD;
+	}
+}
+
+/* Advance TOKEN to a NEWLINE token using tokenSkipToType(). */
+static void skipToNewline (tokenInfo *token)
+{
+	tokenSkipToType (token, TOKEN_NEWLINE);
+}
+
+/*
+ * Make a definition tag of kind "image" named NAME.
+ *
+ * If BASEINDEX designates an entry in the cork queue, the name of that
+ * entry is stored in the inheritance field of the new tag.
+ * Returns the value of makeTagEntry().
+ */
+static int makeImageTag (const char *name, int baseIndex)
+{
+	tagEntryInfo e;
+	tagEntryInfo *be;
+
+	initTagEntry(&e, name, K_IMAGE);
+
+	/* getEntryInCorkQueue() yields a pointer; test it as a pointer and
+	 * not against the integer index sentinel CORK_NIL. */
+	be = getEntryInCorkQueue(baseIndex);
+	if (be)
+		e.extensionFields.inheritance = be->name;
+	return makeTagEntry (&e);
+}
+
+/*
+ * Make an image tag whose name is generated by anonGenerateNew() with
+ * the prefix "img".  Used for a FROM directive without a usable
+ * "AS <name>" part.  Returns the value of makeImageTag().
+ */
+static int makeAnonImageTag (int baseIndex)
+{
+	vString *img = anonGenerateNew ("img", K_IMAGE);
+	int r = makeImageTag (vStringValue (img), baseIndex);
+	vStringDelete (img);
+	return r;
+}
+
+/*
+ * Parse the rest of a FROM directive; TOKEN holds the FROM keyword.
+ *
+ *   FROM [--option[=value]]... <base> [AS <name>]
+ *
+ * A reference tag (role "base") is made for <base>, and a definition tag
+ * is made for <name>, or for a generated anonymous name when no usable
+ * "AS <name>" follows.  Returns the index of the definition tag, or
+ * CORK_NIL if no base image name could be read.
+ */
+static int parseFrom (tokenInfo *token)
+{
+	int imageIndex = CORK_NIL;
+
+	tokenRead (token);
+
+	/* Skip options.  readId() ends a word at '=', so an option such as
+	 * "--platform=linux/amd64" arrives as three tokens: the option name,
+	 * EQ, and the value.  All of them must be skipped to reach the image
+	 * name. */
+	while (tokenIsType (token, ID) && tokenString (token)[0] == '-')
+	{
+		tokenRead (token);
+		if (tokenIsType (token, EQ))
+		{
+			tokenRead (token);	/* the option's value */
+
+			/* broken input */
+			if (tokenIsEOF (token)
+				|| tokenIsType (token, NEWLINE))
+				return CORK_NIL;
+
+			tokenRead (token);
+		}
+	}
+
+	if (tokenIsEOF (token))
+		return CORK_NIL;
+	if (!tokenIsType(token, ID)
+		|| vStringIsEmpty(token->string))
+		goto out;
+
+	int baseIndex = makeSimpleRefTag(token->string, K_IMAGE, R_IMAGE_BASE);
+	tokenRead (token);
+
+	if (tokenIsEOF (token)
+		|| tokenIsType(token, NEWLINE))
+		return makeAnonImageTag(baseIndex);
+
+	if (!tokenIsKeyword (token, AS))
+	{
+		imageIndex = makeAnonImageTag(baseIndex);
+		goto out;
+	}
+
+	tokenRead (token);
+	if (tokenIsEOF (token)
+		|| tokenIsType(token, NEWLINE))
+		return makeAnonImageTag(baseIndex);
+
+	if ((!tokenIsType(token, ID))
+		|| vStringIsEmpty (token->string))
+	{
+		imageIndex = makeAnonImageTag(baseIndex);
+		goto out;
+	}
+
+	imageIndex = makeImageTag(tokenString(token), baseIndex);
+
+ out:
+	skipToNewline(token);
+	return imageIndex;
+}
+
+/*
+ * Make a tag named NAME of kind KINDINDEX whose scope is the cork entry
+ * SCOPE (the image tag of the enclosing FROM directive, or CORK_NIL).
+ */
+static void makeContainerfileTag (const char *name, int kindIndex, int scope)
+{
+	tagEntryInfo e;
+
+	initTagEntry(&e, name, kindIndex);
+
+	e.extensionFields.scopeIndex = scope;
+
+	makeTagEntry (&e);
+}
+
+/*
+ * Parse the rest of an ARG or LABEL directive.
+ *
+ * Every ID or KEYWORD token on the line is tagged with KINDINDEX, except
+ * the token that directly follows '=', which is skipped as a value.
+ * Any other token type ends the parsing of the line.
+ */
+static void parseNamedObjects (tokenInfo *token, int kindIndex, int imageIndex)
+{
+	while (true)
+	{
+		tokenRead (token);
+
+		if (tokenIsEOF (token)
+			|| tokenIsType(token, NEWLINE))
+			return;
+
+		if (tokenIsType (token, EQ))
+		{
+			tokenRead (token);
+
+			/* broken input */
+			if (tokenIsEOF (token)
+				|| tokenIsType(token, NEWLINE))
+				return;
+		}
+		else if (tokenIsType (token, ID) || tokenIsType (token, KEYWORD))
+			makeContainerfileTag (tokenString(token), kindIndex, imageIndex);
+		else
+		{
+			skipToNewline(token);
+			return;
+		}
+	}
+}
+
+/*
+ * Parse the rest of an ENV directive.
+ *
+ * Both "ENV KEY=value ..." and "ENV KEY value ..." are accepted: after a
+ * key is tagged, the next token is looked at.  If it is '=', it is pushed
+ * back and handled by the EQ branch on the next iteration; otherwise it
+ * is taken as the value of the key and dropped.
+ */
+static void parseEnv (tokenInfo *token, int imageIndex)
+{
+	while (true)
+	{
+		tokenRead (token);
+
+		if (tokenIsEOF (token)
+			|| tokenIsType(token, NEWLINE))
+			return;
+
+		if (tokenIsType (token, EQ))
+		{
+			tokenRead (token);
+
+			/* broken input */
+			if (tokenIsEOF (token)
+				|| tokenIsType(token, NEWLINE))
+				return;
+		}
+		else if (tokenIsType (token, ID) || tokenIsType (token, KEYWORD))
+		{
+			makeContainerfileTag (tokenString(token), K_ENV, imageIndex);
+
+			tokenRead (token);
+
+			/* A key that ends the line has no value (broken input).
+			 * Stop here; otherwise the line terminator would be dropped
+			 * as the "value" and the words of the following directive
+			 * would be tagged as env names. */
+			if (tokenIsEOF (token)
+				|| tokenIsType (token, NEWLINE))
+				return;
+
+			if (tokenIsType (token, EQ))
+				tokenUnread (token);
+		}
+		else
+		{
+			skipToNewline(token);
+			return;
+		}
+	}
+}
+
+/*
+ * Parser entry point.
+ *
+ * Characters are read through the cpp layer (cppInit/cppGetc).  The
+ * first token of each line selects the handler: FROM, ARG, ENV and
+ * LABEL are parsed, any other line is skipped.  The index of the image
+ * tag made for the latest FROM directive is used as the scope of the
+ * tags made for the ARG, ENV and LABEL directives that follow it.
+ */
+static void findContainerfileTags (void)
+{
+	cppInit (false, false, false, false,
+			 KIND_GHOST_INDEX, 0, 0,
+			 KIND_GHOST_INDEX,
+			 KIND_GHOST_INDEX, 0, 0,
+			 FIELD_UNKNOWN);
+
+	tokenInfo *const token = newToken (&containerfileTokenInfoClass);
+	int imageIndex = CORK_NIL;
+
+	while (true)
+	{
+		tokenRead (token);
+		if (tokenIsEOF (token))
+			break;
+
+		if (tokenIsKeyword(token, FROM))
+			imageIndex = parseFrom(token);
+		else if (tokenIsKeyword(token, ARG))
+			parseNamedObjects (token, K_ARG, imageIndex);
+		else if (tokenIsKeyword(token, ENV))
+			parseEnv (token, imageIndex);
+		else if (tokenIsKeyword(token, LABEL))
+			parseNamedObjects (token, K_LABEL, imageIndex);
+		else
+			skipToNewline(token);
+	}
+
+	tokenDelete (token);
+	cppTerminate ();
+}
+
+/* Remember the language index assigned to this parser. */
+static void initialize (const langType language)
+{
+	Lang_containerfile = language;
+}
+
+/*
+ * Build the parser definition for "Containerfile".  Files named
+ * "Containerfile" or "Dockerfile" are mapped to this parser.
+ */
+extern parserDefinition* ContainerfileParser (void)
+{
+	static const char *const patterns [] = { "Containerfile", "Dockerfile", NULL };
+	parserDefinition* def = parserNew ("Containerfile");
+
+	def->patterns   = patterns;
+	def->kindTable  = ContainerfileKinds;
+	def->kindCount  = ARRAY_SIZE (ContainerfileKinds);
+
+	def->initialize = initialize;
+	def->parser     = findContainerfileTags;
+	def->keywordTable = ContainerfileKeywordTable;
+	def->keywordCount = ARRAY_SIZE (ContainerfileKeywordTable);
+
+	/* cpreprocessor wants corkQueue. */
+	def->useCork    = CORK_QUEUE;
+
+	return def;
+}
diff --git a/source.mak b/source.mak
index cf0326c7ae..3217419ab5 100644
--- a/source.mak
+++ b/source.mak
@@ -328,6 +328,7 @@ PARSER_SRCS = \
 	parsers/clojure.c \
 	parsers/css.c \
 	parsers/cobol.c \
+	parsers/containerfile.c \
 	parsers/cpreprocessor.c \
 	parsers/cxx/cxx.c \
 	parsers/cxx/cxx_debug.c \
diff --git a/win32/ctags_vs2013.vcxproj b/win32/ctags_vs2013.vcxproj
index afbbc6a629..38c6b464af 100644
--- a/win32/ctags_vs2013.vcxproj
+++ b/win32/ctags_vs2013.vcxproj
@@ -276,6 +276,7 @@
+
diff --git a/win32/ctags_vs2013.vcxproj.filters b/win32/ctags_vs2013.vcxproj.filters
index 1476a92bc9..b54c53be5e 100644
--- a/win32/ctags_vs2013.vcxproj.filters
+++ b/win32/ctags_vs2013.vcxproj.filters
@@ -351,6 +351,9 @@
 Source Files\parsers
+
+ Source Files\parsers
+
 Source Files\parsers