Skip to content

Commit

Permalink
RMarkdown: new parser
Browse files Browse the repository at this point in the history
The parser supports the "{r label}" chunk-header syntax described in https://bookdown.org/yihui/rmarkdown/.
The "{r" part is parsed to choose the guest parser for the chunk.
The "label" in the "label}" part is tagged as an object of the chunklabel kind.

Signed-off-by: Masatake YAMATO <[email protected]>
  • Loading branch information
masatake committed Mar 15, 2022
1 parent 8d4de3b commit 355a2dc
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 3 deletions.
1 change: 1 addition & 0 deletions Tmain/list-subparsers-all.d/stdout-expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ PlistXML XML base <> sub {bidirectional}
PythonLoggingConfig Iniconf base <> sub {bidirectional}
QtMoc C++ base <> sub {bidirectional}
R6Class R base <> sub {bidirectional}
RMarkdown Markdown base <= sub {dedicated}
RSpec Ruby base => sub {shared}
RelaxNG XML base <> sub {bidirectional}
S4Class R base <> sub {bidirectional}
Expand Down
3 changes: 3 additions & 0 deletions Units/parser-rmarkdown.r/simple-rmarkdown.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--sort=no
--extras=+g
--fields=+KenlE
11 changes: 11 additions & 0 deletions Units/parser-rmarkdown.r/simple-rmarkdown.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
S1 input.rmd /^# S1$/;" chapter line:1 language:Markdown end:14
xyX input.rmd /^```{r xyX}$/;" chunklabel line:3 language:RMarkdown extras:subparser
S2 input.rmd /^# S2$/;" chapter line:15 language:Markdown end:25
__anon4a45a9700100 input.rmd /^```{r, cache = TRUE, dependson = "xyX"}$/;" chunklabel line:17 language:RMarkdown extras:subparser,anonymous
__anon4a45a9700200 input.rmd /^```{python}$/;" chunklabel line:21 language:RMarkdown extras:subparser,anonymous
S3 input.rmd /^# S3$/;" chapter line:26 language:Markdown end:27
x input.rmd /^x <- 1$/;" globalVar line:5 language:R extras:guest end:5
foo input.rmd /^foo <- function () {$/;" function line:6 language:R extras:guest end:9
y input.rmd /^ y <- 2$/;" functionVar line:7 language:R function:foo extras:guest end:7
X input.rmd /^X <- func()$/;" globalVar line:11 language:R extras:guest end:11
f input.rmd /^def f():$/;" function line:22 language:Python extras:guest end:24
27 changes: 27 additions & 0 deletions Units/parser-rmarkdown.r/simple-rmarkdown.d/input.rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# S1

```{r xyX}
x <- 1
foo <- function () {
y <- 2
return(y)
}
X <- func()
```

# S2

```{r, cache = TRUE, dependson = "xyX"}
mean(X)
```

```{python}
def f():
g()
return 3
```
# S3

1 change: 1 addition & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ The following parsers have been added:
* R6Class *R based subparser*
* RelaxNG *libxml*
* ReStructuredText
* RMarkdown *Markdown based subparser*
* Robot
* RpmMacros *optlib*
* RpmSpec
Expand Down
1 change: 1 addition & 0 deletions main/parsers_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@
PythonLoggingConfigParser, \
QemuHXParser, \
QtMocParser, \
RMarkdownParser, \
RParser, \
R6ClassParser, \
RSpecParser, \
Expand Down
38 changes: 35 additions & 3 deletions parsers/markdown.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include "promise.h"
#include "htable.h"

#include "markdown.h"

/*
* DATA DEFINITIONS
*/
Expand Down Expand Up @@ -210,6 +212,26 @@ static void getFootnoteMaybe (const char *line)
vStringDelete (footnote);
}

/*
 * Offer the text that follows a code fence (langMarker) to each subparser
 * visited by foreachSubparser, in turn, until one of them reports that it
 * extracted a language name into codeLang.
 *
 * Subparsers that leave the extractLanguageForCodeBlock callback unset are
 * passed over.  The callback is invoked between enterSubparser() and
 * leaveSubparser().
 *
 * Returns true as soon as a callback returns true; returns false when no
 * subparser handled the marker.
 */
static bool extractLanguageForCodeBlock (const char *langMarker,
										 vString *codeLang)
{
	subparser *sub;
	bool extracted = false;

	foreachSubparser (sub, false)
	{
		/* A Markdown subparser embeds `subparser` as its first member. */
		markdownSubparser *md = (markdownSubparser *)sub;

		enterSubparser (sub);
		if (md->extractLanguageForCodeBlock)
			extracted = md->extractLanguageForCodeBlock (md, langMarker, codeLang);
		leaveSubparser ();

		if (extracted)
			break;
	}

	return extracted;
}

static void findMarkdownTags (void)
{
vString *prevLine = vStringNew ();
Expand All @@ -221,6 +243,10 @@ static void findMarkdownTags (void)
bool inPreambule = false;
bool inComment = false;

subparser *sub = getSubparserRunningBaseparser();
if (sub)
chooseExclusiveSubparser (sub, NULL);

nestingLevels = nestingLevelsNewFull (0, fillEndField);

while ((line = readLineFromInputFile ()) != NULL)
Expand Down Expand Up @@ -255,10 +281,16 @@ static void findMarkdownTags (void)
inCodeChar = 0;
else if (inCodeChar)
{
const char *langMarker = (const char *)(line + pos + nSame);
startLineNumber = startSourceLineNumber = lineNum + 1;
vStringCopyS (codeLang, (const char *)(line + pos + nSame));
vStringStripLeading (codeLang);
vStringStripTrailing (codeLang);

vStringClear (codeLang);
if (! extractLanguageForCodeBlock (langMarker, codeLang))
{
vStringCopyS (codeLang, langMarker);
vStringStripLeading (codeLang);
vStringStripTrailing (codeLang);
}
}
else
{
Expand Down
29 changes: 29 additions & 0 deletions parsers/markdown.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (c) 2022, Masatake YAMATO
*
* This source code is released for free distribution under the terms of the
* GNU General Public License version 2 or (at your option) any later version.
*
* The interface for subparsers of Markdown
*/
#ifndef CTAGS_PARSER_MARKDOWN_H
#define CTAGS_PARSER_MARKDOWN_H

/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */

#include "subparser.h"
#include "vstring.h"

/* Interface a subparser of Markdown implements to hook into the base parser. */
typedef struct sMarkdownSubparser markdownSubparser;

struct sMarkdownSubparser {
/* Generic subparser header.  It must stay the first member: the Markdown
 * parser casts a `subparser *` to a `markdownSubparser *`. */
subparser subparser;
/* Optional callback (may be left unset) given the text that follows the
 * opening code fence in langMarker.  Return true after storing the language
 * of the code block in langName; return false to let the Markdown parser
 * fall back to using the whitespace-stripped marker as the language. */
bool (* extractLanguageForCodeBlock) (markdownSubparser *s,
const char *langMarker,
vString *langName);
};

#endif
134 changes: 134 additions & 0 deletions parsers/rmarkdown.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
*
* Copyright (c) 2022, Masatake YAMATO
*
* This source code is released for free distribution under the terms of the
* GNU General Public License version 2 or (at your option) any later version.
*
* This module contains functions for generating tags for R Markdown files.
* https://bookdown.org/yihui/rmarkdown/
*
*/

/*
* INCLUDE FILES
*/
#include "general.h" /* must always come first */
#include "markdown.h"

#include "entry.h"
#include "parse.h"

#include <ctype.h>
#include <string.h>

/*
* DATA DEFINITIONS
*/
/* Kind indices for RMarkdown tags; each value is used as the kind index
 * handed to initTagEntry() and anonGenerate() in this file. */
typedef enum {
K_CHUNK_LABEL = 0,
} rmarkdownKind;

/* Kind table registered for the RMarkdown parser; K_CHUNK_LABEL selects its
 * only entry.  The fields are presumably: enabled-by-default flag, one-letter
 * kind, kind name, description -- confirm against kindDefinition. */
static kindDefinition RMarkdownKinds[] = {
{ true, 'l', "chunklabel", "chunk labels"},
};

/* Subparser record for RMarkdown.  It only wraps the Markdown subparser
 * interface; no RMarkdown-specific state is kept. */
struct sRMarkdownSubparser {
markdownSubparser markdown;
};

/*
* FUNCTION DEFINITIONS
*/

/*
 * Parser entry point for RMarkdown.  It does no scanning of its own and only
 * calls scheduleRunningBaseparser().
 */
static void findRMarkdownTags (void)
{
/* The argument 0 presumably indexes the dependencies table set up in
 * RMarkdownParser(), whose entry 0 names "Markdown" -- confirm against
 * scheduleRunningBaseparser(). */
scheduleRunningBaseparser (0);
}

/*
 * Advance the character pointer CP past any run of spaces and tabs.
 *
 * CP must be a modifiable lvalue; it is evaluated more than once.
 * The expansion is a single statement: write `skip_space(p);` with the
 * trailing semicolon.  Wrapping in do { } while (0) keeps the macro safe
 * as the body of an unbraced if/else.
 */
#define skip_space(CP) \
	do { \
		while (*(CP) == ' ' || *(CP) == '\t') \
			(CP)++; \
	} while (0)

/*
 * Emit one tag named NAME with the kind KINDINDEX.  When ANONYMOUS is true
 * the entry is additionally marked with the XTAG_ANONYMOUS extra.
 */
static void makeRMarkdownTag (vString *name, int kindIndex, bool anonymous)
{
	tagEntryInfo tag;

	initTagEntry (&tag, vStringValue (name), kindIndex);

	if (anonymous)
		markTagExtraBit (&tag, XTAG_ANONYMOUS);

	makeTagEntry (&tag);
}

/*
 * Parse an R Markdown chunk header such as "{r label, opt = val}" found
 * after an opening code fence.
 *
 * s          - the subparser record; not used here.
 * langMarker - the text following the fence characters.  Leading spaces and
 *              tabs are skipped, so "``` {r}" is handled like "```{r}".
 * langName   - receives (by appending) the engine name, i.e. the word that
 *              directly follows '{'.
 *
 * Returns false, leaving langName untouched, when the marker does not start
 * with '{', when none of ' ', '\t', ',' or '}' follows the engine name, or
 * when the engine name is empty.  Returns true otherwise.
 *
 * Side effect: a chunklabel tag may be emitted.
 *  - Engine name directly followed by ',' or '}': an anonymous tag.
 *  - Otherwise the label is the run of alphanumerics and '-' after the
 *    blanks; an empty run yields an anonymous name.  The tag is emitted only
 *    if the label is followed (after optional blanks) by ',' or '}'.
 */
static bool extractLanguageForCodeBlock (markdownSubparser *s,
										 const char *langMarker,
										 vString *langName)
{
	const char *cp = langMarker;

	/* The base parser hands over the marker unstripped; whitespace between
	 * the fence and '{' is accepted by knitr's chunk-begin pattern. */
	skip_space(cp);

	if (*cp != '{')
		return false;
	cp++;

	/* The engine name ends at the first blank, ',' or '}'. */
	const char *end = strpbrk(cp, " \t,}");
	if (!end)
		return false;

	if (end - cp == 0)
		return false;

	vStringNCatS (langName, cp, end - cp);

	cp = end;
	if (*cp == ',' || *cp == '}')
	{
		/* "{r}" or "{r, ...}": there is no room for a label. */
		vString *name = anonGenerateNew("__anon", K_CHUNK_LABEL);
		makeRMarkdownTag (name, K_CHUNK_LABEL, true);
		vStringDelete (name);
		return true;
	}

	skip_space(cp);

	vString *chunk_label = vStringNew ();
	bool anonymous = false;
	/* isalnum() requires a value representable as unsigned char. */
	while (isalnum((unsigned char)*cp) || *cp == '-')
		vStringPut (chunk_label, *cp++);

	if (vStringLength (chunk_label) == 0)
	{
		anonGenerate (chunk_label, "__anon", K_CHUNK_LABEL);
		anonymous = true;
	}

	/* Tag only a cleanly terminated label; e.g. "{r cache=TRUE}" stops at
	 * '=' and therefore produces no tag. */
	skip_space(cp);
	if (*cp == ',' || *cp == '}')
		makeRMarkdownTag (chunk_label, K_CHUNK_LABEL, anonymous);

	vStringDelete (chunk_label);
	return true;
}

extern parserDefinition* RMarkdownParser (void)
{
static const char *const extensions [] = { "rmd", NULL };
static struct sRMarkdownSubparser rmarkdownSubparser = {
.markdown = {
.subparser = {
.direction = SUBPARSER_SUB_RUNS_BASE,
},
.extractLanguageForCodeBlock = extractLanguageForCodeBlock,
},
};
static parserDependency dependencies [] = {
[0] = { DEPTYPE_SUBPARSER, "Markdown", &rmarkdownSubparser },
};

parserDefinition* const def = parserNew ("RMarkdown");


def->dependencies = dependencies;
def->dependencyCount = ARRAY_SIZE(dependencies);
def->kindTable = RMarkdownKinds;
def->kindCount = ARRAY_SIZE (RMarkdownKinds);
def->extensions = extensions;
def->parser = findRMarkdownTags;
return def;
}
2 changes: 2 additions & 0 deletions source.mak
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ PARSER_HEADS = \
parsers/iniconf.h \
parsers/m4.h \
parsers/make.h \
parsers/markdown.h \
parsers/perl.h \
parsers/r.h \
parsers/ruby.h \
Expand Down Expand Up @@ -356,6 +357,7 @@ PARSER_SRCS = \
parsers/r-s4class.c \
parsers/r.c \
parsers/rexx.c \
parsers/rmarkdown.c \
parsers/robot.c \
parsers/rpmspec.c \
parsers/rspec.c \
Expand Down
1 change: 1 addition & 0 deletions win32/ctags_vs2013.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@
<ClCompile Include="..\parsers\r-s4class.c" />
<ClCompile Include="..\parsers\r.c" />
<ClCompile Include="..\parsers\rexx.c" />
<ClCompile Include="..\parsers\rmarkdown.c" />
<ClCompile Include="..\parsers\robot.c" />
<ClCompile Include="..\parsers\rpmspec.c" />
<ClCompile Include="..\parsers\rspec.c" />
Expand Down
3 changes: 3 additions & 0 deletions win32/ctags_vs2013.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@
<ClCompile Include="..\parsers\rexx.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
<ClCompile Include="..\parsers\rmarkdown.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
<ClCompile Include="..\parsers\robot.c">
<Filter>Source Files\parsers</Filter>
</ClCompile>
Expand Down

0 comments on commit 355a2dc

Please sign in to comment.