Skip to content

Commit d59092b

Browse files
committed
main: using regex for choosing a parser for the given file name
This change extends the --map-<LANG> option to support regular expression matching against the full file name. The original --map-<LANG> option supports glob-based matching and extension comparison against the file's basename. However, these two methods are not enough when file names are too generic. See #3287 . The regular expression passed to --map-<LANG> must be surrounded by % characters, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%' If you want to match in a case-insensitive way, append `i' after the second %, like --map-RpmMacros='%(.*/)?macros\.d/macros\.([^/]+)$%i' If you want to use % as part of an expression, put \ before the % to escape it. TODO: - [ ] update ctags.1 - [ ] add Tmain test cases - [ ] add pcre backend - [ ] update NEWS Signed-off-by: Masatake YAMATO <[email protected]>
1 parent 44da7ec commit d59092b

File tree

17 files changed

+502
-31
lines changed

17 files changed

+502
-31
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0

Tmain/list-map-rexprs.d/run.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright: 2025 Masatake YAMATO
2+
# License: GPL-2
3+
4+
CTAGS=$1
5+
6+
$CTAGS --quiet --options=NONE \
7+
--langdef=Something \
8+
--map-Something='%\%ESCAPING\%%' \
9+
--map-Something=+'%ICASE%i' \
10+
--map-Something=+'%TEMP%' \
11+
--map-Something=-'%TEMP%' \
12+
--map-Something=+'%TEMPi%i' \
13+
--map-Something=-'%TEMPi%i' \
14+
--list-map-rexprs=all && \
15+
echo '## RpmMacros' && \
16+
$CTAGS --quiet --options=NONE --list-map-rexprs=RpmMacros

Tmain/list-map-rexprs.d/stderr-expected.txt

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#LANGUAGE EXPRESSION CASE
2+
RpmMacros (.*/)?macros\.d/macros\.([^/]+)$ sensitive
3+
Something %ESCAPING% sensitive
4+
Something ICASE insensitive
5+
## RpmMacros
6+
#EXPRESSION CASE
7+
(.*/)?macros\.d/macros\.([^/]+)$ sensitive

Tmain/versioning.d/stdout-expected.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ About TEST language
1616
enabled: yes
1717
version: 10.9
1818

19+
Mappings/rexprs
20+
-------------------------------------------------------
21+
22+
1923
Mappings/patterns
2024
-------------------------------------------------------
2125
MYTEST

main/options.c

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "interactive_p.h"
4040
#include "writer_p.h"
4141
#include "trace.h"
42+
#include "flags_p.h"
4243

4344
#ifdef HAVE_JANSSON
4445
#include <jansson.h>
@@ -59,6 +60,8 @@
5960
/* The following separators are permitted for list options.
6061
*/
6162
#define EXTENSION_SEPARATOR '.'
63+
#define REXPR_START '%'
64+
#define REXPR_STOP '%'
6265
#define PATTERN_START '('
6366
#define PATTERN_STOP ')'
6467
#define IGNORE_SEPARATORS ", \t\n"
@@ -303,10 +306,10 @@ static optionDescription LongOptionDescription [] = {
303306
{1,0," --langmap=<map>[,<map>[...]]"},
304307
{1,0," Override default mapping of language to input file extension."},
305308
{1,0," e.g. --langmap=c:.c.x,java:+.j,make:([Mm]akefile).mak"},
306-
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>"},
309+
{1,0," --map-<LANG>=[+|-]<extension>|<pattern>|<rexpr>"},
307310
{1,0," Set, add(+) or remove(-) the map for <LANG>."},
308-
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>"},
309-
{1,0," or one file <extension> can be specified at once."},
311+
{1,0," Unlike --langmap, this doesn't take a list; only one file name <pattern>,"},
312+
{1,0," one file name <extension>, or one file <rexpr> can be specified at once."},
310313
{1,0," Unlike --langmap the change with this option affects mapping of <LANG> only."},
311314
{1,0,""},
312315
{1,0,"Tags File Contents Options"},
@@ -436,6 +439,8 @@ static optionDescription LongOptionDescription [] = {
436439
{1,0," Output list of language extensions in mapping."},
437440
{1,0," --list-map-patterns[=(<language>|all)]"},
438441
{1,0," Output list of language patterns in mapping."},
442+
{1,0," --list-map-rexprs[=(<language>|all)]"},
443+
{1,0," Output list of language regular expressions in mapping."},
439444
{1,0," --list-maps[=(<language>|all)]"},
440445
{1,0," Output list of language mappings (both extensions and patterns)."},
441446
{1,0," --list-mline-regex-flags"},
@@ -1793,6 +1798,7 @@ static char* extractMapFromParameter (const langType language,
17931798
++parameter;
17941799
for (p = parameter ; *p != PATTERN_STOP && *p != '\0' ; ++p)
17951800
{
1801+
/* TODO: Can this handle a pattern including ')' ? */
17961802
if (*p == '\\' && *(p + 1) == PATTERN_STOP)
17971803
++p;
17981804
}
@@ -1808,9 +1814,46 @@ static char* extractMapFromParameter (const langType language,
18081814
return result;
18091815
}
18101816

1817+
if (first == REXPR_START)
1818+
{
1819+
*mapType = LMAP_REXPR;
1820+
1821+
++parameter;
1822+
const char* parameter_orig = parameter;
1823+
vString *rexpr = vStringNew ();
1824+
for (p = parameter ; *p != REXPR_STOP && *p != '\0' ; ++p)
1825+
{
1826+
if (*p == '\\' && *(p + 1) == REXPR_STOP)
1827+
p++;
1828+
vStringPut (rexpr, *p);
1829+
}
1830+
if (*p == '\0')
1831+
error (FATAL, "Unterminated file name regular expression for %s language: %s",
1832+
getLanguageName (language), parameter_orig);
1833+
1834+
*tail = p + 1;
1835+
return vStringDeleteUnwrap (rexpr);
1836+
}
1837+
18111838
return NULL;
18121839
}
18131840

1841+
static void langmap_rexpr_icase_short (char c CTAGS_ATTR_UNUSED, void* data)
1842+
{
1843+
bool *icase = data;
1844+
*icase = true;
1845+
}
1846+
1847+
static void langmap_rexpr_icase_long (const char* s CTAGS_ATTR_UNUSED, const char* const unused CTAGS_ATTR_UNUSED, void* data)
1848+
{
1849+
langmap_rexpr_icase_short ('i', data);
1850+
}
1851+
1852+
static flagDefinition langmapRexprFlagDef[] = {
1853+
{ 'i', "icase", langmap_rexpr_icase_short, langmap_rexpr_icase_long,
1854+
NULL, "applied in a case-insensitive manner"},
1855+
};
1856+
18141857
static char* addLanguageMap (const langType language, char* map_parameter,
18151858
bool exclusiveInAllLanguages)
18161859
{
@@ -1823,6 +1866,13 @@ static char* addLanguageMap (const langType language, char* map_parameter,
18231866
addLanguageExtensionMap (language, map, exclusiveInAllLanguages);
18241867
else if (map && map_type == LMAP_PATTERN)
18251868
addLanguagePatternMap (language, map, exclusiveInAllLanguages);
1869+
else if (map && map_type == LMAP_REXPR)
1870+
{
1871+
bool icase = false;
1872+
1873+
flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase);
1874+
addLanguageRexprMap (language, map, icase, exclusiveInAllLanguages);
1875+
}
18261876
else
18271877
error (FATAL, "Badly formed language map for %s language",
18281878
getLanguageName (language));
@@ -1843,6 +1893,13 @@ static char* removeLanguageMap (const langType language, char* map_parameter)
18431893
removeLanguageExtensionMap (language, map);
18441894
else if (map && map_type == LMAP_PATTERN)
18451895
removeLanguagePatternMap (language, map);
1896+
else if (map && map_type == LMAP_REXPR)
1897+
{
1898+
bool icase = false;
1899+
1900+
flagsEval (p, langmapRexprFlagDef, ARRAY_SIZE(langmapRexprFlagDef), &icase);
1901+
removeLanguageRexprMap (language, map, icase);
1902+
}
18461903
else
18471904
error (FATAL, "Badly formed language map for %s language",
18481905
getLanguageName (language));
@@ -2164,6 +2221,13 @@ static void processListMapPatternsOption (const char *const option,
21642221
processListMapsOptionForType (option, parameter, LMAP_PATTERN|LMAP_TABLE_OUTPUT);
21652222
}
21662223

2224+
static void processListMapRexprsOption (const char *const option,
2225+
const char *const parameter)
2226+
{
2227+
processListMapsOptionForType (option, parameter, LMAP_REXPR|LMAP_TABLE_OUTPUT);
2228+
}
2229+
2230+
21672231
static void processListMapsOption (
21682232
const char *const option CTAGS_ATTR_UNUSED,
21692233
const char *const parameter CTAGS_ATTR_UNUSED)
@@ -2327,6 +2391,13 @@ static void processDescribeLanguage(const char *const option,
23272391
getLanguageVersionCurrent (language),
23282392
getLanguageVersionAge (language));
23292393

2394+
puts("");
2395+
puts("Mappings/rexprs");
2396+
puts("-------------------------------------------------------");
2397+
printLanguageMaps (language, LMAP_REXPR|LMAP_NO_LANG_PREFIX,
2398+
localOption.withListHeader, localOption.machinable,
2399+
stdout);
2400+
23302401
puts("");
23312402
puts("Mappings/patterns");
23322403
puts("-------------------------------------------------------");
@@ -2999,6 +3070,7 @@ static parametricOption ParametricOptions [] = {
29993070
{ "list-maps", processListMapsOption, true, STAGE_ANY },
30003071
{ "list-map-extensions", processListMapExtensionsOption, true, STAGE_ANY },
30013072
{ "list-map-patterns", processListMapPatternsOption, true, STAGE_ANY },
3073+
{ "list-map-rexprs", processListMapRexprsOption, true, STAGE_ANY },
30023074
{ "list-mline-regex-flags", processListMultilineRegexFlagsOption, true, STAGE_ANY },
30033075
{ "list-output-formats", processListOutputFormatsOption, true, STAGE_ANY },
30043076
{ "list-params", processListParametersOption, true, STAGE_ANY },

0 commit comments

Comments
 (0)