-
Notifications
You must be signed in to change notification settings - Fork 734
fix: panic in regexp.MustCompile when building wildcarddirectories with non ascii characters
#1947
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| package tsoptions | ||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| "github.com/microsoft/typescript-go/internal/tspath" | ||
| ) | ||
|
|
||
| func TestGetWildcardDirectories_NonASCIICharacters(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| include []string | ||
| exclude []string | ||
| currentDirectory string | ||
| useCaseSensitiveFileNames bool | ||
| }{ | ||
| { | ||
| name: "Norwegian character æ in path", | ||
| include: []string{"src/**/*.test.ts", "src/**/*.stories.ts", "src/**/*.mdx"}, | ||
| exclude: []string{"node_modules"}, | ||
| currentDirectory: "C:/Users/TobiasLægreid/dev/app/frontend/packages/react", | ||
| useCaseSensitiveFileNames: false, | ||
| }, | ||
| { | ||
| name: "Japanese characters in path", | ||
| include: []string{"src/**/*.ts"}, | ||
| exclude: []string{"テスト"}, | ||
| currentDirectory: "/Users/ユーザー/プロジェクト", | ||
| useCaseSensitiveFileNames: true, | ||
| }, | ||
| { | ||
| name: "Chinese characters in path", | ||
| include: []string{"源代码/**/*.js"}, | ||
| exclude: []string{"节点模块"}, | ||
| currentDirectory: "/home/用户/项目", | ||
| useCaseSensitiveFileNames: true, | ||
| }, | ||
| { | ||
| name: "Various Unicode characters", | ||
| include: []string{"src/**/*.ts"}, | ||
| exclude: []string{"node_modules"}, | ||
| currentDirectory: "/Users/Müller/café/naïve/résumé", | ||
| useCaseSensitiveFileNames: false, | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| comparePathsOptions := tspath.ComparePathsOptions{ | ||
| CurrentDirectory: tt.currentDirectory, | ||
| UseCaseSensitiveFileNames: tt.useCaseSensitiveFileNames, | ||
| } | ||
|
|
||
| result := getWildcardDirectories(tt.include, tt.exclude, comparePathsOptions) | ||
|
|
||
| if result == nil { | ||
| t.Fatalf("expected non-nil result") | ||
| } | ||
| }) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -81,11 +81,11 @@ func IsImplicitGlob(lastPathComponent string) bool { | |||||||||
| return !strings.ContainsAny(lastPathComponent, ".*?") | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Reserved characters, forces escaping of any non-word (or digit), non-whitespace character. | ||||||||||
| // It may be inefficient (we could just match (/[-[\]{}()*+?.,\\^$|#\s]/g), but this is future | ||||||||||
| // proof. | ||||||||||
| // Reserved characters - only escape actual regex metacharacters. | ||||||||||
| // Go's regexp doesn't support \x escape sequences for arbitrary characters, | ||||||||||
| // so we only escape characters that have special meaning in regex. | ||||||||||
| var ( | ||||||||||
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[^\w\s/]`) | ||||||||||
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) | ||||||||||
|
||||||||||
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) | |
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.+*?()\[\]{}^$|#]`) |
Copilot
AI
Oct 24, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The + character does not need to be escaped inside a character class. In Go regex character classes, + is treated as a literal character. Change \+ to + for consistency and correctness.
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.\+*?()\[\]{}^$|#]`) | |
| reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[\\.+*?()\[\]{}^$|#]`) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The goal of suggesting regex2 was to avoid making this particular change. If we still want to restrict the characters, it makes me think we should just use QuoteMeta and not change anything else.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We shouldn't be using regexp at all; does switching to regexp2 fix this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I don't understand why
getWildcardDirectoriesusesregexpinstead ofregexp2.(I would rather we not use regexp/regexp2 at all in the codebase but that's another problem.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i would guess it's still a problem with regexp2.
I remove this regex in favor of a simple for loop though 6bf5543
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It shouldn't be, given regexp2 has a mode that emulates ECMAScript regexes?
If we are going to use
regexpand notregexp2, then we can probably just useregexp.QuoteMeta.