From b400852f9f5253133943ed8493830f67a628b9e8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 7 May 2025 13:24:22 +1000 Subject: [PATCH 1/2] Add `ripgrep-14.1.1-tiny` benchmark. --- .../ripgrep-14.1.1-tiny/.cargo/config.toml | 21 + .../ripgrep-14.1.1-tiny/.cargo_vcs_info.json | 6 + .../ripgrep-14.1.1-tiny/.gitignore | 22 + .../ripgrep-14.1.1-tiny/.ignore | 1 + .../ripgrep-14.1.1-tiny/CHANGELOG.md | 1711 ++++ .../ripgrep-14.1.1-tiny/COPYING | 3 + .../ripgrep-14.1.1-tiny/Cargo.lock | 536 ++ .../ripgrep-14.1.1-tiny/Cargo.toml | 188 + .../ripgrep-14.1.1-tiny/Cargo.toml.orig | 120 + .../ripgrep-14.1.1-tiny/FAQ.md | 1046 +++ .../ripgrep-14.1.1-tiny/GUIDE.md | 1022 +++ .../ripgrep-14.1.1-tiny/LICENSE-MIT | 21 + .../ripgrep-14.1.1-tiny/README.md | 524 ++ .../ripgrep-14.1.1-tiny/RELEASE-CHECKLIST.md | 59 + .../ripgrep-14.1.1-tiny/UNLICENSE | 24 + .../ripgrep-14.1.1-tiny/build.rs | 46 + .../ripgrep-14.1.1-tiny/crates/core/README.md | 15 + .../crates/core/flags/complete/bash.rs | 107 + .../crates/core/flags/complete/encodings.sh | 29 + .../crates/core/flags/complete/fish.rs | 68 + .../crates/core/flags/complete/mod.rs | 10 + .../crates/core/flags/complete/powershell.rs | 86 + .../crates/core/flags/complete/rg.zsh | 637 ++ .../crates/core/flags/complete/zsh.rs | 23 + .../crates/core/flags/config.rs | 170 + .../crates/core/flags/defs.rs | 7675 +++++++++++++++++ .../crates/core/flags/doc/help.rs | 259 + .../crates/core/flags/doc/man.rs | 110 + .../crates/core/flags/doc/mod.rs | 38 + .../crates/core/flags/doc/template.long.help | 61 + .../crates/core/flags/doc/template.rg.1 | 424 + .../crates/core/flags/doc/template.short.help | 38 + .../crates/core/flags/doc/version.rs | 177 + .../crates/core/flags/hiargs.rs | 1471 ++++ .../crates/core/flags/lowargs.rs | 758 ++ .../crates/core/flags/mod.rs | 302 + .../crates/core/flags/parse.rs | 476 + .../crates/core/haystack.rs | 160 + .../ripgrep-14.1.1-tiny/crates/core/logger.rs | 72 + 
.../ripgrep-14.1.1-tiny/crates/core/main.rs | 483 ++ .../crates/core/messages.rs | 139 + .../ripgrep-14.1.1-tiny/crates/core/search.rs | 447 + .../ripgrep-14.1.1-tiny/perf-config.json | 4 + .../pkg/windows/Manifest.xml | 28 + .../ripgrep-14.1.1-tiny/pkg/windows/README.md | 15 + .../ripgrep-14.1.1-tiny/rustfmt.toml | 2 + .../ripgrep-14.1.1-tiny/tests/binary.rs | 306 + .../tests/data/sherlock-nul.txt | 2133 +++++ .../ripgrep-14.1.1-tiny/tests/data/sherlock.Z | Bin 0 -> 286 bytes .../tests/data/sherlock.br | 2 + .../tests/data/sherlock.bz2 | Bin 0 -> 272 bytes .../tests/data/sherlock.gz | Bin 0 -> 263 bytes .../tests/data/sherlock.lz4 | Bin 0 -> 365 bytes .../tests/data/sherlock.lzma | Bin 0 -> 286 bytes .../tests/data/sherlock.xz | Bin 0 -> 332 bytes .../tests/data/sherlock.zst | Bin 0 -> 249 bytes .../ripgrep-14.1.1-tiny/tests/feature.rs | 1174 +++ .../ripgrep-14.1.1-tiny/tests/hay.rs | 17 + .../ripgrep-14.1.1-tiny/tests/json.rs | 373 + .../ripgrep-14.1.1-tiny/tests/macros.rs | 61 + .../ripgrep-14.1.1-tiny/tests/misc.rs | 1130 +++ .../ripgrep-14.1.1-tiny/tests/multiline.rs | 121 + .../ripgrep-14.1.1-tiny/tests/regression.rs | 1219 +++ .../ripgrep-14.1.1-tiny/tests/tests.rs | 22 + .../ripgrep-14.1.1-tiny/tests/util.rs | 508 ++ 65 files changed, 26700 insertions(+) create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo/config.toml create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo_vcs_info.json create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/.gitignore create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/.ignore create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/CHANGELOG.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/COPYING create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.lock create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml create mode 100644 
collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml.orig create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/FAQ.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/GUIDE.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/LICENSE-MIT create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/README.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/RELEASE-CHECKLIST.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/UNLICENSE create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/build.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/README.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/bash.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/encodings.sh create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/fish.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/mod.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/powershell.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/rg.zsh create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/zsh.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/config.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/defs.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/help.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/man.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/mod.rs create mode 100644 
collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.long.help create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.rg.1 create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.short.help create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/version.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/hiargs.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/lowargs.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/mod.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/parse.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/haystack.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/logger.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/main.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/messages.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/search.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/Manifest.xml create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/README.md create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/rustfmt.toml create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/binary.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock-nul.txt create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.Z create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.br create mode 100644 
collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.bz2 create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.gz create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.lz4 create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.lzma create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.xz create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.zst create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/feature.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/hay.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/json.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/macros.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/misc.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/multiline.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/regression.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/tests.rs create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/util.rs diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo/config.toml b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo/config.toml new file mode 100644 index 000000000..9e5430116 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo/config.toml @@ -0,0 +1,21 @@ +# On Windows MSVC, statically link the C runtime so that the resulting EXE does +# not depend on the vcruntime DLL. +# +# See: https://github.com/BurntSushi/ripgrep/pull/1613 +[target.x86_64-pc-windows-msvc] +rustflags = ["-C", "target-feature=+crt-static"] +[target.i686-pc-windows-msvc] +rustflags = ["-C", "target-feature=+crt-static"] + +# Do the same for MUSL targets. 
At the time of writing (2023-10-23), this is +# the default. But the plan is for the default to change to dynamic linking. +# The whole point of MUSL with respect to ripgrep is to create a fully +# statically linked executable. +# +# See: https://github.com/rust-lang/compiler-team/issues/422 +# See: https://github.com/rust-lang/compiler-team/issues/422#issuecomment-812135847 +[target.x86_64-unknown-linux-musl] +rustflags = [ + "-C", "target-feature=+crt-static", + "-C", "link-self-contained=yes", +] diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo_vcs_info.json b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo_vcs_info.json new file mode 100644 index 000000000..8f17caa64 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "4649aa9700619f94cf9c66876e9549d83420e16c" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.gitignore b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.gitignore new file mode 100644 index 000000000..881633e23 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.gitignore @@ -0,0 +1,22 @@ +.*.swp +tags +target +/grep/Cargo.lock +/globset/Cargo.lock +/ignore/Cargo.lock +/termcolor/Cargo.lock +/wincolor/Cargo.lock +/deployment +/.idea + +# Snapcraft files +stage +prime +parts +*.snap +*.pyc +ripgrep*_source.tar.bz2 + +# Cargo timings +cargo-timing-*.html +cargo-timing.html diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.ignore b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.ignore new file mode 100644 index 000000000..19022145b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/.ignore @@ -0,0 +1 @@ +!/.github/ diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/CHANGELOG.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/CHANGELOG.md new file mode 100644 index 000000000..60438379a --- /dev/null +++ 
b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/CHANGELOG.md @@ -0,0 +1,1711 @@ +14.1.1 (2024-09-08) +=================== +This is a minor release with a bug fix for a matching bug. In particular, a bug +was found that could cause ripgrep to ignore lines that should match. That is, +false negatives. It is difficult to characterize the specific set of regexes +in which this occurs as it requires multiple different optimization strategies +to collide and produce an incorrect result. But as one reported example, in +ripgrep, the regex `(?i:e.x|ex)` does not match `e-x` when it should. (This +bug is a result of an inner literal optimization performed in the `grep-regex` +crate and not in the `regex` crate.) + +Bug fixes: + +* [BUG #2884](https://github.com/BurntSushi/ripgrep/issues/2884): + Fix bug where ripgrep could miss some matches that it should report. + +Miscellaneous: + +* [MISC #2748](https://github.com/BurntSushi/ripgrep/issues/2748): + Remove ripgrep's `simd-accel` feature because it was frequently broken. + + +14.1.0 (2024-01-06) +=================== +This is a minor release with a few small new features and bug fixes. This +release contains a bug fix for unbounded memory growth while walking a +directory tree. This release also includes improvements to the completions for +the `fish` shell, and release binaries for several additional ARM targets. + +Bug fixes: + +* [BUG #2690](https://github.com/BurntSushi/ripgrep/issues/2690): + Fix unbounded memory growth in the `ignore` crate. + +Feature enhancements: + +* Added or improved file type filtering for Lean and Meson. +* [FEATURE #2684](https://github.com/BurntSushi/ripgrep/issues/2684): + Improve completions for the `fish` shell. +* [FEATURE #2702](https://github.com/BurntSushi/ripgrep/pull/2702): + Add release binaries for `armv7-unknown-linux-gnueabihf`, + `armv7-unknown-linux-musleabihf` and `armv7-unknown-linux-musleabi`. 
+ + +14.0.3 (2023-11-28) +=================== +This is a patch release with a bug fix for the `--sortr` flag. + +Bug fixes: + +* [BUG #2664](https://github.com/BurntSushi/ripgrep/issues/2664): + Fix `--sortr=path`. I left a `todo!()` in the source. Oof. + + +14.0.2 (2023-11-27) +=================== +This is a patch release with a few small bug fixes. + +Bug fixes: + +* [BUG #2654](https://github.com/BurntSushi/ripgrep/issues/2654): + Fix `deb` release sha256 sum file. +* [BUG #2658](https://github.com/BurntSushi/ripgrep/issues/2658): + Fix partial regression in the behavior of `--null-data --line-regexp`. +* [BUG #2659](https://github.com/BurntSushi/ripgrep/issues/2659): + Fix Fish shell completions. +* [BUG #2662](https://github.com/BurntSushi/ripgrep/issues/2662): + Fix typo in documentation for `-i/--ignore-case`. + + +14.0.1 (2023-11-26) +=================== +This is a patch release meant to fix `cargo install ripgrep` on Windows. + +Bug fixes: + +* [BUG #2653](https://github.com/BurntSushi/ripgrep/issues/2653): + Include `pkg/windows/Manifest.xml` in crate package. + + +14.0.0 (2023-11-26) +=================== +ripgrep 14 is a new major version release of ripgrep that has some new +features, performance improvements and a lot of bug fixes. + +The headlining feature in this release is hyperlink support. In this release, +they are an opt-in feature but may change to an opt-out feature in the future. +To enable them, try passing `--hyperlink-format default`. If you use [VS Code], +then try passing `--hyperlink-format vscode`. Please [report your experience +with hyperlinks][report-hyperlinks], positive or negative. + +[VS Code]: https://code.visualstudio.com/ +[report-hyperlinks]: https://github.com/BurntSushi/ripgrep/discussions/2611 + +Another headlining development in this release is that it contains a rewrite +of its regex engine. You generally shouldn't notice any changes, except that +some searches may get faster. 
You can read more about the [regex engine rewrite +on my blog][regex-internals]. Please [report your performance improvements or +regressions that you notice][report-perf]. + +[report-perf]: https://github.com/BurntSushi/ripgrep/discussions/2652 + +Finally, ripgrep switched the library it uses for argument parsing. Users +should not notice a difference in most cases (error messages have changed +somewhat), but flag overrides should generally be more consistent. For example, +things like `--no-ignore --ignore-vcs` work as one would expect (disables all +filtering related to ignore rules except for rules found in version control +systems such as `git`). + +[regex-internals]: https://blog.burntsushi.net/regex-internals/ + +**BREAKING CHANGES**: + +* `rg -C1 -A2` used to be equivalent to `rg -A2`, but now it is equivalent to + `rg -B1 -A2`. That is, `-A` and `-B` no longer completely override `-C`. + Instead, they only partially override `-C`. + +Build process changes: + +* ripgrep's shell completions and man page are now created by running ripgrep +with a new `--generate` flag. For example, `rg --generate man` will write a +man page in `roff` format on stdout. The release archives have not changed. +* The optional build dependency on `asciidoc` or `asciidoctor` has been +dropped. Previously, it was used to produce ripgrep's man page. ripgrep now +owns this process itself by writing `roff` directly. + +Performance improvements: + +* [PERF #1746](https://github.com/BurntSushi/ripgrep/issues/1746): + Make some cases with inner literals faster. +* [PERF #1760](https://github.com/BurntSushi/ripgrep/issues/1760): + Make most searches with `\b` look-arounds (among others) much faster. +* [PERF #2591](https://github.com/BurntSushi/ripgrep/pull/2591): + Parallel directory traversal now uses work stealing for faster searches. +* [PERF #2642](https://github.com/BurntSushi/ripgrep/pull/2642): + Parallel directory traversal has some contention reduced. 
+ +Feature enhancements: + +* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, + Gradle, GraphQL, Markdown, Prolog, Raku, TypeScript, USD, V +* [FEATURE #665](https://github.com/BurntSushi/ripgrep/issues/665): + Add a new `--hyperlink-format` flag that turns file paths into hyperlinks. +* [FEATURE #1709](https://github.com/BurntSushi/ripgrep/issues/1709): + Improve documentation of ripgrep's behavior when stdout is a tty. +* [FEATURE #1737](https://github.com/BurntSushi/ripgrep/issues/1737): + Provide binaries for Apple silicon. +* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790): + Add new `--stop-on-nonmatch` flag. +* [FEATURE #1814](https://github.com/BurntSushi/ripgrep/issues/1814): + Flags are now categorized in `-h/--help` output and ripgrep's man page. +* [FEATURE #1838](https://github.com/BurntSushi/ripgrep/issues/1838): + An error is shown when searching for NUL bytes with binary detection enabled. +* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195): + When `extra-verbose` mode is enabled in zsh, show extra file type info. +* [FEATURE #2298](https://github.com/BurntSushi/ripgrep/issues/2298): + Add instructions for installing ripgrep using `cargo binstall`. +* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409): + Added installation instructions for `winget`. +* [FEATURE #2425](https://github.com/BurntSushi/ripgrep/pull/2425): + Shell completions (and man page) can be created via `rg --generate`. +* [FEATURE #2524](https://github.com/BurntSushi/ripgrep/issues/2524): + The `--debug` flag now indicates whether stdin or `./` is being searched. +* [FEATURE #2643](https://github.com/BurntSushi/ripgrep/issues/2643): + Make `-d` a short flag for `--max-depth`. +* [FEATURE #2645](https://github.com/BurntSushi/ripgrep/issues/2645): + The `--version` output will now also contain PCRE2 availability information. 
+ +Bug fixes: + +* [BUG #884](https://github.com/BurntSushi/ripgrep/issues/884): + Don't error when `-v/--invert-match` is used multiple times. +* [BUG #1275](https://github.com/BurntSushi/ripgrep/issues/1275): + Fix bug with `\b` assertion in the regex engine. +* [BUG #1376](https://github.com/BurntSushi/ripgrep/issues/1376): + Using `--no-ignore --ignore-vcs` now works as one would expect. +* [BUG #1622](https://github.com/BurntSushi/ripgrep/issues/1622): + Add note about error messages to `-z/--search-zip` documentation. +* [BUG #1648](https://github.com/BurntSushi/ripgrep/issues/1648): + Fix bug where sometimes short flags with values, e.g., `-M 900`, would fail. +* [BUG #1701](https://github.com/BurntSushi/ripgrep/issues/1701): + Fix bug where some flags could not be repeated. +* [BUG #1757](https://github.com/BurntSushi/ripgrep/issues/1757): + Fix bug when searching a sub-directory didn't have ignores applied correctly. +* [BUG #1891](https://github.com/BurntSushi/ripgrep/issues/1891): + Fix bug when using `-w` with a regex that can match the empty string. +* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911): + Disable mmap searching in all non-64-bit environments. +* [BUG #1966](https://github.com/BurntSushi/ripgrep/issues/1966): + Fix bug where ripgrep can panic when printing to stderr. +* [BUG #2046](https://github.com/BurntSushi/ripgrep/issues/2046): + Clarify that `--pre` can accept any kind of path in the documentation. +* [BUG #2108](https://github.com/BurntSushi/ripgrep/issues/2108): + Improve docs for `-r/--replace` syntax. +* [BUG #2198](https://github.com/BurntSushi/ripgrep/issues/2198): + Fix bug where `--no-ignore-dot` would not ignore `.rgignore`. +* [BUG #2201](https://github.com/BurntSushi/ripgrep/issues/2201): + Improve docs for `-r/--replace` flag. +* [BUG #2288](https://github.com/BurntSushi/ripgrep/issues/2288): + `-A` and `-B` now only each partially override `-C`. 
+* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236): + Fix gitignore parsing bug where a trailing `\/` resulted in an error. +* [BUG #2243](https://github.com/BurntSushi/ripgrep/issues/2243): + Fix `--sort` flag for values other than `path`. +* [BUG #2246](https://github.com/BurntSushi/ripgrep/issues/2246): + Add note in `--debug` logs when binary files are ignored. +* [BUG #2337](https://github.com/BurntSushi/ripgrep/issues/2337): + Improve docs to mention that `--stats` is always implied by `--json`. +* [BUG #2381](https://github.com/BurntSushi/ripgrep/issues/2381): + Make `-p/--pretty` override flags like `--no-line-number`. +* [BUG #2392](https://github.com/BurntSushi/ripgrep/issues/2392): + Improve global git config parsing of the `excludesFile` field. +* [BUG #2418](https://github.com/BurntSushi/ripgrep/pull/2418): + Clarify sorting semantics of `--sort=path`. +* [BUG #2458](https://github.com/BurntSushi/ripgrep/pull/2458): + Make `--trim` run before `-M/--max-columns` takes effect. +* [BUG #2479](https://github.com/BurntSushi/ripgrep/issues/2479): + Add documentation about `.ignore`/`.rgignore` files in parent directories. +* [BUG #2480](https://github.com/BurntSushi/ripgrep/issues/2480): + Fix bug when using inline regex flags with `-e/--regexp`. +* [BUG #2505](https://github.com/BurntSushi/ripgrep/issues/2505): + Improve docs for `--vimgrep` by mentioning footguns and some work-arounds. +* [BUG #2519](https://github.com/BurntSushi/ripgrep/issues/2519): + Fix incorrect default value in documentation for `--field-match-separator`. +* [BUG #2523](https://github.com/BurntSushi/ripgrep/issues/2523): + Make executable searching take `.com` into account on Windows. +* [BUG #2574](https://github.com/BurntSushi/ripgrep/issues/2574): + Fix bug in `-w/--word-regexp` that would result in incorrect match offsets. +* [BUG #2623](https://github.com/BurntSushi/ripgrep/issues/2623): + Fix a number of bugs with the `-w/--word-regexp` flag. 
+* [BUG #2636](https://github.com/BurntSushi/ripgrep/pull/2636): + Strip release binaries for macOS. + + +13.0.0 (2021-06-12) +=================== +ripgrep 13 is a new major version release of ripgrep that primarily contains +bug fixes, some performance improvements and a few minor breaking changes. +There is also a fix for a security vulnerability on Windows +([CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013)). + +Some highlights: + +A new short flag, `-.`, has been added. It is an alias for the `--hidden` flag, +which instructs ripgrep to search hidden files and directories. + +ripgrep is now using a new +[vectorized implementation of `memmem`](https://github.com/BurntSushi/memchr/pull/82), +which accelerates many common searches. If you notice any performance +regressions (or major improvements), I'd love to hear about them through an +issue report! + +Also, for Windows users targeting MSVC, Cargo will now build fully static +executables of ripgrep. The release binaries for ripgrep 13 have been compiled +using this configuration. + +**BREAKING CHANGES**: + +**Binary detection output has changed slightly.** + +In this release, a small tweak has been made to the output format when a binary +file is detected. Previously, it looked like this: + +``` +Binary file FOO matches (found "\0" byte around offset XXX) +``` + +Now it looks like this: + +``` +FOO: binary file matches (found "\0" byte around offset XXX) +``` + +**vimgrep output in multi-line now only prints the first line for each match.** + +See [issue 1866](https://github.com/BurntSushi/ripgrep/issues/1866) for more +discussion on this. Previously, every line in a match was duplicated, even +when it spanned multiple lines. There are no changes to vimgrep output when +multi-line mode is disabled. + +**In multi-line mode, --count is now equivalent to --count-matches.** + +This appears to match how `pcre2grep` implements `--count`. 
Previously, ripgrep +would produce outright incorrect counts. Another alternative would be to simply +count the number of lines---even if it's more than the number of matches---but +that seems highly unintuitive. + +**FULL LIST OF FIXES AND IMPROVEMENTS:** + +Security fixes: + +* [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013): + Fixes a security hole on Windows where running ripgrep with either the + `-z/--search-zip` or `--pre` flags can result in running arbitrary + executables from the current directory. +* [VULN #1773](https://github.com/BurntSushi/ripgrep/issues/1773): + This is the public facing issue tracking CVE-2021-3013. ripgrep's README + now contains a section describing how to report a vulnerability. + +Performance improvements: + +* [PERF #1657](https://github.com/BurntSushi/ripgrep/discussions/1657): + Check if a file should be ignored first before issuing stat calls. +* [PERF memchr#82](https://github.com/BurntSushi/memchr/pull/82): + ripgrep now uses a new vectorized implementation of `memmem`. + +Feature enhancements: + +* Added or improved file type filtering for ASP, Bazel, dvc, FlatBuffers, + Futhark, minified files, Mint, pofiles (from GNU gettext), Racket, Red, Ruby, + VCL, Yang. +* [FEATURE #1404](https://github.com/BurntSushi/ripgrep/pull/1404): + ripgrep now prints a warning if nothing is searched. +* [FEATURE #1613](https://github.com/BurntSushi/ripgrep/pull/1613): + Cargo will now produce static executables on Windows when using MSVC. +* [FEATURE #1680](https://github.com/BurntSushi/ripgrep/pull/1680): + Add `-.` as a short flag alias for `--hidden`. +* [FEATURE #1842](https://github.com/BurntSushi/ripgrep/issues/1842): + Add `--field-{context,match}-separator` for customizing field delimiters. +* [FEATURE #1856](https://github.com/BurntSushi/ripgrep/pull/1856): + The README now links to a + [Spanish translation](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep). 
+ +Bug fixes: + +* [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277): + Document cygwin path translation behavior in the FAQ. +* [BUG #1739](https://github.com/BurntSushi/ripgrep/issues/1739): + Fix bug where replacements were buggy if the regex matched a line terminator. +* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311): + Fix multi-line bug where a search & replace for `\n` didn't work as expected. +* [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401): + Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`. +* [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412): + Fix multi-line bug with searches using look-around past matching lines. +* [BUG #1577](https://github.com/BurntSushi/ripgrep/issues/1577): + Fish shell completions will continue to be auto-generated. +* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642): + Fixes a bug where using `-m` and `-A` printed more matches than the limit. +* [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703): + Clarify the function of `-u/--unrestricted`. +* [BUG #1708](https://github.com/BurntSushi/ripgrep/issues/1708): + Clarify how `-S/--smart-case` works. +* [BUG #1730](https://github.com/BurntSushi/ripgrep/issues/1730): + Clarify that CLI invocation must always be valid, regardless of config file. +* [BUG #1741](https://github.com/BurntSushi/ripgrep/issues/1741): + Fix stdin detection when using PowerShell in UNIX environments. +* [BUG #1756](https://github.com/BurntSushi/ripgrep/pull/1756): + Fix bug where `foo/**` would match `foo`, but it shouldn't. +* [BUG #1765](https://github.com/BurntSushi/ripgrep/issues/1765): + Fix panic when `--crlf` is used in some cases. +* [BUG #1638](https://github.com/BurntSushi/ripgrep/issues/1638): + Correctly sniff UTF-8 and do transcoding, like we do for UTF-16. +* [BUG #1816](https://github.com/BurntSushi/ripgrep/issues/1816): + Add documentation for glob alternate syntax, e.g., `{a,b,..}`. 
+* [BUG #1847](https://github.com/BurntSushi/ripgrep/issues/1847): + Clarify how the `--hidden` flag works. +* [BUG #1866](https://github.com/BurntSushi/ripgrep/issues/1866#issuecomment-841635553): + Fix bug when computing column numbers in `--vimgrep` mode. +* [BUG #1868](https://github.com/BurntSushi/ripgrep/issues/1868): + Fix bug where `--passthru` and `-A/-B/-C` did not override each other. +* [BUG #1869](https://github.com/BurntSushi/ripgrep/pull/1869): + Clarify docs for `--files-with-matches` and `--files-without-match`. +* [BUG #1878](https://github.com/BurntSushi/ripgrep/issues/1878): + Fix bug where `\A` could produce unanchored matches in multiline search. +* [BUG 94e4b8e3](https://github.com/BurntSushi/ripgrep/commit/94e4b8e3): + Fix column numbers when `--vimgrep` is used with `-U/--multiline`. + + +12.1.1 (2020-05-29) +=================== +ripgrep 12.1.1 is a patch release that fixes a couple small bugs. In +particular, the ripgrep 12.1.0 release did not tag new releases for all of its +in-tree dependencies. As a result, ripgrep built with dependencies from crates.io +would produce a different build than compiling ripgrep from source on the +`12.1.0` tag. Namely, some crates like `grep-cli` had unreleased changes. + +Bug fixes: + +* [BUG #1581](https://github.com/BurntSushi/ripgrep/issues/1581): + Corrects some egregious markup output in `--help`. +* [BUG #1591](https://github.com/BurntSushi/ripgrep/issues/1591): + Mention the special `$0` capture group in docs for the `-r/--replace` flag. +* [BUG #1602](https://github.com/BurntSushi/ripgrep/issues/1602): + Fix failing test resulting from out-of-sync dependencies. + + +12.1.0 (2020-05-09) +=================== +ripgrep 12.1.0 is a small minor version release that mostly includes bug fixes +and documentation improvements. This release also contains some important +notices for downstream packagers. 
+ +**Notices for downstream ripgrep package maintainers:** + +* Fish shell completions will be removed in the ripgrep 13 release. + See [#1577](https://github.com/BurntSushi/ripgrep/issues/1577) + for more details. +* ripgrep has switched from `a2x` to `asciidoctor` to generate the man page. + If `asciidoctor` is not present, then ripgrep will currently fall back to + `a2x`. Support for `a2x` will be dropped in the ripgrep 13 release. + See [#1544](https://github.com/BurntSushi/ripgrep/issues/1544) + for more details. + +Feature enhancements: + +* [FEATURE #1547](https://github.com/BurntSushi/ripgrep/pull/1547): + Support decompressing `.Z` files via `uncompress`. + +Bug fixes: + +* [BUG #1252](https://github.com/BurntSushi/ripgrep/issues/1252): + Add a section on the `--pre` flag to the GUIDE. +* [BUG #1339](https://github.com/BurntSushi/ripgrep/issues/1339): + Improve error message when a pattern with invalid UTF-8 is provided. +* [BUG #1524](https://github.com/BurntSushi/ripgrep/issues/1524): + Note how to escape a `$` when using `--replace`. +* [BUG #1537](https://github.com/BurntSushi/ripgrep/issues/1537): + Fix match bug caused by inner literal optimization. +* [BUG #1544](https://github.com/BurntSushi/ripgrep/issues/1544): + ripgrep now uses `asciidoctor` instead of `a2x` to generate its man page. +* [BUG #1550](https://github.com/BurntSushi/ripgrep/issues/1550): + Substantially reduce peak memory usage when searching wide directories. +* [BUG #1571](https://github.com/BurntSushi/ripgrep/issues/1571): + Add note about configuration files in `--type-{add,clear}` docs. +* [BUG #1573](https://github.com/BurntSushi/ripgrep/issues/1573): + Fix incorrect `--count-matches` output when using look-around. + + +12.0.1 (2020-03-29) +=================== +ripgrep 12.0.1 is a small patch release that includes a minor bug fix relating +to superfluous error messages when searching git repositories with sub-modules. +This was a regression introduced in the 12.0.0 release. 
+ +Bug fixes: + +* [BUG #1520](https://github.com/BurntSushi/ripgrep/issues/1520): + Don't emit spurious error messages in git repositories with submodules. + + +12.0.0 (2020-03-15) +=================== +ripgrep 12 is a new major version release of ripgrep that contains many bug +fixes, several important performance improvements and a few minor new features. + +In a near future release, I am hoping to add an +[indexing feature](https://github.com/BurntSushi/ripgrep/issues/1497) +to ripgrep, which will dramatically speed up searching by building an index. +Feedback would very much be appreciated, especially on the user experience +which will be difficult to get right. + +This release has no known breaking changes. + +Deprecations: + +* The `--no-pcre2-unicode` flag is deprecated. Instead, use the `--no-unicode` + flag, which applies to both the default regex engine and PCRE2. For now, + `--no-pcre2-unicode` and `--pcre2-unicode` are aliases to `--no-unicode` + and `--unicode`, respectively. The `--[no-]pcre2-unicode` flags may be + removed in a future release. +* The `--auto-hybrid-regex` flag is deprecated. Instead, use the new `--engine` + flag with the `auto` value. + +Performance improvements: + +* [PERF #1087](https://github.com/BurntSushi/ripgrep/pull/1087): + ripgrep is smarter when detected literals are whitespace. +* [PERF #1381](https://github.com/BurntSushi/ripgrep/pull/1381): + Directory traversal is sped up with speculative ignore-file existence checks. +* [PERF cd8ec38a](https://github.com/BurntSushi/ripgrep/commit/cd8ec38a): + Improve inner literal detection to cover more cases more effectively. + e.g., ` +Sherlock Holmes +` now has ` Sherlock Holmes ` extracted instead + of ` `. +* [PERF 6a0e0147](https://github.com/BurntSushi/ripgrep/commit/6a0e0147): + Improve literal detection when the `-w/--word-regexp` flag is used. 
+* [PERF ad97e9c9](https://github.com/BurntSushi/ripgrep/commit/ad97e9c9): + Improve overall performance of the `-w/--word-regexp` flag. + +Feature enhancements: + +* Added or improved file type filtering for erb, diff, Gradle, HAML, Org, + Postscript, Skim, Slim, Slime, RPM Spec files, Typoscript, xml. +* [FEATURE #1370](https://github.com/BurntSushi/ripgrep/pull/1370): + Add `--include-zero` flag that shows files searched without matches. +* [FEATURE #1390](https://github.com/BurntSushi/ripgrep/pull/1390): + Add `--no-context-separator` flag that always hides context separators. +* [FEATURE #1414](https://github.com/BurntSushi/ripgrep/pull/1414): + Add `--no-require-git` flag to allow ripgrep to respect gitignores anywhere. +* [FEATURE #1420](https://github.com/BurntSushi/ripgrep/pull/1420): + Add `--no-ignore-exclude` to disregard rules in `.git/info/exclude` files. +* [FEATURE #1466](https://github.com/BurntSushi/ripgrep/pull/1466): + Add `--no-ignore-files` flag to disable all `--ignore-file` flags. +* [FEATURE #1488](https://github.com/BurntSushi/ripgrep/pull/1488): + Add `--engine` flag for easier switching between regex engines. +* [FEATURE 75cbe88f](https://github.com/BurntSushi/ripgrep/commit/75cbe88f): + Add `--no-unicode` flag. This works on all supported regex engines. + +Bug fixes: + +* [BUG #1291](https://github.com/BurntSushi/ripgrep/issues/1291): + ripgrep now works in non-existent directories. +* [BUG #1319](https://github.com/BurntSushi/ripgrep/issues/1319): + Fix match bug due to errant literal detection. +* [**BUG #1335**](https://github.com/BurntSushi/ripgrep/issues/1335): + Fixes a performance bug when searching plain text files with very long lines. + This was a serious performance regression in some cases. +* [BUG #1344](https://github.com/BurntSushi/ripgrep/issues/1344): + Document usage of `--type all`. 
+* [BUG #1389](https://github.com/BurntSushi/ripgrep/issues/1389): + Fixes a bug where ripgrep would panic when searching a symlinked directory. +* [BUG #1439](https://github.com/BurntSushi/ripgrep/issues/1439): + Improve documentation for ripgrep's automatic stdin detection. +* [BUG #1441](https://github.com/BurntSushi/ripgrep/issues/1441): + Remove CPU features from man page. +* [BUG #1442](https://github.com/BurntSushi/ripgrep/issues/1442), + [BUG #1478](https://github.com/BurntSushi/ripgrep/issues/1478): + Improve documentation of the `-g/--glob` flag. +* [BUG #1445](https://github.com/BurntSushi/ripgrep/issues/1445): + ripgrep now respects ignore rules from .git/info/exclude in worktrees. +* [BUG #1485](https://github.com/BurntSushi/ripgrep/issues/1485): + Fish shell completions from the release Debian package are now installed to + `/usr/share/fish/vendor_completions.d/rg.fish`. + + +11.0.2 (2019-08-01) +=================== +ripgrep 11.0.2 is a new patch release that fixes a few bugs, including a +performance regression and a matching bug when using the `-F/--fixed-strings` +flag. + +Feature enhancements: + +* [FEATURE #1293](https://github.com/BurntSushi/ripgrep/issues/1293): + Added `--glob-case-insensitive` flag that makes `--glob` behave as `--iglob`. + +Bug fixes: + +* [BUG #1246](https://github.com/BurntSushi/ripgrep/issues/1246): + Add translations to README, starting with an unofficial Chinese translation. +* [BUG #1259](https://github.com/BurntSushi/ripgrep/issues/1259): + Fix bug where the last byte of a `-f file` was stripped if it wasn't a `\n`. +* [BUG #1261](https://github.com/BurntSushi/ripgrep/issues/1261): + Document that no error is reported when searching for `\n` with `-P/--pcre2`. +* [BUG #1284](https://github.com/BurntSushi/ripgrep/issues/1284): + Mention `.ignore` and `.rgignore` more prominently in the README. 
+* [BUG #1292](https://github.com/BurntSushi/ripgrep/issues/1292): + Fix bug where `--with-filename` was sometimes enabled incorrectly. +* [BUG #1268](https://github.com/BurntSushi/ripgrep/issues/1268): + Fix major performance regression in GitHub `x86_64-linux` binary release. +* [BUG #1302](https://github.com/BurntSushi/ripgrep/issues/1302): + Show better error messages when a non-existent preprocessor command is given. +* [BUG #1334](https://github.com/BurntSushi/ripgrep/issues/1334): + Fix match regression with `-F` flag when patterns contain meta characters. + + +11.0.1 (2019-04-16) +=================== +ripgrep 11.0.1 is a new patch release that fixes a search regression introduced +in the previous 11.0.0 release. In particular, ripgrep can enter an infinite +loop for some search patterns when searching invalid UTF-8. + +Bug fixes: + +* [BUG #1247](https://github.com/BurntSushi/ripgrep/issues/1247): + Fix search bug that can cause ripgrep to enter an infinite loop. + + +11.0.0 (2019-04-15) +=================== +ripgrep 11 is a new major version release of ripgrep that contains many bug +fixes, some performance improvements and a few feature enhancements. Notably, +ripgrep's user experience for binary file filtering has been improved. See the +[guide's new section on binary data](GUIDE.md#binary-data) for more details. + +This release also marks a change in ripgrep's versioning. Whereas the previous +version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's +major version will be increased a few times per year. ripgrep will continue to +be conservative with respect to backwards compatibility, but may occasionally +introduce breaking changes, which will always be documented in this CHANGELOG. +See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit +more detail on why this versioning change was made. + +This release increases the **minimum supported Rust version** from 1.28.0 to +1.34.0. 
+ +**BREAKING CHANGES**: + +* ripgrep has tweaked its exit status codes to be more like GNU grep's. Namely, + if a non-fatal error occurs during a search, then ripgrep will now always + emit a `2` exit status code, regardless of whether a match is found or not. + Previously, ripgrep would only emit a `2` exit status code for a catastrophic + error (e.g., regex syntax error). One exception to this is if ripgrep is run + with `-q/--quiet`. In that case, if an error occurs and a match is found, + then ripgrep will exit with a `0` exit status code. +* Supplying the `-u/--unrestricted` flag three times is now equivalent to + supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent + to `--no-ignore --hidden --text`. The difference is that `--binary` disables + binary file filtering without potentially dumping binary data into your + terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`. +* The `avx-accel` feature of ripgrep has been removed since it is no longer + necessary. All uses of AVX in ripgrep are now enabled automatically via + runtime CPU feature detection. The `simd-accel` feature does remain available + (only for enabling SIMD for transcoding), however, it does increase + compilation times substantially at the moment. + +Performance improvements: + +* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497), + [PERF #838](https://github.com/BurntSushi/ripgrep/issues/838): + Make `rg -F -f dictionary-of-literals` much faster. + +Feature enhancements: + +* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli, + BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex, + XML, xz, zig and zstd. +* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855): + Add `--binary` flag for disabling binary file filtering. +* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078): + Add `--max-columns-preview` flag for showing a preview of long lines. 
+* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099): + Add support for Brotli and Zstd to the `-z/--search-zip` flag. +* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138): + Add `--no-ignore-dot` flag for ignoring `.ignore` files. +* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155): + Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2. +* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159): + ripgrep's exit status logic should now match GNU grep. See updated man page. +* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164): + Add `--ignore-file-case-insensitive` for case insensitive ignore globs. +* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185): + Add `-I` flag as a short option for the `--no-filename` flag. +* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207): + Add `none` value to `-E/--encoding` to forcefully disable all transcoding. +* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204): + Add `--pcre2-version` for querying and showing PCRE2 version information. + +Bug fixes: + +* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306), + [BUG #855](https://github.com/BurntSushi/ripgrep/issues/855): + Improve the user experience for ripgrep's binary file filtering. +* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373), + [BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098): + `**` is now accepted as valid syntax anywhere in a glob. +* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916): + ripgrep no longer hangs when searching `/proc` with a zombie process present. +* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052): + Fix bug where ripgrep could panic when transcoding UTF-16 files. +* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055): + Suggest `-U/--multiline` when a pattern contains a `\n`. 
+* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063): + Always strip a BOM if it's present, even for UTF-8. +* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064): + Fix inner literal detection that could lead to incorrect matches. +* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079): + Fixes a bug where the order of globs could result in missing a match. +* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089): + Fix another bug where ripgrep could panic when transcoding UTF-16 files. +* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091): + Add note about inverted flags to the man page. +* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093): + Fix handling of literal slashes in gitignore patterns. +* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095): + Fix corner cases involving the `--crlf` flag. +* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101): + Fix AsciiDoc escaping for man page output. +* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103): + Clarify what `--encoding auto` does. +* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106): + `--files-with-matches` and `--files-without-match` work with one file. +* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121): + Fix bug that was triggering Windows antimalware when using the `--files` + flag. +* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125), + [BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159): + ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status. +* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144): + Fixes a bug where line numbers could be wrong on big-endian machines. +* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154): + Windows files with "hidden" attribute are now treated as hidden. 
+* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173): + Fix handling of `**` patterns in gitignore files. +* [BUG #1174](https://github.com/BurntSushi/ripgrep/issues/1174): + Fix handling of repeated `**` patterns in gitignore files. +* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176): + Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`. +* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189): + Document cases where ripgrep may use a lot of memory. +* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203): + Fix a matching bug related to the suffix literal optimization. +* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18): + Increase the default stack size for PCRE2's JIT. + + +0.10.0 (2018-09-07) +=================== +This is a new minor version release of ripgrep that contains some major new +features, a huge number of bug fixes, and is the first release based on +libripgrep. The entirety of ripgrep's core search and printing code has been +rewritten and generalized so that anyone can make use of it. + +Major new features include PCRE2 support, multi-line search and a JSON output +format. + +**BREAKING CHANGES**: + +* The minimum Rust version required to compile ripgrep has now changed to track the + latest stable version of Rust. Patch releases will continue to compile with + the same version of Rust as the previous patch release, but new minor + versions will use the current stable version of the Rust compiler as its + minimum supported version. +* The match semantics of `-w/--word-regexp` have changed slightly. They used + to be `\b(?:<your pattern>)\b`, but now it's + `(?:^|\W)(?:<your pattern>)(?:$|\W)`. This matches the behavior of GNU grep + and is believed to be closer to the intended semantics of the flag. See + [#389](https://github.com/BurntSushi/ripgrep/issues/389) for more details. + +Feature enhancements: + +* [FEATURE #162](https://github.com/BurntSushi/ripgrep/issues/162): + libripgrep is now a thing. 
The primary crate is + [`grep`](https://docs.rs/grep). +* [FEATURE #176](https://github.com/BurntSushi/ripgrep/issues/176): + Add `-U/--multiline` flag that permits matching over multiple lines. +* [FEATURE #188](https://github.com/BurntSushi/ripgrep/issues/188): + Add `-P/--pcre2` flag that gives support for look-around and backreferences. +* [FEATURE #244](https://github.com/BurntSushi/ripgrep/issues/244): + Add `--json` flag that prints results in a JSON Lines format. +* [FEATURE #321](https://github.com/BurntSushi/ripgrep/issues/321): + Add `--one-file-system` flag to skip directories on different file systems. +* [FEATURE #404](https://github.com/BurntSushi/ripgrep/issues/404): + Add `--sort` and `--sortr` flag for more sorting. Deprecate `--sort-files`. +* [FEATURE #416](https://github.com/BurntSushi/ripgrep/issues/416): + Add `--crlf` flag to permit `$` to work with carriage returns on Windows. +* [FEATURE #917](https://github.com/BurntSushi/ripgrep/issues/917): + The `--trim` flag strips prefix whitespace from all lines printed. +* [FEATURE #993](https://github.com/BurntSushi/ripgrep/issues/993): + Add `--null-data` flag, which makes ripgrep use NUL as a line terminator. +* [FEATURE #997](https://github.com/BurntSushi/ripgrep/issues/997): + The `--passthru` flag now works with the `--replace` flag. +* [FEATURE #1038-1](https://github.com/BurntSushi/ripgrep/issues/1038): + Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy. +* [FEATURE #1038-2](https://github.com/BurntSushi/ripgrep/issues/1038): + Add `--pre-glob` for filtering files through the `--pre` flag. + +Bug fixes: + +* [BUG #2](https://github.com/BurntSushi/ripgrep/issues/2): + Searching with non-zero context can now use memory maps if appropriate. +* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200): + ripgrep will now stop correctly when its output pipe is closed. 
+* [BUG #389](https://github.com/BurntSushi/ripgrep/issues/389): + The `-w/--word-regexp` flag now works more intuitively. +* [BUG #643](https://github.com/BurntSushi/ripgrep/issues/643): + Detection of readable stdin has improved on Windows. +* [BUG #441](https://github.com/BurntSushi/ripgrep/issues/441), + [BUG #690](https://github.com/BurntSushi/ripgrep/issues/690), + [BUG #980](https://github.com/BurntSushi/ripgrep/issues/980): + Matching empty lines now works correctly in several corner cases. +* [BUG #764](https://github.com/BurntSushi/ripgrep/issues/764): + Color escape sequences now coalesce, which reduces output size. +* [BUG #842](https://github.com/BurntSushi/ripgrep/issues/842): + Add man page to binary Debian package. +* [BUG #922](https://github.com/BurntSushi/ripgrep/issues/922): + ripgrep is now more robust with respect to memory maps failing. +* [BUG #937](https://github.com/BurntSushi/ripgrep/issues/937): + Color escape sequences are no longer emitted for empty matches. +* [BUG #940](https://github.com/BurntSushi/ripgrep/issues/940): + Context from the `--passthru` flag should not impact process exit status. +* [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984): + Fixes bug in `ignore` crate where first path was always treated as a symlink. +* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990): + Read stderr asynchronously when running a process. +* [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013): + Add compile time and runtime CPU features to `--version` output. +* [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028): + Don't complete bare pattern after `-f` in zsh. + + +0.9.0 (2018-08-03) +================== +This is a new minor version release of ripgrep that contains some minor new +features and a panoply of bug fixes. 
+ +Releases provided on Github for `x86_64` will now work on all target CPUs, and +will also automatically take advantage of features found on modern CPUs (such +as AVX2) for additional optimizations. + +This release increases the **minimum supported Rust version** from 1.20.0 to +1.23.0. + +It is anticipated that the next release of ripgrep (0.10.0) will provide +multi-line search support and a JSON output format. + +**BREAKING CHANGES**: + +* When `--count` and `--only-matching` are provided simultaneously, the + behavior of ripgrep is as if the `--count-matches` flag was given. That is, + the total number of matches is reported, where there may be multiple matches + per line. Previously, the behavior of ripgrep was to report the total number + of matching lines. (Note that this behavior diverges from the behavior of + GNU grep.) +* Octal syntax is no longer supported. ripgrep previously accepted expressions + like `\1` as syntax for matching `U+0001`, but ripgrep will now report an + error instead. +* The `--line-number-width` flag has been removed. Its functionality was not + carefully considered with all ripgrep output formats. + See [#795](https://github.com/BurntSushi/ripgrep/issues/795) for more + details. + +Feature enhancements: + +* Added or improved file type filtering for Android, Bazel, Fuchsia, Haskell, + Java and Puppet. +* [FEATURE #411](https://github.com/BurntSushi/ripgrep/issues/411): + Add a `--stats` flag, which emits aggregate statistics after search results. +* [FEATURE #646](https://github.com/BurntSushi/ripgrep/issues/646): + Add a `--no-ignore-messages` flag, which suppresses parse errors from reading + `.ignore` and `.gitignore` files. +* [FEATURE #702](https://github.com/BurntSushi/ripgrep/issues/702): + Support `\u{..}` Unicode escape sequences. +* [FEATURE #812](https://github.com/BurntSushi/ripgrep/issues/812): + Add `-b/--byte-offset` flag that shows the byte offset of each matching line. 
+* [FEATURE #814](https://github.com/BurntSushi/ripgrep/issues/814): + Add `--count-matches` flag, which is like `--count`, but for each match. +* [FEATURE #880](https://github.com/BurntSushi/ripgrep/issues/880): + Add a `--no-column` flag, which disables column numbers in the output. +* [FEATURE #898](https://github.com/BurntSushi/ripgrep/issues/898): + Add support for `lz4` when using the `-z/--search-zip` flag. +* [FEATURE #924](https://github.com/BurntSushi/ripgrep/issues/924): + `termcolor` has moved to its own repository: + https://github.com/BurntSushi/termcolor +* [FEATURE #934](https://github.com/BurntSushi/ripgrep/issues/934): + Add a new flag, `--no-ignore-global`, that permits disabling global + gitignores. +* [FEATURE #967](https://github.com/BurntSushi/ripgrep/issues/967): + Rename `--maxdepth` to `--max-depth` for consistency. Keep `--maxdepth` for + backwards compatibility. +* [FEATURE #978](https://github.com/BurntSushi/ripgrep/issues/978): + Add a `--pre` option to filter inputs with an arbitrary program. +* [FEATURE fca9709d](https://github.com/BurntSushi/ripgrep/commit/fca9709d): + Improve zsh completion. + +Bug fixes: + +* [BUG #135](https://github.com/BurntSushi/ripgrep/issues/135): + Release portable binaries that conditionally use SSSE3, AVX2, etc., at + runtime. +* [BUG #268](https://github.com/BurntSushi/ripgrep/issues/268): + Print descriptive error message when trying to use look-around or + backreferences. +* [BUG #395](https://github.com/BurntSushi/ripgrep/issues/395): + Show comprehensible error messages for regexes like `\s*{`. +* [BUG #526](https://github.com/BurntSushi/ripgrep/issues/526): + Support backslash escapes in globs. +* [BUG #795](https://github.com/BurntSushi/ripgrep/issues/795): + Fix problems with `--line-number-width` by removing it. +* [BUG #832](https://github.com/BurntSushi/ripgrep/issues/832): + Clarify usage instructions for `-f/--file` flag. 
+* [BUG #835](https://github.com/BurntSushi/ripgrep/issues/835): + Fix small performance regression while crawling very large directory trees. +* [BUG #851](https://github.com/BurntSushi/ripgrep/issues/851): + Fix `-S/--smart-case` detection once and for all. +* [BUG #852](https://github.com/BurntSushi/ripgrep/issues/852): + Be robust with respect to `ENOMEM` errors returned by `mmap`. +* [BUG #853](https://github.com/BurntSushi/ripgrep/issues/853): + Upgrade `grep` crate to `regex-syntax 0.6.0`. +* [BUG #893](https://github.com/BurntSushi/ripgrep/issues/893): + Improve support for git submodules. +* [BUG #900](https://github.com/BurntSushi/ripgrep/issues/900): + When no patterns are given, ripgrep should never match anything. +* [BUG #907](https://github.com/BurntSushi/ripgrep/issues/907): + ripgrep will now stop traversing after the first file when `--quiet --files` + is used. +* [BUG #918](https://github.com/BurntSushi/ripgrep/issues/918): + Don't skip tar archives when `-z/--search-zip` is used. +* [BUG #934](https://github.com/BurntSushi/ripgrep/issues/934): + Don't respect gitignore files when searching outside git repositories. +* [BUG #948](https://github.com/BurntSushi/ripgrep/issues/948): + Use exit code 2 to indicate error, and use exit code 1 to indicate no + matches. +* [BUG #951](https://github.com/BurntSushi/ripgrep/issues/951): + Add stdin example to ripgrep usage documentation. +* [BUG #955](https://github.com/BurntSushi/ripgrep/issues/955): + Use buffered writing when not printing to a tty, which fixes a performance + regression. +* [BUG #957](https://github.com/BurntSushi/ripgrep/issues/957): + Improve the error message shown for `--path-separator /` in some Windows + shells. +* [BUG #964](https://github.com/BurntSushi/ripgrep/issues/964): + Add a `--no-fixed-strings` flag to disable `-F/--fixed-strings`. 
+* [BUG #988](https://github.com/BurntSushi/ripgrep/issues/988): + Fix a bug in the `ignore` crate that prevented the use of explicit ignore + files after disabling all other ignore rules. +* [BUG #995](https://github.com/BurntSushi/ripgrep/issues/995): + Respect `$XDG_CONFIG_DIR/git/config` for detecting `core.excludesFile`. + + +0.8.1 (2018-02-20) +================== +This is a patch release of ripgrep that primarily fixes regressions introduced +in 0.8.0 (#820 and #824) in directory traversal on Windows. These regressions +do not impact non-Windows users. + +Feature enhancements: + +* Added or improved file type filtering for csv and VHDL. +* [FEATURE #798](https://github.com/BurntSushi/ripgrep/issues/798): + Add `underline` support to `termcolor` and ripgrep. See documentation on the + `--colors` flag for details. + +Bug fixes: + +* [BUG #684](https://github.com/BurntSushi/ripgrep/issues/684): + Improve documentation for the `--ignore-file` flag. +* [BUG #789](https://github.com/BurntSushi/ripgrep/issues/789): + Don't show `(rev )` if the revision wasn't available during the build. +* [BUG #791](https://github.com/BurntSushi/ripgrep/issues/791): + Add man page to ARM release. +* [BUG #797](https://github.com/BurntSushi/ripgrep/issues/797): + Improve documentation for "intense" setting in `termcolor`. +* [BUG #800](https://github.com/BurntSushi/ripgrep/issues/800): + Fix a bug in the `ignore` crate for custom ignore files. This had no impact + on ripgrep. +* [BUG #807](https://github.com/BurntSushi/ripgrep/issues/807): + Fix a bug where `rg --hidden .` behaved differently from `rg --hidden ./`. +* [BUG #815](https://github.com/BurntSushi/ripgrep/issues/815): + Clarify a common failure mode in user guide. +* [BUG #820](https://github.com/BurntSushi/ripgrep/issues/820): + Fixes a bug on Windows where symlinks were followed even if not requested. 
+* [BUG #824](https://github.com/BurntSushi/ripgrep/issues/824): + Fix a performance regression in directory traversal on Windows. + + +0.8.0 (2018-02-11) +================== +This is a new minor version release of ripgrep that satisfies several popular +feature requests (config files, search compressed files, true colors), fixes +many bugs and improves the quality of life for ripgrep maintainers. This +release also includes greatly improved documentation in the form of a +[User Guide](GUIDE.md) and a [FAQ](FAQ.md). + +This release increases the **minimum supported Rust version** from 1.17 to +1.20. + +**BREAKING CHANGES**: + +Note that these are all very minor and unlikely to impact most users. + +* In order to support configuration files, flag overrides needed to be + rethought. In some cases, this changed ripgrep's behavior. For example, + in ripgrep 0.7.1, `rg foo -s -i` will perform a case sensitive search + since the `-s/--case-sensitive` flag was defined to always take precedence + over the `-i/--ignore-case` flag, regardless of position. In ripgrep 0.8.0 + however, the override rule for all flags has changed to "the most recent + flag wins among competing flags." That is, `rg foo -s -i` now performs a + case insensitive search. +* The `-M/--max-columns` flag was tweaked so that specifying a value of `0` + now makes ripgrep behave as if the flag was absent. This makes it possible + to set a default value in a configuration file and then override it. The + previous ripgrep behavior was to suppress all matching non-empty lines. +* In all globs, `[^...]` is now equivalent to `[!...]` (indicating class + negation). Previously, `^` had no special significance in a character class. +* For **downstream packagers**, the directory hierarchy in ripgrep's archive + releases has changed. The root directory now only contains the executable, + README and license. 
There is now a new directory called `doc` which contains + the man page (previously in the root), a user guide (new), a FAQ (new) and + the CHANGELOG (previously not included in release). The `complete` + directory remains the same. + +Feature enhancements: + +* Added or improved file type filtering for + Apache Avro, C++, GN, Google Closure Templates, Jupyter notebooks, man pages, + Protocol Buffers, Smarty and Web IDL. +* [FEATURE #196](https://github.com/BurntSushi/ripgrep/issues/196): + Support a configuration file. See + [the new user guide](GUIDE.md#configuration-file) + for details. +* [FEATURE #261](https://github.com/BurntSushi/ripgrep/issues/261): + Add extended or "true" color support. Works in Windows 10! + [See the FAQ for details.](FAQ.md#colors) +* [FEATURE #539](https://github.com/BurntSushi/ripgrep/issues/539): + Search gzip, bzip2, lzma or xz files when given `-z/--search-zip` flag. +* [FEATURE #544](https://github.com/BurntSushi/ripgrep/issues/544): + Add support for line number alignment via a new `--line-number-width` flag. +* [FEATURE #654](https://github.com/BurntSushi/ripgrep/pull/654): + Support linuxbrew in ripgrep's Brew tap. +* [FEATURE #673](https://github.com/BurntSushi/ripgrep/issues/673): + Bring back `.rgignore` files. (A higher precedence, application specific + version of `.ignore`.) +* [FEATURE #676](https://github.com/BurntSushi/ripgrep/issues/676): + Provide ARM binaries. **WARNING:** This will be provided on a best effort + basis. +* [FEATURE #709](https://github.com/BurntSushi/ripgrep/issues/709): + Suggest `-F/--fixed-strings` flag on a regex syntax error. +* [FEATURE #740](https://github.com/BurntSushi/ripgrep/issues/740): + Add a `--passthru` flag that causes ripgrep to print every line it reads. +* [FEATURE #785](https://github.com/BurntSushi/ripgrep/pull/785): + Overhaul documentation. Cleaned up README, added user guide and FAQ. 
+* [FEATURE 7f5c07](https://github.com/BurntSushi/ripgrep/commit/7f5c07434be92103b5bf7e216b9c7494aed2d8cb): + Add hidden flags for convenient overrides (e.g., `--no-text`). + +Bug fixes: + +* [BUG #553](https://github.com/BurntSushi/ripgrep/issues/553): + Permit flags to be repeated. +* [BUG #633](https://github.com/BurntSushi/ripgrep/issues/633): + Fix a bug where ripgrep would panic on Windows while following symlinks. +* [BUG #649](https://github.com/BurntSushi/ripgrep/issues/649): + Fix handling of `!**/` in `.gitignore`. +* [BUG #663](https://github.com/BurntSushi/ripgrep/issues/663): + **BREAKING CHANGE:** Support `[^...]` glob syntax (as identical to `[!...]`). +* [BUG #693](https://github.com/BurntSushi/ripgrep/issues/693): + Don't display context separators when not printing matches. +* [BUG #705](https://github.com/BurntSushi/ripgrep/issues/705): + Fix a bug that prevented ripgrep from searching OneDrive directories. +* [BUG #717](https://github.com/BurntSushi/ripgrep/issues/717): + Improve `--smart-case` uppercase character detection. +* [BUG #725](https://github.com/BurntSushi/ripgrep/issues/725): + Clarify that globs do not override explicitly given paths to search. +* [BUG #742](https://github.com/BurntSushi/ripgrep/pull/742): + Write ANSI reset code as `\x1B[0m` instead of `\x1B[m`. +* [BUG #747](https://github.com/BurntSushi/ripgrep/issues/747): + Remove `yarn.lock` from YAML file type. +* [BUG #760](https://github.com/BurntSushi/ripgrep/issues/760): + ripgrep can now search `/sys/devices/system/cpu/vulnerabilities/*` files. +* [BUG #761](https://github.com/BurntSushi/ripgrep/issues/761): + Fix handling of gitignore patterns that contain a `/`. +* [BUG #776](https://github.com/BurntSushi/ripgrep/pull/776): + **BREAKING CHANGE:** `--max-columns=0` now disables the limit. +* [BUG #779](https://github.com/BurntSushi/ripgrep/issues/779): + Clarify documentation for `--files-without-match`. 
+* [BUG #780](https://github.com/BurntSushi/ripgrep/issues/780), + [BUG #781](https://github.com/BurntSushi/ripgrep/issues/781): + Fix bug where ripgrep missed some matching lines. + +Maintenance fixes: + +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + Drop `env_logger` in favor of simpler logger to avoid many new dependencies. +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + Add git revision hash to ripgrep's version string. +* [MAINT #772](https://github.com/BurntSushi/ripgrep/pull/772): + (Seemingly) improve compile times. +* [MAINT #776](https://github.com/BurntSushi/ripgrep/pull/776): + Automatically generate man page during build. +* [MAINT #786](https://github.com/BurntSushi/ripgrep/pull/786): + Remove use of `unsafe` in `globset`. :tada: +* [MAINT e9d448](https://github.com/BurntSushi/ripgrep/commit/e9d448e93bb4e1fb3b0c1afc29adb5af6ed5283d): + Add an issue template (has already drastically improved bug reports). +* [MAINT ae2d03](https://github.com/BurntSushi/ripgrep/commit/ae2d036dd4ba2a46acac9c2d77c32e7c667eb850): + Remove the `compile` script. + +Friends of ripgrep: + +I'd like to extend my gratitude to +[@balajisivaraman](https://github.com/balajisivaraman) +for their recent hard work in a number of areas, and in particular, for +implementing the "search compressed files" feature. Their work in sketching out +a specification for that and other work has been exemplary. + +Thanks +[@balajisivaraman](https://github.com/balajisivaraman)! + + +0.7.1 (2017-10-22) +================== +This is a patch release of ripgrep that includes a fix to a very bad regression +introduced in ripgrep 0.7.0. + +Bug fixes: + +* [BUG #648](https://github.com/BurntSushi/ripgrep/issues/648): + Fix a bug where it was very easy to exceed standard file descriptor limits. + + +0.7.0 (2017-10-20) +================== +This is a new minor version release of ripgrep that includes mostly bug fixes.
+ +ripgrep continues to require Rust 1.17, and there are no known breaking changes +introduced in this release. + +Feature enhancements: + +* Added or improved file type filtering for config & license files, Elm, + Purescript, Standard ML, sh, systemd, Terraform +* [FEATURE #593](https://github.com/BurntSushi/ripgrep/pull/593): + Using both `-o/--only-matching` and `-r/--replace` does the right thing. + +Bug fixes: + +* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200): + ripgrep will stop when its pipe is closed. +* [BUG #402](https://github.com/BurntSushi/ripgrep/issues/402): + Fix context printing bug when the `-m/--max-count` flag is used. +* [BUG #521](https://github.com/BurntSushi/ripgrep/issues/521): + Fix interaction between `-r/--replace` and terminal colors. +* [BUG #559](https://github.com/BurntSushi/ripgrep/issues/559): + Ignore test that tried reading a non-UTF-8 file path on macOS. +* [BUG #599](https://github.com/BurntSushi/ripgrep/issues/599): + Fix color escapes on empty matches. +* [BUG #600](https://github.com/BurntSushi/ripgrep/issues/600): + Avoid expensive (on Windows) file handle check when using --files. +* [BUG #618](https://github.com/BurntSushi/ripgrep/issues/618): + Clarify installation instructions for Ubuntu users. +* [BUG #633](https://github.com/BurntSushi/ripgrep/issues/633): + Faster symlink loop checking on Windows. + + +0.6.0 (2017-08-23) +================== +This is a new minor version release of ripgrep that includes many bug fixes +and a few new features such as `--iglob` and `-x/--line-regexp`. + +Note that this release increases the minimum supported Rust version from 1.12 +to 1.17. + +Feature enhancements: + +* Added or improved file type filtering for BitBake, C++, Cabal, cshtml, Julia, + Make, msbuild, QMake, Yocto +* [FEATURE #163](https://github.com/BurntSushi/ripgrep/issues/163): + Add an `--iglob` flag that is like `-g/--glob`, but matches globs + case insensitively. 
+* [FEATURE #520](https://github.com/BurntSushi/ripgrep/pull/518): + Add `-x/--line-regexp` flag, which requires a match to span an entire line. +* [FEATURE #551](https://github.com/BurntSushi/ripgrep/pull/551), + [FEATURE #554](https://github.com/BurntSushi/ripgrep/pull/554): + `ignore`: add new `matched_path_or_any_parents` method. + +Bug fixes: + +* [BUG #342](https://github.com/BurntSushi/ripgrep/issues/342): + Fix invisible text in some PowerShell environments by changing the + default color scheme on Windows. +* [BUG #413](https://github.com/BurntSushi/ripgrep/issues/413): + Release binaries on Unix are now `strip`'d by default. This decreases + binary size by an order of magnitude. +* [BUG #483](https://github.com/BurntSushi/ripgrep/issues/483): + When `--quiet` is passed, `--files` should be quiet. +* [BUG #488](https://github.com/BurntSushi/ripgrep/pull/488): + When `--vimgrep` is passed, `--with-filename` should be enabled + automatically. +* [BUG #493](https://github.com/BurntSushi/ripgrep/issues/493): + Fix another bug in the implementation of the `-o/--only-matching` + flag. +* [BUG #499](https://github.com/BurntSushi/ripgrep/pull/499): + Permit certain flags to override others. +* [BUG #523](https://github.com/BurntSushi/ripgrep/pull/523): + `wincolor`: Re-fetch Windows console on all calls. +* [BUG #524](https://github.com/BurntSushi/ripgrep/issues/524): + `--version` now shows enabled compile-time features. +* [BUG #532](https://github.com/BurntSushi/ripgrep/issues/532), + [BUG #536](https://github.com/BurntSushi/ripgrep/pull/536), + [BUG #538](https://github.com/BurntSushi/ripgrep/pull/538), + [BUG #540](https://github.com/BurntSushi/ripgrep/pull/540), + [BUG #560](https://github.com/BurntSushi/ripgrep/pull/560), + [BUG #565](https://github.com/BurntSushi/ripgrep/pull/565): + Improve zsh completion. +* [BUG #578](https://github.com/BurntSushi/ripgrep/pull/578): + Enable SIMD for `encoding_rs` when appropriate.
+* [BUG #580](https://github.com/BurntSushi/ripgrep/issues/580): + Fix `-w/--word-regexp` in the presence of capturing groups. +* [BUG #581](https://github.com/BurntSushi/ripgrep/issues/581): + Document that ripgrep may terminate unexpectedly when searching via + memory maps (which can happen using default settings). + +Friends of ripgrep: + +I'd like to give a big Thank You to @okdana for their recent hard work on +ripgrep. This includes new features like `--line-regexp`, heroic effort on +zsh auto-completion and thinking through some thorny argv issues with me. + +I'd also like to thank @ericbn for their work on improving ripgrep's argv +parsing by allowing some flags to override others. + +Thanks @okdana and @ericbn! + + +0.5.2 (2017-05-11) +================== +Feature enhancements: + +* Added or improved file type filtering for Nix. +* [FEATURE #362](https://github.com/BurntSushi/ripgrep/issues/362): + Add `--regex-size-limit` and `--dfa-size-limit` flags. +* [FEATURE #444](https://github.com/BurntSushi/ripgrep/issues/444): + Improve error messages for invalid globs. + +Bug fixes: + +* [BUG #442](https://github.com/BurntSushi/ripgrep/issues/442): + Fix line wrapping in `--help` output. +* [BUG #451](https://github.com/BurntSushi/ripgrep/issues/451): + Fix bug with duplicate output when using `-o/--only-matching` flag. + + +0.5.1 (2017-04-09) +================== +Feature enhancements: + +* Added or improved file type filtering for vim. +* [FEATURE #34](https://github.com/BurntSushi/ripgrep/issues/34): + Add a `-o/--only-matching` flag. +* [FEATURE #377](https://github.com/BurntSushi/ripgrep/issues/377): + Column numbers can now be customized with a color. (The default is + no color.) +* [FEATURE #419](https://github.com/BurntSushi/ripgrep/issues/419): + Added `-0` short flag option for `--null`. + +Bug fixes: + +* [BUG #381](https://github.com/BurntSushi/ripgrep/issues/381): + Include license text in all subcrates. 
+* [BUG #418](https://github.com/BurntSushi/ripgrep/issues/418), + [BUG #426](https://github.com/BurntSushi/ripgrep/issues/426), + [BUG #439](https://github.com/BurntSushi/ripgrep/issues/439): + Fix a few bugs with `-h/--help` output. + + +0.5.0 (2017-03-12) +================== +This is a new minor version release of ripgrep that includes one minor breaking +change, bug fixes and several new features including support for text encodings +other than UTF-8. + +A notable accomplishment with respect to Rust is that ripgrep proper now only +contains a single `unsafe` use (for accessing the contents of a memory map). + +The **breaking change** is: + +* [FEATURE #380](https://github.com/BurntSushi/ripgrep/issues/380): + Line numbers are now hidden by default when ripgrep is printing to a tty + **and** the only thing searched is stdin. + +Feature enhancements: + +* Added or improved file type filtering for Ceylon, CSS, Elixir, HTML, log, + SASS, SVG, Twig +* [FEATURE #1](https://github.com/BurntSushi/ripgrep/issues/1): + Add support for additional text encodings, including automatic detection for + UTF-16 via BOM sniffing. Explicit text encoding support with the + `-E/--encoding` flag was also added for latin-1, GBK, EUC-JP + and Shift_JIS, among others. The full list can be found here: + https://encoding.spec.whatwg.org/#concept-encoding-get +* [FEATURE #129](https://github.com/BurntSushi/ripgrep/issues/129): + Add a new `-M/--max-columns` flag that omits lines longer than the given + number of bytes. (Disabled by default!) +* [FEATURE #369](https://github.com/BurntSushi/ripgrep/issues/369): + A new flag, `--max-filesize`, was added for limiting searches to files with + a maximum file size. + +Bug fixes: + +* [BUG #52](https://github.com/BurntSushi/ripgrep/issues/52), + [BUG #311](https://github.com/BurntSushi/ripgrep/issues/311): + Tweak how binary files are detected and handled. (We are slightly less + conservative and will no longer use memory without bound.) 
+* [BUG #326](https://github.com/BurntSushi/ripgrep/issues/326): + When --files flag is given, we should never attempt to parse positional + arguments as regexes. +* [BUG #327](https://github.com/BurntSushi/ripgrep/issues/327): + Permit the --heading flag to override the --no-heading flag. +* [BUG #340](https://github.com/BurntSushi/ripgrep/pull/340): + Clarify that the `-u/--unrestricted` flags are aliases. +* [BUG #343](https://github.com/BurntSushi/ripgrep/pull/343): + Global git ignore config should use `$HOME/.config/git/ignore` and not + `$HOME/git/ignore`. +* [BUG #345](https://github.com/BurntSushi/ripgrep/pull/345): + Clarify docs for `-g/--glob` flag. +* [BUG #381](https://github.com/BurntSushi/ripgrep/issues/381): + Add license files to each sub-crate. +* [BUG #383](https://github.com/BurntSushi/ripgrep/issues/383): + Use latest version of clap (for argv parsing). +* [BUG #392](https://github.com/BurntSushi/ripgrep/issues/391): + Fix translation of set globs (e.g., `{foo,bar,quux}`) to regexes. +* [BUG #401](https://github.com/BurntSushi/ripgrep/pull/401): + Add PowerShell completion file to Windows release. +* [BUG #405](https://github.com/BurntSushi/ripgrep/issues/405): + Fix bug when excluding absolute paths with the `-g/--glob` flag. + + +0.4.0 +===== +This is a new minor version release of ripgrep that includes a couple very +minor breaking changes, a few new features and lots of bug fixes. + +This version of ripgrep upgrades its `regex` dependency from `0.1` to `0.2`, +which includes a few minor syntax changes: + +* POSIX character classes now require double bracketing. Previously, the regex + `[:upper:]` would parse as the `upper` POSIX character class. Now it parses + as the character class containing the characters `:upper:`. The fix to this + change is to use `[[:upper:]]` instead. Note that variants like + `[[:upper:][:blank:]]` continue to work. +* The character `[` must always be escaped inside a character class. 
+* The characters `&`, `-` and `~` must be escaped if any one of them are + repeated consecutively. For example, `[&]`, `[\&]`, `[\&\&]`, `[&-&]` are all + equivalent while `[&&]` is illegal. (The motivation for this and the prior + change is to provide a backwards compatible path for adding character class + set notation.) + +Feature enhancements: + +* Added or improved file type filtering for Crystal, Kotlin, Perl, PowerShell, + Ruby, Swig +* [FEATURE #83](https://github.com/BurntSushi/ripgrep/issues/83): + Type definitions can now include other type definitions. +* [FEATURE #243](https://github.com/BurntSushi/ripgrep/issues/243): + **BREAKING CHANGE**: The `--column` flag now implies `--line-number`. +* [FEATURE #263](https://github.com/BurntSushi/ripgrep/issues/263): + Add a new `--sort-files` flag. +* [FEATURE #275](https://github.com/BurntSushi/ripgrep/issues/275): + Add a new `--path-separator` flag. Useful in cygwin. + +Bug fixes: + +* [BUG #182](https://github.com/BurntSushi/ripgrep/issues/182): + Redux: use more portable ANSI color escape sequences when possible. +* [BUG #258](https://github.com/BurntSushi/ripgrep/issues/258): + Fix bug that caused ripgrep's parallel iterator to spin and burn CPU. +* [BUG #262](https://github.com/BurntSushi/ripgrep/issues/262): + Document how to install shell completion files. +* [BUG #266](https://github.com/BurntSushi/ripgrep/issues/266), + [BUG #293](https://github.com/BurntSushi/ripgrep/issues/293): + Fix handling of bold styling and change the default colors. +* [BUG #268](https://github.com/BurntSushi/ripgrep/issues/268): + Make lack of backreference support more explicit. +* [BUG #271](https://github.com/BurntSushi/ripgrep/issues/271): + Remove `~` dependency on clap. +* [BUG #277](https://github.com/BurntSushi/ripgrep/issues/277): + Fix cosmetic issue in `globset` crate docs. +* [BUG #279](https://github.com/BurntSushi/ripgrep/issues/279): + ripgrep did not terminate when `-q/--quiet` was given. 
+* [BUG #281](https://github.com/BurntSushi/ripgrep/issues/281): + **BREAKING CHANGE**: Completely remove `^C` handling from ripgrep. +* [BUG #284](https://github.com/BurntSushi/ripgrep/issues/284): + Make docs for `-g/--glob` clearer. +* [BUG #286](https://github.com/BurntSushi/ripgrep/pull/286): + When stdout is redirected to a file, don't search that file. +* [BUG #287](https://github.com/BurntSushi/ripgrep/pull/287): + Fix ZSH completions. +* [BUG #295](https://github.com/BurntSushi/ripgrep/pull/295): + Remove superfluous `memmap` dependency in `grep` crate. +* [BUG #308](https://github.com/BurntSushi/ripgrep/pull/308): + Improve docs for `-r/--replace`. +* [BUG #313](https://github.com/BurntSushi/ripgrep/pull/313): + Update bytecount dep to latest version. +* [BUG #318](https://github.com/BurntSushi/ripgrep/pull/318): + Fix invalid UTF-8 output bug in Windows consoles. + + +0.3.2 +===== +Feature enhancements: + +* Added or improved file type filtering for Less, Sass, stylus, Zsh + +Bug fixes: + +* [BUG #229](https://github.com/BurntSushi/ripgrep/issues/229): + Make smart case slightly less conservative. +* [BUG #247](https://github.com/BurntSushi/ripgrep/issues/247): + Clarify use of --heading/--no-heading. +* [BUG #251](https://github.com/BurntSushi/ripgrep/issues/251), + [BUG #264](https://github.com/BurntSushi/ripgrep/issues/264), + [BUG #267](https://github.com/BurntSushi/ripgrep/issues/267): + Fix matching bug caused by literal optimizations. +* [BUG #256](https://github.com/BurntSushi/ripgrep/issues/256): + Fix bug that caused `rg foo` and `rg foo/` to have different behavior + when `foo` was a symlink. +* [BUG #270](https://github.com/BurntSushi/ripgrep/issues/270): + Fix bug where patterns starting with a `-` couldn't be used with the + `-e/--regexp` flag. (This resolves a regression that was introduced in + ripgrep 0.3.0.) 
+ + +0.3.1 +===== +Bug fixes: + +* [BUG #242](https://github.com/BurntSushi/ripgrep/issues/242): + ripgrep didn't respect `--colors foo:none` correctly. Now it does. + + +0.3.0 +===== +This is a new minor version release of ripgrep that includes two breaking +changes with lots of bug fixes and some new features and performance +improvements. Notably, if you had a problem with colors or piping on Windows +before, then that should now be fixed in this release. + +**BREAKING CHANGES**: + +* ripgrep now requires Rust 1.11 to compile. Previously, it could build on + Rust 1.9. The cause of this was the move from + [Docopt to Clap](https://github.com/BurntSushi/ripgrep/pull/233) + for argument parsing. +* The `-e/--regexp` flag can no longer accept a pattern starting with a `-`. + There are two work-arounds: `rg -- -foo` and `rg [-]foo` or `rg -e [-]foo` + will all search for the same `-foo` pattern. The cause of this was the move + from [Docopt to Clap](https://github.com/BurntSushi/ripgrep/pull/233) + for argument parsing. + [This may get fixed in the + future.](https://github.com/kbknapp/clap-rs/issues/742). + +Performance improvements: + +* [PERF #33](https://github.com/BurntSushi/ripgrep/issues/33): + ripgrep now performs similar to GNU grep on small corpora. +* [PERF #136](https://github.com/BurntSushi/ripgrep/issues/136): + ripgrep no longer slows down because of argument parsing when given a large + argument list. + +Feature enhancements: + +* Added or improved file type filtering for Elixir. +* [FEATURE #7](https://github.com/BurntSushi/ripgrep/issues/7): + Add a `-f/--file` flag that causes ripgrep to read patterns from a file. +* [FEATURE #51](https://github.com/BurntSushi/ripgrep/issues/51): + Add a `--colors` flag that enables one to customize the colors used in + ripgrep's output. +* [FEATURE #138](https://github.com/BurntSushi/ripgrep/issues/138): + Add a `--files-without-match` flag that shows only file paths that contain + zero matches. 
+* [FEATURE #230](https://github.com/BurntSushi/ripgrep/issues/230): + Add completion files to the release (Bash, Fish and PowerShell). + +Bug fixes: + +* [BUG #37](https://github.com/BurntSushi/ripgrep/issues/37): + Use correct ANSI escape sequences when `TERM=screen.linux`. +* [BUG #94](https://github.com/BurntSushi/ripgrep/issues/94): + ripgrep now detects stdin on Windows automatically. +* [BUG #117](https://github.com/BurntSushi/ripgrep/issues/117): + Colors should now work correctly and automatically inside mintty. +* [BUG #182](https://github.com/BurntSushi/ripgrep/issues/182): + Colors should now work within Emacs. In particular, `--color=always` will + emit colors regardless of the current environment. +* [BUG #189](https://github.com/BurntSushi/ripgrep/issues/189): + Show less content when running `rg -h`. The full help content can be + accessed with `rg --help`. +* [BUG #210](https://github.com/BurntSushi/ripgrep/issues/210): + Support non-UTF-8 file names on Unix platforms. +* [BUG #231](https://github.com/BurntSushi/ripgrep/issues/231): + Switch from block buffering to line buffering. +* [BUG #241](https://github.com/BurntSushi/ripgrep/issues/241): + Some error messages weren't suppressed when `--no-messages` was used. + + +0.2.9 +===== +Bug fixes: + +* [BUG #226](https://github.com/BurntSushi/ripgrep/issues/226): + File paths explicitly given on the command line weren't searched in parallel. + (This was a regression in `0.2.7`.) +* [BUG #228](https://github.com/BurntSushi/ripgrep/issues/228): + If a directory was given to `--ignore-file`, ripgrep's memory usage would + grow without bound. + + +0.2.8 +===== +Bug fixes: + +* Fixed a bug with the SIMD/AVX features for using bytecount in commit + `4ca15a`. + + +0.2.7 +===== +Performance improvements: + +* [PERF #223](https://github.com/BurntSushi/ripgrep/pull/223): + Added a parallel recursive directory iterator. This results in major + performance improvements on large repositories. 
+* [PERF #11](https://github.com/BurntSushi/ripgrep/pull/11): + ripgrep now uses the `bytecount` library for counting new lines. In some + cases, ripgrep runs twice as fast. Use + `RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'` + to get the fastest possible binary. + +Feature enhancements: + +* Added or improved file type filtering for Agda, Tex, Taskpaper, Markdown, + asciidoc, textile, rdoc, org, creole, wiki, pod, C#, PDF, C, C++. +* [FEATURE #149](https://github.com/BurntSushi/ripgrep/issues/149): + Add a new `--no-messages` flag that suppresses error messages. + Note that `rg foo 2> /dev/null` also works. +* [FEATURE #159](https://github.com/BurntSushi/ripgrep/issues/159): + Add a new `-m/--max-count` flag that limits the total number of matches + printed for each file searched. + +Bug fixes: + +* [BUG #199](https://github.com/BurntSushi/ripgrep/issues/199): + Fixed a bug where `-S/--smart-case` wasn't being applied correctly to + literal optimizations. +* [BUG #203](https://github.com/BurntSushi/ripgrep/issues/203): + Mention the full name, ripgrep, in more places. It now appears in + the output of `--help` and `--version`. The repository URL is now also + in the output of `--help` and the man page. +* [BUG #215](https://github.com/BurntSushi/ripgrep/issues/215): + Include small note about how to search for a pattern that starts with a `-`. + + +0.2.6 +===== +Feature enhancements: + +* Added or improved file type filtering for Fish. + +Bug fixes: + +* [BUG #206](https://github.com/BurntSushi/ripgrep/issues/206): + Fixed a regression with `-g/--glob` flag in `0.2.5`. + + +0.2.5 +===== +Feature enhancements: + +* Added or improved file type filtering for Groovy, Handlebars, Tcl, zsh and + Python. +* [FEATURE #9](https://github.com/BurntSushi/ripgrep/issues/9): + Support global gitignore config and `.git/info/exclude` files. 
+* [FEATURE #45](https://github.com/BurntSushi/ripgrep/issues/45): + Add --ignore-file flag for specifying additional ignore files. +* [FEATURE #202](https://github.com/BurntSushi/ripgrep/pull/202): + Introduce a new + [`ignore`](https://github.com/BurntSushi/ripgrep/tree/master/ignore) + crate that encapsulates all of ripgrep's gitignore matching logic. + +Bug fixes: + +* [BUG #44](https://github.com/BurntSushi/ripgrep/issues/44): + ripgrep runs slowly when given lots of positional arguments that are + directories. +* [BUG #119](https://github.com/BurntSushi/ripgrep/issues/119): + ripgrep didn't reset terminal colors if it was interrupted by `^C`. + Fixed in [PR #187](https://github.com/BurntSushi/ripgrep/pull/187). +* [BUG #184](https://github.com/BurntSushi/ripgrep/issues/184): + Fixed a bug related to interpreting gitignore files in parent directories. + + +0.2.4 +===== +SKIPPED. + + +0.2.3 +===== +Bug fixes: + +* [BUG #164](https://github.com/BurntSushi/ripgrep/issues/164): + Fixes a segfault on macos builds. +* [BUG #167](https://github.com/BurntSushi/ripgrep/issues/167): + Clarify documentation for --threads. + + +0.2.2 +===== +Packaging updates: + +* `ripgrep` is now in homebrew-core. `brew install ripgrep` will do the trick + on a Mac. +* `ripgrep` is now in the Archlinux community repository. + `pacman -S ripgrep` will do the trick on Archlinux. +* Support has been discontinued for i686-darwin. +* Glob matching has been moved out into its own crate: + [`globset`](https://crates.io/crates/globset). + +Feature enhancements: + +* Added or improved file type filtering for CMake, config, Jinja, Markdown, + Spark. +* [FEATURE #109](https://github.com/BurntSushi/ripgrep/issues/109): + Add a --max-depth flag for directory traversal. +* [FEATURE #124](https://github.com/BurntSushi/ripgrep/issues/124): + Add -s/--case-sensitive flag. Overrides --smart-case. 
+* [FEATURE #139](https://github.com/BurntSushi/ripgrep/pull/139): + The `ripgrep` repo is now a Homebrew tap. This is useful for installing + SIMD accelerated binaries, which aren't available in homebrew-core. + +Bug fixes: + +* [BUG #87](https://github.com/BurntSushi/ripgrep/issues/87), + [BUG #127](https://github.com/BurntSushi/ripgrep/issues/127), + [BUG #131](https://github.com/BurntSushi/ripgrep/issues/131): + Various issues related to glob matching. +* [BUG #116](https://github.com/BurntSushi/ripgrep/issues/116): + --quiet should stop search after first match. +* [BUG #121](https://github.com/BurntSushi/ripgrep/pull/121): + --color always should show colors, even when --vimgrep is used. +* [BUG #122](https://github.com/BurntSushi/ripgrep/pull/122): + Colorize file path at beginning of line. +* [BUG #134](https://github.com/BurntSushi/ripgrep/issues/134): + Processing a large ignore file (thousands of globs) was very slow. +* [BUG #137](https://github.com/BurntSushi/ripgrep/issues/137): + Always follow symlinks when given as an explicit argument. +* [BUG #147](https://github.com/BurntSushi/ripgrep/issues/147): + Clarify documentation for --replace. + + +0.2.1 +===== +Feature enhancements: + +* Added or improved file type filtering for Clojure and SystemVerilog. +* [FEATURE #89](https://github.com/BurntSushi/ripgrep/issues/89): + Add a --null flag that outputs a NUL byte after every file path. + +Bug fixes: + +* [BUG #98](https://github.com/BurntSushi/ripgrep/issues/98): + Fix a bug in single threaded mode where, if opening a file failed, ripgrep + quit instead of continuing the search. +* [BUG #99](https://github.com/BurntSushi/ripgrep/issues/99): + Fix another bug in single threaded mode where empty lines were being printed + by mistake. +* [BUG #105](https://github.com/BurntSushi/ripgrep/issues/105): + Fix an off-by-one error with --column.
+* [BUG #106](https://github.com/BurntSushi/ripgrep/issues/106): + Fix a bug where a whitespace only line in a gitignore file caused ripgrep + to panic (i.e., crash). + + +0.2.0 +===== +Feature enhancements: + +* Added or improved file type filtering for VB, R, F#, Swift, Nim, JavaScript, + TypeScript +* [FEATURE #20](https://github.com/BurntSushi/ripgrep/issues/20): + Adds a --no-filename flag. +* [FEATURE #26](https://github.com/BurntSushi/ripgrep/issues/26): + Adds --files-with-matches flag. Like --count, but only prints file paths + and doesn't need to count every match. +* [FEATURE #40](https://github.com/BurntSushi/ripgrep/issues/40): + Switch from using `.rgignore` to `.ignore`. Note that `.rgignore` is + still supported, but deprecated. +* [FEATURE #68](https://github.com/BurntSushi/ripgrep/issues/68): + Add --no-ignore-vcs flag that ignores .gitignore but not .ignore. +* [FEATURE #70](https://github.com/BurntSushi/ripgrep/issues/70): + Add -S/--smart-case flag (but is disabled by default). +* [FEATURE #80](https://github.com/BurntSushi/ripgrep/issues/80): + Add support for `{foo,bar}` globs. + +Many many bug fixes. Thanks everyone for reporting these and helping make +`ripgrep` better! (Note that I haven't captured every tracking issue here, +some were closed as duplicates.) + +* [BUG #8](https://github.com/BurntSushi/ripgrep/issues/8): + Don't use an intermediate buffer when --threads=1. (Permits constant memory + usage.) +* [BUG #15](https://github.com/BurntSushi/ripgrep/issues/15): + Improves the documentation for --type-add. +* [BUG #16](https://github.com/BurntSushi/ripgrep/issues/16), + [BUG #49](https://github.com/BurntSushi/ripgrep/issues/49), + [BUG #50](https://github.com/BurntSushi/ripgrep/issues/50), + [BUG #65](https://github.com/BurntSushi/ripgrep/issues/65): + Some gitignore globs were being treated as anchored when they weren't. +* [BUG #18](https://github.com/BurntSushi/ripgrep/issues/18): + --vimgrep reported incorrect column number.
+* [BUG #19](https://github.com/BurntSushi/ripgrep/issues/19): + ripgrep was hanging waiting on stdin in some Windows terminals. Note that + this introduced a new bug: + [#94](https://github.com/BurntSushi/ripgrep/issues/94). +* [BUG #21](https://github.com/BurntSushi/ripgrep/issues/21): + Removes leading `./` when printing file paths. +* [BUG #22](https://github.com/BurntSushi/ripgrep/issues/22): + Running `rg --help | echo` caused `rg` to panic. +* [BUG #24](https://github.com/BurntSushi/ripgrep/issues/24): + Clarify the central purpose of rg in its usage message. +* [BUG #25](https://github.com/BurntSushi/ripgrep/issues/25): + Anchored gitignore globs weren't applied in subdirectories correctly. +* [BUG #30](https://github.com/BurntSushi/ripgrep/issues/30): + Globs like `foo/**` should match contents of `foo`, but not `foo` itself. +* [BUG #35](https://github.com/BurntSushi/ripgrep/issues/35), + [BUG #81](https://github.com/BurntSushi/ripgrep/issues/81): + When automatically detecting stdin, only read if it's a file or a fifo. + i.e., ignore stdin in `rg foo < /dev/null`. +* [BUG #36](https://github.com/BurntSushi/ripgrep/issues/36): + Don't automatically pick memory maps on MacOS. Ever. +* [BUG #38](https://github.com/BurntSushi/ripgrep/issues/38): + Trailing whitespace in gitignore wasn't being ignored. +* [BUG #43](https://github.com/BurntSushi/ripgrep/issues/43): + --glob didn't work with directories. +* [BUG #46](https://github.com/BurntSushi/ripgrep/issues/46): + Use one fewer worker thread than what is provided on CLI. +* [BUG #47](https://github.com/BurntSushi/ripgrep/issues/47): + --help/--version now work even if other options are set. +* [BUG #55](https://github.com/BurntSushi/ripgrep/issues/55): + ripgrep was refusing to search /proc/cpuinfo. Fixed by disabling memory + maps for files with zero size. +* [BUG #64](https://github.com/BurntSushi/ripgrep/issues/64): + The first path given with --files set was ignored.
+* [BUG #67](https://github.com/BurntSushi/ripgrep/issues/67): + Sometimes whitelist globs like `!/dir` weren't interpreted as anchored. +* [BUG #77](https://github.com/BurntSushi/ripgrep/issues/77): + When -q/--quiet flag was passed, ripgrep kept searching even after a match + was found. +* [BUG #90](https://github.com/BurntSushi/ripgrep/issues/90): + Permit whitelisting hidden files. +* [BUG #93](https://github.com/BurntSushi/ripgrep/issues/93): + ripgrep was extracting an erroneous inner literal from a repeated pattern. diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/COPYING b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/COPYING new file mode 100644 index 000000000..bb9c20a09 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/COPYING @@ -0,0 +1,3 @@ +This project is dual-licensed under the Unlicense and MIT licenses. + +You may use this code under the terms of either license. diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.lock b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.lock new file mode 100644 index 000000000..87762b8a1 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.lock @@ -0,0 +1,536 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" + +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "cc" +version = "1.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + +[[package]] +name = "globset" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308ae749734e28d749a86f33212c7b756748568ce332f970ac1d9cd8531f32e6" +dependencies = [ + "grep-cli", + "grep-matcher", + "grep-pcre2", + "grep-printer", + "grep-regex", + "grep-searcher", +] + +[[package]] +name = "grep-cli" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f1288f0e06f279f84926fa4c17e3fcd2a22b357927a82f2777f7be26e4cec0" +dependencies = [ + "bstr", + "globset", + "libc", + "log", + "termcolor", + "winapi-util", +] + +[[package]] +name = "grep-matcher" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47a3141a10a43acfedc7c98a60a834d7ba00dfe7bec9071cbfc19b55b292ac02" +dependencies = [ + "memchr", +] + +[[package]] +name = "grep-pcre2" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d86f866e035f876ea632e08a128a134526bdd2ed0f0c8e89b2fcee94dd5dbe46" +dependencies = [ + "grep-matcher", + "log", + "pcre2", +] + +[[package]] +name = "grep-printer" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c112110ae4a891aa4d83ab82ecf734b307497d066f437686175e83fbd4e013fe" +dependencies = [ + "bstr", + "grep-matcher", 
+ "grep-searcher", + "log", + "serde", + "serde_json", + "termcolor", +] + +[[package]] +name = "grep-regex" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edd147c7e3296e7a26bd3a81345ce849557d5a8e48ed88f736074e760f91f7e" +dependencies = [ + "bstr", + "grep-matcher", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep-searcher" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9b6c14b3fc2e0a107d6604d3231dec0509e691e62447104bc385a46a7892cda" +dependencies = [ + "bstr", + "encoding_rs", + "encoding_rs_io", + "grep-matcher", + "log", + "memchr", + "memmap2", +] + +[[package]] +name = "ignore" +version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jemalloc-sys" +version = "0.5.4+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc" +dependencies = [ + "jemalloc-sys", + "libc", +] + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "lexopt" 
+version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401" + +[[package]] +name = "libc" +version = "0.2.158" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + +[[package]] +name = "pcre2" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be55c43ac18044541d58d897e8f4c55157218428953ebd39d86df3ba0286b2b" +dependencies = [ + "libc", + "log", + "pcre2-sys", +] + +[[package]] +name = "pcre2-sys" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "ripgrep" +version = "14.1.1" +dependencies = [ + "anyhow", + "bstr", + "grep", + "ignore", + "jemallocator", + "lexopt", + "log", + "serde", + "serde_derive", + "serde_json", + "termcolor", + "textwrap", + "walkdir", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.128" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = 
"windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml new file mode 100644 index 000000000..b826faa36 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml @@ -0,0 +1,188 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.72" +name = "ripgrep" +version = "14.1.1" +authors = ["Andrew Gallant "] +build = "build.rs" +exclude = [ + "HomebrewFormula", + "/.github/", + "/ci/", + "/pkg/brew", + "/benchsuite/", + "/scripts/", +] +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = """ +ripgrep is a line-oriented search tool that recursively searches the current +directory for a regex pattern while respecting gitignore rules. ripgrep has +first class support on Windows, macOS and Linux. 
+""" +homepage = "https://github.com/BurntSushi/ripgrep" +documentation = "https://github.com/BurntSushi/ripgrep" +readme = "README.md" +keywords = [ + "regex", + "grep", + "egrep", + "search", + "pattern", +] +categories = [ + "command-line-utilities", + "text-processing", +] +license = "Unlicense OR MIT" +repository = "https://github.com/BurntSushi/ripgrep" + +[package.metadata.deb] +assets = [ + [ + "target/release/rg", + "usr/bin/", + "755", +], + [ + "COPYING", + "usr/share/doc/ripgrep/", + "644", +], + [ + "LICENSE-MIT", + "usr/share/doc/ripgrep/", + "644", +], + [ + "UNLICENSE", + "usr/share/doc/ripgrep/", + "644", +], + [ + "CHANGELOG.md", + "usr/share/doc/ripgrep/CHANGELOG", + "644", +], + [ + "README.md", + "usr/share/doc/ripgrep/README", + "644", +], + [ + "FAQ.md", + "usr/share/doc/ripgrep/FAQ", + "644", +], + [ + "deployment/deb/rg.1", + "usr/share/man/man1/rg.1", + "644", +], + [ + "deployment/deb/rg.bash", + "usr/share/bash-completion/completions/rg", + "644", +], + [ + "deployment/deb/rg.fish", + "usr/share/fish/vendor_completions.d/rg.fish", + "644", +], + [ + "deployment/deb/_rg", + "usr/share/zsh/vendor-completions/", + "644", +], +] +extended-description = """ +ripgrep (rg) recursively searches your current directory for a regex pattern. +By default, ripgrep will respect your .gitignore and automatically skip hidden +files/directories and binary files. 
+""" +features = ["pcre2"] +section = "utils" + +[profile.deb] +debug = 0 +inherits = "release" + +[profile.release] +debug = 1 + +[profile.release-lto] +opt-level = 3 +lto = "fat" +codegen-units = 1 +debug = 0 +debug-assertions = false +panic = "abort" +overflow-checks = false +incremental = false +inherits = "release" +strip = "symbols" + +[[bin]] +name = "rg" +path = "crates/core/main.rs" +bench = false + +[[test]] +name = "integration" +path = "tests/tests.rs" + +[dependencies.anyhow] +version = "1.0.75" + +[dependencies.bstr] +version = "1.7.0" + +[dependencies.grep] +version = "0.3.2" + +[dependencies.ignore] +version = "0.4.23" + +[dependencies.lexopt] +version = "0.3.0" + +[dependencies.log] +version = "0.4.5" + +[dependencies.serde_json] +version = "1.0.23" + +[dependencies.termcolor] +version = "1.1.0" + +[dependencies.textwrap] +version = "0.16.0" +default-features = false + +[dev-dependencies.serde] +version = "1.0.77" + +[dev-dependencies.serde_derive] +version = "1.0.77" + +[dev-dependencies.walkdir] +version = "2" + +[features] +pcre2 = ["grep/pcre2"] + +[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator] +version = "0.5.0" diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml.orig b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml.orig new file mode 100644 index 000000000..2ec69c92a --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml.orig @@ -0,0 +1,120 @@ +[package] +name = "ripgrep" +version = "14.1.1" #:version +authors = ["Andrew Gallant "] +description = """ +ripgrep is a line-oriented search tool that recursively searches the current +directory for a regex pattern while respecting gitignore rules. ripgrep has +first class support on Windows, macOS and Linux. 
+""" +documentation = "https://github.com/BurntSushi/ripgrep" +homepage = "https://github.com/BurntSushi/ripgrep" +repository = "https://github.com/BurntSushi/ripgrep" +keywords = ["regex", "grep", "egrep", "search", "pattern"] +categories = ["command-line-utilities", "text-processing"] +license = "Unlicense OR MIT" +exclude = [ + "HomebrewFormula", + "/.github/", + "/ci/", + "/pkg/brew", + "/benchsuite/", + "/scripts/", +] +build = "build.rs" +autotests = false +edition = "2021" +rust-version = "1.72" + +[[bin]] +bench = false +path = "crates/core/main.rs" +name = "rg" + +[[test]] +name = "integration" +path = "tests/tests.rs" + +[workspace] +members = [ + "crates/globset", + "crates/grep", + "crates/cli", + "crates/matcher", + "crates/pcre2", + "crates/printer", + "crates/regex", + "crates/searcher", + "crates/ignore", +] + +[dependencies] +anyhow = "1.0.75" +bstr = "1.7.0" +grep = { version = "0.3.2", path = "crates/grep" } +ignore = { version = "0.4.23", path = "crates/ignore" } +lexopt = "0.3.0" +log = "0.4.5" +serde_json = "1.0.23" +termcolor = "1.1.0" +textwrap = { version = "0.16.0", default-features = false } + +[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator] +version = "0.5.0" + +[dev-dependencies] +serde = "1.0.77" +serde_derive = "1.0.77" +walkdir = "2" + +[features] +pcre2 = ["grep/pcre2"] + +[profile.release] +debug = 1 + +[profile.release-lto] +inherits = "release" +opt-level = 3 +debug = "none" +strip = "symbols" +debug-assertions = false +overflow-checks = false +lto = "fat" +panic = "abort" +incremental = false +codegen-units = 1 + +# This is the main way to strip binaries in the deb package created by +# 'cargo deb'. For other release binaries, we (currently) call 'strip' +# explicitly in the release process. 
+[profile.deb] +inherits = "release" +debug = false + +[package.metadata.deb] +features = ["pcre2"] +section = "utils" +assets = [ + ["target/release/rg", "usr/bin/", "755"], + ["COPYING", "usr/share/doc/ripgrep/", "644"], + ["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"], + ["UNLICENSE", "usr/share/doc/ripgrep/", "644"], + ["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"], + ["README.md", "usr/share/doc/ripgrep/README", "644"], + ["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"], + # The man page is automatically generated by ripgrep's build process, so + # this file isn't actually committed. Instead, to create a dpkg, either + # create a deployment/deb directory and copy the man page to it, or use the + # 'ci/build-deb' script. + ["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"], + # Similarly for shell completions. + ["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"], + ["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"], + ["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"], +] +extended-description = """\ +ripgrep (rg) recursively searches your current directory for a regex pattern. +By default, ripgrep will respect your .gitignore and automatically skip hidden +files/directories and binary files. 
+""" diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/FAQ.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/FAQ.md new file mode 100644 index 000000000..84d01f2df --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/FAQ.md @@ -0,0 +1,1046 @@ +## FAQ + +* [Does ripgrep support configuration files?](#config) +* [What's changed in ripgrep recently?](#changelog) +* [When is the next release?](#release) +* [Does ripgrep have a man page?](#manpage) +* [Does ripgrep have support for shell auto-completion?](#complete) +* [How can I get results in a consistent order?](#order) +* [How do I search files that aren't UTF-8?](#encoding) +* [How do I search compressed files?](#compressed) +* [How do I search over multiple lines?](#multiline) +* [How do I use lookaround and/or backreferences?](#fancy) +* [How do I configure ripgrep's colors?](#colors) +* [How do I enable true colors on Windows?](#truecolors-windows) +* [How do I stop ripgrep from messing up colors when I kill it?](#stop-ripgrep) +* [Why does using a leading `/` on Windows fail?](#because-cygwin) +* [How do I get around the regex size limit?](#size-limit) +* [How do I make the `-f/--file` flag faster?](#dfa-size) +* [How do I make the output look like The Silver Searcher's output?](#silver-searcher-output) +* [Why does ripgrep get slower when I enabled PCRE2 regexes?](#pcre2-slow) +* [When I run `rg`, why does it execute some other command?](#rg-other-cmd) +* [How do I create an alias for ripgrep on Windows?](#rg-alias-windows) +* [How do I create a PowerShell profile?](#powershell-profile) +* [How do I pipe non-ASCII content to ripgrep on Windows?](#pipe-non-ascii-windows) +* [How can I search and replace with ripgrep?](#search-and-replace) +* [How is ripgrep licensed?](#license) +* [Can ripgrep replace grep?](#posix4ever) +* [What does the "rip" in ripgrep mean?](#intentcountsforsomething) +* [How can I donate to ripgrep or its maintainers?](#donations) + + +

+Does ripgrep support configuration files? +

+ +Yes. See the +[guide's section on configuration files](GUIDE.md#configuration-file). + + +

+What's changed in ripgrep recently? +

+ +Please consult ripgrep's [CHANGELOG](CHANGELOG.md). + + +

+When is the next release? +

+ +ripgrep is a project whose contributors are volunteers. A release schedule +adds undue stress to said volunteers. Therefore, releases are made on a best +effort basis and no dates **will ever be given**. + +An exception to this _can be_ high impact bugs. If a ripgrep release contains +a significant regression, then there will generally be a strong push to get a +patch release out with a fix. However, no promises are made. + + +

+Does ripgrep have a man page? +

+ +Yes. If you installed ripgrep through a package manager on a Unix system, then +it would have ideally been installed for you in the proper location. In which +case, `man rg` should just work. + +Otherwise, you can ask ripgrep to generate the man page: + +``` +$ mkdir -p man/man1 +$ rg --generate man > man/man1/rg.1 +$ MANPATH="$PWD/man" man rg +``` + +Or, if your version of `man` supports the `-l/--local-file` flag, then this +will suffice: + +``` +$ rg --generate man | man -l - +``` + +Note that the man page's documentation for options is equivalent to the output +shown in `rg --help`. To see more condensed documentation (one line per flag), +run `rg -h`. + +The man page is also included in all +[ripgrep binary releases](https://github.com/BurntSushi/ripgrep/releases). + + +

+Does ripgrep have support for shell auto-completion? +

+ +Yes! If you installed ripgrep through a package manager on a Unix system, then +the shell completion files included in the release archive should have been +installed for you automatically. If not, you can generate completions using +ripgrep's command line interface. + +For **bash**: + +``` +$ dir="$XDG_CONFIG_HOME/bash_completion" +$ mkdir -p "$dir" +$ rg --generate complete-bash > "$dir/rg.bash" +``` + +For **fish**: + +``` +$ dir="$XDG_CONFIG_HOME/fish/completions" +$ mkdir -p "$dir" +$ rg --generate complete-fish > "$dir/rg.fish" +``` + +For **zsh**: + +``` +$ dir="$HOME/.zsh-complete" +$ mkdir -p "$dir" +$ rg --generate complete-zsh > "$dir/_rg" +``` + +For **PowerShell**, create the completions: + +``` +$ rg --generate complete-powershell > _rg.ps1 +``` + +And then add `. _rg.ps1` to your PowerShell +[profile](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) +(note the leading period). If the `_rg.ps1` file is not on your `PATH`, do +`. /path/to/_rg.ps1` instead. + + +

+How can I get results in a consistent order? +

+ +By default, ripgrep uses parallelism to execute its search because this makes +the search much faster on most modern systems. This in turn means that ripgrep +has a non-deterministic aspect to it, since the interleaving of threads during +the execution of the program is itself non-deterministic. This has the effect +of printing results in a somewhat arbitrary order, and this order can change +from run to run of ripgrep. + +The only way to make the order of results consistent is to ask ripgrep to +sort the output. Currently, this will disable all parallelism. (On smaller +repositories, you might not notice much of a performance difference!) You +can achieve this with the `--sort path` flag. + +There is more discussion on this topic here: +https://github.com/BurntSushi/ripgrep/issues/152 + + +

+How do I search files that aren't UTF-8? +

+ +See the [guide's section on file encoding](GUIDE.md#file-encoding). + + +

+How do I search compressed files? +

+ +ripgrep's `-z/--search-zip` flag will cause it to search compressed files +automatically. Currently, this supports gzip, bzip2, xz, lzma, lz4, Brotli and +Zstd. Each of these requires the corresponding `gzip`, `bzip2`, `xz`, +`lz4`, `brotli` and `zstd` binaries to be installed on your system. (That is, +ripgrep does decompression by shelling out to another process.) + +ripgrep currently does not search archive formats, so `*.tar.gz` files, for +example, are skipped. + + +

+How do I search over multiple lines? +

+ +The `-U/--multiline` flag enables ripgrep to report results that span over +multiple lines. + + +

+How do I use lookaround and/or backreferences? +

+ +ripgrep's default regex engine does not support lookaround or backreferences. +This is primarily because the default regex engine is implemented using finite +state machines in order to guarantee a linear worst case time complexity on all +inputs. Backreferences are not possible to implement in this paradigm, and +lookaround appears difficult to do efficiently. + +However, ripgrep optionally supports using PCRE2 as the regex engine instead of +the default one based on finite state machines. You can enable PCRE2 with the +`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily +find every run of 10 word characters that is immediately repeated: + +``` +$ rg -P '(\w{10})\1' +tests/misc.rs +483: cmd.arg("--max-filesize").arg("44444444444444444444"); +globset/src/glob.rs +1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +``` + +If your version of ripgrep doesn't support PCRE2, then you'll get an error +message when you try to use the `-P/--pcre2` flag: + +``` +$ rg -P '(\w{10})\1' +PCRE2 is not available in this build of ripgrep +``` + +Most of the releases distributed by the ripgrep project here on GitHub will +come bundled with PCRE2 enabled. If you installed ripgrep through a different +means (like your system's package manager), then please reach out to the +maintainer of that package to see whether it's possible to enable the PCRE2 +feature. + + +

+How do I configure ripgrep's colors? +

+ +ripgrep has two flags related to colors: + +* `--color` controls *when* to use colors. +* `--colors` controls *which* colors to use. + +The `--color` flag accepts one of the following possible values: `never`, +`auto`, `always` or `ansi`. The `auto` value is the default and will cause +ripgrep to only enable colors when it is printing to a terminal. But if you +pipe ripgrep to a file or some other process, then it will suppress colors. + +The `--colors` flag is a bit more complicated. The general format is: + +``` +--colors '{type}:{attribute}:{value}' +``` + +* `{type}` should be one of `path`, `line`, `column` or `match`. Each of these + corresponds to one of the four different types of things that ripgrep will add color + to in its output. Select the type whose color you want to change. +* `{attribute}` should be one of `fg`, `bg` or `style`, corresponding to + foreground color, background color, or miscellaneous styling (such as whether + to bold the output or not). +* `{value}` is determined by the value of `{attribute}`. If + `{attribute}` is `style`, then `{value}` should be one of `nobold`, + `bold`, `nointense`, `intense`, `nounderline` or `underline`. If + `{attribute}` is `fg` or `bg`, then `{value}` should be a color. + +A color is specified by either one of eight English names, a single 256-color +number or an RGB triple (with over 16 million possible values, or "true +color"). + +The color names are `red`, `blue`, `green`, `cyan`, `magenta`, `yellow`, +`white` or `black`. + +A single 256-color number is a value in the range 0-255 (inclusive). It can +either be in decimal format (e.g., `62`) or hexadecimal format (e.g., `0x3E`). + +An RGB triple corresponds to three numbers (decimal or hexadecimal) separated +by commas. + +As a special case, `--colors '{type}:none'` will clear all colors and styles +associated with `{type}`, which lets you start with a clean slate (instead of +building on top of ripgrep's default color settings). 
+ +Here's an example that highlights the matches with a nice blue background +with bolded white text: + +``` +$ rg somepattern \ + --colors 'match:none' \ + --colors 'match:bg:0x33,0x66,0xFF' \ + --colors 'match:fg:white' \ + --colors 'match:style:bold' +``` + +Colors are an ideal candidate to set in your +[configuration file](GUIDE.md#configuration-file). See the +[question on emulating The Silver Searcher's output style](#silver-searcher-output) +for an example specific to colors. + + +

+How do I enable true colors on Windows? +

+ +First, see the previous question's +[answer on configuring colors](#colors). + +Secondly, coloring on Windows is a bit complicated. If you're using a terminal +like Cygwin, then it's likely true color support already works out of the box. +However, if you are using a normal Windows console (`cmd` or `PowerShell`) and +a version of Windows prior to 10, then there is no known way to get true +color support. If you are on Windows 10 and using a Windows console, then +true colors should work out of the box with one caveat: you might need to +clear ripgrep's default color settings first. That is, instead of this: + +``` +$ rg somepattern --colors 'match:fg:0x33,0x66,0xFF' +``` + +you should do this + +``` +$ rg somepattern --colors 'match:none' --colors 'match:fg:0x33,0x66,0xFF' +``` + +This is because ripgrep might set the default style for `match` to `bold`, and +it seems like Windows 10's VT100 support doesn't permit bold and true color +ANSI escapes to be used simultaneously. The work-around above will clear +ripgrep's default styling, allowing you to craft it exactly as desired. + + +

+How do I stop ripgrep from messing up colors when I kill it? +

+ +Type in `color` in cmd.exe (Command Prompt) and `echo -ne "\033[0m"` on +Unix-like systems to restore your original foreground color. + +In PowerShell, you can add the following code to your profile which will +restore the original foreground color when `Reset-ForegroundColor` is called. +Including the `Set-Alias` line will allow you to call it with simply `color`. + +```powershell +$OrigFgColor = $Host.UI.RawUI.ForegroundColor +function Reset-ForegroundColor { + $Host.UI.RawUI.ForegroundColor = $OrigFgColor +} +Set-Alias -Name color -Value Reset-ForegroundColor +``` + +PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, and it +was later deprecated in +[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is +available +[here](https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893). + + +

+Why does using a leading `/` on Windows fail? +

+ +If you're using cygwin on Windows and try to search for a pattern beginning +with a `/`, then it's possible that cygwin is mangling that pattern without +your knowledge. For example, if you tried running `rg /foo` in a cygwin shell +on Windows, then cygwin might mistakenly perform path translation on `/foo`, +which would result in `rg C:/msys64/foo` being searched instead. + +You can fix this in one of three ways: + +1. Stop using cygwin. +2. Escape the leading slash with an additional slash. e.g., `rg //foo`. +3. Temporarily disable path translation by setting `MSYS_NO_PATHCONV=1`. e.g., + `MSYS_NO_PATHCONV=1 rg /foo`. + +For more details, see https://github.com/BurntSushi/ripgrep/issues/1277 + + +

+How do I get around the regex size limit? +

+ +If you've given ripgrep a particularly large pattern (or a large number of +smaller patterns), then it is possible that it will fail to compile because it +hit a pre-set limit. For example: + +``` +$ rg '\pL{1000}' +Compiled regex exceeds size limit of 10485760 bytes. +``` + +(Note: `\pL{1000}` may look small, but `\pL` is the character class containing +all Unicode letters, which is quite large. *And* it's repeated 1000 times.) + +In this case, you can work around this by simply increasing the limit: + +``` +$ rg '\pL{1000}' --regex-size-limit 1G +``` + +Increasing the limit to 1GB does not necessarily mean that ripgrep will use +that much memory. The limit just says that it's allowed to (approximately) use +that much memory for constructing the regular expression. + + +

+How do I make the -f/--file flag faster? +

+ +The `-f/--file` flag permits one to give a file to ripgrep which contains a pattern +on each line. ripgrep will then report any line that matches any of the +patterns. + +If this pattern file gets too big, then it is possible ripgrep will slow down +dramatically. *Typically* this is because an internal cache is too small, and +will cause ripgrep to spill over to a slower but more robust regular expression +engine. If this is indeed the problem, then it is possible to increase this +cache and regain speed. The cache can be controlled via the `--dfa-size-limit` +flag. For example, using `--dfa-size-limit 1G` will set the cache size to 1GB. +(Note that this doesn't mean ripgrep will use 1GB of memory automatically, but +it will allow the regex engine to if it needs to.) + + +

+How do I make the output look like The Silver Searcher's output? +

+ +Use the `--colors` flag, like so: + +``` +rg --colors line:fg:yellow \ + --colors line:style:bold \ + --colors path:fg:green \ + --colors path:style:bold \ + --colors match:fg:black \ + --colors match:bg:yellow \ + --colors match:style:nobold \ + foo +``` + +Alternatively, add your color configuration to your ripgrep config file (which +is activated by setting the `RIPGREP_CONFIG_PATH` environment variable to point +to your config file). For example: + +``` +$ cat $HOME/.config/ripgrep/rc +--colors=line:fg:yellow +--colors=line:style:bold +--colors=path:fg:green +--colors=path:style:bold +--colors=match:fg:black +--colors=match:bg:yellow +--colors=match:style:nobold +$ RIPGREP_CONFIG_PATH=$HOME/.config/ripgrep/rc rg foo +``` + + +

+Why does ripgrep get slower when I enable PCRE2 regexes? +

+ +When you use the `--pcre2` (`-P` for short) flag, ripgrep will use the PCRE2 +regex engine instead of the default. Both regex engines are quite fast, +but PCRE2 provides a number of additional features such as look-around and +backreferences that many enjoy using. This is largely because PCRE2 uses +a backtracking implementation whereas the default regex engine uses a finite +automaton based implementation. The former provides the ability to add lots of +bells and whistles over the latter, but the latter executes with worst case +linear time complexity. + +With that out of the way, if you've used `-P` with ripgrep, you may have +noticed that it can be slower. The reasons for why this is are quite complex, +and they are complex because the optimizations that ripgrep uses to implement +fast search are complex. + +The task ripgrep has before it is somewhat simple; all it needs to do is search +a file for occurrences of some pattern and then print the lines containing +those occurrences. The problem lies in what is considered a valid match and how +exactly we read the bytes from a file. + +In terms of what is considered a valid match, remember that ripgrep will only +report matches spanning a single line by default. The problem here is that +some patterns can match across multiple lines, and ripgrep needs to prevent +that from happening. For example, `foo\sbar` will match `foo\nbar`. The most +obvious way to achieve this is to read the data from a file, and then apply +the pattern search to that data for each line. The problem with this approach +is that it can be quite slow; it would be much faster to let the pattern +search across as much data as possible. It's faster because it gets rid of the +overhead of finding the boundaries of every line, and also because it gets rid +of the overhead of starting and stopping the pattern search for every single +line. 
(This is operating under the general assumption that matching lines are +much rarer than non-matching lines.) + +It turns out that we can use the faster approach by applying a very simple +restriction to the pattern: *statically prevent* the pattern from matching +through a `\n` character. Namely, when given a pattern like `foo\sbar`, +ripgrep will remove `\n` from the `\s` character class automatically. In some +cases, a simple removal is not so easy. For example, ripgrep will return an +error when your pattern includes a `\n` literal: + +``` +$ rg '\n' +the literal '"\n"' is not allowed in a regex +``` + +So what does this have to do with PCRE2? Well, ripgrep's default regex engine +exposes APIs for doing syntactic analysis on the pattern in a way that makes +it quite easy to strip `\n` from the pattern (or otherwise detect it and report +an error if stripping isn't possible). PCRE2 seemingly does not provide a +similar API, so ripgrep does not do any stripping when PCRE2 is enabled. This +forces ripgrep to use the "slow" search strategy of searching each line +individually. + +OK, so if enabling PCRE2 slows down the default method of searching because it +forces matches to be limited to a single line, then why is PCRE2 also sometimes +slower when performing multiline searches? Well, that's because there are +*multiple* reasons why using PCRE2 in ripgrep can be slower than the default +regex engine. This time, blame PCRE2's Unicode support, which ripgrep enables +by default. In particular, PCRE2 cannot simultaneously enable Unicode support +and search arbitrary data. That is, when PCRE2's Unicode support is enabled, +the data **must** be valid UTF-8 (to do otherwise is to invoke undefined +behavior). This is in contrast to ripgrep's default regex engine, which can +enable Unicode support and still search arbitrary data. ripgrep's default +regex engine simply won't match invalid UTF-8 for a pattern that can otherwise +only match valid UTF-8. 
Why doesn't PCRE2 do the same? This author isn't +familiar with its internals, so we can't comment on it here. + +The bottom line here is that we can't enable PCRE2's Unicode support without +simultaneously incurring a performance penalty for ensuring that we are +searching valid UTF-8. In particular, ripgrep will transcode the contents +of each file to UTF-8 while replacing invalid UTF-8 data with the Unicode +replacement codepoint. ripgrep then disables PCRE2's own internal UTF-8 +checking, since we've guaranteed the data we hand it will be valid UTF-8. The +reason why ripgrep takes this approach is because if we do hand PCRE2 invalid +UTF-8, then it will report a match error if it comes across an invalid UTF-8 +sequence. This is not good news for ripgrep, since it will stop it from +searching the rest of the file, and will also print potentially undesirable +error messages to users. + +All right, the above is a lot of information to swallow if you aren't already +familiar with ripgrep internals. Let's make this concrete with some examples. +First, let's get some data big enough to magnify the performance differences: + +``` +$ curl -O 'https://burntsushi.net/stuff/subtitles2016-sample.gz' +$ gzip -d subtitles2016-sample +$ md5sum subtitles2016-sample +e3cb796a20bbc602fbfd6bb43bda45f5 subtitles2016-sample +``` + +To search this data, we will use the pattern `^\w{42}$`, which contains exactly +one hit in the file and has no literals. Having no literals is important, +because it ensures that the regex engine won't use literal optimizations to +speed up the search. In other words, it lets us reason coherently about the +actual task that the regex engine is performing. + +Let's now walk through a few examples in light of the information above. 
First, +let's consider the default search using ripgrep's default regex engine and +then the same search with PCRE2: + +``` +$ time rg '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.783s +user 0m1.731s +sys 0m0.051s + +$ time rg -P '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m2.458s +user 0m2.419s +sys 0m0.038s +``` + +In this particular example, both pattern searches are using a Unicode aware +`\w` character class and both are counting lines in order to report line +numbers. The key difference here is that the first search will not search +line by line, but the second one will. We can observe which strategy ripgrep +uses by passing the `--trace` flag: + +``` +$ rg '^\w{42}$' subtitles2016-sample --trace +[... snip ...] +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:712: slice reader: searching via slice-by-line strategy +TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:61: searcher core: will use fast line searcher +[... snip ...] + +$ rg -P '^\w{42}$' subtitles2016-sample --trace +[... snip ...] +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:705: slice reader: needs transcoding, using generic reader +TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:685: generic reader: searching via roll buffer strategy +TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:63: searcher core: will use slow line searcher +[... snip ...] +``` + +The first says it is using the "fast line searcher" where as the latter says +it is using the "slow line searcher." 
The latter also shows that we are +decoding the contents of the file, which also impacts performance. + +Interestingly, in this case, the pattern does not match a `\n` and the file +we're searching is valid UTF-8, so neither the slow line-by-line search +strategy nor the decoding are necessary. We could fix the former issue with +better PCRE2 introspection APIs. We can actually fix the latter issue with +ripgrep's `--no-encoding` flag, which prevents the automatic UTF-8 decoding, +but will enable PCRE2's own UTF-8 validity checking. Unfortunately, it's slower +in my build of ripgrep: + +``` +$ time rg -P '^\w{42}$' subtitles2016-sample --no-encoding +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m3.074s +user 0m3.021s +sys 0m0.051s +``` + +(Tip: use the `--trace` flag to verify that no decoding in ripgrep is +happening.) + +A possible reason why PCRE2's UTF-8 checking is slower is because it might +not be better than the highly optimized UTF-8 checking routines found in the +[`encoding_rs`](https://github.com/hsivonen/encoding_rs) library, which is what +ripgrep uses for UTF-8 decoding. Moreover, my build of ripgrep enables +`encoding_rs`'s SIMD optimizations, which may be in play here. + +Also, note that using the `--no-encoding` flag can cause PCRE2 to report +invalid UTF-8 errors, which causes ripgrep to stop searching the file: + +``` +$ cat invalid-utf8 +foobar + +$ xxd invalid-utf8 +00000000: 666f 6fff 6261 720a foo.bar. + +$ rg foo invalid-utf8 +1:foobar + +$ rg -P foo invalid-utf8 +1:foo�bar + +$ rg -P foo invalid-utf8 --no-encoding +invalid-utf8: PCRE2: error matching: UTF-8 error: illegal byte (0xfe or 0xff) +``` + +All right, so at this point, you might think that we could remove the penalty +for line-by-line searching by enabling multiline search. After all, our +particular pattern can't match across multiple lines anyway, so we'll still get +the results we want. 
Let's try it: + +``` +$ time rg -U '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.803s +user 0m1.748s +sys 0m0.054s + +$ time rg -P -U '^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m2.962s +user 0m2.246s +sys 0m0.713s +``` + +Search times remain the same with the default regex engine, but the PCRE2 +search gets _slower_. What happened? The secrets can be revealed with the +`--trace` flag once again. In the former case, ripgrep actually detects that +the pattern can't match across multiple lines, and so will fall back to the +"fast line search" strategy as with our search without `-U`. + +However, for PCRE2, things are much worse. Namely, since Unicode mode is still +enabled, ripgrep is still going to decode UTF-8 to ensure that it hands only +valid UTF-8 to PCRE2. Unfortunately, one key downside of multiline search is +that ripgrep cannot do it incrementally. Since matches can be arbitrarily long, +ripgrep actually needs the entire file in memory at once. Normally, we can use +a memory map for this, but because we need to UTF-8 decode the file before +searching it, ripgrep winds up reading the entire contents of the file on to +the heap before executing a search. Owch. + +OK, so Unicode is killing us here. The file we're searching is _mostly_ ASCII, +so maybe we're OK with missing some data. (Try `rg '[\w--\p{ascii}]'` to see +non-ASCII word characters that an ASCII-only `\w` character class would miss.) 
+We can disable Unicode in both searches, but this is done differently depending +on the regex engine we use: + +``` +$ time rg '(?-u)^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.714s +user 0m1.669s +sys 0m0.044s + +$ time rg -P '^\w{42}$' subtitles2016-sample --no-pcre2-unicode +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.997s +user 0m1.958s +sys 0m0.037s +``` + +For the most part, ripgrep's default regex engine performs about the same. +PCRE2 does improve a little bit, and is now almost as fast as the default +regex engine. If you look at the output of `--trace`, you'll see that ripgrep +will no longer perform UTF-8 decoding, but it does still use the slow +line-by-line searcher. + +At this point, we can combine all of our insights above: let's try to get off +of the slow line-by-line searcher by enabling multiline mode, and let's stop +UTF-8 decoding by disabling Unicode support: + +``` +$ time rg -U '(?-u)^\w{42}$' subtitles2016-sample +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.714s +user 0m1.655s +sys 0m0.058s + +$ time rg -P -U '^\w{42}$' subtitles2016-sample --no-pcre2-unicode +21225780:EverymajordevelopmentinthehistoryofAmerica + +real 0m1.121s +user 0m1.071s +sys 0m0.048s +``` + +Ah, there's PCRE2's JIT shining! ripgrep's default regex engine once again +remains about the same, but PCRE2 no longer needs to search line-by-line and it +no longer needs to do any kind of UTF-8 checks. This allows the file to get +memory mapped and passed right through PCRE2's JIT at impressive speeds. (As +a brief and interesting historical note, the configuration of "memory map + +multiline + no-Unicode" is exactly the configuration used by The Silver +Searcher. This analysis perhaps sheds some reasoning as to why that +configuration is useful!) 
+ +In summary, if you want PCRE2 to go as fast as possible and you don't care +about Unicode and you don't care about matches possibly spanning across +multiple lines, then enable multiline mode with `-U` and disable PCRE2's +Unicode support with the `--no-pcre2-unicode` flag. + +Caveat emptor: This author is not a PCRE2 expert, so there may be APIs that can +improve performance that the author missed. Similarly, there may be alternative +designs for a searching tool that are more amenable to how PCRE2 works. + + +

+When I run rg, why does it execute some other command? +

+ +It's likely that you have a shell alias or even another tool called `rg` which +is interfering with ripgrep. Run `which rg` to see what it is. + +(Notably, the Rails plug-in for +[Oh My Zsh](https://github.com/robbyrussell/oh-my-zsh/wiki/Plugins#rails) sets +up an `rg` alias for `rails generate`.) + +Problems like this can be resolved in one of several ways: + +* If you're using the OMZ Rails plug-in, disable it by editing the `plugins` + array in your zsh configuration. +* Temporarily bypass an existing `rg` alias by calling ripgrep as + `command rg`, `\rg`, or `'rg'`. +* Temporarily bypass an existing alias or another tool named `rg` by calling + ripgrep by its full path (e.g., `/usr/bin/rg` or `/usr/local/bin/rg`). +* Permanently disable an existing `rg` alias by adding `unalias rg` to the + bottom of your shell configuration file (e.g., `.bash_profile` or `.zshrc`). +* Give ripgrep its own alias that doesn't conflict with other tools/aliases by + adding a line like the following to the bottom of your shell configuration + file: `alias ripgrep='command rg'`. + + +

+How do I create an alias for ripgrep on Windows? +

+ +Often you will want to create an alias for a command you use a lot that sets +certain flags. But PowerShell function aliases do not behave like your typical +Linux shell alias. You always need to propagate arguments and `stdin` input. +But it cannot be done simply as +`function grep() { $input | rg.exe --hidden $args }` + +Use the example below as a reference for how to set up an alias in PowerShell. + +```powershell +function grep { + $count = @($input).Count + $input.Reset() + + if ($count) { + $input | rg.exe --hidden $args + } + else { + rg.exe --hidden $args + } +} +``` + +PowerShell special variables: + +* input - the PowerShell `stdin` object that allows you to access its content. +* args - the array of arguments passed to this function. + +This alias checks whether there is `stdin` input and propagates it only if there +are some lines. Otherwise, an empty `$input` will cause PowerShell to trigger `rg` to +search an empty `stdin`. + + +

+How do I create a PowerShell profile? +

+ +To customize PowerShell on start-up, a special PowerShell script +has to be created. In order to find its location, type `$profile`. +See +[Microsoft's documentation](https://technet.microsoft.com/en-us/library/bb613488(v=vs.85).aspx) +for more details. + +Any PowerShell code in this file gets evaluated when the console starts. This +way you can have your own aliases created at start-up. + + +

+How do I pipe non-ASCII content to ripgrep on Windows? +

+ +When piping input into native executables in PowerShell, the encoding of the +input is controlled by the `$OutputEncoding` variable. By default, this is set +to US-ASCII, and any characters in the pipeline that don't have encodings in +US-ASCII are converted to `?` (question mark) characters. + +To change this setting, set `$OutputEncoding` to a different encoding, as +represented by a .NET encoding object. Some common examples are below. The +value of this variable is reset when PowerShell restarts, so to make this +change take effect every time PowerShell is started add a line setting the +variable into your PowerShell profile. + +Example `$OutputEncoding` settings: + +* UTF-8 without BOM: `$OutputEncoding = [System.Text.UTF8Encoding]::new()` +* The console's output encoding: + `$OutputEncoding = [System.Console]::OutputEncoding` + +If you continue to have encoding problems, you can also force the encoding +that the console will use for printing to UTF-8 with +`[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8`. This +will also reset when PowerShell is restarted, so you can add that line +to your profile as well if you want to make the setting permanent. + +

+How can I search and replace with ripgrep? +

+ +Using ripgrep alone, you can't. ripgrep is a search tool that will never +touch your files. However, the output of ripgrep can be piped to other tools +that do modify files on disk. See +[this issue](https://github.com/BurntSushi/ripgrep/issues/74) for more +information. + +sed is one such tool that can modify files on disk. sed can take a filename +and a substitution command to search and replace in the specified file. +Files containing matching patterns can be provided to sed using + +``` +rg foo --files-with-matches +``` + +The output of this command is a list of filenames that contain a match for +the `foo` pattern. + +This list can be piped into `xargs`, which will split the filenames from +standard input into arguments for the command following xargs. You can use this +combination to pipe a list of filenames into sed for replacement. For example: + +``` +rg foo --files-with-matches | xargs sed -i 's/foo/bar/g' +``` + +will replace all instances of 'foo' with 'bar' in the files in which +ripgrep finds the foo pattern. The `-i` flag to sed indicates that you are +editing files in place, and `s/foo/bar/g` says that you are performing a +**s**ubstitution of the pattern `foo` for `bar`, and that you are doing this +substitution **g**lobally (all occurrences of the pattern in each file). + +Note: the above command assumes that you are using GNU sed. If you are using +BSD sed (the default on macOS and FreeBSD) then you must modify the above +command to be the following: + +``` +rg foo --files-with-matches | xargs sed -i '' 's/foo/bar/g' +``` + +The `-i` flag in BSD sed requires a file extension to be given to make backups +for all modified files. Specifying the empty string prevents file backups from +being made. + +Finally, if any of your file paths contain whitespace in them, then you might +need to delimit your file paths with a NUL terminator. 
This requires telling +ripgrep to output NUL bytes between each path, and telling xargs to read paths +delimited by NUL bytes: + +``` +rg foo --files-with-matches -0 | xargs -0 sed -i 's/foo/bar/g' +``` + +To learn more about sed, see the sed manual +[here](https://www.gnu.org/software/sed/manual/sed.html). + +Additionally, Facebook has a tool called +[fastmod](https://github.com/facebookincubator/fastmod) +that uses some of the same libraries as ripgrep and might provide a more +ergonomic search-and-replace experience. + + +

+How is ripgrep licensed? +

+ +ripgrep is dual licensed under the +[Unlicense](https://unlicense.org/) +and MIT licenses. Specifically, you may use ripgrep under the terms of either +license. + +The reason why ripgrep is dual licensed this way is two-fold: + +1. I, as ripgrep's author, would like to participate in a small bit of + ideological activism by promoting the Unlicense's goal: to disclaim + copyright monopoly interest. +2. I, as ripgrep's author, would like as many people to use ripgrep as + possible. Since the Unlicense is not a proven or well known license, ripgrep + is also offered under the MIT license, which is ubiquitous and accepted by + almost everyone. + +More specifically, ripgrep and all its dependencies are compatible with this +licensing choice. In particular, ripgrep's dependencies (direct and transitive) +will always be limited to permissive licenses. That is, ripgrep will never +depend on code that is not permissively licensed. This means rejecting any +dependency that uses a copyleft license such as the GPL, LGPL, MPL or any of +the Creative Commons ShareAlike licenses. Whether the license is "weak" +copyleft or not does not matter; ripgrep will **not** depend on it. + + +

+Can ripgrep replace grep? +

+ +Yes and no. + +If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep +can be used in every instance grep can be used, in exactly the same way, for +the same use cases, with exactly the same bug-for-bug behavior," then no, +ripgrep trivially *cannot* replace grep. Moreover, ripgrep will *never* replace +grep. + +If, upon hearing that "ripgrep can replace grep," you *actually* hear, "ripgrep +can replace grep in some cases and not in other use cases," then yes, that is +indeed true! + +Let's go over some of those use cases in favor of ripgrep. Some of these may +not apply to you. That's OK. There may be other use cases not listed here that +do apply to you. That's OK too. + +(For all claims related to performance in the following words, see my +[blog post](https://blog.burntsushi.net/ripgrep/) +introducing ripgrep.) + +* Are you frequently searching a repository of code? If so, ripgrep might be a + good choice since there's likely a good chunk of your repository that you + don't want to search. grep, can, of course, be made to filter files using + recursive search, and if you don't mind writing out the requisite `--exclude` + rules or writing wrapper scripts, then grep might be sufficient. (I'm not + kidding, I myself did this with grep for almost a decade before writing + ripgrep.) But if you instead enjoy having a search tool respect your + `.gitignore`, then ripgrep might be perfect for you! +* Are you frequently searching non-ASCII text that is UTF-8 encoded? One of + ripgrep's key features is that it can handle Unicode features in your + patterns in a way that tends to be faster than GNU grep. Unicode features + in ripgrep are enabled by default; there is no need to configure your locale + settings to use ripgrep properly because ripgrep doesn't respect your locale + settings. +* Do you need to search UTF-16 files and you don't want to bother explicitly + transcoding them? Great. ripgrep does this for you automatically. 
No need + to enable it. +* Do you need to search a large directory of large files? ripgrep uses + parallelism by default, which tends to make it faster than a standard + `grep -r` search. However, if you're OK writing the occasional + `find ./ -print0 | xargs -P8 -0 grep` command, then maybe grep is good + enough. + +Here are some cases where you might *not* want to use ripgrep. The same caveats +for the previous section apply. + +* Are you writing portable shell scripts intended to work in a variety of + environments? Great, probably not a good idea to use ripgrep! ripgrep has + nowhere near the ubiquity of grep, so if you do use ripgrep, you might need + to futz with the installation process more than you would with grep. +* Do you care about POSIX compatibility? If so, then you can't use ripgrep + because it never was, isn't and never will be POSIX compatible. +* Do you hate tools that try to do something smart? If so, ripgrep is all about + being smart, so you might prefer to just stick with grep. +* Is there a particular feature of grep you rely on that ripgrep either doesn't + have or never will have? If the former, file a bug report, maybe ripgrep can + do it! If the latter, well, then, just use grep. + + +

+What does the "rip" in ripgrep mean? +

+ +When I first started writing ripgrep, I called it `rep`, intending it to be a +shorter variant of `grep`. Soon after, I renamed it to `xrep` since `rep` +wasn't obvious enough of a name for my taste. And also because adding `x` to +anything always makes it better, right? + +Before ripgrep's first public release, I decided that I didn't like `xrep`. I +thought it was slightly awkward to type, and despite my previous praise of the +letter `x`, I kind of thought it was pretty lame. Being someone who really +likes Rust, I wanted to call it "rustgrep" or maybe "rgrep" for short. But I +thought that was just as lame, and maybe a little too in-your-face. But I +wanted to continue using `r` so I could at least pretend Rust had something to +do with it. + +I spent a couple of days trying to think of very short words that began with +the letter `r` that were even somewhat related to the task of searching. I +don't remember how it popped into my head, but "rip" came up as something that +meant "fast," as in, "to rip through your text." The fact that RIP is also +an initialism for "Rest in Peace" (as in, "ripgrep kills grep") never really +dawned on me. Perhaps the coincidence is too striking to believe that, but +I didn't realize it until someone explicitly pointed it out to me after the +initial public release. I admit that I found it mildly amusing, but if I had +realized it myself before the public release, I probably would have pressed on +and chosen a different name. Alas, renaming things after a release is hard, so I +decided to mush on. + +Given the fact that +[ripgrep never was, is or will be a 100% drop-in replacement for +grep](#posix4ever), +ripgrep is neither actually a "grep killer" nor was it ever intended to be. It +certainly does eat into some of its use cases, but that's nothing that other +tools like ack or The Silver Searcher weren't already doing. + + +

+How can I donate to ripgrep or its maintainers? +

+ +I welcome [sponsorship](https://github.com/sponsors/BurntSushi/). + +Or if you'd prefer, donating to a charitable organization that you like would +also be most welcome. My favorites are: + +* [The Internet Archive](https://archive.org/donate/) +* [Rails Girls](https://railsgirlssummerofcode.org/) +* [Wikipedia](https://wikimediafoundation.org/support/) diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/GUIDE.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/GUIDE.md new file mode 100644 index 000000000..6d51e460e --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/GUIDE.md @@ -0,0 +1,1022 @@ +## User Guide + +This guide is intended to give an elementary description of ripgrep and an +overview of its capabilities. This guide assumes that ripgrep is +[installed](README.md#installation) +and that readers have passing familiarity with using command line tools. This +also assumes a Unix-like system, although most commands are probably easily +translatable to any command line shell environment. + + +### Table of Contents + +* [Basics](#basics) +* [Recursive search](#recursive-search) +* [Automatic filtering](#automatic-filtering) +* [Manual filtering: globs](#manual-filtering-globs) +* [Manual filtering: file types](#manual-filtering-file-types) +* [Replacements](#replacements) +* [Configuration file](#configuration-file) +* [File encoding](#file-encoding) +* [Binary data](#binary-data) +* [Preprocessor](#preprocessor) +* [Common options](#common-options) + + +### Basics + +ripgrep is a command line tool that searches your files for patterns that +you give it. ripgrep behaves as if reading each file line by line. If a line +matches the pattern provided to ripgrep, then that line will be printed. If a +line does not match the pattern, then the line is not printed. + +The best way to see how this works is with an example. To show an example, we +need something to search. Let's try searching ripgrep's source code. 
First +grab a ripgrep source archive from +https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip +and extract it: + +``` +$ curl -LO https://github.com/BurntSushi/ripgrep/archive/0.7.1.zip +$ unzip 0.7.1.zip +$ cd ripgrep-0.7.1 +$ ls +benchsuite grep tests Cargo.toml LICENSE-MIT +ci ignore wincolor CHANGELOG.md README.md +complete pkg appveyor.yml compile snapcraft.yaml +doc src build.rs COPYING UNLICENSE +globset termcolor Cargo.lock HomebrewFormula +``` + +Let's try our first search by looking for all occurrences of the word `fast` +in `README.md`: + +``` +$ rg fast README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +(**Note:** If you see an error message from ripgrep saying that it didn't +search any files, then re-run ripgrep with the `--debug` flag. One likely cause +of this is that you have a `*` rule in a `$HOME/.gitignore` file.) + +So what happened here? ripgrep read the contents of `README.md`, and for each +line that contained `fast`, ripgrep printed it to your terminal. ripgrep also +included the line number for each line by default. If your terminal supports +colors, then your output might actually look something like this screenshot: + +[![A screenshot of a sample search ripgrep](https://burntsushi.net/stuff/ripgrep-guide-sample.png)](https://burntsushi.net/stuff/ripgrep-guide-sample.png) + +In this example, we searched for something called a "literal" string. This +means that our pattern was just some normal text that we asked ripgrep to +find. But ripgrep supports the ability to specify patterns via [regular +expressions](https://en.wikipedia.org/wiki/Regular_expression). 
As an example, +what if we wanted to find all lines that have a word that contains `fast` followed +by some number of other letters? + +``` +$ rg 'fast\w+' README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +119:### Is it really faster than everything else? +``` + +In this example, we used the pattern `fast\w+`. This pattern tells ripgrep to +look for any lines containing the letters `fast` followed by *one or more* +word-like characters. Namely, `\w` matches characters that compose words (like +`a` and `L` but unlike `.` and ` `). The `+` after the `\w` means, "match the +previous pattern one or more times." This means that the word `fast` won't +match because there are no word characters following the final `t`. But a word +like `faster` will. `faste` would also match! + +Here's a different variation on this same theme: + +``` +$ rg 'fast\w*' README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +In this case, we used `fast\w*` for our pattern instead of `fast\w+`. The `*` +means that it should match *zero* or more times. In this case, ripgrep will +print the same lines as the pattern `fast`, but if your terminal supports +colors, you'll notice that `faster` will be highlighted instead of just the +`fast` prefix. + +It is beyond the scope of this guide to provide a full tutorial on regular +expressions, but ripgrep's specific syntax is documented here: +https://docs.rs/regex/*/regex/#syntax + + +### Recursive search + +In the previous section, we showed how to use ripgrep to search a single file. +In this section, we'll show how to use ripgrep to search an entire directory +of files.
In fact, *recursively* searching your current working directory is +the default mode of operation for ripgrep, which means doing this is very +simple. + +Using our unzipped archive of ripgrep source code, here's how to find all +function definitions whose name is `write`: + +``` +$ rg 'fn write\(' +src/printer.rs +469: fn write(&mut self, buf: &[u8]) { + +termcolor/src/lib.rs +227: fn write(&mut self, b: &[u8]) -> io::Result<usize> { +250: fn write(&mut self, b: &[u8]) -> io::Result<usize> { +428: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) } +441: fn write(&mut self, b: &[u8]) -> io::Result<usize> { self.wtr.write(b) } +454: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +511: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +848: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +915: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +949: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1114: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1348: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +1353: fn write(&mut self, buf: &[u8]) -> io::Result<usize> { +``` + +(**Note:** We escape the `(` here because `(` has special significance inside +regular expressions. You could also use `rg -F 'fn write('` to achieve the +same thing, where `-F` interprets your pattern as a literal string instead of +a regular expression.) + +In this example, we didn't specify a file at all. Instead, ripgrep defaulted +to searching your current directory in the absence of a path. In general, +`rg foo` is equivalent to `rg foo ./`. + +This particular search showed us results in both the `src` and `termcolor` +directories. The `src` directory is the core ripgrep code whereas `termcolor` +is a dependency of ripgrep (and is used by other tools). What if we only wanted +to search core ripgrep code? Well, that's easy, just specify the directory you +want: + +``` +$ rg 'fn write\(' src +src/printer.rs +469: fn write(&mut self, buf: &[u8]) { +``` + +Here, ripgrep limited its search to the `src` directory.
Another way of doing +this search would be to `cd` into the `src` directory and simply use `rg 'fn +write\('` again. + + +### Automatic filtering + +After recursive search, ripgrep's most important feature is what it *doesn't* +search. By default, when you search a directory, ripgrep will ignore all of +the following: + +1. Files and directories that match glob patterns in these three categories: + 1. `.gitignore` globs (including global and repo-specific globs). This + includes `.gitignore` files in parent directories that are part of the + same `git` repository. (Unless the `--no-require-git` flag is given.) + 2. `.ignore` globs, which take precedence over all gitignore globs + when there's a conflict. This includes `.ignore` files in parent + directories. + 3. `.rgignore` globs, which take precedence over all `.ignore` globs + when there's a conflict. This includes `.rgignore` files in parent + directories. +2. Hidden files and directories. +3. Binary files. (ripgrep considers any file with a `NUL` byte to be binary.) +4. Symbolic links aren't followed. + +All of these things can be toggled using various flags provided by ripgrep: + +1. You can disable all ignore-related filtering with the `--no-ignore` flag. +2. Hidden files and directories can be searched with the `--hidden` (`-.` for +short) flag. +3. Binary files can be searched via the `--text` (`-a` for short) flag. + Be careful with this flag! Binary files may emit control characters to your + terminal, which might cause strange behavior. +4. ripgrep can follow symlinks with the `--follow` (`-L` for short) flag. + +As a special convenience, ripgrep also provides a flag called `--unrestricted` +(`-u` for short). Repeated uses of this flag will cause ripgrep to disable +more and more of its filtering. That is, `-u` will disable `.gitignore` +handling, `-uu` will search hidden files and directories and `-uuu` will search +binary files. 
This is useful when you're using ripgrep and you aren't sure +whether its filtering is hiding results from you. Tacking on a couple `-u` +flags is a quick way to find out. (Use the `--debug` flag if you're still +perplexed, and if that doesn't help, +[file an issue](https://github.com/BurntSushi/ripgrep/issues/new).) + +ripgrep's `.gitignore` handling actually goes a bit beyond just `.gitignore` +files. ripgrep will also respect repository specific rules found in +`$GIT_DIR/info/exclude`, as well as any global ignore rules in your +`core.excludesFile` (which is usually `$XDG_CONFIG_HOME/git/ignore` on +Unix-like systems). + +Sometimes you want to search files that are in your `.gitignore`, so it is +possible to specify additional ignore rules or overrides in a `.ignore` +(application agnostic) or `.rgignore` (ripgrep specific) file. + +For example, let's say you have a `.gitignore` file that looks like this: + +``` +log/ +``` + +This generally means that any `log` directory won't be tracked by `git`. +However, perhaps it contains useful output that you'd like to include in your +searches, but you still don't want to track it in `git`. You can achieve this +by creating a `.ignore` file in the same directory as the `.gitignore` file +with the following contents: + +``` +!log/ +``` + +ripgrep treats `.ignore` files with higher precedence than `.gitignore` files +(and treats `.rgignore` files with higher precedence than `.ignore` files). +This means ripgrep will see the `!log/` whitelist rule first and search that +directory. + +Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules +will be processed with respect to the directory it resides in, just like +`.gitignore`. + +To process `.gitignore` and `.ignore` files case insensitively, use the flag +`--ignore-file-case-insensitive`. This is especially useful on case insensitive +file systems like those on Windows and macOS. 
Note though that this can come +with a significant performance penalty, and is therefore disabled by default. + +For a more in depth description of how glob patterns in a `.gitignore` file +are interpreted, please see `man gitignore`. + + +### Manual filtering: globs + +In the previous section, we talked about ripgrep's filtering that it does by +default. It is "automatic" because it reacts to your environment. That is, it +uses already existing `.gitignore` files to produce more relevant search +results. + +In addition to automatic filtering, ripgrep also provides more manual or ad hoc +filtering. This comes in two varieties: additional glob patterns specified in +your ripgrep commands and file type filtering. This section covers glob +patterns while the next section covers file type filtering. + +In our ripgrep source code (see [Basics](#basics) for instructions on how to +get a source archive to search), let's say we wanted to see which things depend +on `clap`, our argument parser. + +We could do this: + +``` +$ rg clap +[lots of results] +``` + +But this shows us many things, and we're only interested in where we wrote +`clap` as a dependency. Instead, we could limit ourselves to TOML files, which +is how dependencies are communicated to Rust's build tool, Cargo: + +``` +$ rg clap -g '*.toml' +Cargo.toml +35:clap = "2.26" +51:clap = "2.26" +``` + +The `-g '*.toml'` syntax says, "make sure every file searched matches this +glob pattern." Note that we put `'*.toml'` in single quotes to prevent our +shell from expanding the `*`. + +If we wanted, we could tell ripgrep to search anything *but* `*.toml` files: + +``` +$ rg clap -g '!*.toml' +[lots of results] +``` + +This will give you a lot of results again as above, but they won't include +files ending with `.toml`. Note that the use of a `!` here to mean "negation" +is a bit non-standard, but it was chosen to be consistent with how globs in +`.gitignore` files are written. (Although, the meaning is reversed. 
In +`.gitignore` files, a `!` prefix means whitelist, and on the command line, a +`!` means blacklist.) + +Globs are interpreted in exactly the same way as `.gitignore` patterns. That +is, later globs will override earlier globs. For example, the following command +will search only `*.toml` files: + +``` +$ rg clap -g '!*.toml' -g '*.toml' +``` + +Interestingly, reversing the order of the globs in this case will match +nothing, since the presence of at least one non-blacklist glob will institute a +requirement that every file searched must match at least one glob. In this +case, the blacklist glob takes precedence over the previous glob and prevents +any file from being searched at all! + + +### Manual filtering: file types + +Over time, you might notice that you use the same glob patterns over and over. +For example, you might find yourself doing a lot of searches where you only +want to see results for Rust files: + +``` +$ rg 'fn run' -g '*.rs' +``` + +Instead of writing out the glob every time, you can use ripgrep's support for +file types: + +``` +$ rg 'fn run' --type rust +``` + +or, more succinctly, + +``` +$ rg 'fn run' -trust +``` + +The way the `--type` flag functions is simple. It acts as a name that is +assigned to one or more globs that match the relevant files. This lets you +write a single type that might encompass a broad range of file extensions. For +example, if you wanted to search C files, you'd have to check both C source +files and C header files: + +``` +$ rg 'int main' -g '*.{c,h}' +``` + +or you could just use the C file type: + +``` +$ rg 'int main' -tc +``` + +Just as you can write blacklist globs, you can blacklist file types too: + +``` +$ rg clap --type-not rust +``` + +or, more succinctly, + +``` +$ rg clap -Trust +``` + +That is, `-t` means "include files of this type" whereas `-T` means "exclude +files of this type."
+ +To see the globs that make up a type, run `rg --type-list`: + +``` +$ rg --type-list | rg '^make:' +make: *.mak, *.mk, GNUmakefile, Gnumakefile, Makefile, gnumakefile, makefile +``` + +By default, ripgrep comes with a bunch of pre-defined types. Generally, these +types correspond to well known public formats. But you can define your own +types as well. For example, perhaps you frequently search "web" files, which +consist of JavaScript, HTML and CSS: + +``` +$ rg --type-add 'web:*.html' --type-add 'web:*.css' --type-add 'web:*.js' -tweb title +``` + +or, more succinctly, + +``` +$ rg --type-add 'web:*.{html,css,js}' -tweb title +``` + +The above command defines a new type, `web`, corresponding to the glob +`*.{html,css,js}`. It then applies the new filter with `-tweb` and searches for +the pattern `title`. If you ran + +``` +$ rg --type-add 'web:*.{html,css,js}' --type-list +``` + +Then you would see your `web` type show up in the list, even though it is not +part of ripgrep's built-in types. + +It is important to stress here that the `--type-add` flag only applies to the +current command. It does not add a new file type and save it somewhere in a +persistent form. If you want a type to be available in every ripgrep command, +then you should either create a shell alias: + +``` +alias rg="rg --type-add 'web:*.{html,css,js}'" +``` + +or add `--type-add=web:*.{html,css,js}` to your ripgrep configuration file. +([Configuration files](#configuration-file) are covered in more detail later.) + +#### The special `all` file type + +A special option supported by the `--type` flag is `all`. `--type all` looks +for a match in any of the supported file types listed by `--type-list`, +including those added on the command line using `--type-add`. It's equivalent +to the command `rg --type agda --type asciidoc --type asm ...`, where `...` +stands for a list of `--type` flags for the rest of the types in `--type-list`. 
+ +As an example, let's suppose you have a shell script in your current directory, +`my-shell-script`, which includes a shell library, `my-shell-library.bash`. +Both `rg --type sh` and `rg --type all` would only search for matches in +`my-shell-library.bash`, not `my-shell-script`, because the globs matched +by the `sh` file type don't include files without an extension. On the +other hand, `rg --type-not all` would search `my-shell-script` but not +`my-shell-library.bash`. + +### Replacements + +ripgrep provides a limited ability to modify its output by replacing matched +text with some other text. This is easiest to explain with an example. Remember +when we searched for the word `fast` in ripgrep's README? + +``` +$ rg fast README.md +75: faster than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while +119:### Is it really faster than everything else? +124:Summarizing, `ripgrep` is fast because: +129: optimizations to make searching very fast. +``` + +What if we wanted to *replace* all occurrences of `fast` with `FAST`? That's +easy with ripgrep's `--replace` flag: + +``` +$ rg fast README.md --replace FAST +75: FASTer than both. (N.B. It is not, strictly speaking, a "drop-in" replacement +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays FAST while +119:### Is it really FASTer than everything else? +124:Summarizing, `ripgrep` is FAST because: +129: optimizations to make searching very FAST. +``` + +or, more succinctly, + +``` +$ rg fast README.md -r FAST +[snip] +``` + +In essence, the `--replace` flag applies *only* to the matching portion of text +in the output. If you instead wanted to replace an entire line of text, then +you need to include the entire line in your match. 
For example: + +``` +$ rg '^.*fast.*$' README.md -r FAST +75:FAST +88:FAST +119:FAST +124:FAST +129:FAST +``` + +Alternatively, you can combine the `--only-matching` (or `-o` for short) with +the `--replace` flag to achieve the same result: + +``` +$ rg fast README.md --only-matching --replace FAST +75:FAST +88:FAST +119:FAST +124:FAST +129:FAST +``` + +or, more succinctly, + +``` +$ rg fast README.md -or FAST +[snip] +``` + +Finally, replacements can include capturing groups. For example, let's say +we wanted to find all occurrences of `fast` followed by another word and +join them together with a dash. The pattern we might use for that is +`fast\s+(\w+)`, which matches `fast`, followed by any amount of whitespace, +followed by any number of "word" characters. We put the `\w+` in a "capturing +group" (indicated by parentheses) so that we can reference it later in our +replacement string. For example: + +``` +$ rg 'fast\s+(\w+)' README.md -r 'fast-$1' +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while +124:Summarizing, `ripgrep` is fast-because: +``` + +Our replacement string here, `fast-$1`, consists of `fast-` followed by the +contents of the capturing group at index `1`. (Capturing groups actually start +at index 0, but the `0`th capturing group always corresponds to the entire +match. The capturing group at index `1` always corresponds to the first +explicit capturing group found in the regex pattern.) + +Capturing groups can also be named, which is sometimes more convenient than +using the indices. For example, the following command is equivalent to the +above command: + +``` +$ rg 'fast\s+(?P<word>\w+)' README.md -r 'fast-$word' +88: color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast-while +124:Summarizing, `ripgrep` is fast-because: +``` + +It is important to note that ripgrep **will never modify your files**. The +`--replace` flag only controls ripgrep's output.
(And there is no flag to let +you do a replacement in a file.) + + +### Configuration file + +It is possible that ripgrep's default options aren't suitable in every case. +For that reason, and because shell aliases aren't always convenient, ripgrep +supports configuration files. + +Setting up a configuration file is simple. ripgrep will not look in any +predetermined directory for a config file automatically. Instead, you need to +set the `RIPGREP_CONFIG_PATH` environment variable to the file path of your +config file. Once the environment variable is set, open the file and just type +in the flags you want set automatically. There are only two rules for +describing the format of the config file: + +1. Every line is a shell argument, after trimming whitespace. +2. Lines starting with `#` (optionally preceded by any amount of whitespace) +are ignored. + +In particular, there is no escaping. Each line is given to ripgrep as a single +command line argument verbatim. + +Here's an example of a configuration file, which demonstrates some of the +formatting peculiarities: + +``` +$ cat $HOME/.ripgreprc +# Don't let ripgrep vomit really long lines to my terminal, and show a preview. +--max-columns=150 +--max-columns-preview + +# Add my 'web' type. +--type-add +web:*.{html,css,js}* + +# Search hidden files / directories (e.g. dotfiles) by default +--hidden + +# Using glob patterns to include/exclude files or folders +--glob=!.git/* + +# or +--glob +!.git/* + +# Set the colors. +--colors=line:none +--colors=line:style:bold + +# Because who cares about case!? +--smart-case +``` + +When we use a flag that has a value, we either put the flag and the value on +the same line but delimited by an `=` sign (e.g., `--max-columns=150`), or we +put the flag and the value on two different lines. 
This is because ripgrep's +argument parser knows to treat the single argument `--max-columns=150` as a +flag with a value, but if we had written `--max-columns 150` in our +configuration file, then ripgrep's argument parser wouldn't know what to do +with it. + +Putting the flag and value on different lines is exactly equivalent and is a +matter of style. + +Comments are encouraged so that you remember what the config is doing. Empty +lines are OK too. + +So let's say you're using the above configuration file, but while you're at a +terminal, you really want to be able to see lines longer than 150 columns. What +do you do? Thankfully, all you need to do is pass `--max-columns 0` (or `-M0` +for short) on the command line, which will override your configuration file's +setting. This works because ripgrep's configuration file is *prepended* to the +explicit arguments you give it on the command line. Since flags given later +override flags given earlier, everything works as expected. This works for most +other flags as well, and each flag's documentation states which other flags +override it. + +If you're confused about what configuration file ripgrep is reading arguments +from, then running ripgrep with the `--debug` flag should help clarify things. +The debug output should note what config file is being loaded and the arguments +that have been read from the configuration. + +Finally, if you want to make absolutely sure that ripgrep *isn't* reading a +configuration file, then you can pass the `--no-config` flag, which will always +prevent ripgrep from reading extraneous configuration from the environment, +regardless of what other methods of configuration are added to ripgrep in the +future. + + +### File encoding + +[Text encoding](https://en.wikipedia.org/wiki/Character_encoding) is a complex +topic, but we can try to summarize its relevancy to ripgrep: + +* Files are generally just a bundle of bytes. There is no reliable way to know + their encoding. 
+* Either the encoding of the pattern must match the encoding of the files being + searched, or a form of transcoding must be performed that converts either the + pattern or the file to the same encoding as the other. +* ripgrep tends to work best on plain text files, and among plain text files, + the most popular encodings likely consist of ASCII, latin1 or UTF-8. As + a special exception, UTF-16 is prevalent in Windows environments. + +In light of the above, here is how ripgrep behaves when `--encoding auto` is +given, which is the default: + +* All input is assumed to be ASCII compatible (which means every byte that + corresponds to an ASCII codepoint actually is an ASCII codepoint). This + includes ASCII itself, latin1 and UTF-8. +* ripgrep works best with UTF-8. For example, ripgrep's regular expression + engine supports Unicode features. Namely, character classes like `\w` will + match all word characters by Unicode's definition and `.` will match any + Unicode codepoint instead of any byte. These constructions assume UTF-8, + so they simply won't match when they come across bytes in a file that aren't + UTF-8. +* To handle the UTF-16 case, ripgrep will do something called "BOM sniffing" + by default. That is, the first three bytes of a file will be read, and if + they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of + the file from UTF-16 to UTF-8, and then execute the search on the transcoded + version of the file. (This incurs a performance penalty since transcoding + is needed in addition to regex searching.) If the file contains invalid + UTF-16, then the Unicode replacement codepoint is substituted in place of + invalid code units. +* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits + you to specify an encoding from the + [Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
+ ripgrep will assume *all* files searched are the encoding specified (unless + the file has a BOM) and will perform a transcoding step just like in the + UTF-16 case described above. + +By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep +can and will search arbitrary bytes. The key here is that if you're searching +content that isn't UTF-8, then the usefulness of your pattern will degrade. If +you're searching bytes that aren't ASCII compatible, then it's likely the +pattern won't find anything. With all that said, this mode of operation is +important, because it lets you find ASCII or UTF-8 *within* files that are +otherwise arbitrary bytes. + +As a special case, the `-E/--encoding` flag supports the value `none`, which +will completely disable all encoding related logic, including BOM sniffing. +When `-E/--encoding` is set to `none`, ripgrep will search the raw bytes of +the underlying file with no transcoding step. For example, here's how you might +search the raw UTF-16 encoding of the string `Шерлок`: + +``` +$ rg '(?-u)\(\x045\x04@\x04;\x04>\x04:\x04' -E none -a some-utf16-file +``` + +Of course, that's just an example meant to show how one can drop down into +raw bytes. Namely, the simpler command works as you might expect automatically: + +``` +$ rg 'Шерлок' some-utf16-file +``` + +Finally, it is possible to disable ripgrep's Unicode support from within the +regular expression. For example, let's say you wanted `.` to match any byte +rather than any Unicode codepoint. (You might want this while searching a +binary file, since `.` by default will not match invalid UTF-8.) You could do +this by disabling Unicode via a regular expression flag: + +``` +$ rg '(?-u:.)' +``` + +This works for any part of the pattern. 
For example, the following will find +any Unicode word character followed by any ASCII word character followed by +another Unicode word character: + +``` +$ rg '\w(?-u:\w)\w' +``` + + +### Binary data + +In addition to skipping hidden files and files in your `.gitignore` by default, +ripgrep also attempts to skip binary files. ripgrep does this by default +because binary files (like PDFs or images) are typically not things you want to +search when searching for regex matches. Moreover, if content in a binary file +did match, then it's possible for undesirable binary data to be printed to your +terminal and wreak havoc. + +Unfortunately, unlike skipping hidden files and respecting your `.gitignore` +rules, a file cannot as easily be classified as binary. In order to figure out +whether a file is binary, the most effective heuristic that balances +correctness with performance is to simply look for `NUL` bytes. At that point, +the determination is simple: a file is considered "binary" if and only if it +contains a `NUL` byte somewhere in its contents. + +The issue is that while most binary files will have a `NUL` byte toward the +beginning of its contents, this is not necessarily true. The `NUL` byte might +be the very last byte in a large file, but that file is still considered +binary. While this leads to a fair amount of complexity inside ripgrep's +implementation, it also results in some unintuitive user experiences. + +At a high level, ripgrep operates in three different modes with respect to +binary files: + +1. The default mode is to attempt to remove binary files from a search + completely. This is meant to mirror how ripgrep removes hidden files and + files in your `.gitignore` automatically. That is, as soon as a file is + detected as binary, searching stops. If a match was already printed (because + it was detected long before a `NUL` byte), then ripgrep will print a warning + message indicating that the search stopped prematurely. 
This default mode + **only applies to files searched by ripgrep as a result of recursive + directory traversal**, which is consistent with ripgrep's other automatic + filtering. For example, `rg foo .file` will search `.file` even though it + is hidden. Similarly, `rg foo binary-file` will search `binary-file` in + "binary" mode automatically. +2. Binary mode is similar to the default mode, except it will not always + stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep + will continue searching a file that is known to be binary until the first + of two conditions is met: 1) the end of the file has been reached or 2) a + match is or has been seen. This means that in binary mode, if ripgrep + reports no matches, then there are no matches in the file. When a match does + occur, ripgrep prints a message similar to one it prints when in its default + mode indicating that the search has stopped prematurely. This mode can be + forcefully enabled for all files with the `--binary` flag. The purpose of + binary mode is to provide a way to discover matches in all files, but to + avoid having binary data dumped into your terminal. +3. Text mode completely disables all binary detection and searches all files + as if they were text. This is useful when searching a file that is + predominantly text but contains a `NUL` byte, or if you are specifically + trying to search binary data. This mode can be enabled with the `-a/--text` + flag. Note that when using this mode on very large binary files, it is + possible for ripgrep to use a lot of memory. + +Unfortunately, there is one additional complexity in ripgrep that can make it +difficult to reason about binary files. That is, the way binary detection works +depends on the way that ripgrep searches your files. Specifically: + +* When ripgrep uses memory maps, then binary detection is only performed on the + first few kilobytes of the file in addition to every matching line. 
+* When ripgrep doesn't use memory maps, then binary detection is performed on + all bytes searched. + +This means that whether a file is detected as binary or not can change based +on the internal search strategy used by ripgrep. If you prefer to keep +ripgrep's binary file detection consistent, then you can disable memory maps +via the `--no-mmap` flag. (The cost will be a small performance regression when +searching very large files on some platforms.) + + +### Preprocessor + +In ripgrep, a preprocessor is any type of command that can be run to transform +the input of every file before ripgrep searches it. This makes it possible to +search virtually any kind of content that can be automatically converted to +text without having to teach ripgrep how to read said content. + +One common example is searching PDFs. PDFs are first and foremost meant to be +displayed to users. But PDFs often have text streams in them that can be useful +to search. In our case, we want to search Bruce Watson's excellent +dissertation, +[Taxonomies and Toolkits of Regular Language Algorithms](https://burntsushi.net/stuff/1995-watson.pdf). +After downloading it, let's try searching it: + +``` +$ rg 'The Commentz-Walter algorithm' 1995-watson.pdf +$ +``` + +Surely, a dissertation on regular language algorithms would mention +Commentz-Walter. Indeed it does, but our search isn't picking it up because +PDFs are a binary format, and the text shown in the PDF may not be encoded as +simple contiguous UTF-8. Namely, even passing the `-a/--text` flag to ripgrep +will not make our search work. + +One way to fix this is to convert the PDF to plain text first. This won't work +well for all PDFs, but does great in a lot of cases. (Note that the tool we +use, `pdftotext`, is part of the [poppler](https://poppler.freedesktop.org) +PDF rendering library.) 
+ +``` +$ pdftotext 1995-watson.pdf > 1995-watson.txt +$ rg 'The Commentz-Walter algorithm' 1995-watson.txt +316:The Commentz-Walter algorithms : : : : : : : : : : : : : : : +7165:4.4 The Commentz-Walter algorithms +10062:in input string S , we obtain the Boyer-Moore algorithm. The Commentz-Walter algorithm +17218:The Commentz-Walter algorithm (and its variants) displayed more interesting behaviour, +17249:Aho-Corasick algorithms are used extensively. The Commentz-Walter algorithms are used +17297: The Commentz-Walter algorithms (CW). In all versions of the CW algorithms, a common program skeleton is used with di erent shift functions. The CW algorithms are +``` + +But having to explicitly convert every file can be a pain, especially when you +have a directory full of PDF files. Instead, we can use ripgrep's preprocessor +feature to search the PDF. ripgrep's `--pre` flag works by taking a single +command name and then executing that command for every file that it searches. +ripgrep passes the file path as the first and only argument to the command and +also sends the contents of the file to stdin. So let's write a simple shell +script that wraps `pdftotext` in a way that conforms to this interface: + +``` +$ cat preprocess +#!/bin/sh + +exec pdftotext - - +``` + +With `preprocess` in the same directory as `1995-watson.pdf`, we can now use it +to search the PDF: + +``` +$ rg --pre ./preprocess 'The Commentz-Walter algorithm' 1995-watson.pdf +316:The Commentz-Walter algorithms : : : : : : : : : : : : : : : +7165:4.4 The Commentz-Walter algorithms +10062:in input string S , we obtain the Boyer-Moore algorithm. The Commentz-Walter algorithm +17218:The Commentz-Walter algorithm (and its variants) displayed more interesting behaviour, +17249:Aho-Corasick algorithms are used extensively. The Commentz-Walter algorithms are used +17297: The Commentz-Walter algorithms (CW). In all versions of the CW algorithms, a common program skeleton is used with di erent shift functions. 
The CW algorithms are +``` + +Note that `preprocess` must be resolvable to a command that ripgrep can run. +The simplest way to do this is to put your preprocessor command in a directory +that is in your `PATH` (or equivalent), or otherwise use an absolute path. + +As a bonus, this turns out to be quite a bit faster than other specialized PDF +grepping tools: + +``` +$ time rg --pre ./preprocess 'The Commentz-Walter algorithm' 1995-watson.pdf -c +6 + +real 0.697 +user 0.684 +sys 0.007 +maxmem 16 MB +faults 0 + +$ time pdfgrep 'The Commentz-Walter algorithm' 1995-watson.pdf -c +6 + +real 1.336 +user 1.310 +sys 0.023 +maxmem 16 MB +faults 0 +``` + +If you wind up needing to search a lot of PDFs, then ripgrep's parallelism can +make the speed difference even greater. + +#### A more robust preprocessor + +One of the problems with the aforementioned preprocessor is that it will fail +if you try to search a file that isn't a PDF: + +``` +$ echo foo > not-a-pdf +$ rg --pre ./preprocess 'The Commentz-Walter algorithm' not-a-pdf +not-a-pdf: preprocessor command failed: '"./preprocess" "not-a-pdf"': +------------------------------------------------------------------------------- +Syntax Warning: May not be a PDF file (continuing anyway) +Syntax Error: Couldn't find trailer dictionary +Syntax Error: Couldn't find trailer dictionary +Syntax Error: Couldn't read xref table +``` + +To fix this, we can make our preprocessor script a bit more robust by only +running `pdftotext` when we think the input is a non-empty PDF: + +``` +$ cat preprocess +#!/bin/sh + +case "$1" in +*.pdf) + # The -s flag ensures that the file is non-empty. + if [ -s "$1" ]; then + exec pdftotext - - + else + exec cat + fi + ;; +*) + exec cat + ;; +esac +``` + +We can even extend our preprocessor to search other kinds of files.
Sometimes +we don't know the file type from the file name, so we can use the `file` +utility to "sniff" the type of the file based on its contents: + +``` +$ cat preprocess +#!/bin/sh + +case "$1" in +*.pdf) + # The -s flag ensures that the file is non-empty. + if [ -s "$1" ]; then + exec pdftotext - - + else + exec cat + fi + ;; +*) + case $(file "$1") in + *Zstandard*) + exec pzstd -cdq + ;; + *) + exec cat + ;; + esac + ;; +esac +``` + +#### Reducing preprocessor overhead + +There is one more problem with the above approach: it requires running a +preprocessor for every single file that ripgrep searches. If every file needs +a preprocessor, then this is OK. But if most don't, then this can substantially +slow down searches because of the overhead of launching new processes. You +can avoid this by telling ripgrep to only invoke the preprocessor when the file +path matches a glob. For example, consider the performance difference even when +searching a repository as small as ripgrep's: + +``` +$ time rg --pre pre-rg 'fn is_empty' -c +crates/globset/src/lib.rs:1 +crates/matcher/src/lib.rs:2 +crates/ignore/src/overrides.rs:1 +crates/ignore/src/gitignore.rs:1 +crates/ignore/src/types.rs:1 + +real 0.138 +user 0.485 +sys 0.209 +maxmem 7 MB +faults 0 + +$ time rg --pre pre-rg --pre-glob '*.pdf' 'fn is_empty' -c +crates/globset/src/lib.rs:1 +crates/ignore/src/types.rs:1 +crates/ignore/src/gitignore.rs:1 +crates/ignore/src/overrides.rs:1 +crates/matcher/src/lib.rs:2 + +real 0.008 +user 0.010 +sys 0.002 +maxmem 7 MB +faults 0 +``` + + +### Common options + +ripgrep has a lot of flags. Too many to keep in your head at once. This section +is intended to give you a sampling of some of the most important and frequently +used options that will likely impact how you use ripgrep on a regular basis. + +* `-h`: Show ripgrep's condensed help output. +* `--help`: Show ripgrep's longer form help output. (Nearly what you'd find in + ripgrep's man page, so pipe it into a pager!)
+* `-i/--ignore-case`: When searching for a pattern, ignore case differences. + That is, `rg -i fast` matches `fast`, `fASt`, `FAST`, etc. +* `-S/--smart-case`: This is similar to `--ignore-case`, but disables itself + if the pattern contains any uppercase letters. Usually this flag is put into + an alias or a config file. +* `-F/--fixed-strings`: Disable regular expression matching and treat the pattern + as a literal string. +* `-w/--word-regexp`: Require that all matches of the pattern be surrounded + by word boundaries. That is, given `pattern`, the `--word-regexp` flag will + cause ripgrep to behave as if `pattern` were actually `\b(?:pattern)\b`. +* `-c/--count`: Report a count of total matched lines. +* `--files`: Print the files that ripgrep *would* search, but don't actually + search them. +* `-a/--text`: Search binary files as if they were plain text. +* `-U/--multiline`: Permit matches to span multiple lines. +* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz, lz4, + brotli, zstd). This is disabled by default. +* `-C/--context`: Show the lines surrounding a match. +* `--sort path`: Force ripgrep to sort its output by file name. (This disables + parallelism, so it might be slower.) +* `-L/--follow`: Follow symbolic links while recursively searching. +* `-M/--max-columns`: Limit the length of lines printed by ripgrep. +* `--debug`: Shows ripgrep's debug output. This is useful for understanding + why a particular file might be ignored from search, or what kinds of + configuration ripgrep is loading from the environment.
diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/LICENSE-MIT b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/LICENSE-MIT new file mode 100644 index 000000000..3b0a5dc09 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/README.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/README.md new file mode 100644 index 000000000..a42908760 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/README.md @@ -0,0 +1,524 @@ +ripgrep (rg) +------------ +ripgrep is a line-oriented search tool that recursively searches the current +directory for a regex pattern. By default, ripgrep will respect gitignore rules +and automatically skip hidden files/directories and binary files. (To disable +all automatic filtering by default, use `rg -uuu`.) 
ripgrep has first class +support on Windows, macOS and Linux, with binary downloads available for [every +release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to +other popular search tools like The Silver Searcher, ack and grep. + +[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) +[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) +[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges) + +Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org). + + +### CHANGELOG + +Please see the [CHANGELOG](CHANGELOG.md) for a release history. + +### Documentation quick links + +* [Installation](#installation) +* [User Guide](GUIDE.md) +* [Frequently Asked Questions](FAQ.md) +* [Regex syntax](https://docs.rs/regex/1/regex/#syntax) +* [Configuration files](GUIDE.md#configuration-file) +* [Shell completions](FAQ.md#complete) +* [Building](#building) +* [Translations](#translations) + + +### Screenshot of search results + +[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png) + + +### Quick examples comparing tools + +This example searches the entire +[Linux kernel source tree](https://github.com/BurntSushi/linux) +(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where +all matches must be words. Timings were collected on a system with an Intel +i9-12900K 5.2 GHz. + +Please remember that a single benchmark is never enough! See my +[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/) +for a very detailed comparison with more benchmarks and analysis. 
+ +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 536 | **0.082s** (1.00x) | +| [hypergrep](https://github.com/p-ranav/hypergrep) | `hgrep -n -w '[A-Z]+_SUSPEND'` | 536 | 0.167s (2.04x) | +| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 536 | 0.273s (3.34x) | +| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 534 | 0.443s (5.43x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 536 | 0.639s (7.82x) | +| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 0.727s (8.91x) | +| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 536 | 2.670s (32.70x) | +| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 2677 | 2.935s (35.94x) | + +Here's another benchmark on the same corpus as above that disregards gitignore +files and searches with a whitelist instead. The corpus is the same as in the +previous benchmark, and the flags passed to each command ensure that they are +doing equivalent work: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 447 | **0.063s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.607s (9.62x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `grep -E -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 447 | 0.674s (10.69x) | + +Now we'll move to searching on single large file. 
Here is a straight-up +comparison between ripgrep, ugrep and GNU grep on a file cached in memory +(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz), decompressed): + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **1.042s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 1.339s (1.28x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.577s (6.31x) | + +In the above benchmark, passing the `-n` flag (for showing line numbers) +increases the times to `1.664s` for ripgrep and `9.484s` for GNU grep. ugrep +times are unaffected by the presence or absence of `-n`. + +Beware of performance cliffs though: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep (Unicode) | `rg -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | **1.053s** (1.00x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 6.234s (5.92x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w '[A-Z]\w+ Sherlock [A-Z]\w+'` | 485 | 28.973s (27.51x) | + +And performance can drop precipitously across the board when searching big +files for patterns without any opportunities for literal optimizations: + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg '[A-Za-z]{30}'` | 6749 | **15.569s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -E '[A-Za-z]{30}'` | 6749 | 21.857s (1.40x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep -E '[A-Za-z]{30}'` | 6749 | 32.409s (2.08x) | +| [GNU grep (Unicode)](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 grep -E '[A-Za-z]{30}'` | 6795 | 8m30s (32.74x) | + +Finally, high match counts 
also tend to both tank performance and smooth +out the differences between tools (because performance is dominated by how +quickly one can handle a match and not the algorithm used to detect the match, +generally speaking): + +| Tool | Command | Line count | Time | +| ---- | ------- | ---------- | ---- | +| ripgrep | `rg the` | 83499915 | **6.948s** (1.00x) | +| [ugrep](https://github.com/Genivia/ugrep) | `ugrep the` | 83499915 | 11.721s (1.69x) | +| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C grep the` | 83499915 | 15.217s (2.19x) | + +### Why should I use ripgrep? + +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to + [recursive search](GUIDE.md#recursive-search) and does [automatic + filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files + ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search + hidden files and it won't search binary files. Automatic filtering can be + disabled with `rg -uuu`. +* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types). + For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs + foo` excludes JavaScript files from your search. ripgrep can be taught about + new file types with custom matching rules. +* ripgrep supports many features found in `grep`, such as showing the context + of search results, searching multiple patterns, highlighting matches with + color and full Unicode support. Unlike GNU grep, ripgrep stays fast while + supporting Unicode (which is always on). +* ripgrep has optional support for switching its regex engine to use PCRE2. 
+ Among other things, this makes it possible to use look-around and + backreferences in your patterns, which are not supported in ripgrep's default + regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2 + always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative + syntax is provided via the `--engine (default|pcre2|auto)` option. +* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements), + which permit rewriting output based on what was matched. +* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding) + other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. + (Some support for automatically detecting UTF-16 is provided. Other text + encodings must be explicitly specified with the `-E/--encoding` flag.) +* ripgrep supports searching files compressed in a common format (brotli, + bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag. +* ripgrep supports + [arbitrary input preprocessing filters](GUIDE.md#preprocessor) + which could be PDF text extraction, less supported decompression, decrypting, + automatic encoding detection and so on. +* ripgrep can be configured via a + [configuration file](GUIDE.md#configuration-file). + +In other words, use ripgrep if you like speed, filtering by default, fewer +bugs and Unicode support. + + +### Why shouldn't I use ripgrep? + +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. + +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: + +* You need a portable and ubiquitous tool.
While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other feature (or bug) not listed in this README that + you rely on that's in another tool that isn't in ripgrep. +* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) + + +### Is it really faster than everything else? + +Generally, yes. A large number of benchmarks with detailed analysis for each is +[available on my blog](https://blog.burntsushi.net/ripgrep/). + +Summarizing, ripgrep is fast because: + +* It is built on top of + [Rust's regex engine](https://github.com/rust-lang/regex). + Rust's regex engine uses finite automata, SIMD and aggressive literal + optimizations to make searching very fast. (PCRE2 support can be opted into + with the `-P/--pcre2` flag.) +* Rust's regex library maintains performance with full Unicode support by + building UTF-8 decoding directly into its deterministic finite automaton + engine. +* It supports searching with either memory maps or by searching incrementally + with an intermediate buffer. The former is better for single files and the + latter is better for large directories. ripgrep chooses the best searching + strategy for you automatically. +* Applies your ignore patterns in `.gitignore` files using a + [`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html). + That means a single file path can be matched against multiple glob patterns + simultaneously. +* It uses a lock-free parallel recursive directory iterator, courtesy of + [`crossbeam`](https://docs.rs/crossbeam) and + [`ignore`](https://docs.rs/ignore). 
+ + +### Feature comparison + +Andy Lester, author of [ack](https://beyondgrep.com/), has published an +excellent table comparing the features of ack, ag, git-grep, GNU grep and +ripgrep: https://beyondgrep.com/feature-comparison/ + +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. + + +### Playground + +If you'd like to try ripgrep before installing, there's an unofficial +[playground](https://codapi.org/ripgrep/) and an [interactive +tutorial](https://codapi.org/try/ripgrep/). + +If you have any questions about these, please open an issue in the [tutorial +repo](https://github.com/nalgeon/tryxinyminutes). + + +### Installation + +The binary name for ripgrep is `rg`. + +**[Archives of precompiled binaries for ripgrep are available for Windows, +macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and +Windows binaries are static executables. Users of platforms not explicitly +mentioned below are advised to download one of these archives. 
+ +If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install +ripgrep from homebrew-core: + +``` +$ brew install ripgrep +``` + +If you're a **MacPorts** user, then you can install ripgrep from the +[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep): + +``` +$ sudo port install ripgrep +``` + +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): + +``` +$ choco install ripgrep +``` + +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json): + +``` +$ scoop install ripgrep +``` + +If you're a **Windows Winget** user, then you can install ripgrep from the +[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep) +repository: + +``` +$ winget install BurntSushi.ripgrep.MSVC +``` + +If you're an **Arch Linux** user, then you can install ripgrep from the official repos: + +``` +$ sudo pacman -S ripgrep +``` + +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): + +``` +$ sudo emerge sys-apps/ripgrep +``` + +If you're a **Fedora** user, you can install ripgrep from official +repositories. + +``` +$ sudo dnf install ripgrep +``` + +If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed** +and **openSUSE Leap** since 15.1. 
+ +``` +$ sudo zypper install ripgrep +``` + +If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): + +``` +$ sudo yum install -y yum-utils +$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo +$ sudo yum install ripgrep +``` + +If you're a **Nix** user, you can install ripgrep from +[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix): + +``` +$ nix-env --install ripgrep +``` + +If you're a **Flox** user, you can install ripgrep as follows: + +``` +$ flox install ripgrep +``` + +If you're a **Guix** user, you can install ripgrep from the official +package collection: + +``` +$ guix install ripgrep +``` + +If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**), +then ripgrep can be installed using a binary `.deb` file provided in each +[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). + +``` +$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/14.1.0/ripgrep_14.1.0-1_amd64.deb +$ sudo dpkg -i ripgrep_14.1.0-1_amd64.deb +``` + +If you run Debian stable, ripgrep is [officially maintained by +Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may +be older than the `deb` package available in the previous step. + +``` +$ sudo apt-get install ripgrep +``` + +If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is +[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same +packaging as Debian: + +``` +$ sudo apt-get install ripgrep +``` + +(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them +seem to work right and generate a number of very strange bug reports that I +don't know how to fix and don't have the time to fix. Therefore, it is no +longer a recommended installation option.) 
+ +If you're an **ALT** user, you can install ripgrep from the +[official repo](https://packages.altlinux.org/en/search?name=ripgrep): + +``` +$ sudo apt-get install ripgrep +``` + +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): + +``` +$ sudo pkg install ripgrep +``` + +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](https://openports.se/textproc/ripgrep): + +``` +$ doas pkg_add ripgrep +``` + +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](https://pkgsrc.se/textproc/ripgrep): + +``` +$ sudo pkgin install ripgrep +``` + +If you're a **Haiku x86_64** user, then you can install ripgrep from the +[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep): + +``` +$ sudo pkgman install ripgrep +``` + +If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the +same port as Haiku x86_64 using the x86 secondary architecture build: + +``` +$ sudo pkgman install ripgrep_x86 +``` + +If you're a **Void Linux** user, then you can install ripgrep from the +[official repository](https://voidlinux.org/packages/?arch=x86_64&q=ripgrep): + +``` +$ sudo xbps-install -Syv ripgrep +``` + +If you're a **Rust programmer**, ripgrep can be installed with `cargo`. + +* Note that the minimum supported version of Rust for ripgrep is **1.72.0**, + although ripgrep may work with older versions. +* Note that the binary may be bigger than expected because it contains debug + symbols. This is intentional. To remove debug symbols and therefore reduce + the file size, run `strip` on the binary. 
+ +``` +$ cargo install ripgrep +``` + +Alternatively, one can use [`cargo +binstall`](https://github.com/cargo-bins/cargo-binstall) to install a ripgrep +binary directly from GitHub: + +``` +$ cargo binstall ripgrep +``` + + +### Building + +ripgrep is written in Rust, so you'll need to grab a +[Rust installation](https://www.rust-lang.org/) in order to compile it. +ripgrep compiles with Rust 1.72.0 (stable) or newer. In general, ripgrep tracks +the latest stable release of the Rust compiler. + +To build ripgrep: + +``` +$ git clone https://github.com/BurntSushi/ripgrep +$ cd ripgrep +$ cargo build --release +$ ./target/release/rg --version +0.1.3 +``` + +**NOTE:** In the past, ripgrep supported a `simd-accel` Cargo feature when +using a Rust nightly compiler. This only benefited UTF-16 transcoding. +Since it required unstable features, this build mode was prone to breakage. +Because of that, support for it has been removed. If you want SIMD +optimizations for UTF-16 transcoding, then you'll have to petition the +[`encoding_rs`](https://github.com/hsivonen/encoding_rs) project to use stable +APIs. + +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: + +``` +$ cargo build --release --features 'pcre2' +``` + +Enabling the PCRE2 feature works with a stable Rust compiler and will +attempt to automatically find and link with your system's PCRE2 library via +`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source +using your system's C compiler and then statically link it into the final +executable. Static linking can be forced even when there is an available PCRE2 +system library by either building ripgrep with the MUSL target or by setting +`PCRE2_SYS_STATIC=1`. + +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). 
+Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: + +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl +``` + +Applying the `--features` flag from above works as expected. If you want to +build a static executable with MUSL and with PCRE2, then you will need to have +`musl-gcc` installed, which might be in a separate package from the actual +MUSL library, depending on your Linux distribution. + + +### Running tests + +ripgrep is relatively well-tested, including both unit tests and integration +tests. To run the full test suite, use: + +``` +$ cargo test --all +``` + +from the repository root. + + +### Related tools + +* [delta](https://github.com/dandavison/delta) is a syntax highlighting +pager that supports the `rg --json` output format. So all you need to do to +make it work is `rg --json pattern | delta`. See [delta's manual section on +grep](https://dandavison.github.io/delta/grep.html) for more details. + + +### Vulnerability reporting + +For reporting a security vulnerability, please +[contact Andrew Gallant](https://blog.burntsushi.net/about/). +The contact page has my email address and PGP public key if you wish to send an +encrypted message. + + +### Translations + +The following is a list of known translations of ripgrep's documentation. These +are unofficially maintained and may not be up to date. 
+ +* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-) +* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep) diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/RELEASE-CHECKLIST.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/RELEASE-CHECKLIST.md new file mode 100644 index 000000000..c6f0d28dc --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/RELEASE-CHECKLIST.md @@ -0,0 +1,59 @@ +# Release Checklist + +* Ensure local `master` is up to date with respect to `origin/master`. +* Run `cargo update` and review dependency updates. Commit updated + `Cargo.lock`. +* Run `cargo outdated` and review semver incompatible updates. Unless there is + a strong motivation otherwise, review and update every dependency. Also + run `--aggressive`, but don't update to crates that are still in beta. +* Update date in `crates/core/flags/doc/template.rg.1`. +* Review changes for every crate in `crates` since the last ripgrep release. + If the set of changes is non-empty, issue a new release for that crate. Check + crates in the following order. After updating a crate, ensure minimal + versions are updated as appropriate in dependents. If an update is required, + run `cargo-up --no-push crates/{CRATE}/Cargo.toml`. + * crates/globset + * crates/ignore + * crates/cli + * crates/matcher + * crates/regex + * crates/pcre2 + * crates/searcher + * crates/printer + * crates/grep (bump minimal versions as necessary) + * crates/core (do **not** bump version, but update dependencies as needed) +* Update the CHANGELOG as appropriate. +* Edit the `Cargo.toml` to set the new ripgrep version. Run + `cargo update -p ripgrep` so that the `Cargo.lock` is updated. Commit the + changes and create a new signed tag. Alternatively, use + `cargo-up --no-push --no-release Cargo.toml {VERSION}` to automate this. +* Run `cargo package` and ensure it succeeds. +* Push changes to GitHub, NOT including the tag. 
(But do not publish a new + version of ripgrep to crates.io yet.) +* Once CI for `master` finishes successfully, push the version tag. (Trying to + do this in one step seems to result in GitHub Actions not seeing the tag + push and thus not running the release workflow.) +* Wait for CI to finish creating the release. If the release build fails, then + delete the tag from GitHub, make fixes, re-tag, delete the release and push. +* Copy the relevant section of the CHANGELOG to the tagged release notes. + Include this blurb describing what ripgrep is: + > In case you haven't heard of it before, ripgrep is a line-oriented search + > tool that recursively searches the current directory for a regex pattern. + > By default, ripgrep will respect gitignore rules and automatically skip + > hidden files/directories and binary files. +* Run `git checkout {VERSION} && ci/build-and-publish-m2 {VERSION}` on a macOS + system with Apple silicon. +* Run `cargo publish`. +* Run `ci/sha256-releases {VERSION} >> pkg/brew/ripgrep-bin.rb`. Then edit + `pkg/brew/ripgrep-bin.rb` to update the version number and sha256 hashes. + Remove extraneous stuff added by `ci/sha256-releases`. Commit changes. +* Add TBD section to the top of the CHANGELOG: + ``` + TBD + === + Unreleased changes. Release notes have not yet been written. + ``` + +Note that [`cargo-up` can be found in BurntSushi's dotfiles][dotfiles]. + +[dotfiles]: https://github.com/BurntSushi/dotfiles/blob/master/bin/cargo-up diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/UNLICENSE b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/UNLICENSE new file mode 100644 index 000000000..68a49daad --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. 
+ +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/build.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/build.rs new file mode 100644 index 000000000..db9584bff --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/build.rs @@ -0,0 +1,46 @@ +fn main() { + set_git_revision_hash(); + set_windows_exe_options(); +} + +/// Embed a Windows manifest and set some linker options. +/// +/// The main reason for this is to enable long path support on Windows. This +/// still, I believe, requires enabling long path support in the registry. But +/// if that's enabled, then this will let ripgrep use C:\... style paths that +/// are longer than 260 characters. 
+fn set_windows_exe_options() { + static MANIFEST: &str = "pkg/windows/Manifest.xml"; + + let Ok(target_os) = std::env::var("CARGO_CFG_TARGET_OS") else { return }; + let Ok(target_env) = std::env::var("CARGO_CFG_TARGET_ENV") else { return }; + if !(target_os == "windows" && target_env == "msvc") { + return; + } + + let Ok(mut manifest) = std::env::current_dir() else { return }; + manifest.push(MANIFEST); + let Some(manifest) = manifest.to_str() else { return }; + + println!("cargo:rerun-if-changed={}", MANIFEST); + // Embed the Windows application manifest file. + println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED"); + println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}"); + // Turn linker warnings into errors. Helps debugging, otherwise the + // warnings get squashed (I believe). + println!("cargo:rustc-link-arg-bin=rg=/WX"); +} + +/// Make the current git hash available to the build as the environment +/// variable `RIPGREP_BUILD_GIT_HASH`. +fn set_git_revision_hash() { + use std::process::Command; + + let args = &["rev-parse", "--short=10", "HEAD"]; + let Ok(output) = Command::new("git").args(args).output() else { return }; + let rev = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if rev.is_empty() { + return; + } + println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev); +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/README.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/README.md new file mode 100644 index 000000000..44920e438 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/README.md @@ -0,0 +1,15 @@ +ripgrep core +------------ +This is the core ripgrep crate. In particular, `main.rs` is where the `main` +function lives. + +Most of ripgrep core consists of two things: + +* The definition of the CLI interface, including docs for every flag. 
+* Glue code that brings the `grep-matcher`, `grep-regex`, `grep-searcher` and + `grep-printer` crates together to actually execute the search. + +Currently, there are no plans to make ripgrep core available as an independent +library. However, much of the heavy lifting of ripgrep is done via its +constituent crates, which can be reused independent of ripgrep. Unfortunately, +there is no guide or tutorial to teach folks how to do this yet. diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/bash.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/bash.rs new file mode 100644 index 000000000..a390061bd --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/bash.rs @@ -0,0 +1,107 @@ +/*! +Provides completions for ripgrep's CLI for the bash shell. +*/ + +use crate::flags::defs::FLAGS; + +const TEMPLATE_FULL: &'static str = " +_rg() { + local i cur prev opts cmds + COMPREPLY=() + cur=\"${COMP_WORDS[COMP_CWORD]}\" + prev=\"${COMP_WORDS[COMP_CWORD-1]}\" + cmd=\"\" + opts=\"\" + + for i in ${COMP_WORDS[@]}; do + case \"${i}\" in + rg) + cmd=\"rg\" + ;; + *) + ;; + esac + done + + case \"${cmd}\" in + rg) + opts=\"!OPTS!\" + if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + fi + case \"${prev}\" in +!CASES! + esac + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + ;; + esac +} + +complete -F _rg -o bashdefault -o default rg +"; + +const TEMPLATE_CASE: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -f \"${cur}\")) + return 0 + ;; +"; + +const TEMPLATE_CASE_CHOICES: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -W \"!CHOICES!\" -- \"${cur}\")) + return 0 + ;; +"; + +/// Generate completions for Bash. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. 
+pub(crate) fn generate() -> String { + let mut opts = String::new(); + for flag in FLAGS.iter() { + opts.push_str("--"); + opts.push_str(flag.name_long()); + opts.push(' '); + if let Some(short) = flag.name_short() { + opts.push('-'); + opts.push(char::from(short)); + opts.push(' '); + } + if let Some(name) = flag.name_negated() { + opts.push_str("--"); + opts.push_str(name); + opts.push(' '); + } + } + opts.push_str(" ..."); + + let mut cases = String::new(); + for flag in FLAGS.iter() { + let template = if !flag.doc_choices().is_empty() { + let choices = flag.doc_choices().join(" "); + TEMPLATE_CASE_CHOICES.trim_end().replace("!CHOICES!", &choices) + } else { + TEMPLATE_CASE.trim_end().to_string() + }; + let name = format!("--{}", flag.name_long()); + cases.push_str(&template.replace("!FLAG!", &name)); + if let Some(short) = flag.name_short() { + let name = format!("-{}", char::from(short)); + cases.push_str(&template.replace("!FLAG!", &name)); + } + if let Some(negated) = flag.name_negated() { + let name = format!("--{negated}"); + cases.push_str(&template.replace("!FLAG!", &name)); + } + } + + TEMPLATE_FULL + .replace("!OPTS!", &opts) + .replace("!CASES!", &cases) + .trim_start() + .to_string() +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/encodings.sh b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/encodings.sh new file mode 100644 index 000000000..14fc4a85b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/encodings.sh @@ -0,0 +1,29 @@ +# This is impossible to read, but these encodings rarely if ever change, so +# it probably does not matter. They are derived from the list given here: +# https://encoding.spec.whatwg.org/#concept-encoding-get +# +# The globbing here works in both fish and zsh (though they expand it in +# different orders). It may work in other shells too. 
+ +{{,us-}ascii,arabic,chinese,cyrillic,greek{,8},hebrew,korean} +logical visual mac {,cs}macintosh x-mac-{cyrillic,roman,ukrainian} +866 ibm{819,866} csibm866 +big5{,-hkscs} {cn-,cs}big5 x-x-big5 +cp{819,866,125{0,1,2,3,4,5,6,7,8}} x-cp125{0,1,2,3,4,5,6,7,8} +csiso2022{jp,kr} csiso8859{6,8}{e,i} +csisolatin{1,2,3,4,5,6,9} csisolatin{arabic,cyrillic,greek,hebrew} +ecma-{114,118} asmo-708 elot_928 sun_eu_greek +euc-{jp,kr} x-euc-jp cseuckr cseucpkdfmtjapanese +{,x-}gbk csiso58gb231280 gb18030 {,cs}gb2312 gb_2312{,-80} hz-gb-2312 +iso-2022-{cn,cn-ext,jp,kr} +iso8859{,-}{1,2,3,4,5,6,7,8,9,10,11,13,14,15} +iso-8859-{1,2,3,4,5,6,7,8,9,10,11,{6,8}-{e,i},13,14,15,16} iso_8859-{1,2,3,4,5,6,7,8,9,15} +iso_8859-{1,2,6,7}:1987 iso_8859-{3,4,5,8}:1988 iso_8859-9:1989 +iso-ir-{58,100,101,109,110,126,127,138,144,148,149,157} +koi{,8,8-r,8-ru,8-u,8_r} cskoi8r +ks_c_5601-{1987,1989} ksc{,_}5691 csksc56011987 +latin{1,2,3,4,5,6} l{1,2,3,4,5,6,9} +shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932 +utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8 +windows-{31j,874,949,125{0,1,2,3,4,5,6,7,8}} dos-874 tis-620 ansi_x3.4-1968 +x-user-defined auto none diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/fish.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/fish.rs new file mode 100644 index 000000000..f8f7133bc --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/fish.rs @@ -0,0 +1,68 @@ +/*! +Provides completions for ripgrep's CLI for the fish shell. +*/ + +use crate::flags::{defs::FLAGS, CompletionType}; + +const TEMPLATE: &'static str = "complete -c rg !SHORT! -l !LONG! -d '!DOC!'"; +const TEMPLATE_NEGATED: &'static str = + "complete -c rg -l !NEGATED! -n '__fish_contains_opt !SHORT! !LONG!' -d '!DOC!'\n"; + +/// Generate completions for Fish. 
+pub(crate) fn generate() -> String { + let mut out = String::new(); + for flag in FLAGS.iter() { + let short = match flag.name_short() { + None => "".to_string(), + Some(byte) => format!("-s {}", char::from(byte)), + }; + let long = flag.name_long(); + let doc = flag.doc_short().replace("'", "\\'"); + let mut completion = TEMPLATE + .replace("!SHORT!", &short) + .replace("!LONG!", &long) + .replace("!DOC!", &doc); + + match flag.completion_type() { + CompletionType::Filename => { + completion.push_str(" -r -F"); + } + CompletionType::Executable => { + completion.push_str(" -r -f -a '(__fish_complete_command)'"); + } + CompletionType::Filetype => { + completion.push_str( + " -r -f -a '(rg --type-list | string replace : \\t)'", + ); + } + CompletionType::Encoding => { + completion.push_str(" -r -f -a '"); + completion.push_str(super::ENCODINGS); + completion.push_str("'"); + } + CompletionType::Other if !flag.doc_choices().is_empty() => { + completion.push_str(" -r -f -a '"); + completion.push_str(&flag.doc_choices().join(" ")); + completion.push_str("'"); + } + CompletionType::Other if !flag.is_switch() => { + completion.push_str(" -r -f"); + } + CompletionType::Other => (), + } + + completion.push('\n'); + out.push_str(&completion); + + if let Some(negated) = flag.name_negated() { + out.push_str( + &TEMPLATE_NEGATED + .replace("!NEGATED!", &negated) + .replace("!SHORT!", &short) + .replace("!LONG!", &long) + .replace("!DOC!", &doc), + ); + } + } + out +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/mod.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/mod.rs new file mode 100644 index 000000000..b531bf12b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/mod.rs @@ -0,0 +1,10 @@ +/*! +Modules for generating completions for various shells. 
+*/ + +static ENCODINGS: &'static str = include_str!("encodings.sh"); + +pub(super) mod bash; +pub(super) mod fish; +pub(super) mod powershell; +pub(super) mod zsh; diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/powershell.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/powershell.rs new file mode 100644 index 000000000..e8a89e2ea --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/powershell.rs @@ -0,0 +1,86 @@ +/*! +Provides completions for ripgrep's CLI for PowerShell. +*/ + +use crate::flags::defs::FLAGS; + +const TEMPLATE: &'static str = " +using namespace System.Management.Automation +using namespace System.Management.Automation.Language + +Register-ArgumentCompleter -Native -CommandName 'rg' -ScriptBlock { + param($wordToComplete, $commandAst, $cursorPosition) + $commandElements = $commandAst.CommandElements + $command = @( + 'rg' + for ($i = 1; $i -lt $commandElements.Count; $i++) { + $element = $commandElements[$i] + if ($element -isnot [StringConstantExpressionAst] -or + $element.StringConstantType -ne [StringConstantType]::BareWord -or + $element.Value.StartsWith('-')) { + break + } + $element.Value + }) -join ';' + + $completions = @(switch ($command) { + 'rg' { +!FLAGS! + } + }) + + $completions.Where{ $_.CompletionText -like \"$wordToComplete*\" } | + Sort-Object -Property ListItemText +} +"; + +const TEMPLATE_FLAG: &'static str = + "[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')"; + +/// Generate completions for PowerShell. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. 
+pub(crate) fn generate() -> String { + let mut flags = String::new(); + for (i, flag) in FLAGS.iter().enumerate() { + let doc = flag.doc_short().replace("'", "''"); + + let dash_name = format!("--{}", flag.name_long()); + let name = flag.name_long(); + if i > 0 { + flags.push('\n'); + } + flags.push_str(" "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + + if let Some(byte) = flag.name_short() { + let dash_name = format!("-{}", char::from(byte)); + let name = char::from(byte).to_string(); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + } + + if let Some(negated) = flag.name_negated() { + let dash_name = format!("--{}", negated); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &negated) + .replace("!DOC!", &doc), + ); + } + } + TEMPLATE.trim_start().replace("!FLAGS!", &flags) +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/rg.zsh b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/rg.zsh new file mode 100644 index 000000000..e9731a168 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/rg.zsh @@ -0,0 +1,637 @@ +#compdef rg + +## +# zsh completion function for ripgrep +# +# Run ci/test-complete after building to ensure that the options supported by +# this function stay in synch with the `rg` binary. +# +# For convenience, a completion reference guide is included at the bottom of +# this file. +# +# Originally based on code from the zsh-users project — see copyright notice +# below. + +_rg() { + local curcontext=$curcontext no='!' 
descr ret=1 + local -a context line state state_descr args tmp suf + local -A opt_args + + # ripgrep has many options which negate the effect of a more common one — for + # example, `--no-column` to negate `--column`, and `--messages` to negate + # `--no-messages`. There are so many of these, and they're so infrequently + # used, that some users will probably find it irritating if they're completed + # indiscriminately, so let's not do that unless either the current prefix + # matches one of those negation options or the user has the `complete-all` + # style set. Note that this prefix check has to be updated manually to account + # for all of the potential negation options listed below! + if + # We also want to list all of these options during testing + [[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] || + # (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode) + [[ $PREFIX$SUFFIX == --[imnp]* ]] || + zstyle -t ":completion:${curcontext}:" complete-all + then + no= + fi + + # We make heavy use of argument groups here to prevent the option specs from + # growing unwieldy. These aren't supported in zsh <5.4, though, so we'll strip + # them out below if necessary. This makes the exclusions inaccurate on those + # older versions, but oh well — it's not that big a deal + args=( + + '(exclusive)' # Misc. 
fully exclusive options + '(: * -)'{-h,--help}'[display help information]' + '(: * -)'{-V,--version}'[display version information]' + '(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]' + + + '(buffered)' # buffering options + '--line-buffered[force line buffering]' + $no"--no-line-buffered[don't force line buffering]" + '--block-buffered[force block buffering]' + $no"--no-block-buffered[don't force block buffering]" + + + '(case)' # Case-sensitivity options + {-i,--ignore-case}'[search case-insensitively]' + {-s,--case-sensitive}'[search case-sensitively]' + {-S,--smart-case}'[search case-insensitively if pattern is all lowercase]' + + + '(context-a)' # Context (after) options + '(context-c)'{-A+,--after-context=}'[specify lines to show after each match]:number of lines' + + + '(context-b)' # Context (before) options + '(context-c)'{-B+,--before-context=}'[specify lines to show before each match]:number of lines' + + + '(context-c)' # Context (combined) options + '(context-a context-b)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines' + + + '(column)' # Column options + '--column[show column numbers for matches]' + $no"--no-column[don't show column numbers for matches]" + + + '(count)' # Counting options + {-c,--count}'[only show count of matching lines for each file]' + '--count-matches[only show count of individual matches for each file]' + '--include-zero[include files with zero matches in summary]' + $no"--no-include-zero[don't include files with zero matches in summary]" + + + '(encoding)' # Encoding options + {-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings' + $no'--no-encoding[use default text encoding]' + + + '(engine)' # Engine choice options + '--engine=[select which regex engine to use]:when:(( + default\:"use default engine" + pcre2\:"identical to --pcre2" + auto\:"identical to --auto-hybrid-regex" + ))' + + + file # File-input options + 
'(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files' + + + '(file-match)' # Files with/without match options + '(stats)'{-l,--files-with-matches}'[only show names of files with matches]' + '(stats)--files-without-match[only show names of files without matches]' + + + '(file-name)' # File-name options + {-H,--with-filename}'[show file name for matches]' + {-I,--no-filename}"[don't show file name for matches]" + + + '(file-system)' # File system options + "--one-file-system[don't descend into directories on other file systems]" + $no'--no-one-file-system[descend into directories on other file systems]' + + + '(fixed)' # Fixed-string options + {-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]' + $no"--no-fixed-strings[don't treat pattern as literal string]" + + + '(follow)' # Symlink-following options + {-L,--follow}'[follow symlinks]' + $no"--no-follow[don't follow symlinks]" + + + '(generate)' # Options for generating ancillary data + '--generate=[generate man page or completion scripts]:when:(( + man\:"man page" + complete-bash\:"shell completions for bash" + complete-zsh\:"shell completions for zsh" + complete-fish\:"shell completions for fish" + complete-powershell\:"shell completions for PowerShell" + ))' + + + glob # File-glob options + '*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob' + '*--iglob=[include/exclude files matching specified case-insensitive glob]:glob' + + + '(glob-case-insensitive)' # File-glob case sensitivity options + '--glob-case-insensitive[treat -g/--glob patterns case insensitively]' + $no'--no-glob-case-insensitive[treat -g/--glob patterns case sensitively]' + + + '(heading)' # Heading options + '(pretty-vimgrep)--heading[show matches grouped by file name]' + "(pretty-vimgrep)--no-heading[don't show matches grouped by file name]" + + + '(hidden)' # Hidden-file options + {-.,--hidden}'[search hidden files and directories]' + $no"--no-hidden[don't search 
hidden files and directories]" + + + '(hybrid)' # hybrid regex options + '--auto-hybrid-regex[DEPRECATED: dynamically use PCRE2 if necessary]' + $no"--no-auto-hybrid-regex[DEPRECATED: don't dynamically use PCRE2 if necessary]" + + + '(ignore)' # Ignore-file options + "(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]" + $no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]' + + + '(ignore-file-case-insensitive)' # Ignore-file case sensitivity options + '--ignore-file-case-insensitive[process ignore files case insensitively]' + $no'--no-ignore-file-case-insensitive[process ignore files case sensitively]' + + + '(ignore-exclude)' # Local exclude (ignore)-file options + "--no-ignore-exclude[don't respect local exclude (ignore) files]" + $no'--ignore-exclude[respect local exclude (ignore) files]' + + + '(ignore-global)' # Global ignore-file options + "--no-ignore-global[don't respect global ignore files]" + $no'--ignore-global[respect global ignore files]' + + + '(ignore-parent)' # Parent ignore-file options + "--no-ignore-parent[don't respect ignore files in parent directories]" + $no'--ignore-parent[respect ignore files in parent directories]' + + + '(ignore-vcs)' # VCS ignore-file options + "--no-ignore-vcs[don't respect version control ignore files]" + $no'--ignore-vcs[respect version control ignore files]' + + + '(require-git)' # git specific settings + "--no-require-git[don't require git repository to respect gitignore rules]" + $no'--require-git[require git repository to respect gitignore rules]' + + + '(ignore-dot)' # .ignore options + "--no-ignore-dot[don't respect .ignore files]" + $no'--ignore-dot[respect .ignore files]' + + + '(ignore-files)' # custom global ignore file options + "--no-ignore-files[don't respect --ignore-file flags]" + $no'--ignore-files[respect --ignore-file files]' + + + '(json)' # JSON options + '--json[output results in JSON Lines 
format]' + $no"--no-json[don't output results in JSON Lines format]" + + + '(line-number)' # Line-number options + {-n,--line-number}'[show line numbers for matches]' + {-N,--no-line-number}"[don't show line numbers for matches]" + + + '(line-terminator)' # Line-terminator options + '--crlf[use CRLF as line terminator]' + $no"--no-crlf[don't use CRLF as line terminator]" + '(text)--null-data[use NUL as line terminator]' + + + '(max-columns-preview)' # max column preview options + '--max-columns-preview[show preview for long lines (with -M)]' + $no"--no-max-columns-preview[don't show preview for long lines (with -M)]" + + + '(max-depth)' # Directory-depth options + {-d,--max-depth}'[specify max number of directories to descend]:number of directories' + '--maxdepth=[alias for --max-depth]:number of directories' + '!--maxdepth=:number of directories' + + + '(messages)' # Error-message options + '(--no-ignore-messages)--no-messages[suppress some error messages]' + $no"--messages[don't suppress error messages affected by --no-messages]" + + + '(messages-ignore)' # Ignore-error message options + "--no-ignore-messages[don't show ignore-file parse error messages]" + $no'--ignore-messages[show ignore-file parse error messages]' + + + '(mmap)' # mmap options + '--mmap[search using memory maps when possible]' + "--no-mmap[don't search using memory maps]" + + + '(multiline)' # Multiline options + {-U,--multiline}'[permit matching across multiple lines]' + $no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]' + + + '(multiline-dotall)' # Multiline DOTALL options + '(--no-multiline)--multiline-dotall[allow "." 
to match newline (with -U)]' + $no"(--no-multiline)--no-multiline-dotall[don't allow \".\" to match newline (with -U)]" + + + '(only)' # Only-match options + {-o,--only-matching}'[show only matching part of each line]' + + + '(passthru)' # Pass-through options + '(--vimgrep)--passthru[show both matching and non-matching lines]' + '(--vimgrep)--passthrough[alias for --passthru]' + + + '(pcre2)' # PCRE2 options + {-P,--pcre2}'[enable matching with PCRE2]' + $no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]' + + + '(pcre2-unicode)' # PCRE2 Unicode options + $no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[DEPRECATED: enable PCRE2 Unicode mode (with -P)]' + '(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[DEPRECATED: disable PCRE2 Unicode mode (with -P)]' + + + '(pre)' # Preprocessing options + '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e' + $no'--no-pre[disable preprocessor utility]' + + + pre-glob # Preprocessing glob options + '*--pre-glob[include/exclude files for preprocessing with --pre]' + + + '(pretty-vimgrep)' # Pretty/vimgrep display options + '(heading)'{-p,--pretty}'[alias for --color=always --heading -n]' + '(heading passthru)--vimgrep[show results in vim-compatible format]' + + + regexp # Explicit pattern options + '(1 file)*'{-e+,--regexp=}'[specify pattern]:pattern' + + + '(replace)' # Replacement options + {-r+,--replace=}'[specify string used to replace matches]:replace string' + + + '(sort)' # File-sorting options + '(threads)--sort=[sort results in ascending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" + created\:"sort by creation time" + ))' + '(threads)--sortr=[sort results in descending order (disables parallelism)]:sort method:(( + none\:"no sorting" + path\:"sort by file path" + modified\:"sort by last modified time" + accessed\:"sort by last accessed time" 
+ created\:"sort by creation time" + ))' + '(threads)--sort-files[DEPRECATED: sort results by file path (disables parallelism)]' + $no"--no-sort-files[DEPRECATED: do not sort results]" + + + '(stats)' # Statistics options + '(--files file-match)--stats[show search statistics]' + $no"--no-stats[don't show search statistics]" + + + '(text)' # Binary-search options + {-a,--text}'[search binary files as if they were text]' + "--binary[search binary files, don't print binary data]" + $no"--no-binary[don't search binary files]" + $no"(--null-data)--no-text[don't search binary files as if they were text]" + + + '(threads)' # Thread-count options + '(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + + + '(trim)' # Trim options + '--trim[trim any ASCII whitespace prefix from each line]' + $no"--no-trim[don't trim ASCII whitespace prefix from each line]" + + + type # Type options + '*'{-t+,--type=}'[only search files matching specified type]: :_rg_types' + '*--type-add=[add new glob for specified file type]: :->typespec' + '*--type-clear=[clear globs previously defined for specified file type]: :_rg_types' + # This should actually be exclusive with everything but other type options + '(: *)--type-list[show all supported file types and their associated globs]' + '*'{-T+,--type-not=}"[don't search files matching specified file type]: :_rg_types" + + + '(word-line)' # Whole-word/line match options + {-w,--word-regexp}'[only show matches surrounded by word boundaries]' + {-x,--line-regexp}'[only show matches surrounded by line boundaries]' + + + '(unicode)' # Unicode options + $no'--unicode[enable Unicode mode]' + '--no-unicode[disable Unicode mode]' + + + '(zip)' # Compression options + '(--pre)'{-z,--search-zip}'[search in compressed files]' + $no"--no-search-zip[don't search in compressed files]" + + + misc # Other options — no need to separate these at the moment + '(-b --byte-offset)'{-b,--byte-offset}'[show 0-based byte offset for 
each matching line]' + $no"--no-byte-offset[don't show byte offsets for each matching line]" + '--color=[specify when to use colors in output]:when:(( + never\:"never use colors" + auto\:"use colors or not based on stdout, TERM, etc." + always\:"always use colors" + ansi\:"always use ANSI colors (even on Windows)" + ))' + '*--colors=[specify color and style settings]: :->colorspec' + '--context-separator=[specify string used to separate non-continuous context lines in output]:separator' + $no"--no-context-separator[don't print context separators]" + '--debug[show debug messages]' + '--field-context-separator[set string to delimit fields in context lines]' + '--field-match-separator[set string to delimit fields in matching lines]' + '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e' + '--hyperlink-format=[specify pattern for hyperlinks]:pattern' + '--trace[show more verbose debug messages]' + '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' + "(1 stats)--files[show each file that would be searched (but don't search)]" + '*--ignore-file=[specify additional ignore file]:ignore file:_files' + '(-v --invert-match)'{-v,--invert-match}'[invert matching]' + $no"--no-invert-match[do not invert matching]" + '(-M --max-columns)'{-M+,--max-columns=}'[specify max length of lines to print]:number of bytes' + '(-m --max-count)'{-m+,--max-count=}'[specify max number of matches per file]:number of matches' + '--max-filesize=[specify size above which files should be ignored]:file size (bytes)' + "--no-config[don't load configuration files]" + '(-0 --null)'{-0,--null}'[print NUL byte after file names]' + '--path-separator=[specify path separator to use when printing file names]:separator' + '(-q --quiet)'{-q,--quiet}'[suppress normal output]' + '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' + '*'{-u,--unrestricted}'[reduce level of "smart" searching]' + 
'--stop-on-nonmatch[stop on first non-matching line after a matching one]' + + + operand # Operands + '(--files --type-list file regexp)1: :_guard "^-*" pattern' + '(--type-list)*: :_files' + ) + + # This is used with test-complete to verify that there are no options + # listed in the help output that aren't also defined here + [[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] && { + print -rl - $args + return 0 + } + + # Strip out argument groups where unsupported (see above) + [[ $ZSH_VERSION == (4|5.<0-3>)(.*)# ]] && + args=( ${(@)args:#(#i)(+|[a-z0-9][a-z0-9_-]#|\([a-z0-9][a-z0-9_-]#\))} ) + + _arguments -C -s -S : $args && ret=0 + + case $state in + colorspec) + if [[ ${IPREFIX#--*=}$PREFIX == [^:]# ]]; then + suf=( -qS: ) + tmp=( + 'column:specify coloring for column numbers' + 'line:specify coloring for line numbers' + 'match:specify coloring for match text' + 'path:specify coloring for file names' + ) + descr='color/style type' + elif [[ ${IPREFIX#--*=}$PREFIX == (column|line|match|path):[^:]# ]]; then + suf=( -qS: ) + tmp=( + 'none:clear color/style for type' + 'bg:specify background color' + 'fg:specify foreground color' + 'style:specify text style' + ) + descr='color/style attribute' + elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:(bg|fg):[^:]# ]]; then + tmp=( black blue green red cyan magenta yellow white ) + descr='color name or r,g,b' + elif [[ ${IPREFIX#--*=}$PREFIX == [^:]##:style:[^:]# ]]; then + tmp=( {,no}bold {,no}intense {,no}underline ) + descr='style name' + else + _message -e colorspec 'no more arguments' + fi + + (( $#tmp )) && { + compset -P '*:' + _describe -t colorspec $descr tmp $suf && ret=0 + } + ;; + + typespec) + if compset -P '[^:]##:include:'; then + _sequence -s , _rg_types && ret=0 + # @todo This bit in particular could be better, but it's a little + # complex, and attempting to solve it seems to run us up against a crash + # bug — zsh # 40362 + elif compset -P '[^:]##:'; then + _message 'glob or include directive' && ret=1 + elif [[ ! 
-prefix *:* ]]; then + _rg_types -qS : && ret=0 + fi + ;; + esac + + return ret +} + +# Complete encodings +_rg_encodings() { + local -a expl + local -aU _encodings + + _encodings=( +!ENCODINGS! + ) + + _wanted encodings expl encoding compadd -a "$@" - _encodings +} + +# Complete file types +_rg_types() { + local -a expl + local -aU _types + + _types=( ${(@)${(f)"$( _call_program types $words[1] --type-list )"}//:[[:space:]]##/:} ) + + if zstyle -t ":completion:${curcontext}:types" extra-verbose; then + _describe -t types 'file type' _types + else + _wanted types expl 'file type' compadd "$@" - ${(@)_types%%:*} + fi +} + +_rg "$@" + +################################################################################ +# ZSH COMPLETION REFERENCE +# +# For the convenience of developers who aren't especially familiar with zsh +# completion functions, a brief reference guide follows. This is in no way +# comprehensive; it covers just enough of the basic structure, syntax, and +# conventions to help someone make simple changes like adding new options. For +# more complete documentation regarding zsh completion functions, please see the +# following: +# +# * http://zsh.sourceforge.net/Doc/Release/Completion-System.html +# * https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide +# +# OVERVIEW +# +# Most zsh completion functions are defined in terms of `_arguments`, which is a +# shell function that takes a series of argument specifications. The specs for +# `rg` are stored in an array, which is common for more complex functions; the +# elements of the array are passed to `_arguments` on invocation. 
+# +# ARGUMENT-SPECIFICATION SYNTAX +# +# The following is a contrived example of the argument specs for a simple tool: +# +# '(: * -)'{-h,--help}'[display help information]' +# '(-q -v --quiet --verbose)'{-q,--quiet}'[decrease output verbosity]' +# '!(-q -v --quiet --verbose)--silent' +# '(-q -v --quiet --verbose)'{-v,--verbose}'[increase output verbosity]' +# '--color=[specify when to use colors]:when:(always never auto)' +# '*:example file:_files' +# +# Although there may appear to be six specs here, there are actually nine; we +# use brace expansion to combine specs for options that go by multiple names, +# like `-q` and `--quiet`. This is customary, and ties in with the fact that zsh +# merges completion possibilities together when they have the same description. +# +# The first line defines the option `-h`/`--help`. With most tools, it isn't +# useful to complete anything after `--help` because it effectively overrides +# all others; the `(: * -)` at the beginning of the spec tells zsh not to +# complete any other operands (`:` and `*`) or options (`-`) after this one has +# been used. The `[...]` at the end associates a description with `-h`/`--help`; +# as mentioned, zsh will see the identical descriptions and merge these options +# together when offering completion possibilities. +# +# The next line defines `-q`/`--quiet`. Here we don't want to suppress further +# completions entirely, but we don't want to offer `-q` if `--quiet` has been +# given (since they do the same thing), nor do we want to offer `-v` (since it +# doesn't make sense to be quiet and verbose at the same time). We don't need to +# tell zsh not to offer `--quiet` a second time, since that's the default +# behaviour, but since this line expands to two specs describing `-q` *and* +# `--quiet` we do need to explicitly list all of them here. +# +# The next line defines a hidden option `--silent` — maybe it's a deprecated +# synonym for `--quiet`. 
The leading `!` indicates that zsh shouldn't offer this +# option during completion. The benefit of providing a spec for an option that +# shouldn't be completed is that, if someone *does* use it, we can correctly +# suppress completion of other options afterwards. +# +# The next line defines `-v`/`--verbose`; this works just like `-q`/`--quiet`. +# +# The next line defines `--color`. In this example, `--color` doesn't have a +# corresponding short option, so we don't need to use brace expansion. Further, +# there are no other options it's exclusive with (just itself), so we don't need +# to define those at the beginning. However, it does take a mandatory argument. +# The `=` at the end of `--color=` indicates that the argument may appear either +# like `--color always` or like `--color=always`; this is how most GNU-style +# command-line tools work. The corresponding short option would normally use `+` +# — for example, `-c+` would allow either `-c always` or `-calways`. For this +# option, the arguments are known ahead of time, so we can simply list them in +# parentheses at the end (`when` is used as the description for the argument). +# +# The last line defines an operand (a non-option argument). In this example, the +# operand can be used any number of times (the leading `*`), and it should be a +# file path, so we tell zsh to call the `_files` function to complete it. The +# `example file` in the middle is the description to use for this operand; we +# could use a space instead to accept the default provided by `_files`. +# +# GROUPING ARGUMENT SPECIFICATIONS +# +# Newer versions of zsh support grouping argument specs together. All specs +# following a `+` and then a group name are considered to be members of the +# named group. Grouping is useful mostly for organisational purposes; it makes +# the relationship between different options more obvious, and makes it easier +# to specify exclusions. 
+# +# We could rewrite our example above using grouping as follows: +# +# '(: * -)'{-h,--help}'[display help information]' +# '--color=[specify when to use colors]:when:(always never auto)' +# '*:example file:_files' +# + '(verbosity)' +# {-q,--quiet}'[decrease output verbosity]' +# '!--silent' +# {-v,--verbose}'[increase output verbosity]' +# +# Here we take advantage of a useful feature of spec grouping — when the group +# name is surrounded by parentheses, as in `(verbosity)`, it tells zsh that all +# of the options in that group are exclusive with each other. As a result, we +# don't need to manually list out the exclusions at the beginning of each +# option. +# +# Groups can also be referred to by name in other argument specs; for example: +# +# '(xyz)--aaa' '*: :_files' +# + xyz --xxx --yyy --zzz +# +# Here we use the group name `xyz` to tell zsh that `--xxx`, `--yyy`, and +# `--zzz` are not to be completed after `--aaa`. This makes the exclusion list +# much more compact and reusable. +# +# CONVENTIONS +# +# zsh completion functions generally adhere to the following conventions: +# +# * Use two spaces for indentation +# * Combine specs for options with different names using brace expansion +# * In combined specs, list the short option first (as in `{-a,--text}`) +# * Use `+` or `=` as described above for options that take arguments +# * Provide a description for all options, option-arguments, and operands +# * Capitalise/punctuate argument descriptions as phrases, not complete +# sentences — 'display help information', never 'Display help information.' 
+# (but still capitalise acronyms and proper names) +# * Write argument descriptions as verb phrases — 'display x', 'enable y', +# 'use z' +# * Word descriptions to make it clear when an option expects an argument; +# usually this is done with the word 'specify', as in 'specify x' or +# 'use specified x' +# * Write argument descriptions as tersely as possible — for example, articles +# like 'a' and 'the' should be omitted unless it would be confusing +# +# Other conventions currently used by this function: +# +# * Order argument specs alphabetically by group name, then option name +# * Group options that are directly related, mutually exclusive, or frequently +# referenced by other argument specs +# * Use only characters in the set [a-z0-9_-] in group names +# * Order exclusion lists as follows: short options, long options, groups +# * Use American English in descriptions +# * Use 'don't' in descriptions instead of 'do not' +# * Word descriptions for related options as similarly as possible. For example, +# `--foo[enable foo]` and `--no-foo[disable foo]`, or `--foo[use foo]` and +# `--no-foo[don't use foo]` +# * Word descriptions to make it clear when an option only makes sense with +# another option, usually by adding '(with -x)' to the end +# * Don't quote strings or variables unnecessarily. When quotes are required, +# prefer single-quotes to double-quotes +# * Prefix option specs with `$no` when the option serves only to negate the +# behaviour of another option that must be provided explicitly by the user. +# This prevents rarely used options from cluttering up the completion menu +################################################################################ + +# ------------------------------------------------------------------------------ +# Copyright (c) 2011 Github zsh-users - http://github.com/zsh-users +# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the zsh-users nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL ZSH-USERS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ------------------------------------------------------------------------------ +# Description +# ----------- +# +# Completion script for ripgrep +# +# ------------------------------------------------------------------------------ +# Authors +# ------- +# +# * arcizan +# * MaskRay +# +# ------------------------------------------------------------------------------ + +# Local Variables: +# mode: shell-script +# coding: utf-8-unix +# indent-tabs-mode: nil +# sh-indentation: 2 +# sh-basic-offset: 2 +# End: +# vim: ft=zsh sw=2 ts=2 et diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/zsh.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/zsh.rs new file mode 100644 index 000000000..7475c0c1b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/complete/zsh.rs @@ -0,0 +1,23 @@ +/*! +Provides completions for ripgrep's CLI for the zsh shell. + +Unlike completion scripts for other shells (at time of writing), zsh's +completions for ripgrep are maintained by hand. This is because: + +1. They are lovingly written by an expert in such things. +2. Are much higher in quality than the ones below that are auto-generated. +Namely, the zsh completions take application level context about flag +compatibility into account. +3. There is a CI script that fails if a new flag is added to ripgrep that +isn't included in the zsh completions. +4. There is a wealth of documentation in the zsh script explaining how it +works and how it can be extended. + +In principle, I'd be open to maintaining any completion script by hand so +long as it meets criteria 3 and 4 above. +*/ + +/// Generate completions for zsh.
+pub(crate) fn generate() -> String { + include_str!("rg.zsh").replace("!ENCODINGS!", super::ENCODINGS.trim_end()) +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/config.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/config.rs new file mode 100644 index 000000000..a081fe793 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/config.rs @@ -0,0 +1,170 @@ +/*! +This module provides routines for reading ripgrep config "rc" files. + +The primary output of these routines is a sequence of arguments, where each +argument corresponds precisely to one shell argument. +*/ + +use std::{ + ffi::OsString, + path::{Path, PathBuf}, +}; + +use bstr::{io::BufReadExt, ByteSlice}; + +/// Return a sequence of arguments derived from ripgrep rc configuration files. +pub fn args() -> Vec { + let config_path = match std::env::var_os("RIPGREP_CONFIG_PATH") { + None => return vec![], + Some(config_path) => { + if config_path.is_empty() { + return vec![]; + } + PathBuf::from(config_path) + } + }; + let (args, errs) = match parse(&config_path) { + Ok((args, errs)) => (args, errs), + Err(err) => { + message!( + "failed to read the file specified in RIPGREP_CONFIG_PATH: {}", + err + ); + return vec![]; + } + }; + if !errs.is_empty() { + for err in errs { + message!("{}:{}", config_path.display(), err); + } + } + log::debug!( + "{}: arguments loaded from config file: {:?}", + config_path.display(), + args + ); + args +} + +/// Parse a single ripgrep rc file from the given path. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the file could not be read, then an error is returned. If there was +/// a problem parsing one or more lines in the file, then errors are returned +/// for each line in addition to successfully parsed arguments. 
+fn parse>( + path: P, +) -> anyhow::Result<(Vec, Vec)> { + let path = path.as_ref(); + match std::fs::File::open(&path) { + Ok(file) => parse_reader(file), + Err(err) => anyhow::bail!("{}: {}", path.display(), err), + } +} + +/// Parse a single ripgrep rc file from the given reader. +/// +/// Callers should not provide a buffered reader, as this routine will use its +/// own buffer internally. +/// +/// On success, this returns a set of shell arguments, in order, that should +/// be pre-pended to the arguments given to ripgrep at the command line. +/// +/// If the reader could not be read, then an error is returned. If there was a +/// problem parsing one or more lines, then errors are returned for each line +/// in addition to successfully parsed arguments. +fn parse_reader( + rdr: R, +) -> anyhow::Result<(Vec, Vec)> { + let mut bufrdr = std::io::BufReader::new(rdr); + let (mut args, mut errs) = (vec![], vec![]); + let mut line_number = 0; + bufrdr.for_byte_line_with_terminator(|line| { + line_number += 1; + + let line = line.trim(); + if line.is_empty() || line[0] == b'#' { + return Ok(true); + } + match line.to_os_str() { + Ok(osstr) => { + args.push(osstr.to_os_string()); + } + Err(err) => { + errs.push(anyhow::anyhow!("{line_number}: {err}")); + } + } + Ok(true) + })?; + Ok((args, errs)) +} + +#[cfg(test)] +mod tests { + use super::parse_reader; + use std::ffi::OsString; + + #[test] + fn basic() { + let (args, errs) = parse_reader( + &b"\ +# Test +--context=0 + --smart-case +-u + + + # --bar +--foo +"[..], + ) + .unwrap(); + assert!(errs.is_empty()); + let args: Vec = + args.into_iter().map(|s| s.into_string().unwrap()).collect(); + assert_eq!(args, vec!["--context=0", "--smart-case", "-u", "--foo",]); + } + + // We test that we can handle invalid UTF-8 on Unix-like systems.
+ #[test] + #[cfg(unix)] + fn error() { + use std::os::unix::ffi::OsStringExt; + + let (args, errs) = parse_reader( + &b"\ +quux +foo\xFFbar +baz +"[..], + ) + .unwrap(); + assert!(errs.is_empty()); + assert_eq!( + args, + vec![ + OsString::from("quux"), + OsString::from_vec(b"foo\xFFbar".to_vec()), + OsString::from("baz"), + ] + ); + } + + // ... but test that invalid UTF-8 fails on Windows. + #[test] + #[cfg(not(unix))] + fn error() { + let (args, errs) = parse_reader( + &b"\ +quux +foo\xFFbar +baz +"[..], + ) + .unwrap(); + assert_eq!(errs.len(), 1); + assert_eq!(args, vec![OsString::from("quux"), OsString::from("baz"),]); + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/defs.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/defs.rs new file mode 100644 index 000000000..9a196c491 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/defs.rs @@ -0,0 +1,7675 @@ +/*! +Defines all of the flags available in ripgrep. + +Each flag corresponds to a unit struct with a corresponding implementation +of `Flag`. Note that each implementation of `Flag` might actually have many +possible manifestations of the same "flag." That is, each implementation of +`Flag` can have the following flags available to an end user of ripgrep: + +* The long flag name. +* An optional short flag name. +* An optional negated long flag name. +* An arbitrarily long list of aliases. + +The idea is that even though there are multiple flags that a user can type, +one implementation of `Flag` corresponds to a single _logical_ flag inside of +ripgrep. For example, `-E`, `--encoding` and `--no-encoding` all manipulate the +same encoding state in ripgrep. 
+*/ + +use std::path::PathBuf; + +use {anyhow::Context as AnyhowContext, bstr::ByteVec}; + +use crate::flags::{ + lowargs::{ + BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice, + ContextMode, EncodingMode, EngineChoice, GenerateMode, LoggingMode, + LowArgs, MmapMode, Mode, PatternSource, SearchMode, SortMode, + SortModeKind, SpecialMode, TypeChange, + }, + Category, Flag, FlagValue, +}; + +#[cfg(test)] +use crate::flags::parse::parse_low_raw; + +use super::CompletionType; + +/// A list of all flags in ripgrep via implementations of `Flag`. +/// +/// The order of these flags matters. It determines the order of the flags in +/// the generated documentation (`-h`, `--help` and the man page) within each +/// category. (This is why the deprecated flags are last.) +pub(super) const FLAGS: &[&dyn Flag] = &[ + // -e/--regexp and -f/--file should come before anything else in the + // same category. + &Regexp, + &File, + &AfterContext, + &BeforeContext, + &Binary, + &BlockBuffered, + &ByteOffset, + &CaseSensitive, + &Color, + &Colors, + &Column, + &Context, + &ContextSeparator, + &Count, + &CountMatches, + &Crlf, + &Debug, + &DfaSizeLimit, + &Encoding, + &Engine, + &FieldContextSeparator, + &FieldMatchSeparator, + &Files, + &FilesWithMatches, + &FilesWithoutMatch, + &FixedStrings, + &Follow, + &Generate, + &Glob, + &GlobCaseInsensitive, + &Heading, + &Help, + &Hidden, + &HostnameBin, + &HyperlinkFormat, + &IGlob, + &IgnoreCase, + &IgnoreFile, + &IgnoreFileCaseInsensitive, + &IncludeZero, + &InvertMatch, + &JSON, + &LineBuffered, + &LineNumber, + &LineNumberNo, + &LineRegexp, + &MaxColumns, + &MaxColumnsPreview, + &MaxCount, + &MaxDepth, + &MaxFilesize, + &Mmap, + &Multiline, + &MultilineDotall, + &NoConfig, + &NoIgnore, + &NoIgnoreDot, + &NoIgnoreExclude, + &NoIgnoreFiles, + &NoIgnoreGlobal, + &NoIgnoreMessages, + &NoIgnoreParent, + &NoIgnoreVcs, + &NoMessages, + &NoRequireGit, + &NoUnicode, + &Null, + &NullData, + &OneFileSystem, + &OnlyMatching, + &PathSeparator,
+ &Passthru, + &PCRE2, + &PCRE2Version, + &Pre, + &PreGlob, + &Pretty, + &Quiet, + &RegexSizeLimit, + &Replace, + &SearchZip, + &SmartCase, + &Sort, + &Sortr, + &Stats, + &StopOnNonmatch, + &Text, + &Threads, + &Trace, + &Trim, + &Type, + &TypeNot, + &TypeAdd, + &TypeClear, + &TypeList, + &Unrestricted, + &Version, + &Vimgrep, + &WithFilename, + &WithFilenameNo, + &WordRegexp, + // DEPRECATED (make them show up last in their respective categories) + &AutoHybridRegex, + &NoPcre2Unicode, + &SortFiles, +]; + +/// -A/--after-context +#[derive(Debug)] +struct AfterContext; + +impl Flag for AfterContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'A') + } + fn name_long(&self) -> &'static str { + "after-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show NUM lines after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines after each match. +.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_after(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_after_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_after(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--after-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--after-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-A5", "-A10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-A5", "-A0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-A5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-A5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--after-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--after-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --auto-hybrid-regex +#[derive(Debug)] +struct AutoHybridRegex; + +impl Flag for AutoHybridRegex { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "auto-hybrid-regex" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-auto-hybrid-regex") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + "(DEPRECATED) Use PCRE2 if 
appropriate." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \flag{engine} instead. +.sp +When this flag is used, ripgrep will dynamically choose between supported regex +engines depending on the features used in a pattern. When ripgrep chooses a +regex engine, it applies that choice for every regex provided to ripgrep (e.g., +via multiple \flag{regexp} or \flag{file} flags). +.sp +As an example of how this flag might behave, ripgrep will attempt to use +its default finite automata based regex engine whenever the pattern can be +successfully compiled with that regex engine. If PCRE2 is enabled and if the +pattern given could not be compiled with the default regex engine, then PCRE2 +will be automatically used for searching. If PCRE2 isn't available, then this +flag has no effect because there is only one regex engine to choose from. +.sp +In the future, ripgrep may adjust its heuristics for how it decides which +regex engine to use. In general, the heuristics will be limited to a static +analysis of the patterns, and not to any specific runtime behavior observed +while searching files. +.sp +The primary downside of using this flag is that it may not always be obvious +which regex engine ripgrep uses, and thus, the match semantics or performance +profile of ripgrep may subtly and unexpectedly change. However, in many cases, +all regex engines will agree on what constitutes a match and it can be nice +to transparently support more advanced regex features like look-around and +backreferences without explicitly needing to enable them. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let mode = if v.unwrap_switch() { + EngineChoice::Auto + } else { + EngineChoice::Default + }; + args.engine = mode; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_auto_hybrid_regex() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--no-auto-hybrid-regex"]) + .unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = + parse_low_raw(["--no-auto-hybrid-regex", "--auto-hybrid-regex"]) + .unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = parse_low_raw(["--auto-hybrid-regex", "-P"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--engine=auto", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--engine=default", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=default"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); +} + +/// -B/--before-context +#[derive(Debug)] +struct BeforeContext; + +impl Flag for BeforeContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'B') + } + fn name_long(&self) -> &'static str { + "before-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show NUM lines before each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before each match. 
+.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_before(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_before_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_before(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--before-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--before-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-B5", "-B10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-B5", "-B0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-B5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-B5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--before-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--before-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --binary +#[derive(Debug)] +struct Binary; + +impl Flag for Binary { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "binary" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-binary") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) 
-> &'static str { + "Search binary files." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this flag will cause ripgrep to search binary files. By default, +ripgrep attempts to automatically skip binary files in order to improve the +relevance of results and make the search faster. +.sp +Binary files are heuristically detected based on whether they contain a +\fBNUL\fP byte or not. By default (without this flag set), once a \fBNUL\fP +byte is seen, ripgrep will stop searching the file. Usually, \fBNUL\fP bytes +occur in the beginning of most binary files. If a \fBNUL\fP byte occurs after +a match, then ripgrep will not print the match, stop searching that file, and +emit a warning that some matches are being suppressed. +.sp +In contrast, when this flag is provided, ripgrep will continue searching a +file even if a \fBNUL\fP byte is found. In particular, if a \fBNUL\fP byte is +found then ripgrep will continue searching until either a match is found or +the end of the file is reached, whichever comes sooner. If a match is found, +then ripgrep will stop and print a warning saying that the search stopped +prematurely. +.sp +If you want ripgrep to search a file without any special \fBNUL\fP byte +handling at all (and potentially print binary data to stdout), then you should +use the \flag{text} flag. +.sp +The \flag{binary} flag is a flag for controlling ripgrep's automatic filtering +mechanism. As such, it does not need to be used when searching a file +explicitly or when searching stdin. That is, it is only applicable when +recursively searching a directory. +.sp +When the \flag{unrestricted} flag is provided for a third time, then this flag +is automatically enabled. +.sp +This flag overrides the \flag{text} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.binary = if v.unwrap_switch() { + BinaryMode::SearchAndSuppress + } else { + BinaryMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_binary() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--no-binary", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["-a", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); +} + +/// --block-buffered +#[derive(Debug)] +struct BlockBuffered; + +impl Flag for BlockBuffered { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "block-buffered" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-block-buffered") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Force block buffering." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will use block buffering. That is, whenever a matching +line is found, it will be written to an in-memory buffer and will not be +written to stdout until the buffer reaches a certain size. This is the default +when ripgrep's stdout is redirected to a pipeline or a file. When ripgrep's +stdout is connected to a tty, line buffering will be used by default. Forcing +block buffering can be useful when dumping a large amount of contents to a tty. 
+.sp +This overrides the \flag{line-buffered} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.buffer = if v.unwrap_switch() { + BufferMode::Block + } else { + BufferMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_block_buffered() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--block-buffered"]).unwrap(); + assert_eq!(BufferMode::Block, args.buffer); + + let args = + parse_low_raw(["--block-buffered", "--no-block-buffered"]).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--block-buffered", "--line-buffered"]).unwrap(); + assert_eq!(BufferMode::Line, args.buffer); +} + +/// --byte-offset +#[derive(Debug)] +struct ByteOffset; + +impl Flag for ByteOffset { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'b') + } + fn name_long(&self) -> &'static str { + "byte-offset" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-byte-offset") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Print the byte offset for each matching line." + } + fn doc_long(&self) -> &'static str { + r" +Print the 0-based byte offset within the input file before each line of output. +If \flag{only-matching} is specified, print the offset of the matched text +itself. +.sp +If ripgrep does transcoding, then the byte offset is in terms of the result +of transcoding and not the original data. This applies similarly to other +transformations on the data, such as decompression or a \flag{pre} filter. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.byte_offset = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_byte_offset() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse_low_raw(["--byte-offset"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse_low_raw(["-b"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse_low_raw(["--byte-offset", "--no-byte-offset"]).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse_low_raw(["--no-byte-offset", "-b"]).unwrap(); + assert_eq!(true, args.byte_offset); +} + +/// -s/--case-sensitive +#[derive(Debug)] +struct CaseSensitive; + +impl Flag for CaseSensitive { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b's') + } + fn name_long(&self) -> &'static str { + "case-sensitive" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search case sensitively (default)." + } + fn doc_long(&self) -> &'static str { + r" +Execute the search case sensitively. This is the default mode. +.sp +This is a global option that applies to all patterns given to ripgrep. +Individual patterns can still be matched case insensitively by using inline +regex flags. For example, \fB(?i)abc\fP will match \fBabc\fP case insensitively +even when this flag is used. +.sp +This flag overrides the \flag{ignore-case} and \flag{smart-case} flags. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "flag has no negation"); + args.case = CaseMode::Sensitive; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_case_sensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--case-sensitive"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); +} + +/// --color +#[derive(Debug)] +struct Color; + +impl Flag for Color { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "color" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("WHEN") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "When to use color." + } + fn doc_long(&self) -> &'static str { + r" +This flag controls when to use colors. The default setting is \fBauto\fP, which +means ripgrep will try to guess when to use colors. For example, if ripgrep is +printing to a tty, then it will use colors, but if it is redirected to a file +or a pipe, then it will suppress color output. +.sp +ripgrep will suppress color output by default in some other circumstances as +well. These include, but are not limited to: +.sp +.IP \(bu 3n +When the \fBTERM\fP environment variable is not set or set to \fBdumb\fP. +.sp +.IP \(bu 3n +When the \fBNO_COLOR\fP environment variable is set (regardless of value). +.sp +.IP \(bu 3n +When flags that imply no use for colors are given. For example, +\flag{vimgrep} and \flag{json}. +. +.PP +The possible values for this flag are: +.sp +.IP \fBnever\fP 10n +Colors will never be used. +.sp +.IP \fBauto\fP 10n +The default. ripgrep tries to be smart. +.sp +.IP \fBalways\fP 10n +Colors will always be used regardless of where output is sent. 
+.sp +.IP \fBansi\fP 10n +Like 'always', but emits ANSI escapes (even in a Windows console). +. +.PP +This flag also controls whether hyperlinks are emitted. For example, when +a hyperlink format is specified, hyperlinks won't be used when color is +suppressed. If one wants to emit hyperlinks but no colors, then one must use +the \flag{colors} flag to manually set all color styles to \fBnone\fP: +.sp +.EX + \-\-colors 'path:none' \\ + \-\-colors 'line:none' \\ + \-\-colors 'column:none' \\ + \-\-colors 'match:none' +.EE +.sp +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["never", "auto", "always", "ansi"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.color = match convert::str(&v.unwrap_value())? { + "never" => ColorChoice::Never, + "auto" => ColorChoice::Auto, + "always" => ColorChoice::Always, + "ansi" => ColorChoice::Ansi, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_color() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse_low_raw(["--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = parse_low_raw(["--color", "auto"]).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse_low_raw(["--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let args = parse_low_raw(["--color", "ansi"]).unwrap(); + assert_eq!(ColorChoice::Ansi, args.color); + + let args = parse_low_raw(["--color=never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = + parse_low_raw(["--color", "always", "--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = + parse_low_raw(["--color", "never", "--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let result = parse_low_raw(["--color", "foofoo"]); + assert!(result.is_err(), 
"{result:?}"); + + let result = parse_low_raw(["--color", "Always"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --colors +#[derive(Debug)] +struct Colors; + +impl Flag for Colors { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "colors" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COLOR_SPEC") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Configure color settings and styles." + } + fn doc_long(&self) -> &'static str { + r" +This flag specifies color settings for use in the output. This flag may be +provided multiple times. Settings are applied iteratively. Pre-existing color +labels are limited to one of eight choices: \fBred\fP, \fBblue\fP, \fBgreen\fP, +\fBcyan\fP, \fBmagenta\fP, \fByellow\fP, \fBwhite\fP and \fBblack\fP. Styles +are limited to \fBnobold\fP, \fBbold\fP, \fBnointense\fP, \fBintense\fP, +\fBnounderline\fP or \fBunderline\fP. +.sp +The format of the flag is +\fB{\fP\fItype\fP\fB}:{\fP\fIattribute\fP\fB}:{\fP\fIvalue\fP\fB}\fP. +\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP or +\fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or \fBstyle\fP. +\fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a text style. A +special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all color settings +for \fItype\fP. +.sp +For example, the following command will change the match color to magenta and +the background color for line numbers to yellow: +.sp +.EX + rg \-\-colors 'match:fg:magenta' \-\-colors 'line:bg:yellow' +.EE +.sp +Extended colors can be used for \fIvalue\fP when the tty supports ANSI color +sequences. These are specified as either \fIx\fP (256-color) or +.IB x , x , x +(24-bit truecolor) where \fIx\fP is a number between \fB0\fP and \fB255\fP +inclusive. \fIx\fP may be given as a normal decimal number or a hexadecimal +number, which is prefixed by \fB0x\fP. 
+.sp +For example, the following command will change the match background color to +that represented by the rgb value (0,128,255): +.sp +.EX + rg \-\-colors 'match:bg:0,128,255' +.EE +.sp +or, equivalently, +.sp +.EX + rg \-\-colors 'match:bg:0x0,0x80,0xFF' +.EE +.sp +Note that the \fBintense\fP and \fBnointense\fP styles will have no effect when +used alongside these extended color codes. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let v = convert::str(&v)?; + args.colors.push(v.parse()?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_colors() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert!(args.colors.is_empty()); + + let args = parse_low_raw(["--colors", "match:fg:magenta"]).unwrap(); + assert_eq!(args.colors, vec!["match:fg:magenta".parse().unwrap()]); + + let args = parse_low_raw([ + "--colors", + "match:fg:magenta", + "--colors", + "line:bg:yellow", + ]) + .unwrap(); + assert_eq!( + args.colors, + vec![ + "match:fg:magenta".parse().unwrap(), + "line:bg:yellow".parse().unwrap() + ] + ); +} + +/// --column +#[derive(Debug)] +struct Column; + +impl Flag for Column { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "column" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-column") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + "Show column numbers." + } + fn doc_long(&self) -> &'static str { + r" +Show column numbers (1-based). This only shows the column numbers for the first +match on each line. This does not try to account for Unicode. One byte is equal +to one column. This implies \flag{line-number}. +.sp +When \flag{only-matching} is used, then the column numbers written correspond +to the start of each match. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.column = Some(v.unwrap_switch()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_column() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.column); + + let args = parse_low_raw(["--column"]).unwrap(); + assert_eq!(Some(true), args.column); + + let args = parse_low_raw(["--column", "--no-column"]).unwrap(); + assert_eq!(Some(false), args.column); + + let args = parse_low_raw(["--no-column", "--column"]).unwrap(); + assert_eq!(Some(true), args.column); +} + +/// -C/--context +#[derive(Debug)] +struct Context; + +impl Flag for Context { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'C') + } + fn name_long(&self) -> &'static str { + "context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show NUM lines before and after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before and after each match. This is equivalent to +providing both the \flag{before-context} and \flag{after-context} flags with +the same value. +.sp +This overrides the \flag{passthru} flag. The \flag{after-context} and +\flag{before-context} flags both partially override this flag, regardless of +the order. For example, \fB\-A2 \-C1\fP is equivalent to \fB\-A2 \-B1\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.context.set_both(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context() { + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_both(lines); + mode + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["--context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse_low_raw(["-C5", "-C10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse_low_raw(["-C5", "-C0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let args = parse_low_raw(["-C5", "--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthru", "-C5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let n = usize::MAX.to_string(); + let args = parse_low_raw(["--context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse_low_raw(["--context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } + + // Test the interaction between -A/-B and -C. Basically, -A/-B always + // partially overrides -C, regardless of where they appear relative to + // each other. This behavior is also how GNU grep works, and it also makes + // logical sense to me: -A/-B are the more specific flags. 
+ let args = parse_low_raw(["-A1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse_low_raw(["-B1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse_low_raw(["-A1", "-B2", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); + + // These next three are like the ones above, but with -C before -A/-B. This + // tests that -A and -B only partially override -C. That is, -C1 -A2 is + // equivalent to -B1 -A2. + let args = parse_low_raw(["-C5", "-A1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse_low_raw(["-C5", "-B1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse_low_raw(["-C5", "-A1", "-B2"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); +} + +/// --context-separator +#[derive(Debug)] +struct ContextSeparator; + +impl Flag for ContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "context-separator" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-context-separator") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + 
Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the separator for contextual chunks." + } + fn doc_long(&self) -> &'static str { + r" +The string used to separate non-contiguous context lines in the output. This is +only used when one of the context flags is used (that is, \flag{after-context}, +\flag{before-context} or \flag{context}). Escape sequences like \fB\\x7F\fP or +\fB\\t\fP may be used. The default value is \fB\-\-\fP. +.sp +When the context separator is set to an empty string, then a line break +is still inserted. To completely disable context separators, use the +\flag-negate{context-separator} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::ContextSeparator as Separator; + + args.context_separator = match v { + FlagValue::Switch(true) => { + unreachable!("flag can only be disabled") + } + FlagValue::Switch(false) => Separator::disabled(), + FlagValue::Value(v) => Separator::new(&v)?, + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context_separator() { + use bstr::BString; + + use crate::flags::lowargs::ContextSeparator as Separator; + + let getbytes = |ctxsep: Separator| ctxsep.into_bytes().map(BString::from); + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Some(BString::from("--")), getbytes(args.context_separator)); + + let args = parse_low_raw(["--context-separator", "XYZ"]).unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + let args = parse_low_raw(["--no-context-separator"]).unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse_low_raw([ + "--context-separator", + "XYZ", + "--no-context-separator", + ]) + .unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse_low_raw([ + "--no-context-separator", + "--context-separator", + "XYZ", + ]) + .unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + // 
This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--context-separator", r"\xFF"]).unwrap(); + assert_eq!(Some(BString::from(b"\xFF")), getbytes(args.context_separator)); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -c/--count +#[derive(Debug)] +struct Count; + +impl Flag for Count { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'c') + } + fn name_long(&self) -> &'static str { + "count" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show count of matching lines for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of lines that match the +given patterns for each file searched. Each file containing a match has its +path and count printed on each line. Note that unless \flag{multiline} +is enabled, this reports the number of lines that match and not the total +number of matches. In multiline mode, \flag{count} is equivalent to +\flag{count-matches}. 
+.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. If you need a count to be printed regardless of whether +there is a match, then use \flag{include-zero}. +.sp +This overrides the \flag{count-matches} flag. Note that when \flag{count} +is combined with \flag{only-matching}, then ripgrep behaves as if +\flag{count-matches} was given. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--count can only be enabled"); + args.mode.update(Mode::Search(SearchMode::Count)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--count"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["-c"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["--count-matches", "--count"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); + + let args = parse_low_raw(["--count-matches", "-c"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Count), args.mode); +} + +/// --count-matches +#[derive(Debug)] +struct CountMatches; + +impl Flag for CountMatches { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "count-matches" + } + fn doc_variable(&self) -> Option<&'static str> { + None + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show count of every match for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of individual matches +of the given patterns for each file searched. Each file containing matches has +its path and match count printed on each line. 
Note that this reports the total +number of individual matches and not the number of lines that match. +.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. +.sp +This overrides the \flag{count} flag. Note that when \flag{count} is combined +with \flag{only-matching}, then ripgrep behaves as if \flag{count-matches} was +given. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--count-matches can only be enabled"); + args.mode.update(Mode::Search(SearchMode::CountMatches)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count_matches() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); + + let args = parse_low_raw(["--count", "--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); + + let args = parse_low_raw(["-c", "--count-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::CountMatches), args.mode); +} + +/// --crlf +#[derive(Debug)] +struct Crlf; + +impl Flag for Crlf { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "crlf" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-crlf") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Use CRLF line terminators (nice for Windows)." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will treat CRLF (\fB\\r\\n\fP) as a line terminator +instead of just \fB\\n\fP. +.sp +Principally, this permits the line anchor assertions \fB^\fP and \fB$\fP in +regex patterns to treat CRLF, CR or LF as line terminators instead of just LF. 
+Note that they will never match between a CR and a LF. CRLF is treated as one +single line terminator. +.sp +When using the default regex engine, CRLF support can also be enabled inside +the pattern with the \fBR\fP flag. For example, \fB(?R:$)\fP will match just +before either CR or LF, but never between CR and LF. +.sp +This flag overrides \flag{null-data}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.crlf = v.unwrap_switch(); + if args.crlf { + args.null_data = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_crlf() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.crlf); + + let args = parse_low_raw(["--crlf"]).unwrap(); + assert_eq!(true, args.crlf); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--crlf", "--null-data"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf"]).unwrap(); + assert_eq!(true, args.crlf); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--null-data", "--no-crlf"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf", "--no-crlf"]).unwrap(); + assert_eq!(false, args.crlf); + assert_eq!(false, args.null_data); +} + +/// --debug +#[derive(Debug)] +struct Debug; + +impl Flag for Debug { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "debug" + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Show debug messages." + } + fn doc_long(&self) -> &'static str { + r" +Show debug messages. Please use this when filing a bug report. +.sp +The \flag{debug} flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. 
+.sp +To get even more debug output, use the \flag{trace} flag, which implies +\flag{debug} along with additional trace data. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--debug can only be enabled"); + args.logging = Some(LoggingMode::Debug); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_debug() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse_low_raw(["--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); + + let args = parse_low_raw(["--trace", "--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); +} + +/// --dfa-size-limit +#[derive(Debug)] +struct DfaSizeLimit; + +impl Flag for DfaSizeLimit { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "dfa-size-limit" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"The upper size limit of the regex DFA." + } + fn doc_long(&self) -> &'static str { + r" +The upper size limit of the regex DFA. The default limit is something generous +for any single pattern or for many smallish patterns. This should only be +changed on very large regex inputs where the (slower) fallback regex engine may +otherwise be used if the limit is reached. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.dfa_size_limit = Some(convert::human_readable_usize(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_dfa_size_limit() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.dfa_size_limit); + + #[cfg(target_pointer_width = "64")] + { + let args = parse_low_raw(["--dfa-size-limit", "9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = + parse_low_raw(["--dfa-size-limit=9G", "--dfa-size-limit=0"]) + .unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + } + + let args = parse_low_raw(["--dfa-size-limit=0K"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=0M"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse_low_raw(["--dfa-size-limit=0G"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let result = parse_low_raw(["--dfa-size-limit", "9999999999999999999999"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--dfa-size-limit", "9999999999999999G"]); + assert!(result.is_err(), "{result:?}"); +} + +/// -E/--encoding +#[derive(Debug)] +struct Encoding; + +impl Flag for Encoding { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'E') + } + fn name_long(&self) -> &'static str { + "encoding" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-encoding") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENCODING") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Specify the text encoding of files to search." + } + fn doc_long(&self) -> &'static str { + r" +Specify the text encoding that ripgrep will use on all files searched. 
The +default value is \fBauto\fP, which will cause ripgrep to do a best effort +automatic detection of encoding on a per-file basis. Automatic detection in +this case only applies to files that begin with a UTF-8 or UTF-16 byte-order +mark (BOM). No other automatic detection is performed. One can also specify +\fBnone\fP which will then completely disable BOM sniffing and always result +in searching the raw bytes, including a BOM if it's present, regardless of its +encoding. +.sp +Other supported values can be found in the list of labels here: +\fIhttps://encoding.spec.whatwg.org/#concept-encoding-get\fP. +.sp +For more details on encoding and how ripgrep deals with it, see \fBGUIDE.md\fP. +.sp +The encoding detection that ripgrep uses can be reverted to its automatic mode +via the \flag-negate{encoding} flag. +" + } + fn completion_type(&self) -> CompletionType { + CompletionType::Encoding + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let value = match v { + FlagValue::Value(v) => v, + FlagValue::Switch(true) => { + unreachable!("--encoding must accept a value") + } + FlagValue::Switch(false) => { + args.encoding = EncodingMode::Auto; + return Ok(()); + } + }; + let label = convert::str(&value)?; + args.encoding = match label { + "auto" => EncodingMode::Auto, + "none" => EncodingMode::Disabled, + _ => EncodingMode::Some(grep::searcher::Encoding::new(label)?), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_encoding() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--encoding", "auto"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--encoding", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["--encoding=none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "none"]).unwrap(); + 
assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-Enone"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "none", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse_low_raw(["--no-encoding", "-E", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse_low_raw(["-E", "utf-16"]).unwrap(); + let enc = grep::searcher::Encoding::new("utf-16").unwrap(); + assert_eq!(EncodingMode::Some(enc), args.encoding); + + let args = parse_low_raw(["-E", "utf-16", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let result = parse_low_raw(["-E", "foo"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --engine +#[derive(Debug)] +struct Engine; + +impl Flag for Engine { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "engine" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENGINE") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Specify which regex engine to use." + } + fn doc_long(&self) -> &'static str { + r" +Specify which regular expression engine to use. When you choose a regex engine, +it applies that choice for every regex provided to ripgrep (e.g., via multiple +\flag{regexp} or \flag{file} flags). +.sp +Accepted values are \fBdefault\fP, \fBpcre2\fP, or \fBauto\fP. +.sp +The default value is \fBdefault\fP, which is usually the fastest and should be +good for most use cases. The \fBpcre2\fP engine is generally useful when you +want to use features such as look-around or backreferences. \fBauto\fP will +dynamically choose between supported regex engines depending on the features +used in a pattern on a best effort basis. +.sp +Note that the \fBpcre2\fP engine is an optional ripgrep feature. 
If PCRE2 +wasn't included in your build of ripgrep, then using this flag will result in +ripgrep printing an error message and exiting. +.sp +This overrides previous uses of the \flag{pcre2} and \flag{auto-hybrid-regex} +flags. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["default", "pcre2", "auto"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let string = convert::str(&v)?; + args.engine = match string { + "default" => EngineChoice::Default, + "pcre2" => EngineChoice::PCRE2, + "auto" => EngineChoice::Auto, + _ => anyhow::bail!("unrecognized regex engine '{string}'"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_engine() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--engine", "pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = + parse_low_raw(["--engine=pcre2", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=auto"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse_low_raw(["--auto-hybrid-regex", "--engine=default"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = + parse_low_raw(["--engine=pcre2", "--no-auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); +} + +/// --field-context-separator +#[derive(Debug)] +struct FieldContextSeparator; + +impl Flag for FieldContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-context-separator" + } + fn doc_variable(&self) -> Option<&'static 
str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the field context separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field context separator. This separator is only used when printing +contextual lines. It is used to delimit file paths, line numbers, columns and +the contextual line itself. The separator may be any number of bytes, including +zero. Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB-\fP character is the default value. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::FieldContextSeparator as Separator; + + args.field_context_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_context_separator() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BString::from("-"), args.field_context_separator.into_bytes()); + + let args = parse_low_raw(["--field-context-separator", "XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw(["--field-context-separator=XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw([ + "--field-context-separator", + "XYZ", + "--field-context-separator", + "ABC", + ]) + .unwrap(); + assert_eq!( + BString::from("ABC"), + args.field_context_separator.into_bytes() + ); + + let args = parse_low_raw(["--field-context-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_context_separator.into_bytes()); + + let args = parse_low_raw(["--field-context-separator", r"\x00"]).unwrap(); + assert_eq!( + BString::from("\x00"), + args.field_context_separator.into_bytes() + ); + + // This checks that invalid UTF-8 can be used. 
This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--field-context-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_context_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--field-context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --field-match-separator +#[derive(Debug)] +struct FieldMatchSeparator; + +impl Flag for FieldMatchSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-match-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the field match separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field match separator. This separator is only used when printing +matching lines. It is used to delimit file paths, line numbers, columns and the +matching line itself. The separator may be any number of bytes, including zero. +Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB:\fP character is the default value. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + use crate::flags::lowargs::FieldMatchSeparator as Separator; + + args.field_match_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_match_separator() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BString::from(":"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", "XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator=XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw([ + "--field-match-separator", + "XYZ", + "--field-match-separator", + "ABC", + ]) + .unwrap(); + assert_eq!(BString::from("ABC"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_match_separator.into_bytes()); + + let args = parse_low_raw(["--field-match-separator", r"\x00"]).unwrap(); + assert_eq!(BString::from("\x00"), args.field_match_separator.into_bytes()); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse_low_raw(["--field-match-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_match_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. 
This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse_low_raw([ + OsStr::from_bytes(b"--field-match-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -f/--file +#[derive(Debug)] +struct File; + +impl Flag for File { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option<u8> { + Some(b'f') + } + fn name_long(&self) -> &'static str { + "file" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERNFILE") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search for patterns from the given file." + } + fn doc_long(&self) -> &'static str { + r" +Search for patterns from the given file, with one pattern per line. When this +flag is used multiple times or in combination with the \flag{regexp} flag, then +all patterns provided are searched. Empty pattern lines will match all input +lines, and the newline is not counted as part of the pattern. +.sp +A line is printed if and only if it matches at least one of the patterns. +.sp +When \fIPATTERNFILE\fP is \fB-\fP, then \fBstdin\fP will be read for the +patterns. +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. 
+" + } + fn completion_type(&self) -> CompletionType { + CompletionType::Filename + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.patterns.push(PatternSource::File(path)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_file() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<PatternSource>::new(), args.patterns); + + let args = parse_low_raw(["--file", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["--file=foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["-f", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["-ffoo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse_low_raw(["--file", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["--file=-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["-f", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["-f-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse_low_raw(["--file=foo", "--file", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::File(PathBuf::from("foo")), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + // We permit path arguments to be invalid UTF-8. So test that. Some of + // these cases are tricky and depend on lexopt doing the right thing. 
+ // + // We probably should add tests for this handling on Windows too, but paths + // that are invalid UTF-16 appear incredibly rare in the Windows world. + #[cfg(unix)] + { + use std::{ + ffi::{OsStr, OsString}, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let path = PathBuf::from(OsString::from_vec(bytes.to_vec())); + + let args = parse_low_raw([ + OsStr::from_bytes(b"--file"), + OsStr::from_bytes(bytes), + ]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let args = parse_low_raw([ + OsStr::from_bytes(b"-f"), + OsStr::from_bytes(bytes), + ]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"--file=A".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse_low_raw([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"-fA".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse_low_raw([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + } +} + +/// --files +#[derive(Debug)] +struct Files; + +impl Flag for Files { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "files" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print each file that would be searched." + } + fn doc_long(&self) -> &'static str { + r" +Print each file that would be searched without actually performing the search. +This is useful to determine whether a particular file is being searched or not. +.sp +This overrides \flag{type-list}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch()); + args.mode.update(Mode::Files); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files"]).unwrap(); + assert_eq!(Mode::Files, args.mode); +} + +/// -l/--files-with-matches +#[derive(Debug)] +struct FilesWithMatches; + +impl Flag for FilesWithMatches { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'l') + } + fn name_long(&self) -> &'static str { + "files-with-matches" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Print the paths with at least one match." + } + fn doc_long(&self) -> &'static str { + r" +Print only the paths with at least one match and suppress match contents. +.sp +This overrides \flag{files-without-match}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--files-with-matches can only be enabled"); + args.mode.update(Mode::Search(SearchMode::FilesWithMatches)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files_with_matches() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files-with-matches"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); + + let args = parse_low_raw(["-l"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// --files-without-match +#[derive(Debug)] +struct FilesWithoutMatch; + +impl Flag for FilesWithoutMatch { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "files-without-match" + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Print the paths that contain zero matches." + } + fn doc_long(&self) -> &'static str { + r" +Print the paths that contain zero matches and suppress match contents. +.sp +This overrides \flag{files-with-matches}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!( + v.unwrap_switch(), + "--files-without-match can only be enabled" + ); + args.mode.update(Mode::Search(SearchMode::FilesWithoutMatch)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files_without_match() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--files-without-match"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithoutMatch), args.mode); + + let args = + parse_low_raw(["--files-with-matches", "--files-without-match"]) + .unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithoutMatch), args.mode); + + let args = + parse_low_raw(["--files-without-match", "--files-with-matches"]) + .unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// -F/--fixed-strings +#[derive(Debug)] +struct FixedStrings; + +impl Flag for FixedStrings { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'F') + } + fn name_long(&self) -> &'static str { + "fixed-strings" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-fixed-strings") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Treat all patterns as literals." + } + fn doc_long(&self) -> &'static str { + r" +Treat all patterns as literals instead of as regular expressions. When this +flag is used, special regular expression meta characters such as \fB.(){}*+\fP +should not need be escaped. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.fixed_strings = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_fixed_strings() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.fixed_strings); + + let args = parse_low_raw(["--fixed-strings"]).unwrap(); + assert_eq!(true, args.fixed_strings); + + let args = parse_low_raw(["-F"]).unwrap(); + assert_eq!(true, args.fixed_strings); + + let args = parse_low_raw(["-F", "--no-fixed-strings"]).unwrap(); + assert_eq!(false, args.fixed_strings); + + let args = parse_low_raw(["--no-fixed-strings", "-F"]).unwrap(); + assert_eq!(true, args.fixed_strings); +} + +/// -L/--follow +#[derive(Debug)] +struct Follow; + +impl Flag for Follow { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'L') + } + fn name_long(&self) -> &'static str { + "follow" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-follow") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Follow symbolic links." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to follow symbolic links while traversing +directories. This behavior is disabled by default. Note that ripgrep will +check for symbolic link loops and report errors if it finds one. ripgrep will +also report errors for broken links. To suppress error messages, use the +\flag{no-messages} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.follow = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_follow() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.follow); + + let args = parse_low_raw(["--follow"]).unwrap(); + assert_eq!(true, args.follow); + + let args = parse_low_raw(["-L"]).unwrap(); + assert_eq!(true, args.follow); + + let args = parse_low_raw(["-L", "--no-follow"]).unwrap(); + assert_eq!(false, args.follow); + + let args = parse_low_raw(["--no-follow", "-L"]).unwrap(); + assert_eq!(true, args.follow); +} + +/// --generate +#[derive(Debug)] +struct Generate; + +impl Flag for Generate { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "generate" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("KIND") + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Generate man pages and completion scripts." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to generate some special kind of output identified +by \fIKIND\fP and then quit without searching. \fIKIND\fP can be one of the +following values: +.sp +.TP 15 +\fBman\fP +Generates a manual page for ripgrep in the \fBroff\fP format. +.TP 15 +\fBcomplete\-bash\fP +Generates a completion script for the \fBbash\fP shell. +.TP 15 +\fBcomplete\-zsh\fP +Generates a completion script for the \fBzsh\fP shell. +.TP 15 +\fBcomplete\-fish\fP +Generates a completion script for the \fBfish\fP shell. +.TP 15 +\fBcomplete\-powershell\fP +Generates a completion script for PowerShell. +.PP +The output is written to \fBstdout\fP. The list above may expand over time. 
+" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[ + "man", + "complete-bash", + "complete-zsh", + "complete-fish", + "complete-powershell", + ] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let genmode = match convert::str(&v.unwrap_value())? { + "man" => GenerateMode::Man, + "complete-bash" => GenerateMode::CompleteBash, + "complete-zsh" => GenerateMode::CompleteZsh, + "complete-fish" => GenerateMode::CompleteFish, + "complete-powershell" => GenerateMode::CompletePowerShell, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.mode.update(Mode::Generate(genmode)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_generate() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--generate", "man"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::Man), args.mode); + + let args = parse_low_raw(["--generate", "complete-bash"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteBash), args.mode); + + let args = parse_low_raw(["--generate", "complete-zsh"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteZsh), args.mode); + + let args = parse_low_raw(["--generate", "complete-fish"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompleteFish), args.mode); + + let args = parse_low_raw(["--generate", "complete-powershell"]).unwrap(); + assert_eq!(Mode::Generate(GenerateMode::CompletePowerShell), args.mode); + + let args = + parse_low_raw(["--generate", "complete-bash", "--generate=man"]) + .unwrap(); + assert_eq!(Mode::Generate(GenerateMode::Man), args.mode); + + let args = parse_low_raw(["--generate", "man", "-l"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); + + // An interesting quirk of how the modes override each other that lets + // you get back to the "default" mode of searching. 
+ let args = + parse_low_raw(["--generate", "man", "--json", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); +} + +/// -g/--glob +#[derive(Debug)] +struct Glob; + +impl Flag for Glob { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option<u8> { + Some(b'g') + } + fn name_long(&self) -> &'static str { + "glob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Include or exclude file paths." + } + fn doc_long(&self) -> &'static str { + r#" +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may +be used. Globbing rules match \fB.gitignore\fP globs. Precede a glob with a +\fB!\fP to exclude it. If multiple globs match a file or directory, the glob +given later in the command line takes precedence. +.sp +As an extension, globs support specifying alternatives: +.BI "\-g '" ab{c,d}* ' +is equivalent to +.BI "\-g " "abc " "\-g " abd. +Empty alternatives like +.BI "\-g '" ab{,c} ' +are not currently supported. Note that this syntax extension is also currently +enabled in \fBgitignore\fP files, even though this syntax isn't supported by +git itself. ripgrep may disable this syntax extension in gitignore files, but +it will always remain available via the \flag{glob} flag. +.sp +When this flag is set, every file and directory is applied to it to test for +a match. For example, if you only want to search in a particular directory +\fIfoo\fP, then +.BI "\-g " foo +is incorrect because \fIfoo/bar\fP does not match +the glob \fIfoo\fP. Instead, you should use +.BI "\-g '" foo/** '. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.globs.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_glob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<String>::new(), args.globs); + + let args = parse_low_raw(["--glob", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob=foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["-g", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["-gfoo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["--glob=-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["-g", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); + + let args = parse_low_raw(["-g-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.globs); +} + +/// --glob-case-insensitive +#[derive(Debug)] +struct GlobCaseInsensitive; + +impl Flag for GlobCaseInsensitive { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "glob-case-insensitive" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-glob-case-insensitive") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Process all glob patterns case insensitively." + } + fn doc_long(&self) -> &'static str { + r" +Process all glob patterns given with the \flag{glob} flag case insensitively. +This effectively treats \flag{glob} as \flag{iglob}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.glob_case_insensitive = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_glob_case_insensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.glob_case_insensitive); + + let args = parse_low_raw(["--glob-case-insensitive"]).unwrap(); + assert_eq!(true, args.glob_case_insensitive); + + let args = parse_low_raw([ + "--glob-case-insensitive", + "--no-glob-case-insensitive", + ]) + .unwrap(); + assert_eq!(false, args.glob_case_insensitive); + + let args = parse_low_raw([ + "--no-glob-case-insensitive", + "--glob-case-insensitive", + ]) + .unwrap(); + assert_eq!(true, args.glob_case_insensitive); +} + +/// --heading +#[derive(Debug)] +struct Heading; + +impl Flag for Heading { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "heading" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-heading") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print matches grouped by each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints the file path above clusters of matches from each file instead +of printing the file path as a prefix for each matched line. +.sp +This is the default mode when printing to a tty. +.sp +When \fBstdout\fP is not a tty, then ripgrep will default to the standard +grep-like format. One can force this format in Unix-like environments by +piping the output of ripgrep to \fBcat\fP. For example, \fBrg\fP \fIfoo\fP \fB| +cat\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.heading = Some(v.unwrap_switch()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_heading() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.heading); + + let args = parse_low_raw(["--heading"]).unwrap(); + assert_eq!(Some(true), args.heading); + + let args = parse_low_raw(["--no-heading"]).unwrap(); + assert_eq!(Some(false), args.heading); + + let args = parse_low_raw(["--heading", "--no-heading"]).unwrap(); + assert_eq!(Some(false), args.heading); + + let args = parse_low_raw(["--no-heading", "--heading"]).unwrap(); + assert_eq!(Some(true), args.heading); +} + +/// -h/--help +#[derive(Debug)] +struct Help; + +impl Flag for Help { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "help" + } + fn name_short(&self) -> Option<u8> { + Some(b'h') + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show help output." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints the help output for ripgrep. +.sp +Unlike most other flags, the behavior of the short flag, \fB\-h\fP, and the +long flag, \fB\-\-help\fP, is different. The short flag will show a condensed +help output while the long flag will show a verbose help output. The verbose +help output has complete documentation, where as the condensed help output will +show only a single line for every flag. +" + } + + fn update(&self, v: FlagValue, _: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--help has no negation"); + // Since this flag has different semantics for -h and --help and the + // Flag trait doesn't support encoding this sort of thing, we handle it + // as a special case in the parser. 
+ Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_help() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["-h"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpShort), args.special); + + let args = parse_low_raw(["--help"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpLong), args.special); + + let args = parse_low_raw(["-h", "--help"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpLong), args.special); + + let args = parse_low_raw(["--help", "-h"]).unwrap(); + assert_eq!(Some(SpecialMode::HelpShort), args.special); +} + +/// -./--hidden +#[derive(Debug)] +struct Hidden; + +impl Flag for Hidden { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'.') + } + fn name_long(&self) -> &'static str { + "hidden" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-hidden") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Search hidden files and directories." + } + fn doc_long(&self) -> &'static str { + r#" +Search hidden files and directories. By default, hidden files and directories +are skipped. Note that if a hidden file or a directory is whitelisted in +an ignore file, then it will be searched even if this flag isn't provided. +Similarly if a hidden file or directory is given explicitly as an argument to +ripgrep. +.sp +A file or directory is considered hidden if its base name starts with a dot +character (\fB.\fP). On operating systems which support a "hidden" file +attribute, like Windows, files with this attribute are also considered hidden. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.hidden = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hidden() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.hidden); + + let args = parse_low_raw(["--hidden"]).unwrap(); + assert_eq!(true, args.hidden); + + let args = parse_low_raw(["-."]).unwrap(); + assert_eq!(true, args.hidden); + + let args = parse_low_raw(["-.", "--no-hidden"]).unwrap(); + assert_eq!(false, args.hidden); + + let args = parse_low_raw(["--no-hidden", "-."]).unwrap(); + assert_eq!(true, args.hidden); +} + +/// --hostname-bin +#[derive(Debug)] +struct HostnameBin; + +impl Flag for HostnameBin { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "hostname-bin" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COMMAND") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Run a program to get this system's hostname." + } + fn doc_long(&self) -> &'static str { + r#" +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's \fBPATH\fP environment variable). When set, ripgrep +will run this executable, with no arguments, and treat its output (with leading +and trailing whitespace stripped) as your system's hostname. +.sp +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling \fBgethostname\fP. On Windows, this corresponds to calling +\fBGetComputerNameExW\fP to fetch the system's "physical DNS hostname." +.sp +ripgrep uses your system's hostname for producing hyperlinks. 
+"# + } + fn completion_type(&self) -> CompletionType { + CompletionType::Executable + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.hostname_bin = + if path.as_os_str().is_empty() { None } else { Some(path) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hostname_bin() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.hostname_bin); + + let args = parse_low_raw(["--hostname-bin", "foo"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.hostname_bin); + + let args = parse_low_raw(["--hostname-bin=foo"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.hostname_bin); +} + +/// --hyperlink-format +#[derive(Debug)] +struct HyperlinkFormat; + +impl Flag for HyperlinkFormat { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "hyperlink-format" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("FORMAT") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the format of hyperlinks." + } + fn doc_long(&self) -> &'static str { + r#" +Set the format of hyperlinks to use when printing results. Hyperlinks make +certain elements of ripgrep's output, such as file paths, clickable. This +generally only works in terminal emulators that support OSC-8 hyperlinks. For +example, the format \fBfile://{host}{path}\fP will emit an RFC 8089 hyperlink. +To see the format that ripgrep is using, pass the \flag{debug} flag. +.sp +Alternatively, a format string may correspond to one of the following aliases: +\fBdefault\fP, \fBnone\fP, \fBfile\fP, \fBgrep+\fP, \fBkitty\fP, \fBmacvim\fP, +\fBtextmate\fP, \fBvscode\fP, \fBvscode-insiders\fP, \fBvscodium\fP. The +alias will be replaced with a format string that is intended to work for the +corresponding application. 
+.sp +The following variables are available in the format string: +.sp +.TP 12 +\fB{path}\fP +Required. This is replaced with a path to a matching file. The path is +guaranteed to be absolute and percent encoded such that it is valid to put into +a URI. Note that a path is guaranteed to start with a /. +.TP 12 +\fB{host}\fP +Optional. This is replaced with your system's hostname. On Unix, this +corresponds to calling \fBgethostname\fP. On Windows, this corresponds to +calling \fBGetComputerNameExW\fP to fetch the system's "physical DNS hostname." +Alternatively, if \flag{hostname-bin} was provided, then the hostname returned +from the output of that program will be returned. If no hostname could be +found, then this variable is replaced with the empty string. +.TP 12 +\fB{line}\fP +Optional. If appropriate, this is replaced with the line number of a match. If +no line number is available (for example, if \fB\-\-no\-line\-number\fP was +given), then it is automatically replaced with the value 1. +.TP 12 +\fB{column}\fP +Optional, but requires the presence of \fB{line}\fP. If appropriate, this is +replaced with the column number of a match. If no column number is available +(for example, if \fB\-\-no\-column\fP was given), then it is automatically +replaced with the value 1. +.TP 12 +\fB{wslprefix}\fP +Optional. This is a special value that is set to +\fBwsl$/\fP\fIWSL_DISTRO_NAME\fP, where \fIWSL_DISTRO_NAME\fP corresponds to +the value of the equivalent environment variable. If the system is not Unix +or if the \fIWSL_DISTRO_NAME\fP environment variable is not set, then this is +replaced with the empty string. +.PP +A format string may be empty. An empty format string is equivalent to the +\fBnone\fP alias. In this case, hyperlinks will be disabled. +.sp +At present, ripgrep does not enable hyperlinks by default. Users must opt into +them. If you aren't sure what format to use, try \fBdefault\fP. 
+.sp +Like colors, when ripgrep detects that stdout is not connected to a tty, then +hyperlinks are automatically disabled, regardless of the value of this flag. +Users can pass \fB\-\-color=always\fP to forcefully emit hyperlinks. +.sp +Note that hyperlinks are only written when a path is also in the output +and colors are enabled. To write hyperlinks without colors, you'll need to +configure ripgrep to not colorize anything without actually disabling all ANSI +escape codes completely: +.sp +.EX + \-\-colors 'path:none' \\ + \-\-colors 'line:none' \\ + \-\-colors 'column:none' \\ + \-\-colors 'match:none' +.EE +.sp +ripgrep works this way because it treats the \flag{color} flag as a proxy for +whether ANSI escape codes should be used at all. This means that environment +variables like \fBNO_COLOR=1\fP and \fBTERM=dumb\fP not only disable colors, +but hyperlinks as well. Similarly, colors and hyperlinks are disabled when +ripgrep is not writing to a tty. (Unless one forces the issue by setting +\fB\-\-color=always\fP.) +.sp +If you're searching a file directly, for example: +.sp +.EX + rg foo path/to/file +.EE +.sp +then hyperlinks will not be emitted since the path given does not appear +in the output. To make the path appear, and thus also a hyperlink, use the +\flag{with-filename} flag. 
+.sp +For more information on hyperlinks in terminal emulators, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let string = convert::str(&v)?; + let format = string.parse().context("invalid hyperlink format")?; + args.hyperlink_format = format; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_hyperlink_format() { + let parseformat = |format: &str| { + format.parse::().unwrap() + }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(parseformat("none"), args.hyperlink_format); + + let args = parse_low_raw(["--hyperlink-format", "default"]).unwrap(); + #[cfg(windows)] + assert_eq!(parseformat("file://{path}"), args.hyperlink_format); + #[cfg(not(windows))] + assert_eq!(parseformat("file://{host}{path}"), args.hyperlink_format); + + let args = parse_low_raw(["--hyperlink-format", "file"]).unwrap(); + assert_eq!(parseformat("file://{host}{path}"), args.hyperlink_format); + + let args = parse_low_raw([ + "--hyperlink-format", + "file", + "--hyperlink-format=grep+", + ]) + .unwrap(); + assert_eq!(parseformat("grep+://{path}:{line}"), args.hyperlink_format); + + let args = + parse_low_raw(["--hyperlink-format", "file://{host}{path}#{line}"]) + .unwrap(); + assert_eq!( + parseformat("file://{host}{path}#{line}"), + args.hyperlink_format + ); + + let result = parse_low_raw(["--hyperlink-format", "file://heythere"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --iglob +#[derive(Debug)] +struct IGlob; + +impl Flag for IGlob { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "iglob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Include/exclude paths case insensitively." 
+ } + fn doc_long(&self) -> &'static str { + r" +Include or exclude files and directories for searching that match the given +glob. This always overrides any other ignore logic. Multiple glob flags may +be used. Globbing rules match \fB.gitignore\fP globs. Precede a glob with a +\fB!\fP to exclude it. If multiple globs match a file or directory, the glob +given later in the command line takes precedence. Globs used via this flag are +matched case insensitively. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.iglobs.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_iglob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.iglobs); + + let args = parse_low_raw(["--iglob", "foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob=foo"]).unwrap(); + assert_eq!(vec!["foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob", "-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.iglobs); + + let args = parse_low_raw(["--iglob=-foo"]).unwrap(); + assert_eq!(vec!["-foo".to_string()], args.iglobs); +} + +/// -i/--ignore-case +#[derive(Debug)] +struct IgnoreCase; + +impl Flag for IgnoreCase { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'i') + } + fn name_long(&self) -> &'static str { + "ignore-case" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Case insensitive search." + } + fn doc_long(&self) -> &'static str { + r#" +When this flag is provided, all patterns will be searched case insensitively. +The case insensitivity rules used by ripgrep's default regex engine conform to +Unicode's "simple" case folding rules. +.sp +This is a global option that applies to all patterns given to ripgrep. 
+Individual patterns can still be matched case sensitively by using +inline regex flags. For example, \fB(?\-i)abc\fP will match \fBabc\fP +case sensitively even when this flag is used. +.sp +This flag overrides \flag{case-sensitive} and \flag{smart-case}. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "flag has no negation"); + args.case = CaseMode::Insensitive; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_case() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--ignore-case"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-i", "-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-s", "-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); +} + +/// --ignore-file +#[derive(Debug)] +struct IgnoreFile; + +impl Flag for IgnoreFile { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "ignore-file" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATH") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Specify additional ignore files." + } + fn doc_long(&self) -> &'static str { + r" +Specifies a path to one or more \fBgitignore\fP formatted rules files. +These patterns are applied after the patterns found in \fB.gitignore\fP, +\fB.rgignore\fP and \fB.ignore\fP are applied and are matched relative to the +current working directory. Multiple additional ignore files can be specified +by using this flag repeatedly. When specifying multiple ignore files, earlier +files have lower precedence than later files. 
+.sp +If you are looking for a way to include or exclude files and directories +directly on the command line, then use \flag{glob} instead. +" + } + fn completion_type(&self) -> CompletionType { + CompletionType::Filename + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.ignore_file.push(path); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_file() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.ignore_file); + + let args = parse_low_raw(["--ignore-file", "foo"]).unwrap(); + assert_eq!(vec![PathBuf::from("foo")], args.ignore_file); + + let args = parse_low_raw(["--ignore-file", "foo", "--ignore-file", "bar"]) + .unwrap(); + assert_eq!( + vec![PathBuf::from("foo"), PathBuf::from("bar")], + args.ignore_file + ); +} + +/// --ignore-file-case-insensitive +#[derive(Debug)] +struct IgnoreFileCaseInsensitive; + +impl Flag for IgnoreFileCaseInsensitive { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "ignore-file-case-insensitive" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-ignore-file-case-insensitive") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Process ignore files case insensitively." + } + fn doc_long(&self) -> &'static str { + r" +Process ignore files (\fB.gitignore\fP, \fB.ignore\fP, etc.) case +insensitively. Note that this comes with a performance penalty and is most +useful on case insensitive file systems (such as Windows). 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.ignore_file_case_insensitive = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_ignore_file_case_insensitive() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.ignore_file_case_insensitive); + + let args = parse_low_raw(["--ignore-file-case-insensitive"]).unwrap(); + assert_eq!(true, args.ignore_file_case_insensitive); + + let args = parse_low_raw([ + "--ignore-file-case-insensitive", + "--no-ignore-file-case-insensitive", + ]) + .unwrap(); + assert_eq!(false, args.ignore_file_case_insensitive); + + let args = parse_low_raw([ + "--no-ignore-file-case-insensitive", + "--ignore-file-case-insensitive", + ]) + .unwrap(); + assert_eq!(true, args.ignore_file_case_insensitive); +} + +/// --include-zero +#[derive(Debug)] +struct IncludeZero; + +impl Flag for IncludeZero { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "include-zero" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-include-zero") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Include zero matches in summary output." + } + fn doc_long(&self) -> &'static str { + r" +When used with \flag{count} or \flag{count-matches}, this causes ripgrep to +print the number of matches for each file even if there were zero matches. This +is disabled by default but can be enabled to make ripgrep behave more like +grep. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.include_zero = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_include_zero() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.include_zero); + + let args = parse_low_raw(["--include-zero"]).unwrap(); + assert_eq!(true, args.include_zero); + + let args = parse_low_raw(["--include-zero", "--no-include-zero"]).unwrap(); + assert_eq!(false, args.include_zero); +} + +/// -v/--invert-match +#[derive(Debug)] +struct InvertMatch; + +impl Flag for InvertMatch { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'v') + } + fn name_long(&self) -> &'static str { + "invert-match" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-invert-match") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Invert matching." + } + fn doc_long(&self) -> &'static str { + r" +This flag inverts matching. That is, instead of printing lines that match, +ripgrep will print lines that don't match. +.sp +Note that this only inverts line-by-line matching. For example, combining this +flag with \flag{files-with-matches} will emit files that contain any lines +that do not match the patterns given. That's not the same as, for example, +\flag{files-without-match}, which will emit files that do not contain any +matching lines. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.invert_match = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_invert_match() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.invert_match); + + let args = parse_low_raw(["--invert-match"]).unwrap(); + assert_eq!(true, args.invert_match); + + let args = parse_low_raw(["-v"]).unwrap(); + assert_eq!(true, args.invert_match); + + let args = parse_low_raw(["-v", "--no-invert-match"]).unwrap(); + assert_eq!(false, args.invert_match); +} + +/// --json +#[derive(Debug)] +struct JSON; + +impl Flag for JSON { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "json" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-json") + } + fn doc_category(&self) -> Category { + Category::OutputModes + } + fn doc_short(&self) -> &'static str { + r"Show search results in a JSON Lines format." + } + fn doc_long(&self) -> &'static str { + r" +Enable printing results in a JSON Lines format. +.sp +When this flag is provided, ripgrep will emit a sequence of messages, each +encoded as a JSON object, where there are five different message types: +.sp +.TP 12 +\fBbegin\fP +A message that indicates a file is being searched and contains at least one +match. +.TP 12 +\fBend\fP +A message the indicates a file is done being searched. This message also +include summary statistics about the search for a particular file. +.TP 12 +\fBmatch\fP +A message that indicates a match was found. This includes the text and offsets +of the match. +.TP 12 +\fBcontext\fP +A message that indicates a contextual line was found. This includes the text of +the line, along with any match information if the search was inverted. +.TP 12 +\fBsummary\fP +The final message emitted by ripgrep that contains summary statistics about the +search across all files. 
+.PP +Since file paths or the contents of files are not guaranteed to be valid +UTF-8 and JSON itself must be representable by a Unicode encoding, ripgrep +will emit all data elements as objects with one of two keys: \fBtext\fP or +\fBbytes\fP. \fBtext\fP is a normal JSON string when the data is valid UTF-8 +while \fBbytes\fP is the base64 encoded contents of the data. +.sp +The JSON Lines format is only supported for showing search results. It cannot +be used with other flags that emit other types of output, such as \flag{files}, +\flag{files-with-matches}, \flag{files-without-match}, \flag{count} or +\flag{count-matches}. ripgrep will report an error if any of the aforementioned +flags are used in concert with \flag{json}. +.sp +Other flags that control aspects of the standard output such as +\flag{only-matching}, \flag{heading}, \flag{replace}, \flag{max-columns}, etc., +have no effect when \flag{json} is set. However, enabling JSON output will +always implicitly and unconditionally enable \flag{stats}. +.sp +A more complete description of the JSON format used can be found here: +\fIhttps://docs.rs/grep-printer/*/grep_printer/struct.JSON.html\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + if v.unwrap_switch() { + args.mode.update(Mode::Search(SearchMode::JSON)); + } else if matches!(args.mode, Mode::Search(SearchMode::JSON)) { + // --no-json only reverts to the default mode if the mode is + // JSON, otherwise it's a no-op. 
+ args.mode.update(Mode::Search(SearchMode::Standard)); + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_json() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::JSON), args.mode); + + let args = parse_low_raw(["--json", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--json", "--files", "--no-json"]).unwrap(); + assert_eq!(Mode::Files, args.mode); + + let args = parse_low_raw(["--json", "-l", "--no-json"]).unwrap(); + assert_eq!(Mode::Search(SearchMode::FilesWithMatches), args.mode); +} + +/// --line-buffered +#[derive(Debug)] +struct LineBuffered; + +impl Flag for LineBuffered { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "line-buffered" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-line-buffered") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Force line buffering." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will always use line buffering. That is, whenever a +matching line is found, it will be flushed to stdout immediately. This is the +default when ripgrep's stdout is connected to a tty, but otherwise, ripgrep +will use block buffering, which is typically faster. This flag forces ripgrep +to use line buffering even if it would otherwise use block buffering. This is +typically useful in shell pipelines, for example: +.sp +.EX + tail -f something.log | rg foo --line-buffered | rg bar +.EE +.sp +This overrides the \flag{block-buffered} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.buffer = if v.unwrap_switch() { + BufferMode::Line + } else { + BufferMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_buffered() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--line-buffered"]).unwrap(); + assert_eq!(BufferMode::Line, args.buffer); + + let args = + parse_low_raw(["--line-buffered", "--no-line-buffered"]).unwrap(); + assert_eq!(BufferMode::Auto, args.buffer); + + let args = parse_low_raw(["--line-buffered", "--block-buffered"]).unwrap(); + assert_eq!(BufferMode::Block, args.buffer); +} + +/// -n/--line-number +#[derive(Debug)] +struct LineNumber; + +impl Flag for LineNumber { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'n') + } + fn name_long(&self) -> &'static str { + "line-number" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show line numbers." + } + fn doc_long(&self) -> &'static str { + r" +Show line numbers (1-based). +.sp +This is enabled by default when stdout is connected to a tty. +.sp +This flag can be disabled by \flag{no-line-number}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--line-number has no automatic negation"); + args.line_number = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_number() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--line-number"]).unwrap(); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-n"]).unwrap(); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-n", "--no-line-number"]).unwrap(); + assert_eq!(Some(false), args.line_number); +} + +/// -N/--no-line-number +#[derive(Debug)] +struct LineNumberNo; + +impl Flag for LineNumberNo { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'N') + } + fn name_long(&self) -> &'static str { + "no-line-number" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Suppress line numbers." + } + fn doc_long(&self) -> &'static str { + r" +Suppress line numbers. +.sp +Line numbers are off by default when stdout is not connected to a tty. +.sp +Line numbers can be forcefully turned on by \flag{line-number}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!( + v.unwrap_switch(), + "--no-line-number has no automatic negation" + ); + args.line_number = Some(false); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_line_number() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--no-line-number"]).unwrap(); + assert_eq!(Some(false), args.line_number); + + let args = parse_low_raw(["-N"]).unwrap(); + assert_eq!(Some(false), args.line_number); + + let args = parse_low_raw(["-N", "--line-number"]).unwrap(); + assert_eq!(Some(true), args.line_number); +} + +/// -x/--line-regexp +#[derive(Debug)] +struct LineRegexp; + +impl Flag for LineRegexp { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'x') + } + fn name_long(&self) -> &'static str { + "line-regexp" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Show matches surrounded by line boundaries." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will only show matches surrounded by line boundaries. +This is equivalent to surrounding every pattern with \fB^\fP and \fB$\fP. In +other words, this only prints lines where the entire line participates in a +match. +.sp +This overrides the \flag{word-regexp} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--line-regexp has no negation"); + args.boundary = Some(BoundaryMode::Line); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_line_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.boundary); + + let args = parse_low_raw(["--line-regexp"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); + + let args = parse_low_raw(["-x"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); +} + +/// -M/--max-columns +#[derive(Debug)] +struct MaxColumns; + +impl Flag for MaxColumns { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'M') + } + fn name_long(&self) -> &'static str { + "max-columns" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Omit lines longer than this limit." + } + fn doc_long(&self) -> &'static str { + r" +When given, ripgrep will omit lines longer than this limit in bytes. Instead of +printing long lines, only the number of matches in that line is printed. +.sp +When this flag is omitted or is set to \fB0\fP, then it has no effect. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let max = convert::u64(&v.unwrap_value())?; + args.max_columns = if max == 0 { None } else { Some(max) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_columns() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_columns); + + let args = parse_low_raw(["--max-columns", "5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["-M", "5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["-M5"]).unwrap(); + assert_eq!(Some(5), args.max_columns); + + let args = parse_low_raw(["--max-columns", "5", "-M0"]).unwrap(); + assert_eq!(None, args.max_columns); +} + +/// --max-columns-preview +#[derive(Debug)] +struct MaxColumnsPreview; + +impl Flag for MaxColumnsPreview { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "max-columns-preview" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-max-columns-preview") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Show preview for lines exceeding the limit." + } + fn doc_long(&self) -> &'static str { + r" +Prints a preview for lines exceeding the configured max column limit. +.sp +When the \flag{max-columns} flag is used, ripgrep will by default completely +replace any line that is too long with a message indicating that a matching +line was removed. When this flag is combined with \flag{max-columns}, a preview +of the line (corresponding to the limit size) is shown instead, where the part +of the line exceeding the limit is not shown. +.sp +If the \flag{max-columns} flag is not set, then this has no effect. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_columns_preview = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_columns_preview() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.max_columns_preview); + + let args = parse_low_raw(["--max-columns-preview"]).unwrap(); + assert_eq!(true, args.max_columns_preview); + + let args = + parse_low_raw(["--max-columns-preview", "--no-max-columns-preview"]) + .unwrap(); + assert_eq!(false, args.max_columns_preview); +} + +/// -m/--max-count +#[derive(Debug)] +struct MaxCount; + +impl Flag for MaxCount { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'm') + } + fn name_long(&self) -> &'static str { + "max-count" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Limit the number of matching lines." + } + fn doc_long(&self) -> &'static str { + r" +Limit the number of matching lines per file searched to \fINUM\fP. +.sp +Note that \fB0\fP is a legal value but not likely to be useful. When used, +ripgrep won't search anything. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_count = Some(convert::u64(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_count() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_count); + + let args = parse_low_raw(["--max-count", "5"]).unwrap(); + assert_eq!(Some(5), args.max_count); + + let args = parse_low_raw(["-m", "5"]).unwrap(); + assert_eq!(Some(5), args.max_count); + + let args = parse_low_raw(["-m", "5", "--max-count=10"]).unwrap(); + assert_eq!(Some(10), args.max_count); + let args = parse_low_raw(["-m0"]).unwrap(); + assert_eq!(Some(0), args.max_count); +} + +/// --max-depth +#[derive(Debug)] +struct MaxDepth; + +impl Flag for MaxDepth { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'd') + } + fn name_long(&self) -> &'static str { + "max-depth" + } + fn aliases(&self) -> &'static [&'static str] { + &["maxdepth"] + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Descend at most NUM directories." + } + fn doc_long(&self) -> &'static str { + r" +This flag limits the depth of directory traversal to \fINUM\fP levels beyond +the paths given. A value of \fB0\fP only searches the explicitly given paths +themselves. +.sp +For example, \fBrg --max-depth 0 \fP\fIdir/\fP is a no-op because \fIdir/\fP +will not be descended into. \fBrg --max-depth 1 \fP\fIdir/\fP will search only +the direct children of \fIdir\fP. +.sp +An alternative spelling for this flag is \fB\-\-maxdepth\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.max_depth = Some(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_depth() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_depth); + + let args = parse_low_raw(["--max-depth", "5"]).unwrap(); + assert_eq!(Some(5), args.max_depth); + + let args = parse_low_raw(["-d", "5"]).unwrap(); + assert_eq!(Some(5), args.max_depth); + + let args = parse_low_raw(["--max-depth", "5", "--max-depth=10"]).unwrap(); + assert_eq!(Some(10), args.max_depth); + + let args = parse_low_raw(["--max-depth", "0"]).unwrap(); + assert_eq!(Some(0), args.max_depth); + + let args = parse_low_raw(["--maxdepth", "5"]).unwrap(); + assert_eq!(Some(5), args.max_depth); +} + +/// --max-filesize +#[derive(Debug)] +struct MaxFilesize; + +impl Flag for MaxFilesize { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "max-filesize" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Ignore files larger than NUM in size." + } + fn doc_long(&self) -> &'static str { + r" +Ignore files larger than \fINUM\fP in size. This does not apply to directories. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +.sp +Examples: \fB\-\-max-filesize 50K\fP or \fB\-\-max\-filesize 80M\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.max_filesize = Some(convert::human_readable_u64(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_max_filesize() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.max_filesize); + + let args = parse_low_raw(["--max-filesize", "1024"]).unwrap(); + assert_eq!(Some(1024), args.max_filesize); + + let args = parse_low_raw(["--max-filesize", "1K"]).unwrap(); + assert_eq!(Some(1024), args.max_filesize); + + let args = + parse_low_raw(["--max-filesize", "1K", "--max-filesize=1M"]).unwrap(); + assert_eq!(Some(1024 * 1024), args.max_filesize); +} + +/// --mmap +#[derive(Debug)] +struct Mmap; + +impl Flag for Mmap { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "mmap" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-mmap") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search with memory maps when possible." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will search using memory maps when possible. This is +enabled by default when ripgrep thinks it will be faster. +.sp +Memory map searching cannot be used in all circumstances. For example, when +searching virtual files or streams likes \fBstdin\fP. In such cases, memory +maps will not be used even when this flag is enabled. +.sp +Note that ripgrep may abort unexpectedly when memory maps are used if it +searches a file that is simultaneously truncated. Users can opt out of this +possibility by disabling memory maps. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.mmap = if v.unwrap_switch() { + MmapMode::AlwaysTryMmap + } else { + MmapMode::Never + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_mmap() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(MmapMode::Auto, args.mmap); + + let args = parse_low_raw(["--mmap"]).unwrap(); + assert_eq!(MmapMode::AlwaysTryMmap, args.mmap); + + let args = parse_low_raw(["--no-mmap"]).unwrap(); + assert_eq!(MmapMode::Never, args.mmap); + + let args = parse_low_raw(["--mmap", "--no-mmap"]).unwrap(); + assert_eq!(MmapMode::Never, args.mmap); + + let args = parse_low_raw(["--no-mmap", "--mmap"]).unwrap(); + assert_eq!(MmapMode::AlwaysTryMmap, args.mmap); +} + +/// -U/--multiline +#[derive(Debug)] +struct Multiline; + +impl Flag for Multiline { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'U') + } + fn name_long(&self) -> &'static str { + "multiline" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-multiline") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Enable searching across multiple lines." + } + fn doc_long(&self) -> &'static str { + r#" +This flag enables searching across multiple lines. +.sp +When multiline mode is enabled, ripgrep will lift the restriction that a +match cannot include a line terminator. For example, when multiline mode +is not enabled (the default), then the regex \fB\\p{any}\fP will match any +Unicode codepoint other than \fB\\n\fP. Similarly, the regex \fB\\n\fP is +explicitly forbidden, and if you try to use it, ripgrep will return an error. +However, when multiline mode is enabled, \fB\\p{any}\fP will match any Unicode +codepoint, including \fB\\n\fP, and regexes like \fB\\n\fP are permitted. +.sp +An important caveat is that multiline mode does not change the match semantics +of \fB.\fP.
Namely, in most regex matchers, a \fB.\fP will by default match any +character other than \fB\\n\fP, and this is true in ripgrep as well. In order +to make \fB.\fP match \fB\\n\fP, you must enable the "dot all" flag inside the +regex. For example, both \fB(?s).\fP and \fB(?s:.)\fP have the same semantics, +where \fB.\fP will match any character, including \fB\\n\fP. Alternatively, the +\flag{multiline-dotall} flag may be passed to make the "dot all" behavior the +default. This flag only applies when multiline search is enabled. +.sp +There is no limit on the number of the lines that a single match can span. +.sp +\fBWARNING\fP: Because of how the underlying regex engine works, multiline +searches may be slower than normal line-oriented searches, and they may also +use more memory. In particular, when multiline mode is enabled, ripgrep +requires that each file it searches is laid out contiguously in memory (either +by reading it onto the heap or by memory-mapping it). Things that cannot be +memory-mapped (such as \fBstdin\fP) will be consumed until EOF before searching +can begin. In general, ripgrep will only do these things when necessary. +Specifically, if the \flag{multiline} flag is provided but the regex does +not contain patterns that would match \fB\\n\fP characters, then ripgrep +will automatically avoid reading each file into memory before searching it. +Nevertheless, if you only care about matches spanning at most one line, then it +is always better to disable multiline mode. +.sp +This overrides the \flag{stop-on-nonmatch} flag. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.multiline = v.unwrap_switch(); + if args.multiline { + args.stop_on_nonmatch = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_multiline() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.multiline); + + let args = parse_low_raw(["--multiline"]).unwrap(); + assert_eq!(true, args.multiline); + + let args = parse_low_raw(["-U"]).unwrap(); + assert_eq!(true, args.multiline); + + let args = parse_low_raw(["-U", "--no-multiline"]).unwrap(); + assert_eq!(false, args.multiline); +} + +/// --multiline-dotall +#[derive(Debug)] +struct MultilineDotall; + +impl Flag for MultilineDotall { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "multiline-dotall" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-multiline-dotall") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Make '.' match line terminators." + } + fn doc_long(&self) -> &'static str { + r#" +This flag enables "dot all" mode in all regex patterns. This causes \fB.\fP to +match line terminators when multiline searching is enabled. This flag has no +effect if multiline searching isn't enabled with the \flag{multiline} flag. +.sp +Normally, a \fB.\fP will match any character except line terminators. While +this behavior typically isn't relevant for line-oriented matching (since +matches can span at most one line), this can be useful when searching with the +\flag{multiline} flag. By default, multiline mode runs without "dot all" mode +enabled. +.sp +This flag is generally intended to be used in an alias or your ripgrep config +file if you prefer "dot all" semantics by default. 
Note that regardless of +whether this flag is used, "dot all" semantics can still be controlled via +inline flags in the regex pattern itself, e.g., \fB(?s:.)\fP always enables +"dot all" whereas \fB(?-s:.)\fP always disables "dot all". Moreover, you +can use character classes like \fB\\p{any}\fP to match any Unicode codepoint +regardless of whether "dot all" mode is enabled or not. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.multiline_dotall = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_multiline_dotall() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.multiline_dotall); + + let args = parse_low_raw(["--multiline-dotall"]).unwrap(); + assert_eq!(true, args.multiline_dotall); + + let args = parse_low_raw(["--multiline-dotall", "--no-multiline-dotall"]) + .unwrap(); + assert_eq!(false, args.multiline_dotall); +} + +/// --no-config +#[derive(Debug)] +struct NoConfig; + +impl Flag for NoConfig { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-config" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Never read configuration files." + } + fn doc_long(&self) -> &'static str { + r" +When set, ripgrep will never read configuration files. When this flag is +present, ripgrep will not respect the \fBRIPGREP_CONFIG_PATH\fP environment +variable. +.sp +If ripgrep ever grows a feature to automatically read configuration files in +pre-defined locations, then this flag will also disable that behavior as well. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--no-config has no negation"); + args.no_config = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_config() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_config); + + let args = parse_low_raw(["--no-config"]).unwrap(); + assert_eq!(true, args.no_config); +} + +/// --no-ignore +#[derive(Debug)] +struct NoIgnore; + +impl Flag for NoIgnore { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files." + } + fn doc_long(&self) -> &'static str { + r" +When set, ignore files such as \fB.gitignore\fP, \fB.ignore\fP and +\fB.rgignore\fP will not be respected. This implies \flag{no-ignore-dot}, +\flag{no-ignore-exclude}, \flag{no-ignore-global}, \flag{no-ignore-parent} and +\flag{no-ignore-vcs}. +.sp +This does not imply \flag{no-ignore-files}, since \flag{ignore-file} is +specified explicitly as a command line argument. +.sp +When given only once, the \flag{unrestricted} flag is identical in +behavior to this flag and can be considered an alias. However, subsequent +\flag{unrestricted} flags have additional effects. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let yes = v.unwrap_switch(); + args.no_ignore_dot = yes; + args.no_ignore_exclude = yes; + args.no_ignore_global = yes; + args.no_ignore_parent = yes; + args.no_ignore_vcs = yes; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_dot); + assert_eq!(false, args.no_ignore_exclude); + assert_eq!(false, args.no_ignore_global); + assert_eq!(false, args.no_ignore_parent); + assert_eq!(false, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore"]).unwrap(); + assert_eq!(true, args.no_ignore_dot); + assert_eq!(true, args.no_ignore_exclude); + assert_eq!(true, args.no_ignore_global); + assert_eq!(true, args.no_ignore_parent); + assert_eq!(true, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore", "--ignore"]).unwrap(); + assert_eq!(false, args.no_ignore_dot); + assert_eq!(false, args.no_ignore_exclude); + assert_eq!(false, args.no_ignore_global); + assert_eq!(false, args.no_ignore_parent); + assert_eq!(false, args.no_ignore_vcs); +} + +/// --no-ignore-dot +#[derive(Debug)] +struct NoIgnoreDot; + +impl Flag for NoIgnoreDot { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-dot" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-dot") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use .ignore or .rgignore files." + } + fn doc_long(&self) -> &'static str { + r" +Don't respect filter rules from \fB.ignore\fP or \fB.rgignore\fP files. +.sp +This does not impact whether ripgrep will ignore files and directories whose +names begin with a dot. For that, see the \flag{hidden} flag. This flag also +does not impact whether filter rules from \fB.gitignore\fP files are respected. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_dot = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_dot() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_dot); + + let args = parse_low_raw(["--no-ignore-dot"]).unwrap(); + assert_eq!(true, args.no_ignore_dot); + + let args = parse_low_raw(["--no-ignore-dot", "--ignore-dot"]).unwrap(); + assert_eq!(false, args.no_ignore_dot); +} + +/// --no-ignore-exclude +#[derive(Debug)] +struct NoIgnoreExclude; + +impl Flag for NoIgnoreExclude { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-exclude" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-exclude") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use local exclusion files." + } + fn doc_long(&self) -> &'static str { + r" +Don't respect filter rules from files that are manually configured for the repository. +For example, this includes \fBgit\fP's \fB.git/info/exclude\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_exclude = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_exclude() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_exclude); + + let args = parse_low_raw(["--no-ignore-exclude"]).unwrap(); + assert_eq!(true, args.no_ignore_exclude); + + let args = + parse_low_raw(["--no-ignore-exclude", "--ignore-exclude"]).unwrap(); + assert_eq!(false, args.no_ignore_exclude); +} + +/// --no-ignore-files +#[derive(Debug)] +struct NoIgnoreFiles; + +impl Flag for NoIgnoreFiles { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-files" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-files") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use --ignore-file arguments." + } + fn doc_long(&self) -> &'static str { + r" +When set, any \flag{ignore-file} flags, even ones that come after this flag, +are ignored. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_files = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_files); + + let args = parse_low_raw(["--no-ignore-files"]).unwrap(); + assert_eq!(true, args.no_ignore_files); + + let args = parse_low_raw(["--no-ignore-files", "--ignore-files"]).unwrap(); + assert_eq!(false, args.no_ignore_files); +} + +/// --no-ignore-global +#[derive(Debug)] +struct NoIgnoreGlobal; + +impl Flag for NoIgnoreGlobal { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-global" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-global") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use global ignore files." + } + fn doc_long(&self) -> &'static str { + r#" +Don't respect filter rules from ignore files that come from "global" sources +such as \fBgit\fP's \fBcore.excludesFile\fP configuration option (which +defaults to \fB$HOME/.config/git/ignore\fP). 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_global = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_global() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_global); + + let args = parse_low_raw(["--no-ignore-global"]).unwrap(); + assert_eq!(true, args.no_ignore_global); + + let args = + parse_low_raw(["--no-ignore-global", "--ignore-global"]).unwrap(); + assert_eq!(false, args.no_ignore_global); +} + +/// --no-ignore-messages +#[derive(Debug)] +struct NoIgnoreMessages; + +impl Flag for NoIgnoreMessages { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-messages" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-messages") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Suppress gitignore parse error messages." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is enabled, all error messages related to parsing ignore files +are suppressed. By default, error messages are printed to stderr. In cases +where these errors are expected, this flag can be used to avoid seeing the +noise produced by the messages. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_messages = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_messages() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_messages); + + let args = parse_low_raw(["--no-ignore-messages"]).unwrap(); + assert_eq!(true, args.no_ignore_messages); + + let args = + parse_low_raw(["--no-ignore-messages", "--ignore-messages"]).unwrap(); + assert_eq!(false, args.no_ignore_messages); +} + +/// --no-ignore-parent +#[derive(Debug)] +struct NoIgnoreParent; + +impl Flag for NoIgnoreParent { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-parent" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-parent") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files in parent directories." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is set, filter rules from ignore files found in parent +directories are not respected. By default, ripgrep will ascend the parent +directories of the current working directory to look for any applicable ignore +files that should be applied. In some cases this may not be desirable. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_parent = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_parent() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_parent); + + let args = parse_low_raw(["--no-ignore-parent"]).unwrap(); + assert_eq!(true, args.no_ignore_parent); + + let args = + parse_low_raw(["--no-ignore-parent", "--ignore-parent"]).unwrap(); + assert_eq!(false, args.no_ignore_parent); +} + +/// --no-ignore-vcs +#[derive(Debug)] +struct NoIgnoreVcs; + +impl Flag for NoIgnoreVcs { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-ignore-vcs" + } + fn name_negated(&self) -> Option<&'static str> { + Some("ignore-vcs") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Don't use ignore files from source control." + } + fn doc_long(&self) -> &'static str { + r" +When given, filter rules from source control ignore files (e.g., \fB.gitignore\fP) +are not respected. By default, ripgrep respects \fBgit\fP's ignore rules for +automatic filtering. In some cases, it may not be desirable to respect the +source control's ignore rules and instead only respect rules in \fB.ignore\fP +or \fB.rgignore\fP. +.sp +This flag implies \flag{no-ignore-parent} for source control ignore files as +well. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_ignore_vcs = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_ignore_vcs() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore-vcs"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + + let args = parse_low_raw(["--no-ignore-vcs", "--ignore-vcs"]).unwrap(); + assert_eq!(false, args.no_ignore_vcs); +} + +/// --no-messages +#[derive(Debug)] +struct NoMessages; + +impl Flag for NoMessages { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-messages" + } + fn name_negated(&self) -> Option<&'static str> { + Some("messages") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Suppress some error messages." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses some error messages. Specifically, messages related to +the failed opening and reading of files. Error messages related to the syntax +of the pattern are still shown. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_messages = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_messages() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_messages); + + let args = parse_low_raw(["--no-messages"]).unwrap(); + assert_eq!(true, args.no_messages); + + let args = parse_low_raw(["--no-messages", "--messages"]).unwrap(); + assert_eq!(false, args.no_messages); +} + +/// --no-pcre2-unicode +#[derive(Debug)] +struct NoPcre2Unicode; + +impl Flag for NoPcre2Unicode { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-pcre2-unicode" + } + fn name_negated(&self) -> Option<&'static str> { + Some("pcre2-unicode") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"(DEPRECATED) Disable Unicode mode for PCRE2." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \flag{no-unicode} instead. +.sp +Note that Unicode mode is enabled by default. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_unicode = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_pcre2_unicode() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-pcre2-unicode"]).unwrap(); + assert_eq!(true, args.no_unicode); + + let args = + parse_low_raw(["--no-pcre2-unicode", "--pcre2-unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); +} + +/// --no-require-git +#[derive(Debug)] +struct NoRequireGit; + +impl Flag for NoRequireGit { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-require-git" + } + fn name_negated(&self) -> Option<&'static str> { + Some("require-git") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Use .gitignore outside of git repositories." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is given, source control ignore files such as \fB.gitignore\fP +are respected even if no \fBgit\fP repository is present. +.sp +By default, ripgrep will only respect filter rules from source control ignore +files when ripgrep detects that the search is executed inside a source control +repository. For example, when a \fB.git\fP directory is observed. +.sp +This flag relaxes the default restriction. For example, it might be useful when +the contents of a \fBgit\fP repository are stored or copied somewhere, but +where the repository state is absent. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_require_git = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_require_git() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_require_git); + + let args = parse_low_raw(["--no-require-git"]).unwrap(); + assert_eq!(true, args.no_require_git); + + let args = parse_low_raw(["--no-require-git", "--require-git"]).unwrap(); + assert_eq!(false, args.no_require_git); +} + +/// --no-unicode +#[derive(Debug)] +struct NoUnicode; + +impl Flag for NoUnicode { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "no-unicode" + } + fn name_negated(&self) -> Option<&'static str> { + Some("unicode") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Disable Unicode mode." + } + fn doc_long(&self) -> &'static str { + r#" +This flag disables Unicode mode for all patterns given to ripgrep. +.sp +By default, ripgrep will enable "Unicode mode" in all of its regexes. This has +a number of consequences: +.sp +.IP \(bu 3n +\fB.\fP will only match valid UTF-8 encoded Unicode scalar values. +.sp +.IP \(bu 3n +Classes like \fB\\w\fP, \fB\\s\fP, \fB\\d\fP are all Unicode aware and much +bigger than their ASCII only versions. +.sp +.IP \(bu 3n +Case insensitive matching will use Unicode case folding. +.sp +.IP \(bu 3n +A large array of classes like \fB\\p{Emoji}\fP are available. (Although the +specific set of classes available varies based on the regex engine. In general, +the default regex engine has more classes available to it.) +.sp +.IP \(bu 3n +Word boundaries (\fB\\b\fP and \fB\\B\fP) use the Unicode definition of a word +character. +.PP +In some cases it can be desirable to turn these things off. This flag will do +exactly that. 
For example, Unicode mode can sometimes have a negative impact +on performance, especially when things like \fB\\w\fP are used frequently +(including via bounded repetitions like \fB\\w{100}\fP) when only their ASCII +interpretation is needed. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.no_unicode = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_no_unicode() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-unicode"]).unwrap(); + assert_eq!(true, args.no_unicode); + + let args = parse_low_raw(["--no-unicode", "--unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-unicode", "--pcre2-unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); + + let args = parse_low_raw(["--no-pcre2-unicode", "--unicode"]).unwrap(); + assert_eq!(false, args.no_unicode); +} + +/// -0/--null +#[derive(Debug)] +struct Null; + +impl Flag for Null { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'0') + } + fn name_long(&self) -> &'static str { + "null" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print a NUL byte after file paths." + } + fn doc_long(&self) -> &'static str { + r" +Whenever a file path is printed, follow it with a \fBNUL\fP byte. This includes +printing file paths before matches, and when printing a list of matching files +such as with \flag{count}, \flag{files-with-matches} and \flag{files}. This +option is useful for use with \fBxargs\fP.
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--null has no negation"); + args.null = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_null() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.null); + + let args = parse_low_raw(["--null"]).unwrap(); + assert_eq!(true, args.null); + + let args = parse_low_raw(["-0"]).unwrap(); + assert_eq!(true, args.null); +} + +/// --null-data +#[derive(Debug)] +struct NullData; + +impl Flag for NullData { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "null-data" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Use NUL as a line terminator." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this flag causes ripgrep to use \fBNUL\fP as a line terminator instead +of the default of \fB\\n\fP. +.sp +This is useful when searching large binary files that would otherwise have +very long lines if \fB\\n\fP were used as the line terminator. In particular, +ripgrep requires that, at a minimum, each line must fit into memory. Using +\fBNUL\fP instead can be a useful stopgap to keep memory requirements low and +avoid OOM (out of memory) conditions. +.sp +This is also useful for processing NUL delimited data, such as that emitted +when using ripgrep's \flag{null} flag or \fBfind\fP's \fB\-print0\fP flag. +.sp +Using this flag implies \flag{text}. It also overrides \flag{crlf}.
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--null-data has no negation"); + args.crlf = false; + args.null_data = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_null_data() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.null_data); + + let args = parse_low_raw(["--null-data"]).unwrap(); + assert_eq!(true, args.null_data); + + let args = parse_low_raw(["--null-data", "--crlf"]).unwrap(); + assert_eq!(false, args.null_data); + assert_eq!(true, args.crlf); + + let args = parse_low_raw(["--crlf", "--null-data"]).unwrap(); + assert_eq!(true, args.null_data); + assert_eq!(false, args.crlf); + + let args = parse_low_raw(["--null-data", "--no-crlf"]).unwrap(); + assert_eq!(true, args.null_data); + assert_eq!(false, args.crlf); +} + +/// --one-file-system +#[derive(Debug)] +struct OneFileSystem; + +impl Flag for OneFileSystem { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "one-file-system" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-one-file-system") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Skip directories on other file systems." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will not cross file system boundaries relative to where +the search started from. +.sp +Note that this applies to each path argument given to ripgrep. For example, in +the command +.sp +.EX + rg \-\-one\-file\-system /foo/bar /quux/baz +.EE +.sp +ripgrep will search both \fI/foo/bar\fP and \fI/quux/baz\fP even if they are +on different file systems, but will not cross a file system boundary when +traversing each path's directory tree. +.sp +This is similar to \fBfind\fP's \fB\-xdev\fP or \fB\-mount\fP flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.one_file_system = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_one_file_system() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.one_file_system); + + let args = parse_low_raw(["--one-file-system"]).unwrap(); + assert_eq!(true, args.one_file_system); + + let args = + parse_low_raw(["--one-file-system", "--no-one-file-system"]).unwrap(); + assert_eq!(false, args.one_file_system); +} + +/// -o/--only-matching +#[derive(Debug)] +struct OnlyMatching; + +impl Flag for OnlyMatching { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'o') + } + fn name_long(&self) -> &'static str { + "only-matching" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print only matched parts of a line." + } + fn doc_long(&self) -> &'static str { + r" +Print only the matched (non-empty) parts of a matching line, with each such +part on a separate output line.
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--only-matching does not have a negation"); + args.only_matching = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_only_matching() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.only_matching); + + let args = parse_low_raw(["--only-matching"]).unwrap(); + assert_eq!(true, args.only_matching); + + let args = parse_low_raw(["-o"]).unwrap(); + assert_eq!(true, args.only_matching); +} + +/// --path-separator +#[derive(Debug)] +struct PathSeparator; + +impl Flag for PathSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "path-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Set the path separator for printing paths." + } + fn doc_long(&self) -> &'static str { + r" +Set the path separator to use when printing file paths. This defaults to your +platform's path separator, which is \fB/\fP on Unix and \fB\\\fP on Windows. +This flag is intended for overriding the default when the environment demands +it (e.g., cygwin). A path separator is limited to a single byte. +.sp +Setting this flag to an empty string reverts it to its default behavior. That +is, the path separator is automatically chosen based on the environment. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let s = convert::string(v.unwrap_value())?; + let raw = Vec::unescape_bytes(&s); + args.path_separator = if raw.is_empty() { + None + } else if raw.len() == 1 { + Some(raw[0]) + } else { + anyhow::bail!( + "A path separator must be exactly one byte, but \ + the given separator is {len} bytes: {sep}\n\ + In some shells on Windows '/' is automatically \ + expanded. 
Use '//' instead.", + len = raw.len(), + sep = s, + ) + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_path_separator() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.path_separator); + + let args = parse_low_raw(["--path-separator", "/"]).unwrap(); + assert_eq!(Some(b'/'), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\"]).unwrap(); + assert_eq!(Some(b'\\'), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\x00"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", r"\0"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", "\x00"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = parse_low_raw(["--path-separator", "\0"]).unwrap(); + assert_eq!(Some(0), args.path_separator); + + let args = + parse_low_raw(["--path-separator", r"\x00", "--path-separator=/"]) + .unwrap(); + assert_eq!(Some(b'/'), args.path_separator); + + let result = parse_low_raw(["--path-separator", "foo"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--path-separator", r"\\x00"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --passthru +#[derive(Debug)] +struct Passthru; + +impl Flag for Passthru { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "passthru" + } + fn aliases(&self) -> &'static [&'static str] { + &["passthrough"] + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print both matching and non-matching lines." + } + fn doc_long(&self) -> &'static str { + r#" +Print both matching and non-matching lines. +.sp +Another way to achieve a similar effect is by modifying your pattern to match +the empty string. 
For example, if you are searching using \fBrg\fP \fIfoo\fP, +then using \fBrg\fP \fB'^|\fP\fIfoo\fP\fB'\fP instead will emit every line in +every file searched, but only occurrences of \fIfoo\fP will be highlighted. +This flag enables the same behavior without needing to modify the pattern. +.sp +An alternative spelling for this flag is \fB\-\-passthrough\fP. +.sp +This overrides the \flag{context}, \flag{after-context} and +\flag{before-context} flags. +"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--passthru has no negation"); + args.context = ContextMode::Passthru; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_passthru() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse_low_raw(["--passthru"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); + + let args = parse_low_raw(["--passthrough"]).unwrap(); + assert_eq!(ContextMode::Passthru, args.context); +} + +/// -P/--pcre2 +#[derive(Debug)] +struct PCRE2; + +impl Flag for PCRE2 { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'P') + } + fn name_long(&self) -> &'static str { + "pcre2" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-pcre2") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Enable PCRE2 matching." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is present, ripgrep will use the PCRE2 regex engine instead of +its default regex engine. +.sp +This is generally useful when you want to use features such as look-around +or backreferences. +.sp +Using this flag is the same as passing \fB\-\-engine=pcre2\fP. Users may +instead elect to use \fB\-\-engine=auto\fP to ask ripgrep to automatically +select the right regex engine based on the patterns given. This flag and the +\flag{engine} flag override one another. 
+.sp +Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in +your build of ripgrep, then using this flag will result in ripgrep printing +an error message and exiting. PCRE2 may also have worse user experience in +some cases, since it has fewer introspection APIs than ripgrep's default +regex engine. For example, if you use a \fB\\n\fP in a PCRE2 regex without +the \flag{multiline} flag, then ripgrep will silently fail to match anything +instead of reporting an error immediately (like it does with the default regex +engine). +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.engine = if v.unwrap_switch() { + EngineChoice::PCRE2 + } else { + EngineChoice::Default + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pcre2() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse_low_raw(["-P", "--no-pcre2"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["--engine=auto", "-P", "--no-pcre2"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse_low_raw(["-P", "--engine=auto"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); +} + +/// --pcre2-version +#[derive(Debug)] +struct PCRE2Version; + +impl Flag for PCRE2Version { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "pcre2-version" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print the version of PCRE2 that ripgrep uses." + } + fn doc_long(&self) -> &'static str { + r" +When this flag is present, ripgrep will print the version of PCRE2 in use, +along with other information, and then exit. 
If PCRE2 is not available, then +ripgrep will print an error message and exit with an error code. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--pcre2-version has no negation"); + args.special = Some(SpecialMode::VersionPCRE2); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pcre2_version() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["--pcre2-version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionPCRE2), args.special); +} + +/// --pre +#[derive(Debug)] +struct Pre; + +impl Flag for Pre { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "pre" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-pre") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COMMAND") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search output of COMMAND for each PATH." + } + fn doc_long(&self) -> &'static str { + r#" +For each input \fIPATH\fP, this flag causes ripgrep to search the standard +output of \fICOMMAND\fP \fIPATH\fP instead of the contents of \fIPATH\fP. +This option expects the \fICOMMAND\fP program to either be a path or to be +available in your \fBPATH\fP. Either an empty string \fICOMMAND\fP or the +\fB\-\-no\-pre\fP flag will disable this behavior. +.sp +.TP 12 +\fBWARNING\fP +When this flag is set, ripgrep will unconditionally spawn a process for every +file that is searched. Therefore, this can incur an unnecessarily large +performance penalty if you don't otherwise need the flexibility offered by this +flag. One possible mitigation to this is to use the \flag{pre-glob} flag to +limit which files a preprocessor is run with. +.PP +A preprocessor is not run when ripgrep is searching stdin. 
+.sp +When searching over sets of files that may require one of several +preprocessors, \fICOMMAND\fP should be a wrapper program which first classifies +\fIPATH\fP based on magic numbers/content or based on the \fIPATH\fP name and +then dispatches to an appropriate preprocessor. Each \fICOMMAND\fP also has its +standard input connected to \fIPATH\fP for convenience. +.sp +For example, a shell script for \fICOMMAND\fP might look like: +.sp +.EX + case "$1" in + *.pdf) + exec pdftotext "$1" - + ;; + *) + case $(file "$1") in + *Zstandard*) + exec pzstd -cdq + ;; + *) + exec cat + ;; + esac + ;; + esac +.EE +.sp +The above script uses \fBpdftotext\fP to convert a PDF file to plain text. For +all other files, the script uses the \fBfile\fP utility to sniff the type of +the file based on its contents. If it is a compressed file in the Zstandard +format, then \fBpzstd\fP is used to decompress the contents to stdout. +.sp +This overrides the \flag{search-zip} flag. +"# + } + fn completion_type(&self) -> CompletionType { + CompletionType::Executable + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let path = match v { + FlagValue::Value(v) => PathBuf::from(v), + FlagValue::Switch(yes) => { + assert!(!yes, "there is no affirmative switch for --pre"); + args.pre = None; + return Ok(()); + } + }; + args.pre = if path.as_os_str().is_empty() { None } else { Some(path) }; + if args.pre.is_some() { + args.search_zip = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pre() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo/bar")), args.pre); + + let args = parse_low_raw(["--pre", ""]).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--pre", ""]).unwrap(); + assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--pre="]).unwrap(); + 
assert_eq!(None, args.pre); + + let args = parse_low_raw(["--pre", "foo/bar", "--no-pre"]).unwrap(); + assert_eq!(None, args.pre); +} + +/// --pre-glob +#[derive(Debug)] +struct PreGlob; + +impl Flag for PreGlob { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "pre-glob" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("GLOB") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Include or exclude files from a preprocessor." + } + fn doc_long(&self) -> &'static str { + r#" +This flag works in conjunction with the \flag{pre} flag. Namely, when one or +more \flag{pre-glob} flags are given, then only files that match the given set +of globs will be handed to the command specified by the \flag{pre} flag. Any +non-matching files will be searched without using the preprocessor command. +.sp +This flag is useful when searching many files with the \flag{pre} flag. +Namely, it provides the ability to avoid process overhead for files that +don't need preprocessing. For example, given the following shell script, +\fIpre-pdftotext\fP: +.sp +.EX + #!/bin/sh + pdftotext "$1" - +.EE +.sp +then it is possible to use \fB\-\-pre\fP \fIpre-pdftotext\fP \fB--pre-glob +'\fP\fI*.pdf\fP\fB'\fP to make it so ripgrep only executes the +\fIpre-pdftotext\fP command on files with a \fI.pdf\fP extension. +.sp +Multiple \flag{pre-glob} flags may be used. Globbing rules match +\fBgitignore\fP globs. Precede a glob with a \fB!\fP to exclude it. +.sp +This flag has no effect if the \flag{pre} flag is not used. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let glob = convert::string(v.unwrap_value())?; + args.pre_glob.push(glob); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pre_glob() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.pre_glob); + + let args = parse_low_raw(["--pre-glob", "*.pdf"]).unwrap(); + assert_eq!(vec!["*.pdf".to_string()], args.pre_glob); + + let args = + parse_low_raw(["--pre-glob", "*.pdf", "--pre-glob=foo"]).unwrap(); + assert_eq!(vec!["*.pdf".to_string(), "foo".to_string()], args.pre_glob); +} + +/// -p/--pretty +#[derive(Debug)] +struct Pretty; + +impl Flag for Pretty { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'p') + } + fn name_long(&self) -> &'static str { + "pretty" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Alias for colors, headings and line numbers." + } + fn doc_long(&self) -> &'static str { + r" +This is a convenience alias for \fB\-\-color=always \-\-heading +\-\-line\-number\fP. This flag is useful when you still want pretty output even +if you're piping ripgrep to another program or file. For example: \fBrg -p +\fP\fIfoo\fP \fB| less -R\fP. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--pretty has no negation"); + args.color = ColorChoice::Always; + args.heading = Some(true); + args.line_number = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_pretty() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + assert_eq!(None, args.heading); + assert_eq!(None, args.line_number); + + let args = parse_low_raw(["--pretty"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + assert_eq!(Some(true), args.heading); + assert_eq!(Some(true), args.line_number); + + let args = parse_low_raw(["-p"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + assert_eq!(Some(true), args.heading); + assert_eq!(Some(true), args.line_number); +} + +/// -q/--quiet +#[derive(Debug)] +struct Quiet; + +impl Flag for Quiet { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'q') + } + fn name_long(&self) -> &'static str { + "quiet" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Do not print anything to stdout." + } + fn doc_long(&self) -> &'static str { + r" +Do not print anything to stdout. If a match is found in a file, then ripgrep +will stop searching. This is useful when ripgrep is used only for its exit code +(which will be an error code if no matches are found). +.sp +When \flag{files} is used, ripgrep will stop finding files after finding the +first file that does not match any ignore rules. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--quiet has no negation"); + args.quiet = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_quiet() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.quiet); + + let args = parse_low_raw(["--quiet"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q"]).unwrap(); + assert_eq!(true, args.quiet); + + // flags like -l and --json cannot override -q, regardless of order + let args = parse_low_raw(["-q", "--json"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--files-with-matches"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--files-without-match"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--count"]).unwrap(); + assert_eq!(true, args.quiet); + + let args = parse_low_raw(["-q", "--count-matches"]).unwrap(); + assert_eq!(true, args.quiet); +} + +/// --regex-size-limit +#[derive(Debug)] +struct RegexSizeLimit; + +impl Flag for RegexSizeLimit { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "regex-size-limit" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"The size limit of the compiled regex." + } + fn doc_long(&self) -> &'static str { + r" +The size limit of the compiled regex, where the compiled regex generally +corresponds to a single object in memory that can match all of the patterns +provided to ripgrep. The default limit is generous enough that most reasonable +patterns (or even a small number of them) should fit. +.sp +This useful to change when you explicitly want to let ripgrep spend potentially +much more time and/or memory building a regex matcher. 
+.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.regex_size_limit = Some(convert::human_readable_usize(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_regex_size_limit() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.regex_size_limit); + + #[cfg(target_pointer_width = "64")] + { + let args = parse_low_raw(["--regex-size-limit", "9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.regex_size_limit); + + let args = + parse_low_raw(["--regex-size-limit=9G", "--regex-size-limit=0"]) + .unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + } + + let args = parse_low_raw(["--regex-size-limit=0K"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=0M"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let args = parse_low_raw(["--regex-size-limit=0G"]).unwrap(); + assert_eq!(Some(0), args.regex_size_limit); + + let result = + parse_low_raw(["--regex-size-limit", "9999999999999999999999"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse_low_raw(["--regex-size-limit", "9999999999999999G"]); + assert!(result.is_err(), "{result:?}"); +} + +/// -e/--regexp +#[derive(Debug)] +struct Regexp; + +impl Flag for Regexp { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'e') + } + fn name_long(&self) -> &'static str { + "regexp" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERN") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"A pattern to search 
for." + } + fn doc_long(&self) -> &'static str { + r" +A pattern to search for. This option can be provided multiple times, where +all patterns given are searched, in addition to any patterns provided by +\flag{file}. Lines matching at least one of the provided patterns are printed. +This flag can also be used when searching for patterns that start with a dash. +.sp +For example, to search for the literal \fB\-foo\fP: +.sp +.EX + rg \-e \-foo +.EE +.sp +You can also use the special \fB\-\-\fP delimiter to indicate that no more +flags will be provided. Namely, the following is equivalent to the above: +.sp +.EX + rg \-\- \-foo +.EE +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let regexp = convert::string(v.unwrap_value())?; + args.patterns.push(PatternSource::Regexp(regexp)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.patterns); + + let args = parse_low_raw(["--regexp", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["-efoo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp", "-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e", "-foo"]).unwrap(); + 
assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["-e-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse_low_raw(["--regexp=foo", "--regexp", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::Regexp("bar".to_string()) + ], + args.patterns + ); + + // While we support invalid UTF-8 arguments in general, patterns must be + // valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let result = parse_low_raw([ + OsStr::from_bytes(b"-e"), + OsStr::from_bytes(bytes), + ]); + assert!(result.is_err(), "{result:?}"); + } + + // Check that combining -e/--regexp and -f/--file works as expected. + let args = parse_low_raw(["-efoo", "-fbar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + let args = parse_low_raw(["-efoo", "-fbar", "-equux"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")), + PatternSource::Regexp("quux".to_string()), + ], + args.patterns + ); +} + +/// -r/--replace +#[derive(Debug)] +struct Replace; + +impl Flag for Replace { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'r') + } + fn name_long(&self) -> &'static str { + "replace" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("REPLACEMENT") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Replace matches with the given text." + } + fn doc_long(&self) -> &'static str { + r#" +Replaces every match with the text given when printing results. Neither this +flag nor any other ripgrep flag will modify your files. 
+.sp +Capture group indices (e.g., \fB$\fP\fI5\fP) and names (e.g., \fB$\fP\fIfoo\fP) +are supported in the replacement string. Capture group indices are numbered +based on the position of the opening parenthesis of the group, where the +leftmost such group is \fB$\fP\fI1\fP. The special \fB$\fP\fI0\fP group +corresponds to the entire match. +.sp +The name of a group is formed by taking the longest string of letters, numbers +and underscores (i.e. \fB[_0-9A-Za-z]\fP) after the \fB$\fP. For example, +\fB$\fP\fI1a\fP will be replaced with the group named \fI1a\fP, not the +group at index \fI1\fP. If the group's name contains characters that aren't +letters, numbers or underscores, or you want to immediately follow the group +with another string, the name should be put inside braces. For example, +\fB${\fP\fI1\fP\fB}\fP\fIa\fP will take the content of the group at index +\fI1\fP and append \fIa\fP to the end of it. +.sp +If an index or name does not refer to a valid capture group, it will be +replaced with an empty string. +.sp +In shells such as Bash and zsh, you should wrap the pattern in single quotes +instead of double quotes. Otherwise, capture group indices will be replaced by +expanded shell variables which will most likely be empty. +.sp +To write a literal \fB$\fP, use \fB$$\fP. +.sp +Note that the replacement by default replaces each match, and not the entire +line. To replace the entire line, you should match the entire line. +.sp +This flag can be used with the \flag{only-matching} flag. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.replace = Some(convert::string(v.unwrap_value())?.into()); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_replace() { + use bstr::BString; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.replace); + + let args = parse_low_raw(["--replace", "foo"]).unwrap(); + assert_eq!(Some(BString::from("foo")), args.replace); + + let args = parse_low_raw(["--replace", "-foo"]).unwrap(); + assert_eq!(Some(BString::from("-foo")), args.replace); + + let args = parse_low_raw(["-r", "foo"]).unwrap(); + assert_eq!(Some(BString::from("foo")), args.replace); + + let args = parse_low_raw(["-r", "foo", "-rbar"]).unwrap(); + assert_eq!(Some(BString::from("bar")), args.replace); + + let args = parse_low_raw(["-r", "foo", "-r", ""]).unwrap(); + assert_eq!(Some(BString::from("")), args.replace); +} + +/// -z/--search-zip +#[derive(Debug)] +struct SearchZip; + +impl Flag for SearchZip { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'z') + } + fn name_long(&self) -> &'static str { + "search-zip" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-search-zip") + } + fn doc_category(&self) -> Category { + Category::Input + } + fn doc_short(&self) -> &'static str { + r"Search in compressed files." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to search in compressed files. Currently gzip, +bzip2, xz, LZ4, LZMA, Brotli and Zstd files are supported. This option expects +the decompression binaries (such as \fBgzip\fP) to be available in your +\fBPATH\fP. If the required binaries are not found, then ripgrep will not +emit an error messages by default. Use the \flag{debug} flag to see more +information. +.sp +Note that this flag does not make ripgrep search archive formats as directory +trees. 
It only makes ripgrep detect compressed files and then decompress them +before searching their contents as it would any other file. +.sp +This overrides the \flag{pre} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.search_zip = if v.unwrap_switch() { + args.pre = None; + true + } else { + false + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_search_zip() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--search-zip"]).unwrap(); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["-z"]).unwrap(); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["-z", "--no-search-zip"]).unwrap(); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "--no-search-zip"]).unwrap(); + assert_eq!(Some(PathBuf::from("foo")), args.pre); + assert_eq!(false, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "--search-zip"]).unwrap(); + assert_eq!(None, args.pre); + assert_eq!(true, args.search_zip); + + let args = parse_low_raw(["--pre=foo", "-z", "--no-search-zip"]).unwrap(); + assert_eq!(None, args.pre); + assert_eq!(false, args.search_zip); +} + +/// -S/--smart-case +#[derive(Debug)] +struct SmartCase; + +impl Flag for SmartCase { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'S') + } + fn name_long(&self) -> &'static str { + "smart-case" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Smart case search." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to searches case insensitively if the pattern is +all lowercase. Otherwise, ripgrep will search case sensitively. +.sp +A pattern is considered all lowercase if both of the following rules hold: +.sp +.IP \(bu 3n +First, the pattern contains at least one literal character. 
For example, +\fBa\\w\fP contains a literal (\fBa\fP) but just \fB\\w\fP does not. +.sp +.IP \(bu 3n +Second, of the literals in the pattern, none of them are considered to be +uppercase according to Unicode. For example, \fBfoo\\pL\fP has no uppercase +literals but \fBFoo\\pL\fP does. +.PP +This overrides the \flag{case-sensitive} and \flag{ignore-case} flags. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--smart-case flag has no negation"); + args.case = CaseMode::Smart; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_smart_case() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["--smart-case"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-S", "-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse_low_raw(["-S", "-i"]).unwrap(); + assert_eq!(CaseMode::Insensitive, args.case); + + let args = parse_low_raw(["-s", "-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); + + let args = parse_low_raw(["-i", "-S"]).unwrap(); + assert_eq!(CaseMode::Smart, args.case); +} + +/// --sort-files +#[derive(Debug)] +struct SortFiles; + +impl Flag for SortFiles { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "sort-files" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-sort-files") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"(DEPRECATED) Sort results by file path." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \fB\-\-sort=path\fP instead. +.sp +This flag instructs ripgrep to sort search results by file path +lexicographically in ascending order. Note that this currently disables all +parallelism and runs search in a single thread. 
+.sp +This flag overrides \flag{sort} and \flag{sortr}. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.sort = if v.unwrap_switch() { + Some(SortMode { reverse: false, kind: SortModeKind::Path }) + } else { + None + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sort_files() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "created", "--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--sort", "created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sortr", "created", "--sort-files"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort-files", "--sortr", "created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sort=path", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr=path", "--no-sort-files"]).unwrap(); + assert_eq!(None, args.sort); +} + +/// --sort +#[derive(Debug)] +struct Sort; + +impl Flag for Sort { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "sort" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SORTBY") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Sort results in ascending order." 
+ } + fn doc_long(&self) -> &'static str { + r" +This flag enables sorting of results in ascending order. The possible values +for this flag are: +.sp +.TP 12 +\fBnone\fP +(Default) Do not sort results. Fastest. Can be multi-threaded. +.TP 12 +\fBpath\fP +Sort by file path. Always single-threaded. The order is determined by sorting +files in each directory entry during traversal. This means that given the files +\fBa/b\fP and \fBa+\fP, the latter will sort after the former even though +\fB+\fP would normally sort before \fB/\fP. +.TP 12 +\fBmodified\fP +Sort by the last modified time on a file. Always single-threaded. +.TP 12 +\fBaccessed\fP +Sort by the last accessed time on a file. Always single-threaded. +.TP 12 +\fBcreated\fP +Sort by the creation time on a file. Always single-threaded. +.PP +If the chosen (manually or by-default) sorting criteria isn't available on your +system (for example, creation time is not available on ext4 file systems), then +ripgrep will attempt to detect this, print an error and exit without searching. +.sp +To sort results in reverse or descending order, use the \flag{sortr} flag. +Also, this flag overrides \flag{sortr}. +.sp +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["none", "path", "modified", "accessed", "created"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let kind = match convert::str(&v.unwrap_value())? 
{ + "none" => { + args.sort = None; + return Ok(()); + } + "path" => SortModeKind::Path, + "modified" => SortModeKind::LastModified, + "accessed" => SortModeKind::LastAccessed, + "created" => SortModeKind::Created, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.sort = Some(SortMode { reverse: false, kind }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sort() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sort", "path", "--sort=created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sort=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sort", "path", "--sort=none"]).unwrap(); + assert_eq!(None, args.sort); +} + +/// --sortr +#[derive(Debug)] +struct Sortr; + +impl Flag for Sortr { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "sortr" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SORTBY") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Sort results in descending order." + } + fn doc_long(&self) -> &'static str { + r" +This flag enables sorting of results in descending order. The possible values +for this flag are: +.sp +.TP 12 +\fBnone\fP +(Default) Do not sort results. Fastest. Can be multi-threaded. +.TP 12 +\fBpath\fP +Sort by file path. Always single-threaded. The order is determined by sorting +files in each directory entry during traversal. This means that given the files +\fBa/b\fP and \fBa+\fP, the latter will sort before the former even though +\fB+\fP would normally sort after \fB/\fP when doing a reverse lexicographic +sort. 
+.TP 12 +\fBmodified\fP +Sort by the last modified time on a file. Always single-threaded. +.TP 12 +\fBaccessed\fP +Sort by the last accessed time on a file. Always single-threaded. +.TP 12 +\fBcreated\fP +Sort by the creation time on a file. Always single-threaded. +.PP +If the chosen (manually or by-default) sorting criteria isn't available on your +system (for example, creation time is not available on ext4 file systems), then +ripgrep will attempt to detect this, print an error and exit without searching. +.sp +To sort results in ascending order, use the \flag{sort} flag. Also, this flag +overrides \flag{sort}. +.sp +Note that sorting results currently always forces ripgrep to abandon +parallelism and run in a single thread. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["none", "path", "modified", "accessed", "created"] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let kind = match convert::str(&v.unwrap_value())? { + "none" => { + args.sort = None; + return Ok(()); + } + "path" => SortModeKind::Path, + "modified" => SortModeKind::LastModified, + "accessed" => SortModeKind::LastAccessed, + "created" => SortModeKind::Created, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + args.sort = Some(SortMode { reverse: true, kind }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_sortr() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr", "path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sortr", "path", "--sortr=created"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Created }), + args.sort + ); + + let args = parse_low_raw(["--sortr=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args = parse_low_raw(["--sortr", "path", "--sortr=none"]).unwrap(); + assert_eq!(None, args.sort); + + let args 
= parse_low_raw(["--sort=path", "--sortr=path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: true, kind: SortModeKind::Path }), + args.sort + ); + + let args = parse_low_raw(["--sortr=path", "--sort=path"]).unwrap(); + assert_eq!( + Some(SortMode { reverse: false, kind: SortModeKind::Path }), + args.sort + ); +} + +/// --stats +#[derive(Debug)] +struct Stats; + +impl Flag for Stats { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "stats" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-stats") + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Print statistics about the search." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will print aggregate statistics about the search. When +this flag is present, ripgrep will print at least the following stats to +stdout at the end of the search: number of matched lines, number of files with +matches, number of files searched, and the time taken for the entire search to +complete. +.sp +This set of aggregate statistics may expand over time. +.sp +This flag is always and implicitly enabled when \flag{json} is used. +.sp +Note that this flag has no effect if \flag{files}, \flag{files-with-matches} or +\flag{files-without-match} is passed. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.stats = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_stats() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.stats); + + let args = parse_low_raw(["--stats"]).unwrap(); + assert_eq!(true, args.stats); + + let args = parse_low_raw(["--stats", "--no-stats"]).unwrap(); + assert_eq!(false, args.stats); +} + +/// --stop-on-nonmatch +#[derive(Debug)] +struct StopOnNonmatch; + +impl Flag for StopOnNonmatch { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "stop-on-nonmatch" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Stop searching after a non-match." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this option will cause ripgrep to stop reading a file once it +encounters a non-matching line after it has encountered a matching line. +This is useful if it is expected that all matches in a given file will be on +sequential lines, for example due to the lines being sorted. +.sp +This overrides the \flag{multiline} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--stop-on-nonmatch has no negation"); + args.stop_on_nonmatch = true; + args.multiline = false; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_stop_on_nonmatch() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.stop_on_nonmatch); + + let args = parse_low_raw(["--stop-on-nonmatch"]).unwrap(); + assert_eq!(true, args.stop_on_nonmatch); + + let args = parse_low_raw(["--stop-on-nonmatch", "-U"]).unwrap(); + assert_eq!(true, args.multiline); + assert_eq!(false, args.stop_on_nonmatch); + + let args = parse_low_raw(["-U", "--stop-on-nonmatch"]).unwrap(); + assert_eq!(false, args.multiline); + assert_eq!(true, args.stop_on_nonmatch); + + let args = + parse_low_raw(["--stop-on-nonmatch", "--no-multiline"]).unwrap(); + assert_eq!(false, args.multiline); + assert_eq!(true, args.stop_on_nonmatch); +} + +/// -a/--text +#[derive(Debug)] +struct Text; + +impl Flag for Text { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'a') + } + fn name_long(&self) -> &'static str { + "text" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-text") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Search binary files as if they were text." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to search binary files as if they were text. When +this flag is present, ripgrep's binary file detection is disabled. This means +that when a binary file is searched, its contents may be printed if there is +a match. This may cause escape codes to be printed that alter the behavior of +your terminal. +.sp +When binary file detection is enabled, it is imperfect. In general, it uses +a simple heuristic. 
If a \fBNUL\fP byte is seen during search, then the file +is considered binary and searching stops (unless this flag is present). +Alternatively, if the \flag{binary} flag is used, then ripgrep will only quit +when it sees a \fBNUL\fP byte after it sees a match (or searches the entire +file). +.sp +This flag overrides the \flag{binary} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.binary = if v.unwrap_switch() { + BinaryMode::AsText + } else { + BinaryMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_text() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--text"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--no-text"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["-a", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse_low_raw(["--binary", "-a"]).unwrap(); + assert_eq!(BinaryMode::AsText, args.binary); + + let args = parse_low_raw(["-a", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--binary", "--no-text"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); +} + +/// -j/--threads +#[derive(Debug)] +struct Threads; + +impl Flag for Threads { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option<u8> { + Some(b'j') + } + fn name_long(&self) -> &'static str { + "threads" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Set the approximate number of threads to use." + } + fn doc_long(&self) -> &'static str { + r" +This flag sets the approximate number of threads to use. 
A value of \fB0\fP +(which is the default) causes ripgrep to choose the thread count using +heuristics. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + let threads = convert::usize(&v.unwrap_value())?; + args.threads = if threads == 0 { None } else { Some(threads) }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_threads() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.threads); + + let args = parse_low_raw(["--threads", "5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j", "5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j5"]).unwrap(); + assert_eq!(Some(5), args.threads); + + let args = parse_low_raw(["-j5", "-j10"]).unwrap(); + assert_eq!(Some(10), args.threads); + + let args = parse_low_raw(["-j5", "-j0"]).unwrap(); + assert_eq!(None, args.threads); +} + +/// --trace +#[derive(Debug)] +struct Trace; + +impl Flag for Trace { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "trace" + } + fn doc_category(&self) -> Category { + Category::Logging + } + fn doc_short(&self) -> &'static str { + r"Show trace messages." + } + fn doc_long(&self) -> &'static str { + r" +Show trace messages. This shows even more detail than the \flag{debug} +flag. Generally, one should only use this if \flag{debug} doesn't emit the +information you're looking for. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--trace can only be enabled"); + args.logging = Some(LoggingMode::Trace); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_trace() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse_low_raw(["--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); + + let args = parse_low_raw(["--debug", "--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); +} + +/// --trim +#[derive(Debug)] +struct Trim; + +impl Flag for Trim { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "trim" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-trim") + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Trim prefix whitespace from matches." + } + fn doc_long(&self) -> &'static str { + r" +When set, all ASCII whitespace at the beginning of each line printed will be +removed. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.trim = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_trim() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.trim); + + let args = parse_low_raw(["--trim"]).unwrap(); + assert_eq!(true, args.trim); + + let args = parse_low_raw(["--trim", "--no-trim"]).unwrap(); + assert_eq!(false, args.trim); +} + +/// -t/--type +#[derive(Debug)] +struct Type; + +impl Flag for Type { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option<u8> { + Some(b't') + } + fn name_long(&self) -> &'static str { + "type" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Only search files matching TYPE." 
+ } + fn doc_long(&self) -> &'static str { + r#" +This flag limits ripgrep to searching files matching \fITYPE\fP. Multiple +\flag{type} flags may be provided. +.sp +This flag supports the special value \fBall\fP, which will behave as if +\flag{type} was provided for every file type supported by ripgrep (including +any custom file types). The end result is that \fB\-\-type=all\fP causes +ripgrep to search in "whitelist" mode, where it will only search files it +recognizes via its type definitions. +.sp +Note that this flag has lower precedence than both the \flag{glob} flag and +any rules found in ignore files. +.sp +To see the list of available file types, use the \flag{type-list} flag. +"# + } + fn completion_type(&self) -> CompletionType { + CompletionType::Filetype + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Select { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type() { + let select = |name: &str| TypeChange::Select { name: name.to_string() }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<TypeChange>::new(), args.type_changes); + + let args = parse_low_raw(["--type", "rust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-t", "rust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-trust"]).unwrap(); + assert_eq!(vec![select("rust")], args.type_changes); + + let args = parse_low_raw(["-trust", "-tpython"]).unwrap(); + assert_eq!(vec![select("rust"), select("python")], args.type_changes); + + let args = parse_low_raw(["-tabcdefxyz"]).unwrap(); + assert_eq!(vec![select("abcdefxyz")], args.type_changes); +} + +/// --type-add +#[derive(Debug)] +struct TypeAdd; + +impl Flag for TypeAdd { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "type-add" + } + fn doc_variable(&self) -> 
Option<&'static str> { + Some("TYPESPEC") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Add a new glob for a file type." + } + fn doc_long(&self) -> &'static str { + r" +This flag adds a new glob for a particular file type. Only one glob can be +added at a time. Multiple \flag{type-add} flags can be provided. Unless +\flag{type-clear} is used, globs are added to any existing globs defined inside +of ripgrep. +.sp +Note that this must be passed to every invocation of ripgrep. Type settings are +not persisted. See \fBCONFIGURATION FILES\fP for a workaround. +.sp +Example: +.sp +.EX + rg \-\-type\-add 'foo:*.foo' -tfoo \fIPATTERN\fP +.EE +.sp +This flag can also be used to include rules from other types with the special +include directive. The include directive permits specifying one or more other +type names (separated by a comma) that have been defined and its rules will +automatically be imported into the type specified. For example, to create a +type called src that matches C++, Python and Markdown files, one can use: +.sp +.EX + \-\-type\-add 'src:include:cpp,py,md' +.EE +.sp +Additional glob rules can still be added to the src type by using this flag +again: +.sp +.EX + \-\-type\-add 'src:include:cpp,py,md' \-\-type\-add 'src:*.foo' +.EE +.sp +Note that type names must consist only of Unicode letters or numbers. +Punctuation characters are not allowed. +" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes + .push(TypeChange::Add { def: convert::string(v.unwrap_value())? 
}); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_add() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<TypeChange>::new(), args.type_changes); + + let args = parse_low_raw(["--type-add", "foo"]).unwrap(); + assert_eq!( + vec![TypeChange::Add { def: "foo".to_string() }], + args.type_changes + ); + + let args = parse_low_raw(["--type-add", "foo", "--type-add=bar"]).unwrap(); + assert_eq!( + vec![ + TypeChange::Add { def: "foo".to_string() }, + TypeChange::Add { def: "bar".to_string() } + ], + args.type_changes + ); +} + +/// --type-clear +#[derive(Debug)] +struct TypeClear; + +impl Flag for TypeClear { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "type-clear" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Clear globs for a file type." + } + fn doc_long(&self) -> &'static str { + r" +Clear the file type globs previously defined for \fITYPE\fP. This clears any +previously defined globs for the \fITYPE\fP, but globs can be added after this +flag. +.sp +Note that this must be passed to every invocation of ripgrep. Type settings are +not persisted. See \fBCONFIGURATION FILES\fP for a workaround. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Clear { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_clear() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<TypeChange>::new(), args.type_changes); + + let args = parse_low_raw(["--type-clear", "foo"]).unwrap(); + assert_eq!( + vec![TypeChange::Clear { name: "foo".to_string() }], + args.type_changes + ); + + let args = + parse_low_raw(["--type-clear", "foo", "--type-clear=bar"]).unwrap(); + assert_eq!( + vec![ + TypeChange::Clear { name: "foo".to_string() }, + TypeChange::Clear { name: "bar".to_string() } + ], + args.type_changes + ); +} + +/// --type-not +#[derive(Debug)] +struct TypeNot; + +impl Flag for TypeNot { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option<u8> { + Some(b'T') + } + fn name_long(&self) -> &'static str { + "type-not" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("TYPE") + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r"Do not search files matching TYPE." + } + fn doc_long(&self) -> &'static str { + r#" +Do not search files matching \fITYPE\fP. Multiple \flag{type-not} flags may be +provided. Use the \flag{type-list} flag to list all available types. +.sp +This flag supports the special value \fBall\fP, which will behave +as if \flag{type-not} was provided for every file type supported by +ripgrep (including any custom file types). The end result is that +\fB\-\-type\-not=all\fP causes ripgrep to search in "blacklist" mode, where it +will only search files that are unrecognized by its type definitions. +.sp +To see the list of available file types, use the \flag{type-list} flag. 
+"# + } + fn completion_type(&self) -> CompletionType { + CompletionType::Filetype + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + args.type_changes.push(TypeChange::Negate { + name: convert::string(v.unwrap_value())?, + }); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_not() { + let select = |name: &str| TypeChange::Select { name: name.to_string() }; + let negate = |name: &str| TypeChange::Negate { name: name.to_string() }; + + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Vec::<TypeChange>::new(), args.type_changes); + + let args = parse_low_raw(["--type-not", "rust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-T", "rust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-Trust"]).unwrap(); + assert_eq!(vec![negate("rust")], args.type_changes); + + let args = parse_low_raw(["-Trust", "-Tpython"]).unwrap(); + assert_eq!(vec![negate("rust"), negate("python")], args.type_changes); + + let args = parse_low_raw(["-Tabcdefxyz"]).unwrap(); + assert_eq!(vec![negate("abcdefxyz")], args.type_changes); + + let args = parse_low_raw(["-Trust", "-ttoml", "-Tjson"]).unwrap(); + assert_eq!( + vec![negate("rust"), select("toml"), negate("json")], + args.type_changes + ); +} + +/// --type-list +#[derive(Debug)] +struct TypeList; + +impl Flag for TypeList { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "type-list" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Show all supported file types." + } + fn doc_long(&self) -> &'static str { + r" +Show all supported file types and their corresponding globs. This takes any +\flag{type-add} and \flag{type-clear} flags given into account. Each type is +printed on its own line, followed by a \fB:\fP and then a comma-delimited list +of globs for that type on the same line. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--type-list has no negation"); + args.mode.update(Mode::Types); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_type_list() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(Mode::Search(SearchMode::Standard), args.mode); + + let args = parse_low_raw(["--type-list"]).unwrap(); + assert_eq!(Mode::Types, args.mode); +} + +/// -u/--unrestricted +#[derive(Debug)] +struct Unrestricted; + +impl Flag for Unrestricted { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'u') + } + fn name_long(&self) -> &'static str { + "unrestricted" + } + fn doc_category(&self) -> Category { + Category::Filter + } + fn doc_short(&self) -> &'static str { + r#"Reduce the level of "smart" filtering."# + } + fn doc_long(&self) -> &'static str { + r#" +This flag reduces the level of "smart" filtering. Repeated uses (up to 3) reduces +the filtering even more. When repeated three times, ripgrep will search every +file in a directory tree. +.sp +A single \flag{unrestricted} flag is equivalent to \flag{no-ignore}. Two +\flag{unrestricted} flags is equivalent to \flag{no-ignore} \flag{hidden}. +Three \flag{unrestricted} flags is equivalent to \flag{no-ignore} \flag{hidden} +\flag{binary}. +.sp +The only filtering ripgrep still does when \fB-uuu\fP is given is to skip +symbolic links and to avoid printing matches from binary files. Symbolic links +can be followed via the \flag{follow} flag, and binary files can be treated as +text files via the \flag{text} flag. 
+"# + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--unrestricted has no negation"); + args.unrestricted = args.unrestricted.saturating_add(1); + anyhow::ensure!( + args.unrestricted <= 3, + "flag can only be repeated up to 3 times" + ); + if args.unrestricted == 1 { + NoIgnore.update(FlagValue::Switch(true), args)?; + } else if args.unrestricted == 2 { + Hidden.update(FlagValue::Switch(true), args)?; + } else { + assert_eq!(args.unrestricted, 3); + Binary.update(FlagValue::Switch(true), args)?; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_unrestricted() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.no_ignore_vcs); + assert_eq!(false, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--unrestricted"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(false, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["--unrestricted", "-u"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(true, args.hidden); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse_low_raw(["-uuu"]).unwrap(); + assert_eq!(true, args.no_ignore_vcs); + assert_eq!(true, args.hidden); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let result = parse_low_raw(["-uuuu"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --version +#[derive(Debug)] +struct Version; + +impl Flag for Version { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'V') + } + fn name_long(&self) -> &'static str { + "version" + } + fn doc_category(&self) -> Category { + Category::OtherBehaviors + } + fn doc_short(&self) -> &'static str { + r"Print ripgrep's version." + } + fn doc_long(&self) -> &'static str { + r" +This flag prints ripgrep's version. 
This also may print other relevant +information, such as the presence of target specific optimizations and the +\fBgit\fP revision that this build of ripgrep was compiled from. +" + } + + fn update(&self, v: FlagValue, _: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--version has no negation"); + // Since this flag has different semantics for -V and --version and the + // Flag trait doesn't support encoding this sort of thing, we handle it + // as a special case in the parser. + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_version() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.special); + + let args = parse_low_raw(["-V"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionShort), args.special); + + let args = parse_low_raw(["--version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionLong), args.special); + + let args = parse_low_raw(["-V", "--version"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionLong), args.special); + + let args = parse_low_raw(["--version", "-V"]).unwrap(); + assert_eq!(Some(SpecialMode::VersionShort), args.special); +} + +/// --vimgrep +#[derive(Debug)] +struct Vimgrep; + +impl Flag for Vimgrep { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "vimgrep" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print results in a vim compatible format." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to print results with every match on its own line, +including line numbers and column numbers. +.sp +With this option, a line with more than one match will be printed in its +entirety more than once. For that reason, the total amount of output as a +result of this flag can be quadratic in the size of the input. For example, +if the pattern matches every byte in an input file, then each line will be +repeated for every byte matched. 
For this reason, users should only use this +flag when there is no other choice. Editor integrations should prefer some +other way of reading results from ripgrep, such as via the \flag{json} flag. +One alternative to avoiding exorbitant memory usage is to force ripgrep into +single threaded mode with the \flag{threads} flag. Note though that this will +not impact the total size of the output, just the heap memory that ripgrep will +use. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--vimgrep has no negation"); + args.vimgrep = true; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_vimgrep() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(false, args.vimgrep); + + let args = parse_low_raw(["--vimgrep"]).unwrap(); + assert_eq!(true, args.vimgrep); +} + +/// --with-filename +#[derive(Debug)] +struct WithFilename; + +impl Flag for WithFilename { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'H') + } + fn name_long(&self) -> &'static str { + "with-filename" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Print the file path with each matching line." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to print the file path for each matching line. +This is the default when more than one file is searched. If \flag{heading} is +enabled (the default when printing to a tty), the file path will be shown above +clusters of matches from each file; otherwise, the file name will be shown as a +prefix for each matched line. +.sp +This flag overrides \flag{no-filename}. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--with-filename has no defined negation"); + args.with_filename = Some(true); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_with_filename() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.with_filename); + + let args = parse_low_raw(["--with-filename"]).unwrap(); + assert_eq!(Some(true), args.with_filename); + + let args = parse_low_raw(["-H"]).unwrap(); + assert_eq!(Some(true), args.with_filename); +} + +/// --no-filename +#[derive(Debug)] +struct WithFilenameNo; + +impl Flag for WithFilenameNo { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'I') + } + fn name_long(&self) -> &'static str { + "no-filename" + } + fn doc_category(&self) -> Category { + Category::Output + } + fn doc_short(&self) -> &'static str { + r"Never print the path with each matching line." + } + fn doc_long(&self) -> &'static str { + r" +This flag instructs ripgrep to never print the file path with each matching +line. This is the default when ripgrep is explicitly instructed to search one +file or stdin. +.sp +This flag overrides \flag{with-filename}. 
+" + } + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--no-filename has no defined negation"); + args.with_filename = Some(false); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_with_filename_no() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.with_filename); + + let args = parse_low_raw(["--no-filename"]).unwrap(); + assert_eq!(Some(false), args.with_filename); + + let args = parse_low_raw(["-I"]).unwrap(); + assert_eq!(Some(false), args.with_filename); + + let args = parse_low_raw(["-I", "-H"]).unwrap(); + assert_eq!(Some(true), args.with_filename); + + let args = parse_low_raw(["-H", "-I"]).unwrap(); + assert_eq!(Some(false), args.with_filename); +} + +/// -w/--word-regexp +#[derive(Debug)] +struct WordRegexp; + +impl Flag for WordRegexp { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option<u8> { + Some(b'w') + } + fn name_long(&self) -> &'static str { + "word-regexp" + } + fn doc_category(&self) -> Category { + Category::Search + } + fn doc_short(&self) -> &'static str { + r"Show matches surrounded by word boundaries." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will only show matches surrounded by word boundaries. +This is equivalent to surrounding every pattern with \fB\\b{start-half}\fP +and \fB\\b{end-half}\fP. +.sp +This overrides the \flag{line-regexp} flag. 
+" + } + + fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--word-regexp has no negation"); + args.boundary = Some(BoundaryMode::Word); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_word_regexp() { + let args = parse_low_raw(None::<&str>).unwrap(); + assert_eq!(None, args.boundary); + + let args = parse_low_raw(["--word-regexp"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-w"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-x", "-w"]).unwrap(); + assert_eq!(Some(BoundaryMode::Word), args.boundary); + + let args = parse_low_raw(["-w", "-x"]).unwrap(); + assert_eq!(Some(BoundaryMode::Line), args.boundary); +} + +mod convert { + use std::ffi::{OsStr, OsString}; + + use anyhow::Context; + + pub(super) fn str(v: &OsStr) -> anyhow::Result<&str> { + let Some(s) = v.to_str() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn string(v: OsString) -> anyhow::Result<String> { + let Ok(s) = v.into_string() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn usize(v: &OsStr) -> anyhow::Result<usize> { + str(v)?.parse().context("value is not a valid number") + } + + pub(super) fn u64(v: &OsStr) -> anyhow::Result<u64> { + str(v)?.parse().context("value is not a valid number") + } + + pub(super) fn human_readable_u64(v: &OsStr) -> anyhow::Result<u64> { + grep::cli::parse_human_readable_size(str(v)?).context("invalid size") + } + + pub(super) fn human_readable_usize(v: &OsStr) -> anyhow::Result<usize> { + let size = human_readable_u64(v)?; + let Ok(size) = usize::try_from(size) else { + anyhow::bail!("size is too big") + }; + Ok(size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn available_shorts() { + let mut total = vec![false; 128]; + for byte in 0..=0x7F { + match byte { + b'.' 
| b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => { + total[usize::from(byte)] = true + } + _ => continue, + } + } + + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + taken[usize::from(short)] = true; + } + + for byte in 0..=0x7F { + if total[usize::from(byte)] && !taken[usize::from(byte)] { + eprintln!("{}", char::from(byte)); + } + } + } + + #[test] + fn shorts_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let Some(byte) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + byte.is_ascii_alphanumeric() || byte == b'.', + "\\x{byte:0X} is not a valid short flag for {long}", + ) + } + } + + #[test] + fn longs_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + let count = long.chars().count(); + assert!(count >= 2, "flag '{long}' is less than 2 characters"); + assert!( + long.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' does not match ^[-0-9A-Za-z]+$", + ); + for alias in flag.aliases() { + let count = alias.chars().count(); + assert!( + count >= 2, + "flag '{long}' has alias '{alias}' that is \ + less than 2 characters", + ); + assert!( + alias + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' has alias '{alias}' that does not \ + match ^[-0-9A-Za-z]+$", + ); + } + let Some(negated) = flag.name_negated() else { continue }; + let count = negated.chars().count(); + assert!( + count >= 2, + "flag '{long}' has negation '{negated}' that is \ + less than 2 characters", + ); + assert!( + negated.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' has negation '{negated}' that \ + does not match ^[-0-9A-Za-z]+$", + ); + } + } + + #[test] + fn shorts_no_duplicates() { + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + 
!taken[usize::from(short)], + "flag {long} has duplicate short flag {}", + char::from(short) + ); + taken[usize::from(short)] = true; + } + } + + #[test] + fn longs_no_duplicates() { + use std::collections::BTreeSet; + + let mut taken = BTreeSet::new(); + for flag in FLAGS.iter() { + let long = flag.name_long(); + assert!(taken.insert(long), "flag {long} has a duplicate name"); + for alias in flag.aliases() { + assert!( + taken.insert(alias), + "flag {long} has an alias {alias} that is duplicative" + ); + } + let Some(negated) = flag.name_negated() else { continue }; + assert!( + taken.insert(negated), + "negated flag {negated} has a duplicate name" + ); + } + } + + #[test] + fn non_switches_have_variable_names() { + for flag in FLAGS.iter() { + if flag.is_switch() { + continue; + } + let long = flag.name_long(); + assert!( + flag.doc_variable().is_some(), + "flag '{long}' should have a variable name" + ); + } + } + + #[test] + fn switches_have_no_choices() { + for flag in FLAGS.iter() { + if !flag.is_switch() { + continue; + } + let long = flag.name_long(); + let choices = flag.doc_choices(); + assert!( + choices.is_empty(), + "switch flag '{long}' \ + should not have any choices but has some: {choices:?}", + ); + } + } + + #[test] + fn choices_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + for choice in flag.doc_choices() { + assert!( + choice.chars().all(|c| c.is_ascii_alphanumeric() + || c == '-' + || c == ':'), + "choice '{choice}' for flag '{long}' does not match \ + ^[-:0-9A-Za-z]+$", + ) + } + } + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/help.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/help.rs new file mode 100644 index 000000000..353d1808b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/help.rs @@ -0,0 +1,259 @@ +/*! 
+Provides routines for generating ripgrep's "short" and "long" help +documentation. + +The short version is used when the `-h` flag is given, while the long version +is used when the `--help` flag is given. +*/ + +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, doc::version, Category, Flag}; + +const TEMPLATE_SHORT: &'static str = include_str!("template.short.help"); +const TEMPLATE_LONG: &'static str = include_str!("template.long.help"); + +/// Wraps `std::write!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! write { + ($($tt:tt)*) => { std::write!($($tt)*).unwrap(); } +} + +/// Generate short documentation, i.e., for `-h`. +pub(crate) fn generate_short() -> String { + let mut cats: BTreeMap, Vec)> = + BTreeMap::new(); + let (mut maxcol1, mut maxcol2) = (0, 0); + for flag in FLAGS.iter().copied() { + let columns = + cats.entry(flag.doc_category()).or_insert((vec![], vec![])); + let (col1, col2) = generate_short_flag(flag); + maxcol1 = maxcol1.max(col1.len()); + maxcol2 = maxcol2.max(col2.len()); + columns.0.push(col1); + columns.1.push(col2); + } + let mut out = + TEMPLATE_SHORT.replace("!!VERSION!!", &version::generate_digits()); + for (cat, (col1, col2)) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + let val = format_short_columns(col1, col2, maxcol1, maxcol2); + out = out.replace(&var, &val); + } + out +} + +/// Generate short for a single flag. +/// +/// The first element corresponds to the flag name while the second element +/// corresponds to the documentation string. +fn generate_short_flag(flag: &dyn Flag) -> (String, String) { + let (mut col1, mut col2) = (String::new(), String::new()); + + // Some of the variable names are fine for longer form + // docs, but they make the succinct short help very noisy. + // So just shorten some of them. 
+ let var = flag.doc_variable().map(|s| { + let mut s = s.to_string(); + s = s.replace("SEPARATOR", "SEP"); + s = s.replace("REPLACEMENT", "TEXT"); + s = s.replace("NUM+SUFFIX?", "NUM"); + s + }); + + // Generate the first column, the flag name. + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(col1, r"-{name}"); + write!(col1, r", "); + } + write!(col1, r"--{name}", name = flag.name_long()); + if let Some(var) = var.as_ref() { + write!(col1, r"={var}"); + } + + // And now the second column, with the description. + write!(col2, "{}", flag.doc_short()); + + (col1, col2) +} + +/// Write two columns of documentation. +/// +/// `maxcol1` should be the maximum length (in bytes) of the first column, +/// while `maxcol2` should be the maximum length (in bytes) of the second +/// column. +fn format_short_columns( + col1: &[String], + col2: &[String], + maxcol1: usize, + _maxcol2: usize, +) -> String { + assert_eq!(col1.len(), col2.len(), "columns must have equal length"); + const PAD: usize = 2; + let mut out = String::new(); + for (i, (c1, c2)) in col1.iter().zip(col2.iter()).enumerate() { + if i > 0 { + write!(out, "\n"); + } + + let pad = maxcol1 - c1.len() + PAD; + write!(out, " "); + write!(out, "{c1}"); + write!(out, "{}", " ".repeat(pad)); + write!(out, "{c2}"); + } + out +} + +/// Generate long documentation, i.e., for `--help`. +pub(crate) fn generate_long() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + write!(cat, "\n\n"); + } + generate_long_flag(flag, &mut cat); + } + + let mut out = + TEMPLATE_LONG.replace("!!VERSION!!", &version::generate_digits()); + for (cat, value) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + out = out.replace(&var, value); + } + out +} + +/// Write generated documentation for `flag` to `out`. 
+fn generate_long_flag(flag: &dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r" -{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r" {var}"); + } + write!(out, r", "); + } else { + write!(out, r" "); + } + + let name = flag.name_long(); + write!(out, r"--{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r"={var}"); + } + write!(out, "\n"); + + let doc = flag.doc_long().trim(); + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!(r"found unrecognized \flag{{{name}}} in --help docs") + }; + if let Some(name) = flag.name_short() { + write!(out, r"-{}/", char::from(name)); + } + write!(out, r"--{}", flag.name_long()); + }); + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!( + r"found unrecognized \flag-negate{{{name}}} in --help docs" + ) + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in --help docs but \ + {long} does not have a negation" + ); + }; + write!(out, r"--{name}"); + }); + + let mut cleaned = remove_roff(&doc); + if let Some(negated) = flag.name_negated() { + // Flags that can be negated that aren't switches, like + // --context-separator, are somewhat weird. Because of that, the docs + // for those flags should discuss the semantics of negation explicitly. + // But for switches, the behavior is always the same. + if flag.is_switch() { + write!(cleaned, "\n\nThis flag can be disabled with --{negated}."); + } + } + let indent = " ".repeat(8); + let wrapopts = textwrap::Options::new(71) + // Normally I'd be fine with breaking at hyphens, but ripgrep's docs + // includes a lot of flag names, and they in turn contain hyphens. + // Breaking flag names across lines is not great. 
+ .word_splitter(textwrap::WordSplitter::NoHyphenation); + for (i, paragraph) in cleaned.split("\n\n").enumerate() { + if i > 0 { + write!(out, "\n\n"); + } + let mut new = paragraph.to_string(); + if paragraph.lines().all(|line| line.starts_with(" ")) { + // Re-indent but don't refill so as to preserve line breaks + // in code/shell example snippets. + new = textwrap::indent(&new, &indent); + } else { + new = new.replace("\n", " "); + new = textwrap::refill(&new, &wrapopts); + new = textwrap::indent(&new, &indent); + } + write!(out, "{}", new.trim_end()); + } +} + +/// Removes roff syntax from `v` such that the result is approximately plain +/// text readable. +/// +/// This is basically a mish mash of heuristics based on the specific roff used +/// in the docs for the flags in this tool. If new kinds of roff are used in +/// the docs, then this may need to be updated to handle them. +fn remove_roff(v: &str) -> String { + let mut lines = vec![]; + for line in v.trim().lines() { + assert!(!line.is_empty(), "roff should have no empty lines"); + if line.starts_with(".") { + if line.starts_with(".IP ") { + let item_label = line + .split(" ") + .nth(1) + .expect("first argument to .IP") + .replace(r"\(bu", r"•") + .replace(r"\fB", "") + .replace(r"\fP", ":"); + lines.push(format!("{item_label}")); + } else if line.starts_with(".IB ") || line.starts_with(".BI ") { + let pieces = line + .split_whitespace() + .skip(1) + .collect::>() + .concat(); + lines.push(format!("{pieces}")); + } else if line.starts_with(".sp") + || line.starts_with(".PP") + || line.starts_with(".TP") + { + lines.push("".to_string()); + } + } else if line.starts_with(r"\fB") && line.ends_with(r"\fP") { + let line = line.replace(r"\fB", "").replace(r"\fP", ""); + lines.push(format!("{line}:")); + } else { + lines.push(line.to_string()); + } + } + // Squash multiple adjacent paragraph breaks into one. 
+ lines.dedup_by(|l1, l2| l1.is_empty() && l2.is_empty()); + lines + .join("\n") + .replace(r"\fB", "") + .replace(r"\fI", "") + .replace(r"\fP", "") + .replace(r"\-", "-") + .replace(r"\\", r"\") +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/man.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/man.rs new file mode 100644 index 000000000..e0ed13bae --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/man.rs @@ -0,0 +1,110 @@ +/*! +Provides routines for generating ripgrep's man page in `roff` format. +*/ + +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, doc::version, Flag}; + +const TEMPLATE: &'static str = include_str!("template.rg.1"); + +/// Wraps `std::write!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! write { + ($($tt:tt)*) => { std::write!($($tt)*).unwrap(); } +} + +/// Wraps `std::writeln!` and asserts there is no failure. +/// +/// We only write to `String` in this module. +macro_rules! writeln { + ($($tt:tt)*) => { std::writeln!($($tt)*).unwrap(); } +} + +/// Returns a `roff` formatted string corresponding to ripgrep's entire man +/// page. +pub(crate) fn generate() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + writeln!(cat, ".sp"); + } + generate_flag(flag, &mut cat); + } + + let mut out = TEMPLATE.replace("!!VERSION!!", &version::generate_digits()); + for (cat, value) in cats.iter() { + let var = format!("!!{name}!!", name = cat.as_str()); + out = out.replace(&var, value); + } + out +} + +/// Writes `roff` formatted documentation for `flag` to `out`. 
+fn generate_flag(flag: &'static dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r"\fB\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r" \fI{var}\fP"); + } + write!(out, r", "); + } + + let name = flag.name_long(); + write!(out, r"\fB\-\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r"=\fI{var}\fP"); + } + write!(out, "\n"); + + writeln!(out, ".RS 4"); + let doc = flag.doc_long().trim(); + // Convert \flag{foo} into something nicer. + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!(r"found unrecognized \flag{{{name}}} in roff docs") + }; + out.push_str(r"\fB"); + if let Some(name) = flag.name_short() { + write!(out, r"\-{}/", char::from(name)); + } + write!(out, r"\-\-{}", flag.name_long()); + out.push_str(r"\fP"); + }); + // Convert \flag-negate{foo} into something nicer. + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + unreachable!( + r"found unrecognized \flag-negate{{{name}}} in roff docs" + ) + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in roff docs but \ + {long} does not have a negation" + ); + }; + out.push_str(r"\fB"); + write!(out, r"\-\-{name}"); + out.push_str(r"\fP"); + }); + writeln!(out, "{doc}"); + if let Some(negated) = flag.name_negated() { + // Flags that can be negated that aren't switches, like + // --context-separator, are somewhat weird. Because of that, the docs + // for those flags should discuss the semantics of negation explicitly. + // But for switches, the behavior is always the same. + if flag.is_switch() { + writeln!(out, ".sp"); + writeln!( + out, + r"This flag can be disabled with \fB\-\-{negated}\fP." 
+ ); + } + } + writeln!(out, ".RE"); +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/mod.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/mod.rs new file mode 100644 index 000000000..c52a024f7 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/mod.rs @@ -0,0 +1,38 @@ +/*! +Modules for generating documentation for ripgrep's flags. +*/ + +pub(crate) mod help; +pub(crate) mod man; +pub(crate) mod version; + +/// Searches for `\tag{...}` occurrences in `doc` and calls `replacement` for +/// each such tag found. +/// +/// The first argument given to `replacement` is the tag value, `...`. The +/// second argument is the buffer that accumulates the full replacement text. +/// +/// Since this function is only intended to be used on doc strings written into +/// the program source code, callers should panic in `replacement` if there are +/// any errors or unexpected circumstances. +fn render_custom_markup( + mut doc: &str, + tag: &str, + mut replacement: impl FnMut(&str, &mut String), +) -> String { + let mut out = String::with_capacity(doc.len()); + let tag_prefix = format!(r"\{tag}{{"); + while let Some(offset) = doc.find(&tag_prefix) { + out.push_str(&doc[..offset]); + + let start = offset + tag_prefix.len(); + let Some(end) = doc[start..].find('}').map(|i| start + i) else { + unreachable!(r"found {tag_prefix} without closing }}"); + }; + let name = &doc[start..end]; + replacement(name, &mut out); + doc = &doc[end + 1..]; + } + out.push_str(doc); + out +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.long.help b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.long.help new file mode 100644 index 000000000..83c20c1af --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.long.help @@ -0,0 +1,61 @@ +ripgrep !!VERSION!! 
+Andrew Gallant <jamslam@gmail.com> + +ripgrep (rg) recursively searches the current directory for lines matching +a regex pattern. By default, ripgrep will respect gitignore rules and +automatically skip hidden files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + rg [OPTIONS] -e PATTERN ... [PATH ...] + rg [OPTIONS] -f PATTERNFILE ... [PATH ...] + rg [OPTIONS] --files [PATH ...] + rg [OPTIONS] --type-list + command | rg [OPTIONS] PATTERN + rg [OPTIONS] --help + rg [OPTIONS] --version + +POSITIONAL ARGUMENTS: + <PATTERN> + A regular expression used for searching. To match a pattern beginning + with a dash, use the -e/--regexp flag. + + For example, to search for the literal '-foo', you can use this flag: + + rg -e -foo + + You can also use the special '--' delimiter to indicate that no more + flags will be provided. Namely, the following is equivalent to the + above: + + rg -- -foo + + <PATH>... + A file or directory to search. Directories are searched recursively. + File paths specified on the command line override glob and ignore + rules. + +INPUT OPTIONS: +!!input!! + +SEARCH OPTIONS: +!!search!! + +FILTER OPTIONS: +!!filter!! + +OUTPUT OPTIONS: +!!output!! + +OUTPUT MODES: +!!output-modes!! + +LOGGING OPTIONS: +!!logging!! + +OTHER BEHAVIORS: +!!other-behaviors!! diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.rg.1 b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.rg.1 new file mode 100644 index 000000000..dbbddd7dd --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.rg.1 @@ -0,0 +1,424 @@ +.TH RG 1 2024-09-08 "!!VERSION!!" "User Commands" +. +. +.SH NAME +rg \- recursively search the current directory for lines matching a pattern +. +. 
+.SH SYNOPSIS +.\" I considered using GNU troff's .SY and .YS "synopsis" macros here, but it +.\" looks like they aren't portable. Specifically, they don't appear to be in +.\" BSD's mdoc used on macOS. +.sp +\fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-e\fP \fIPATTERN\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-f\fP \fIPATTERNFILE\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-files\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-type\-list\fP +.sp +\fIcommand\fP | \fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-help\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-version\fP +. +. +.SH DESCRIPTION +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect your \fB.gitignore\fP and automatically skip +hidden files/directories and binary files. +.sp +ripgrep's default regex engine uses finite automata and guarantees linear +time searching. Because of this, features like backreferences and arbitrary +look-around are not supported. However, if ripgrep is built with PCRE2, +then the \fB\-P/\-\-pcre2\fP flag can be used to enable backreferences and +look-around. +.sp +ripgrep supports configuration files. Set \fBRIPGREP_CONFIG_PATH\fP to a +configuration file. The file can specify one shell argument per line. Lines +starting with \fB#\fP are ignored. For more details, see \fBCONFIGURATION +FILES\fP below. +.sp +ripgrep will automatically detect if stdin exists and search stdin for a regex +pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may exist when +it shouldn't. To turn off stdin detection, one can explicitly specify the +directory to search, e.g. \fBrg foo ./\fP. +.sp +Like other tools such as \fBls\fP, ripgrep will alter its output depending on +whether stdout is connected to a tty. 
By default, when printing to a tty, ripgrep +will enable colors, line numbers and a heading format that lists each matching +file path once instead of once per matching line. +.sp +Tip: to disable all smart filtering and make ripgrep behave a bit more like +classical grep, use \fBrg -uuu\fP. +. +. +.SH REGEX SYNTAX +ripgrep uses Rust's regex engine by default, which documents its syntax: +\fIhttps://docs.rs/regex/1.*/regex/#syntax\fP +.sp +ripgrep uses byte-oriented regexes, which have some additional documentation: +\fIhttps://docs.rs/regex/1.*/regex/bytes/index.html#syntax\fP +.sp +To a first approximation, ripgrep uses Perl-like regexes without look-around or +backreferences. This makes them very similar to the "extended" (ERE) regular +expressions supported by \fBegrep\fP, but with a few additional features like +Unicode character classes. +.sp +If you're using ripgrep with the \fB\-P/\-\-pcre2\fP flag, then please consult +\fIhttps://www.pcre.org\fP or the PCRE2 man pages for documentation on the +supported syntax. +. +. +.SH POSITIONAL ARGUMENTS +.TP 12 +\fIPATTERN\fP +A regular expression used for searching. To match a pattern beginning with a +dash, use the \fB\-e/\-\-regexp\fP option. +.TP 12 +\fIPATH\fP +A file or directory to search. Directories are searched recursively. File paths +specified explicitly on the command line override glob and ignore rules. +. +. +.SH OPTIONS +This section documents all flags that ripgrep accepts. Flags are grouped into +categories below according to their function. +.sp +Note that many options can be turned on and off. In some cases, those flags are +not listed explicitly below. For example, the \fB\-\-column\fP flag (listed +below) enables column numbers in ripgrep's output, but the \fB\-\-no\-column\fP +flag (not listed below) disables them. The reverse can also exist. For example, +the \fB\-\-no\-ignore\fP flag (listed below) disables ripgrep's \fBgitignore\fP +logic, but the \fB\-\-ignore\fP flag (not listed below) enables it. 
These +flags are useful for overriding a ripgrep configuration file (or alias) on the +command line. Each flag's documentation notes whether an inverted flag exists. +In all cases, the flag specified last takes precedence. +. +.SS INPUT OPTIONS +!!input!! +. +.SS SEARCH OPTIONS +!!search!! +. +.SS FILTER OPTIONS +!!filter!! +. +.SS OUTPUT OPTIONS +!!output!! +. +.SS OUTPUT MODES +!!output-modes!! +. +.SS LOGGING OPTIONS +!!logging!! +. +.SS OTHER BEHAVIORS +!!other-behaviors!! +. +. +.SH EXIT STATUS +If ripgrep finds a match, then the exit status of the program is \fB0\fP. +If no match could be found, then the exit status is \fB1\fP. If an error +occurred, then the exit status is always \fB2\fP unless ripgrep was run with +the \fB\-q/\-\-quiet\fP flag and a match was found. In summary: +.sp +.IP \(bu 3n +\fB0\fP exit status occurs only when at least one match was found, and if +no error occurred, unless \fB\-q/\-\-quiet\fP was given. +. +.IP \(bu 3n +\fB1\fP exit status occurs only when no match was found and no error occurred. +. +.IP \(bu 3n +\fB2\fP exit status occurs when an error occurred. This is true for both +catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g., +unable to read a file). +. +. +.SH AUTOMATIC FILTERING +ripgrep does a fair bit of automatic filtering by default. This section +describes that filtering and how to control it. +.sp +\fBTIP\fP: To disable automatic filtering, use \fBrg -uuu\fP. +.sp +ripgrep's automatic "smart" filtering is one of the most apparent +differentiating features between ripgrep and other tools like \fBgrep\fP. As +such, its behavior may be surprising to users that aren't expecting it. +.sp +ripgrep does four types of filtering automatically: +.sp +. +.IP 1. 3n +Files and directories that match ignore rules are not searched. +.IP 2. 3n +Hidden files and directories are not searched. +.IP 3. 3n +Binary files (files with a \fBNUL\fP byte) are not searched. +.IP 4. 3n +Symbolic links are not followed. 
+.PP +The first type of filtering is the most sophisticated. ripgrep will attempt to +respect your \fBgitignore\fP rules as faithfully as possible. In particular, +this includes the following: +. +.IP \(bu 3n +Any global rules, e.g., in \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Any rules in relevant \fB.gitignore\fP files. This includes \fB.gitignore\fP +files in parent directories that are part of the same \fBgit\fP repository. +(Unless \fB\-\-no\-require\-git\fP is given.) +. +.IP \(bu 3n +Any local rules, e.g., in \fB.git/info/exclude\fP. +.PP +In some cases, ripgrep and \fBgit\fP will not always be in sync in terms +of which files are ignored. For example, a file that is ignored via +\fB.gitignore\fP but is tracked by \fBgit\fP would not be searched by ripgrep +even though \fBgit\fP tracks it. This is unlikely to ever be fixed. Instead, +you should either make sure your exclude rules match the files you track +precisely, or otherwise use \fBgit grep\fP for search. +.sp +Additional ignore rules can be provided outside of a \fBgit\fP context: +. +.IP \(bu 3n +Any rules in \fB.ignore\fP. ripgrep will also respect \fB.ignore\fP files in +parent directories. +. +.IP \(bu 3n +Any rules in \fB.rgignore\fP. ripgrep will also respect \fB.rgignore\fP files +in parent directories. +. +.IP \(bu 3n +Any rules in files specified with the \fB\-\-ignore\-file\fP flag. +.PP +The precedence of ignore rules is as follows, with later items overriding +earlier items: +. +.IP \(bu 3n +Files given by \fB\-\-ignore\-file\fP. +. +.IP \(bu 3n +Global gitignore rules, e.g., from \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Local rules from \fB.git/info/exclude\fP. +. +.IP \(bu 3n +Rules from \fB.gitignore\fP. +. +.IP \(bu 3n +Rules from \fB.ignore\fP. +. +.IP \(bu 3n +Rules from \fB.rgignore\fP. 
+.PP +So for example, if \fIfoo\fP were in a \fB.gitignore\fP and \fB!\fP\fIfoo\fP +were in an \fB.rgignore\fP, then \fIfoo\fP would not be ignored since +\fB.rgignore\fP takes precedence over \fB.gitignore\fP. +.sp +Each of the types of filtering can be configured via command line flags: +. +.IP \(bu 3n +There are several flags starting with \fB\-\-no\-ignore\fP that toggle which, +if any, ignore rules are respected. \fB\-\-no\-ignore\fP by itself will disable +all +of them. +. +.IP \(bu 3n +\fB\-./\-\-hidden\fP will force ripgrep to search hidden files and directories. +. +.IP \(bu 3n +\fB\-\-binary\fP will force ripgrep to search binary files. +. +.IP \(bu 3n +\fB\-L/\-\-follow\fP will force ripgrep to follow symlinks. +.PP +As a special short hand, the \fB\-u\fP flag can be specified up to three times. +Each additional time incrementally decreases filtering: +. +.IP \(bu 3n +\fB\-u\fP is equivalent to \fB\-\-no\-ignore\fP. +. +.IP \(bu 3n +\fB\-uu\fP is equivalent to \fB\-\-no\-ignore \-\-hidden\fP. +. +.IP \(bu 3n +\fB\-uuu\fP is equivalent to \fB\-\-no\-ignore \-\-hidden \-\-binary\fP. +.PP +In particular, \fBrg -uuu\fP should search the same exact content as \fBgrep +-r\fP. +. +. +.SH CONFIGURATION FILES +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: +. +.IP 1. 3n +Every line is a shell argument, after trimming whitespace. +. +.IP 2. 3n +Lines starting with \fB#\fP (optionally preceded by any amount of whitespace) +are ignored. +.PP +ripgrep will look for a single configuration file if and only if the +\fBRIPGREP_CONFIG_PATH\fP environment variable is set and is non-empty. +ripgrep will parse arguments from this file on startup and will behave as if +the arguments in this file were prepended to any explicit arguments given to +ripgrep on the command line. 
Note though that the \fBrg\fP command you run +must still be valid. That is, it must always contain at least one pattern at +the command line, even if the configuration file uses the \fB\-e/\-\-regexp\fP +flag. +.sp +For example, if your ripgreprc file contained a single line: +.sp +.EX + \-\-smart\-case +.EE +.sp +then the following command +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-smart-case foo +.EE +.sp +Another example is adding types, like so: +.sp +.EX + \-\-type-add + web:*.{html,css,js}* +.EE +.sp +The above would behave identically to the following command: +.sp +.EX + rg \-\-type\-add 'web:*.{html,css,js}*' foo +.EE +.sp +The same applies to using globs. This: +.sp +.EX + \-\-glob=!.git +.EE +.sp +or this: +.sp +.EX + \-\-glob + !.git +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-glob '!.git' foo +.EE +.sp +The bottom line is that every shell argument needs to be on its own line. So +for example, a config file containing +.sp +.EX + \-j 4 +.EE +.sp +is probably not doing what you intend. Instead, you want +.sp +.EX + \-j + 4 +.EE +.sp +or +.sp +.EX + \-j4 +.EE +.sp +ripgrep also provides a flag, \fB\-\-no\-config\fP, that when present will +suppress any and all support for configuration. This includes any future +support for auto-loading configuration files from pre-determined paths. +.sp +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, assuming +your config file contains only \fB\-\-smart\-case\fP, then this command: +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo \-\-case\-sensitive +.EE +.sp +is exactly equivalent to +.sp +.EX + rg \-\-smart\-case foo \-\-case\-sensitive +.EE +.sp +in which case, the \fB\-\-case\-sensitive\fP flag would override the +\fB\-\-smart\-case\fP flag. +. +. 
+.SH SHELL COMPLETION +Shell completion files are included in the release tarball for Bash, Fish, Zsh +and PowerShell. +.sp +For \fBbash\fP, move \fBrg.bash\fP to \fB$XDG_CONFIG_HOME/bash_completion\fP or +\fB/etc/bash_completion.d/\fP. +.sp +For \fBfish\fP, move \fBrg.fish\fP to \fB$HOME/.config/fish/completions\fP. +.sp +For \fBzsh\fP, move \fB_rg\fP to one of your \fB$fpath\fP directories. +. +. +.SH CAVEATS +ripgrep may abort unexpectedly when using default settings if it searches a +file that is simultaneously truncated. This behavior can be avoided by passing +the \fB\-\-no\-mmap\fP flag which will forcefully disable the use of memory +maps in all cases. +.sp +ripgrep may use a large amount of memory depending on a few factors. Firstly, +if ripgrep uses parallelism for search (the default), then the entire +output for each individual file is buffered into memory in order to prevent +interleaving matches in the output. To avoid this, you can disable parallelism +with the \fB\-j1\fP flag. Secondly, ripgrep always needs to have at least a +single line in memory in order to execute a search. A file with a very long +line can thus cause ripgrep to use a lot of memory. Generally, this only occurs +when searching binary data with the \fB\-a/\-\-text\fP flag enabled. (When the +\fB\-a/\-\-text\fP flag isn't enabled, ripgrep will replace all NUL bytes with +line terminators, which typically prevents exorbitant memory usage.) Thirdly, +when ripgrep searches a large file using a memory map, the process will likely +report its resident memory usage as the size of the file. However, this does +not mean ripgrep actually needed to use that much heap memory; the operating +system will generally handle this for you. +. +. +.SH VERSION +!!VERSION!! +. +. +.SH HOMEPAGE +\fIhttps://github.com/BurntSushi/ripgrep\fP +.sp +Please report bugs and feature requests to the issue tracker. Please do your +best to provide a reproducible test case for bugs. 
This should include the +corpus being searched, the \fBrg\fP command, the actual output and the expected +output. Please also include the output of running the same \fBrg\fP command but +with the \fB\-\-debug\fP flag. +.sp +If you have questions that don't obviously fall into the "bug" or "feature +request" category, then they are welcome in the Discussions section of the +issue tracker: \fIhttps://github.com/BurntSushi/ripgrep/discussions\fP. +. +. +.SH AUTHORS +Andrew Gallant <\fIjamslam@gmail.com\fP> diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.short.help b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.short.help new file mode 100644 index 000000000..0a285668b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/template.short.help @@ -0,0 +1,38 @@ +ripgrep !!VERSION!! +Andrew Gallant + +ripgrep (rg) recursively searches the current directory for lines matching +a regex pattern. By default, ripgrep will respect gitignore rules and +automatically skip hidden files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + +POSITIONAL ARGUMENTS: + <PATTERN> A regular expression used for searching. + <PATH>... A file or directory to search. + +INPUT OPTIONS: +!!input!! + +SEARCH OPTIONS: +!!search!! + +FILTER OPTIONS: +!!filter!! + +OUTPUT OPTIONS: +!!output!! + +OUTPUT MODES: +!!output-modes!! + +LOGGING OPTIONS: +!!logging!! + +OTHER BEHAVIORS: +!!other-behaviors!! diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/version.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/version.rs new file mode 100644 index 000000000..d238ba0cd --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/doc/version.rs @@ -0,0 +1,177 @@ +/*!
+Provides routines for generating version strings. + +Version strings can be just the digits, an overall short one-line description +or something more verbose that includes things like CPU target feature support. +*/ + +use std::fmt::Write; + +/// Generates just the numerical part of the version of ripgrep. +/// +/// This includes the git revision hash. +pub(crate) fn generate_digits() -> String { + let semver = option_env!("CARGO_PKG_VERSION").unwrap_or("N/A"); + match option_env!("RIPGREP_BUILD_GIT_HASH") { + None => semver.to_string(), + Some(hash) => format!("{semver} (rev {hash})"), + } +} + +/// Generates a short version string of the form `ripgrep x.y.z`. +pub(crate) fn generate_short() -> String { + let digits = generate_digits(); + format!("ripgrep {digits}") +} + +/// Generates a longer multi-line version string. +/// +/// This includes not only the version of ripgrep but some other information +/// about its build. For example, SIMD support and PCRE2 support. +pub(crate) fn generate_long() -> String { + let (compile, runtime) = (compile_cpu_features(), runtime_cpu_features()); + + let mut out = String::new(); + writeln!(out, "{}", generate_short()).unwrap(); + writeln!(out).unwrap(); + writeln!(out, "features:{}", features().join(",")).unwrap(); + if !compile.is_empty() { + writeln!(out, "simd(compile):{}", compile.join(",")).unwrap(); + } + if !runtime.is_empty() { + writeln!(out, "simd(runtime):{}", runtime.join(",")).unwrap(); + } + let (pcre2_version, _) = generate_pcre2(); + writeln!(out, "\n{pcre2_version}").unwrap(); + out +} + +/// Generates multi-line version string with PCRE2 information. +/// +/// This also returns whether PCRE2 is actually available in this build of +/// ripgrep. 
+pub(crate) fn generate_pcre2() -> (String, bool) { + let mut out = String::new(); + + #[cfg(feature = "pcre2")] + { + use grep::pcre2; + + let (major, minor) = pcre2::version(); + write!(out, "PCRE2 {}.{} is available", major, minor).unwrap(); + if cfg!(target_pointer_width = "64") && pcre2::is_jit_available() { + writeln!(out, " (JIT is available)").unwrap(); + } else { + writeln!(out, " (JIT is unavailable)").unwrap(); + } + (out, true) + } + + #[cfg(not(feature = "pcre2"))] + { + writeln!(out, "PCRE2 is not available in this build of ripgrep.") + .unwrap(); + (out, false) + } +} + +/// Returns the relevant SIMD features supported by the CPU at runtime. +/// +/// This is kind of a dirty violation of abstraction, since it assumes +/// knowledge about what specific SIMD features are being used by various +/// components. +fn runtime_cpu_features() -> Vec { + #[cfg(target_arch = "x86_64")] + { + let mut features = vec![]; + + let sse2 = is_x86_feature_detected!("sse2"); + features.push(format!("{sign}SSE2", sign = sign(sse2))); + + let ssse3 = is_x86_feature_detected!("ssse3"); + features.push(format!("{sign}SSSE3", sign = sign(ssse3))); + + let avx2 = is_x86_feature_detected!("avx2"); + features.push(format!("{sign}AVX2", sign = sign(avx2))); + + features + } + #[cfg(target_arch = "aarch64")] + { + let mut features = vec![]; + + // memchr and aho-corasick only use NEON when it is available at + // compile time. This isn't strictly necessary, but NEON is supposed + // to be available for all aarch64 targets. If this isn't true, please + // file an issue at https://github.com/BurntSushi/memchr. + let neon = cfg!(target_feature = "neon"); + features.push(format!("{sign}NEON", sign = sign(neon))); + + features + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + vec![] + } +} + +/// Returns the SIMD features supported while compiling ripgrep. +/// +/// In essence, any features listed here are required to run ripgrep correctly. 
+/// +/// This is kind of a dirty violation of abstraction, since it assumes +/// knowledge about what specific SIMD features are being used by various +/// components. +/// +/// An easy way to enable everything available on your current CPU is to +/// compile ripgrep with `RUSTFLAGS="-C target-cpu=native"`. But note that +/// the binary produced by this will not be portable. +fn compile_cpu_features() -> Vec { + #[cfg(target_arch = "x86_64")] + { + let mut features = vec![]; + + let sse2 = cfg!(target_feature = "sse2"); + features.push(format!("{sign}SSE2", sign = sign(sse2))); + + let ssse3 = cfg!(target_feature = "ssse3"); + features.push(format!("{sign}SSSE3", sign = sign(ssse3))); + + let avx2 = cfg!(target_feature = "avx2"); + features.push(format!("{sign}AVX2", sign = sign(avx2))); + + features + } + #[cfg(target_arch = "aarch64")] + { + let mut features = vec![]; + + let neon = cfg!(target_feature = "neon"); + features.push(format!("{sign}NEON", sign = sign(neon))); + + features + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + vec![] + } +} + +/// Returns a list of "features" supported (or not) by this build of ripgrep. +fn features() -> Vec { + let mut features = vec![]; + + let pcre2 = cfg!(feature = "pcre2"); + features.push(format!("{sign}pcre2", sign = sign(pcre2))); + + features +} + +/// Returns `+` when `enabled` is `true` and `-` otherwise. +fn sign(enabled: bool) -> &'static str { + if enabled { + "+" + } else { + "-" + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/hiargs.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/hiargs.rs new file mode 100644 index 000000000..df09dceda --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/hiargs.rs @@ -0,0 +1,1471 @@ +/*! +Provides the definition of high level arguments from CLI flags.
+*/ + +use std::{ + collections::HashSet, + path::{Path, PathBuf}, +}; + +use { + bstr::BString, + grep::printer::{ColorSpecs, SummaryKind}, +}; + +use crate::{ + flags::lowargs::{ + BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice, + ContextMode, ContextSeparator, EncodingMode, EngineChoice, + FieldContextSeparator, FieldMatchSeparator, LowArgs, MmapMode, Mode, + PatternSource, SearchMode, SortMode, SortModeKind, TypeChange, + }, + haystack::{Haystack, HaystackBuilder}, + search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}, +}; + +/// A high level representation of CLI arguments. +/// +/// The distinction between low and high level arguments is somewhat arbitrary +/// and wishy washy. The main idea here is that high level arguments generally +/// require all of CLI parsing to be finished. For example, one cannot +/// construct a glob matcher until all of the glob patterns are known. +/// +/// So while low level arguments are collected during parsing itself, high +/// level arguments aren't created until parsing has completely finished. 
+#[derive(Debug)] +pub(crate) struct HiArgs { + binary: BinaryDetection, + boundary: Option, + buffer: BufferMode, + byte_offset: bool, + case: CaseMode, + color: ColorChoice, + colors: grep::printer::ColorSpecs, + column: bool, + context: ContextMode, + context_separator: ContextSeparator, + crlf: bool, + dfa_size_limit: Option, + encoding: EncodingMode, + engine: EngineChoice, + field_context_separator: FieldContextSeparator, + field_match_separator: FieldMatchSeparator, + file_separator: Option>, + fixed_strings: bool, + follow: bool, + globs: ignore::overrides::Override, + heading: bool, + hidden: bool, + hyperlink_config: grep::printer::HyperlinkConfig, + ignore_file_case_insensitive: bool, + ignore_file: Vec, + include_zero: bool, + invert_match: bool, + is_terminal_stdout: bool, + line_number: bool, + max_columns: Option, + max_columns_preview: bool, + max_count: Option, + max_depth: Option, + max_filesize: Option, + mmap_choice: grep::searcher::MmapChoice, + mode: Mode, + multiline: bool, + multiline_dotall: bool, + no_ignore_dot: bool, + no_ignore_exclude: bool, + no_ignore_files: bool, + no_ignore_global: bool, + no_ignore_parent: bool, + no_ignore_vcs: bool, + no_require_git: bool, + no_unicode: bool, + null_data: bool, + one_file_system: bool, + only_matching: bool, + path_separator: Option, + paths: Paths, + path_terminator: Option, + patterns: Patterns, + pre: Option, + pre_globs: ignore::overrides::Override, + quiet: bool, + quit_after_match: bool, + regex_size_limit: Option, + replace: Option, + search_zip: bool, + sort: Option, + stats: Option, + stop_on_nonmatch: bool, + threads: usize, + trim: bool, + types: ignore::types::Types, + vimgrep: bool, + with_filename: bool, +} + +impl HiArgs { + /// Convert low level arguments into high level arguments. + /// + /// This process can fail for a variety of reasons. For example, invalid + /// globs or some kind of environment issue. 
+ pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result { + // Callers should not be trying to convert low-level arguments when + // a short-circuiting special mode is present. + assert_eq!(None, low.special, "special mode demands short-circuiting"); + // If the sorting mode isn't supported, then we bail loudly. I'm not + // sure if this is the right thing to do. We could silently "not sort" + // as well. If we wanted to go that route, then we could just set + // `low.sort = None` if `supported()` returns an error. + if let Some(ref sort) = low.sort { + sort.supported()?; + } + + // We modify the mode in-place on `low` so that subsequent conversions + // see the correct mode. + match low.mode { + Mode::Search(ref mut mode) => match *mode { + // treat `-v --count-matches` as `-v --count` + SearchMode::CountMatches if low.invert_match => { + *mode = SearchMode::Count; + } + // treat `-o --count` as `--count-matches` + SearchMode::Count if low.only_matching => { + *mode = SearchMode::CountMatches; + } + _ => {} + }, + _ => {} + } + + let mut state = State::new()?; + let patterns = Patterns::from_low_args(&mut state, &mut low)?; + let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?; + + let binary = BinaryDetection::from_low_args(&state, &low); + let colors = take_color_specs(&mut state, &mut low); + let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?; + let stats = stats(&low); + let types = types(&low)?; + let globs = globs(&state, &low)?; + let pre_globs = preprocessor_globs(&state, &low)?; + + let color = match low.color { + ColorChoice::Auto if !state.is_terminal_stdout => { + ColorChoice::Never + } + _ => low.color, + }; + let column = low.column.unwrap_or(low.vimgrep); + let heading = match low.heading { + None => !low.vimgrep && state.is_terminal_stdout, + Some(false) => false, + Some(true) => !low.vimgrep, + }; + let path_terminator = if low.null { Some(b'\x00') } else { None }; + let quit_after_match = stats.is_none() 
&& low.quiet; + let threads = if low.sort.is_some() || paths.is_one_file { + 1 + } else if let Some(threads) = low.threads { + threads + } else { + std::thread::available_parallelism().map_or(1, |n| n.get()).min(12) + }; + log::debug!("using {threads} thread(s)"); + let with_filename = low + .with_filename + .unwrap_or_else(|| low.vimgrep || !paths.is_one_file); + + let file_separator = match low.mode { + Mode::Search(SearchMode::Standard) => { + if heading { + Some(b"".to_vec()) + } else if let ContextMode::Limited(ref limited) = low.context { + let (before, after) = limited.get(); + if before > 0 || after > 0 { + low.context_separator.clone().into_bytes() + } else { + None + } + } else { + None + } + } + _ => None, + }; + + let line_number = low.line_number.unwrap_or_else(|| { + if low.quiet { + return false; + } + let Mode::Search(ref search_mode) = low.mode else { return false }; + match *search_mode { + SearchMode::FilesWithMatches + | SearchMode::FilesWithoutMatch + | SearchMode::Count + | SearchMode::CountMatches => return false, + SearchMode::JSON => return true, + SearchMode::Standard => { + // A few things can imply counting line numbers. In + // particular, we generally want to show line numbers by + // default when printing to a tty for human consumption, + // except for one interesting case: when we're only + // searching stdin. This makes pipelines work as expected. + (state.is_terminal_stdout && !paths.is_only_stdin()) + || column + || low.vimgrep + } + } + }); + + let mmap_choice = { + // SAFETY: Memory maps are difficult to impossible to encapsulate + // safely in a portable way that doesn't simultaneously negate some + // of the benefits of using memory maps. For ripgrep's use, we never + // mutate a memory map and generally never store the contents of + // a memory map in a data structure that depends on immutability.
+ // Generally speaking, the worst thing that can happen is a SIGBUS + // (if the underlying file is truncated while reading it), which + // will cause ripgrep to abort. This reasoning should be treated as + // suspect. + let maybe = unsafe { grep::searcher::MmapChoice::auto() }; + let never = grep::searcher::MmapChoice::never(); + match low.mmap { + MmapMode::Auto => { + if paths.paths.len() <= 10 + && paths.paths.iter().all(|p| p.is_file()) + { + // If we're only searching a few paths and all of them + // are files, then memory maps are probably faster. + maybe + } else { + never + } + } + MmapMode::AlwaysTryMmap => maybe, + MmapMode::Never => never, + } + }; + + Ok(HiArgs { + mode: low.mode, + patterns, + paths, + binary, + boundary: low.boundary, + buffer: low.buffer, + byte_offset: low.byte_offset, + case: low.case, + color, + colors, + column, + context: low.context, + context_separator: low.context_separator, + crlf: low.crlf, + dfa_size_limit: low.dfa_size_limit, + encoding: low.encoding, + engine: low.engine, + field_context_separator: low.field_context_separator, + field_match_separator: low.field_match_separator, + file_separator, + fixed_strings: low.fixed_strings, + follow: low.follow, + heading, + hidden: low.hidden, + hyperlink_config, + ignore_file: low.ignore_file, + ignore_file_case_insensitive: low.ignore_file_case_insensitive, + include_zero: low.include_zero, + invert_match: low.invert_match, + is_terminal_stdout: state.is_terminal_stdout, + line_number, + max_columns: low.max_columns, + max_columns_preview: low.max_columns_preview, + max_count: low.max_count, + max_depth: low.max_depth, + max_filesize: low.max_filesize, + mmap_choice, + multiline: low.multiline, + multiline_dotall: low.multiline_dotall, + no_ignore_dot: low.no_ignore_dot, + no_ignore_exclude: low.no_ignore_exclude, + no_ignore_files: low.no_ignore_files, + no_ignore_global: low.no_ignore_global, + no_ignore_parent: low.no_ignore_parent, + no_ignore_vcs: low.no_ignore_vcs, + 
no_require_git: low.no_require_git, + no_unicode: low.no_unicode, + null_data: low.null_data, + one_file_system: low.one_file_system, + only_matching: low.only_matching, + globs, + path_separator: low.path_separator, + path_terminator, + pre: low.pre, + pre_globs, + quiet: low.quiet, + quit_after_match, + regex_size_limit: low.regex_size_limit, + replace: low.replace, + search_zip: low.search_zip, + sort: low.sort, + stats, + stop_on_nonmatch: low.stop_on_nonmatch, + threads, + trim: low.trim, + types, + vimgrep: low.vimgrep, + with_filename, + }) + } + + /// Returns a writer for printing buffers to stdout. + /// + /// This is intended to be used from multiple threads. Namely, a buffer + /// writer can create new buffers that are sent to threads. Threads can + /// then independently write to the buffers. Once a unit of work is + /// complete, a buffer can be given to the buffer writer to write to + /// stdout. + pub(crate) fn buffer_writer(&self) -> termcolor::BufferWriter { + let mut wtr = + termcolor::BufferWriter::stdout(self.color.to_termcolor()); + wtr.separator(self.file_separator.clone()); + wtr + } + + /// Returns true when ripgrep had to guess to search the current working + /// directory. That is, it's true when ripgrep is called without any file + /// paths or directories to search. + /// + /// Other than changing how file paths are printed (i.e., without the + /// leading `./`), it's also useful to know for diagnostic reasons. For + /// example, ripgrep will print an error message when nothing is searched + /// since it's possible the ignore rules in play are too aggressive. But + /// this warning is only emitted when ripgrep was called without any + /// explicit file paths since otherwise the warning would likely be too + /// aggressive. + pub(crate) fn has_implicit_path(&self) -> bool { + self.paths.has_implicit_path + } + + /// Return a properly configured builder for constructing haystacks. 
+ /// + /// The builder can be used to turn a directory entry (from the `ignore` + /// crate) into something that can be searched. + pub(crate) fn haystack_builder(&self) -> HaystackBuilder { + let mut builder = HaystackBuilder::new(); + builder.strip_dot_prefix(self.paths.has_implicit_path); + builder + } + + /// Return the matcher that should be used for searching using the engine + /// choice made by the user. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + pub(crate) fn matcher(&self) -> anyhow::Result { + match self.engine { + EngineChoice::Default => match self.matcher_rust() { + Ok(m) => Ok(m), + Err(err) => { + anyhow::bail!(suggest_other_engine(err.to_string())); + } + }, + EngineChoice::PCRE2 => Ok(self.matcher_pcre2()?), + EngineChoice::Auto => { + let rust_err = match self.matcher_rust() { + Ok(m) => return Ok(m), + Err(err) => err, + }; + log::debug!( + "error building Rust regex in hybrid mode:\n{rust_err}", + ); + + let pcre_err = match self.matcher_pcre2() { + Ok(m) => return Ok(m), + Err(err) => err, + }; + let divider = "~".repeat(79); + anyhow::bail!( + "regex could not be compiled with either the default \ + regex engine or with PCRE2.\n\n\ + default regex engine error:\n\ + {divider}\n\ + {rust_err}\n\ + {divider}\n\n\ + PCRE2 regex engine error:\n{pcre_err}", + ); + } + } + } + + /// Build a matcher using PCRE2. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + /// + /// If the `pcre2` feature is not enabled then this always returns an + /// error. 
+ fn matcher_pcre2(&self) -> anyhow::Result { + #[cfg(feature = "pcre2")] + { + let mut builder = grep::pcre2::RegexMatcherBuilder::new(); + builder.multi_line(true).fixed_strings(self.fixed_strings); + match self.case { + CaseMode::Sensitive => builder.caseless(false), + CaseMode::Insensitive => builder.caseless(true), + CaseMode::Smart => builder.case_smart(true), + }; + if let Some(ref boundary) = self.boundary { + match *boundary { + BoundaryMode::Line => builder.whole_line(true), + BoundaryMode::Word => builder.word(true), + }; + } + // For whatever reason, the JIT craps out during regex compilation with + // a "no more memory" error on 32 bit systems. So don't use it there. + if cfg!(target_pointer_width = "64") { + builder + .jit_if_available(true) + // The PCRE2 docs say that 32KB is the default, and that 1MB + // should be big enough for anything. But let's crank it to + // 10MB. + .max_jit_stack_size(Some(10 * (1 << 20))); + } + if !self.no_unicode { + builder.utf(true).ucp(true); + } + if self.multiline { + builder.dotall(self.multiline_dotall); + } + if self.crlf { + builder.crlf(true); + } + let m = builder.build_many(&self.patterns.patterns)?; + Ok(PatternMatcher::PCRE2(m)) + } + #[cfg(not(feature = "pcre2"))] + { + Err(anyhow::anyhow!( + "PCRE2 is not available in this build of ripgrep" + )) + } + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. 
+ fn matcher_rust(&self) -> anyhow::Result { + let mut builder = grep::regex::RegexMatcherBuilder::new(); + builder + .multi_line(true) + .unicode(!self.no_unicode) + .octal(false) + .fixed_strings(self.fixed_strings); + match self.case { + CaseMode::Sensitive => builder.case_insensitive(false), + CaseMode::Insensitive => builder.case_insensitive(true), + CaseMode::Smart => builder.case_smart(true), + }; + if let Some(ref boundary) = self.boundary { + match *boundary { + BoundaryMode::Line => builder.whole_line(true), + BoundaryMode::Word => builder.word(true), + }; + } + if self.multiline { + builder.dot_matches_new_line(self.multiline_dotall); + if self.crlf { + builder.crlf(true).line_terminator(None); + } + } else { + builder.line_terminator(Some(b'\n')).dot_matches_new_line(false); + if self.crlf { + builder.crlf(true); + } + // We don't need to set this in multiline mode since multiline + // matchers don't use optimizations related to line terminators. + // Moreover, a multiline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. + if self.null_data { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit { + builder.dfa_size_limit(limit); + } + if !self.binary.is_none() { + builder.ban_byte(Some(b'\x00')); + } + let m = match builder.build_many(&self.patterns.patterns) { + Ok(m) => m, + Err(err) => { + anyhow::bail!(suggest_text(suggest_multiline(err.to_string()))) + } + }; + Ok(PatternMatcher::RustRegex(m)) + } + + /// Returns true if some non-zero number of matches is believed to be + /// possible. + /// + /// When this returns false, it is impossible for ripgrep to ever report + /// a match. 
+ pub(crate) fn matches_possible(&self) -> bool { + if self.patterns.patterns.is_empty() { + return false; + } + if self.max_count == Some(0) { + return false; + } + true + } + + /// Returns the "mode" that ripgrep should operate in. + /// + /// This is generally useful for determining what action ripgrep should + /// take. The main mode is of course to "search," but there are other + /// non-search modes such as `--type-list` and `--files`. + pub(crate) fn mode(&self) -> Mode { + self.mode + } + + /// Returns a builder for constructing a "path printer." + /// + /// This is useful for the `--files` mode in ripgrep, where the printer + /// just needs to emit paths and not need to worry about the functionality + /// of searching. + pub(crate) fn path_printer_builder( + &self, + ) -> grep::printer::PathPrinterBuilder { + let mut builder = grep::printer::PathPrinterBuilder::new(); + builder + .color_specs(self.colors.clone()) + .hyperlink(self.hyperlink_config.clone()) + .separator(self.path_separator.clone()) + .terminator(self.path_terminator.unwrap_or(b'\n')); + builder + } + + /// Returns a printer for the given search mode. + /// + /// This chooses which printer to build (JSON, summary or standard) based + /// on the search mode given. + pub(crate) fn printer( + &self, + search_mode: SearchMode, + wtr: W, + ) -> Printer { + let summary_kind = if self.quiet { + SummaryKind::Quiet + } else { + match search_mode { + SearchMode::FilesWithMatches => SummaryKind::PathWithMatch, + SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch, + SearchMode::Count => SummaryKind::Count, + SearchMode::CountMatches => SummaryKind::CountMatches, + SearchMode::JSON => { + return Printer::JSON(self.printer_json(wtr)) + } + SearchMode::Standard => { + return Printer::Standard(self.printer_standard(wtr)) + } + } + }; + Printer::Summary(self.printer_summary(wtr, summary_kind)) + } + + /// Builds a JSON printer. 
+ fn printer_json( + &self, + wtr: W, + ) -> grep::printer::JSON { + grep::printer::JSONBuilder::new() + .pretty(false) + .max_matches(self.max_count) + .always_begin_end(false) + .build(wtr) + } + + /// Builds a "standard" grep printer where matches are printed as plain + /// text lines. + fn printer_standard( + &self, + wtr: W, + ) -> grep::printer::Standard { + let mut builder = grep::printer::StandardBuilder::new(); + builder + .byte_offset(self.byte_offset) + .color_specs(self.colors.clone()) + .column(self.column) + .heading(self.heading) + .hyperlink(self.hyperlink_config.clone()) + .max_columns_preview(self.max_columns_preview) + .max_columns(self.max_columns) + .max_matches(self.max_count) + .only_matching(self.only_matching) + .path(self.with_filename) + .path_terminator(self.path_terminator.clone()) + .per_match_one_line(true) + .per_match(self.vimgrep) + .replacement(self.replace.clone().map(|r| r.into())) + .separator_context(self.context_separator.clone().into_bytes()) + .separator_field_context( + self.field_context_separator.clone().into_bytes(), + ) + .separator_field_match( + self.field_match_separator.clone().into_bytes(), + ) + .separator_path(self.path_separator.clone()) + .stats(self.stats.is_some()) + .trim_ascii(self.trim); + // When doing multi-threaded searching, the buffer writer is + // responsible for writing separators since it is the only thing that + // knows whether something has been printed or not. But for the single + // threaded case, we don't use a buffer writer and thus can let the + // printer own this. + if self.threads == 1 { + builder.separator_search(self.file_separator.clone()); + } + builder.build(wtr) + } + + /// Builds a "summary" printer where search results are aggregated on a + /// file-by-file basis. 
+ fn printer_summary( + &self, + wtr: W, + kind: SummaryKind, + ) -> grep::printer::Summary { + grep::printer::SummaryBuilder::new() + .color_specs(self.colors.clone()) + .exclude_zero(!self.include_zero) + .hyperlink(self.hyperlink_config.clone()) + .kind(kind) + .max_matches(self.max_count) + .path(self.with_filename) + .path_terminator(self.path_terminator.clone()) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator.clone()) + .stats(self.stats.is_some()) + .build(wtr) + } + + /// Returns true if ripgrep should operate in "quiet" mode. + /// + /// Generally speaking, quiet mode means that ripgrep should not print + /// anything to stdout. There are some exceptions. For example, when the + /// user has provided `--stats`, then ripgrep will print statistics to + /// stdout. + pub(crate) fn quiet(&self) -> bool { + self.quiet + } + + /// Returns true when ripgrep should stop searching after a single match is + /// found. + /// + /// This is useful for example when quiet mode is enabled. In that case, + /// users generally can't tell the difference in behavior between a search + /// that finds all matches and a search that only finds one of them. (An + /// exception here is if `--stats` is given, then `quit_after_match` will + /// always return false since the user expects ripgrep to find everything.) + pub(crate) fn quit_after_match(&self) -> bool { + self.quit_after_match + } + + /// Build a worker for executing searches. + /// + /// Search results are found using the given matcher and written to the + /// given printer. + pub(crate) fn search_worker( + &self, + matcher: PatternMatcher, + searcher: grep::searcher::Searcher, + printer: Printer, + ) -> anyhow::Result> { + let mut builder = SearchWorkerBuilder::new(); + builder + .preprocessor(self.pre.clone())? 
+ .preprocessor_globs(self.pre_globs.clone()) + .search_zip(self.search_zip) + .binary_detection_explicit(self.binary.explicit.clone()) + .binary_detection_implicit(self.binary.implicit.clone()); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Build a searcher from the command line parameters. + pub(crate) fn searcher(&self) -> anyhow::Result { + let line_term = if self.crlf { + grep::matcher::LineTerminator::crlf() + } else if self.null_data { + grep::matcher::LineTerminator::byte(b'\x00') + } else { + grep::matcher::LineTerminator::byte(b'\n') + }; + let mut builder = grep::searcher::SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.invert_match) + .line_number(self.line_number) + .multi_line(self.multiline) + .memory_map(self.mmap_choice.clone()) + .stop_on_nonmatch(self.stop_on_nonmatch); + match self.context { + ContextMode::Passthru => { + builder.passthru(true); + } + ContextMode::Limited(ref limited) => { + let (before, after) = limited.get(); + builder.before_context(before); + builder.after_context(after); + } + } + match self.encoding { + EncodingMode::Auto => {} // default for the searcher + EncodingMode::Some(ref enc) => { + builder.encoding(Some(enc.clone())); + } + EncodingMode::Disabled => { + builder.bom_sniffing(false); + } + } + Ok(builder.build()) + } + + /// Given an iterator of haystacks, sort them if necessary. + /// + /// When sorting is necessary, this will collect the entire iterator into + /// memory, sort them and then return a new iterator. When sorting is not + /// necessary, then the iterator given is returned as is without collecting + /// it into memory. + /// + /// One special case is when sorting by path in ascending order has been + /// requested. In this case, the iterator given is returned as is without + /// any additional sorting. This is done because `walk_builder()` will sort + /// the iterator it yields during directory traversal, so no additional + /// sorting is needed.
+ pub(crate) fn sort<'a, I>( + &self, + haystacks: I, + ) -> Box + 'a> + where + I: Iterator + 'a, + { + use std::{cmp::Ordering, fs::Metadata, io, time::SystemTime}; + + fn attach_timestamps( + haystacks: impl Iterator, + get: impl Fn(&Metadata) -> io::Result, + ) -> impl Iterator)> { + haystacks.map(move |s| { + let time = s.path().metadata().and_then(|m| get(&m)).ok(); + (s, time) + }) + } + + let Some(ref sort) = self.sort else { return Box::new(haystacks) }; + let mut with_timestamps: Vec<_> = match sort.kind { + SortModeKind::Path if !sort.reverse => return Box::new(haystacks), + SortModeKind::Path => { + let mut haystacks = haystacks.collect::>(); + haystacks.sort_by(|ref h1, ref h2| { + h1.path().cmp(h2.path()).reverse() + }); + return Box::new(haystacks.into_iter()); + } + SortModeKind::LastModified => { + attach_timestamps(haystacks, |md| md.modified()).collect() + } + SortModeKind::LastAccessed => { + attach_timestamps(haystacks, |md| md.accessed()).collect() + } + SortModeKind::Created => { + attach_timestamps(haystacks, |md| md.created()).collect() + } + }; + with_timestamps.sort_by(|(_, ref t1), (_, ref t2)| { + let ordering = match (*t1, *t2) { + // Both have metadata, do the obvious thing. + (Some(t1), Some(t2)) => t1.cmp(&t2), + // Things that error should appear later (when ascending). + (Some(_), None) => Ordering::Less, + // Things that error should appear later (when ascending). + (None, Some(_)) => Ordering::Greater, + // When both error, we can't distinguish, so treat as equal. + (None, None) => Ordering::Equal, + }; + if sort.reverse { + ordering.reverse() + } else { + ordering + } + }); + Box::new(with_timestamps.into_iter().map(|(s, _)| s)) + } + + /// Returns a stats object if the user requested that ripgrep keep track + /// of various metrics during a search. + /// + /// When this returns `None`, then callers may assume that the user did + /// not request statistics. 
+    pub(crate) fn stats(&self) -> Option<grep::printer::Stats> {
+        self.stats.clone()
+    }
+
+    /// Returns a color-enabled writer for stdout.
+    ///
+    /// The writer returned is also configured to do either line or block
+    /// buffering, based on either explicit configuration from the user via CLI
+    /// flags, or automatically based on whether stdout is connected to a tty.
+    pub(crate) fn stdout(&self) -> grep::cli::StandardStream {
+        let color = self.color.to_termcolor();
+        // `Auto` defers to whether stdout is a tty; the explicit modes force
+        // the corresponding buffering strategy.
+        let line_buffered = match self.buffer {
+            BufferMode::Auto => self.is_terminal_stdout,
+            BufferMode::Line => true,
+            BufferMode::Block => false,
+        };
+        if line_buffered {
+            grep::cli::stdout_buffered_line(color)
+        } else {
+            grep::cli::stdout_buffered_block(color)
+        }
+    }
+
+    /// Returns the total number of threads ripgrep should use to execute a
+    /// search.
+    ///
+    /// This number is the result of reasoning about both heuristics (like
+    /// the available number of cores) and whether ripgrep's mode supports
+    /// parallelism. It is intended that this number be used to directly
+    /// determine how many threads to spawn.
+    pub(crate) fn threads(&self) -> usize {
+        self.threads
+    }
+
+    /// Returns the file type matcher that was built.
+    ///
+    /// The matcher includes both the default rules and any rules added by the
+    /// user for this specific invocation.
+    pub(crate) fn types(&self) -> &ignore::types::Types {
+        &self.types
+    }
+
+    /// Create a new builder for recursive directory traversal.
+    ///
+    /// The builder returned can be used to start a single threaded or multi
+    /// threaded directory traversal. For multi threaded traversal, the number
+    /// of threads configured is equivalent to `HiArgs::threads`.
+    ///
+    /// If `HiArgs::threads` is equal to `1`, then callers should generally
+    /// choose to explicitly use single threaded traversal since it won't have
+    /// the unnecessary overhead of synchronization.
+    pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
+        // `Paths` guarantees at least one path, so indexing `[0]` is safe.
+        let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]);
+        for path in self.paths.paths.iter().skip(1) {
+            builder.add(path);
+        }
+        if !self.no_ignore_files {
+            for path in self.ignore_file.iter() {
+                if let Some(err) = builder.add_ignore(path) {
+                    ignore_message!("{err}");
+                }
+            }
+        }
+        builder
+            .max_depth(self.max_depth)
+            .follow_links(self.follow)
+            .max_filesize(self.max_filesize)
+            .threads(self.threads)
+            .same_file_system(self.one_file_system)
+            .skip_stdout(matches!(self.mode, Mode::Search(_)))
+            .overrides(self.globs.clone())
+            .types(self.types.clone())
+            .hidden(!self.hidden)
+            .parents(!self.no_ignore_parent)
+            .ignore(!self.no_ignore_dot)
+            .git_global(!self.no_ignore_vcs && !self.no_ignore_global)
+            .git_ignore(!self.no_ignore_vcs)
+            .git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
+            .require_git(!self.no_require_git)
+            .ignore_case_insensitive(self.ignore_file_case_insensitive);
+        if !self.no_ignore_dot {
+            builder.add_custom_ignore_filename(".rgignore");
+        }
+        // When we want to sort paths lexicographically in ascending order,
+        // then we can actually do this during directory traversal itself.
+        // Otherwise, sorting is done by collecting all paths, sorting them and
+        // then searching them.
+        if let Some(ref sort) = self.sort {
+            assert_eq!(1, self.threads, "sorting implies single threaded");
+            if !sort.reverse && matches!(sort.kind, SortModeKind::Path) {
+                builder.sort_by_file_name(|a, b| a.cmp(b));
+            }
+        }
+        Ok(builder)
+    }
+}
+
+/// State that only needs to be computed once during argument parsing.
+///
+/// This state is meant to be somewhat generic and shared across multiple
+/// low->high argument conversions. The state can even be mutated by various
+/// conversions as a way to communicate changes to other conversions. For
+/// example, reading patterns might consume from stdin. If we know stdin
+/// has been consumed and no other file paths have been given, then we know
+/// for sure that we should search the CWD. In this way, a state change
+/// when reading the patterns can impact how the file paths are ultimately
+/// generated.
+#[derive(Debug)]
+struct State {
+    /// Whether it's believed that tty is connected to stdout. Note that on
+    /// unix systems, this is always correct. On Windows, heuristics are used
+    /// by Rust's standard library, particularly for cygwin/MSYS environments.
+    is_terminal_stdout: bool,
+    /// Whether stdin has already been consumed. This is useful to know and for
+    /// providing good error messages when the user has tried to read from stdin
+    /// in two different places. For example, `rg -f - -`.
+    stdin_consumed: bool,
+    /// The current working directory.
+    cwd: PathBuf,
+}
+
+impl State {
+    /// Initialize state to some sensible defaults.
+    ///
+    /// Note that the state values may change throughout the lifetime of
+    /// argument parsing.
+    fn new() -> anyhow::Result<State> {
+        use std::io::IsTerminal;
+
+        Ok(State {
+            is_terminal_stdout: std::io::stdout().is_terminal(),
+            stdin_consumed: false,
+            cwd: current_dir()?,
+        })
+    }
+}
+
+/// The disjunction of patterns to search for.
+///
+/// The number of patterns can be empty, e.g., via `-f /dev/null`.
+#[derive(Debug)]
+struct Patterns {
+    /// The actual patterns to match.
+    patterns: Vec<String>,
+}
+
+impl Patterns {
+    /// Pulls the patterns out of the low arguments.
+    ///
+    /// This includes collecting patterns from -e/--regexp and -f/--file.
+    ///
+    /// If the invocation implies that the first positional argument is a
+    /// pattern (the common case), then the first positional argument is
+    /// extracted as well.
+    fn from_low_args(
+        state: &mut State,
+        low: &mut LowArgs,
+    ) -> anyhow::Result<Patterns> {
+        // The first positional is only a pattern when ripgrep is instructed to
+        // search and neither -e/--regexp nor -f/--file is given. Basically,
+        // the first positional is a pattern only when a pattern hasn't been
+        // given in some other way.
+
+        // No search means no patterns. Even if -e/--regexp or -f/--file is
+        // given, we know we won't use them so don't bother collecting them.
+        if !matches!(low.mode, Mode::Search(_)) {
+            return Ok(Patterns { patterns: vec![] });
+        }
+        // If we got nothing from -e/--regexp and -f/--file, then the first
+        // positional is a pattern.
+        if low.patterns.is_empty() {
+            anyhow::ensure!(
+                !low.positional.is_empty(),
+                "ripgrep requires at least one pattern to execute a search"
+            );
+            let ospat = low.positional.remove(0);
+            let Ok(pat) = ospat.into_string() else {
+                anyhow::bail!("pattern given is not valid UTF-8")
+            };
+            return Ok(Patterns { patterns: vec![pat] });
+        }
+        // Otherwise, we need to slurp up our patterns from -e/--regexp and
+        // -f/--file. We de-duplicate as we go. If we don't de-duplicate,
+        // then it can actually lead to major slow downs for sloppy inputs.
+        // This might be surprising, and the regex engine will eventually
+        // de-duplicate duplicative branches in a single regex (maybe), but
+        // not until after it has gone through parsing and some other layers.
+        // If there are a lot of duplicates, then that can lead to a sizeable
+        // extra cost. It is lamentable that we pay the extra cost here to
+        // de-duplicate for a likely uncommon case, but I've seen this have a
+        // big impact on real world data.
+        let mut seen = HashSet::new();
+        let mut patterns = Vec::with_capacity(low.patterns.len());
+        let mut add = |pat: String| {
+            if !seen.contains(&pat) {
+                seen.insert(pat.clone());
+                patterns.push(pat);
+            }
+        };
+        for source in low.patterns.drain(..) {
+            match source {
+                PatternSource::Regexp(pat) => add(pat),
+                PatternSource::File(path) => {
+                    if path == Path::new("-") {
+                        anyhow::ensure!(
+                            !state.stdin_consumed,
+                            "error reading -f/--file from stdin: stdin \
+                             has already been consumed"
+                        );
+                        for pat in grep::cli::patterns_from_stdin()? {
+                            add(pat);
+                        }
+                        state.stdin_consumed = true;
+                    } else {
+                        for pat in grep::cli::patterns_from_path(&path)? {
+                            add(pat);
+                        }
+                    }
+                }
+            }
+        }
+        Ok(Patterns { patterns })
+    }
+}
+
+/// The collection of paths we want to search for.
+///
+/// This guarantees that there is always at least one path.
+#[derive(Debug)]
+struct Paths {
+    /// The actual paths.
+    paths: Vec<PathBuf>,
+    /// This is true when ripgrep had to guess to search the current working
+    /// directory. e.g., When the user just runs `rg foo`. It is odd to need
+    /// this, but it subtly changes how the paths are printed. When no explicit
+    /// path is given, then ripgrep doesn't prefix each path with `./`. But
+    /// otherwise it does! This curious behavior matches what GNU grep does.
+    has_implicit_path: bool,
+    /// Set to true if it is known that only a single file descriptor will
+    /// be searched.
+    is_one_file: bool,
+}
+
+impl Paths {
+    /// Drain the search paths out of the given low arguments.
+    fn from_low_args(
+        state: &mut State,
+        _: &Patterns,
+        low: &mut LowArgs,
+    ) -> anyhow::Result<Paths> {
+        // We require a `&Patterns` even though we don't use it to ensure that
+        // patterns have already been read from LowArgs. This lets us safely
+        // assume that all remaining positional arguments are intended to be
+        // file paths.
+
+        let mut paths = Vec::with_capacity(low.positional.len());
+        for osarg in low.positional.drain(..) {
+            let path = PathBuf::from(osarg);
+            if state.stdin_consumed && path == Path::new("-") {
+                anyhow::bail!(
+                    "error: attempted to read patterns from stdin \
+                     while also searching stdin",
+                );
+            }
+            paths.push(path);
+        }
+        log::debug!("number of paths given to search: {}", paths.len());
+        if !paths.is_empty() {
+            let is_one_file = paths.len() == 1
+                // Note that we specifically use `!paths[0].is_dir()` here
+                // instead of `paths[0].is_file()`. Namely, the latter can
+                // return `false` even when the path is something resembling
+                // a file. So instead, we just consider the path a file as
+                // long as we know it isn't a directory.
+                //
+                // See: https://github.com/BurntSushi/ripgrep/issues/2736
+                && (paths[0] == Path::new("-") || !paths[0].is_dir());
+            log::debug!("is_one_file? {is_one_file:?}");
+            return Ok(Paths { paths, has_implicit_path: false, is_one_file });
+        }
+        // N.B. is_readable_stdin is a heuristic! Part of the issue is that a
+        // lot of "exec process" APIs will open a stdin pipe even though stdin
+        // isn't really being used. ripgrep then thinks it should search stdin
+        // and one gets the appearance of it hanging. It's a terrible failure
+        // mode, but there really is no good way to mitigate it. It's just a
+        // consequence of letting the user type 'rg foo' and "guessing" that
+        // they meant to search the CWD.
+        let is_readable_stdin = grep::cli::is_readable_stdin();
+        let use_cwd = !is_readable_stdin
+            || state.stdin_consumed
+            || !matches!(low.mode, Mode::Search(_));
+        log::debug!(
+            "using heuristics to determine whether to read from \
+             stdin or search ./ (\
+             is_readable_stdin={is_readable_stdin}, \
+             stdin_consumed={stdin_consumed}, \
+             mode={mode:?})",
+            stdin_consumed = state.stdin_consumed,
+            mode = low.mode,
+        );
+        let (path, is_one_file) = if use_cwd {
+            log::debug!("heuristic chose to search ./");
+            (PathBuf::from("./"), false)
+        } else {
+            log::debug!("heuristic chose to search stdin");
+            (PathBuf::from("-"), true)
+        };
+        Ok(Paths { paths: vec![path], has_implicit_path: true, is_one_file })
+    }
+
+    /// Returns true if ripgrep will only search stdin and nothing else.
+    fn is_only_stdin(&self) -> bool {
+        self.paths.len() == 1 && self.paths[0] == Path::new("-")
+    }
+}
+
+/// The "binary detection" configuration that ripgrep should use.
+///
+/// ripgrep actually uses two different binary detection heuristics depending
+/// on whether a file is explicitly being searched (e.g., via a CLI argument)
+/// or implicitly searched (e.g., via directory traversal). In general, the
+/// former can never use a heuristic that lets it "quit" searching before
+/// either getting EOF or finding a match. (Because doing otherwise would be
+/// considered a filter, and ripgrep follows the rule that an explicitly given
+/// file is always searched.)
+#[derive(Debug)]
+struct BinaryDetection {
+    explicit: grep::searcher::BinaryDetection,
+    implicit: grep::searcher::BinaryDetection,
+}
+
+impl BinaryDetection {
+    /// Determines the correct binary detection mode from low-level arguments.
+    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
+        let none = matches!(low.binary, BinaryMode::AsText) || low.null_data;
+        let convert = matches!(low.binary, BinaryMode::SearchAndSuppress);
+        let explicit = if none {
+            grep::searcher::BinaryDetection::none()
+        } else {
+            grep::searcher::BinaryDetection::convert(b'\x00')
+        };
+        let implicit = if none {
+            grep::searcher::BinaryDetection::none()
+        } else if convert {
+            grep::searcher::BinaryDetection::convert(b'\x00')
+        } else {
+            grep::searcher::BinaryDetection::quit(b'\x00')
+        };
+        BinaryDetection { explicit, implicit }
+    }
+
+    /// Returns true when both implicit and explicit binary detection is
+    /// disabled.
+    pub(crate) fn is_none(&self) -> bool {
+        let none = grep::searcher::BinaryDetection::none();
+        self.explicit == none && self.implicit == none
+    }
+}
+
+/// Builds the file type matcher from low level arguments.
+fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
+    let mut builder = ignore::types::TypesBuilder::new();
+    builder.add_defaults();
+    for tychange in low.type_changes.iter() {
+        match tychange {
+            TypeChange::Clear { ref name } => {
+                builder.clear(name);
+            }
+            TypeChange::Add { ref def } => {
+                builder.add_def(def)?;
+            }
+            TypeChange::Select { ref name } => {
+                builder.select(name);
+            }
+            TypeChange::Negate { ref name } => {
+                builder.negate(name);
+            }
+        }
+    }
+    Ok(builder.build()?)
+}
+
+/// Builds the glob "override" matcher from the CLI `-g/--glob` and `--iglob`
+/// flags.
+fn globs(
+    state: &State,
+    low: &LowArgs,
+) -> anyhow::Result<ignore::overrides::Override> {
+    if low.globs.is_empty() && low.iglobs.is_empty() {
+        return Ok(ignore::overrides::Override::empty());
+    }
+    let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd);
+    // Make all globs case insensitive with --glob-case-insensitive.
+    if low.glob_case_insensitive {
+        builder.case_insensitive(true).unwrap();
+    }
+    for glob in low.globs.iter() {
+        builder.add(glob)?;
+    }
+    // This only enables case insensitivity for subsequent globs.
+    builder.case_insensitive(true).unwrap();
+    for glob in low.iglobs.iter() {
+        builder.add(glob)?;
+    }
+    Ok(builder.build()?)
+}
+
+/// Builds a glob matcher for all of the preprocessor globs (via `--pre-glob`).
+fn preprocessor_globs(
+    state: &State,
+    low: &LowArgs,
+) -> anyhow::Result<ignore::overrides::Override> {
+    if low.pre_glob.is_empty() {
+        return Ok(ignore::overrides::Override::empty());
+    }
+    let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd);
+    for glob in low.pre_glob.iter() {
+        builder.add(glob)?;
+    }
+    Ok(builder.build()?)
+}
+
+/// Determines whether stats should be tracked for this search. If so, a stats
+/// object is returned.
+fn stats(low: &LowArgs) -> Option<grep::printer::Stats> {
+    if !matches!(low.mode, Mode::Search(_)) {
+        return None;
+    }
+    if low.stats || matches!(low.mode, Mode::Search(SearchMode::JSON)) {
+        return Some(grep::printer::Stats::new());
+    }
+    None
+}
+
+/// Pulls out any color specs provided by the user and assembles them into one
+/// single configuration.
+fn take_color_specs(_: &mut State, low: &mut LowArgs) -> ColorSpecs {
+    let mut specs = grep::printer::default_color_specs();
+    // User specs go after the defaults, in the order they were given.
+    specs.extend(low.colors.drain(..));
+    ColorSpecs::new(&specs)
+}
+
+/// Pulls out the necessary info from the low arguments to build a full
+/// hyperlink configuration.
+fn take_hyperlink_config(
+    _: &mut State,
+    low: &mut LowArgs,
+) -> anyhow::Result<grep::printer::HyperlinkConfig> {
+    let mut env = grep::printer::HyperlinkEnvironment::new();
+    if let Some(hostname) = hostname(low.hostname_bin.as_deref()) {
+        log::debug!("found hostname for hyperlink configuration: {hostname}");
+        env.host(Some(hostname));
+    }
+    if let Some(wsl_prefix) = wsl_prefix() {
+        log::debug!(
+            "found wsl_prefix for hyperlink configuration: {wsl_prefix}"
+        );
+        env.wsl_prefix(Some(wsl_prefix));
+    }
+    // Move the format out of the low args, leaving its default behind.
+    let fmt = std::mem::take(&mut low.hyperlink_format);
+    log::debug!("hyperlink format: {:?}", fmt.to_string());
+    Ok(grep::printer::HyperlinkConfig::new(env, fmt))
+}
+
+/// Attempts to discover the current working directory.
+///
+/// This mostly just defers to the standard library, however, such things will
+/// fail if ripgrep is in a directory that no longer exists. We attempt some
+/// fallback mechanisms, such as querying the PWD environment variable, but
+/// otherwise return an error.
+fn current_dir() -> anyhow::Result<PathBuf> {
+    let err = match std::env::current_dir() {
+        Err(err) => err,
+        Ok(cwd) => return Ok(cwd),
+    };
+    if let Some(cwd) = std::env::var_os("PWD") {
+        if !cwd.is_empty() {
+            return Ok(PathBuf::from(cwd));
+        }
+    }
+    anyhow::bail!(
+        "failed to get current working directory: {err}\n\
+         did your CWD get deleted?",
+    )
+}
+
+/// Retrieves the hostname that should be used wherever a hostname is required.
+///
+/// Currently, this is only used in the hyperlink format.
+///
+/// This works by first running the given binary program (if present and with
+/// no arguments) to get the hostname after trimming leading and trailing
+/// whitespace. If that fails for any reason, then it falls back to getting
+/// the hostname via platform specific means (e.g., `gethostname` on Unix).
+///
+/// The purpose of `bin` is to make it possible for end users to override how
+/// ripgrep determines the hostname.
+fn hostname(bin: Option<&Path>) -> Option<String> {
+    let Some(bin) = bin else { return platform_hostname() };
+    let bin = match grep::cli::resolve_binary(bin) {
+        Ok(bin) => bin,
+        Err(err) => {
+            log::debug!(
+                "failed to run command '{bin:?}' to get hostname \
+                 (falling back to platform hostname): {err}",
+            );
+            return platform_hostname();
+        }
+    };
+    let mut cmd = std::process::Command::new(&bin);
+    cmd.stdin(std::process::Stdio::null());
+    let rdr = match grep::cli::CommandReader::new(&mut cmd) {
+        Ok(rdr) => rdr,
+        Err(err) => {
+            log::debug!(
+                "failed to spawn command '{bin:?}' to get \
+                 hostname (falling back to platform hostname): {err}",
+            );
+            return platform_hostname();
+        }
+    };
+    let out = match std::io::read_to_string(rdr) {
+        Ok(out) => out,
+        Err(err) => {
+            log::debug!(
+                "failed to read output from command '{bin:?}' to get \
+                 hostname (falling back to platform hostname): {err}",
+            );
+            return platform_hostname();
+        }
+    };
+    let hostname = out.trim();
+    if hostname.is_empty() {
+        log::debug!(
+            "output from command '{bin:?}' is empty after trimming \
+             leading and trailing whitespace (falling back to \
+             platform hostname)",
+        );
+        return platform_hostname();
+    }
+    Some(hostname.to_string())
+}
+
+/// Attempts to get the hostname by using platform specific routines.
+///
+/// For example, this will do `gethostname` on Unix and `GetComputerNameExW` on
+/// Windows.
+fn platform_hostname() -> Option<String> {
+    let hostname_os = match grep::cli::hostname() {
+        Ok(x) => x,
+        Err(err) => {
+            log::debug!("could not get hostname: {err}");
+            return None;
+        }
+    };
+    let Some(hostname) = hostname_os.to_str() else {
+        log::debug!(
+            "got hostname {hostname_os:?}, but it's not valid UTF-8"
+        );
+        return None;
+    };
+    Some(hostname.to_string())
+}
+
+/// Returns the value for the `{wslprefix}` variable in a hyperlink format.
+///
+/// A WSL prefix is a share/network like thing that is meant to permit Windows
+/// applications to open files stored within a WSL drive.
+///
+/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running
+/// in a Unix environment, then this returns None.
+///
+/// See: <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+fn wsl_prefix() -> Option<String> {
+    if !cfg!(unix) {
+        return None;
+    }
+    let distro_os = std::env::var_os("WSL_DISTRO_NAME")?;
+    let Some(distro) = distro_os.to_str() else {
+        log::debug!(
+            "found WSL_DISTRO_NAME={distro_os:?}, but value is not UTF-8"
+        );
+        return None;
+    };
+    Some(format!("wsl$/{distro}"))
+}
+
+/// Possibly suggest another regex engine based on the error message given.
+///
+/// This inspects an error resulting from building a Rust regex matcher, and
+/// if it's believed to correspond to a syntax error that another engine could
+/// handle, then add a message to suggest the use of the engine flag.
+fn suggest_other_engine(msg: String) -> String {
+    if let Some(pcre_msg) = suggest_pcre2(&msg) {
+        return pcre_msg;
+    }
+    msg
+}
+
+/// Possibly suggest PCRE2 based on the error message given.
+///
+/// Inspect an error resulting from building a Rust regex matcher, and if it's
+/// believed to correspond to a syntax error that PCRE2 could handle, then
+/// add a message to suggest the use of -P/--pcre2.
+fn suggest_pcre2(msg: &str) -> Option<String> {
+    if !cfg!(feature = "pcre2") {
+        return None;
+    }
+    if !msg.contains("backreferences") && !msg.contains("look-around") {
+        None
+    } else {
+        Some(format!(
+            "{msg}
+
+Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences
+and look-around.",
+        ))
+    }
+}
+
+/// Possibly suggest multiline mode based on the error message given.
+///
+/// Does a bit of a hacky inspection of the given error message, and if it
+/// looks like the user tried to type a literal line terminator then it will
+/// return a new error message suggesting the use of -U/--multiline.
+fn suggest_multiline(msg: String) -> String { + if msg.contains("the literal") && msg.contains("not allowed") { + format!( + "{msg} + +Consider enabling multiline mode with the --multiline flag (or -U for short). +When multiline mode is enabled, new line characters can be matched.", + ) + } else { + msg + } +} + +/// Possibly suggest the `-a/--text` flag. +fn suggest_text(msg: String) -> String { + if msg.contains("pattern contains \"\\0\"") { + format!( + "{msg} + +Consider enabling text mode with the --text flag (or -a for short). Otherwise, +binary detection is enabled and matching a NUL byte is impossible.", + ) + } else { + msg + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/lowargs.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/lowargs.rs new file mode 100644 index 000000000..184c96ae8 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/lowargs.rs @@ -0,0 +1,758 @@ +/*! +Provides the definition of low level arguments from CLI flags. +*/ + +use std::{ + ffi::{OsStr, OsString}, + path::PathBuf, +}; + +use { + bstr::{BString, ByteVec}, + grep::printer::{HyperlinkFormat, UserColorSpec}, +}; + +/// A collection of "low level" arguments. +/// +/// The "low level" here is meant to constrain this type to be as close to the +/// actual CLI flags and arguments as possible. Namely, other than some +/// convenience types to help validate flag values and deal with overrides +/// between flags, these low level arguments do not contain any higher level +/// abstractions. +/// +/// Another self-imposed constraint is that populating low level arguments +/// should not require anything other than validating what the user has +/// provided. For example, low level arguments should not contain a +/// `HyperlinkConfig`, since in order to get a full configuration, one needs to +/// discover the hostname of the current system (which might require running a +/// binary or a syscall). 
+/// +/// Low level arguments are populated by the parser directly via the `update` +/// method on the corresponding implementation of the `Flag` trait. +#[derive(Debug, Default)] +pub(crate) struct LowArgs { + // Essential arguments. + pub(crate) special: Option, + pub(crate) mode: Mode, + pub(crate) positional: Vec, + pub(crate) patterns: Vec, + // Everything else, sorted lexicographically. + pub(crate) binary: BinaryMode, + pub(crate) boundary: Option, + pub(crate) buffer: BufferMode, + pub(crate) byte_offset: bool, + pub(crate) case: CaseMode, + pub(crate) color: ColorChoice, + pub(crate) colors: Vec, + pub(crate) column: Option, + pub(crate) context: ContextMode, + pub(crate) context_separator: ContextSeparator, + pub(crate) crlf: bool, + pub(crate) dfa_size_limit: Option, + pub(crate) encoding: EncodingMode, + pub(crate) engine: EngineChoice, + pub(crate) field_context_separator: FieldContextSeparator, + pub(crate) field_match_separator: FieldMatchSeparator, + pub(crate) fixed_strings: bool, + pub(crate) follow: bool, + pub(crate) glob_case_insensitive: bool, + pub(crate) globs: Vec, + pub(crate) heading: Option, + pub(crate) hidden: bool, + pub(crate) hostname_bin: Option, + pub(crate) hyperlink_format: HyperlinkFormat, + pub(crate) iglobs: Vec, + pub(crate) ignore_file: Vec, + pub(crate) ignore_file_case_insensitive: bool, + pub(crate) include_zero: bool, + pub(crate) invert_match: bool, + pub(crate) line_number: Option, + pub(crate) logging: Option, + pub(crate) max_columns: Option, + pub(crate) max_columns_preview: bool, + pub(crate) max_count: Option, + pub(crate) max_depth: Option, + pub(crate) max_filesize: Option, + pub(crate) mmap: MmapMode, + pub(crate) multiline: bool, + pub(crate) multiline_dotall: bool, + pub(crate) no_config: bool, + pub(crate) no_ignore_dot: bool, + pub(crate) no_ignore_exclude: bool, + pub(crate) no_ignore_files: bool, + pub(crate) no_ignore_global: bool, + pub(crate) no_ignore_messages: bool, + pub(crate) no_ignore_parent: 
bool, + pub(crate) no_ignore_vcs: bool, + pub(crate) no_messages: bool, + pub(crate) no_require_git: bool, + pub(crate) no_unicode: bool, + pub(crate) null: bool, + pub(crate) null_data: bool, + pub(crate) one_file_system: bool, + pub(crate) only_matching: bool, + pub(crate) path_separator: Option, + pub(crate) pre: Option, + pub(crate) pre_glob: Vec, + pub(crate) quiet: bool, + pub(crate) regex_size_limit: Option, + pub(crate) replace: Option, + pub(crate) search_zip: bool, + pub(crate) sort: Option, + pub(crate) stats: bool, + pub(crate) stop_on_nonmatch: bool, + pub(crate) threads: Option, + pub(crate) trim: bool, + pub(crate) type_changes: Vec, + pub(crate) unrestricted: usize, + pub(crate) vimgrep: bool, + pub(crate) with_filename: Option, +} + +/// A "special" mode that supercedes everything else. +/// +/// When one of these modes is present, it overrides everything else and causes +/// ripgrep to short-circuit. In particular, we avoid converting low-level +/// argument types into higher level arguments types that can fail for various +/// reasons related to the environment. (Parsing the low-level arguments can +/// fail too, but usually not in a way that can't be worked around by removing +/// the corresponding arguments from the CLI command.) This is overall a hedge +/// to ensure that version and help information are basically always available. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum SpecialMode { + /// Show a condensed version of "help" output. Generally speaking, this + /// shows each flag and an extremely terse description of that flag on + /// a single line. This corresponds to the `-h` flag. + HelpShort, + /// Shows a very verbose version of the "help" output. The docs for some + /// flags will be paragraphs long. This corresponds to the `--help` flag. + HelpLong, + /// Show condensed version information. e.g., `ripgrep x.y.z`. + VersionShort, + /// Show verbose version information. 
Includes "short" information as well
+    /// as features included in the build.
+    VersionLong,
+    /// Show PCRE2's version information, or an error if this version of
+    /// ripgrep wasn't compiled with PCRE2 support.
+    VersionPCRE2,
+}
+
+/// The overall mode that ripgrep should operate in.
+///
+/// If ripgrep were designed without the legacy of grep, these would probably
+/// be sub-commands? Perhaps not, since they aren't as frequently used.
+///
+/// The point of putting these in one enum is that they are all mutually
+/// exclusive and override one another.
+///
+/// Note that -h/--help and -V/--version are not included in this because
+/// they always override everything else, regardless of where they appear
+/// in the command line. They are treated as "special" modes that short-circuit
+/// ripgrep's usual flow.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum Mode {
+    /// ripgrep will execute a search of some kind.
+    Search(SearchMode),
+    /// Show the files that *would* be searched, but don't actually search
+    /// them.
+    Files,
+    /// List all file type definitions configured, including the default file
+    /// types and any additional file types added to the command line.
+    Types,
+    /// Generate various things like the man page and completion files.
+    Generate(GenerateMode),
+}
+
+impl Default for Mode {
+    fn default() -> Mode {
+        Mode::Search(SearchMode::Standard)
+    }
+}
+
+impl Mode {
+    /// Update this mode to the new mode while implementing various override
+    /// semantics. For example, a search mode cannot override a non-search
+    /// mode.
+    pub(crate) fn update(&mut self, new: Mode) {
+        match *self {
+            // If we're in a search mode, then anything can override it.
+            Mode::Search(_) => *self = new,
+            _ => {
+                // Once we're in a non-search mode, other non-search modes
+                // can override it. But search modes cannot. So for example,
+                // `--files -l` will still be Mode::Files.
+                //
+                // The check must be on `new`: in this arm `*self` is never a
+                // search mode, so testing `*self` would accept every update.
+                if !matches!(new, Mode::Search(_)) {
+                    *self = new;
+                }
+            }
+        }
+    }
+}
+
+/// The kind of search that ripgrep is going to perform.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum SearchMode {
+    /// The default standard mode of operation. ripgrep looks for matches and
+    /// prints them when found.
+    ///
+    /// There is no specific flag for this mode since it's the default. But
+    /// some of the modes below, like JSON, have negation flags like --no-json
+    /// that let you revert back to this default mode.
+    Standard,
+    /// Show files containing at least one match.
+    FilesWithMatches,
+    /// Show files that don't contain any matches.
+    FilesWithoutMatch,
+    /// Show files containing at least one match and the number of matching
+    /// lines.
+    Count,
+    /// Show files containing at least one match and the total number of
+    /// matches.
+    CountMatches,
+    /// Print matches in a JSON lines format.
+    JSON,
+}
+
+/// The thing to generate via the --generate flag.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum GenerateMode {
+    /// Generate the raw roff used for the man page.
+    Man,
+    /// Completions for bash.
+    CompleteBash,
+    /// Completions for zsh.
+    CompleteZsh,
+    /// Completions for fish.
+    CompleteFish,
+    /// Completions for PowerShell.
+    CompletePowerShell,
+}
+
+/// Indicates how ripgrep should treat binary data.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum BinaryMode {
+    /// Automatically determine the binary mode to use. Essentially, when
+    /// a file is searched explicitly, then it will be searched using the
+    /// `SearchAndSuppress` strategy. Otherwise, it will be searched in a way
+    /// that attempts to skip binary files as much as possible. That is, once
+    /// a file is classified as binary, searching will immediately stop.
+    Auto,
+    /// Search files even when they have binary data, but if a match is found,
+    /// suppress it and emit a warning.
+    ///
+    /// In this mode, `NUL` bytes are replaced with line terminators.
This is
+    /// a heuristic meant to reduce heap memory usage, since true binary data
+    /// isn't line oriented. If one attempts to treat such data as line
+    /// oriented, then one may wind up with impractically large lines. For
+    /// example, many binary files contain very long runs of NUL bytes.
+    SearchAndSuppress,
+    /// Treat all files as if they were plain text. There's no skipping and no
+    /// replacement of `NUL` bytes with line terminators.
+    AsText,
+}
+
+impl Default for BinaryMode {
+    fn default() -> BinaryMode {
+        BinaryMode::Auto
+    }
+}
+
+/// Indicates what kind of boundary mode to use (line or word).
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum BoundaryMode {
+    /// Only allow matches when surrounded by line boundaries.
+    Line,
+    /// Only allow matches when surrounded by word boundaries.
+    Word,
+}
+
+/// Indicates the buffer mode that ripgrep should use when printing output.
+///
+/// The default is `Auto`.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum BufferMode {
+    /// Select the buffer mode, 'line' or 'block', automatically based on
+    /// whether stdout is connected to a tty.
+    Auto,
+    /// Flush the output buffer whenever a line terminator is seen.
+    ///
+    /// This is useful when one wants to see search results more immediately,
+    /// for example, with `tail -f`.
+    Line,
+    /// Flush the output buffer whenever it reaches some fixed size. The size
+    /// is usually big enough to hold many lines.
+    ///
+    /// This is useful for maximum performance, particularly when printing
+    /// lots of results.
+    Block,
+}
+
+impl Default for BufferMode {
+    fn default() -> BufferMode {
+        BufferMode::Auto
+    }
+}
+
+/// Indicates the case mode for how to interpret all patterns given to ripgrep.
+///
+/// The default is `Sensitive`.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum CaseMode {
+    /// Patterns are matched case sensitively. i.e., `a` does not match `A`.
+    Sensitive,
+    /// Patterns are matched case insensitively. i.e., `a` does match `A`.
+ Insensitive, + /// Patterns are automatically matched case insensitively only when they + /// consist of all lowercase literal characters. For example, the pattern + /// `a` will match `A` but `A` will not match `a`. + Smart, +} + +impl Default for CaseMode { + fn default() -> CaseMode { + CaseMode::Sensitive + } +} + +/// Indicates whether ripgrep should include color/hyperlinks in its output. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ColorChoice { + /// Color and hyperlinks will never be used. + Never, + /// Color and hyperlinks will be used only when stdout is connected to a + /// tty. + Auto, + /// Color will always be used. + Always, + /// Color will always be used and only ANSI escapes will be used. + /// + /// This only makes sense in the context of legacy Windows console APIs. + /// At time of writing, ripgrep will try to use the legacy console APIs + /// if ANSI coloring isn't believed to be possible. This option will force + /// ripgrep to use ANSI coloring. + Ansi, +} + +impl Default for ColorChoice { + fn default() -> ColorChoice { + ColorChoice::Auto + } +} + +impl ColorChoice { + /// Convert this color choice to the corresponding termcolor type. + pub(crate) fn to_termcolor(&self) -> termcolor::ColorChoice { + match *self { + ColorChoice::Never => termcolor::ColorChoice::Never, + ColorChoice::Auto => termcolor::ColorChoice::Auto, + ColorChoice::Always => termcolor::ColorChoice::Always, + ColorChoice::Ansi => termcolor::ColorChoice::AlwaysAnsi, + } + } +} + +/// Indicates the line context options ripgrep should use for output. +/// +/// The default is no context at all. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ContextMode { + /// All lines will be printed. That is, the context is unbounded. + Passthru, + /// Only show a certain number of lines before and after each match. 
+ Limited(ContextModeLimited), +} + +impl Default for ContextMode { + fn default() -> ContextMode { + ContextMode::Limited(ContextModeLimited::default()) + } +} + +impl ContextMode { + /// Set the "before" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "before" and `0` for + /// "after." + pub(crate) fn set_before(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: Some(lines), + after: None, + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut before, + .. + }) => *before = Some(lines), + } + } + + /// Set the "after" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "after" and `0` for + /// "before." + pub(crate) fn set_after(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: Some(lines), + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut after, .. + }) => *after = Some(lines), + } + } + + /// Set the "both" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "both" and `None` for + /// "before" and "after". + pub(crate) fn set_both(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: None, + both: Some(lines), + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut both, .. + }) => *both = Some(lines), + } + } + + /// A convenience function for use in tests that returns the limited + /// context. If this mode isn't limited, then it panics. 
+ #[cfg(test)] + pub(crate) fn get_limited(&self) -> (usize, usize) { + match *self { + ContextMode::Passthru => unreachable!("context mode is passthru"), + ContextMode::Limited(ref limited) => limited.get(), + } + } +} + +/// A context mode for a finite number of lines. +/// +/// Namely, this indicates that a specific number of lines (possibly zero) +/// should be shown before and/or after each matching line. +/// +/// Note that there is a subtle difference between `Some(0)` and `None`. In the +/// former case, it happens when `0` is given explicitly, where as `None` is +/// the default value and occurs when no value is specified. +/// +/// `both` is only set by the -C/--context flag. The reason why we don't just +/// set before = after = --context is because the before and after context +/// settings always take precedent over the -C/--context setting, regardless of +/// order. Thus, we need to keep track of them separately. +#[derive(Debug, Default, Eq, PartialEq)] +pub(crate) struct ContextModeLimited { + before: Option, + after: Option, + both: Option, +} + +impl ContextModeLimited { + /// Returns the specific number of contextual lines that should be shown + /// around each match. This takes proper precedent into account, i.e., + /// that `before` and `after` both partially override `both` in all cases. + /// + /// By default, this returns `(0, 0)`. + pub(crate) fn get(&self) -> (usize, usize) { + let (mut before, mut after) = + self.both.map(|lines| (lines, lines)).unwrap_or((0, 0)); + // --before and --after always override --context, regardless + // of where they appear relative to each other. + if let Some(lines) = self.before { + before = lines; + } + if let Some(lines) = self.after { + after = lines; + } + (before, after) + } +} + +/// Represents the separator to use between non-contiguous sections of +/// contextual lines. +/// +/// The default is `--`. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct ContextSeparator(Option); + +impl Default for ContextSeparator { + fn default() -> ContextSeparator { + ContextSeparator(Some(BString::from("--"))) + } +} + +impl ContextSeparator { + /// Create a new context separator from the user provided argument. This + /// handles unescaping. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(ContextSeparator(Some(Vec::unescape_bytes(string).into()))) + } + + /// Creates a new separator that intructs the printer to disable contextual + /// separators entirely. + pub(crate) fn disabled() -> ContextSeparator { + ContextSeparator(None) + } + + /// Return the raw bytes of this separator. + /// + /// If context separators were disabled, then this returns `None`. + /// + /// Note that this may return a `Some` variant with zero bytes. + pub(crate) fn into_bytes(self) -> Option> { + self.0.map(|sep| sep.into()) + } +} + +/// The encoding mode the searcher will use. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EncodingMode { + /// Use only BOM sniffing to auto-detect an encoding. + Auto, + /// Use an explicit encoding forcefully, but let BOM sniffing override it. + Some(grep::searcher::Encoding), + /// Use no explicit encoding and disable all BOM sniffing. This will + /// always result in searching the raw bytes, regardless of their + /// true encoding. + Disabled, +} + +impl Default for EncodingMode { + fn default() -> EncodingMode { + EncodingMode::Auto + } +} + +/// The regex engine to use. +/// +/// The default is `Default`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EngineChoice { + /// Uses the default regex engine: Rust's `regex` crate. 
+ /// + /// (Well, technically it uses `regex-automata`, but `regex-automata` is + /// the implementation of the `regex` crate.) + Default, + /// Dynamically select the right engine to use. + /// + /// This works by trying to use the default engine, and if the pattern does + /// not compile, it switches over to the PCRE2 engine if it's available. + Auto, + /// Uses the PCRE2 regex engine if it's available. + PCRE2, +} + +impl Default for EngineChoice { + fn default() -> EngineChoice { + EngineChoice::Default + } +} + +/// The field context separator to use to between metadata for each contextual +/// line. +/// +/// The default is `-`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct FieldContextSeparator(BString); + +impl Default for FieldContextSeparator { + fn default() -> FieldContextSeparator { + FieldContextSeparator(BString::from("-")) + } +} + +impl FieldContextSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldContextSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The field match separator to use to between metadata for each matching +/// line. +/// +/// The default is `:`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct FieldMatchSeparator(BString); + +impl Default for FieldMatchSeparator { + fn default() -> FieldMatchSeparator { + FieldMatchSeparator(BString::from(":")) + } +} + +impl FieldMatchSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. 
+ pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldMatchSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The type of logging to do. `Debug` emits some details while `Trace` emits +/// much more. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum LoggingMode { + Debug, + Trace, +} + +/// Indicates when to use memory maps. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum MmapMode { + /// This instructs ripgrep to use heuristics for selecting when to and not + /// to use memory maps for searching. + Auto, + /// This instructs ripgrep to always try memory maps when possible. (Memory + /// maps are not possible to use in all circumstances, for example, for + /// virtual files.) + AlwaysTryMmap, + /// Never use memory maps under any circumstances. This includes even + /// when multi-line search is enabled where ripgrep will read the entire + /// contents of a file on to the heap before searching it. + Never, +} + +impl Default for MmapMode { + fn default() -> MmapMode { + MmapMode::Auto + } +} + +/// Represents a source of patterns that ripgrep should search for. +/// +/// The reason to unify these is so that we can retain the order of `-f/--flag` +/// and `-e/--regexp` flags relative to one another. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum PatternSource { + /// Comes from the `-e/--regexp` flag. + Regexp(String), + /// Comes from the `-f/--file` flag. + File(PathBuf), +} + +/// The sort criteria, if present. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct SortMode { + /// Whether to reverse the sort criteria (i.e., descending order). 
+ pub(crate) reverse: bool, + /// The actual sorting criteria. + pub(crate) kind: SortModeKind, +} + +/// The criteria to use for sorting. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum SortModeKind { + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. + Created, +} + +impl SortMode { + /// Checks whether the selected sort mode is supported. If it isn't, an + /// error (hopefully explaining why) is returned. + pub(crate) fn supported(&self) -> anyhow::Result<()> { + match self.kind { + SortModeKind::Path => Ok(()), + SortModeKind::LastModified => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.modified()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by last modified isn't supported: {err}" + ); + } + SortModeKind::LastAccessed => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.accessed()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by last accessed isn't supported: {err}" + ); + } + SortModeKind::Created => { + let md = std::env::current_exe() + .and_then(|p| p.metadata()) + .and_then(|md| md.created()); + let Err(err) = md else { return Ok(()) }; + anyhow::bail!( + "sorting by creation time isn't supported: {err}" + ); + } + } + } +} + +/// A single instance of either a change or a selection of one ripgrep's +/// file types. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum TypeChange { + /// Clear the given type from ripgrep. + Clear { name: String }, + /// Add the given type definition (name and glob) to ripgrep. + Add { def: String }, + /// Select the given type for filtering. + Select { name: String }, + /// Select the given type for filtering but negate it. 
+ Negate { name: String }, +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/mod.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/mod.rs new file mode 100644 index 000000000..54aec5725 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/mod.rs @@ -0,0 +1,302 @@ +/*! +Defines ripgrep's command line interface. + +This modules deals with everything involving ripgrep's flags and positional +arguments. This includes generating shell completions, `--help` output and even +ripgrep's man page. It's also responsible for parsing and validating every +flag (including reading ripgrep's config file), and manages the contact points +between these flags and ripgrep's cast of supporting libraries. For example, +once [`HiArgs`] has been created, it knows how to create a multi threaded +recursive directory traverser. +*/ +use std::{ + ffi::OsString, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +pub(crate) use crate::flags::{ + complete::{ + bash::generate as generate_complete_bash, + fish::generate as generate_complete_fish, + powershell::generate as generate_complete_powershell, + zsh::generate as generate_complete_zsh, + }, + doc::{ + help::{ + generate_long as generate_help_long, + generate_short as generate_help_short, + }, + man::generate as generate_man_page, + version::{ + generate_long as generate_version_long, + generate_pcre2 as generate_version_pcre2, + generate_short as generate_version_short, + }, + }, + hiargs::HiArgs, + lowargs::{GenerateMode, Mode, SearchMode, SpecialMode}, + parse::{parse, ParseResult}, +}; + +mod complete; +mod config; +mod defs; +mod doc; +mod hiargs; +mod lowargs; +mod parse; + +/// A trait that encapsulates the definition of an optional flag for ripgrep. +/// +/// This trait is meant to be used via dynamic dispatch. 
Namely, the `defs` +/// module provides a single global slice of `&dyn Flag` values correspondings +/// to all of the flags in ripgrep. +/// +/// ripgrep's required positional arguments are handled by the parser and by +/// the conversion from low-level arguments to high level arguments. Namely, +/// all of ripgrep's positional arguments are treated as file paths, except +/// in certain circumstances where the first argument is treated as a regex +/// pattern. +/// +/// Note that each implementation of this trait requires a long flag name, +/// but can also optionally have a short version and even a negation flag. +/// For example, the `-E/--encoding` flag accepts a value, but it also has a +/// `--no-encoding` negation flag for reverting back to "automatic" encoding +/// detection. All three of `-E`, `--encoding` and `--no-encoding` are provided +/// by a single implementation of this trait. +/// +/// ripgrep only supports flags that are switches or flags that accept a single +/// value. Flags that accept multiple values are an unsupported abberation. +trait Flag: Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static { + /// Returns true if this flag is a switch. When a flag is a switch, the + /// CLI parser will not look for a value after the flag is seen. + fn is_switch(&self) -> bool; + + /// A short single byte name for this flag. This returns `None` by default, + /// which signifies that the flag has no short name. + /// + /// The byte returned must be an ASCII codepoint that is a `.` or is + /// alpha-numeric. + fn name_short(&self) -> Option { + None + } + + /// Returns the long name of this flag. All flags must have a "long" name. + /// + /// The long name must be at least 2 bytes, and all of its bytes must be + /// ASCII codepoints that are either `-` or alpha-numeric. + fn name_long(&self) -> &'static str; + + /// Returns a list of aliases for this flag. + /// + /// The aliases must follow the same rules as `Flag::name_long`. 
+ /// + /// By default, an empty slice is returned. + fn aliases(&self) -> &'static [&'static str] { + &[] + } + + /// Returns a negated name for this flag. The negation of a flag is + /// intended to have the opposite meaning of a flag or to otherwise turn + /// something "off" or revert it to its default behavior. + /// + /// Negated flags are not listed in their own section in the `-h/--help` + /// output or man page. Instead, they are automatically mentioned at the + /// end of the documentation section of the flag they negated. + /// + /// The aliases must follow the same rules as `Flag::name_long`. + /// + /// By default, a flag has no negation and this returns `None`. + fn name_negated(&self) -> Option<&'static str> { + None + } + + /// Returns the variable name describing the type of value this flag + /// accepts. This should always be set for non-switch flags and never set + /// for switch flags. + /// + /// For example, the `--max-count` flag has its variable name set to `NUM`. + /// + /// The convention is to capitalize variable names. + /// + /// By default this returns `None`. + fn doc_variable(&self) -> Option<&'static str> { + None + } + + /// Returns the category of this flag. + /// + /// Every flag must have a single category. Categories are used to organize + /// flags in the generated documentation. + fn doc_category(&self) -> Category; + + /// A (very) short documentation string describing what this flag does. + /// + /// This may sacrifice "proper English" in order to be as terse as + /// possible. Generally, we try to ensure that `rg -h` doesn't have any + /// lines that exceed 79 columns. + fn doc_short(&self) -> &'static str; + + /// A (possibly very) longer documentation string describing in full + /// detail what this flag does. This should be in mandoc/mdoc format. + fn doc_long(&self) -> &'static str; + + /// If this is a non-switch flag that accepts a small set of specific + /// values, then this should list them. 
+ /// + /// This returns an empty slice by default. + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn completion_type(&self) -> CompletionType { + CompletionType::Other + } + + /// Given the parsed value (which might just be a switch), this should + /// update the state in `args` based on the value given for this flag. + /// + /// This may update state for other flags as appropriate. + /// + /// The `-V/--version` and `-h/--help` flags are treated specially in the + /// parser and should do nothing here. + /// + /// By convention, implementations should generally not try to "do" + /// anything other than validate the value given. For example, the + /// implementation for `--hostname-bin` should not try to resolve the + /// hostname to use by running the binary provided. That should be saved + /// for a later step. This convention is used to ensure that getting the + /// low-level arguments is as reliable and quick as possible. It also + /// ensures that "doing something" occurs a minimal number of times. For + /// example, by avoiding trying to find the hostname here, we can do it + /// once later no matter how many times `--hostname-bin` is provided. + /// + /// Implementations should not include the flag name in the error message + /// returned. The flag name is included automatically by the parser. + fn update( + &self, + value: FlagValue, + args: &mut crate::flags::lowargs::LowArgs, + ) -> anyhow::Result<()>; +} + +/// The category that a flag belongs to. +/// +/// Categories are used to organize flags into "logical" groups in the +/// generated documentation. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +enum Category { + /// Flags related to how ripgrep reads its input. Its "input" generally + /// consists of the patterns it is trying to match and the haystacks it is + /// trying to search. + Input, + /// Flags related to the operation of the search itself. 
For example, + /// whether case insensitive matching is enabled. + Search, + /// Flags related to how ripgrep filters haystacks. For example, whether + /// to respect gitignore files or not. + Filter, + /// Flags related to how ripgrep shows its search results. For example, + /// whether to show line numbers or not. + Output, + /// Flags related to changing ripgrep's output at a more fundamental level. + /// For example, flags like `--count` suppress printing of individual + /// lines, and instead just print the total count of matches for each file + /// searched. + OutputModes, + /// Flags related to logging behavior such as emitting non-fatal error + /// messages or printing search statistics. + Logging, + /// Other behaviors not related to ripgrep's core functionality. For + /// example, printing the file type globbing rules, or printing the list + /// of files ripgrep would search without actually searching them. + OtherBehaviors, +} + +impl Category { + /// Returns a string representation of this category. + /// + /// This string is the name of the variable used in various templates for + /// generated documentation. This name can be used for interpolation. + fn as_str(&self) -> &'static str { + match *self { + Category::Input => "input", + Category::Search => "search", + Category::Filter => "filter", + Category::Output => "output", + Category::OutputModes => "output-modes", + Category::Logging => "logging", + Category::OtherBehaviors => "other-behaviors", + } + } +} + +/// The kind of argument a flag accepts, to be used for shell completions. +#[derive(Clone, Copy, Debug)] +enum CompletionType { + /// No special category. is_switch() and doc_choices() may apply. + Other, + /// A path to a file. + Filename, + /// A command in $PATH. + Executable, + /// The name of a file type, as used by e.g. --type. + Filetype, + /// The name of an encoding_rs encoding, as used by --encoding. + Encoding, +} + +/// Represents a value parsed from the command line. 
+/// +/// This doesn't include the corresponding flag, but values come in one of +/// two forms: a switch (on or off) or an arbitrary value. +/// +/// Note that the CLI doesn't directly support negated switches. For example, +/// you can'd do anything like `-n=false` or any of that nonsense. Instead, +/// the CLI parser knows about which flag names are negations and which aren't +/// (courtesy of the `Flag` trait). If a flag given is known as a negation, +/// then a `FlagValue::Switch(false)` value is passed into `Flag::update`. +#[derive(Debug)] +enum FlagValue { + /// A flag that is either on or off. + Switch(bool), + /// A flag that comes with an arbitrary user value. + Value(OsString), +} + +impl FlagValue { + /// Return the yes or no value of this switch. + /// + /// If this flag value is not a switch, then this panics. + /// + /// This is useful when writing the implementation of `Flag::update`. + /// namely, callers usually know whether a switch or a value is expected. + /// If a flag is something different, then it indicates a bug, and thus a + /// panic is acceptable. + fn unwrap_switch(self) -> bool { + match self { + FlagValue::Switch(yes) => yes, + FlagValue::Value(_) => { + unreachable!("got flag value but expected switch") + } + } + } + + /// Return the user provided value of this flag. + /// + /// If this flag is a switch, then this panics. + /// + /// This is useful when writing the implementation of `Flag::update`. + /// namely, callers usually know whether a switch or a value is expected. + /// If a flag is something different, then it indicates a bug, and thus a + /// panic is acceptable. 
+ fn unwrap_value(self) -> OsString { + match self { + FlagValue::Switch(_) => { + unreachable!("got switch but expected flag value") + } + FlagValue::Value(v) => v, + } + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/parse.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/parse.rs new file mode 100644 index 000000000..37a74b6f1 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/flags/parse.rs @@ -0,0 +1,476 @@ +/*! +Parses command line arguments into a structured and typed representation. +*/ + +use std::{borrow::Cow, collections::BTreeSet, ffi::OsString}; + +use anyhow::Context; + +use crate::flags::{ + defs::FLAGS, + hiargs::HiArgs, + lowargs::{LoggingMode, LowArgs, SpecialMode}, + Flag, FlagValue, +}; + +/// The result of parsing CLI arguments. +/// +/// This is basically a `anyhow::Result`, but with one extra variant that is +/// inhabited whenever ripgrep should execute a "special" mode. That is, when a +/// user provides the `-h/--help` or `-V/--version` flags. +/// +/// This special variant exists to allow CLI parsing to short circuit as +/// quickly as is reasonable. For example, it lets CLI parsing avoid reading +/// ripgrep's configuration and converting low level arguments into a higher +/// level representation. +#[derive(Debug)] +pub(crate) enum ParseResult { + Special(SpecialMode), + Ok(T), + Err(anyhow::Error), +} + +impl ParseResult { + /// If this result is `Ok`, then apply `then` to it. Otherwise, return this + /// result unchanged. + fn and_then( + self, + mut then: impl FnMut(T) -> ParseResult, + ) -> ParseResult { + match self { + ParseResult::Special(mode) => ParseResult::Special(mode), + ParseResult::Ok(t) => then(t), + ParseResult::Err(err) => ParseResult::Err(err), + } + } +} + +/// Parse CLI arguments and convert then to their high level representation. 
+pub(crate) fn parse() -> ParseResult { + parse_low().and_then(|low| match HiArgs::from_low_args(low) { + Ok(hi) => ParseResult::Ok(hi), + Err(err) => ParseResult::Err(err), + }) +} + +/// Parse CLI arguments only into their low level representation. +/// +/// This takes configuration into account. That is, it will try to read +/// `RIPGREP_CONFIG_PATH` and prepend any arguments found there to the +/// arguments passed to this process. +/// +/// This will also set one-time global state flags, such as the log level and +/// whether messages should be printed. +fn parse_low() -> ParseResult { + if let Err(err) = crate::logger::Logger::init() { + let err = anyhow::anyhow!("failed to initialize logger: {err}"); + return ParseResult::Err(err); + } + + let parser = Parser::new(); + let mut low = LowArgs::default(); + if let Err(err) = parser.parse(std::env::args_os().skip(1), &mut low) { + return ParseResult::Err(err); + } + // Even though we haven't parsed the config file yet (assuming it exists), + // we can still use the arguments given on the CLI to setup ripgrep's + // logging preferences. Even if the config file changes them in some way, + // it's really the best we can do. This way, for example, folks can pass + // `--trace` and see any messages logged during config file parsing. + set_log_levels(&low); + // Before we try to take configuration into account, we can bail early + // if a special mode was enabled. This is basically only for version and + // help output which shouldn't be impacted by extra configuration. + if let Some(special) = low.special.take() { + return ParseResult::Special(special); + } + // If the end user says no config, then respect it. + if low.no_config { + log::debug!("not reading config files because --no-config is present"); + return ParseResult::Ok(low); + } + // Look for arguments from a config file. If we got nothing (whether the + // file is empty or RIPGREP_CONFIG_PATH wasn't set), then we don't need + // to re-parse. 
+ let config_args = crate::flags::config::args(); + if config_args.is_empty() { + log::debug!("no extra arguments found from configuration file"); + return ParseResult::Ok(low); + } + // The final arguments are just the arguments from the CLI appending to + // the end of the config arguments. + let mut final_args = config_args; + final_args.extend(std::env::args_os().skip(1)); + + // Now do the CLI parsing dance again. + let mut low = LowArgs::default(); + if let Err(err) = parser.parse(final_args.into_iter(), &mut low) { + return ParseResult::Err(err); + } + // Reset the message and logging levels, since they could have changed. + set_log_levels(&low); + ParseResult::Ok(low) +} + +/// Sets global state flags that control logging based on low-level arguments. +fn set_log_levels(low: &LowArgs) { + crate::messages::set_messages(!low.no_messages); + crate::messages::set_ignore_messages(!low.no_ignore_messages); + match low.logging { + Some(LoggingMode::Trace) => { + log::set_max_level(log::LevelFilter::Trace) + } + Some(LoggingMode::Debug) => { + log::set_max_level(log::LevelFilter::Debug) + } + None => log::set_max_level(log::LevelFilter::Warn), + } +} + +/// Parse the sequence of CLI arguments given a low level typed set of +/// arguments. +/// +/// This is exposed for testing that the correct low-level arguments are parsed +/// from a CLI. It just runs the parser once over the CLI arguments. It doesn't +/// setup logging or read from a config file. +/// +/// This assumes the iterator given does *not* begin with the binary name. +#[cfg(test)] +pub(crate) fn parse_low_raw( + rawargs: impl IntoIterator>, +) -> anyhow::Result { + let mut args = LowArgs::default(); + Parser::new().parse(rawargs, &mut args)?; + Ok(args) +} + +/// Return the metadata for the flag of the given name. +pub(super) fn lookup(name: &str) -> Option<&'static dyn Flag> { + // N.B. Creating a new parser might look expensive, but it only builds + // the lookup trie exactly once. 
That is, we get a `&'static Parser` from + // `Parser::new()`. + match Parser::new().find_long(name) { + FlagLookup::Match(&FlagInfo { flag, .. }) => Some(flag), + _ => None, + } +} + +/// A parser for turning a sequence of command line arguments into a more +/// strictly typed set of arguments. +#[derive(Debug)] +struct Parser { + /// A single map that contains all possible flag names. This includes + /// short and long names, aliases and negations. This maps those names to + /// indices into `info`. + map: FlagMap, + /// A map from IDs returned by the `map` to the corresponding flag + /// information. + info: Vec, +} + +impl Parser { + /// Create a new parser. + /// + /// This always creates the same parser and only does it once. Callers may + /// call this repeatedly, and the parser will only be built once. + fn new() -> &'static Parser { + use std::sync::OnceLock; + + // Since a parser's state is immutable and completely determined by + // FLAGS, and since FLAGS is a constant, we can initialize it exactly + // once. + static P: OnceLock = OnceLock::new(); + P.get_or_init(|| { + let mut infos = vec![]; + for &flag in FLAGS.iter() { + infos.push(FlagInfo { + flag, + name: Ok(flag.name_long()), + kind: FlagInfoKind::Standard, + }); + for alias in flag.aliases() { + infos.push(FlagInfo { + flag, + name: Ok(alias), + kind: FlagInfoKind::Alias, + }); + } + if let Some(byte) = flag.name_short() { + infos.push(FlagInfo { + flag, + name: Err(byte), + kind: FlagInfoKind::Standard, + }); + } + if let Some(name) = flag.name_negated() { + infos.push(FlagInfo { + flag, + name: Ok(name), + kind: FlagInfoKind::Negated, + }); + } + } + let map = FlagMap::new(&infos); + Parser { map, info: infos } + }) + } + + /// Parse the given CLI arguments into a low level representation. + /// + /// The iterator given should *not* start with the binary name. 
+ fn parse(&self, rawargs: I, args: &mut LowArgs) -> anyhow::Result<()> + where + I: IntoIterator, + O: Into, + { + let mut p = lexopt::Parser::from_args(rawargs); + while let Some(arg) = p.next().context("invalid CLI arguments")? { + let lookup = match arg { + lexopt::Arg::Value(value) => { + args.positional.push(value); + continue; + } + lexopt::Arg::Short(ch) if ch == 'h' => { + // Special case -h/--help since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::HelpShort); + continue; + } + lexopt::Arg::Short(ch) if ch == 'V' => { + // Special case -V/--version since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::VersionShort); + continue; + } + lexopt::Arg::Short(ch) => self.find_short(ch), + lexopt::Arg::Long(name) if name == "help" => { + // Special case -h/--help since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::HelpLong); + continue; + } + lexopt::Arg::Long(name) if name == "version" => { + // Special case -V/--version since behavior is different + // based on whether short or long flag is given. + args.special = Some(SpecialMode::VersionLong); + continue; + } + lexopt::Arg::Long(name) => self.find_long(name), + }; + let mat = match lookup { + FlagLookup::Match(mat) => mat, + FlagLookup::UnrecognizedShort(name) => { + anyhow::bail!("unrecognized flag -{name}") + } + FlagLookup::UnrecognizedLong(name) => { + let mut msg = format!("unrecognized flag --{name}"); + if let Some(suggest_msg) = suggest(&name) { + msg = format!("{msg}\n\n{suggest_msg}"); + } + anyhow::bail!("{msg}") + } + }; + let value = if matches!(mat.kind, FlagInfoKind::Negated) { + // Negated flags are always switches, even if the non-negated + // flag is not. For example, --context-separator accepts a + // value, but --no-context-separator does not. 
+ FlagValue::Switch(false) + } else if mat.flag.is_switch() { + FlagValue::Switch(true) + } else { + FlagValue::Value(p.value().with_context(|| { + format!("missing value for flag {mat}") + })?) + }; + mat.flag + .update(value, args) + .with_context(|| format!("error parsing flag {mat}"))?; + } + Ok(()) + } + + /// Look for a flag by its short name. + fn find_short(&self, ch: char) -> FlagLookup<'_> { + if !ch.is_ascii() { + return FlagLookup::UnrecognizedShort(ch); + } + let byte = u8::try_from(ch).unwrap(); + let Some(index) = self.map.find(&[byte]) else { + return FlagLookup::UnrecognizedShort(ch); + }; + FlagLookup::Match(&self.info[index]) + } + + /// Look for a flag by its long name. + /// + /// This also works for aliases and negated names. + fn find_long(&self, name: &str) -> FlagLookup<'_> { + let Some(index) = self.map.find(name.as_bytes()) else { + return FlagLookup::UnrecognizedLong(name.to_string()); + }; + FlagLookup::Match(&self.info[index]) + } +} + +/// The result of looking up a flag name. +#[derive(Debug)] +enum FlagLookup<'a> { + /// Lookup found a match and the metadata for the flag is attached. + Match(&'a FlagInfo), + /// The given short name is unrecognized. + UnrecognizedShort(char), + /// The given long name is unrecognized. + UnrecognizedLong(String), +} + +/// The info about a flag associated with a flag's ID in the flag map. +#[derive(Debug)] +struct FlagInfo { + /// The flag object and its associated metadata. + flag: &'static dyn Flag, + /// The actual name that is stored in the Aho-Corasick automaton. When this + /// is a byte, it corresponds to a short single character ASCII flag. The + /// actual pattern that's in the Aho-Corasick automaton is just the single + /// byte. + name: Result<&'static str, u8>, + /// The type of flag that is stored for the corresponding Aho-Corasick + /// pattern. + kind: FlagInfoKind, +} + +/// The kind of flag that is being matched. 
+#[derive(Debug)] +enum FlagInfoKind { + /// A standard flag, e.g., --passthru. + Standard, + /// A negation of a standard flag, e.g., --no-multiline. + Negated, + /// An alias for a standard flag, e.g., --passthrough. + Alias, +} + +impl std::fmt::Display for FlagInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self.name { + Ok(long) => write!(f, "--{long}"), + Err(short) => write!(f, "-{short}", short = char::from(short)), + } + } +} + +/// A map from flag names (short, long, negated and aliases) to their ID. +/// +/// Once an ID is known, it can be used to look up a flag's metadata in the +/// parser's internal state. +#[derive(Debug)] +struct FlagMap { + map: std::collections::HashMap, usize>, +} + +impl FlagMap { + /// Create a new map of flags for the given flag information. + /// + /// The index of each flag info corresponds to its ID. + fn new(infos: &[FlagInfo]) -> FlagMap { + let mut map = std::collections::HashMap::with_capacity(infos.len()); + for (i, info) in infos.iter().enumerate() { + match info.name { + Ok(name) => { + assert_eq!(None, map.insert(name.as_bytes().to_vec(), i)); + } + Err(byte) => { + assert_eq!(None, map.insert(vec![byte], i)); + } + } + } + FlagMap { map } + } + + /// Look for a match of `name` in the given Aho-Corasick automaton. + /// + /// This only returns a match if the one found has a length equivalent to + /// the length of the name given. + fn find(&self, name: &[u8]) -> Option { + self.map.get(name).copied() + } +} + +/// Possibly return a message suggesting flags similar in the name to the one +/// given. +/// +/// The one given should be a flag given by the user (without the leading +/// dashes) that was unrecognized. This attempts to find existing flags that +/// are similar to the one given. 
/// A "bag of words" is a set of ngrams.
///
/// Each ngram is a byte string. It is borrowed from the flag name when
/// possible and owned only when padding had to be added.
type BagOfWords<'a> = BTreeSet<Cow<'a, [u8]>>;

/// Returns the jaccard index (a measure of similarity) between sets of ngrams.
///
/// The result is `|intersection| / |union|`, a value in `[0, 1]`. If both
/// sets are empty, the union is empty too and `0.0` is returned rather than
/// the NaN that `0 / 0` would produce.
///
/// # Panics
///
/// Panics if either count does not fit in a `u32`.
fn jaccard_index(ngrams1: &BagOfWords<'_>, ngrams2: &BagOfWords<'_>) -> f64 {
    let union = u32::try_from(ngrams1.union(ngrams2).count())
        .expect("fewer than u32::MAX flags");
    if union == 0 {
        return 0.0;
    }
    let intersection = u32::try_from(ngrams1.intersection(ngrams2).count())
        .expect("fewer than u32::MAX flags");
    f64::from(intersection) / f64::from(union)
}

/// Returns all 3-grams in the slice given.
///
/// If the slice doesn't contain a 3-gram, then one is artificially created by
/// padding it out with a character that will never appear in a flag name.
fn ngrams(flag_name: &str) -> BagOfWords<'_> {
    // We only allow ASCII flag names, so we can just use bytes.
    let bytes = flag_name.as_bytes();
    match bytes {
        [] => BTreeSet::from([Cow::Owned(b"!!!".to_vec())]),
        [a] => BTreeSet::from([Cow::Owned(vec![*a, b'!', b'!'])]),
        [a, b] => BTreeSet::from([Cow::Owned(vec![*a, *b, b'!'])]),
        // Three or more bytes: every window borrows from `flag_name`.
        _ => bytes.windows(3).map(Cow::Borrowed).collect(),
    }
}
+ pub(crate) fn build_from_result( + &self, + result: Result, + ) -> Option { + match result { + Ok(dent) => self.build(dent), + Err(err) => { + err_message!("{err}"); + None + } + } + } + + /// Create a new haystack using this builder's configuration. + /// + /// If a directory entry could not be created or should otherwise not be + /// searched, then this returns `None` after emitting any relevant log + /// messages. + fn build(&self, dent: ignore::DirEntry) -> Option { + let hay = Haystack { dent, strip_dot_prefix: self.strip_dot_prefix }; + if let Some(err) = hay.dent.error() { + ignore_message!("{err}"); + } + // If this entry was explicitly provided by an end user, then we always + // want to search it. + if hay.is_explicit() { + return Some(hay); + } + // At this point, we only want to search something if it's explicitly a + // file. This omits symlinks. (If ripgrep was configured to follow + // symlinks, then they have already been followed by the directory + // traversal.) + if hay.is_file() { + return Some(hay); + } + // We got nothing. Emit a debug message, but only if this isn't a + // directory. Otherwise, emitting messages for directories is just + // noisy. + if !hay.is_dir() { + log::debug!( + "ignoring {}: failed to pass haystack filter: \ + file type: {:?}, metadata: {:?}", + hay.dent.path().display(), + hay.dent.file_type(), + hay.dent.metadata() + ); + } + None + } + + /// When enabled, if the haystack's file path starts with `./` then it is + /// stripped. + /// + /// This is useful when implicitly searching the current working directory. + pub(crate) fn strip_dot_prefix( + &mut self, + yes: bool, + ) -> &mut HaystackBuilder { + self.strip_dot_prefix = yes; + self + } +} + +/// A haystack is a thing we want to search. +/// +/// Generally, a haystack is either a file or stdin. 
+#[derive(Clone, Debug)] +pub(crate) struct Haystack { + dent: ignore::DirEntry, + strip_dot_prefix: bool, +} + +impl Haystack { + /// Return the file path corresponding to this haystack. + /// + /// If this haystack corresponds to stdin, then a special `` path + /// is returned instead. + pub(crate) fn path(&self) -> &Path { + if self.strip_dot_prefix && self.dent.path().starts_with("./") { + self.dent.path().strip_prefix("./").unwrap() + } else { + self.dent.path() + } + } + + /// Returns true if and only if this entry corresponds to stdin. + pub(crate) fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Returns true if and only if this entry corresponds to a haystack to + /// search that was explicitly supplied by an end user. + /// + /// Generally, this corresponds to either stdin or an explicit file path + /// argument. e.g., in `rg foo some-file ./some-dir/`, `some-file` is + /// an explicit haystack, but, e.g., `./some-dir/some-other-file` is not. + /// + /// However, note that ripgrep does not see through shell globbing. e.g., + /// in `rg foo ./some-dir/*`, `./some-dir/some-other-file` will be treated + /// as an explicit haystack. + pub(crate) fn is_explicit(&self) -> bool { + // stdin is obvious. When an entry has a depth of 0, that means it + // was explicitly provided to our directory iterator, which means it + // was in turn explicitly provided by the end user. The !is_dir check + // means that we want to search files even if their symlinks, again, + // because they were explicitly provided. (And we never want to try + // to search a directory.) + self.is_stdin() || (self.dent.depth() == 0 && !self.is_dir()) + } + + /// Returns true if and only if this haystack points to a directory after + /// following symbolic links. 
+ fn is_dir(&self) -> bool { + let ft = match self.dent.file_type() { + None => return false, + Some(ft) => ft, + }; + if ft.is_dir() { + return true; + } + // If this is a symlink, then we want to follow it to determine + // whether it's a directory or not. + self.dent.path_is_symlink() && self.dent.path().is_dir() + } + + /// Returns true if and only if this haystack points to a file. + fn is_file(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_file()) + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/logger.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/logger.rs new file mode 100644 index 000000000..ce4e0e9d2 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/logger.rs @@ -0,0 +1,72 @@ +/*! +Defines a super simple logger that works with the `log` crate. + +We don't do anything fancy. We just need basic log levels and the ability to +print to stderr. We therefore avoid bringing in extra dependencies just for +this functionality. +*/ + +use log::Log; + +/// The simplest possible logger that logs to stderr. +/// +/// This logger does no filtering. Instead, it relies on the `log` crates +/// filtering via its global max_level setting. +#[derive(Debug)] +pub(crate) struct Logger(()); + +/// A singleton used as the target for an implementation of the `Log` trait. +const LOGGER: &'static Logger = &Logger(()); + +impl Logger { + /// Create a new logger that logs to stderr and initialize it as the + /// global logger. If there was a problem setting the logger, then an + /// error is returned. + pub(crate) fn init() -> Result<(), log::SetLoggerError> { + log::set_logger(LOGGER) + } +} + +impl Log for Logger { + fn enabled(&self, _: &log::Metadata<'_>) -> bool { + // We set the log level via log::set_max_level, so we don't need to + // implement filtering here. 
+ true + } + + fn log(&self, record: &log::Record<'_>) { + match (record.file(), record.line()) { + (Some(file), Some(line)) => { + eprintln_locked!( + "{}|{}|{}:{}: {}", + record.level(), + record.target(), + file, + line, + record.args() + ); + } + (Some(file), None) => { + eprintln_locked!( + "{}|{}|{}: {}", + record.level(), + record.target(), + file, + record.args() + ); + } + _ => { + eprintln_locked!( + "{}|{}: {}", + record.level(), + record.target(), + record.args() + ); + } + } + } + + fn flush(&self) { + // We use eprintln_locked! which is flushed on every call. + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/main.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/main.rs new file mode 100644 index 000000000..64f35cebb --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/main.rs @@ -0,0 +1,483 @@ +/*! +The main entry point into ripgrep. +*/ + +use std::{io::Write, process::ExitCode}; + +use ignore::WalkState; + +use crate::flags::{HiArgs, SearchMode}; + +#[macro_use] +mod messages; + +mod flags; +mod haystack; +mod logger; +mod search; + +// Since Rust no longer uses jemalloc by default, ripgrep will, by default, +// use the system allocator. On Linux, this would normally be glibc's +// allocator, which is pretty good. In particular, ripgrep does not have a +// particularly allocation heavy workload, so there really isn't much +// difference (for ripgrep's purposes) between glibc's allocator and jemalloc. +// +// However, when ripgrep is built with musl, this means ripgrep will use musl's +// allocator, which appears to be substantially worse. (musl's goal is not to +// have the fastest version of everything. Its goal is to be small and amenable +// to static compilation.) Even though ripgrep isn't particularly allocation +// heavy, musl's allocator appears to slow down ripgrep quite a bit. Therefore, +// when building with musl, we use jemalloc. 
+// +// We don't unconditionally use jemalloc because it can be nice to use the +// system's default allocator by default. Moreover, jemalloc seems to increase +// compilation times by a bit. +// +// Moreover, we only do this on 64-bit systems since jemalloc doesn't support +// i686. +#[cfg(all(target_env = "musl", target_pointer_width = "64"))] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +/// Then, as it was, then again it will be. +fn main() -> ExitCode { + match run(flags::parse()) { + Ok(code) => code, + Err(err) => { + // Look for a broken pipe error. In this case, we generally want + // to exit "gracefully" with a success exit code. This matches + // existing Unix convention. We need to handle this explicitly + // since the Rust runtime doesn't ask for PIPE signals, and thus + // we get an I/O error instead. Traditional C Unix applications + // quit by getting a PIPE signal that they don't handle, and thus + // the unhandled signal causes the process to unceremoniously + // terminate. + for cause in err.chain() { + if let Some(ioerr) = cause.downcast_ref::() { + if ioerr.kind() == std::io::ErrorKind::BrokenPipe { + return ExitCode::from(0); + } + } + } + eprintln_locked!("{:#}", err); + ExitCode::from(2) + } + } +} + +/// The main entry point for ripgrep. +/// +/// The given parse result determines ripgrep's behavior. The parse +/// result should be the result of parsing CLI arguments in a low level +/// representation, and then followed by an attempt to convert them into a +/// higher level representation. The higher level representation has some nicer +/// abstractions, for example, instead of representing the `-g/--glob` flag +/// as a `Vec` (as in the low level representation), the globs are +/// converted into a single matcher. 
+fn run(result: crate::flags::ParseResult) -> anyhow::Result { + use crate::flags::{Mode, ParseResult}; + + let args = match result { + ParseResult::Err(err) => return Err(err), + ParseResult::Special(mode) => return special(mode), + ParseResult::Ok(args) => args, + }; + let matched = match args.mode() { + Mode::Search(_) if !args.matches_possible() => false, + Mode::Search(mode) if args.threads() == 1 => search(&args, mode)?, + Mode::Search(mode) => search_parallel(&args, mode)?, + Mode::Files if args.threads() == 1 => files(&args)?, + Mode::Files => files_parallel(&args)?, + Mode::Types => return types(&args), + Mode::Generate(mode) => return generate(mode), + }; + Ok(if matched && (args.quiet() || !messages::errored()) { + ExitCode::from(0) + } else if messages::errored() { + ExitCode::from(2) + } else { + ExitCode::from(1) + }) +} + +/// The top-level entry point for single-threaded search. +/// +/// This recursively steps through the file list (current directory by default) +/// and searches each file sequentially. +fn search(args: &HiArgs, mode: SearchMode) -> anyhow::Result { + let started_at = std::time::Instant::now(); + let haystack_builder = args.haystack_builder(); + let unsorted = args + .walk_builder()? + .build() + .filter_map(|result| haystack_builder.build_from_result(result)); + let haystacks = args.sort(unsorted); + + let mut matched = false; + let mut searched = false; + let mut stats = args.stats(); + let mut searcher = args.search_worker( + args.matcher()?, + args.searcher()?, + args.printer(mode, args.stdout()), + )?; + for haystack in haystacks { + searched = true; + let search_result = match searcher.search(&haystack) { + Ok(search_result) => search_result, + // A broken pipe means graceful termination. 
+ Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => break, + Err(err) => { + err_message!("{}: {}", haystack.path().display(), err); + continue; + } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); + } + if matched && args.quit_after_match() { + break; + } + } + if args.has_implicit_path() && !searched { + eprint_nothing_searched(); + } + if let Some(ref stats) = stats { + let wtr = searcher.printer().get_mut(); + let _ = print_stats(mode, stats, started_at, wtr); + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded search. +/// +/// The parallelism is itself achieved by the recursive directory traversal. +/// All we need to do is feed it a worker for performing a search on each file. +/// +/// Requesting a sorted output from ripgrep (such as with `--sort path`) will +/// automatically disable parallelism and hence sorting is not handled here. +fn search_parallel(args: &HiArgs, mode: SearchMode) -> anyhow::Result { + use std::sync::atomic::{AtomicBool, Ordering}; + + let started_at = std::time::Instant::now(); + let haystack_builder = args.haystack_builder(); + let bufwtr = args.buffer_writer(); + let stats = args.stats().map(std::sync::Mutex::new); + let matched = AtomicBool::new(false); + let searched = AtomicBool::new(false); + + let mut searcher = args.search_worker( + args.matcher()?, + args.searcher()?, + args.printer(mode, bufwtr.buffer()), + )?; + args.walk_builder()?.build_parallel().run(|| { + let bufwtr = &bufwtr; + let stats = &stats; + let matched = &matched; + let searched = &searched; + let haystack_builder = &haystack_builder; + let mut searcher = searcher.clone(); + + Box::new(move |result| { + let haystack = match haystack_builder.build_from_result(result) { + Some(haystack) => haystack, + None => return WalkState::Continue, + }; + searched.store(true, Ordering::SeqCst); + searcher.printer().get_mut().clear(); + let search_result 
= match searcher.search(&haystack) { + Ok(search_result) => search_result, + Err(err) => { + err_message!("{}: {}", haystack.path().display(), err); + return WalkState::Continue; + } + }; + if search_result.has_match() { + matched.store(true, Ordering::SeqCst); + } + if let Some(ref locked_stats) = *stats { + let mut stats = locked_stats.lock().unwrap(); + *stats += search_result.stats().unwrap(); + } + if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { + // A broken pipe means graceful termination. + if err.kind() == std::io::ErrorKind::BrokenPipe { + return WalkState::Quit; + } + // Otherwise, we continue on our merry way. + err_message!("{}: {}", haystack.path().display(), err); + } + if matched.load(Ordering::SeqCst) && args.quit_after_match() { + WalkState::Quit + } else { + WalkState::Continue + } + }) + }); + if args.has_implicit_path() && !searched.load(Ordering::SeqCst) { + eprint_nothing_searched(); + } + if let Some(ref locked_stats) = stats { + let stats = locked_stats.lock().unwrap(); + let mut wtr = searcher.printer().get_mut(); + let _ = print_stats(mode, &stats, started_at, &mut wtr); + let _ = bufwtr.print(&mut wtr); + } + Ok(matched.load(Ordering::SeqCst)) +} + +/// The top-level entry point for file listing without searching. +/// +/// This recursively steps through the file list (current directory by default) +/// and prints each path sequentially using a single thread. +fn files(args: &HiArgs) -> anyhow::Result { + let haystack_builder = args.haystack_builder(); + let unsorted = args + .walk_builder()? + .build() + .filter_map(|result| haystack_builder.build_from_result(result)); + let haystacks = args.sort(unsorted); + + let mut matched = false; + let mut path_printer = args.path_printer_builder().build(args.stdout()); + for haystack in haystacks { + matched = true; + if args.quit_after_match() { + break; + } + if let Err(err) = path_printer.write(haystack.path()) { + // A broken pipe means graceful termination. 
+ if err.kind() == std::io::ErrorKind::BrokenPipe { + break; + } + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. + return Err(err.into()); + } + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded file listing without +/// searching. +/// +/// This recursively steps through the file list (current directory by default) +/// and prints each path sequentially using multiple threads. +/// +/// Requesting a sorted output from ripgrep (such as with `--sort path`) will +/// automatically disable parallelism and hence sorting is not handled here. +fn files_parallel(args: &HiArgs) -> anyhow::Result { + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc, + }, + thread, + }; + + let haystack_builder = args.haystack_builder(); + let mut path_printer = args.path_printer_builder().build(args.stdout()); + let matched = AtomicBool::new(false); + let (tx, rx) = mpsc::channel::(); + + // We spawn a single printing thread to make sure we don't tear writes. + // We use a channel here under the presumption that it's probably faster + // than using a mutex in the worker threads below, but this has never been + // seriously litigated. 
+ let print_thread = thread::spawn(move || -> std::io::Result<()> { + for haystack in rx.iter() { + path_printer.write(haystack.path())?; + } + Ok(()) + }); + args.walk_builder()?.build_parallel().run(|| { + let haystack_builder = &haystack_builder; + let matched = &matched; + let tx = tx.clone(); + + Box::new(move |result| { + let haystack = match haystack_builder.build_from_result(result) { + Some(haystack) => haystack, + None => return WalkState::Continue, + }; + matched.store(true, Ordering::SeqCst); + if args.quit_after_match() { + WalkState::Quit + } else { + match tx.send(haystack) { + Ok(_) => WalkState::Continue, + Err(_) => WalkState::Quit, + } + } + }) + }); + drop(tx); + if let Err(err) = print_thread.join().unwrap() { + // A broken pipe means graceful termination, so fall through. + // Otherwise, something bad happened while writing to stdout, so bubble + // it up. + if err.kind() != std::io::ErrorKind::BrokenPipe { + return Err(err.into()); + } + } + Ok(matched.load(Ordering::SeqCst)) +} + +/// The top-level entry point for `--type-list`. +fn types(args: &HiArgs) -> anyhow::Result { + let mut count = 0; + let mut stdout = args.stdout(); + for def in args.types().definitions() { + count += 1; + stdout.write_all(def.name().as_bytes())?; + stdout.write_all(b": ")?; + + let mut first = true; + for glob in def.globs() { + if !first { + stdout.write_all(b", ")?; + } + stdout.write_all(glob.as_bytes())?; + first = false; + } + stdout.write_all(b"\n")?; + } + Ok(ExitCode::from(if count == 0 { 1 } else { 0 })) +} + +/// Implements ripgrep's "generate" modes. +/// +/// These modes correspond to generating some kind of ancillary data related +/// to ripgrep. At present, this includes ripgrep's man page (in roff format) +/// and supported shell completions. 
+fn generate(mode: crate::flags::GenerateMode) -> anyhow::Result { + use crate::flags::GenerateMode; + + let output = match mode { + GenerateMode::Man => flags::generate_man_page(), + GenerateMode::CompleteBash => flags::generate_complete_bash(), + GenerateMode::CompleteZsh => flags::generate_complete_zsh(), + GenerateMode::CompleteFish => flags::generate_complete_fish(), + GenerateMode::CompletePowerShell => { + flags::generate_complete_powershell() + } + }; + writeln!(std::io::stdout(), "{}", output.trim_end())?; + Ok(ExitCode::from(0)) +} + +/// Implements ripgrep's "special" modes. +/// +/// A special mode is one that generally short-circuits most (not all) of +/// ripgrep's initialization logic and skips right to this routine. The +/// special modes essentially consist of printing help and version output. The +/// idea behind the short circuiting is to ensure there is as little as possible +/// (within reason) that would prevent ripgrep from emitting help output. +/// +/// For example, part of the initialization logic that is skipped (among +/// other things) is accessing the current working directory. If that fails, +/// ripgrep emits an error. We don't want to emit an error if it fails and +/// the user requested version or help information. +fn special(mode: crate::flags::SpecialMode) -> anyhow::Result { + use crate::flags::SpecialMode; + + let mut exit = ExitCode::from(0); + let output = match mode { + SpecialMode::HelpShort => flags::generate_help_short(), + SpecialMode::HelpLong => flags::generate_help_long(), + SpecialMode::VersionShort => flags::generate_version_short(), + SpecialMode::VersionLong => flags::generate_version_long(), + // --pcre2-version is a little special because it emits an error + // exit code if this build of ripgrep doesn't support PCRE2. 
+ SpecialMode::VersionPCRE2 => { + let (output, available) = flags::generate_version_pcre2(); + if !available { + exit = ExitCode::from(1); + } + output + } + }; + writeln!(std::io::stdout(), "{}", output.trim_end())?; + Ok(exit) +} + +/// Prints a heuristic error messages when nothing is searched. +/// +/// This can happen if an applicable ignore file has one or more rules that +/// are too broad and cause ripgrep to ignore everything. +/// +/// We only show this error message when the user does *not* provide an +/// explicit path to search. This is because the message can otherwise be +/// noisy, e.g., when it is intended that there is nothing to search. +fn eprint_nothing_searched() { + err_message!( + "No files were searched, which means ripgrep probably \ + applied a filter you didn't expect.\n\ + Running with --debug will show why files are being skipped." + ); +} + +/// Prints the statistics given to the writer given. +/// +/// The search mode given determines whether the stats should be printed in +/// a plain text format or in a JSON format. +/// +/// The `started` time should be the time at which ripgrep started working. +/// +/// If an error occurs while writing, then writing stops and the error is +/// returned. Note that callers should probably ignore this errror, since +/// whether stats fail to print or not generally shouldn't cause ripgrep to +/// enter into an "error" state. And usually the only way for this to fail is +/// if writing to stdout itself fails. +fn print_stats( + mode: SearchMode, + stats: &grep::printer::Stats, + started: std::time::Instant, + mut wtr: W, +) -> std::io::Result<()> { + let elapsed = std::time::Instant::now().duration_since(started); + if matches!(mode, SearchMode::JSON) { + // We specifically match the format laid out by the JSON printer in + // the grep-printer crate. We simply "extend" it with the 'summary' + // message type. 
+ serde_json::to_writer( + &mut wtr, + &serde_json::json!({ + "type": "summary", + "data": { + "stats": stats, + "elapsed_total": { + "secs": elapsed.as_secs(), + "nanos": elapsed.subsec_nanos(), + "human": format!("{:0.6}s", elapsed.as_secs_f64()), + }, + } + }), + )?; + write!(wtr, "\n") + } else { + write!( + wtr, + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:0.6} seconds spent searching +{process_time:0.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = stats.elapsed().as_secs_f64(), + process_time = elapsed.as_secs_f64(), + ) + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/messages.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/messages.rs new file mode 100644 index 000000000..ba8b9adc5 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/messages.rs @@ -0,0 +1,139 @@ +/*! +This module defines some macros and some light shared mutable state. + +This state is responsible for keeping track of whether we should emit certain +kinds of messages to the user (such as errors) that are distinct from the +standard "debug" or "trace" log messages. This state is specifically set at +startup time when CLI arguments are parsed and then never changed. + +The other state tracked here is whether ripgrep experienced an error +condition. Aside from errors associated with invalid CLI arguments, ripgrep +generally does not abort when an error occurs (e.g., if reading a file failed). +But when an error does occur, it will alter ripgrep's exit status. 
Thus, when +an error message is emitted via `err_message`, then a global flag is toggled +indicating that at least one error occurred. When ripgrep exits, this flag is +consulted to determine what the exit status ought to be. +*/ + +use std::sync::atomic::{AtomicBool, Ordering}; + +/// When false, "messages" will not be printed. +static MESSAGES: AtomicBool = AtomicBool::new(false); +/// When false, "messages" related to ignore rules will not be printed. +static IGNORE_MESSAGES: AtomicBool = AtomicBool::new(false); +/// Flipped to true when an error message is printed. +static ERRORED: AtomicBool = AtomicBool::new(false); + +/// Like eprintln, but locks stdout to prevent interleaving lines. +/// +/// This locks stdout, not stderr, even though this prints to stderr. This +/// avoids the appearance of interleaving output when stdout and stderr both +/// correspond to a tty. +#[macro_export] +macro_rules! eprintln_locked { + ($($tt:tt)*) => {{ + { + use std::io::Write; + + // This is a bit of an abstraction violation because we explicitly + // lock stdout before printing to stderr. This avoids interleaving + // lines within ripgrep because `search_parallel` uses `termcolor`, + // which accesses the same stdout lock when writing lines. + let stdout = std::io::stdout().lock(); + let mut stderr = std::io::stderr().lock(); + // We specifically ignore any errors here. One plausible error we + // can get in some cases is a broken pipe error. And when that + // occurs, we should exit gracefully. Otherwise, just abort with + // an error code because there isn't much else we can do. 
+ // + // See: https://github.com/BurntSushi/ripgrep/issues/1966 + if let Err(err) = write!(stderr, "rg: ") { + if err.kind() == std::io::ErrorKind::BrokenPipe { + std::process::exit(0); + } else { + std::process::exit(2); + } + } + if let Err(err) = writeln!(stderr, $($tt)*) { + if err.kind() == std::io::ErrorKind::BrokenPipe { + std::process::exit(0); + } else { + std::process::exit(2); + } + } + drop(stdout); + } + }} +} + +/// Emit a non-fatal error message, unless messages were disabled. +#[macro_export] +macro_rules! message { + ($($tt:tt)*) => { + if crate::messages::messages() { + eprintln_locked!($($tt)*); + } + } +} + +/// Like message, but sets ripgrep's "errored" flag, which controls the exit +/// status. +#[macro_export] +macro_rules! err_message { + ($($tt:tt)*) => { + crate::messages::set_errored(); + message!($($tt)*); + } +} + +/// Emit a non-fatal ignore-related error message (like a parse error), unless +/// ignore-messages were disabled. +#[macro_export] +macro_rules! ignore_message { + ($($tt:tt)*) => { + if crate::messages::messages() && crate::messages::ignore_messages() { + eprintln_locked!($($tt)*); + } + } +} + +/// Returns true if and only if messages should be shown. +pub(crate) fn messages() -> bool { + MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether messages should be shown or not. +/// +/// By default, they are not shown. +pub(crate) fn set_messages(yes: bool) { + MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if "ignore" related messages should be shown. +pub(crate) fn ignore_messages() -> bool { + IGNORE_MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether "ignore" related messages should be shown or not. +/// +/// By default, they are not shown. +/// +/// Note that this is overridden if `messages` is disabled. Namely, if +/// `messages` is disabled, then "ignore" messages are never shown, regardless +/// of this setting. 
+pub(crate) fn set_ignore_messages(yes: bool) { + IGNORE_MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if ripgrep came across a non-fatal error. +pub(crate) fn errored() -> bool { + ERRORED.load(Ordering::SeqCst) +} + +/// Indicate that ripgrep has come across a non-fatal error. +/// +/// Callers should not use this directly. Instead, it is called automatically +/// via the `err_message` macro. +pub(crate) fn set_errored() { + ERRORED.store(true, Ordering::SeqCst); +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/search.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/search.rs new file mode 100644 index 000000000..672734254 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/crates/core/search.rs @@ -0,0 +1,447 @@ +/*! +Defines a very high level "search worker" abstraction. + +A search worker manages the high level interaction points between the matcher +(i.e., which regex engine is used), the searcher (i.e., how data is actually +read and matched using the regex engine) and the printer. For example, the +search worker is where things like preprocessors or decompression happens. +*/ + +use std::{io, path::Path}; + +use {grep::matcher::Matcher, termcolor::WriteColor}; + +/// The configuration for the search worker. +/// +/// Among a few other things, the configuration primarily controls the way we +/// show search results to users at a very high level. 
+#[derive(Clone, Debug)] +struct Config { + preprocessor: Option, + preprocessor_globs: ignore::overrides::Override, + search_zip: bool, + binary_implicit: grep::searcher::BinaryDetection, + binary_explicit: grep::searcher::BinaryDetection, +} + +impl Default for Config { + fn default() -> Config { + Config { + preprocessor: None, + preprocessor_globs: ignore::overrides::Override::empty(), + search_zip: false, + binary_implicit: grep::searcher::BinaryDetection::none(), + binary_explicit: grep::searcher::BinaryDetection::none(), + } + } +} + +/// A builder for configuring and constructing a search worker. +#[derive(Clone, Debug)] +pub(crate) struct SearchWorkerBuilder { + config: Config, + command_builder: grep::cli::CommandReaderBuilder, + decomp_builder: grep::cli::DecompressionReaderBuilder, +} + +impl Default for SearchWorkerBuilder { + fn default() -> SearchWorkerBuilder { + SearchWorkerBuilder::new() + } +} + +impl SearchWorkerBuilder { + /// Create a new builder for configuring and constructing a search worker. + pub(crate) fn new() -> SearchWorkerBuilder { + let mut cmd_builder = grep::cli::CommandReaderBuilder::new(); + cmd_builder.async_stderr(true); + + let mut decomp_builder = grep::cli::DecompressionReaderBuilder::new(); + decomp_builder.async_stderr(true); + + SearchWorkerBuilder { + config: Config::default(), + command_builder: cmd_builder, + decomp_builder, + } + } + + /// Create a new search worker using the given searcher, matcher and + /// printer. + pub(crate) fn build( + &self, + matcher: PatternMatcher, + searcher: grep::searcher::Searcher, + printer: Printer, + ) -> SearchWorker { + let config = self.config.clone(); + let command_builder = self.command_builder.clone(); + let decomp_builder = self.decomp_builder.clone(); + SearchWorker { + config, + command_builder, + decomp_builder, + matcher, + searcher, + printer, + } + } + + /// Set the path to a preprocessor command. 
+ /// + /// When this is set, instead of searching files directly, the given + /// command will be run with the file path as the first argument, and the + /// output of that command will be searched instead. + pub(crate) fn preprocessor( + &mut self, + cmd: Option, + ) -> anyhow::Result<&mut SearchWorkerBuilder> { + if let Some(ref prog) = cmd { + let bin = grep::cli::resolve_binary(prog)?; + self.config.preprocessor = Some(bin); + } else { + self.config.preprocessor = None; + } + Ok(self) + } + + /// Set the globs for determining which files should be run through the + /// preprocessor. By default, with no globs and a preprocessor specified, + /// every file is run through the preprocessor. + pub(crate) fn preprocessor_globs( + &mut self, + globs: ignore::overrides::Override, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor_globs = globs; + self + } + + /// Enable the decompression and searching of common compressed files. + /// + /// When enabled, if a particular file path is recognized as a compressed + /// file, then it is decompressed before searching. + /// + /// Note that if a preprocessor command is set, then it overrides this + /// setting. + pub(crate) fn search_zip( + &mut self, + yes: bool, + ) -> &mut SearchWorkerBuilder { + self.config.search_zip = yes; + self + } + + /// Set the binary detection that should be used when searching files + /// found via a recursive directory search. + /// + /// Generally, this binary detection may be + /// `grep::searcher::BinaryDetection::quit` if we want to skip binary files + /// completely. + /// + /// By default, no binary detection is performed. + pub(crate) fn binary_detection_implicit( + &mut self, + detection: grep::searcher::BinaryDetection, + ) -> &mut SearchWorkerBuilder { + self.config.binary_implicit = detection; + self + } + + /// Set the binary detection that should be used when searching files + /// explicitly supplied by an end user. 
+ /// + /// Generally, this binary detection should NOT be + /// `grep::searcher::BinaryDetection::quit`, since we never want to + /// automatically filter files supplied by the end user. + /// + /// By default, no binary detection is performed. + pub(crate) fn binary_detection_explicit( + &mut self, + detection: grep::searcher::BinaryDetection, + ) -> &mut SearchWorkerBuilder { + self.config.binary_explicit = detection; + self + } +} + +/// The result of executing a search. +/// +/// Generally speaking, the "result" of a search is sent to a printer, which +/// writes results to an underlying writer such as stdout or a file. However, +/// every search also has some aggregate statistics or meta data that may be +/// useful to higher level routines. +#[derive(Clone, Debug, Default)] +pub(crate) struct SearchResult { + has_match: bool, + stats: Option, +} + +impl SearchResult { + /// Whether the search found a match or not. + pub(crate) fn has_match(&self) -> bool { + self.has_match + } + + /// Return aggregate search statistics for a single search, if available. + /// + /// It can be expensive to compute statistics, so these are only present + /// if explicitly enabled in the printer provided by the caller. + pub(crate) fn stats(&self) -> Option<&grep::printer::Stats> { + self.stats.as_ref() + } +} + +/// The pattern matcher used by a search worker. +#[derive(Clone, Debug)] +pub(crate) enum PatternMatcher { + RustRegex(grep::regex::RegexMatcher), + #[cfg(feature = "pcre2")] + PCRE2(grep::pcre2::RegexMatcher), +} + +/// The printer used by a search worker. +/// +/// The `W` type parameter refers to the type of the underlying writer. +#[derive(Clone, Debug)] +pub(crate) enum Printer { + /// Use the standard printer, which supports the classic grep-like format. + Standard(grep::printer::Standard), + /// Use the summary printer, which supports aggregate displays of search + /// results. 
+ Summary(grep::printer::Summary), + /// A JSON printer, which emits results in the JSON Lines format. + JSON(grep::printer::JSON), +} + +impl Printer { + /// Return a mutable reference to the underlying printer's writer. + pub(crate) fn get_mut(&mut self) -> &mut W { + match *self { + Printer::Standard(ref mut p) => p.get_mut(), + Printer::Summary(ref mut p) => p.get_mut(), + Printer::JSON(ref mut p) => p.get_mut(), + } + } +} + +/// A worker for executing searches. +/// +/// It is intended for a single worker to execute many searches, and is +/// generally intended to be used from a single thread. When searching using +/// multiple threads, it is better to create a new worker for each thread. +#[derive(Clone, Debug)] +pub(crate) struct SearchWorker { + config: Config, + command_builder: grep::cli::CommandReaderBuilder, + decomp_builder: grep::cli::DecompressionReaderBuilder, + matcher: PatternMatcher, + searcher: grep::searcher::Searcher, + printer: Printer, +} + +impl SearchWorker { + /// Execute a search over the given haystack. + pub(crate) fn search( + &mut self, + haystack: &crate::haystack::Haystack, + ) -> io::Result { + let bin = if haystack.is_explicit() { + self.config.binary_explicit.clone() + } else { + self.config.binary_implicit.clone() + }; + let path = haystack.path(); + log::trace!("{}: binary detection: {:?}", path.display(), bin); + + self.searcher.set_binary_detection(bin); + if haystack.is_stdin() { + self.search_reader(path, &mut io::stdin().lock()) + } else if self.should_preprocess(path) { + self.search_preprocessor(path) + } else if self.should_decompress(path) { + self.search_decompress(path) + } else { + self.search_path(path) + } + } + + /// Return a mutable reference to the underlying printer. + pub(crate) fn printer(&mut self) -> &mut Printer { + &mut self.printer + } + + /// Returns true if and only if the given file path should be + /// decompressed before searching. 
+ fn should_decompress(&self, path: &Path) -> bool { + if !self.config.search_zip { + return false; + } + self.decomp_builder.get_matcher().has_command(path) + } + + /// Returns true if and only if the given file path should be run through + /// the preprocessor. + fn should_preprocess(&self, path: &Path) -> bool { + if !self.config.preprocessor.is_some() { + return false; + } + if self.config.preprocessor_globs.is_empty() { + return true; + } + !self.config.preprocessor_globs.matched(path, false).is_ignore() + } + + /// Search the given file path by first asking the preprocessor for the + /// data to search instead of opening the path directly. + fn search_preprocessor( + &mut self, + path: &Path, + ) -> io::Result { + use std::{fs::File, process::Stdio}; + + let bin = self.config.preprocessor.as_ref().unwrap(); + let mut cmd = std::process::Command::new(bin); + cmd.arg(path).stdin(Stdio::from(File::open(path)?)); + + let mut rdr = self.command_builder.build(&mut cmd).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!( + "preprocessor command could not start: '{:?}': {}", + cmd, err, + ), + ) + })?; + let result = self.search_reader(path, &mut rdr).map_err(|err| { + io::Error::new( + io::ErrorKind::Other, + format!("preprocessor command failed: '{:?}': {}", cmd, err), + ) + }); + let close_result = rdr.close(); + let search_result = result?; + close_result?; + Ok(search_result) + } + + /// Attempt to decompress the data at the given file path and search the + /// result. If the given file path isn't recognized as a compressed file, + /// then search it without doing any decompression. + fn search_decompress(&mut self, path: &Path) -> io::Result { + let mut rdr = self.decomp_builder.build(path)?; + let result = self.search_reader(path, &mut rdr); + let close_result = rdr.close(); + let search_result = result?; + close_result?; + Ok(search_result) + } + + /// Search the contents of the given file path. 
+ fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_path(m, searcher, printer, path), + } + } + + /// Executes a search on the given reader, which may or may not correspond + /// directly to the contents of the given file path. Instead, the reader + /// may actually cause something else to be searched (for example, when + /// a preprocessor is set or when decompression is enabled). In those + /// cases, the file path is used for visual purposes only. + /// + /// Generally speaking, this method should only be used when there is no + /// other choice. Searching via `search_path` provides more opportunities + /// for optimizations (such as memory maps). + fn search_reader( + &mut self, + path: &Path, + rdr: &mut R, + ) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr), + } + } +} + +/// Search the contents of the given file path using the given matcher, +/// searcher and printer. 
+fn search_path( + matcher: M, + searcher: &mut grep::searcher::Searcher, + printer: &mut Printer, + path: &Path, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Search the contents of the given reader using the given matcher, searcher +/// and printer. +fn search_reader( + matcher: M, + searcher: &mut grep::searcher::Searcher, + printer: &mut Printer, + path: &Path, + mut rdr: R, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, &mut rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, &mut rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, &mut rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json 
b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json new file mode 100644 index 000000000..3166b496a --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json @@ -0,0 +1,4 @@ +{ + "artifact": "binary", + "category": "primary" +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/Manifest.xml b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/Manifest.xml new file mode 100644 index 000000000..b6f0e7027 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/Manifest.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + UTF-8 + + + + + + true + + + diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/README.md b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/README.md new file mode 100644 index 000000000..7be701bf8 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/pkg/windows/README.md @@ -0,0 +1,15 @@ +This directory contains a Windows manifest for various Windows-specific +settings. + +The main thing we enable here is [`longPathAware`], which permits paths of the +form `C:\` to be longer than 260 characters. + +The approach taken here was modeled off of a [similar change for `rustc`][rustc pr]. +In particular, this manifest gets linked into the final binary. Those linker +arguments are applied in `build.rs`. + +This currently only applies to MSVC builds. If there's an easy way to make this +apply to GNU builds as well, then patches are welcome. 
+ +[`longPathAware`]: https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests#longpathaware +[rustc pr]: https://github.com/rust-lang/rust/pull/96737 diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/rustfmt.toml b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/rustfmt.toml new file mode 100644 index 000000000..aa37a218b --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 79 +use_small_heuristics = "max" diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/binary.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/binary.rs new file mode 100644 index 000000000..93f6844ca --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/binary.rs @@ -0,0 +1,306 @@ +use crate::util::{Dir, TestCommand}; + +// This file contains a smattering of tests specifically for checking ripgrep's +// handling of binary files. There's quite a bit of discussion on this in this +// bug report: https://github.com/BurntSushi/ripgrep/issues/306 + +// Our haystack is the first 500 lines of Gutenberg's copy of "A Study in +// Scarlet," with a NUL byte at line 1898: `abcdef\x00`. +// +// The position and size of the haystack is, unfortunately, significant. In +// particular, the NUL byte is specifically inserted at some point *after* the +// first 65,536 bytes, which corresponds to the initial capacity of the buffer +// that ripgrep uses to read files. (grep for DEFAULT_BUFFER_CAPACITY.) The +// position of the NUL byte ensures that we can execute some search on the +// initial buffer contents without ever detecting any binary data. Moreover, +// when using a memory map for searching, only the first 65,536 bytes are +// scanned for a NUL byte, so no binary bytes are detected at all when using +// a memory map (unless our query matches line 1898). 
+// +// One last note: in the tests below, we use --no-mmap heavily because binary +// detection with memory maps is a bit different. Namely, NUL bytes are only +// searched for in the first few KB of the file and in a match. Normally, NUL +// bytes are searched for everywhere. +// +// TODO: Add tests for binary file detection when using memory maps. +const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt"); + +// This tests that ripgrep prints a warning message if it finds and prints a +// match in a binary file before detecting that it is a binary file. The point +// here is to notify that user that the search of the file is only partially +// complete. +// +// This applies to files that are *implicitly* searched via a recursive +// directory traversal. In particular, this results in a WARNING message being +// printed. We make our file "implicit" by doing a recursive search with a glob +// that matches our file. +rgtest!(after_match1_implicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook", "-g", "hay"]); + + let expected = "\ +hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match1_implicit, except we provide a file to search +// explicitly. This results in identical behavior, but a different message. +rgtest!(after_match1_explicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook", "hay"]); + + let expected = "\ +1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match1_explicit, except we feed our content on stdin. 
+rgtest!(after_match1_stdin, |_: Dir, mut cmd: TestCommand| { + cmd.args(&["--no-mmap", "-n", "Project Gutenberg EBook"]); + + let expected = "\ +1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.pipe(HAY)); +}); + +// Like after_match1_implicit, but provides the --binary flag, which +// disables binary filtering. Thus, this matches the behavior of ripgrep as +// if the file were given explicitly. +rgtest!(after_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&[ + "--no-mmap", + "-n", + "--binary", + "Project Gutenberg EBook", + "-g", + "hay", + ]); + + let expected = "\ +hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +hay: binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match1_implicit, but enables -a/--text, so no binary +// detection should be performed. +rgtest!(after_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&[ + "--no-mmap", + "-n", + "--text", + "Project Gutenberg EBook", + "-g", + "hay", + ]); + + let expected = "\ +hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match1_implicit_text, but enables -a/--text, so no binary +// detection should be performed. +rgtest!(after_match1_explicit_text, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "--text", "Project Gutenberg EBook", "hay"]); + + let expected = "\ +1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match1_implicit, except this asks ripgrep to print all matching +// files. 
+// +// This is an interesting corner case that one might consider a bug, however, +// it's unlikely to be fixed. Namely, ripgrep probably shouldn't print `hay` +// as a matching file since it is in fact a binary file, and thus should be +// filtered out by default. However, the --files-with-matches flag will print +// out the path of a matching file as soon as a match is seen and then stop +// searching completely. Therefore, the NUL byte is never actually detected. +// +// The only way to fix this would be to kill ripgrep's performance in this case +// and continue searching the entire file for a NUL byte. (Similarly if the +// --quiet flag is set. See the next test.) +rgtest!(after_match1_implicit_path, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-l", "Project Gutenberg EBook", "-g", "hay"]); + eqnice!("hay\n", cmd.stdout()); +}); + +// Like after_match1_implicit_path, except this indicates that a match was +// found with no other output. (This is the same bug described above, but +// manifest as an exit code with no output.) +rgtest!(after_match1_implicit_quiet, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-q", "Project Gutenberg EBook", "-g", "hay"]); + eqnice!("", cmd.stdout()); +}); + +// This sets up the same test as after_match1_implicit_path, but instead of +// just printing the matching files, this includes the full count of matches. +// In this case, we need to search the entire file, so ripgrep correctly +// detects the binary data and suppresses output. +rgtest!(after_match1_implicit_count, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-c", "Project Gutenberg EBook", "-g", "hay"]); + cmd.assert_err(); +}); + +// Like after_match1_implicit_count, except the --binary flag is provided, +// which makes ripgrep disable binary data filtering even for implicit files. 
+rgtest!( + after_match1_implicit_count_binary, + |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&[ + "--no-mmap", + "-c", + "--binary", + "Project Gutenberg EBook", + "-g", + "hay", + ]); + eqnice!("hay:1\n", cmd.stdout()); + } +); + +// Like after_match1_implicit_count, except the file path is provided +// explicitly, so binary filtering is disabled and a count is correctly +// reported. +rgtest!(after_match1_explicit_count, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-c", "Project Gutenberg EBook", "hay"]); + eqnice!("1\n", cmd.stdout()); +}); + +// This tests that a match way before the NUL byte is shown, but a match after +// the NUL byte is not. +rgtest!(after_match2_implicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&[ + "--no-mmap", + "-n", + "Project Gutenberg EBook|a medical student", + "-g", + "hay", + ]); + + let expected = "\ +hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like after_match2_implicit, but enables -a/--text, so no binary +// detection should be performed. +rgtest!(after_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&[ + "--no-mmap", + "-n", + "--text", + "Project Gutenberg EBook|a medical student", + "-g", + "hay", + ]); + + let expected = "\ +hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle +hay:1867:\"And yet you say he is not a medical student?\" +"; + eqnice!(expected, cmd.stdout()); +}); + +// This tests that ripgrep *silently* quits before finding a match that occurs +// after a NUL byte. 
+rgtest!(before_match1_implicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "Heaven", "-g", "hay"]); + cmd.assert_err(); +}); + +// This tests that ripgrep *does not* silently quit before finding a match that +// occurs after a NUL byte when a file is explicitly searched. +rgtest!(before_match1_explicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "Heaven", "hay"]); + + let expected = "\ +binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like before_match1_implicit, but enables the --binary flag, which +// disables binary filtering. Thus, this matches the behavior of ripgrep as if +// the file were given explicitly. +rgtest!(before_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "--binary", "Heaven", "-g", "hay"]); + + let expected = "\ +hay: binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like before_match1_implicit, but enables -a/--text, so no binary +// detection should be performed. +rgtest!(before_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "--text", "Heaven", "-g", "hay"]); + + let expected = "\ +hay:1871:\"No. Heaven knows what the objects of his studies are. But here we +"; + eqnice!(expected, cmd.stdout()); +}); + +// This tests that ripgrep *silently* quits before finding a match that occurs +// before a NUL byte, but within the same buffer as the NUL byte. 
+rgtest!(before_match2_implicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "a medical student", "-g", "hay"]); + cmd.assert_err(); +}); + +// This tests that ripgrep *does not* silently quit before finding a match that +// occurs before a NUL byte, but within the same buffer as the NUL byte. Even +// though the match occurs before the NUL byte, ripgrep still doesn't print it +// because it has already scanned ahead to detect the NUL byte. (This matches +// the behavior of GNU grep.) +rgtest!(before_match2_explicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "a medical student", "hay"]); + + let expected = "\ +binary file matches (found \"\\0\" byte around offset 77041) +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like before_match1_implicit, but enables -a/--text, so no binary +// detection should be performed. +rgtest!(before_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("hay", HAY); + cmd.args(&["--no-mmap", "-n", "--text", "a medical student", "-g", "hay"]); + + let expected = "\ +hay:1867:\"And yet you say he is not a medical student?\" +"; + eqnice!(expected, cmd.stdout()); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock-nul.txt b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock-nul.txt new file mode 100644 index 000000000..60fb2b095 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock-nul.txt @@ -0,0 +1,2133 @@ +The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +This eBook is for the use of anyone anywhere at no cost and with +almost no restrictions whatsoever. 
You may copy it, give it away or +re-use it under the terms of the Project Gutenberg License included +with this eBook or online at www.gutenberg.org + + +Title: A Study In Scarlet + +Author: Arthur Conan Doyle + +Posting Date: July 12, 2008 [EBook #244] +Release Date: April, 1995 +[Last updated: February 17, 2013] + +Language: English + + +*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET *** + + + + +Produced by Roger Squires + + + + + +A STUDY IN SCARLET. + +By A. Conan Doyle + +[1] + + + + Original Transcriber's Note: This etext is prepared directly + from an 1887 edition, and care has been taken to duplicate the + original exactly, including typographical and punctuation + vagaries. + + Additions to the text include adding the underscore character to + indicate italics, and textual end-notes in square braces. + + Project Gutenberg Editor's Note: In reproofing and moving old PG + files such as this to the present PG directory system it is the + policy to reformat the text to conform to present PG Standards. + In this case however, in consideration of the note above of the + original transcriber describing his care to try to duplicate the + original 1887 edition as to typography and punctuation vagaries, + no changes have been made in this ascii text file. However, in + the Latin-1 file and this html file, present standards are + followed and the several French and Spanish words have been + given their proper accents. + + Part II, The Country of the Saints, deals much with the Mormon Church. + + + + +A STUDY IN SCARLET. + + + + + +PART I. + +(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late +of the Army Medical Department._) [2] + + + + +CHAPTER I. MR. SHERLOCK HOLMES. + + +IN the year 1878 I took my degree of Doctor of Medicine of the +University of London, and proceeded to Netley to go through the course +prescribed for surgeons in the army. 
Having completed my studies there, +I was duly attached to the Fifth Northumberland Fusiliers as Assistant +Surgeon. The regiment was stationed in India at the time, and before +I could join it, the second Afghan war had broken out. On landing at +Bombay, I learned that my corps had advanced through the passes, and +was already deep in the enemy's country. I followed, however, with many +other officers who were in the same situation as myself, and succeeded +in reaching Candahar in safety, where I found my regiment, and at once +entered upon my new duties. + +The campaign brought honours and promotion to many, but for me it had +nothing but misfortune and disaster. I was removed from my brigade and +attached to the Berkshires, with whom I served at the fatal battle of +Maiwand. There I was struck on the shoulder by a Jezail bullet, which +shattered the bone and grazed the subclavian artery. I should have +fallen into the hands of the murderous Ghazis had it not been for the +devotion and courage shown by Murray, my orderly, who threw me across a +pack-horse, and succeeded in bringing me safely to the British lines. + +Worn with pain, and weak from the prolonged hardships which I had +undergone, I was removed, with a great train of wounded sufferers, to +the base hospital at Peshawar. Here I rallied, and had already improved +so far as to be able to walk about the wards, and even to bask a little +upon the verandah, when I was struck down by enteric fever, that curse +of our Indian possessions. For months my life was despaired of, and +when at last I came to myself and became convalescent, I was so weak and +emaciated that a medical board determined that not a day should be lost +in sending me back to England. I was dispatched, accordingly, in the +troopship "Orontes," and landed a month later on Portsmouth jetty, with +my health irretrievably ruined, but with permission from a paternal +government to spend the next nine months in attempting to improve it. 
+ +I had neither kith nor kin in England, and was therefore as free as +air--or as free as an income of eleven shillings and sixpence a day will +permit a man to be. Under such circumstances, I naturally gravitated to +London, that great cesspool into which all the loungers and idlers of +the Empire are irresistibly drained. There I stayed for some time at +a private hotel in the Strand, leading a comfortless, meaningless +existence, and spending such money as I had, considerably more freely +than I ought. So alarming did the state of my finances become, that +I soon realized that I must either leave the metropolis and rusticate +somewhere in the country, or that I must make a complete alteration in +my style of living. Choosing the latter alternative, I began by making +up my mind to leave the hotel, and to take up my quarters in some less +pretentious and less expensive domicile. + +On the very day that I had come to this conclusion, I was standing at +the Criterion Bar, when some one tapped me on the shoulder, and turning +round I recognized young Stamford, who had been a dresser under me at +Barts. The sight of a friendly face in the great wilderness of London is +a pleasant thing indeed to a lonely man. In old days Stamford had never +been a particular crony of mine, but now I hailed him with enthusiasm, +and he, in his turn, appeared to be delighted to see me. In the +exuberance of my joy, I asked him to lunch with me at the Holborn, and +we started off together in a hansom. + +"Whatever have you been doing with yourself, Watson?" he asked in +undisguised wonder, as we rattled through the crowded London streets. +"You are as thin as a lath and as brown as a nut." + +I gave him a short sketch of my adventures, and had hardly concluded it +by the time that we reached our destination. + +"Poor devil!" he said, commiseratingly, after he had listened to my +misfortunes. "What are you up to now?" + +"Looking for lodgings." [3] I answered. 
"Trying to solve the problem +as to whether it is possible to get comfortable rooms at a reasonable +price." + +"That's a strange thing," remarked my companion; "you are the second man +to-day that has used that expression to me." + +"And who was the first?" I asked. + +"A fellow who is working at the chemical laboratory up at the hospital. +He was bemoaning himself this morning because he could not get someone +to go halves with him in some nice rooms which he had found, and which +were too much for his purse." + +"By Jove!" I cried, "if he really wants someone to share the rooms and +the expense, I am the very man for him. I should prefer having a partner +to being alone." + +Young Stamford looked rather strangely at me over his wine-glass. "You +don't know Sherlock Holmes yet," he said; "perhaps you would not care +for him as a constant companion." + +"Why, what is there against him?" + +"Oh, I didn't say there was anything against him. He is a little queer +in his ideas--an enthusiast in some branches of science. As far as I +know he is a decent fellow enough." + +"A medical student, I suppose?" said I. + +"No--I have no idea what he intends to go in for. I believe he is well +up in anatomy, and he is a first-class chemist; but, as far as I know, +he has never taken out any systematic medical classes. His studies are +very desultory and eccentric, but he has amassed a lot of out-of-the way +knowledge which would astonish his professors." + +"Did you never ask him what he was going in for?" I asked. + +"No; he is not a man that it is easy to draw out, though he can be +communicative enough when the fancy seizes him." + +"I should like to meet him," I said. "If I am to lodge with anyone, I +should prefer a man of studious and quiet habits. I am not strong +enough yet to stand much noise or excitement. I had enough of both in +Afghanistan to last me for the remainder of my natural existence. How +could I meet this friend of yours?" 
+ +"He is sure to be at the laboratory," returned my companion. "He either +avoids the place for weeks, or else he works there from morning to +night. If you like, we shall drive round together after luncheon." + +"Certainly," I answered, and the conversation drifted away into other +channels. + +As we made our way to the hospital after leaving the Holborn, Stamford +gave me a few more particulars about the gentleman whom I proposed to +take as a fellow-lodger. + +"You mustn't blame me if you don't get on with him," he said; "I know +nothing more of him than I have learned from meeting him occasionally in +the laboratory. You proposed this arrangement, so you must not hold me +responsible." + +"If we don't get on it will be easy to part company," I answered. "It +seems to me, Stamford," I added, looking hard at my companion, "that you +have some reason for washing your hands of the matter. Is this fellow's +temper so formidable, or what is it? Don't be mealy-mouthed about it." + +"It is not easy to express the inexpressible," he answered with a laugh. +"Holmes is a little too scientific for my tastes--it approaches to +cold-bloodedness. I could imagine his giving a friend a little pinch of +the latest vegetable alkaloid, not out of malevolence, you understand, +but simply out of a spirit of inquiry in order to have an accurate idea +of the effects. To do him justice, I think that he would take it himself +with the same readiness. He appears to have a passion for definite and +exact knowledge." + +"Very right too." + +"Yes, but it may be pushed to excess. When it comes to beating the +subjects in the dissecting-rooms with a stick, it is certainly taking +rather a bizarre shape." + +"Beating the subjects!" + +"Yes, to verify how far bruises may be produced after death. I saw him +at it with my own eyes." + +"And yet you say he is not a medical student?" + + +abcdef +"No. Heaven knows what the objects of his studies are. 
But here we +are, and you must form your own impressions about him." As he spoke, we +turned down a narrow lane and passed through a small side-door, which +opened into a wing of the great hospital. It was familiar ground to me, +and I needed no guiding as we ascended the bleak stone staircase and +made our way down the long corridor with its vista of whitewashed +wall and dun-coloured doors. Near the further end a low arched passage +branched away from it and led to the chemical laboratory. + +This was a lofty chamber, lined and littered with countless bottles. +Broad, low tables were scattered about, which bristled with retorts, +test-tubes, and little Bunsen lamps, with their blue flickering flames. +There was only one student in the room, who was bending over a distant +table absorbed in his work. At the sound of our steps he glanced round +and sprang to his feet with a cry of pleasure. "I've found it! I've +found it," he shouted to my companion, running towards us with a +test-tube in his hand. "I have found a re-agent which is precipitated +by hoemoglobin, [4] and by nothing else." Had he discovered a gold mine, +greater delight could not have shone upon his features. + +"Dr. Watson, Mr. Sherlock Holmes," said Stamford, introducing us. + +"How are you?" he said cordially, gripping my hand with a strength +for which I should hardly have given him credit. "You have been in +Afghanistan, I perceive." + +"How on earth did you know that?" I asked in astonishment. + +"Never mind," said he, chuckling to himself. "The question now is about +hoemoglobin. No doubt you see the significance of this discovery of +mine?" + +"It is interesting, chemically, no doubt," I answered, "but +practically----" + +"Why, man, it is the most practical medico-legal discovery for years. +Don't you see that it gives us an infallible test for blood stains. Come +over here now!" He seized me by the coat-sleeve in his eagerness, and +drew me over to the table at which he had been working. 
"Let us have +some fresh blood," he said, digging a long bodkin into his finger, and +drawing off the resulting drop of blood in a chemical pipette. "Now, I +add this small quantity of blood to a litre of water. You perceive that +the resulting mixture has the appearance of pure water. The proportion +of blood cannot be more than one in a million. I have no doubt, however, +that we shall be able to obtain the characteristic reaction." As he +spoke, he threw into the vessel a few white crystals, and then added +some drops of a transparent fluid. In an instant the contents assumed a +dull mahogany colour, and a brownish dust was precipitated to the bottom +of the glass jar. + +"Ha! ha!" he cried, clapping his hands, and looking as delighted as a +child with a new toy. "What do you think of that?" + +"It seems to be a very delicate test," I remarked. + +"Beautiful! beautiful! The old Guiacum test was very clumsy and +uncertain. So is the microscopic examination for blood corpuscles. The +latter is valueless if the stains are a few hours old. Now, this appears +to act as well whether the blood is old or new. Had this test been +invented, there are hundreds of men now walking the earth who would long +ago have paid the penalty of their crimes." + +"Indeed!" I murmured. + +"Criminal cases are continually hinging upon that one point. A man is +suspected of a crime months perhaps after it has been committed. His +linen or clothes are examined, and brownish stains discovered upon them. +Are they blood stains, or mud stains, or rust stains, or fruit stains, +or what are they? That is a question which has puzzled many an expert, +and why? Because there was no reliable test. Now we have the Sherlock +Holmes' test, and there will no longer be any difficulty." + +His eyes fairly glittered as he spoke, and he put his hand over his +heart and bowed as if to some applauding crowd conjured up by his +imagination. 
+ +"You are to be congratulated," I remarked, considerably surprised at his +enthusiasm. + +"There was the case of Von Bischoff at Frankfort last year. He would +certainly have been hung had this test been in existence. Then there was +Mason of Bradford, and the notorious Muller, and Lefevre of Montpellier, +and Samson of New Orleans. I could name a score of cases in which it +would have been decisive." + +"You seem to be a walking calendar of crime," said Stamford with a +laugh. "You might start a paper on those lines. Call it the 'Police News +of the Past.'" + +"Very interesting reading it might be made, too," remarked Sherlock +Holmes, sticking a small piece of plaster over the prick on his finger. +"I have to be careful," he continued, turning to me with a smile, "for I +dabble with poisons a good deal." He held out his hand as he spoke, and +I noticed that it was all mottled over with similar pieces of plaster, +and discoloured with strong acids. + +"We came here on business," said Stamford, sitting down on a high +three-legged stool, and pushing another one in my direction with +his foot. "My friend here wants to take diggings, and as you were +complaining that you could get no one to go halves with you, I thought +that I had better bring you together." + +Sherlock Holmes seemed delighted at the idea of sharing his rooms with +me. "I have my eye on a suite in Baker Street," he said, "which would +suit us down to the ground. You don't mind the smell of strong tobacco, +I hope?" + +"I always smoke 'ship's' myself," I answered. + +"That's good enough. I generally have chemicals about, and occasionally +do experiments. Would that annoy you?" + +"By no means." + +"Let me see--what are my other shortcomings. I get in the dumps at +times, and don't open my mouth for days on end. You must not think I am +sulky when I do that. Just let me alone, and I'll soon be right. What +have you to confess now? 
It's just as well for two fellows to know the +worst of one another before they begin to live together." + +I laughed at this cross-examination. "I keep a bull pup," I said, "and +I object to rows because my nerves are shaken, and I get up at all sorts +of ungodly hours, and I am extremely lazy. I have another set of vices +when I'm well, but those are the principal ones at present." + +"Do you include violin-playing in your category of rows?" he asked, +anxiously. + +"It depends on the player," I answered. "A well-played violin is a treat +for the gods--a badly-played one----" + +"Oh, that's all right," he cried, with a merry laugh. "I think we may +consider the thing as settled--that is, if the rooms are agreeable to +you." + +"When shall we see them?" + +"Call for me here at noon to-morrow, and we'll go together and settle +everything," he answered. + +"All right--noon exactly," said I, shaking his hand. + +We left him working among his chemicals, and we walked together towards +my hotel. + +"By the way," I asked suddenly, stopping and turning upon Stamford, "how +the deuce did he know that I had come from Afghanistan?" + +My companion smiled an enigmatical smile. "That's just his little +peculiarity," he said. "A good many people have wanted to know how he +finds things out." + +"Oh! a mystery is it?" I cried, rubbing my hands. "This is very piquant. +I am much obliged to you for bringing us together. 'The proper study of +mankind is man,' you know." + +"You must study him, then," Stamford said, as he bade me good-bye. +"You'll find him a knotty problem, though. I'll wager he learns more +about you than you about him. Good-bye." + +"Good-bye," I answered, and strolled on to my hotel, considerably +interested in my new acquaintance. + + + + +CHAPTER II. THE SCIENCE OF DEDUCTION. + + +WE met next day as he had arranged, and inspected the rooms at No. 221B, +[5] Baker Street, of which he had spoken at our meeting. 
They +consisted of a couple of comfortable bed-rooms and a single large +airy sitting-room, cheerfully furnished, and illuminated by two broad +windows. So desirable in every way were the apartments, and so moderate +did the terms seem when divided between us, that the bargain was +concluded upon the spot, and we at once entered into possession. +That very evening I moved my things round from the hotel, and on the +following morning Sherlock Holmes followed me with several boxes and +portmanteaus. For a day or two we were busily employed in unpacking and +laying out our property to the best advantage. That done, we +gradually began to settle down and to accommodate ourselves to our new +surroundings. + +Holmes was certainly not a difficult man to live with. He was quiet +in his ways, and his habits were regular. It was rare for him to be +up after ten at night, and he had invariably breakfasted and gone out +before I rose in the morning. Sometimes he spent his day at the chemical +laboratory, sometimes in the dissecting-rooms, and occasionally in long +walks, which appeared to take him into the lowest portions of the City. +Nothing could exceed his energy when the working fit was upon him; but +now and again a reaction would seize him, and for days on end he would +lie upon the sofa in the sitting-room, hardly uttering a word or moving +a muscle from morning to night. On these occasions I have noticed such +a dreamy, vacant expression in his eyes, that I might have suspected him +of being addicted to the use of some narcotic, had not the temperance +and cleanliness of his whole life forbidden such a notion. + +As the weeks went by, my interest in him and my curiosity as to his +aims in life, gradually deepened and increased. His very person and +appearance were such as to strike the attention of the most casual +observer. In height he was rather over six feet, and so excessively +lean that he seemed to be considerably taller. 
His eyes were sharp and +piercing, save during those intervals of torpor to which I have alluded; +and his thin, hawk-like nose gave his whole expression an air of +alertness and decision. His chin, too, had the prominence and squareness +which mark the man of determination. His hands were invariably +blotted with ink and stained with chemicals, yet he was possessed of +extraordinary delicacy of touch, as I frequently had occasion to observe +when I watched him manipulating his fragile philosophical instruments. + +The reader may set me down as a hopeless busybody, when I confess how +much this man stimulated my curiosity, and how often I endeavoured +to break through the reticence which he showed on all that concerned +himself. Before pronouncing judgment, however, be it remembered, how +objectless was my life, and how little there was to engage my attention. +My health forbade me from venturing out unless the weather was +exceptionally genial, and I had no friends who would call upon me and +break the monotony of my daily existence. Under these circumstances, I +eagerly hailed the little mystery which hung around my companion, and +spent much of my time in endeavouring to unravel it. + +He was not studying medicine. He had himself, in reply to a question, +confirmed Stamford's opinion upon that point. Neither did he appear to +have pursued any course of reading which might fit him for a degree in +science or any other recognized portal which would give him an entrance +into the learned world. Yet his zeal for certain studies was remarkable, +and within eccentric limits his knowledge was so extraordinarily ample +and minute that his observations have fairly astounded me. Surely no man +would work so hard or attain such precise information unless he had some +definite end in view. Desultory readers are seldom remarkable for the +exactness of their learning. No man burdens his mind with small matters +unless he has some very good reason for doing so. 
+ +His ignorance was as remarkable as his knowledge. Of contemporary +literature, philosophy and politics he appeared to know next to nothing. +Upon my quoting Thomas Carlyle, he inquired in the naivest way who he +might be and what he had done. My surprise reached a climax, however, +when I found incidentally that he was ignorant of the Copernican Theory +and of the composition of the Solar System. That any civilized human +being in this nineteenth century should not be aware that the earth +travelled round the sun appeared to be to me such an extraordinary fact +that I could hardly realize it. + +"You appear to be astonished," he said, smiling at my expression of +surprise. "Now that I do know it I shall do my best to forget it." + +"To forget it!" + +"You see," he explained, "I consider that a man's brain originally is +like a little empty attic, and you have to stock it with such furniture +as you choose. A fool takes in all the lumber of every sort that he +comes across, so that the knowledge which might be useful to him gets +crowded out, or at best is jumbled up with a lot of other things so that +he has a difficulty in laying his hands upon it. Now the skilful workman +is very careful indeed as to what he takes into his brain-attic. He will +have nothing but the tools which may help him in doing his work, but of +these he has a large assortment, and all in the most perfect order. It +is a mistake to think that that little room has elastic walls and can +distend to any extent. Depend upon it there comes a time when for every +addition of knowledge you forget something that you knew before. It is +of the highest importance, therefore, not to have useless facts elbowing +out the useful ones." 
+ diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.Z b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.Z new file mode 100644 index 0000000000000000000000000000000000000000..9ffa9283ad25673bd23820c9685ced10c94d8dfe GIT binary patch literal 286 zcmV+(0pb21osdS}$N@nIO&JhG-i)Cm5X6-*bmqKqGY7;QGX&A3sdz^q8!{|FgsJ!j z4xBf0%8()WCgZ^=2Ia6(xJChzH*DIJNjWFvoH}F1?5vo^jY|-4fwDrhD$;ebtgG7ffXot*uyhFlR^aS!ZvX% zWo#nOXM2UCFf?1W^W1JpfErIEMd-h`vrO~~Q)%b%2i@-U^c@1=z{C-9;{lI)S%UXH zN22W|Q66Gvx2oX`pbR#9QGx+J$BfrtOiwFq1dSqGv19NC)a%^v7Yc6znbO2S4L2kt$3`F;w!UsVA0^L?s_g!@D z80UlHK!ly-=k_(JRH-{-#&2dmS_jJv?~TmIZ!~RHq0PQ63TNw*BA{=!ttlt4cI&?~ zcWJbQ&n7D?YXseiSQSdW6n=7RveWYTGlpN|Kv;*qBRLXC_=LH))#tP5Hty6vPw;XGv*21`Lj^3I7?2n$ITxpQ%JnWfJ5BQ#ycy5Yi43d= zpQq3NexH54+TCrR_n%kBEDtX!gE)e1vZC?>g;QimomM@=lyYcmK&=czVroIsqgig= z9p#X0U%jPpBqpwbM{9{0d|a93X~^m1qoqP>Nyv&nQNi_a99aOvgfmwS)k*|}6Xh8K z<^utpg`(82=GSE6F-T#A(e~@v+`?>@`^sW0V;QZIzy_M2%ju|k@;sDz*5(BI5keQR z|1uZr52z`-P1TI@XMT3z^e=UHsHb@W!JZ#ArvJKuOsizntV Y*J>zEO#(I)07cL`YS)#~*k8W+A1t4MVgLXD literal 0 HcmV?d00001 diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.lzma b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.lzma new file mode 100644 index 0000000000000000000000000000000000000000..bfdf7fb42ddb5c2afb072dbc54aba8f1a799769d GIT binary patch literal 286 zcmV+(0pb2#004jh|NsC0|NsC003#d9LIXYFwP?GBIrY$L$F%zazXXYem;~e&*alte z=t9^frS~$z0Y?Q3jO;}|``UB5a58=2`h47LM78DJ;cP}*akjkDR-NX_3~%a6;3H-U=5C}zbz;?Y z`w?Vk3);}Fg)Yf_e!e6T*}b8O6e)o?{`Mvt?=KuxIAf?%$0CaWgJ9RGjC~ko=%*WN znHGZ)N30;D3rK?Q8o|l6PDhClZYx>^4hj_<8DPM)Wihg3T!DWCsJdG~w#CO^Lv8+%0dG@;k9VHhB@`n zYR9zu0lx%^g_s277T5+|>*zw*C8hT=!U0DV&AycCun8a5!D%-6+&tgEp)THmoZBu) ztCp6Ko*fFekml`V{&H=ZaWv5YQjF|HKKt5px^Oam;re{sYeco>-QjFTTXD9$(pH`3 z$qaAmO5h`A3FdC3Lv>=+bo&uxXA9cUt%WYhe15(p5!t<=i4-Y;IR5q~8}Bb1RXAg) zQpX~T0E1xHsf>LXW$33HYnc{<5l5^bqYFra?i#_#wN6Kg5N<141r7=o92sE1v}G}} 
zWL$xN1gN@OK(@ulUqlBbS$X16%i)u<*rUEmr`D@JFCHyYAsGELU@+Ys00000D8O7Y e)y-i50jC1*0ssK(BNQ;P#Ao{g000001X)_CGnBvp literal 0 HcmV?d00001 diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.zst b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/data/sherlock.zst new file mode 100644 index 0000000000000000000000000000000000000000..c649ac9b1b3c00c94c77fa794a0dbf6b202bc2d1 GIT binary patch literal 249 zcmVD9fTvAX>`s{}h0;l}`@41Eh=@$F8_{9{06HxIEdVS4 zD_QlWFm<8VBMJwIqoB$o0$iIbCJ9>4g;9ykn$Y^7b)oJRIB^RG_-LId5JzFk2fd2v zv)J`D+$ZEIAMypCab4UX^l(j`R~h0XvH69x<$L3Uep#u2nFPr%95Sx;ke3+*0Mc_F zA(i9Q@j21$a@7Z&Xlgzqn8)p^_`KzE4fnc2co4+E9F^NWs~e|$Mw^<8gUY)qHs53< zWVp>bqA*`&#%7(7gq(Q1484BbdIJXl0V|bIi~txcA5{e\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04" + ); + cmd.arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_utf16_explicit, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04" + ); + cmd.arg("-Eutf-16le").arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_eucjp, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes( + "foo", + b"\xa7\xba\xa7\xd6\xa7\xe2\xa7\xdd\xa7\xe0\xa7\xdc \xa7\xb7\xa7\xe0\xa7\xdd\xa7\xde\xa7\xe3" + ); + cmd.arg("-Eeuc-jp").arg("Шерлок Холмс"); + eqnice!("foo:Шерлок Холмс\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_unknown_encoding, |_: Dir, mut cmd: TestCommand| { + cmd.arg("-Efoobar").assert_non_empty_stderr(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1 +rgtest!(f1_replacement_encoding, |_: Dir, mut cmd: TestCommand| { + cmd.arg("-Ecsiso2022kr").assert_non_empty_stderr(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/7 +rgtest!(f7, |dir: Dir, mut cmd: TestCommand| { + 
dir.create("sherlock", SHERLOCK); + dir.create("pat", "Sherlock\nHolmes"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-fpat").arg("sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/7 +rgtest!(f7_stdin, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-f-").pipe(b"Sherlock")); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/20 +rgtest!(f20_no_filename, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--no-filename"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.arg("--no-filename").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/34 +rgtest!(f34_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:Sherlock +sherlock:Sherlock +"; + eqnice!(expected, cmd.arg("-o").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/34 +rgtest!(f34_only_matching_line_column, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:1:57:Sherlock +sherlock:3:49:Sherlock +"; + cmd.arg("-o").arg("--column").arg("-n").arg("Sherlock"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_relative_cwd, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore", "foo\n/bar"); + dir.create_dir("bar"); + dir.create_dir("baz/bar"); + dir.create_dir("baz/baz/bar"); + dir.create("bar/test", "test"); + dir.create("baz/bar/test", "test"); + dir.create("baz/baz/bar/test", "test"); + dir.create("baz/foo", "test"); + dir.create("baz/test", "test"); + dir.create("foo", "test"); + dir.create("test", "test"); + + cmd.arg("-l").arg("test"); + + // First, get a baseline without applying ignore rules. + let expected = " +bar/test +baz/bar/test +baz/baz/bar/test +baz/foo +baz/test +foo +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); + + // Now try again with the ignore file activated. + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let expected = " +baz/bar/test +baz/baz/bar/test +baz/test +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); + + // Now do it again, but inside the baz directory. Since the ignore file + // is interpreted relative to the CWD, this will cause the /bar anchored + // pattern to filter out baz/bar, which is a subtle difference between true + // parent ignore files and manually specified ignore files. 
+ let mut cmd = dir.command(); + cmd.args(&["--ignore-file", "../.not-an-ignore", "-l", "test"]); + cmd.current_dir(dir.path().join("baz")); + let expected = " +baz/bar/test +test +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_precedence_with_others, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore", "*.log"); + dir.create(".ignore", "!imp.log"); + dir.create("imp.log", "test"); + dir.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore").arg("test"); + eqnice!("imp.log:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +rgtest!(f45_precedence_internal, |dir: Dir, mut cmd: TestCommand| { + dir.create(".not-an-ignore1", "*.log"); + dir.create(".not-an-ignore2", "!imp.log"); + dir.create("imp.log", "test"); + dir.create("wat.log", "test"); + + cmd.args(&[ + "--ignore-file", + ".not-an-ignore1", + "--ignore-file", + ".not-an-ignore2", + "test", + ]); + eqnice!("imp.log:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/68 +rgtest!(f68_no_ignore_vcs, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "foo"); + dir.create(".ignore", "bar"); + dir.create("foo", "test"); + dir.create("bar", "test"); + + eqnice!("foo:test\n", cmd.arg("--no-ignore-vcs").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/70 +rgtest!(f70_smart_case, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-S").arg("sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files_with_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("--null").arg("--files-with-matches").arg("Sherlock"); + eqnice!("sherlock\x00", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files_without_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "foo"); + + cmd.arg("--null").arg("--files-without-match").arg("Sherlock"); + eqnice!("file.py\x00", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_count, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("--null").arg("--count").arg("Sherlock"); + eqnice!("sherlock\x002\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_files, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + eqnice!("sherlock\x00", cmd.arg("--null").arg("--files").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/89 +rgtest!(f89_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock\x00Holmeses, success in the province of detective work must always +sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes +sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.arg("--null").arg("-C1").arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/109 +rgtest!(f109_max_depth, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("one"); + dir.create("one/pass", "far"); + dir.create_dir("one/too"); + dir.create("one/too/many", "far"); + + cmd.arg("--maxdepth").arg("2").arg("far"); + eqnice!("one/pass:far\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/124 +rgtest!(f109_case_sensitive_part1, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + + cmd.arg("--smart-case").arg("--case-sensitive").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/124 +rgtest!(f109_case_sensitive_part2, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + cmd.arg("--ignore-case").arg("--case-sensitive").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test"); + + let expected = "foo:test\nfoo:[Omitted long matching line]\n"; + eqnice!(expected, cmd.arg("-M26").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\nabcdefghijklmnopqrstuvwxyz"); + + let expected = "foo:test\nfoo-[Omitted long context line]\n"; + eqnice!(expected, cmd.arg("-M20").arg("-C1").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/129 +rgtest!(f129_replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test"); + + let expected = "foo:foo\nfoo:[Omitted long line with 2 matches]\n"; + eqnice!(expected, cmd.arg("-M26").arg("-rfoo").arg("test").stdout()); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/159 +rgtest!(f159_max_count, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest"); + + eqnice!("foo:test\n", cmd.arg("-m1").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/159 +rgtest!(f159_max_count_zero, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\ntest"); + + cmd.arg("-m0").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/196 +rgtest!(f196_persistent_config, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("sherlock").arg("sherlock"); + + // Make sure we get no matches by default. + cmd.assert_err(); + + // Now add our config file, and make sure it impacts ripgrep. + dir.create(".ripgreprc", "--ignore-case"); + cmd.cmd().env("RIPGREP_CONFIG_PATH", ".ripgreprc"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/243 +rgtest!(f243_column_line, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + + eqnice!("foo:1:1:test\n", cmd.arg("--column").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/263 +rgtest!(f263_sort_files, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + dir.create("abc", "test"); + dir.create("zoo", "test"); + dir.create("bar", "test"); + + let expected = "abc:test\nbar:test\nfoo:test\nzoo:test\n"; + eqnice!(expected, cmd.arg("--sort-files").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/263 +rgtest!(f263_sort_files_reverse, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + dir.create("abc", "test"); + dir.create("zoo", "test"); + dir.create("bar", "test"); + + let expected = "zoo:test\nfoo:test\nbar:test\nabc:test\n"; + eqnice!(expected, 
cmd.arg("--sortr=path").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/275 +rgtest!(f275_pathsep, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create("foo/bar", "test"); + + cmd.arg("test").arg("--path-separator").arg("Z"); + eqnice!("fooZbar:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 +rgtest!(f362_dfa_size_limit, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + // This should fall back to the nfa engine but should still produce the + // expected result. + cmd.arg("--dfa-size-limit").arg("10").arg(r"For\s").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 +rgtest!(f362_exceeds_regex_size_limit, |dir: Dir, mut cmd: TestCommand| { + // --regex-size-limit doesn't apply to PCRE2. + if dir.is_pcre2() { + return; + } + cmd.arg("--regex-size-limit").arg("10K").arg(r"[0-9]\w+").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/362 +#[cfg(target_pointer_width = "32")] +rgtest!( + f362_u64_to_narrow_usize_overflow, + |dir: Dir, mut cmd: TestCommand| { + // --dfa-size-limit doesn't apply to PCRE2. 
+ if dir.is_pcre2() { + return; + } + dir.create_size("foo", 1000000); + + // 2^35 * 2^20 is ok for u64, but not for usize + cmd.arg("--dfa-size-limit").arg("34359738368M").arg("--files"); + cmd.assert_err(); + } +); + +// See: https://github.com/BurntSushi/ripgrep/issues/411 +rgtest!( + f411_single_threaded_search_stats, + |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let lines = cmd.arg("-j1").arg("--stats").arg("Sherlock").stdout(); + assert!(lines.contains("Sherlock")); + assert!(lines.contains("2 matched lines")); + assert!(lines.contains("1 files contained matches")); + assert!(lines.contains("1 files searched")); + assert!(lines.contains("seconds")); + } +); + +rgtest!(f411_parallel_search_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock_1", SHERLOCK); + dir.create("sherlock_2", SHERLOCK); + + let lines = cmd.arg("-j2").arg("--stats").arg("Sherlock").stdout(); + assert!(lines.contains("4 matched lines")); + assert!(lines.contains("2 files contained matches")); + assert!(lines.contains("2 files searched")); + assert!(lines.contains("seconds")); +}); + +rgtest!( + f411_single_threaded_quiet_search_stats, + |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let lines = cmd + .arg("--quiet") + .arg("-j1") + .arg("--stats") + .arg("Sherlock") + .stdout(); + assert!(!lines.contains("Sherlock")); + assert!(lines.contains("2 matched lines")); + assert!(lines.contains("1 files contained matches")); + assert!(lines.contains("1 files searched")); + assert!(lines.contains("seconds")); + } +); + +rgtest!(f411_parallel_quiet_search_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock_1", SHERLOCK); + dir.create("sherlock_2", SHERLOCK); + + let lines = + cmd.arg("-j2").arg("--quiet").arg("--stats").arg("Sherlock").stdout(); + assert!(!lines.contains("Sherlock")); + assert!(lines.contains("4 matched lines")); + assert!(lines.contains("2 files contained matches")); + assert!(lines.contains("2 
files searched")); + assert!(lines.contains("seconds")); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf_multiline, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg("-U").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +rgtest!(f416_crlf_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--crlf").arg("-o").arg(r"Sherlock$").arg("sherlock"); + + let expected = "\ +Sherlock\r +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/419 +rgtest!(f419_zero_as_shortcut_for_null, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + cmd.arg("-0").arg("--count").arg("Sherlock"); + eqnice!("sherlock\x002\n", cmd.stdout()); +}); + +rgtest!(f740_passthru, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "\nfoo\nbar\nfoobar\n\nbaz\n"); + dir.create("patterns", "foo\nbar\n"); + + // We can't assume that the way colour specs are translated to ANSI + // sequences will remain stable, and --replace doesn't currently work with + // pass-through, so for now we don't actually test the match sub-strings + let common_args = &["-n", "--passthru"]; + let foo_expected = "\ +1- +2:foo +3-bar +4:foobar +5- +6-baz +"; + + // With single pattern + cmd.args(common_args).arg("foo").arg("file"); + eqnice!(foo_expected, cmd.stdout()); + + let foo_bar_expected = "\ +1- +2:foo 
+3:bar +4:foobar +5- +6-baz +"; + + // With multiple -e patterns + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-e", "foo", "-e", "bar", "file"]); + eqnice!(foo_bar_expected, cmd.stdout()); + + // With multiple -f patterns + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-f", "patterns", "file"]); + eqnice!(foo_bar_expected, cmd.stdout()); + + // -c should override + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-c", "foo", "file"]); + eqnice!("2\n", cmd.stdout()); + + let only_foo_expected = "\ +1- +2:foo +3-bar +4:foo +5- +6-baz +"; + + // -o should work + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-o", "foo", "file"]); + eqnice!(only_foo_expected, cmd.stdout()); + + let replace_foo_expected = "\ +1- +2:wat +3-bar +4:watbar +5- +6-baz +"; + + // -r should work + let mut cmd = dir.command(); + cmd.args(common_args); + cmd.args(&["-r", "wat", "foo", "file"]); + eqnice!(replace_foo_expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("."); + + cmd.assert_exit_code(0); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_no_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("NADA"); + + cmd.assert_exit_code(1); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/948 +rgtest!(f948_exit_code_error, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("*"); + + cmd.assert_exit_code(2); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/917 +rgtest!(f917_trim, |dir: Dir, mut cmd: TestCommand| { + const SHERLOCK: &'static str = "\ +zzz + For the Doctor Watsons of this world, as opposed to the Sherlock + Holmeses, success in the province of detective work must always +\tbe, to a very large extent, the result of luck. 
Sherlock Holmes + can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, + and exhibited clearly, with a label attached. +"; + dir.create("sherlock", SHERLOCK); + cmd.args(&["-n", "-B1", "-A2", "--trim", "Holmeses", "sherlock"]); + + let expected = "\ +2-For the Doctor Watsons of this world, as opposed to the Sherlock +3:Holmeses, success in the province of detective work must always +4-be, to a very large extent, the result of luck. Sherlock Holmes +5-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/917 +// +// This is like f917_trim, except this tests that trimming occurs even when the +// whitespace is part of a match. +rgtest!(f917_trim_match, |dir: Dir, mut cmd: TestCommand| { + const SHERLOCK: &'static str = "\ +zzz + For the Doctor Watsons of this world, as opposed to the Sherlock + Holmeses, success in the province of detective work must always +\tbe, to a very large extent, the result of luck. Sherlock Holmes + can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, + and exhibited clearly, with a label attached. +"; + dir.create("sherlock", SHERLOCK); + cmd.args(&["-n", "-B1", "-A2", "--trim", r"\s+Holmeses", "sherlock"]); + + let expected = "\ +2-For the Doctor Watsons of this world, as opposed to the Sherlock +3:Holmeses, success in the province of detective work must always +4-be, to a very large extent, the result of luck. 
Sherlock Holmes +5-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(f917_trim_multi_standard, |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&["--multiline", "--trim", "-r$0", "--no-filename", r"a\n?bc"]); + + let expected = "0123456789abcdefghijklmnopqrstuvwxyz\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(f917_trim_max_columns_normal, |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&[ + "--trim", + "--max-columns-preview", + "-M8", + "--no-filename", + "abc", + ]); + + let expected = "01234567 [... omitted end of long line]\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(f917_trim_max_columns_matches, |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&[ + "--trim", + "--max-columns-preview", + "-M8", + "--color=always", + "--colors=path:none", + "--no-filename", + "abc", + ]); + + let expected = "01234567 [... 1 more match]\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!( + f917_trim_max_columns_multi_standard, + |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&[ + "--multiline", + "--trim", + "--max-columns-preview", + "-M8", + // Force the "slow" printing path without actually + // putting colors in the output. + "--color=always", + "--colors=path:none", + "--no-filename", + r"a\n?bc", + ]); + + let expected = "01234567 [... 
1 more match]\n"; + eqnice!(expected, cmd.stdout()); + } +); + +rgtest!( + f917_trim_max_columns_multi_only_matching, + |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&[ + "--multiline", + "--trim", + "--max-columns-preview", + "-M8", + "--only-matching", + "--no-filename", + r".*a\n?bc.*", + ]); + + let expected = "01234567 [... 0 more matches]\n"; + eqnice!(expected, cmd.stdout()); + } +); + +rgtest!( + f917_trim_max_columns_multi_per_match, + |dir: Dir, mut cmd: TestCommand| { + const HAYSTACK: &str = " 0123456789abcdefghijklmnopqrstuvwxyz"; + dir.create("haystack", HAYSTACK); + cmd.args(&[ + "--multiline", + "--trim", + "--max-columns-preview", + "-M8", + "--vimgrep", + "--no-filename", + r".*a\n?bc.*", + ]); + + let expected = "1:1:01234567 [... 0 more matches]\n"; + eqnice!(expected, cmd.stdout()); + } +); + +// See: https://github.com/BurntSushi/ripgrep/issues/993 +rgtest!(f993_null_data, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\x00bar\x00\x00\x00baz\x00"); + cmd.arg("--null-data").arg(r".+").arg("test"); + + // If we just used -a instead of --null-data, then the result would include + // all NUL bytes. + let expected = "foo\x00bar\x00baz\x00"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1078 +// +// N.B. There are many more tests in the grep-printer crate. +rgtest!(f1078_max_columns_preview1, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-M46", + "--max-columns-preview", + "exhibited|dusted|has to have it", + ]); + + let expected = "\ +sherlock:but Doctor Watson has to have it taken out for [... omitted end of long line] +sherlock:and exhibited clearly, with a label attached. 
+"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(f1078_max_columns_preview2, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-M43", + "--max-columns-preview", + // Doing a replacement forces ripgrep to show the number of remaining + // matches. Normally, this happens by default when printing a tty with + // colors. + "-rxxx", + "exhibited|dusted|has to have it", + ]); + + let expected = "\ +sherlock:but Doctor Watson xxx taken out for him and [... 1 more match] +sherlock:and xxx clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1138 +rgtest!(f1138_no_ignore_dot, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "foo"); + dir.create(".ignore", "bar"); + dir.create(".fzf-ignore", "quux"); + dir.create("foo", ""); + dir.create("bar", ""); + dir.create("quux", ""); + + cmd.arg("--sort").arg("path").arg("--files"); + eqnice!("quux\n", cmd.stdout()); + eqnice!("bar\nquux\n", cmd.arg("--no-ignore-dot").stdout()); + eqnice!("bar\n", cmd.arg("--ignore-file").arg(".fzf-ignore").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1155 +rgtest!(f1155_auto_hybrid_regex, |dir: Dir, mut cmd: TestCommand| { + // No sense in testing a hybrid regex engine with only one engine! + if !dir.is_pcre2() { + return; + } + + dir.create("sherlock", SHERLOCK); + cmd.arg("--no-pcre2").arg("--auto-hybrid-regex").arg(r"(?<=the )Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1207 +// +// Tests if without encoding 'none' flag null bytes are consumed by automatic +// encoding detection. 
+rgtest!(f1207_auto_encoding, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("foo", b"\xFF\xFE\x00\x62"); + cmd.arg("-a").arg("\\x00").arg("foo"); + cmd.assert_exit_code(1); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1207 +// +// Tests if encoding 'none' flag does treat file as raw bytes +rgtest!(f1207_ignore_encoding, |dir: Dir, mut cmd: TestCommand| { + // PCRE2 chokes on this test because it can't search invalid non-UTF-8 + // and the point of this test is to search raw UTF-16. + if dir.is_pcre2() { + return; + } + + dir.create_bytes("foo", b"\xFF\xFE\x00\x62"); + cmd.arg("--encoding").arg("none").arg("-a").arg("\\x00").arg("foo"); + eqnice!("\u{FFFD}\u{FFFD}\x00b\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1414 +rgtest!(f1414_no_require_git, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "foo"); + dir.create("foo", ""); + dir.create("bar", ""); + + let stdout = cmd.args(&["--sort", "path", "--files"]).stdout(); + eqnice!("bar\nfoo\n", stdout); + + let stdout = + cmd.args(&["--sort", "path", "--files", "--no-require-git"]).stdout(); + eqnice!("bar\n", stdout); + + let stdout = cmd + .args(&[ + "--sort", + "path", + "--files", + "--no-require-git", + "--require-git", + ]) + .stdout(); + eqnice!("bar\nfoo\n", stdout); +}); + +// See: https://github.com/BurntSushi/ripgrep/pull/1420 +rgtest!(f1420_no_ignore_exclude, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git/info"); + dir.create(".git/info/exclude", "foo"); + dir.create("bar", ""); + dir.create("foo", ""); + + cmd.arg("--sort").arg("path").arg("--files"); + eqnice!("bar\n", cmd.stdout()); + eqnice!("bar\nfoo\n", cmd.arg("--no-ignore-exclude").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/pull/1466 +rgtest!(f1466_no_ignore_files, |dir: Dir, mut cmd: TestCommand| { + dir.create(".myignore", "bar"); + dir.create("bar", ""); + dir.create("foo", ""); + + // Test that --no-ignore-files disables --ignore-file. 
+ // And that --ignore-files overrides --no-ignore-files. + cmd.arg("--sort").arg("path").arg("--files"); + eqnice!("bar\nfoo\n", cmd.stdout()); + eqnice!("foo\n", cmd.arg("--ignore-file").arg(".myignore").stdout()); + eqnice!("bar\nfoo\n", cmd.arg("--no-ignore-files").stdout()); + eqnice!("foo\n", cmd.arg("--ignore-files").stdout()); + + // Test that the -u flag does not disable --ignore-file. + let mut cmd = dir.command(); + cmd.arg("--sort").arg("path").arg("--files"); + cmd.arg("--ignore-file").arg(".myignore"); + eqnice!("foo\n", cmd.stdout()); + eqnice!("foo\n", cmd.arg("-u").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/pull/2361 +rgtest!(f2361_sort_nested_files, |dir: Dir, mut cmd: TestCommand| { + use std::{thread::sleep, time::Duration}; + + if crate::util::is_cross() { + return; + } + dir.create("foo", "1"); + sleep(Duration::from_millis(100)); + dir.create_dir("dir"); + sleep(Duration::from_millis(100)); + dir.create(dir.path().join("dir").join("bar"), "1"); + + cmd.arg("--sort").arg("accessed").arg("--files"); + eqnice!("foo\ndir/bar\n", cmd.stdout()); + + dir.create("foo", "2"); + sleep(Duration::from_millis(100)); + dir.create(dir.path().join("dir").join("bar"), "2"); + sleep(Duration::from_millis(100)); + + cmd.arg("--sort").arg("accessed").arg("--files"); + eqnice!("foo\ndir/bar\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1404 +rgtest!(f1404_nothing_searched_warning, |dir: Dir, mut cmd: TestCommand| { + dir.create(".ignore", "ignored-dir/**"); + dir.create_dir("ignored-dir"); + dir.create("ignored-dir/foo", "needle"); + + // Test that, if ripgrep searches only ignored folders/files, then there + // is a non-zero exit code. + cmd.arg("needle"); + cmd.assert_err(); + + // Test that we actually get an error message that we expect. 
+ let output = cmd.raw_output(); + let stderr = String::from_utf8_lossy(&output.stderr); + let expected = "\ + rg: No files were searched, which means ripgrep probably applied \ + a filter you didn't expect.\n\ + Running with --debug will show why files are being skipped.\n\ + "; + eqnice!(expected, stderr); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1404 +rgtest!(f1404_nothing_searched_ignored, |dir: Dir, mut cmd: TestCommand| { + dir.create(".ignore", "ignored-dir/**"); + dir.create_dir("ignored-dir"); + dir.create("ignored-dir/foo", "needle"); + + // Test that, if ripgrep searches only ignored folders/files, then there + // is a non-zero exit code. + cmd.arg("--no-messages").arg("needle"); + cmd.assert_err(); + + // But since --no-messages is given, there should not be any error message + // printed. + let output = cmd.raw_output(); + let stderr = String::from_utf8_lossy(&output.stderr); + let expected = ""; + eqnice!(expected, stderr); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1842 +rgtest!(f1842_field_context_separator, |dir: Dir, _: TestCommand| { + dir.create("sherlock", SHERLOCK); + + // Test the default. + let base = &["-n", "-A1", "Doctor Watsons", "sherlock"]; + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +"; + eqnice!(expected, dir.command().args(base).stdout()); + + // Test that it can be overridden. + let mut args = vec!["--field-context-separator", "!"]; + args.extend(base); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2!Holmeses, success in the province of detective work must always +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that it can use multiple bytes. 
+ let mut args = vec!["--field-context-separator", "!!"]; + args.extend(base); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2!!Holmeses, success in the province of detective work must always +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that unescaping works. + let mut args = vec!["--field-context-separator", r"\x7F"]; + args.extend(base); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2\x7FHolmeses, success in the province of detective work must always +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that an empty separator is OK. + let mut args = vec!["--field-context-separator", r""]; + args.extend(base); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2Holmeses, success in the province of detective work must always +"; + eqnice!(expected, dir.command().args(&args).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1842 +rgtest!(f1842_field_match_separator, |dir: Dir, _: TestCommand| { + dir.create("sherlock", SHERLOCK); + + // Test the default. + let base = &["-n", "Doctor Watsons", "sherlock"]; + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, dir.command().args(base).stdout()); + + // Test that it can be overridden. + let mut args = vec!["--field-match-separator", "!"]; + args.extend(base); + let expected = "\ +1!For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that it can use multiple bytes. + let mut args = vec!["--field-match-separator", "!!"]; + args.extend(base); + let expected = "\ +1!!For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that unescaping works. 
+ let mut args = vec!["--field-match-separator", r"\x7F"]; + args.extend(base); + let expected = "\ +1\x7FFor the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, dir.command().args(&args).stdout()); + + // Test that an empty separator is OK. + let mut args = vec!["--field-match-separator", r""]; + args.extend(base); + let expected = "\ +1For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, dir.command().args(&args).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2288 +rgtest!(f2288_context_partial_override, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n"); + cmd.args(&["-C1", "-A2", "5", "test"]); + eqnice!("4\n5\n6\n7\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2288 +rgtest!( + f2288_context_partial_override_rev, + |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n"); + cmd.args(&["-A2", "-C1", "5", "test"]); + eqnice!("4\n5\n6\n7\n", cmd.stdout()); + } +); + +rgtest!(no_context_sep, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&["-A1", "--no-context-separator", "foo", "test"]); + eqnice!("foo\nctx\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(no_context_sep_overrides, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&[ + "-A1", + "--context-separator", + "AAA", + "--no-context-separator", + "foo", + "test", + ]); + eqnice!("foo\nctx\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(no_context_sep_overridden, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&[ + "-A1", + "--no-context-separator", + "--context-separator", + "AAA", + "foo", + "test", + ]); + eqnice!("foo\nctx\nAAA\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(context_sep, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", 
"foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&["-A1", "--context-separator", "AAA", "foo", "test"]); + eqnice!("foo\nctx\nAAA\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(context_sep_default, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&["-A1", "foo", "test"]); + eqnice!("foo\nctx\n--\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(context_sep_empty, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx"); + cmd.args(&["-A1", "--context-separator", "", "foo", "test"]); + eqnice!("foo\nctx\n\nfoo\nctx\n", cmd.stdout()); +}); + +rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "δ"); + cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1790 +rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "line1\nline2\nline3\nline4\nline5"); + cmd.args(&["--stop-on-nonmatch", "[235]"]); + eqnice!("test:line2\ntest:line3\n", cmd.stdout()); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/hay.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/hay.rs new file mode 100644 index 000000000..15a53b150 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/hay.rs @@ -0,0 +1,17 @@ +pub const SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + +pub const SHERLOCK_CRLF: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock\r +Holmeses, success in the province of detective work must always\r +be, to a very large extent, the result of luck. 
Sherlock Holmes\r +can extract a clew from a wisp of straw or a flake of cigar ash;\r +but Doctor Watson has to have it taken out for him and dusted,\r +and exhibited clearly, with a label attached.\r +"; diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/json.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/json.rs new file mode 100644 index 000000000..86d8518a4 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/json.rs @@ -0,0 +1,373 @@ +use std::time; + +use serde_derive::Deserialize; +use serde_json as json; + +use crate::hay::{SHERLOCK, SHERLOCK_CRLF}; +use crate::util::{Dir, TestCommand}; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(tag = "type", content = "data")] +#[serde(rename_all = "snake_case")] +enum Message { + Begin(Begin), + End(End), + Match(Match), + Context(Context), + Summary(Summary), +} + +impl Message { + fn unwrap_begin(&self) -> Begin { + match *self { + Message::Begin(ref x) => x.clone(), + ref x => panic!("expected Message::Begin but got {:?}", x), + } + } + + fn unwrap_end(&self) -> End { + match *self { + Message::End(ref x) => x.clone(), + ref x => panic!("expected Message::End but got {:?}", x), + } + } + + fn unwrap_match(&self) -> Match { + match *self { + Message::Match(ref x) => x.clone(), + ref x => panic!("expected Message::Match but got {:?}", x), + } + } + + fn unwrap_context(&self) -> Context { + match *self { + Message::Context(ref x) => x.clone(), + ref x => panic!("expected Message::Context but got {:?}", x), + } + } + + fn unwrap_summary(&self) -> Summary { + match *self { + Message::Summary(ref x) => x.clone(), + ref x => panic!("expected Message::Summary but got {:?}", x), + } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Begin { + path: Option<Data>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct End { + path: Option<Data>, + binary_offset: Option<u64>, + stats: Stats, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, 
Eq)] +struct Summary { + elapsed_total: Duration, + stats: Stats, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Match { + path: Option<Data>, + lines: Data, + line_number: Option<u64>, + absolute_offset: u64, + submatches: Vec<SubMatch>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Context { + path: Option<Data>, + lines: Data, + line_number: Option<u64>, + absolute_offset: u64, + submatches: Vec<SubMatch>, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct SubMatch { + #[serde(rename = "match")] + m: Data, + start: usize, + end: usize, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(untagged)] +enum Data { + Text { text: String }, + // This variant is used when the data isn't valid UTF-8. The bytes are + // base64 encoded, so using a String here is OK. + Bytes { bytes: String }, +} + +impl Data { + fn text(s: &str) -> Data { + Data::Text { text: s.to_string() } + } + fn bytes(s: &str) -> Data { + Data::Bytes { bytes: s.to_string() } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Stats { + elapsed: Duration, + searches: u64, + searches_with_match: u64, + bytes_searched: u64, + bytes_printed: u64, + matched_lines: u64, + matches: u64, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +struct Duration { + #[serde(flatten)] + duration: time::Duration, + human: String, +} + +/// Decode JSON Lines into a Vec<Message>. If there was an error decoding, +/// this function panics. 
+fn json_decode(jsonlines: &str) -> Vec<Message> { + json::Deserializer::from_str(jsonlines) + .into_iter() + .collect::<Result<Vec<Message>, _>>() + .unwrap() +} + +rgtest!(basic, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--json").arg("-B1").arg("Sherlock Holmes").arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::text("sherlock")) } + ); + assert_eq!( + msgs[1].unwrap_context(), + Context { + path: Some(Data::text("sherlock")), + lines: Data::text( + "Holmeses, success in the province of \ + detective work must always\n", + ), + line_number: Some(2), + absolute_offset: 65, + submatches: vec![], + } + ); + assert_eq!( + msgs[2].unwrap_match(), + Match { + path: Some(Data::text("sherlock")), + lines: Data::text( + "be, to a very large extent, the result of luck. \ + Sherlock Holmes\n", + ), + line_number: Some(3), + absolute_offset: 129, + submatches: vec![SubMatch { + m: Data::text("Sherlock Holmes"), + start: 48, + end: 63, + },], + } + ); + assert_eq!(msgs[3].unwrap_end().path, Some(Data::text("sherlock"))); + assert_eq!(msgs[3].unwrap_end().binary_offset, None); + assert_eq!(msgs[4].unwrap_summary().stats.searches_with_match, 1); + assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 494); +}); + +rgtest!(quiet_stats, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--json") + .arg("--quiet") + .arg("--stats") + .arg("Sherlock Holmes") + .arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + assert_eq!(msgs[0].unwrap_summary().stats.searches_with_match, 1); + assert_eq!(msgs[0].unwrap_summary().stats.bytes_searched, 367); +}); + +#[cfg(unix)] +rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + // This test does not work with PCRE2 because PCRE2 does not support the + // `u` flag. + if dir.is_pcre2() { + return; + } + // macOS doesn't like this either... sigh. 
+ if cfg!(target_os = "macos") { + return; + } + + let name = &b"foo\xFFbar"[..]; + let contents = &b"quux\xFFbaz"[..]; + + // APFS does not support creating files with invalid UTF-8 bytes, so just + // skip the test if we can't create our file. Presumably we don't need this + // check if we're already skipping it on macOS, but maybe other file + // systems won't like this test either? + if !dir.try_create_bytes(OsStr::from_bytes(name), contents).is_ok() { + return; + } + cmd.arg("--json").arg(r"(?-u)\xFF"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::bytes("Zm9v/2Jhcg==")) } + ); + assert_eq!( + msgs[1].unwrap_match(), + Match { + path: Some(Data::bytes("Zm9v/2Jhcg==")), + lines: Data::bytes("cXV1eP9iYXo="), + line_number: Some(1), + absolute_offset: 0, + submatches: vec![SubMatch { + m: Data::bytes("/w=="), + start: 4, + end: 5, + },], + } + ); +}); + +rgtest!(notutf8_file, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + + // This test does not work with PCRE2 because PCRE2 does not support the + // `u` flag. + if dir.is_pcre2() { + return; + } + + let name = "foo"; + let contents = &b"quux\xFFbaz"[..]; + + // APFS does not support creating files with invalid UTF-8 bytes, so just + // skip the test if we can't create our file. 
+ if !dir.try_create_bytes(OsStr::new(name), contents).is_ok() { + return; + } + cmd.arg("--json").arg(r"(?-u)\xFF"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[0].unwrap_begin(), + Begin { path: Some(Data::text("foo")) } + ); + assert_eq!( + msgs[1].unwrap_match(), + Match { + path: Some(Data::text("foo")), + lines: Data::bytes("cXV1eP9iYXo="), + line_number: Some(1), + absolute_offset: 0, + submatches: vec![SubMatch { + m: Data::bytes("/w=="), + start: 4, + end: 5, + },], + } + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/416 +// +// This test in particular checks that our match does _not_ include the `\r` +// even though the '$' may be rewritten as '(?:\r??$)' and could thus include +// `\r` in the match. +rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK_CRLF); + cmd.arg("--json").arg("--crlf").arg(r"Sherlock$").arg("sherlock"); + + let msgs = json_decode(&cmd.stdout()); + + assert_eq!( + msgs[1].unwrap_match().submatches[0].clone(), + SubMatch { m: Data::text("Sherlock"), start: 56, end: 64 }, + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1095 +// +// This test checks that we don't drop the \r\n in a matching line when --crlf +// mode is enabled. +rgtest!(r1095_missing_crlf, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\r\n"); + + // Check without --crlf flag. + let msgs = json_decode(&cmd.arg("--json").arg("test").stdout()); + assert_eq!(msgs.len(), 4); + assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n")); + + // Now check with --crlf flag. + let msgs = json_decode(&cmd.arg("--crlf").stdout()); + assert_eq!(msgs.len(), 4); + assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n")); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1095 +// +// This test checks that we don't return empty submatches when matching a `\n` +// in CRLF mode. 
+rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test\r\n\n"); + + // Check without --crlf flag. + let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout()); + assert_eq!(msgs.len(), 4); + + let m = msgs[1].unwrap_match(); + assert_eq!(m.lines, Data::text("test\r\n\n")); + assert_eq!(m.submatches[0].m, Data::text("\n")); + assert_eq!(m.submatches[1].m, Data::text("\n")); + + // Now check with --crlf flag. + let msgs = json_decode(&cmd.arg("--crlf").stdout()); + assert_eq!(msgs.len(), 4); + + let m = msgs[1].unwrap_match(); + assert_eq!(m.lines, Data::text("test\r\n\n")); + assert_eq!(m.submatches[0].m, Data::text("\n")); + assert_eq!(m.submatches[1].m, Data::text("\n")); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1412 +rgtest!(r1412_look_behind_match_missing, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. + if !dir.is_pcre2() { + return; + } + + dir.create("test", "foo\nbar\n"); + + let msgs = json_decode( + &cmd.arg("-U").arg("--json").arg(r"(?<=foo\n)bar").stdout(), + ); + assert_eq!(msgs.len(), 4); + + let m = msgs[1].unwrap_match(); + assert_eq!(m.lines, Data::text("bar\n")); + assert_eq!(m.submatches.len(), 1); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/macros.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/macros.rs new file mode 100644 index 000000000..7e5958c37 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/macros.rs @@ -0,0 +1,61 @@ +#[macro_export] +macro_rules! rgtest { + ($name:ident, $fun:expr) => { + #[test] + fn $name() { + let (dir, cmd) = crate::util::setup(stringify!($name)); + $fun(dir, cmd); + + if cfg!(feature = "pcre2") { + let (dir, cmd) = crate::util::setup_pcre2(stringify!($name)); + $fun(dir, cmd); + } + } + }; +} + +#[macro_export] +macro_rules! 
eqnice { + ($expected:expr, $got:expr) => { + let expected = &*$expected; + let got = &*$got; + if expected != got { + panic!(" +printed outputs differ! + +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", expected, got); + } + } +} + +#[macro_export] +macro_rules! eqnice_repr { + ($expected:expr, $got:expr) => { + let expected = &*$expected; + let got = &*$got; + if expected != got { + panic!(" +printed outputs differ! + +expected: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{:?} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +got: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{:?} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +", expected, got); + } + } +} diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/misc.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/misc.rs new file mode 100644 index 000000000..f1091b924 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/misc.rs @@ -0,0 +1,1130 @@ +use crate::hay::SHERLOCK; +use crate::util::{cmd_exists, sort_lines, Dir, TestCommand}; + +// This file contains "miscellaneous" tests that were either written before +// features were tracked more explicitly, or were simply written without +// linking them to a specific issue number. We should try to minimize the +// addition of more tests in this file and instead add them to either the +// regression test suite or the feature test suite (found in regression.rs and +// feature.rs, respectively). 
+ +rgtest!(single_file, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("Sherlock").arg("sherlock").stdout()); +}); + +rgtest!(dir, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("Sherlock").stdout()); +}); + +rgtest!(line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +3:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.arg("-n").arg("Sherlock").arg("sherlock").stdout()); +}); + +rgtest!(columns, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--column").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +3:49:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_filename, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-H").arg("Sherlock").arg("sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_heading, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + // This forces the issue since --with-filename is disabled by default + // when searching one file. 
+ "--with-filename", + "--heading", + "Sherlock", + "sherlock", + ]); + + let expected = "\ +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(with_heading_default, |dir: Dir, mut cmd: TestCommand| { + // Search two or more and get --with-filename enabled by default. + // Use -j1 to get deterministic results. + dir.create("sherlock", SHERLOCK); + dir.create("foo", "Sherlock Holmes lives on Baker Street."); + cmd.arg("-j1").arg("--heading").arg("Sherlock"); + + let expected = "\ +foo +Sherlock Holmes lives on Baker Street. + +sherlock +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +rgtest!(inverted, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-v").arg("Sherlock").arg("sherlock"); + + let expected = "\ +Holmeses, success in the province of detective work must always +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(inverted_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-n").arg("-v").arg("Sherlock").arg("sherlock"); + + let expected = "\ +2:Holmeses, success in the province of detective work must always +4:can extract a clew from a wisp of straw or a flake of cigar ash; +5:but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. 
+"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(case_insensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-i").arg("sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(word, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-w").arg("as").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(word_period, |dir: Dir, mut cmd: TestCommand| { + dir.create("haystack", "..."); + cmd.arg("-ow").arg(".").arg("haystack"); + + let expected = "\ +. +. +. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(line, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-x", + "Watson|and exhibited clearly, with a label attached.", + "sherlock", + ]); + + let expected = "\ +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(literal, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file", "blib\n()\nblab\n"); + cmd.arg("-F").arg("()").arg("file"); + + eqnice!("()\n", cmd.stdout()); +}); + +rgtest!(quiet, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-q").arg("Sherlock").arg("sherlock"); + + assert!(cmd.stdout().is_empty()); +}); + +rgtest!(replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-r").arg("FooBar").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the FooBar +be, to a very large extent, the result of luck. 
FooBar Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_groups, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&["-r", "$2, $1", "([A-Z][a-z]+) ([A-Z][a-z]+)", "sherlock"]); + + let expected = "\ +For the Watsons, Doctor of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Holmes, Sherlock +but Watson, Doctor has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_named_groups, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-r", + "$last, $first", + "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)", + "sherlock", + ]); + + let expected = "\ +For the Watsons, Doctor of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Holmes, Sherlock +but Watson, Doctor has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(replace_with_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-o").arg("-r").arg("$1").arg(r"of (\w+)").arg("sherlock"); + + let expected = "\ +this +detective +luck +straw +cigar +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(file_types, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-t").arg("rust").arg("Sherlock"); + + eqnice!("file.rs:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + cmd.arg("-t").arg("all").arg("Sherlock"); + + eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_negate, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.remove("sherlock"); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-T").arg("rust").arg("Sherlock"); + + 
eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_types_negate_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + cmd.arg("-T").arg("all").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(file_type_clear, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust").arg("Sherlock"); + + cmd.assert_non_empty_stderr(); +}); + +rgtest!(file_type_add, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + dir.create("file.wat", "Sherlock"); + cmd.args(&["--type-add", "wat:*.wat", "-t", "wat", "Sherlock"]); + + eqnice!("file.wat:Sherlock\n", cmd.stdout()); +}); + +rgtest!(file_type_add_compose, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + dir.create("file.wat", "Sherlock"); + cmd.args(&[ + "--type-add", + "wat:*.wat", + "--type-add", + "combo:include:wat,py", + "-t", + "combo", + "Sherlock", + ]); + + let expected = "\ +file.py:Sherlock +file.wat:Sherlock +"; + eqnice!(expected, sort_lines(&cmd.stdout())); +}); + +rgtest!(glob, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + cmd.arg("-g").arg("*.rs").arg("Sherlock"); + + eqnice!("file.rs:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_negate, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.remove("sherlock"); + dir.create("file.py", "Sherlock"); + dir.create("file.rs", "Sherlock"); + 
cmd.arg("-g").arg("!*.rs").arg("Sherlock"); + + eqnice!("file.py:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_case_insensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.HTML", "Sherlock"); + cmd.arg("--iglob").arg("*.html").arg("Sherlock"); + + eqnice!("file.HTML:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_case_sensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file1.HTML", "Sherlock"); + dir.create("file2.html", "Sherlock"); + cmd.arg("--glob").arg("*.html").arg("Sherlock"); + + eqnice!("file2.html:Sherlock\n", cmd.stdout()); +}); + +rgtest!(glob_always_case_insensitive, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.HTML", "Sherlock"); + cmd.args(&["--glob-case-insensitive", "--glob", "*.html", "Sherlock"]); + + eqnice!("file.HTML:Sherlock\n", cmd.stdout()); +}); + +rgtest!(byte_offset_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-b").arg("-o").arg("Sherlock"); + + let expected = "\ +sherlock:56:Sherlock +sherlock:177:Sherlock +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count").arg("Sherlock"); + + let expected = "sherlock:2\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count-matches").arg("the"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches_inverted, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--count-matches").arg("--invert-match").arg("Sherlock"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(count_matches_via_only, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + 
cmd.arg("--count").arg("--only-matching").arg("the"); + + let expected = "sherlock:4\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(include_zero, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&["--count", "--include-zero", "nada"]); + cmd.assert_err(); + + let output = cmd.raw_output(); + let stdout = String::from_utf8_lossy(&output.stdout); + let expected = "sherlock:0\n"; + + eqnice!(expected, stdout); +}); + +rgtest!(include_zero_override, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&["--count", "--include-zero", "--no-include-zero", "nada"]); + cmd.assert_err(); + + let output = cmd.raw_output(); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.is_empty()); +}); + +rgtest!(files_with_matches, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--files-with-matches").arg("Sherlock"); + + let expected = "sherlock\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(files_without_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("file.py", "foo"); + cmd.arg("--files-without-match").arg("Sherlock"); + + let expected = "file.py\n"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(after_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-A").arg("1").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. 
Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(after_context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-A").arg("1").arg("-n").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. Sherlock Holmes +4-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(before_context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-B").arg("1").arg("Sherlock").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(before_context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-B").arg("1").arg("-n").arg("Sherlock").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-C").arg("1").arg("world|attached").arg("sherlock"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. 
+"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(context_line_numbers, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("-C").arg("1").arg("-n").arg("world|attached").arg("sherlock"); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2-Holmeses, success in the province of detective work must always +-- +5-but Doctor Watson has to have it taken out for him and dusted, +6:and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(max_filesize_parse_error_length, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--max-filesize").arg("44444444444444444444"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(max_filesize_parse_error_suffix, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--max-filesize").arg("45k"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(max_filesize_parse_no_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 40); + dir.create_size("bar", 60); + cmd.arg("--max-filesize").arg("50").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_parse_k_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 3048); + dir.create_size("bar", 4100); + cmd.arg("--max-filesize").arg("4K").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_parse_m_suffix, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 1000000); + dir.create_size("bar", 1400000); + cmd.arg("--max-filesize").arg("1M").arg("--files"); + + eqnice!("foo\n", cmd.stdout()); +}); + +rgtest!(max_filesize_suffix_overflow, |dir: Dir, mut cmd: TestCommand| { + dir.create_size("foo", 1000000); + + // 2^35 * 2^30 would otherwise overflow + cmd.arg("--max-filesize").arg("34359738368G").arg("--files"); + cmd.assert_non_empty_stderr(); +}); + +rgtest!(ignore_hidden, |dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("Sherlock").assert_err(); +}); + +rgtest!(no_ignore_hidden, 
|dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("--hidden").arg("Sherlock"); + + let expected = "\ +.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_git, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(ignore_generic, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".ignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(ignore_ripgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".rgignore", "sherlock\n"); + cmd.arg("Sherlock"); + + cmd.assert_err(); +}); + +rgtest!(no_ignore, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("--no-ignore").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_git_parent, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + + // Even though we search in foo/, which has no .gitignore, ripgrep will + // traverse parent directories and respect the gitignore files found. + cmd.current_dir(dir.path().join("foo")); + cmd.assert_err(); +}); + +rgtest!(ignore_git_parent_stop, |dir: Dir, mut cmd: TestCommand| { + // This tests that searching parent directories for .gitignore files stops + // after it sees a .git directory. 
To test this, we create this directory + // hierarchy: + // + // .gitignore (contains `sherlock`) + // foo/ + // .git/ + // bar/ + // sherlock + // + // And we perform the search inside `foo/bar/`. ripgrep will stop looking + // for .gitignore files after it sees `foo/.git/`, and therefore not + // respect the top-level `.gitignore` containing `sherlock`. + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create_dir("foo/.git"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +// Like ignore_git_parent_stop, but with a .git file instead of a .git +// directory. +rgtest!(ignore_git_parent_stop_file, |dir: Dir, mut cmd: TestCommand| { + // This tests that searching parent directories for .gitignore files stops + // after it sees a .git *file*. A .git file is used for submodules. To test + // this, we create this directory hierarchy: + // + // .gitignore (contains `sherlock`) + // foo/ + // .git + // bar/ + // sherlock + // + // And we perform the search inside `foo/bar/`. ripgrep will stop looking + // for .gitignore files after it sees `foo/.git`, and therefore not + // respect the top-level `.gitignore` containing `sherlock`. + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/.git", ""); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(ignore_ripgrep_parent_no_stop, |dir: Dir, mut cmd: TestCommand| { + // This is like the `ignore_git_parent_stop` test, except it checks that + // ripgrep *doesn't* stop checking for .rgignore files. + dir.create(".rgignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create_dir("foo/.git"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo").join("bar")); + + // The top-level .rgignore applies. + cmd.assert_err(); +}); + +rgtest!(no_parent_ignore_git, |dir: Dir, mut cmd: TestCommand| { + // Set up a directory hierarchy like this: + // + // .git/ + // .gitignore + // foo/ + // .gitignore + // sherlock + // watson + // + // Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains + // `watson`. + // + // Now *do the search* from the foo directory. By default, ripgrep will + // search parent directories for .gitignore files. The --no-ignore-parent + // flag should prevent that. At the same time, the `foo/.gitignore` file + // will still be respected (since the search is happening in `foo/`). + // + // In other words, we should only see results from `sherlock`, not from + // `watson`. + dir.create_dir(".git"); + dir.create(".gitignore", "sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/.gitignore", "watson\n"); + dir.create("foo/sherlock", SHERLOCK); + dir.create("foo/watson", SHERLOCK); + cmd.arg("--no-ignore-parent").arg("Sherlock"); + cmd.current_dir(dir.path().join("foo")); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(symlink_nofollow, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create_dir("foo/bar"); + dir.link_dir("foo/baz", "foo/bar/baz"); + dir.create_dir("foo/baz"); + dir.create("foo/baz/sherlock", SHERLOCK); + cmd.arg("Sherlock"); + cmd.current_dir(dir.path().join("foo/bar")); + + cmd.assert_err(); +}); + +#[cfg(not(windows))] +rgtest!(symlink_follow, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create_dir("foo/bar"); + dir.create_dir("foo/baz"); + dir.create("foo/baz/sherlock", SHERLOCK); + dir.link_dir("foo/baz", "foo/bar/baz"); + cmd.arg("-L").arg("Sherlock"); + cmd.current_dir(dir.path().join("foo/bar")); + + let expected = "\ +baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted1, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create(".gitignore", "sherlock\n"); + cmd.arg("-u").arg("Sherlock"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted2, |dir: Dir, mut cmd: TestCommand| { + dir.create(".sherlock", SHERLOCK); + cmd.arg("-uu").arg("Sherlock"); + + let expected = "\ +.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +.sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(unrestricted3, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("hay", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-uuu").arg("foo"); + + let expected = "\ +hay: binary file matches (found \"\\0\" byte around offset 3) +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep_no_line, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("-N").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(vimgrep_no_line_no_column, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.arg("--vimgrep").arg("-N").arg("--no-column").arg("Sherlock|Watson"); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +sherlock:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(preprocessing, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xzcat") { + return; + } + + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.arg("--pre").arg("xzcat").arg("Sherlock").arg("sherlock.xz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xzcat") { + return; + } + + dir.create("sherlock", SHERLOCK); + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]); + + let expected = "\ +sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(sort_lines(expected), sort_lines(&cmd.stdout())); +}); + +rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("gzip") { + return; + } + + dir.create_bytes("sherlock.gz", include_bytes!("./data/sherlock.gz")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_bzip2, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("bzip2") { + return; + } + + dir.create_bytes("sherlock.bz2", include_bytes!("./data/sherlock.bz2")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.bz2"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_xz, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xz") { + return; + } + + dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.xz"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_lz4, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("lz4") { + return; + } + + dir.create_bytes("sherlock.lz4", include_bytes!("./data/sherlock.lz4")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.lz4"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_lzma, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("xz") { + return; + } + + dir.create_bytes("sherlock.lzma", include_bytes!("./data/sherlock.lzma")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.lzma"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_brotli, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("brotli") { + return; + } + + dir.create_bytes("sherlock.br", include_bytes!("./data/sherlock.br")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.br"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_zstd, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("zstd") { + return; + } + + dir.create_bytes("sherlock.zst", include_bytes!("./data/sherlock.zst")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.zst"); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_uncompress, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("uncompress") { + return; + } + + dir.create_bytes("sherlock.Z", include_bytes!("./data/sherlock.Z")); + cmd.arg("-z").arg("Sherlock").arg("sherlock.Z"); + + let expected = "\ + For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(compressed_failing_gzip, |dir: Dir, mut cmd: TestCommand| { + if !cmd_exists("gzip") { + return; + } + + dir.create("sherlock.gz", SHERLOCK); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + + cmd.assert_non_empty_stderr(); +}); + +rgtest!(binary_convert, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("--no-mmap").arg("foo").arg("file"); + + let expected = "\ +binary file matches (found \"\\0\" byte around offset 3) +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(binary_convert_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("--mmap").arg("foo").arg("file"); + + let expected = "\ +binary file matches (found \"\\0\" byte around offset 3) +"; + eqnice!(expected, cmd.stdout()); +}); + +rgtest!(binary_quit, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("--no-mmap").arg("foo").arg("-gfile"); + cmd.assert_err(); +}); + +rgtest!(binary_quit_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("--mmap").arg("foo").arg("-gfile"); + cmd.assert_err(); +}); + +// The following two tests show a discrepancy in search results between +// searching with memory mapped files and stream searching. Stream searching +// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with +// the EOL terminator, which tends to avoid allocating large amounts of memory +// for really long "lines." The memory map searcher has no need to worry about +// such things, and more than that, it would be pretty hard for it to match the +// semantics of streaming search in this case. +// +// Binary files with lots of NULs aren't really part of the use case of ripgrep +// (or any other grep-like tool for that matter), so we shouldn't feel too bad +// about it. 
+rgtest!(binary_search_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-a").arg("--mmap").arg("foo").arg("file"); + eqnice!("foo\x00bar\nfoo\x00baz\n", cmd.stdout()); +}); + +rgtest!(binary_search_no_mmap, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo\x00bar\nfoo\x00baz\n"); + cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file"); + eqnice!("foo\x00bar\nfoo\x00baz\n", cmd.stdout()); +}); + +rgtest!(files, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", ""); + dir.create_dir("dir"); + dir.create("dir/file", ""); + cmd.arg("--files"); + + eqnice!(sort_lines("file\ndir/file\n"), sort_lines(&cmd.stdout())); +}); + +rgtest!(type_list, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--type-list"); + // This can change over time, so just make sure we print something. + assert!(!cmd.stdout().is_empty()); +}); + +// The following series of tests seeks to test all permutations of ripgrep's +// sorted queries. +// +// They all rely on this setup function, which sets up this particular file +// structure with a particular creation order: +// ├── a # 1 +// ├── b # 4 +// └── dir # 2 +// ├── c # 3 +// └── d # 5 +// +// This order is important when sorting them by system time-stamps. 
+fn sort_setup(dir: Dir) { + use std::{thread::sleep, time::Duration}; + + let sub_dir = dir.path().join("dir"); + dir.create("a", "test"); + sleep(Duration::from_millis(100)); + dir.create_dir(&sub_dir); + sleep(Duration::from_millis(100)); + dir.create(sub_dir.join("c"), "test"); + sleep(Duration::from_millis(100)); + dir.create("b", "test"); + sleep(Duration::from_millis(100)); + dir.create(sub_dir.join("d"), "test"); +} + +rgtest!(sort_files, |dir: Dir, mut cmd: TestCommand| { + sort_setup(dir); + let expected = "a:test\nb:test\ndir/c:test\ndir/d:test\n"; + eqnice!(expected, cmd.args(["--sort", "path", "test"]).stdout()); +}); + +rgtest!(sort_accessed, |dir: Dir, mut cmd: TestCommand| { + if crate::util::is_cross() { + return; + } + sort_setup(dir); + let expected = "a:test\ndir/c:test\nb:test\ndir/d:test\n"; + eqnice!(expected, cmd.args(["--sort", "accessed", "test"]).stdout()); +}); + +rgtest!(sortr_accessed, |dir: Dir, mut cmd: TestCommand| { + if crate::util::is_cross() { + return; + } + sort_setup(dir); + let expected = "dir/d:test\nb:test\ndir/c:test\na:test\n"; + eqnice!(expected, cmd.args(["--sortr", "accessed", "test"]).stdout()); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/multiline.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/multiline.rs new file mode 100644 index 000000000..d084c96ba --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/multiline.rs @@ -0,0 +1,121 @@ +use crate::hay::SHERLOCK; +use crate::util::{Dir, TestCommand}; + +// This tests that multiline matches that span multiple lines, but where +// multiple matches may begin and end on the same line work correctly. 
+rgtest!(overlap1, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "xxx\nabc\ndefxxxabc\ndefxxx\nxxx"); + cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test"); + eqnice!("2:abc\n3:defxxxabc\n4:defxxx\n", cmd.stdout()); +}); + +// Like overlap1, but tests the case where one match ends at precisely the same +// location at which the next match begins. +rgtest!(overlap2, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "xxx\nabc\ndefabc\ndefxxx\nxxx"); + cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test"); + eqnice!("2:abc\n3:defabc\n4:defxxx\n", cmd.stdout()); +}); + +// Tests that even in a multiline search, a '.' does not match a newline. +rgtest!(dot_no_newline, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&["-n", "-U", "of this world.+detective work", "sherlock"]); + cmd.assert_err(); +}); + +// Tests that the --multiline-dotall flag causes '.' to match a newline. +rgtest!(dot_all, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", + "-U", + "--multiline-dotall", + "of this world.+detective work", + "sherlock", + ]); + + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that --only-matching works in multiline mode. +rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", + "-U", + "--only-matching", + r"Watson|Sherlock\p{Any}+?Holmes", + "sherlock", + ]); + + let expected = "\ +1:Watson +1:Sherlock +2:Holmes +3:Sherlock Holmes +5:Watson +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that --vimgrep works in multiline mode. +// +// In particular, we test that only the first line of each match is printed, +// even when a match spans multiple lines. 
+// +// See: https://github.com/BurntSushi/ripgrep/issues/1866 +rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", + "-U", + "--vimgrep", + r"Watson|Sherlock\p{Any}+?Holmes", + "sherlock", + ]); + + let expected = "\ +sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +"; + eqnice!(expected, cmd.stdout()); +}); + +// Tests that multiline search works when reading from stdin. This is an +// important test because multiline search must read the entire contents of +// what it is searching into memory before executing the search. +rgtest!(stdin, |_: Dir, mut cmd: TestCommand| { + cmd.args(&["-n", "-U", r"of this world\p{Any}+?detective work"]); + let expected = "\ +1:For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +"; + eqnice!(expected, cmd.pipe(SHERLOCK.as_bytes())); +}); + +// Test that multiline search and contextual matches work. +rgtest!(context, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + cmd.args(&[ + "-n", + "-U", + "-C1", + r"detective work\p{Any}+?result of luck", + "sherlock", + ]); + + let expected = "\ +1-For the Doctor Watsons of this world, as opposed to the Sherlock +2:Holmeses, success in the province of detective work must always +3:be, to a very large extent, the result of luck. 
Sherlock Holmes +4-can extract a clew from a wisp of straw or a flake of cigar ash; +"; + eqnice!(expected, cmd.stdout()); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/regression.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/regression.rs new file mode 100644 index 000000000..e28af4a31 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/regression.rs @@ -0,0 +1,1219 @@ +use crate::hay::SHERLOCK; +use crate::util::{sort_lines, Dir, TestCommand}; + +// See: https://github.com/BurntSushi/ripgrep/issues/16 +rgtest!(r16, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "ghi/"); + dir.create_dir("ghi"); + dir.create_dir("def/ghi"); + dir.create("ghi/toplevel.txt", "xyz"); + dir.create("def/ghi/subdir.txt", "xyz"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/25 +rgtest!(r25, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "/llvm/"); + dir.create_dir("src/llvm"); + dir.create("src/llvm/foo", "test"); + + cmd.arg("test"); + eqnice!("src/llvm/foo:test\n", cmd.stdout()); + + cmd.current_dir(dir.path().join("src")); + eqnice!("llvm/foo:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/30 +rgtest!(r30, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "vendor/**\n!vendor/manifest"); + dir.create_dir("vendor"); + dir.create("vendor/manifest", "test"); + + eqnice!("vendor/manifest:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/49 +rgtest!(r49, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "foo/bar"); + dir.create_dir("test/foo/bar"); + dir.create("test/foo/bar/baz", "test"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/50 +rgtest!(r50, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", "XXX/YYY/"); + 
dir.create_dir("abc/def/XXX/YYY"); + dir.create_dir("ghi/XXX/YYY"); + dir.create("abc/def/XXX/YYY/bar", "test"); + dir.create("ghi/XXX/YYY/bar", "test"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/64 +rgtest!(r64, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("dir"); + dir.create_dir("foo"); + dir.create("dir/abc", ""); + dir.create("foo/abc", ""); + + eqnice!("foo/abc\n", cmd.arg("--files").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/65 +rgtest!(r65, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "a/"); + dir.create_dir("a"); + dir.create("a/foo", "xyz"); + dir.create("a/bar", "xyz"); + + cmd.arg("xyz").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/67 +rgtest!(r67, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "/*\n!/dir"); + dir.create_dir("dir"); + dir.create_dir("foo"); + dir.create("foo/bar", "test"); + dir.create("dir/bar", "test"); + + eqnice!("dir/bar:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/87 +rgtest!(r87, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "foo\n**no-vcs**"); + dir.create("foo", "test"); + + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/90 +rgtest!(r90, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "!.foo"); + dir.create(".foo", "test"); + + eqnice!(".foo:test\n", cmd.arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/93 +rgtest!(r93, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "192.168.1.1"); + + eqnice!("foo:192.168.1.1\n", cmd.arg(r"(\d{1,3}\.){3}\d{1,3}").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/99 +rgtest!(r99, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo1", "test"); + 
dir.create("foo2", "zzz"); + dir.create("bar", "test"); + + eqnice!( + sort_lines("bar\ntest\n\nfoo1\ntest\n"), + sort_lines(&cmd.arg("-j1").arg("--heading").arg("test").stdout()) + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/105 +rgtest!(r105_part1, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "zztest"); + + eqnice!("foo:1:3:zztest\n", cmd.arg("--vimgrep").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/105 +rgtest!(r105_part2, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "zztest"); + + eqnice!("foo:1:3:zztest\n", cmd.arg("--column").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/127 +rgtest!(r127, |dir: Dir, mut cmd: TestCommand| { + // Set up a directory hierarchy like this: + // + // .gitignore + // foo/ + // sherlock + // watson + // + // Where `.gitignore` contains `foo/sherlock`. + // + // ripgrep should ignore 'foo/sherlock' giving us results only from + // 'foo/watson' but on Windows ripgrep will include both 'foo/sherlock' and + // 'foo/watson' in the search results. + dir.create_dir(".git"); + dir.create(".gitignore", "foo/sherlock\n"); + dir.create_dir("foo"); + dir.create("foo/sherlock", SHERLOCK); + dir.create("foo/watson", SHERLOCK); + + let expected = "\ +foo/watson:For the Doctor Watsons of this world, as opposed to the Sherlock +foo/watson:be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(expected, cmd.arg("Sherlock").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/128 +rgtest!(r128, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("foo", b"01234567\x0b\n\x0b\n\x0b\n\x0b\nx"); + + eqnice!("foo:5:x\n", cmd.arg("-n").arg("x").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/131 +// +// TODO(burntsushi): Darwin doesn't like this test for some reason. Probably +// due to the weird file path. 
+#[cfg(not(target_os = "macos"))] +rgtest!(r131, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "TopÑapa"); + dir.create("TopÑapa", "test"); + + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/137 +// +// TODO(burntsushi): Figure out how to make this test work on Windows. Right +// now it gives "access denied" errors when trying to create a file symlink. +// For now, disable test on Windows. +#[cfg(not(windows))] +rgtest!(r137, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.link_file("sherlock", "sym1"); + dir.link_file("sherlock", "sym2"); + + let expected = "\ +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +sym1:For the Doctor Watsons of this world, as opposed to the Sherlock +sym1:be, to a very large extent, the result of luck. Sherlock Holmes +sym2:For the Doctor Watsons of this world, as opposed to the Sherlock +sym2:be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + cmd.arg("-j1").arg("Sherlock").arg("./").arg("sym1").arg("sym2"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/156 +rgtest!(r156, |dir: Dir, mut cmd: TestCommand| { + let expected = r#"#parse('widgets/foo_bar_macros.vm') +#parse ( 'widgets/mobile/foo_bar_macros.vm' ) +#parse ("widgets/foobarhiddenformfields.vm") +#parse ( "widgets/foo_bar_legal.vm" ) +#include( 'widgets/foo_bar_tips.vm' ) +#include('widgets/mobile/foo_bar_macros.vm') +#include ("widgets/mobile/foo_bar_resetpw.vm") +#parse('widgets/foo-bar-macros.vm') +#parse ( 'widgets/mobile/foo-bar-macros.vm' ) +#parse ("widgets/foo-bar-hiddenformfields.vm") +#parse ( "widgets/foo-bar-legal.vm" ) +#include( 'widgets/foo-bar-tips.vm' ) +#include('widgets/mobile/foo-bar-macros.vm') +#include ("widgets/mobile/foo-bar-resetpw.vm") +"#; + dir.create("testcase.txt", expected); + + cmd.arg("-N"); + cmd.arg(r#"#(?:parse|include)\s*\(\s*(?:"|')[./A-Za-z_-]+(?:"|')"#); + cmd.arg("testcase.txt"); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/184 +rgtest!(r184, |dir: Dir, mut cmd: TestCommand| { + dir.create(".gitignore", ".*"); + dir.create_dir("foo/bar"); + dir.create("foo/bar/baz", "test"); + + cmd.arg("test"); + eqnice!("foo/bar/baz:test\n", cmd.stdout()); + + cmd.current_dir(dir.path().join("./foo/bar")); + eqnice!("baz:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/199 +rgtest!(r199, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "tEsT"); + + eqnice!("foo:tEsT\n", cmd.arg("--smart-case").arg(r"\btest\b").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/206 +rgtest!(r206, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + dir.create("foo/bar.txt", "test"); + + cmd.arg("test").arg("-g").arg("*.txt"); + eqnice!("foo/bar.txt:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/210 
+#[cfg(unix)] +rgtest!(r210, |dir: Dir, mut cmd: TestCommand| { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let badutf8 = OsStr::from_bytes(&b"foo\xffbar"[..]); + + // APFS does not support creating files with invalid UTF-8 bytes. + // https://github.com/BurntSushi/ripgrep/issues/559 + if dir.try_create(badutf8, "test").is_ok() { + cmd.arg("-H").arg("test").arg(badutf8); + assert_eq!(b"foo\xffbar:test\n".to_vec(), cmd.output().stdout); + } +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/228 +rgtest!(r228, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo"); + + cmd.arg("--ignore-file").arg("foo").arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/229 +rgtest!(r229, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "economie"); + + cmd.arg("-S").arg("[E]conomie").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/251 +rgtest!(r251, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "привет\nПривет\nПрИвЕт"); + + let expected = "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"; + eqnice!(expected, cmd.arg("-i").arg("привет").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/256 +#[cfg(not(windows))] +rgtest!(r256, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("bar"); + dir.create("bar/baz", "test"); + dir.link_dir("bar", "foo"); + + eqnice!("foo/baz:test\n", cmd.arg("test").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/256 +#[cfg(not(windows))] +rgtest!(r256_j1, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("bar"); + dir.create("bar/baz", "test"); + dir.link_dir("bar", "foo"); + + eqnice!("foo/baz:test\n", cmd.arg("-j1").arg("test").arg("foo").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/270 +rgtest!(r270, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "-test"); + + cmd.arg("-e").arg("-test"); + eqnice!("foo:-test\n", cmd.stdout()); +}); + +// See: 
https://github.com/BurntSushi/ripgrep/issues/279 +rgtest!(r279, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + + eqnice!("", cmd.arg("-q").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/391 +rgtest!(r391, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create("lock", ""); + dir.create("bar.py", ""); + dir.create(".git/packed-refs", ""); + dir.create(".git/description", ""); + + cmd.args(&[ + "--no-ignore", + "--hidden", + "--follow", + "--files", + "--glob", + "!{.git,node_modules,plugged}/**", + "--glob", + "*.{js,json,php,md,styl,scss,sass,pug,html,config,py,cpp,c,go,hs}", + ]); + eqnice!("bar.py\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/405 +rgtest!(r405, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("foo/bar"); + dir.create_dir("bar/foo"); + dir.create("foo/bar/file1.txt", "test"); + dir.create("bar/foo/file2.txt", "test"); + + cmd.arg("-g").arg("!/foo/**").arg("test"); + eqnice!("bar/foo/file2.txt:test\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/428 +#[cfg(not(windows))] +rgtest!(r428_color_context_path, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", "foo\nbar"); + cmd.args(&[ + "-A1", + "-H", + "--no-heading", + "-N", + "--colors=match:none", + "--color=always", + "--hyperlink-format=", + "foo", + ]); + + let expected = format!( + "{colored_path}:foo\n{colored_path}-bar\n", + colored_path = + "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6dsherlock\x1b\x5b\x30\x6d" + ); + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/428 +rgtest!(r428_unrecognized_style, |dir: Dir, mut cmd: TestCommand| { + dir.create("file.txt", "Sherlock"); + + cmd.arg("--colors=match:style:").arg("Sherlock"); + cmd.assert_err(); + + let output = cmd.raw_output(); + let stderr = String::from_utf8_lossy(&output.stderr); + let expected = "\ +rg: error parsing flag --colors: \ +unrecognized 
style attribute ''. Choose from: nobold, bold, nointense, \ +intense, nounderline, underline. +"; + eqnice!(expected, stderr); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/451 +rgtest!(r451_only_matching_as_in_issue, |dir: Dir, mut cmd: TestCommand| { + dir.create("digits.txt", "1 2 3\n"); + cmd.arg("--only-matching").arg(r"[0-9]+").arg("digits.txt"); + + let expected = "\ +1 +2 +3 +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/451 +rgtest!(r451_only_matching, |dir: Dir, mut cmd: TestCommand| { + dir.create("digits.txt", "1 2 3\n123\n"); + cmd.args(&["--only-matching", "--column", r"[0-9]", "digits.txt"]); + + let expected = "\ +1:1:1 +1:3:2 +1:5:3 +2:1:1 +2:2:2 +2:3:3 +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/483 +rgtest!(r483_matching_no_stdout, |dir: Dir, mut cmd: TestCommand| { + dir.create("file.py", ""); + cmd.arg("--quiet").arg("--files").arg("--glob").arg("*.py"); + eqnice!("", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/483 +rgtest!(r483_non_matching_exit_code, |dir: Dir, mut cmd: TestCommand| { + dir.create("file.rs", ""); + cmd.arg("--quiet").arg("--files").arg("--glob").arg("*.py"); + cmd.assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/493 +rgtest!(r493, |dir: Dir, mut cmd: TestCommand| { + dir.create("input.txt", "peshwaship 're seminomata"); + + cmd.arg("-o").arg(r"\b 're \b").arg("input.txt"); + assert_eq!(" 're \n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/506 +rgtest!(r506_word_not_parenthesized, |dir: Dir, mut cmd: TestCommand| { + dir.create("wb.txt", "min minimum amin\nmax maximum amax"); + cmd.arg("-w").arg("-o").arg("min|max").arg("wb.txt"); + eqnice!("min\nmax\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/553 +rgtest!(r553_switch, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", 
SHERLOCK); + + let expected = "\ +sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +"; + cmd.arg("-i").arg("sherlock"); + eqnice!(expected, cmd.stdout()); + + // Repeat the `i` flag to make sure everything still works. + eqnice!(expected, cmd.arg("-i").stdout()); +}); + +rgtest!(r553_flag, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +-- +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached. +"; + cmd.arg("-C").arg("1").arg(r"world|attached").arg("sherlock"); + eqnice!(expected, cmd.stdout()); + + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +and exhibited clearly, with a label attached. +"; + eqnice!(expected, cmd.arg("-C").arg("0").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/568 +rgtest!(r568_leading_hyphen_option_args, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "foo bar -baz\n"); + cmd.arg("-e-baz").arg("-e").arg("-baz").arg("file"); + eqnice!("foo bar -baz\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.arg("-rni").arg("bar").arg("file"); + eqnice!("foo ni -baz\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.arg("-r").arg("-n").arg("-i").arg("bar").arg("file"); + eqnice!("foo -n -baz\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/599 +// +// This test used to check that we emitted color escape sequences even for +// empty matches, but with the addition of the JSON output format, clients no +// longer need to rely on escape sequences to parse matches. Therefore, we no +// longer emit useless escape sequences. 
+rgtest!(r599, |dir: Dir, mut cmd: TestCommand| { + dir.create("input.txt", "\n\ntest\n"); + cmd.args(&[ + "--color", + "ansi", + "--colors", + "path:none", + "--colors", + "line:none", + "--colors", + "match:fg:red", + "--colors", + "match:style:nobold", + "--line-number", + r"^$", + "input.txt", + ]); + + let expected = "\ +1: +2: +"; + eqnice_repr!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/693 +rgtest!(r693_context_in_contextless_mode, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "xyz\n"); + dir.create("bar", "xyz\n"); + + cmd.arg("-C1").arg("-c").arg("--sort-files").arg("xyz"); + eqnice!("bar:1\nfoo:1\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/807 +rgtest!(r807, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", ".a/b"); + dir.create_dir(".a/b"); + dir.create_dir(".a/c"); + dir.create(".a/b/file", "test"); + dir.create(".a/c/file", "test"); + + eqnice!(".a/c/file:test\n", cmd.arg("--hidden").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/900 +rgtest!(r900, |dir: Dir, mut cmd: TestCommand| { + dir.create("sherlock", SHERLOCK); + dir.create("pat", ""); + + cmd.arg("-fpat").arg("sherlock").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1064 +rgtest!(r1064, |dir: Dir, mut cmd: TestCommand| { + dir.create("input", "abc"); + eqnice!("input:abc\n", cmd.arg("a(.*c)").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1098 +rgtest!(r1098, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "a**b"); + dir.create("afoob", "test"); + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1130 +rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "test"); + eqnice!( + "foo\n", + cmd.arg("--files-with-matches").arg("test").arg("foo").stdout() + ); + + let mut cmd = 
dir.command(); + eqnice!( + "foo\n", + cmd.arg("--files-without-match").arg("nada").arg("foo").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1159 +rgtest!(r1159_invalid_flag, |_: Dir, mut cmd: TestCommand| { + cmd.arg("--wat").assert_exit_code(2); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1159 +rgtest!(r1159_exit_status, |dir: Dir, _: TestCommand| { + dir.create("foo", "test"); + + // search with a match gets 0 exit status. + let mut cmd = dir.command(); + cmd.arg("test").assert_exit_code(0); + + // search with --quiet and a match gets 0 exit status. + let mut cmd = dir.command(); + cmd.arg("-q").arg("test").assert_exit_code(0); + + // search with a match and an error gets 2 exit status. + let mut cmd = dir.command(); + cmd.arg("test").arg("no-file").assert_exit_code(2); + + // search with a match in --quiet mode and an error gets 0 exit status. + let mut cmd = dir.command(); + cmd.arg("-q").arg("test").arg("foo").arg("no-file").assert_exit_code(0); + + // search with no match gets 1 exit status. + let mut cmd = dir.command(); + cmd.arg("nada").assert_exit_code(1); + + // search with --quiet and no match gets 1 exit status. + let mut cmd = dir.command(); + cmd.arg("-q").arg("nada").assert_exit_code(1); + + // search with no match and an error gets 2 exit status. + let mut cmd = dir.command(); + cmd.arg("nada").arg("no-file").assert_exit_code(2); + + // search with no match in --quiet mode and an error gets 2 exit status. 
+ let mut cmd = dir.command(); + cmd.arg("-q").arg("nada").arg("foo").arg("no-file").assert_exit_code(2); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1163 +rgtest!(r1163, |dir: Dir, mut cmd: TestCommand| { + dir.create("bom.txt", "\u{FEFF}test123\ntest123"); + eqnice!( + "bom.txt:test123\nbom.txt:test123\n", + cmd.arg("^test123").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1164 +rgtest!(r1164, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "myfile"); + dir.create("MYFILE", "test"); + + cmd.arg("--ignore-file-case-insensitive").arg("test").assert_err(); + eqnice!( + "MYFILE:test\n", + cmd.arg("--no-ignore-file-case-insensitive").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1173 +rgtest!(r1173, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "**"); + dir.create("foo", "test"); + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1174 +rgtest!(r1174, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir(".git"); + dir.create(".gitignore", "**/**/*"); + dir.create_dir("a"); + dir.create("a/foo", "test"); + cmd.arg("test").assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1176 +rgtest!(r1176_literal_file, |dir: Dir, mut cmd: TestCommand| { + dir.create("patterns", "foo(bar\n"); + dir.create("test", "foo(bar"); + + eqnice!( + "foo(bar\n", + cmd.arg("-F").arg("-f").arg("patterns").arg("test").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1176 +rgtest!(r1176_line_regex, |dir: Dir, mut cmd: TestCommand| { + dir.create("patterns", "foo\n"); + dir.create("test", "foobar\nfoo\nbarfoo\n"); + + eqnice!( + "foo\n", + cmd.arg("-x").arg("-f").arg("patterns").arg("test").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1203 +rgtest!(r1203_reverse_suffix_literal, |dir: Dir, _: TestCommand| { + dir.create("test", 
"153.230000\n"); + + let mut cmd = dir.command(); + eqnice!("153.230000\n", cmd.arg(r"\d\d\d00").arg("test").stdout()); + + let mut cmd = dir.command(); + eqnice!("153.230000\n", cmd.arg(r"\d\d\d000").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1223 +rgtest!( + r1223_no_dir_check_for_default_path, + |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("-"); + dir.create("a.json", "{}"); + dir.create("a.txt", "some text"); + + eqnice!( + "a.json\na.txt\n", + sort_lines(&cmd.arg("a").pipe(b"a.json\na.txt")) + ); + } +); + +// See: https://github.com/BurntSushi/ripgrep/issues/1259 +rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| { + dir.create("patterns-nonl", "[foo]"); + dir.create("patterns-nl", "[foo]\n"); + dir.create("test", "fz"); + + eqnice!("fz\n", cmd.arg("-f").arg("patterns-nonl").arg("test").stdout()); + cmd = dir.command(); + eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1311 +rgtest!(r1311_multi_line_term_replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("input", "hello\nworld\n"); + eqnice!( + "1:hello?world?\n", + cmd.args(&["-U", "-r?", "-n", "\n", "input"]).stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1319 +rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| { + dir.create("input", "CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC"); + eqnice!( + "input:CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC\n", + cmd.arg("TTGAGTCCAGGAG[ATCG]{2}C").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1334 +rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| { + dir.create("patterns", &"1.208.0.0/12\n".repeat(40)); + dir.create("corpus", "1.208.0.0/12\n"); + eqnice!( + "1.208.0.0/12\n", + cmd.arg("-Ff").arg("patterns").arg("corpus").stdout() + ); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1380 +rgtest!(r1380, |dir: Dir, mut cmd: 
TestCommand| { + dir.create( + "foo", + "\ +a +b +c +d +e +d +e +d +e +d +e +", + ); + + eqnice!("d\ne\nd\n", cmd.args(&["-A2", "-m1", "d", "foo"]).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1389 +rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("mydir"); + dir.create("mydir/file.txt", "test"); + dir.link_dir("mydir", "mylink"); + + let stdout = cmd + .args(&["test", "--no-ignore", "--sort", "path", "mylink"]) + .stdout(); + eqnice!("mylink/file.txt:test\n", stdout); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1401 +rgtest!(r1401_look_ahead_only_matching_1, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. + if !dir.is_pcre2() { + return; + } + dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\n"); + cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); + eqnice!("xo\ncat\tdo\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.args(&["-No", r".*o(?!.*[ \t])", "ip.txt"]); + eqnice!("xo\ncat\tdo\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1401 +rgtest!(r1401_look_ahead_only_matching_2, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. + if !dir.is_pcre2() { + return; + } + dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\nfoo"); + cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); + eqnice!("xo\ncat\tdo\nfoo\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1412 +rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. 
+ if !dir.is_pcre2() { + return; + } + + dir.create("test", "foo\nbar\n"); + cmd.args(&["-nU", "-rquux", r"(?<=foo\n)bar", "test"]); + eqnice!("2:quux\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/pull/1446 +rgtest!( + r1446_respect_excludes_in_worktree, + |dir: Dir, mut cmd: TestCommand| { + dir.create_dir("repo/.git/info"); + dir.create("repo/.git/info/exclude", "ignored"); + dir.create_dir("repo/.git/worktrees/repotree"); + dir.create("repo/.git/worktrees/repotree/commondir", "../.."); + + dir.create_dir("repotree"); + dir.create("repotree/.git", "gitdir: repo/.git/worktrees/repotree"); + dir.create("repotree/ignored", ""); + dir.create("repotree/not-ignored", ""); + + cmd.arg("--sort").arg("path").arg("--files").arg("repotree"); + eqnice!("repotree/not-ignored\n", cmd.stdout()); + } +); + +// See: https://github.com/BurntSushi/ripgrep/issues/1537 +rgtest!(r1537, |dir: Dir, mut cmd: TestCommand| { + dir.create("foo", "abc;de,fg"); + + let expected = "foo:abc;de,fg\n"; + eqnice!(expected, cmd.arg(";(.*,){1}").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1559 +rgtest!(r1559, |dir: Dir, mut cmd: TestCommand| { + dir.create( + "foo", + "\ +type A struct { + TaskID int `json:\"taskID\"` +} + +type B struct { + ObjectID string `json:\"objectID\"` + TaskID int `json:\"taskID\"` +} +", + ); + + let expected = "\ +foo: TaskID int `json:\"taskID\"` +foo: TaskID int `json:\"taskID\"` +"; + eqnice!(expected, cmd.arg("TaskID +int").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1573 +// +// Tests that if look-ahead is used, then --count-matches is correct. +rgtest!(r1573, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-ahead. + if !dir.is_pcre2() { + return; + } + + dir.create_bytes("foo", b"\xFF\xFE\x00\x62"); + dir.create( + "foo", + "\ +def A; +def B; +use A; +use B; +", + ); + + // Check that normal --count is correct. 
+ cmd.args(&[ + "--pcre2", + "--multiline", + "--count", + r"(?s)def (\w+);(?=.*use \w+)", + "foo", + ]); + eqnice!("2\n", cmd.stdout()); + + // Now check --count-matches. + let mut cmd = dir.command(); + cmd.args(&[ + "--pcre2", + "--multiline", + "--count-matches", + r"(?s)def (\w+);(?=.*use \w+)", + "foo", + ]); + eqnice!("2\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1638 +// +// Tests if UTF-8 BOM is sniffed, then the column index is correct. +rgtest!(r1638, |dir: Dir, mut cmd: TestCommand| { + dir.create_bytes("foo", b"\xef\xbb\xbfx"); + + eqnice!("foo:1:1:x\n", cmd.arg("--column").arg("x").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1739 +rgtest!(r1739_replacement_lineterm_match, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "a\n"); + cmd.args(&[r"-r${0}f", r".*", "test"]); + eqnice!("af\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1757 +rgtest!(f1757, |dir: Dir, _: TestCommand| { + dir.create_dir("rust/target"); + dir.create(".ignore", "rust/target"); + dir.create("rust/source.rs", "needle"); + dir.create("rust/target/rustdoc-output.html", "needle"); + + let args = &["--files-with-matches", "needle", "rust"]; + eqnice!("rust/source.rs\n", dir.command().args(args).stdout()); + let args = &["--files-with-matches", "needle", "./rust"]; + eqnice!("./rust/source.rs\n", dir.command().args(args).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1765 +rgtest!(r1765, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "\n"); + // We need to add --color=always here to force the failure, since the bad + // code path is only triggered when colors are enabled. 
+ cmd.args(&[r"x?", "--crlf", "--color", "always"]); + + assert!(!cmd.stdout().is_empty()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1838 +rgtest!(r1838_nul_error_with_binary_detection, |dir: Dir, _: TestCommand| { + // We don't support this error reporting with PCRE2 since we can't parse + // the pattern (easily) to give a good error message. + if dir.is_pcre2() { + return; + } + dir.create("test", "foo\n"); + + dir.command().args(&[r"foo\x00?"]).assert_err(); + eqnice!("test:foo\n", dir.command().args(&["-a", r"foo\x00?"]).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1866 +rgtest!(r1866, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "foobar\nfoobar\nfoo quux"); + cmd.args(&[ + "--multiline", + "--vimgrep", + r"foobar\nfoobar\nfoo|quux", + "test", + ]); + + // vimgrep only wants the first line of each match, even when a match + // spans multiple lines. + // + // See: https://github.com/BurntSushi/ripgrep/issues/1866 + let expected = "\ +test:1:1:foobar +test:3:5:foo quux +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1868 +rgtest!(r1868_context_passthru_override, |dir: Dir, _: TestCommand| { + dir.create("test", "foo\nbar\nbaz\nquux\n"); + + let args = &["-C1", "bar", "test"]; + eqnice!("foo\nbar\nbaz\n", dir.command().args(args).stdout()); + let args = &["--passthru", "bar", "test"]; + eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout()); + + let args = &["--passthru", "-C1", "bar", "test"]; + eqnice!("foo\nbar\nbaz\n", dir.command().args(args).stdout()); + let args = &["-C1", "--passthru", "bar", "test"]; + eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout()); + + let args = &["--passthru", "-B1", "bar", "test"]; + eqnice!("foo\nbar\n", dir.command().args(args).stdout()); + let args = &["-B1", "--passthru", "bar", "test"]; + eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout()); + + let args = &["--passthru", 
"-A1", "bar", "test"]; + eqnice!("bar\nbaz\n", dir.command().args(args).stdout()); + let args = &["-A1", "--passthru", "bar", "test"]; + eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout()); +}); + +rgtest!(r1878, |dir: Dir, _: TestCommand| { + dir.create("test", "a\nbaz\nabc\n"); + + // Since ripgrep enables (?m) by default, '^' will match at the beginning + // of a line, even when -U/--multiline is used. + let args = &["-U", "--no-mmap", r"^baz", "test"]; + eqnice!("baz\n", dir.command().args(args).stdout()); + let args = &["-U", "--mmap", r"^baz", "test"]; + eqnice!("baz\n", dir.command().args(args).stdout()); + + // But when (?-m) is disabled, or when \A is used, then there should be no + // matches that aren't anchored to the beginning of the file. + let args = &["-U", "--no-mmap", r"(?-m)^baz", "test"]; + dir.command().args(args).assert_err(); + let args = &["-U", "--mmap", r"(?-m)^baz", "test"]; + dir.command().args(args).assert_err(); + + let args = &["-U", "--no-mmap", r"\Abaz", "test"]; + dir.command().args(args).assert_err(); + let args = &["-U", "--mmap", r"\Abaz", "test"]; + dir.command().args(args).assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1891 +rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "\n##\n"); + // N.B. We use -o here to force the issue to occur, which seems to only + // happen when each match needs to be detected. 
+ eqnice!("1:\n2:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2095 +rgtest!(r2095, |dir: Dir, mut cmd: TestCommand| { + dir.create( + "test", + "#!/usr/bin/env bash + +zero=one + +a=one + +if true; then + a=( + a + b + c + ) + true +fi + +a=two + +b=one +}); +", + ); + cmd.args(&[ + "--line-number", + "--multiline", + "--only-matching", + "--replace", + "${value}", + r"^(?P\s*)a=(?P(?ms:[(].*?[)])|.*?)$", + "test", + ]); + let expected = "4:one +8:( +9: a +10: b +11: c +12: ) +15:two +"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2198 +rgtest!(r2198, |dir: Dir, mut cmd: TestCommand| { + dir.create(".ignore", "a"); + dir.create(".rgignore", "b"); + dir.create("a", ""); + dir.create("b", ""); + dir.create("c", ""); + + cmd.arg("--files").arg("--sort").arg("path"); + eqnice!("c\n", cmd.stdout()); + eqnice!("a\nb\nc\n", cmd.arg("--no-ignore-dot").stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2208 +rgtest!(r2208, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "# Compile requirements.txt files from all found or specified requirements.in files (compile). +# Use -h to include hashes, -u dep1,dep2... to upgrade specific dependencies, and -U to upgrade all. +pipc () { # [-h] [-U|-u [,...]] [...] [-- ...] + emulate -L zsh + unset REPLY + if [[ $1 == --help ]] { zpy $0; return } + [[ $ZPY_PROCS ]] || return + + local gen_hashes upgrade upgrade_csv + while [[ $1 == -[hUu] ]] { + if [[ $1 == -h ]] { gen_hashes=--generate-hashes; shift } + if [[ $1 == -U ]] { upgrade=1; shift } + if [[ $1 == -u ]] { upgrade=1; upgrade_csv=$2; shift 2 } + } +} +"); + cmd.args(&[ + "-N", + "-U", + "-r", "$usage", + r#"^(?P\n?(# .*\n)*)(alias (?Ppipc)="[^"]+"|(?Ppipc) \(\) \{)( #(?P .+))?"#, + "test", + ]); + let expected = " [-h] [-U|-u [,...]] [...] 
[-- ...]\n"; + eqnice!(expected, cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2236 +rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| { + dir.create(".ignore", r"foo\/"); + dir.create_dir("foo"); + dir.create("foo/bar", "test\n"); + cmd.args(&["test"]).assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2480 +rgtest!(r2480, |dir: Dir, mut cmd: TestCommand| { + dir.create("file", "FooBar\n"); + + // no regression in empty pattern behavior + cmd.args(&["-e", "", "file"]); + eqnice!("FooBar\n", cmd.stdout()); + + // no regression in single pattern behavior + let mut cmd = dir.command(); + cmd.args(&["-e", ")(", "file"]); + eqnice!("FooBar\n", cmd.stdout()); + + // no regression in multiple patterns behavior + let mut cmd = dir.command(); + cmd.args(&["--only-matching", "-e", "Foo", "-e", "Bar", "file"]); + eqnice!("Foo\nBar\n", cmd.stdout()); + + // no regression in capture groups behavior + let mut cmd = dir.command(); + cmd.args(&["-e", "Fo(oB)a(r)", "--replace", "${0}_${1}_${2}${3}", "file"]); + eqnice!("FooBar_oB_r\n", cmd.stdout()); // note: ${3} expected to be empty + + // flag does not leak into next pattern on match + let mut cmd = dir.command(); + cmd.args(&["--only-matching", "-e", "(?i)foo", "-e", "bar", "file"]); + eqnice!("Foo\n", cmd.stdout()); + + // flag does not leak into next pattern on mismatch + let mut cmd = dir.command(); + cmd.args(&["--only-matching", "-e", "(?i)notfoo", "-e", "bar", "file"]); + cmd.assert_err(); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2574 +rgtest!(r2574, |dir: Dir, mut cmd: TestCommand| { + dir.create("haystack", "some.domain.com\nsome.domain.com/x\n"); + let got = cmd + .args(&[ + "--no-filename", + "--no-unicode", + "-w", + "-o", + r"(\w+\.)*domain\.(\w+)", + ]) + .stdout(); + eqnice!("some.domain.com\nsome.domain.com\n", got); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/2658 +rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: 
TestCommand| { + dir.create("haystack", "foo\0bar\0quux\0"); + let got = cmd.args(&["--null-data", "--line-regexp", r"bar"]).stdout(); + eqnice!("haystack:bar\0", got); +}); diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/tests.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/tests.rs new file mode 100644 index 000000000..81e40f806 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/tests.rs @@ -0,0 +1,22 @@ +// Macros useful for testing. +#[macro_use] +mod macros; + +// Corpora. +mod hay; +// Utilities for making tests nicer to read and easier to write. +mod util; + +// Tests for ripgrep's handling of binary files. +mod binary; +// Tests related to most features in ripgrep. If you're adding something new +// to ripgrep, tests should probably go in here. +mod feature; +// Tests for ripgrep's JSON format. +mod json; +// Miscellaneous tests grouped in a haphazard manner. Try not to add more. +mod misc; +// Tests for ripgrep's multiline search support. +mod multiline; +// Regression tests. +mod regression; diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/util.rs b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/util.rs new file mode 100644 index 000000000..07a1a783f --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/tests/util.rs @@ -0,0 +1,508 @@ +use std::env; +use std::error; +use std::ffi::OsStr; +use std::fs::{self, File}; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process::{self, Command}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::thread; +use std::time::Duration; + +use bstr::ByteSlice; + +static TEST_DIR: &'static str = "ripgrep-tests"; +static NEXT_ID: AtomicUsize = AtomicUsize::new(0); + +/// Setup an empty work directory and return a command pointing to the ripgrep +/// executable whose CWD is set to the work directory. +/// +/// The name given will be used to create the directory. 
Generally, it should +/// correspond to the test name. +pub fn setup(test_name: &str) -> (Dir, TestCommand) { + let dir = Dir::new(test_name); + let cmd = dir.command(); + (dir, cmd) +} + +/// Like `setup`, but uses PCRE2 as the underlying regex engine. +pub fn setup_pcre2(test_name: &str) -> (Dir, TestCommand) { + let mut dir = Dir::new(test_name); + dir.pcre2(true); + let cmd = dir.command(); + (dir, cmd) +} + +/// Break the given string into lines, sort them and then join them back +/// together. This is useful for testing output from ripgrep that may not +/// always be in the same order. +pub fn sort_lines(lines: &str) -> String { + let mut lines: Vec<&str> = lines.trim().lines().collect(); + lines.sort(); + format!("{}\n", lines.join("\n")) +} + +/// Returns true if and only if the given program can be successfully executed +/// with a `--help` flag. +pub fn cmd_exists(program: &str) -> bool { + Command::new(program).arg("--help").output().is_ok() +} + +/// Dir represents a directory in which tests should be run. +/// +/// Directories are created from a global atomic counter to avoid duplicates. +#[derive(Clone, Debug)] +pub struct Dir { + /// The directory in which this test executable is running. + root: PathBuf, + /// The directory in which the test should run. If a test needs to create + /// files, they should go in here. This directory is also used as the CWD + /// for any processes created by the test. + dir: PathBuf, + /// Set to true when the test should use PCRE2 as the regex engine. + pcre2: bool, +} + +impl Dir { + /// Create a new test working directory with the given name. The name + /// does not need to be distinct for each invocation, but should correspond + /// to a logical grouping of tests. 
+ pub fn new(name: &str) -> Dir { + let id = NEXT_ID.fetch_add(1, Ordering::SeqCst); + let root = env::current_exe() + .unwrap() + .parent() + .expect("executable's directory") + .to_path_buf(); + let dir = + env::temp_dir().join(TEST_DIR).join(name).join(&format!("{}", id)); + if dir.exists() { + nice_err(&dir, fs::remove_dir_all(&dir)); + } + nice_err(&dir, repeat(|| fs::create_dir_all(&dir))); + Dir { root, dir, pcre2: false } + } + + /// Use PCRE2 for this test. + pub fn pcre2(&mut self, yes: bool) { + self.pcre2 = yes; + } + + /// Returns true if and only if this test is configured to use PCRE2 as + /// the regex engine. + pub fn is_pcre2(&self) -> bool { + self.pcre2 + } + + /// Create a new file with the given name and contents in this directory, + /// or panic on error. + pub fn create>(&self, name: P, contents: &str) { + self.create_bytes(name, contents.as_bytes()); + } + + /// Try to create a new file with the given name and contents in this + /// directory. + #[allow(dead_code)] // unused on Windows + pub fn try_create>( + &self, + name: P, + contents: &str, + ) -> io::Result<()> { + let path = self.dir.join(name); + self.try_create_bytes(path, contents.as_bytes()) + } + + /// Create a new file with the given name and size. + pub fn create_size>(&self, name: P, filesize: u64) { + let path = self.dir.join(name); + let file = nice_err(&path, File::create(&path)); + nice_err(&path, file.set_len(filesize)); + } + + /// Create a new file with the given name and contents in this directory, + /// or panic on error. + pub fn create_bytes>(&self, name: P, contents: &[u8]) { + let path = self.dir.join(&name); + nice_err(&path, self.try_create_bytes(name, contents)); + } + + /// Try to create a new file with the given name and contents in this + /// directory. 
+ pub fn try_create_bytes<P: AsRef<Path>>( + &self, + name: P, + contents: &[u8], + ) -> io::Result<()> { + let path = self.dir.join(name); + let mut file = File::create(path)?; + file.write_all(contents)?; + file.flush() + } + + /// Remove a file with the given name from this directory, or panic on + /// error. + pub fn remove<P: AsRef<Path>>(&self, name: P) { + let path = self.dir.join(name); + nice_err(&path, fs::remove_file(&path)); + } + + /// Create a new directory with the given path (and any directories above + /// it) inside this directory. + pub fn create_dir<P: AsRef<Path>>(&self, path: P) { + let path = self.dir.join(path); + nice_err(&path, repeat(|| fs::create_dir_all(&path))); + } + + /// Creates a new command that is set to use the ripgrep executable in + /// this working directory. + /// + /// This also: + /// + /// * Unsets the `RIPGREP_CONFIG_PATH` environment variable. + /// * Sets the `--path-separator` to `/` so that paths have the same output + /// on all systems. Tests that need to check `--path-separator` itself + /// can simply pass it again to override it. + /// * Passes `--pcre2` when this directory is configured to use PCRE2. + pub fn command(&self) -> TestCommand { + let mut cmd = self.bin(); + cmd.env_remove("RIPGREP_CONFIG_PATH"); + cmd.current_dir(&self.dir); + cmd.arg("--path-separator").arg("/"); + if self.is_pcre2() { + cmd.arg("--pcre2"); + } + TestCommand { dir: self.clone(), cmd } + } + + /// Returns a command that runs the ripgrep executable, which is looked up + /// relative to the directory containing this test executable. + /// + /// If `cross_runner` returns a runner, the command invokes that runner + /// with the ripgrep executable as its first argument. + pub fn bin(&self) -> process::Command { + let rg = self.root.join(format!("../rg{}", env::consts::EXE_SUFFIX)); + match cross_runner() { + None => process::Command::new(rg), + Some(runner) => { + let mut cmd = process::Command::new(runner); + cmd.arg(rg); + cmd + } + } + } + + /// Returns the path to this directory. + pub fn path(&self) -> &Path { + &self.dir + } + + /// Creates a directory symlink to the src with the given target name + /// in this directory. 
+ #[cfg(not(windows))] + pub fn link_dir, T: AsRef>(&self, src: S, target: T) { + use std::os::unix::fs::symlink; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_file(&target); + nice_err(&target, symlink(&src, &target)); + } + + /// Creates a directory symlink to the src with the given target name + /// in this directory. + #[cfg(windows)] + pub fn link_dir, T: AsRef>(&self, src: S, target: T) { + use std::os::windows::fs::symlink_dir; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_dir(&target); + nice_err(&target, symlink_dir(&src, &target)); + } + + /// Creates a file symlink to the src with the given target name + /// in this directory. + #[cfg(not(windows))] + pub fn link_file, T: AsRef>( + &self, + src: S, + target: T, + ) { + self.link_dir(src, target); + } + + /// Creates a file symlink to the src with the given target name + /// in this directory. + #[cfg(windows)] + #[allow(dead_code)] // unused on Windows + pub fn link_file, T: AsRef>( + &self, + src: S, + target: T, + ) { + use std::os::windows::fs::symlink_file; + let src = self.dir.join(src); + let target = self.dir.join(target); + let _ = fs::remove_file(&target); + nice_err(&target, symlink_file(&src, &target)); + } +} + +/// A simple wrapper around a process::Command with some conveniences. +#[derive(Debug)] +pub struct TestCommand { + /// The dir used to launched this command. + dir: Dir, + /// The actual command we use to control the process. + cmd: Command, +} + +impl TestCommand { + /// Returns a mutable reference to the underlying command. + pub fn cmd(&mut self) -> &mut Command { + &mut self.cmd + } + + /// Add an argument to pass to the command. + pub fn arg>(&mut self, arg: A) -> &mut TestCommand { + self.cmd.arg(arg); + self + } + + /// Add any number of arguments to the command. 
+ pub fn args<I, A>(&mut self, args: I) -> &mut TestCommand + where + I: IntoIterator<Item = A>, + A: AsRef<OsStr>, + { + self.cmd.args(args); + self + } + + /// Set the working directory for this command. + /// + /// Note that this does not need to be called normally, since the creation + /// of this TestCommand causes its working directory to be set to the + /// test's directory automatically. + pub fn current_dir<P: AsRef<Path>>(&mut self, dir: P) -> &mut TestCommand { + self.cmd.current_dir(dir); + self + } + + /// Runs the command and returns its stdout, lossily decoded as UTF-8. + /// If the command failed, then this panics. + pub fn stdout(&mut self) -> String { + let o = self.output(); + String::from_utf8_lossy(&o.stdout).into_owned() + } + + /// Pipe `input` to the command's stdin and return its stdout, lossily + /// decoded as UTF-8. If the command failed, then this panics. + pub fn pipe(&mut self, input: &[u8]) -> String { + self.cmd.stdin(process::Stdio::piped()); + self.cmd.stdout(process::Stdio::piped()); + self.cmd.stderr(process::Stdio::piped()); + + let mut child = self.cmd.spawn().unwrap(); + + // Pipe input to child process using a separate thread to avoid + // risk of deadlock between parent and child process. + let mut stdin = child.stdin.take().expect("expected standard input"); + let input = input.to_owned(); + let worker = thread::spawn(move || stdin.write_all(&input)); + + let output = self.expect_success(child.wait_with_output().unwrap()); + worker.join().unwrap().unwrap(); + + String::from_utf8_lossy(&output.stdout).into_owned() + } + + /// Gets the output of a command. If the command failed, then this panics. + pub fn output(&mut self) -> process::Output { + let output = self.raw_output(); + self.expect_success(output) + } + + /// Gets the raw output of a command after filtering nonsense like jemalloc + /// error messages from stderr. Unlike `output`, this does not check the + /// exit status. + pub fn raw_output(&mut self) -> process::Output { + let mut output = self.cmd.output().unwrap(); + output.stderr = strip_jemalloc_nonsense(&output.stderr); + output + } + + /// Runs the command and asserts that it resulted in an error exit code. 
+ pub fn assert_err(&mut self) { + let o = self.raw_output(); + if o.status.success() { + panic!( + "\n\n===== {:?} =====\n\ + command succeeded but expected failure!\ + \n\ncwd: {}\ + \n\ndir list: {:?}\ + \n\nstatus: {}\ + \n\nstdout: {}\n\nstderr: {}\ + \n\n=====\n", + self.cmd, + self.dir.dir.display(), + dir_list(&self.dir.dir), + o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + } + } + + /// Runs the command and asserts that its exit code matches expected exit + /// code. + pub fn assert_exit_code(&mut self, expected_code: i32) { + let code = self.cmd.output().unwrap().status.code().unwrap(); + assert_eq!( + expected_code, + code, + "\n\n===== {:?} =====\n\ + expected exit code did not match\ + \n\ncwd: {}\ + \n\ndir list: {:?}\ + \n\nexpected: {}\ + \n\nfound: {}\ + \n\n=====\n", + self.cmd, + self.dir.dir.display(), + dir_list(&self.dir.dir), + expected_code, + code + ); + } + + /// Runs the command and asserts that something was printed to stderr. 
+ pub fn assert_non_empty_stderr(&mut self) { + let o = self.cmd.output().unwrap(); + if o.status.success() || o.stderr.is_empty() { + panic!( + "\n\n===== {:?} =====\n\ + command succeeded but expected failure!\ + \n\ncwd: {}\ + \n\ndir list: {:?}\ + \n\nstatus: {}\ + \n\nstdout: {}\n\nstderr: {}\ + \n\n=====\n", + self.cmd, + self.dir.dir.display(), + dir_list(&self.dir.dir), + o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + } + } + + fn expect_success(&self, o: process::Output) -> process::Output { + if !o.status.success() { + let suggest = if o.stderr.is_empty() { + "\n\nDid your search end up with no results?".to_string() + } else { + "".to_string() + }; + + panic!( + "\n\n==========\n\ + command failed but expected success!\ + {}\ + \n\ncommand: {:?}\ + \n\ncwd: {}\ + \n\ndir list: {:?}\ + \n\nstatus: {}\ + \n\nstdout: {}\ + \n\nstderr: {}\ + \n\n==========\n", + suggest, + self.cmd, + self.dir.dir.display(), + dir_list(&self.dir.dir), + o.status, + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + } + o + } +} + +fn nice_err(path: &Path, res: Result) -> T { + match res { + Ok(t) => t, + Err(err) => panic!("{}: {:?}", path.display(), err), + } +} + +fn repeat io::Result<()>>(mut f: F) -> io::Result<()> { + let mut last_err = None; + for _ in 0..10 { + if let Err(err) = f() { + last_err = Some(err); + thread::sleep(Duration::from_millis(500)); + } else { + return Ok(()); + } + } + Err(last_err.unwrap()) +} + +/// Return a recursive listing of all files and directories in the given +/// directory. This is useful for debugging transient and odd failures in +/// integration tests. +fn dir_list>(dir: P) -> Vec { + walkdir::WalkDir::new(dir) + .follow_links(true) + .into_iter() + .map(|result| result.unwrap().path().to_string_lossy().into_owned()) + .collect() +} + +/// When running tests with cross, we need to be a bit smarter about how we +/// run our `rg` binary. 
We can't just run it directly since it might be +/// compiled for a totally different target. Instead, it's likely that `cross` +/// will have set up qemu to run it. While this is integrated into the Rust +/// testing by default, we need to handle it ourselves for integration tests. +/// +/// Now thankfully, cross sets `CROSS_RUNNER` to point to the right qemu +/// executable. Or so one thinks. But it seems to always be set to `qemu-user` +/// and I cannot find `qemu-user` anywhere in the Docker image. Awesome. +/// +/// There is `/linux-runner` which seems to work sometimes? But not always. +/// +/// Instead, it looks like we have to use `qemu-aarch64` in the `aarch64` +/// case. Perfect, so just get the current target architecture and append it +/// to `qemu-`. Wrong. Cross (or qemu or whoever) uses `qemu-ppc64` for +/// `powerpc64`, so we can't just use the target architecture as Rust knows +/// it verbatim. +/// +/// So... we just manually handle these cases. So fucking fun. +fn cross_runner() -> Option { + let runner = std::env::var("CROSS_RUNNER").ok()?; + if runner.is_empty() || runner == "empty" { + return None; + } + if cfg!(target_arch = "powerpc64") { + Some("qemu-ppc64".to_string()) + } else if cfg!(target_arch = "x86") { + Some("i386".to_string()) + } else { + // Make a guess... Sigh. + Some(format!("qemu-{}", std::env::consts::ARCH)) + } +} + +/// Returns true if the test setup believes Cross is running and `qemu` is +/// needed to run ripgrep. +/// +/// This is useful because it has been difficult to get some tests to pass +/// under Cross. +pub fn is_cross() -> bool { + std::env::var("CROSS_RUNNER").ok().map_or(false, |v| !v.is_empty()) +} + +/// Strips absolutely fucked `:` lines from the output. +/// +/// In theory this only happens under qemu, which is where our tests run under +/// `cross`. But it messes with our tests, because... they don't expect the +/// allocator to fucking write to stderr. I mean, what the fuck?
Who prints a +/// warning message with absolutely no instruction for what to do with it or +/// how to disable it. Absolutely fucking bonkers. +fn strip_jemalloc_nonsense(data: &[u8]) -> Vec { + let lines = data + .lines_with_terminator() + .filter(|line| !line.starts_with_str(":")); + bstr::concat(lines) +} From 6a4e05c16e3f5bac4195c4cd9ee07f08992d48f4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 7 May 2025 13:25:26 +1000 Subject: [PATCH 2/2] Add extra pieces. --- collector/compile-benchmarks/README.md | 2 ++ collector/compile-benchmarks/REUSE.toml | 5 +++++ .../ripgrep-14.1.1-tiny/0-println.patch | 12 ++++++++++++ .../ripgrep-14.1.1-tiny/Cargo.toml | 8 +++++++- .../ripgrep-14.1.1-tiny/perf-config.json | 7 +++++-- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 collector/compile-benchmarks/ripgrep-14.1.1-tiny/0-println.patch diff --git a/collector/compile-benchmarks/README.md b/collector/compile-benchmarks/README.md index 9f793a6bb..61a279223 100644 --- a/collector/compile-benchmarks/README.md +++ b/collector/compile-benchmarks/README.md @@ -122,6 +122,8 @@ compiler in interesting ways. regression](https://github.com/rust-lang/rust/issues/31157) from the past. - **ripgrep-13.0.0-tiny**: A line-oriented search tool, optimized with flags that should reduce binary size. +- **ripgrep-14.1.1-tiny**: A line-oriented search tool, optimized with flags + that should reduce binary size. - **token-stream-stress**: A proc-macro crate. Constructs a long token stream much like the `quote` crate does, which caused [quadratic behavior](https://github.com/rust-lang/rust/issues/65080) in the past. 
diff --git a/collector/compile-benchmarks/REUSE.toml b/collector/compile-benchmarks/REUSE.toml index 5a6d652db..6bbf6f392 100644 --- a/collector/compile-benchmarks/REUSE.toml +++ b/collector/compile-benchmarks/REUSE.toml @@ -215,6 +215,11 @@ path = "ripgrep-14.1.1/**" SPDX-FileCopyrightText = "ripgrep contributors" SPDX-License-Identifier = "MIT OR Unlicense" +[[annotations]] +path = "ripgrep-14.1.1-tiny/**" +SPDX-FileCopyrightText = "ripgrep contributors" +SPDX-License-Identifier = "MIT OR Unlicense" + [[annotations]] path = "serde-1.0.136/**" SPDX-FileCopyrightText = "serde contributors" diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/0-println.patch b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/0-println.patch new file mode 100644 index 000000000..9ad6029d1 --- /dev/null +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/0-println.patch @@ -0,0 +1,12 @@ +diff --git a/crates/core/search.rs b/crates/core/search.rs +index 67273425..4794ab49 100644 +--- a/crates/core/search.rs ++++ b/crates/core/search.rs +@@ -340,6 +340,7 @@ impl SearchWorker { + fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + ++ println!("testing"); + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml index b826faa36..e243176bd 100644 --- a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/Cargo.toml @@ -121,7 +121,11 @@ debug = 0 inherits = "release" [profile.release] -debug = 1 +opt-level = "z" +lto = true +codegen-units = 1 +panic = "abort" +strip = true [profile.release-lto] opt-level = 3 @@ -186,3 +190,5 @@ pcre2 = ["grep/pcre2"] [target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator] version = 
"0.5.0" + +[workspace] diff --git a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json index 3166b496a..c7bb4ccc5 100644 --- a/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json +++ b/collector/compile-benchmarks/ripgrep-14.1.1-tiny/perf-config.json @@ -1,4 +1,7 @@ { - "artifact": "binary", - "category": "primary" + "touch_file": "crates/core/main.rs", + "category": "secondary", + "excluded_profiles": ["Doc", "Check", "Debug"], + "excluded_scenarios": ["IncrFull", "IncrPatched", "IncrUnchanged"], + "artifact": "binary" }