diff --git a/cmd/cli/cli/const.go b/cmd/cli/cli/const.go index 970bcea31ae..172971ada0b 100644 --- a/cmd/cli/cli/const.go +++ b/cmd/cli/cli/const.go @@ -479,7 +479,7 @@ var ( indent4 + "\tais ls ais://nnn --regex \"^A\"\t- match object names starting with letter A", } - // TODO: `--select` (to select columns) would sound more conventional + // TODO: `--select` (to select columns) would sound more conventional/idiomatic regexColsFlag = cli.StringFlag{ Name: regexFlag.Name, @@ -490,6 +490,11 @@ var ( indent4 + "\t --regex \"(AWS-GET$|VERSION-CHANGE$)\" - show the number object version changes (updates) and cold GETs from AWS\n" + indent4 + "\t --regex \"(gcp-get$|version-change$)\" - same as above for Google Cloud ('gs://')", } + allColumnsFlag = cli.BoolFlag{ + Name: "all-columns", + Usage: "Show all columns, including those with only zero values", + } + regexJobsFlag = cli.StringFlag{ Name: regexFlag.Name, Usage: "Regular expression to select jobs by name, kind, or description, e.g.: --regex \"ec|mirror|elect\"", @@ -582,7 +587,7 @@ var ( } pageSizeFlag = cli.IntFlag{ Name: "page-size", - Usage: "Maximum number of object names per page; when the flag is omitted or 0 (zero)\n" + + Usage: "Maximum number of object names per page; when the flag is omitted or 0\n" + indent4 + "\tthe maximum is defined by the corresponding backend; see also '--max-pages' and '--paged'", } maxPagesFlag = cli.IntFlag{ @@ -824,18 +829,18 @@ var ( indent1 + "\t(applies only to buckets with remote backend)", } - _onlyin = "only in-cluster objects - only those objects from the respective remote bucket that are present (\"cached\")" + _onlyin = "in-cluster objects, i.e., objects from the respective remote bucket that are present (\"cached\") in the cluster" listObjCachedFlag = cli.BoolFlag{ Name: "cached", - Usage: "List " + _onlyin, + Usage: "Only list " + _onlyin, } getObjCachedFlag = cli.BoolFlag{ Name: listObjCachedFlag.Name, - Usage: "Get " + _onlyin, + Usage: "Only get " + _onlyin, } 
scrubObjCachedFlag = cli.BoolFlag{ Name: listObjCachedFlag.Name, - Usage: "Visit " + _onlyin, + Usage: "Only visit " + _onlyin, } // when '--all' is used for/by another flag diff --git a/cmd/cli/cli/scrub.go b/cmd/cli/cli/scrub.go index 39f3235da9b..6b4154118ca 100644 --- a/cmd/cli/cli/scrub.go +++ b/cmd/cli/cli/scrub.go @@ -28,13 +28,11 @@ import ( ) // [TODO] -// - add options: -// --cached -// --locally-misplaced -// --checksum -// --fix (***) -// - async execution, with --wait option -// - speed-up `ls` via multiple workers (***) +// - '--checksum' option (slow) +// - '--fix' option (***) +// - multiple buckets vs one-log-per-scrub-metric - a problem +// - async execution with '--wait' option +// - speed-up `ls` via multiple workers type ( _log struct { @@ -82,6 +80,7 @@ var ( smallSizeFlag, largeSizeFlag, scrubObjCachedFlag, + allColumnsFlag, ) ) @@ -182,7 +181,7 @@ func (ctx *scrCtx) closeLogs(c *cli.Context) { fmt.Fprintln(c.App.Writer, strings.Repeat("-", len(title))) titled = true } - fmt.Fprintf(c.App.Writer, "* %s objects: %s (%d record%s)\n", log.tag, log.fn, log.cnt, cos.Plural(log.cnt)) + fmt.Fprintf(c.App.Writer, "* %s objects: \t%s (%d record%s)\n", log.tag, log.fn, log.cnt, cos.Plural(log.cnt)) } } @@ -225,7 +224,7 @@ func (ctx *scrCtx) prnt() error { out[i] = (*teb.ScrBp)(scr) } all := teb.ScrubHelper{All: out} - tab := all.MakeTab(ctx.units, ctx.haveRemote.Load()) + tab := all.MakeTab(ctx.units, ctx.haveRemote.Load(), flagIsSet(ctx.c, allColumnsFlag)) return teb.Print(out, tab.Template(flagIsSet(ctx.c, noHeaderFlag))) } @@ -407,9 +406,9 @@ func (scr *scrBp) upd(parent *scrCtx, en *cmn.LsoEnt) { // or-ing rest conditions (x num-copies) if en.Status() == apc.LocMisplacedMountpath { - scr.Stats[teb.ScrMisplacedMp].Cnt++ - scr.Stats[teb.ScrMisplacedMp].Siz += en.Size - scr.log(parent, en, teb.ScrMisplacedMp) + scr.Stats[teb.ScrMisplacedMpath].Cnt++ + scr.Stats[teb.ScrMisplacedMpath].Siz += en.Size + scr.log(parent, en, teb.ScrMisplacedMpath) } if 
scr.Bck.Props.Mirror.Enabled && en.Copies < int16(scr.Bck.Props.Mirror.Copies) { diff --git a/cmd/cli/teb/scrub.go b/cmd/cli/teb/scrub.go index bf16e28a0a7..7d9c7ef8f56 100644 --- a/cmd/cli/teb/scrub.go +++ b/cmd/cli/teb/scrub.go @@ -15,23 +15,23 @@ import ( // naming-wise, see also: fmtLsObjStatus (cmd/cli/teb/lso.go) const ( - colBucket = "BUCKET" // + [/PREFIX] - colObjects = "OBJECTS" // num listed - colNotIn = "NOT-CACHED" // "not present", "not cached", not in-cluster - colMisplacedNode = "MISPLACED-NODE" // cluster-wise - colMisplacedMp = "MISPLACED-DISK" // local misplacement - colMissingCp = "MISSING-CP" - colSmallSz = "SMALL" - colLargeSz = "LARGE" - colVchanged = "VER-CHANGED" - colVremoved = "VER-REMOVED" + colBucket = "BUCKET" // + [/PREFIX] + colObjects = "OBJECTS" // num listed + colNotIn = "NOT-CACHED" // "not present", "not cached", not in-cluster + colMisplacedNode = "MISPLACED(cluster)" // cluster-wise + colMisplacedMpath = "MISPLACED(mountpath)" // local misplacement + colMissingCp = "MISSING-COPIES" + colSmallSz = "SMALL" + colLargeSz = "LARGE" + colVchanged = "VER-CHANGED" + colVremoved = "DELETED" ) const ( ScrObjects = iota ScrNotIn ScrMisplacedNode - ScrMisplacedMp + ScrMisplacedMpath ScrMissingCp ScrSmallSz ScrLargeSz @@ -42,7 +42,7 @@ const ( ) var ( - ScrCols = [...]string{colObjects, colNotIn, colMisplacedNode, colMisplacedMp, colMissingCp, colSmallSz, colLargeSz, colVchanged, colVremoved} + ScrCols = [...]string{colObjects, colNotIn, colMisplacedNode, colMisplacedMpath, colMissingCp, colSmallSz, colLargeSz, colVchanged, colVremoved} ScrNums = [ScrNumStats]int64{} ) @@ -81,7 +81,7 @@ func (h *ScrubHelper) colFirst() string { } } -func (h *ScrubHelper) MakeTab(units string, haveRemote bool) *Table { +func (h *ScrubHelper) MakeTab(units string, haveRemote, allColumns bool) *Table { debug.Assert(len(ScrCols) == len(ScrNums)) cols := make([]*header, 1, len(ScrCols)+1) @@ -93,7 +93,11 @@ func (h *ScrubHelper) MakeTab(units string, haveRemote 
bool) *Table { table := newTable(cols...) // hide assorted columns - h.hideMissingCp(cols, colMissingCp) + if !allColumns { + h.hideMissingCp(cols, colMisplacedNode) + h.hideMissingCp(cols, colMisplacedMpath) + h.hideMissingCp(cols, colMissingCp) + } if !haveRemote { h._hideCol(cols, colNotIn) h._hideCol(cols, colVchanged) diff --git a/docs/cli/bucket.md b/docs/cli/bucket.md index 3311c6e801f..ae10c43648f 100644 --- a/docs/cli/bucket.md +++ b/docs/cli/bucket.md @@ -304,7 +304,7 @@ OPTIONS: - all buckets, including accessible (visible) remote buckets that are not in-cluster - all objects in a given accessible (visible) bucket, including remote objects and misplaced copies --archive List archived content (see docs/archive.md for details) - --cached List only in-cluster objects - only those objects from the respective remote bucket that are present ("cached") + --cached Only list in-cluster objects, i.e., objects from the respective remote bucket that are present ("cached") in the cluster --count-only Print only the resulting number of listed objects and elapsed time --diff Perform a bidirectional diff between in-cluster and remote content, which further entails: - detecting remote version changes (a.k.a. 
out-of-band updates), and diff --git a/docs/cli/object.md b/docs/cli/object.md index 04c0015222b..ebbff30f836 100644 --- a/docs/cli/object.md +++ b/docs/cli/object.md @@ -108,7 +108,7 @@ OPTIONS: to select possibly multiple matching archived files from a given shard; is used in combination with '--archmode' ("matching mode") option --blob-download Utilize built-in blob-downloader (and the corresponding alternative datapath) to read very large remote objects - --cached Get only in-cluster objects - only those objects from the respective remote bucket that are present ("cached") + --cached Only get in-cluster objects, i.e., objects from the respective remote bucket that are present ("cached") in the cluster --check-cached Check whether a given named object is present in cluster (applies only to buckets with remote backend) --checksum Validate checksum diff --git a/docs/cli/storage.md b/docs/cli/storage.md index 41559312c84..2755be8bd5d 100644 --- a/docs/cli/storage.md +++ b/docs/cli/storage.md @@ -95,79 +95,90 @@ For command line options and usage examples, please refer to: ```console $ ais scrub --help + NAME: - ais scrub - (alias for "storage validate") check in-cluster content for misplaced objects, objects that have insufficient numbers of copies, zero size, and more + ais scrub - (alias for "storage validate") Check in-cluster content for misplaced objects, objects that have insufficient numbers of copies, zero size, and more e.g.: * ais storage validate - validate all in-cluster buckets; * ais scrub - same as above; - * ais storage validate ais - validate (a.k.a. scrub) all ais buckets; - * ais scrub s3 - all s3 buckets present in the cluster; + * ais storage validate ais - validate (a.k.a. 
scrub) all ais:// buckets; + * ais scrub s3 - ditto, all s3:// buckets; * ais scrub s3 --refresh 10 - same as above while refreshing runtime counter(s) every 10s; * ais scrub gs://abc/images/ - validate part of the gcp bucket under 'images/`; * ais scrub gs://abc --prefix images/ - same as above. USAGE: - ais scrub [command options] [BUCKET[/PREFIX]] or [PROVIDER] + ais scrub [command options] [BUCKET[/PREFIX]] [PROVIDER] OPTIONS: - --refresh value time interval for continuous monitoring; can be also used to update progress bar (at a given interval); - valid time units: ns, us (or µs), ms, s (default), m, h - --count value used together with '--refresh' to limit the number of generated reports, e.g.: + --all-columns Show all columns, including those with only zero values + --cached Only visit in-cluster objects, i.e., objects from the respective remote bucket that are present ("cached") in the cluster + --count value Used together with '--refresh' to limit the number of generated reports, e.g.: '--refresh 10 --count 5' - run 5 times with 10s interval (default: 0) - --prefix value for each bucket, select only those objects (names) that start with the specified prefix, e.g.: - '--prefix a/b/c' - sum-up sizes of the virtual directory a/b/c and objects from the virtual directory - a/b that have names (relative to this directory) starting with the letter c - --page-size value maximum number of object names per page; when the flag is omitted or 0 (zero) - the maximum is defined by the corresponding backend; see also '--max-pages' and '--paged' (default: 0) - --limit value maximum number of object names to list (0 - unlimited; see also '--max-pages') - e.g.: 'ais ls gs://abc --limit 1234 --cached --props size,custom (default: 0) - --no-headers, -H display tables without headers - --max-pages value maximum number of pages to display (see also '--page-size' and '--limit') + --large-size value Count and report all objects that are larger or equal in size (e.g.: 4mb, 1MiB, 
1048576, 128k; default: 5 GiB) + --limit value The maximum number of objects to list, get, or otherwise handle (0 - unlimited; see also '--max-pages'), + e.g.: + - 'ais ls gs://abc/dir --limit 1234 --cached --props size,custom,atime' - list no more than 1234 objects + - 'ais get gs://abc /dev/null --prefix dir --limit 1234' - get --/-- + - 'ais scrub gs://abc/dir --limit 1234' - scrub --/-- (default: 0) + --max-pages value Maximum number of pages to display (see also '--page-size' and '--limit') e.g.: 'ais ls az://abc --paged --page-size 123 --max-pages 7 (default: 0) - --non-recursive, --nr non-recursive operation, e.g.: - 'ais ls gs://bucket/prefix --nr' - list objects and/or virtual subdirectories with names starting with the specified prefix; - 'ais ls gs://bucket/prefix/ --nr' - list contained objects and/or immediately nested virtual subdirectories _without_ recursing into the latter; - 'ais prefetch s3://bck/abcd --nr' - prefetch a single named object (see 'ais prefetch --help' for details); - 'ais rmo gs://bucket/prefix --nr' - remove a single object with the specified name (see 'ais rmo --help' for details) - --small-size value count and report all objects that are smaller or equal in size (e.g.: 4, 4b, 1k, 128kib; default: 0) - --large-size value count and report all objects that are larger or equal in size (e.g.: 4mb, 1MiB, 1048576, 128k; default: 5 GiB) - --help, -h show help + --no-headers, -H Display tables without headers + --non-recursive, --nr Non-recursive operation, e.g.: + - 'ais ls gs://bucket/prefix --nr' - list objects and/or virtual subdirectories with names starting with the specified prefix; + - 'ais ls gs://bucket/prefix/ --nr' - list contained objects and/or immediately nested virtual subdirectories _without_ recursing into the latter; + - 'ais prefetch s3://bck/abcd --nr' - prefetch a single named object (see 'ais prefetch --help' for details); + - 'ais rmo gs://bucket/prefix --nr' - remove a single object with the specified name (see 
'ais rmo --help' for details) + --page-size value Maximum number of object names per page; when the flag is omitted or 0 + the maximum is defined by the corresponding backend; see also '--max-pages' and '--paged' (default: 0) + --prefix value For each bucket, select only those objects (names) that start with the specified prefix, e.g.: + '--prefix a/b/c' - sum up sizes of the virtual directory a/b/c and objects from the virtual directory + a/b that have names (relative to this directory) starting with the letter c + --refresh value Time interval for continuous monitoring; can be also used to update progress bar (at a given interval); + valid time units: ns, us (or µs), ms, s (default), m, h + --small-size value Count and report all objects that are smaller or equal in size (e.g.: 4, 4b, 1k, 128kib; default: 0) + --help, -h Show help ``` Checks all objects of the bucket `BUCKET` and show number of misplaced objects, number of objects that have insufficient number of copies, etc. If optional arguments are omitted, show information about all in-cluster buckets. -### Example: validate a given (prefix-defined) portion of s3 bucket +### Example: validate a given prefix-defined portion of an s3 bucket ```console -$ ais storage validate s3://abc/birds +$ ais scrub s3://data/my-prefix --large-size 500k -Please wait, the operation may take some time... 
+BUCKET/PREFIX OBJECTS NOT-CACHED SMALL LARGE VER-CHANGED DELETED +s3://data/my-prefix 1637 (1.6GiB) 1465 (1.4GiB) - 172 (172.0MiB) 1 (1.0MiB) 1 (1.0MiB) -BUCKET OBJECTS MISPLACED MISSING COPIES ZERO SIZE 5+GB -s3://abc 84603 0 0 0 329 +Detailed Logs +------------- +* not-cached objects: /tmp/.ais-scrub-not-cached.204f71.log (1465 records) +* large objects: /tmp/.ais-scrub-large.204f71.log (172 records) +* ver-changed objects: /tmp/.ais-scrub-ver-changed.204f71.log (1 record) +* deleted objects: /tmp/.ais-scrub-deleted.204f71.log (1 record) ``` -### Example: same as above - -```console -$ ais storage validate s3://abc --prefix birds -``` +### Example: same as above but show all columns -### Example: validate all `ais://` buckets +In other words, include relevant metrics that have only zero values. ```console -$ ais storage validate ais +$ ais scrub s3://data/my-prefix --large-size 500k --all-columns -BUCKET OBJECTS MISPLACED MISSING COPIES ZERO SIZE 5+GB -ais://aa 12345 0 0 0 678 -ais://bb 67890 0 0 0 901 -... -... -``` +BUCKET/PREFIX OBJECTS NOT-CACHED MISPLACED(cluster) MISPLACED(mountpath) MISSING-COPIES SMALL LARGE VER-CHANGED DELETED +s3://data/my-prefix 1637 (1.6GiB) 1465 (1.4GiB) - - - - 172 (172.0MiB) 1 (1.0MiB) 1 (1.0MiB) +Detailed Logs +------------- +* not-cached objects: /tmp/.ais-scrub-not-cached.204f8c.log (1465 records) +* large objects: /tmp/.ais-scrub-large.204f8c.log (172 records) +* ver-changed objects: /tmp/.ais-scrub-ver-changed.204f8c.log (1 record) +* deleted objects: /tmp/.ais-scrub-deleted.204f8c.log (1 record) +``` +Note that 172 (the LARGE count, which is also the number of records in the large-objects log) = 1637 (OBJECTS) - 1465 (NOT-CACHED). ## Mountpath (and disk) management