Skip to content

Commit 0d85ea9

Browse files
committed
feat: add --bfs option to mc mirror for layer-by-layer traversal in S3 client operations
closes minio#4873
1 parent e929f89 commit 0d85ea9

File tree

6 files changed

+187
-6
lines changed

6 files changed

+187
-6
lines changed

cmd/client-s3.go

+17
Original file line numberDiff line numberDiff line change
@@ -1839,6 +1839,10 @@ func (c *S3Client) listVersionsRoutine(ctx context.Context, b, o string, opts Li
18391839
buckets = append(buckets, b)
18401840
}
18411841

1842+
if opts.Prefix != "" {
1843+
o = opts.Prefix
1844+
}
1845+
18421846
for _, b := range buckets {
18431847
var skipKey string
18441848
for objectVersion := range c.api.ListObjects(ctx, b, minio.ListObjectsOptions{
@@ -2104,6 +2108,10 @@ func (c *S3Client) listIncompleteInRoutine(ctx context.Context, contentCh chan *
21042108
func (c *S3Client) listIncompleteRecursiveInRoutine(ctx context.Context, contentCh chan *ClientContent, opts ListOptions) {
21052109
// get bucket and object from URL.
21062110
b, o := c.url2BucketAndObject()
2111+
if opts.Prefix != "" {
2112+
o = opts.Prefix
2113+
}
2114+
21072115
switch {
21082116
case b == "" && o == "":
21092117
buckets, err := c.api.ListBuckets(ctx)
@@ -2243,6 +2251,7 @@ func (c *S3Client) objectInfo2ClientContent(bucket string, entry minio.ObjectInf
22432251
}
22442252
url.Path = c.buildAbsPath(bucket, entry.Key)
22452253
content.URL = url
2254+
content.ObjectKey = entry.Key
22462255
content.BucketName = bucket
22472256
content.Size = entry.Size
22482257
content.ETag = entry.ETag
@@ -2321,6 +2330,10 @@ func (c *S3Client) bucketStat(ctx context.Context, opts BucketStatOptions) (*Cli
23212330
func (c *S3Client) listInRoutine(ctx context.Context, contentCh chan *ClientContent, opts ListOptions) {
23222331
// get bucket and object from URL.
23232332
b, o := c.url2BucketAndObject()
2333+
if opts.Prefix != "" {
2334+
o = opts.Prefix
2335+
}
2336+
23242337
if opts.ListZip && (b == "" || o == "") {
23252338
contentCh <- &ClientContent{
23262339
Err: probe.NewError(errors.New("listing zip files must provide bucket and object")),
@@ -2385,6 +2398,10 @@ func sortBucketsNameWithSlash(bucketsInfo []minio.BucketInfo) {
23852398
func (c *S3Client) listRecursiveInRoutine(ctx context.Context, contentCh chan *ClientContent, opts ListOptions) {
23862399
// get bucket and object from URL.
23872400
b, o := c.url2BucketAndObject()
2401+
if opts.Prefix != "" {
2402+
o = opts.Prefix
2403+
}
2404+
23882405
switch {
23892406
case b == "" && o == "":
23902407
buckets, err := c.api.ListBuckets(ctx)

cmd/client-url.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,14 @@ func (u ClientURL) String() string {
190190
}
191191

192192
// urlJoinPath Join a path to existing URL.
193-
func urlJoinPath(url1, url2 string) string {
194-
u1 := newClientURL(url1)
195-
u2 := newClientURL(url2)
196-
return joinURLs(u1, u2).String()
193+
func urlJoinPath(base, element string) string {
194+
if strings.HasSuffix(base, "/") && strings.HasPrefix(element, "/") {
195+
return base + element[1:]
196+
}
197+
if !strings.HasSuffix(base, "/") && !strings.HasPrefix(element, "/") {
198+
return base + "/" + element
199+
}
200+
return base + element
197201
}
198202

199203
// url2Stat returns stat info for URL - supports bucket, object and a prefixe with or without a trailing slash

cmd/client.go

+2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ type ListOptions struct {
108108
TimeRef time.Time
109109
ShowDir DirOpt
110110
Count int
111+
Prefix string // Add prefix support
111112
}
112113

113114
// CopyOptions holds options for copying operation
@@ -213,6 +214,7 @@ type Client interface {
213214
// ClientContent - Content container for content metadata
214215
type ClientContent struct {
215216
URL ClientURL
217+
ObjectKey string
216218
BucketName string // only valid and set for client-type objectStorage
217219
Time time.Time
218220
Size int64

cmd/difference.go

+153-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,153 @@ func getSourceModTimeKey(metadata map[string]string) string {
8282
return ""
8383
}
8484

85+
func layerDifference(ctx context.Context, sourceClnt, targetClnt Client, opts mirrorOptions) chan diffMessage {
86+
diffCh := make(chan diffMessage, 10000)
87+
88+
go func() {
89+
defer close(diffCh)
90+
91+
// Channels to feed items found by BFS into the difference engine
92+
srcClientCh := make(chan *ClientContent, 1000)
93+
tgtClientCh := make(chan *ClientContent, 1000)
94+
95+
// Goroutine to perform BFS on the source
96+
go func() {
97+
defer close(srcClientCh)
98+
// Queue for *relative object prefixes* to explore
99+
queue := []string{""} // "" represents the root prefix
100+
101+
for len(queue) > 0 {
102+
// Dequeue the next relative prefix
103+
prefix := queue[0]
104+
queue = queue[1:]
105+
106+
// List items at the current prefix level using the relative prefix
107+
listCtx, listCancel := context.WithCancel(ctx)
108+
contentsCh := sourceClnt.List(listCtx, ListOptions{
109+
Recursive: false, // List only the current level
110+
WithMetadata: opts.isMetadata,
111+
ShowDir: DirLast, // Ensure directories are listed
112+
Prefix: prefix, // Pass the relative prefix
113+
})
114+
115+
for content := range contentsCh {
116+
select {
117+
case <-ctx.Done():
118+
listCancel()
119+
return
120+
default:
121+
if content != nil && content.Err != nil {
122+
srcClientCh <- content
123+
listCancel()
124+
continue
125+
}
126+
if content == nil {
127+
continue
128+
}
129+
130+
// Send the valid content (file or directory) for comparison
131+
srcClientCh <- content
132+
133+
// If it's a directory, queue its *relative object key* for the next level
134+
if content.Type.IsDir() {
135+
relativeKey := content.ObjectKey // Get the relative key
136+
// Prevent infinite loops: don't re-queue the prefix we just listed,
137+
// especially the root ("") which might list itself as "/" depending on backend.
138+
// Also check if ObjectKey is populated.
139+
if relativeKey != "" && relativeKey != prefix {
140+
// Ensure the key ends with a separator if it's a directory prefix
141+
// The S3 ListObjects usually returns directory keys ending with '/'
142+
if !strings.HasSuffix(relativeKey, string(content.URL.Separator)) {
143+
// This case might indicate a non-standard directory representation, handle cautiously
144+
// For standard S3, common prefixes already end in '/'
145+
// If needed, append separator: relativeKey += string(content.URL.Separator)
146+
}
147+
// Add the relative key (prefix) to the queue
148+
queue = append(queue, relativeKey)
149+
}
150+
}
151+
}
152+
}
153+
listCancel()
154+
}
155+
}()
156+
157+
// Goroutine to perform BFS on the target (symmetric to the source)
158+
go func() {
159+
defer close(tgtClientCh)
160+
// Queue for *relative object prefixes*
161+
queue := []string{""}
162+
163+
for len(queue) > 0 {
164+
prefix := queue[0]
165+
queue = queue[1:]
166+
167+
listCtx, listCancel := context.WithCancel(ctx)
168+
contentsCh := targetClnt.List(listCtx, ListOptions{
169+
Recursive: false,
170+
WithMetadata: opts.isMetadata,
171+
ShowDir: DirLast,
172+
Prefix: prefix, // Pass the relative prefix
173+
})
174+
175+
for content := range contentsCh {
176+
select {
177+
case <-ctx.Done():
178+
listCancel()
179+
return
180+
default:
181+
if content != nil && content.Err != nil {
182+
tgtClientCh <- content
183+
listCancel()
184+
continue
185+
}
186+
if content == nil {
187+
continue
188+
}
189+
190+
tgtClientCh <- content
191+
192+
// If it's a directory, queue its *relative object key*
193+
if content.Type.IsDir() {
194+
relativeKey := content.ObjectKey
195+
if relativeKey != "" && relativeKey != prefix {
196+
// Ensure trailing slash if needed (usually present from S3 List)
197+
if !strings.HasSuffix(relativeKey, string(content.URL.Separator)) {
198+
// Handle non-standard directory representation if necessary
199+
}
200+
queue = append(queue, relativeKey)
201+
}
202+
}
203+
}
204+
}
205+
listCancel()
206+
}
207+
}()
208+
209+
// Comparison logic remains the same
210+
err := differenceInternal(
211+
sourceClnt.GetURL().String(),
212+
srcClientCh,
213+
targetClnt.GetURL().String(),
214+
tgtClientCh,
215+
opts,
216+
false, // returnSimilar is false
217+
diffCh,
218+
)
219+
220+
if err != nil {
221+
select {
222+
case <-ctx.Done():
223+
default:
224+
diffCh <- diffMessage{Error: err}
225+
}
226+
}
227+
}()
228+
229+
return diffCh
230+
}
231+
85232
// activeActiveModTimeUpdated tries to calculate if the object copy in the target
86233
// is older than the one in the source by comparing the modtime of the data.
87234
func activeActiveModTimeUpdated(src, dst *ClientContent) bool {
@@ -167,7 +314,12 @@ func bucketObjectDifference(ctx context.Context, sourceClnt, targetClnt Client)
167314
})
168315
}
169316

170-
func objectDifference(ctx context.Context, sourceClnt, targetClnt Client, opts mirrorOptions) (diffCh chan diffMessage) {
317+
func objectDifference(ctx context.Context, sourceClnt, targetClnt Client, opts mirrorOptions) chan diffMessage {
318+
if opts.bfs {
319+
// Use layer-by-layer difference for regular objects
320+
return layerDifference(ctx, sourceClnt, targetClnt, opts)
321+
}
322+
171323
sourceURL := sourceClnt.GetURL().String()
172324
sourceCh := sourceClnt.List(ctx, ListOptions{Recursive: true, WithMetadata: opts.isMetadata, ShowDir: DirNone})
173325

cmd/mirror-main.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ var (
144144
Name: "skip-errors",
145145
Usage: "skip any errors when mirroring",
146146
},
147+
cli.BoolFlag{
148+
Name: "bfs",
149+
Usage: "using BFS for layer-by-layer traversal of files, suitable for large number of files",
150+
},
147151
checksumFlag,
148152
}
149153
)
@@ -212,7 +216,7 @@ EXAMPLES:
212216
{{.Prompt}} {{.HelpName}} --older-than 30d s3/test ~/test
213217
214218
13. Mirror server encrypted objects from Amazon S3 cloud storage to a bucket on Amazon S3 cloud storage
215-
{{.Prompt}} {{.HelpName}} --enc-c "minio/archive=MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDA" --enc-c "s3/archive=MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5BBB" s3/archive/ minio/archive/
219+
{{.Prompt}} {{.HelpName}} --enc-c "minio/archive=MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDA" --enc-c "s3/archive=MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5BBB" s3/archive/ minio/archive/
216220
217221
14. Update 'Cache-Control' header on all existing objects recursively.
218222
{{.Prompt}} {{.HelpName}} --attr "Cache-Control=max-age=90000,min-fresh=9000" myminio/video-files myminio/video-files
@@ -1024,6 +1028,7 @@ func runMirror(ctx context.Context, srcURL, dstURL string, cli *cli.Context, enc
10241028
userMetadata: userMetadata,
10251029
encKeyDB: encKeyDB,
10261030
activeActive: isWatch,
1031+
bfs: cli.Bool("bfs"),
10271032
}
10281033

10291034
// If we are not using active/active and we are not removing

cmd/mirror-url.go

+1
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ type mirrorOptions struct {
278278
userMetadata map[string]string
279279
checksum minio.ChecksumType
280280
sourceListingOnly bool
281+
bfs bool
281282
}
282283

283284
// Prepares urls that need to be copied or removed based on requested options.

0 commit comments

Comments
 (0)