Skip to content

Commit dba0bc4

Browse files
committed
fix(gateway): prevent blocked CIDs in CAR responses
The gateway was including blocked CIDs in CAR format responses, bypassing content filtering policies.

The fix separates the DAGService usage in GetCAR:
- nodeGetterToCarExporer continues wrapping for path resolution
- Original dagService is now used for blockOpener during traversal
- blockOpener returns traversal.SkipMe{} ONLY for blocked content
- NotFound errors are properly propagated (not skipped)

This ensures blocked content is filtered from CAR responses while properly failing when blocks are genuinely missing (broken DAG).

Closes ipfs/kubo#10361
1 parent 15a5643 commit dba0bc4

File tree

3 files changed

+79
-10
lines changed

3 files changed

+79
-10
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ The following emojis are used to highlight certain changes:
2222

2323
### Fixed
2424

25+
- `gateway`: Fixed CAR responses including blocked content. The gateway now properly filters out blocked CIDs from CAR format responses, ensuring content filtering policies are enforced across all response formats. ([ipfs/kubo#10361](https://github.com/ipfs/kubo/issues/10361))
26+
2527
### Security
2628

2729

gateway/backend_blocks.go

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,14 @@ func (bb *BlocksBackend) GetCAR(ctx context.Context, p path.ImmutablePath, param
391391
return ContentPathMetadata{}, nil, err
392392
}
393393

394-
blockGetter := merkledag.NewDAGService(bb.blockService).Session(ctx)
395-
396-
blockGetter = &nodeGetterToCarExporer{
397-
ng: blockGetter,
394+
// Create a DAGService that uses the blocking-aware BlockService.
395+
// When a CID is blocked, the underlying BlockService returns an error.
396+
dagService := merkledag.NewDAGService(bb.blockService).Session(ctx)
397+
398+
// Wrap the DAGService to write blocks to CAR as they're fetched.
399+
// This wrapper is used by the path resolver to fetch intermediate blocks.
400+
blockGetter := &nodeGetterToCarExporer{
401+
ng: dagService,
398402
cw: cw,
399403
}
400404

@@ -434,10 +438,14 @@ func (bb *BlocksBackend) GetCAR(ctx context.Context, p path.ImmutablePath, param
434438
return
435439
}
436440

437-
blockGetter := merkledag.NewDAGService(bb.blockService).Session(ctx)
441+
// Create a DAGService that uses the blocking-aware BlockService.
442+
// When a CID is blocked, the underlying BlockService returns an error.
443+
dagService := merkledag.NewDAGService(bb.blockService).Session(ctx)
438444

439-
blockGetter = &nodeGetterToCarExporer{
440-
ng: blockGetter,
445+
// Wrap the DAGService to write blocks to CAR as they're fetched.
446+
// This wrapper is used by the path resolver to fetch intermediate blocks.
447+
blockGetter := &nodeGetterToCarExporer{
448+
ng: dagService,
441449
cw: cw,
442450
}
443451

@@ -447,7 +455,15 @@ func (bb *BlocksBackend) GetCAR(ctx context.Context, p path.ImmutablePath, param
447455

448456
lsys := cidlink.DefaultLinkSystem()
449457
unixfsnode.AddUnixFSReificationToLinkSystem(&lsys)
450-
lsys.StorageReadOpener = blockOpener(ctx, blockGetter)
458+
// CRITICAL: Use the original dagService for blockOpener, not the wrapped nodeGetterToCarExporer.
459+
// This separation ensures that:
460+
// 1. blockOpener checks if blocks are accessible (through blocking-aware BlockService)
461+
// 2. Only non-blocked content triggers CAR writing in nodeGetterToCarExporer
462+
// 3. Blocked content returns traversal.SkipMe{} and never gets written to CAR
463+
//
464+
// If we passed blockGetter (the wrapped nodeGetterToCarExporer) here instead,
465+
// it would write blocks to CAR immediately upon access, even if they're blocked.
466+
lsys.StorageReadOpener = blockOpener(ctx, dagService)
451467

452468
// First resolve the path since we always need to.
453469
lastCid, remainder, err := pathResolver.ResolveToLastNode(ctx, p)
@@ -784,6 +800,12 @@ func (bb *BlocksBackend) resolvePath(ctx context.Context, p path.Path) (path.Imm
784800
return imPath, remainder, nil
785801
}
786802

803+
// nodeGetterToCarExporer wraps a NodeGetter to write blocks to a CAR file as they are fetched.
804+
// This enables streaming CAR generation during DAG traversal.
805+
//
806+
// IMPORTANT: This wrapper is used for path resolution but NOT for the traversal's blockOpener.
807+
// The blockOpener uses the underlying dagService directly to ensure proper blocking checks
808+
// before any blocks are written to the CAR.
787809
type nodeGetterToCarExporer struct {
788810
ng format.NodeGetter
789811
cw storage.WritableCar
@@ -792,6 +814,7 @@ type nodeGetterToCarExporer struct {
792814
func (n *nodeGetterToCarExporer) Get(ctx context.Context, c cid.Cid) (format.Node, error) {
793815
nd, err := n.ng.Get(ctx, c)
794816
if err != nil {
817+
// Pass through all errors unchanged - the caller decides how to handle them
795818
return nil, err
796819
}
797820

@@ -820,6 +843,19 @@ func (n *nodeGetterToCarExporer) GetMany(ctx context.Context, cids []cid.Cid) <-
820843
case outCh <- nd:
821844
case <-ctx.Done():
822845
}
846+
} else {
847+
// Handle errors from the underlying NodeGetter:
848+
// - NotFound errors: content doesn't exist, skip silently
849+
// - Blocked errors: content is blocked, skip silently
850+
// - Other errors: propagate to caller
851+
if !format.IsNotFound(nd.Err) && !isErrContentBlocked(nd.Err) {
852+
// Only pass through errors that are neither NotFound nor blocked
853+
select {
854+
case outCh <- nd:
855+
case <-ctx.Done():
856+
}
857+
}
858+
// For blocked/not found errors, we simply skip - don't send anything
823859
}
824860
}
825861
}()
@@ -909,15 +945,30 @@ func (n *nodeGetterFetcherSingleUseFactory) blankProgress(ctx context.Context) t
909945
}
910946
}
911947

948+
// blockOpener returns a function that loads blocks during CAR traversal.
949+
// It is used by the IPLD LinkSystem during the walkGatewaySimpleSelector traversal.
950+
//
951+
// When a blocked CID is encountered, it returns traversal.SkipMe{} which tells
952+
// the traversal to skip that branch of the DAG without failing the entire operation.
953+
// This allows generating CARs with partial content when some blocks are filtered.
912954
func blockOpener(ctx context.Context, ng format.NodeGetter) ipld.BlockReadOpener {
913955
return func(_ ipld.LinkContext, lnk ipld.Link) (io.Reader, error) {
914956
cidLink, ok := lnk.(cidlink.Link)
915957
if !ok {
916958
return nil, fmt.Errorf("invalid link type for loading: %v", lnk)
917959
}
918960

961+
// Attempt to fetch the block through the NodeGetter.
962+
// If using a blocking-aware BlockService, this returns an error for blocked CIDs.
919963
blk, err := ng.Get(ctx, cidLink.Cid)
920964
if err != nil {
965+
// Check if this block is blocked (not just missing)
966+
if isErrContentBlocked(err) {
967+
// Return traversal.SkipMe{} to gracefully skip blocked content.
968+
// The traversal continues with other accessible parts of the DAG.
969+
return nil, traversal.SkipMe{}
970+
}
971+
// Propagate all other errors including NotFound (broken DAG)
921972
return nil, err
922973
}
923974

gateway/errors.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ func webError(w http.ResponseWriter, r *http.Request, c *Config, err error, defa
226226
case errors.Is(err, &cid.ErrInvalidCid{}):
227227
code = http.StatusBadRequest
228228
case isErrContentBlocked(err):
229+
// HTTP 410 Gone indicates the content has been permanently removed
230+
// due to content filtering/blocking policies
229231
code = http.StatusGone
230232
case isErrNotFound(err):
231233
code = http.StatusNotFound
@@ -279,9 +281,23 @@ func isErrNotFound(err error) bool {
279281
}
280282
}
281283

282-
// isErrContentBlocked returns true for content filtering system errors
284+
// isErrContentBlocked returns true for content filtering system errors.
285+
//
286+
// This function detects errors from nopfs (https://github.com/ipfs-shipyard/nopfs),
287+
// the content blocking system used by IPFS implementations.
288+
// When content is blocked, nopfs returns a StatusError with a specific message format.
289+
// We detect these errors by checking for the characteristic error message rather than
290+
// using type assertions to avoid pulling nopfs as a direct dependency.
291+
//
292+
// The gateway responds with HTTP 410 Gone for blocked content (see webError), indicating the content
293+
// has been intentionally made unavailable due to content filtering policies.
294+
//
295+
// TODO: When nopfs becomes a direct dependency, replace this string matching with proper
296+
// type assertion or errors.Is() for more robust error detection.
283297
func isErrContentBlocked(err error) bool {
284-
// TODO: we match error message to avoid pulling nopfs as a dependency
298+
// The nopfs StatusError.Error() returns messages in the format:
299+
// - "{cid} is blocked and cannot be provided" for blocked CIDs
300+
// - "{path} is blocked and cannot be provided" for blocked paths
285301
// Ref. https://github.com/ipfs-shipyard/nopfs/blob/cde3b5ba964c13e977f4a95f3bd8ca7d7710fbda/status.go#L87-L89
286302
return strings.Contains(err.Error(), "blocked and cannot be provided")
287303
}

0 commit comments

Comments
 (0)