Skip to content
This repository was archived by the owner on May 30, 2025. It is now read-only.

[HTTPClient] Support HTTP Cache via SeekableRequestReadStream #17

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/ByteStream/ReadStream/BaseByteReadStream.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

abstract class BaseByteReadStream implements ByteReadStream {

const CHUNK_SIZE = 100;// 64 * 1024;
const CHUNK_SIZE = 64 * 1024;

protected $buffer_size = 2048;

Expand Down
192 changes: 143 additions & 49 deletions components/HttpClient/ByteStream/SeekableRequestReadStream.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,67 +5,115 @@
use WordPress\ByteStream\FileReadWriteStream;
use WordPress\ByteStream\ReadStream\BaseByteReadStream;
use WordPress\ByteStream\ReadStream\ByteReadStream;
use WordPress\HttpClient\CacheEntry;
use WordPress\HttpClient\CachePolicy;
use WordPress\HttpClient\FilesystemCache;
use WordPress\HttpClient\Request;

/**
* HTTP reader that can seek() within the stream.
* HTTP reader that can seek() within the stream with transparent persistent caching.
*
* Downloaded bytes are stored in a temporary file. All the read operations are delegated to that file.
*
* – Seek()-ing forward is done by fetching all the bytes up to the target offset.
* – Seek()-ing backwards is done by seeking within the temporary file.
* Behaviour changes compared to the original implementation:
* – Before issuing the HTTP request, it checks a on-disk cache (FilesystemCache).
* If a fresh 200 response is found (per CachePolicy) the body is replayed with
* zero network overhead.
* – After a successful complete download of a cache-able 200 response, the body
* and headers are written to the cache. Partial / cut-off downloads are never
* stored, preventing corrupted bodies from being reused in the future.
*/
class SeekableRequestReadStream implements ByteReadStream {

private $remote; // RequestReadStream
private $cache; // FileReadWriteStream
private $temp;
private $request;
/** @var RequestReadStream|null */
private $remote;
/** @var FileReadWriteStream */
private $cacheStream; // local working file (either cached body or tmp during download)

/** @var FilesystemCache */
private $store;
/** @var CacheEntry|null */
private $cacheEntry = null;
/** @var bool */
private $cacheHit = false;
/** @var bool */
private $stored = false; // did we write the cache already?

private $length_resolved = false;

public function __construct( $request, array $options = array() ) {
if ( is_string( $request ) ) {
$request = new Request( $request );
}

$this->request = $request;
$url = $request->url;
$cache_dir = $options['cache_dir'] ?? sys_get_temp_dir() . '/wp_http_cache2';
$this->store = new FilesystemCache( $cache_dir );

// Cache lookup
$hit = $this->store->lookup( $url );
if ( $hit && CachePolicy::is_fresh( $hit ) && $hit->status === 200 ) {
// Cache hit – serve from cache
$this->cacheHit = true;
$this->cacheEntry = $hit;
$this->cacheStream = FileReadWriteStream::from_path( $this->store->get_body_path( $url ) );

return;
}

// Cache miss – fetch remotely
$this->remote = new RequestReadStream( $request, $options );
$this->temp = $options['cache_path'] ?? tempnam( sys_get_temp_dir(), 'wp_http_cache_' );
$this->cache = FileReadWriteStream::from_path( $this->temp, true );
}

/* ------------------------------------------------ internal helpers */

private function pipe_until( int $offset ): void {
while ( $this->cache->length() === null || $this->cache->length() < $offset ) {
$pulled = $this->remote->pull( BaseByteReadStream::CHUNK_SIZE );
if ( $this->cacheHit ) {
return; // data already fully available locally
}
while ( !$this->cacheStream || $this->cacheStream->length() === null || $this->cacheStream->length() < $offset ) {
$pulled = $this->pull_remote();
if ( 0 === $pulled ) {
break;
}
$this->cache->append_bytes( $this->remote->consume( $pulled ) );
$this->cacheStream->append_bytes( $this->remote->consume( $pulled ) );
}
}

private function pull_remote() {
$pulled = $this->remote->pull( BaseByteReadStream::CHUNK_SIZE );
/**
* Wait for the first response bytes from the remote server to
* ensure we're caching the terminal response after all the
* redirects have been followed.
*/
if ( $pulled > 0 && !$this->cacheStream ) {
$latest_request = $this->request->latest_redirect();
$this->cacheStream = FileReadWriteStream::from_path(
$this->store->get_body_path( $latest_request->url ),
true
);
}
return $pulled;
}

/* ------------------------------------------------ interface ByteReadStream */

public function length(): ?int {
if ( ! $this->cacheStream ) {
return $this->remote->length();
}

if ( $this->cacheHit ) {
return $this->cacheStream->length();
}

if ( ! $this->length_resolved && null === $this->remote->length() ) {
/**
* Wait for the remote headers before returning the length.
*
* This is an inconsistency between RequestReadStream::length():
*
* * RequestReadStream returns null until the remote headers are known.
* * SeekableRequestReadStream proactively waits for the remote headers.
*
* That's because:
*
* * RequestReadStream class is a lower-level utility where we simply
* expose what's available at the moment. The developer is responsible
* for awaiting the response headers.
* * SeekableRequestReadStream is a higher-level tool meant for usage
* when knowing the length is vital, e.g. reading from a remote ZIP file.
*/
$this->remote->await_response();
if ( null === $this->remote->length() ) {
// The server did not send the Content-Length header.
// We need to consume the entire stream to infer the length.
$position = $this->tell();
$pos = $this->tell();
$this->consume_all();
$this->seek( $position );
$this->seek( $pos );
}
$this->length_resolved = true;
}
Expand All @@ -74,54 +122,100 @@ public function length(): ?int {
}

public function tell(): int {
return $this->cache->tell();
if ( ! $this->cacheStream ) {
return $this->remote->tell();
}
return $this->cacheStream->tell();
}

public function seek( int $offset ) {
$this->pipe_until( $offset );
$this->cache->seek( $offset );
$this->cacheStream->seek( $offset );
}

public function reached_end_of_data(): bool {
return $this->remote->reached_end_of_data() && $this->cache->reached_end_of_data();
if ( $this->cacheHit ) {
return $this->cacheStream->reached_end_of_data();
}

$ended = $this->remote->reached_end_of_data() && $this->cacheStream->reached_end_of_data();
if ( $ended ) {
$this->maybe_store_cache();
}

return $ended;
}

public function pull( $n, $mode = self::PULL_NO_MORE_THAN ): int {
$this->pipe_until( $this->tell() + $n );

return $this->cache->pull( $n, $mode );
return $this->cacheStream->pull( $n, $mode );
}

public function peek( $n ): string {
$this->pipe_until( $this->tell() + $n );

return $this->cache->peek( $n );
return $this->cacheStream->peek( $n );
}

public function consume( $n ): string {
return $this->cache->consume( $n );
return $this->cacheStream->consume( $n );
}

public function consume_all(): string {
while ( ! $this->remote->reached_end_of_data() ) {
$pulled = $this->remote->pull( BaseByteReadStream::CHUNK_SIZE );
if ( 0 === $pulled ) {
break;
if ( ! $this->cacheHit ) {
while ( ! $this->remote->reached_end_of_data() ) {
$pulled = $this->remote->pull( BaseByteReadStream::CHUNK_SIZE );
if ( 0 === $pulled ) {
break;
}
$this->cacheStream->append_bytes( $this->remote->consume( $pulled ) );
}
$this->cache->append_bytes( $this->remote->consume( $pulled ) );
$this->cacheStream->close_writing();
$this->maybe_store_cache();
}
$this->cache->close_writing();

return $this->cache->consume_all();
return $this->cacheStream->consume_all();
}

/**
* Returns the HTTP response associated with this stream.
* For cache hits a synthetic Response object is created from the stored metadata.
*/
public function await_response() {
if ( $this->cacheHit && $this->cacheEntry ) {
return $this->cacheEntry->to_response( $this->request );
}

return $this->remote->await_response();
}

public function close_reading(): void {
$this->remote->close_reading();
$this->cache->close_reading();
@unlink( $this->temp );
$this->maybe_store_cache();
if ( $this->remote ) {
$this->remote->close_reading();
}
$this->cacheStream->close_reading();
}

private function maybe_store_cache(): void {
if ( $this->cacheHit || $this->stored || ! $this->remote ) {
return; // nothing to do
}
if ( ! $this->remote->reached_end_of_data() ) {
return; // not finished – do not store partial responses
}

$response = $this->remote->await_response();
if ( $response->status_code !== 200 ) {
return; // only store 200 OK bodies
}
if ( ! CachePolicy::response_is_cacheable( $response ) ) {
return; // respect Cache-Control / Expires rules
}

$this->store->commit( $response );
$this->stored = true;
}

}
43 changes: 43 additions & 0 deletions components/HttpClient/CacheEntry.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,47 @@ final class CacheEntry {
* @var string|null
*/
public $last_modified;
/**
* @var string|null Vary header value from the response that determined the cache entry variant.
*/
public $vary;
/**
* @var array<string, string|null>|null Stores the original request header values for every header listed in `Vary`.
* This allows later look-ups to verify the variant matches the current request.
*/
public $vary_headers;

static public function from_response( Response $response ) {
$entry = new self();
$entry->url = $response->request->url;
$entry->status = $response->status_code;
$entry->headers = $response->headers;
$entry->stored_at = time();
$entry->etag = $response->get_header( 'etag' );
$entry->last_modified = $response->get_header( 'last-modified' );

// Capture Vary header information to support correct cache variant matching.
$vary_header = $response->get_header( 'vary' );
$entry->vary = $vary_header;
if ( null !== $vary_header && trim( $vary_header ) !== '*' ) {
$vary_headers = [];
foreach ( explode( ',', $vary_header ) as $h ) {
$h = strtolower( trim( $h ) );
if ( '' === $h ) {
continue;
}
$vary_headers[ $h ] = $response->request->get_header( $h ) ?? null;
}
$entry->vary_headers = $vary_headers;
}

return $entry;
}

public function to_response( Request $request ) {
$response = new Response( $request );
$response->status_code = $this->status;
$response->headers = $this->headers;
return $response;
}
}
34 changes: 0 additions & 34 deletions components/HttpClient/CacheStorage.php

This file was deleted.

4 changes: 4 additions & 0 deletions components/HttpClient/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,10 @@ protected function receive_response_body( $requests ) {
*/
protected function handle_redirects( $requests ) {
foreach ( $requests as $request ) {
if ( ! $request->client_can_follow_redirects ) {
continue;
}

$response = $request->response;
if ( ! $response ) {
continue;
Expand Down
Loading
Loading