From 858517966f022c4487338bafbb98a49eb8c8a235 Mon Sep 17 00:00:00 2001 From: Thomas Johnson Date: Wed, 21 May 2025 10:09:03 +0200 Subject: [PATCH 1/3] Adds an initial implementation of http_head --- src/http_client_extension.cpp | 60 ++++++++++++++++++++++++++++++++++- test/sql/httpclient.test | 25 +++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/http_client_extension.cpp b/src/http_client_extension.cpp index 6680c9b..e7fa243 100644 --- a/src/http_client_extension.cpp +++ b/src/http_client_extension.cpp @@ -156,6 +156,61 @@ static int ConvertListEntryToMap(const list_entry_t& list_entry, const duckdb::V return result.size(); } + + +std::string headers_to_string(const duckdb_httplib_openssl::Headers& headers) { + std::string result = "{"; + + for (const auto& [key, value] : headers) { + // Convert ci string to regular string by converting to lowercase in-place + std::string lower_key = key; + std::transform(lower_key.begin(), lower_key.end(), lower_key.begin(), + [](unsigned char c){ return std::tolower(c); }); + + result += "\"" + escape_json(lower_key) + "\":\"" + escape_json(value) + "\","; + } + + if (result.length() > 1) { + result.pop_back(); // Remove trailing comma + } + result += "}"; + + return result; +} + + +static void HTTPHeadRequestFunction(DataChunk &args, ExpressionState &state, Vector &result) { + D_ASSERT(args.data.size() == 1); + + UnaryExecutor::Execute(args.data[0], result, args.size(), [&](string_t input) { + std::string url = input.GetString(); + + // Use helper to setup client and parse URL + auto client_and_path = SetupHttpClient(url); + auto &client = client_and_path.first; + auto &path = client_and_path.second; + + // Make the GET request + auto res = client.Head(path.c_str()); + auto headers = headers_to_string(res->headers); + if (res) { + std::string response = StringUtil::Format( + "{ \"status\": %i, \"reason\": \"%s\", \"headers\": \"%s\" }", + res->status, + escape_json(res->reason), + escape_json(headers) + ); + return StringVector::AddString(result, response); + } else { + std::string response = StringUtil::Format( + "{ \"status\": %i, \"reason\": \"%s\", \"headers\": \"%s\" }", + -1, GetHttpErrorMessage(res, "HEAD"), "" + ); + return StringVector::AddString(result, response); + } + }); +} + static void HTTPGetRequestFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.data.size() == 1); @@ -303,6 +358,10 @@ static void HTTPPostFormRequestFunction(DataChunk &args, ExpressionState &state, static void LoadInternal(DatabaseInstance &instance) { + ScalarFunctionSet http_head("http_head"); + http_head.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::JSON(), HTTPHeadRequestFunction)); + ExtensionUtil::RegisterFunction(instance, http_head); + ScalarFunctionSet http_get("http_get"); http_get.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::JSON(), HTTPGetRequestFunction)); http_get.AddFunction(ScalarFunction( @@ -358,4 +417,3 @@ DUCKDB_EXTENSION_API const char *http_client_version() { #ifndef DUCKDB_EXTENSION_MAIN #error DUCKDB_EXTENSION_MAIN not defined #endif - diff --git a/test/sql/httpclient.test b/test/sql/httpclient.test index 2631ebe..3ba0141 100644 --- a/test/sql/httpclient.test +++ b/test/sql/httpclient.test @@ -199,3 +199,28 @@ FROM ---- 200 OK 10 +# Confirm the HEAD function works +query III +WITH __input AS ( + SELECT + http_head( + 'https://httpbin.org/delay/0' + ) AS res +), +__response AS ( + SELECT + (res->>'status')::INT AS status, + (res->>'reason') AS reason, + unnest( from_json(((res->>'headers')::JSON), '{"connection": "VARCHAR"}') ) AS features + FROM + __input +) +SELECT + __response.status, + __response.reason, + __response.connection AS connection +FROM + __response +; +---- +200 OK close From 9031ae552e9bfecf7b9d6b2f71933fbcbf165a7b Mon Sep 17 00:00:00 2001 From: Thomas Johnson Date: Wed, 21 May 2025 11:00:32 +0200 Subject: [PATCH 2/3] Fixes compiler warning and seg fault due to missing headers on failed requests --- src/http_client_extension.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/http_client_extension.cpp b/src/http_client_extension.cpp index e7fa243..4d30405 100644 --- a/src/http_client_extension.cpp +++ b/src/http_client_extension.cpp @@ -161,7 +161,10 @@ static int ConvertListEntryToMap(const list_entry_t& list_entry, const duckdb::V std::string headers_to_string(const duckdb_httplib_openssl::Headers& headers) { std::string result = "{"; - for (const auto& [key, value] : headers) { + for (const auto& pair : headers) { + const std::string& key = pair.first; + const std::string& value = pair.second; + // Convert ci string to regular string by converting to lowercase in-place std::string lower_key = key; std::transform(lower_key.begin(), lower_key.end(), lower_key.begin(), @@ -192,8 +195,8 @@ static void HTTPHeadRequestFunction(DataChunk &args, ExpressionState &state, Vec // Make the GET request auto res = client.Head(path.c_str()); - auto headers = headers_to_string(res->headers); if (res) { + auto headers = headers_to_string(res->headers); std::string response = StringUtil::Format( "{ \"status\": %i, \"reason\": \"%s\", \"headers\": \"%s\" }", res->status, From 796118832cffb9a381746ca4342adb14c6fdf989 Mon Sep 17 00:00:00 2001 From: Thomas Johnson Date: Wed, 21 May 2025 13:05:29 +0200 Subject: [PATCH 3/3] Cleans up comments --- src/http_client_extension.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/http_client_extension.cpp b/src/http_client_extension.cpp index 4d30405..c0a3dbb 100644 --- a/src/http_client_extension.cpp +++ b/src/http_client_extension.cpp @@ -165,7 +165,6 @@ std::string headers_to_string(const duckdb_httplib_openssl::Headers& headers) { const std::string& key = pair.first; const std::string& value = pair.second; - // Convert ci string to regular string by converting to lowercase in-place std::string lower_key = key; std::transform(lower_key.begin(), lower_key.end(), lower_key.begin(), [](unsigned char c){ return std::tolower(c); }); @@ -193,7 +192,7 @@ static void HTTPHeadRequestFunction(DataChunk &args, ExpressionState &state, Vec auto &client = client_and_path.first; auto &path = client_and_path.second; - // Make the GET request + // Make the HEAD request auto res = client.Head(path.c_str()); if (res) { auto headers = headers_to_string(res->headers);