|
1 | 1 | #include "Conditions/URLStreamer.h" |
2 | 2 |
|
3 | | -#include <string.h> |
4 | | -#include <sys/wait.h> |
5 | | -#include <unistd.h> |
| 3 | +#include <curl/curl.h> |
6 | 4 |
|
7 | 5 | #include <fstream> |
8 | | -#include <iostream> |
9 | 6 | #include <sstream> |
10 | 7 | #include <string> |
11 | 8 |
|
12 | 9 | #include "Framework/Exception/Exception.h" |
13 | 10 |
|
14 | 11 | namespace conditions { |
15 | 12 |
|
/// Count of http:// / https:// fetches attempted by urlstream().
static unsigned int http_requests = 0;
/// Count of those fetches that ended in a raised ConditionsException.
static unsigned int http_failures = 0;

/**
 * Report the HTTP fetch counters kept by this translation unit.
 *
 * The counters are plain (non-atomic) file-local integers; this function
 * only copies their current values out and does not reset them.
 *
 * @param[out] requests receives the current value of http_requests
 * @param[out] failures receives the current value of http_failures
 */
void urlstatistics(unsigned int& requests, unsigned int& failures) {
  requests = http_requests;
  failures = http_failures;
}
| 20 | + |
/**
 * Callback for libcurl to write received data into a std::string buffer.
 *
 * @param ptr      start of the received chunk (libcurl does not
 *                 NUL-terminate it, so the length must be used)
 * @param size     size in bytes of one element
 * @param nmemb    number of elements in the chunk
 * @param userdata the std::string* registered via CURLOPT_WRITEDATA
 * @return size * nmemb when the chunk was appended; 0 when it could not be
 *         stored, which libcurl treats as a write error for a non-empty
 *         chunk and aborts the transfer
 */
static size_t writeCallback(char* ptr, size_t size, size_t nmemb,
                            void* userdata) {
  auto* buffer = static_cast<std::string*>(userdata);
  const size_t total = size * nmemb;
  if (buffer == nullptr) {
    return 0;  // nowhere to store the data
  }
  try {
    buffer->append(ptr, total);
  } catch (...) {
    // libcurl is a C library: a C++ exception (e.g. std::bad_alloc) must not
    // unwind through its frames, so report the failure via the return value.
    return 0;
  }
  return total;
}
23 | 31 |
|
24 | 32 | std::unique_ptr<std::istream> urlstream(const std::string& url) { |
25 | 33 | if (url.find("file://") == 0 || (url.length() > 0 && url[0] == '/')) { |
26 | 34 | std::string fname = url; |
27 | | - if (fname.find("file://") == 0) |
28 | | - fname = url.substr(url.find("file://") + strlen("file://")); |
29 | | - std::ifstream* fs = new std::ifstream(fname); |
| 35 | + if (fname.find("file://") == 0) fname = url.substr(7); |
| 36 | + auto fs = std::make_unique<std::ifstream>(fname); |
30 | 37 | if (!fs->good()) { |
31 | | - delete fs; |
32 | 38 | EXCEPTION_RAISE("ConditionsException", |
33 | 39 | "Unable to open CSV file '" + fname + "'"); |
34 | 40 | } |
35 | | - return std::unique_ptr<std::istream>(fs); |
| 41 | + return fs; |
36 | 42 | } |
37 | | - if ((url.find("http://") != std::string::npos) || |
38 | | - (url.find("https://") != std::string::npos)) { |
39 | | - http_requests_++; |
40 | | - // this implementation uses wget to handle the SSL processes |
41 | | - static int istream = 0; |
42 | | - char fname[250]; |
43 | | - snprintf(fname, 250, "/tmp/httpstream_%d_%d.csv ", getpid(), istream++); |
44 | | - pid_t apid = fork(); |
45 | | - if (apid == 0) { // child |
46 | | - execl("/usr/bin/wget", "wget", "-q", "--no-check-certificate", "-O", |
47 | | - fname, "-o", "/tmp/wget.log", url.c_str(), (char*)0); |
48 | | - } else { |
49 | | - int wstatus; |
50 | | - waitpid(apid, &wstatus, 0); |
51 | | - // std::cout << "EXITED: " << WIFEXITED(wstatus) << " STATUS: " << |
52 | | - // WEXITSTATUS(wstatus) << std::endl; |
53 | | - if (WIFEXITED(wstatus) != 1 || WEXITSTATUS(wstatus) != 0) { |
54 | | - http_failures_++; |
55 | | - EXCEPTION_RAISE("ConditionsException", |
56 | | - "Wget error " + std::to_string(WEXITSTATUS(wstatus)) + |
57 | | - " retreiving URL '" + url + "'"); |
58 | | - } |
| 43 | + if ((url.find("http://") == 0) || (url.find("https://") == 0)) { |
| 44 | + http_requests++; |
| 45 | + |
| 46 | + CURL* curl = curl_easy_init(); |
| 47 | + if (!curl) { |
| 48 | + http_failures++; |
| 49 | + EXCEPTION_RAISE("ConditionsException", |
| 50 | + "Failed to initialize libcurl for URL '" + url + "'"); |
59 | 51 | } |
60 | | - std::ifstream ib(fname); |
61 | | - if (ib.bad()) { |
62 | | - http_failures_++; |
| 52 | + |
| 53 | + std::string response_body; |
| 54 | + |
| 55 | + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); |
| 56 | + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback); |
| 57 | + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_body); |
| 58 | + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); |
| 59 | + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L); |
| 60 | + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 2L); |
| 61 | + curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 30L); |
| 62 | + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 300L); |
| 63 | + |
| 64 | + CURLcode res = curl_easy_perform(curl); |
| 65 | + |
| 66 | + if (res != CURLE_OK) { |
| 67 | + curl_easy_cleanup(curl); |
| 68 | + http_failures++; |
| 69 | + EXCEPTION_RAISE("ConditionsException", |
| 70 | + "Curl error (" + std::string(curl_easy_strerror(res)) + |
| 71 | + ") retrieving URL '" + url + "'"); |
| 72 | + } |
| 73 | + |
| 74 | + long http_code = 0; |
| 75 | + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); |
| 76 | + curl_easy_cleanup(curl); |
| 77 | + |
| 78 | + if (http_code != 200) { |
| 79 | + http_failures++; |
63 | 80 | EXCEPTION_RAISE("ConditionsException", |
64 | | - "Bad/empty file retreiving URL '" + url + "'"); |
| 81 | + "HTTP error " + std::to_string(http_code) + |
| 82 | + " retrieving URL '" + url + "'"); |
65 | 83 | } |
66 | | - std::stringstream* ss = new std::stringstream(); |
67 | | - (*ss) << ib.rdbuf(); |
68 | | - // std::cout << "CONTENTS: \n" << ss->str(); |
69 | | - ib.close(); // needed for some implementations |
70 | | - std::remove(fname); |
71 | | - return std::unique_ptr<std::istream>(ss); |
| 84 | + |
| 85 | + auto ss = std::make_unique<std::stringstream>(std::move(response_body)); |
| 86 | + return ss; |
72 | 87 | } |
73 | 88 | EXCEPTION_RAISE("ConditionsException", "Unable to handle URL '" + url + "'"); |
74 | 89 | return std::unique_ptr<std::istream>(nullptr); |
|
0 commit comments