Skip to content

Commit 4275b05

Browse files
bug fixes
1 parent b727828 commit 4275b05

File tree

5 files changed

+85
-65
lines changed

5 files changed

+85
-65
lines changed

cpp/FileMetadataInitializer.cpp

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,12 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
6060
{
6161
// looking for files on disk.
6262
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
63+
replaceStrAll(srcLocationPlatform, "/", std::string() + PATH_SEP);
6364
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
6465
std::string basePath = srcLocationPlatform.substr(0, dirSep + 1);
6566

6667
std::vector<std::string> fileList;
67-
if (!listFilesRecursive(srcLocationPlatform, fileList))
68+
if (!listFiles(srcLocationPlatform, fileList))
6869
{
6970
CXX_LOG_ERROR("Failed on finding files for uploading.");
7071
return;
@@ -95,14 +96,15 @@ void Snowflake::Client::FileMetadataInitializer::includeSubfolderFilesRecursive(
9596
}
9697
}
9798

98-
bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::string &sourceLocation,
99+
bool Snowflake::Client::FileMetadataInitializer::listFiles(const std::string &sourceLocation,
99100
std::vector<std::string> & fileList)
100101
{
101102
// looking for files on disk.
102103
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
103104
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
104105
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
105106
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
107+
bool includeSubfolder = filePattern == "**";
106108

107109
#ifdef _WIN32
108110
WIN32_FIND_DATA fdd;
@@ -132,7 +134,12 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s
132134
}
133135
else
134136
{
135-
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
137+
if (includeSubfolder &&
138+
(std::string(fdd.cFileName) != ".") &&
139+
(std::string(fdd.cFileName) != ".."))
140+
{
141+
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
142+
}
136143
}
137144
} while (FindNextFile(hFind, &fdd) != 0);
138145

@@ -162,7 +169,10 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s
162169
if (S_ISREG(fileStatus.st_mode)) {
163170
fileList.push_back(dirPath + dir_entry->d_name);
164171
}
165-
else if (S_ISDIR(fileStatus.st_mode))
172+
else if (includeSubfolder &&
173+
(S_ISDIR(fileStatus.st_mode)) &&
174+
(std::string(dir_entry->d_name) != ".") &&
175+
(std::string(dir_entry->d_name) != ".."))
166176
{
167177
includeSubfolderFilesRecursive(dirPath + dir_entry->d_name, fileList);
168178
}
@@ -201,8 +211,10 @@ void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata(
201211
{
202212
// guess
203213
CXX_LOG_INFO("Auto detect on compression type");
214+
std::string srcFileNamePlatform = m_stmtPutGet->UTF8ToPlatformString(
215+
fileMetadata.srcFileName);
204216
fileMetadata.sourceCompression = FileCompressionType::guessCompressionType(
205-
m_stmtPutGet->UTF8ToPlatformString(fileMetadata.srcFileName));
217+
srcFileNamePlatform);
206218
}
207219
else if (!sf_strncasecmp(m_sourceCompression, COMPRESSION_NONE,
208220
sizeof(COMPRESSION_NONE)))
@@ -319,4 +331,29 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
319331
return outcome;
320332
}
321333

334+
void Snowflake::Client::FileMetadataInitializer::
335+
replaceStrAll(std::string& stringToReplace,
336+
std::string const& oldValue,
337+
std::string const& newValue)
338+
{
339+
size_t oldValueLen = oldValue.length();
340+
size_t newValueLen = newValue.length();
341+
if (0 == oldValueLen)
342+
{
343+
return;
344+
}
345+
346+
size_t index = 0;
347+
while (true) {
348+
/* Locate the substring to replace. */
349+
index = stringToReplace.find(oldValue, index);
350+
if (index == std::string::npos) break;
351+
352+
/* Make the replacement. */
353+
stringToReplace.replace(index, oldValueLen, newValue);
354+
355+
/* Advance index forward so the next iteration doesn't pick it up as well. */
356+
index += newValueLen;
357+
}
358+
}
322359

cpp/FileMetadataInitializer.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,17 @@ class FileMetadataInitializer
4242
static void replaceStrAll(std::string& stringToReplace, std::string const& oldValue,
4343
std::string const& newValue);
4444
/**
45-
* Given a source location, find all files recursively in subfolders that match the
46-
* location pattern. Utility function called from populateSrcLocUploadMetadata.
45+
* Given a source location, find all files match the partern, recursively include
46+
* all subfolders if the pattern is **
47+
* Utility function called from populateSrcLocUploadMetadata.
4748
*
4849
* @param sourceLocation The source location could have pattern at the end.
4950
* @param fileList Output the files with the full path.
5051
*
5152
* @return True when succeeded, false when no file matches with the source location.
5253
* @throw SnowflakeTransferException on unexpected error.
5354
*/
54-
bool listFilesRecursive(const std::string &sourceLocation, std::vector<std::string> & fileList);
55+
bool listFiles(const std::string &sourceLocation, std::vector<std::string> & fileList);
5556

5657
/**
5758
* Given a full path of a folder, add all files in the folder recursively including subfolders.

cpp/FileTransferAgent.cpp

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,31 +33,6 @@ using ::Snowflake::Client::RemoteStorageRequestOutcome;
3333
namespace
3434
{
3535
const std::string FILE_PROTOCOL = "file://";
36-
37-
void replaceStrAll(std::string& stringToReplace,
38-
std::string const& oldValue,
39-
std::string const& newValue)
40-
{
41-
size_t oldValueLen = oldValue.length();
42-
size_t newValueLen = newValue.length();
43-
if (0 == oldValueLen)
44-
{
45-
return;
46-
}
47-
48-
size_t index = 0;
49-
while (true) {
50-
/* Locate the substring to replace. */
51-
index = stringToReplace.find(oldValue, index);
52-
if (index == std::string::npos) break;
53-
54-
/* Make the replacement. */
55-
stringToReplace.replace(index, oldValueLen, newValue);
56-
57-
/* Advance index forward so the next iteration doesn't pick it up as well. */
58-
index += newValueLen;
59-
}
60-
}
6136
}
6237

6338
Snowflake::Client::FileTransferAgent::FileTransferAgent(
@@ -471,7 +446,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::uploadSingleFi
471446

472447
// after compress replace PATH_SEP with / in destPath as that's
473448
// what needed on stage side
474-
replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/");
449+
FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/");
475450
fileMetadata->destFileName = fileMetadata->destPath + fileMetadata->destFileName;
476451

477452
CXX_LOG_TRACE("Update File digest metadata start");
@@ -624,7 +599,7 @@ void Snowflake::Client::FileTransferAgent::compressSourceFile(
624599
if (!fileMetadata->destPath.empty())
625600
{
626601
std::string subfolder = fileMetadata->destPath;
627-
replaceStrAll(subfolder, "/", std::string() + PATH_SEP);
602+
FileMetadataInitializer::replaceStrAll(subfolder, "/", std::string() + PATH_SEP);
628603
std::string subfolderPlatform = m_stmtPutGet->UTF8ToPlatformString(subfolder);
629604
subfolder = std::string(tempDir) + subfolder;
630605
subfolderPlatform = std::string(tempDir) + subfolderPlatform;
@@ -857,7 +832,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle
857832
RemoteStorageRequestOutcome outcome = RemoteStorageRequestOutcome::FAILED;
858833

859834
// create subfolder first befor adding file name
860-
replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP);
835+
FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP);
861836
fileMetadata->destPath = std::string(response.localLocation) + PATH_SEP +
862837
fileMetadata->destPath;
863838
std::string destPathPlatform = m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destPath);
@@ -934,7 +909,7 @@ void Snowflake::Client::FileTransferAgent::getPresignedUrlForUploading(
934909
// need to replace file://mypath/myfile?.csv with file://mypath/myfile1.csv.gz
935910
std::string presignedUrlCommand = command;
936911
std::string localFilePath = getLocalFilePathFromCommand(command, false);
937-
replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName);
912+
FileMetadataInitializer::replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName);
938913
// then hand that to GS to get the actual presigned URL we'll use
939914
PutGetParseResponse rsp;
940915
if (!m_stmtPutGet->parsePutGetCommand(&presignedUrlCommand, &rsp))
@@ -980,7 +955,7 @@ std::string Snowflake::Client::FileTransferAgent::getLocalFilePathFromCommand(
980955
// unescape backslashes to match the file name from GS
981956
if (unescape)
982957
{
983-
replaceStrAll(localFilePath, "\\\\\\\\", "\\\\");
958+
FileMetadataInitializer::replaceStrAll(localFilePath, "\\\\\\\\", "\\\\");
984959
}
985960
}
986961
else

tests/test_simple_put.cpp

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ void test_simple_put_core(const char * fileName,
169169

170170
std::string dataDir = TestSetup::getDataDir();
171171
std::string file = dataDir + fileName;
172+
if (uploadFolder)
173+
{
174+
file += PATH_SEP;
175+
file += "**";
176+
}
172177
std::string putCommand = "put file://" + file + " @%test_small_put";
173178
if (testUnicode)
174179
{
@@ -927,34 +932,33 @@ void test_putget_subfolder_core(bool testUnicode)
927932

928933
// use get command to have local files in nested subfolder
929934
std::string cmd = "get '@%test_small_put/" + filename + ".gz' ";
930-
std::string sub1 = "sub1";
931-
std::string sub1Platform = sub1;
932-
std::string localPath1 = "file://";
935+
std::string sub2 = "sub2";
936+
std::string localPath = "file://";
937+
938+
localPath += dataDir + "subfoldertest" + PATH_SEP + "sub1";
939+
test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf);
940+
933941
if (testUnicode)
934942
{
935-
sub1 += UTF8_STR;
936-
sub1Platform += PLATFORM_STR;
943+
sub2 += UTF8_STR;
937944
}
938-
localPath1 += dataDir + sub1;
939-
std::string localPath2 = localPath1 + PATH_SEP + "sub2";
945+
localPath += PATH_SEP;
946+
localPath += sub2;
940947
if (testUnicode)
941948
{
942-
localPath1 = "'" + localPath1 + "'";
943-
localPath2 = "'" + localPath2 + "'";
944-
replaceInPlace(localPath1, "\\", "\\\\");
945-
replaceInPlace(localPath2, "\\", "\\\\");
949+
localPath = "'" + localPath + "'";
950+
replaceInPlace(localPath, "\\", "\\\\");
946951
}
947-
test_simple_get_data((cmd + localPath1).c_str(), "48", 0, testUnicode, sf);
948-
test_simple_get_data((cmd + localPath2).c_str(), "48", 0, testUnicode, sf);
952+
test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf);
949953

950954
// the files returned in alphabetical order, use set to get expected files sorted
951955
std::set<std::string> expectedFiles;
952956
// put command to upload files with subfolders to stage
953-
// put with createSubfolder=true so all the files in "subfolder"
954-
// which would be easier to check later
955-
// uploadFolder=true to skip filename checking
957+
// uploadFolder=true
958+
// createSubfolder=true as well so the uploaded files will be in path of
959+
// "subfolder" which would be easier to check later
956960
std::string basePath = "subfolder/";
957-
test_simple_put_core(sub1.c_str(), // filename
961+
test_simple_put_core("subfoldertest", // filename
958962
"auto", //source compression
959963
true, // auto compress
960964
false, // copyUploadFile
@@ -973,15 +977,17 @@ void test_putget_subfolder_core(bool testUnicode)
973977
testUnicode, // testUnicode
974978
true // uploadFolder
975979
);
976-
expectedFiles.insert(basePath + sub1 + "/" + filename + ".gz");
977-
expectedFiles.insert(basePath + sub1 + "/sub2/" + filename + ".gz");
980+
expectedFiles.insert(basePath + "sub1/" + filename + ".gz");
981+
expectedFiles.insert(basePath + "sub1/" + sub2 + "/" + filename + ".gz");
978982

979983
// get command to download uploaded files into a new folder sub3
980-
cmd = "get '@%test_small_put/subfolder/" + sub1 + "' file://" + dataDir + "sub3";
984+
cmd = "get @%test_small_put/subfolder file://" +
985+
dataDir + "subfoldertest" + PATH_SEP + "put2" + PATH_SEP + "sub3";
981986
test_simple_get_data(cmd.c_str(), "48", 0, testUnicode, sf);
982987

983988
// put command to upload downloaded file again
984-
test_simple_put_core("sub3", // filename
989+
std::string put2Path = std::string("subfoldertest") + PATH_SEP + "put2";
990+
test_simple_put_core(put2Path.c_str(), // filename
985991
"auto", //source compression
986992
true, // auto compress
987993
false, // copyUploadFile
@@ -1000,8 +1006,8 @@ void test_putget_subfolder_core(bool testUnicode)
10001006
testUnicode, // testUnicode
10011007
true // uploadFolder
10021008
);
1003-
expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/" + filename + ".gz");
1004-
expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/sub2/" + filename + ".gz");
1009+
expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + filename + ".gz");
1010+
expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + sub2 + "/" + filename + ".gz");
10051011

10061012
// run list command to check result
10071013
SF_STMT *sfstmt = NULL;
@@ -1029,8 +1035,7 @@ void test_putget_subfolder_core(bool testUnicode)
10291035

10301036
snowflake_stmt_term(sfstmt);
10311037
snowflake_term(sf);
1032-
remove_all(dataDir + sub1Platform);
1033-
remove_all(dataDir + "sub3");
1038+
remove_all(dataDir + "subfoldertest");
10341039
}
10351040

10361041
void test_putget_subfolder(void **unused)
@@ -1834,6 +1839,8 @@ int main(void) {
18341839
}
18351840

18361841
const struct CMUnitTest tests[] = {
1842+
cmocka_unit_test_teardown(test_putget_subfolder, teardown),
1843+
cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown),
18371844
cmocka_unit_test_teardown(test_simple_put_auto_compress, teardown),
18381845
cmocka_unit_test_teardown(test_simple_put_config_temp_dir, teardown),
18391846
cmocka_unit_test_teardown(test_simple_put_auto_detect_gzip, teardown),
@@ -1865,8 +1872,6 @@ int main(void) {
18651872
cmocka_unit_test_teardown(test_simple_put_with_noproxy_fromenv, teardown),
18661873
cmocka_unit_test_teardown(test_upload_file_to_stage_using_stream, donothing),
18671874
cmocka_unit_test_teardown(test_put_get_with_unicode, teardown),
1868-
cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown),
1869-
cmocka_unit_test_teardown(test_putget_subfolder, teardown),
18701875
};
18711876
int ret = cmocka_run_group_tests(tests, gr_setup, gr_teardown);
18721877
return ret;

tests/test_unit_file_metadata_init.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ void test_file_pattern_match_core(std::vector<std::string> *expectedFiles,
101101
for (auto i = expectedFiles->begin(); i != expectedFiles->end(); i++)
102102
{
103103
std::string expectedFileFull = testDir + *i;
104+
// the src file path would use platform path separator
105+
replaceStrAll(expectedFileFull, "/", std::string() + PATH_SEP);
104106
expectedFilesFull.insert(expectedFileFull);
105107
}
106108

0 commit comments

Comments
 (0)