Skip to content

Commit e3a3695

Browse files
Support directories in PUT/GET
1 parent eb00a91 commit e3a3695

File tree

4 files changed

+362
-67
lines changed

4 files changed

+362
-67
lines changed

cpp/FileMetadataInitializer.cpp

Lines changed: 67 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "snowflake/platform.h"
99
#include "snowflake/SnowflakeTransferException.hpp"
1010
#include <cerrno>
11+
#include "boost/filesystem.hpp"
1112

1213
#define COMPRESSION_AUTO "AUTO"
1314
#define COMPRESSION_AUTO_DETECT "AUTO_DETECT"
@@ -20,7 +21,7 @@
2021
#include <fnmatch.h>
2122
#endif
2223

23-
24+
using namespace boost::filesystem;
2425

2526
Snowflake::Client::FileMetadataInitializer::FileMetadataInitializer(
2627
std::vector<FileMetadata> &smallFileMetadata,
@@ -34,16 +35,16 @@ Snowflake::Client::FileMetadataInitializer::FileMetadataInitializer(
3435
}
3536

3637
void
37-
Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::string &fileDir, const char *fileName,
38+
Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::string &fileNameFull,
39+
const std::string &destPath,
40+
const std::string &fileName,
3841
size_t fileSize, size_t threshold)
3942
{
40-
std::string fileNameFull = fileDir;
41-
fileNameFull += fileName;
42-
4343
FileMetadata fileMetadata;
4444
fileMetadata.srcFileName = m_stmtPutGet->platformStringToUTF8(fileNameFull);
4545
fileMetadata.srcFileSize = fileSize;
46-
fileMetadata.destFileName = m_stmtPutGet->platformStringToUTF8(std::string(fileName));
46+
fileMetadata.destPath = m_stmtPutGet->platformStringToUTF8(destPath);
47+
fileMetadata.destFileName = m_stmtPutGet->platformStringToUTF8(fileName);
4748
// process compression type
4849
initCompressionMetadata(fileMetadata);
4950

@@ -56,9 +57,52 @@ Snowflake::Client::FileMetadataInitializer::initUploadFileMetadata(const std::st
5657

5758
void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(std::string &sourceLocation,
5859
size_t putThreshold)
60+
{
61+
// looking for files on disk.
62+
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
63+
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
64+
std::string basePath = srcLocationPlatform.substr(0, dirSep + 1);
65+
66+
std::vector<std::string> fileList;
67+
if (!listFilesRecursive(srcLocationPlatform, fileList))
68+
{
69+
CXX_LOG_ERROR("Failed on finding files for uploading.");
70+
return;
71+
}
72+
73+
for (auto file = fileList.begin(); file != fileList.end(); file++)
74+
{
75+
path p(*file);
76+
size_t fileSize = file_size(p);
77+
std::string fileNameFull = p.string();
78+
std::string fileName = p.filename().string();
79+
//make the path on stage by removing base path and file name from full path
80+
std::string destPath = fileNameFull.substr(basePath.length(),
81+
fileNameFull.length() - basePath.length() - fileName.length());
82+
initUploadFileMetadata(fileNameFull, destPath, fileName, fileSize, putThreshold);
83+
}
84+
}
85+
86+
void Snowflake::Client::FileMetadataInitializer::includeSubfolderFilesRecursive(const std::string &folderPath,
87+
std::vector<std::string> & fileList)
88+
{
89+
for (auto const& entry : recursive_directory_iterator(folderPath))
90+
{
91+
if (is_regular_file(entry))
92+
{
93+
fileList.push_back(entry.path().string());
94+
}
95+
}
96+
}
97+
98+
bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::string &sourceLocation,
99+
std::vector<std::string> & fileList)
59100
{
60101
// looking for files on disk.
61102
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
103+
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
104+
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
105+
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
62106

63107
#ifdef _WIN32
64108
WIN32_FIND_DATA fdd;
@@ -71,8 +115,7 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
71115
{
72116
CXX_LOG_ERROR("No file matching pattern %s has been found. Error: %d",
73117
sourceLocation.c_str(), dwError);
74-
FindClose(hFind);
75-
return;
118+
return false;
76119
}
77120
else if (dwError != ERROR_SUCCESS)
78121
{
@@ -85,37 +128,24 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
85128
do {
86129
if (!(fdd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) )
87130
{
88-
std::string fileFullPath = std::string(fdd.cFileName);
89-
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
90-
if (dirSep == std::string::npos)
91-
{
92-
dirSep = sourceLocation.find_last_of(ALTER_PATH_SEP);
93-
}
94-
if (dirSep != std::string::npos)
95-
{
96-
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
97-
LARGE_INTEGER fileSize;
98-
fileSize.LowPart = fdd.nFileSizeLow;
99-
fileSize.HighPart = fdd.nFileSizeHigh;
100-
initUploadFileMetadata(dirPath, (char *)fdd.cFileName, (size_t)fileSize.QuadPart, putThreshold);
101-
}
131+
fileList.push_back(dirPath + fdd.cFileName);
132+
}
133+
else
134+
{
135+
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
102136
}
103137
} while (FindNextFile(hFind, &fdd) != 0);
104138

105139
DWORD dwError = GetLastError();
140+
FindClose(hFind);
106141
if (dwError != ERROR_NO_MORE_FILES)
107142
{
108143
CXX_LOG_ERROR("Failed on FindNextFile. Error: %d", dwError);
109144
throw SnowflakeTransferException(TransferError::DIR_OPEN_ERROR,
110145
srcLocationPlatform.c_str(), dwError);
111146
}
112-
FindClose(hFind);
113147

114148
#else
115-
unsigned long dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
116-
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
117-
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
118-
119149
DIR * dir = nullptr;
120150
struct dirent * dir_entry;
121151
if ((dir = opendir(dirPath.c_str())) != NULL)
@@ -130,8 +160,11 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
130160
if (!ret)
131161
{
132162
if (S_ISREG(fileStatus.st_mode)) {
133-
initUploadFileMetadata(dirPath, dir_entry->d_name,
134-
(size_t) fileStatus.st_size, putThreshold);
163+
fileList.pushback(dirPath + dir_entry->d_name);
164+
}
165+
else if (S_ISDIR(fileStatus.st_mode))
166+
{
167+
includeSubfolderFilesRecursive(dirPath + dir_entry->d_name, fileList);
135168
}
136169
}
137170
else
@@ -169,7 +202,7 @@ void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata(
169202
// guess
170203
CXX_LOG_INFO("Auto detect on compression type");
171204
fileMetadata.sourceCompression = FileCompressionType::guessCompressionType(
172-
fileMetadata.srcFileName);
205+
m_stmtPutGet->UTF8ToPlatformString(fileMetadata.srcFileName));
173206
}
174207
else if (!sf_strncasecmp(m_sourceCompression, COMPRESSION_NONE,
175208
sizeof(COMPRESSION_NONE)))
@@ -253,8 +286,9 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
253286
size_t getThreshold)
254287
{
255288
std::string fullPath = *remoteLocation + sourceLocation;
256-
size_t dirSep = fullPath.find_last_of('/');
257-
std::string dstFileName = fullPath.substr(dirSep + 1);
289+
size_t dirSep = sourceLocation.find_last_of('/');
290+
std::string dstFileName = sourceLocation.substr(dirSep + 1);
291+
std::string dstPath = sourceLocation.substr(0, dirSep + 1);
258292

259293
FileMetadata fileMetadata;
260294
fileMetadata.presignedUrl = presignedUrl;
@@ -271,6 +305,7 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
271305
metaListToPush.push_back(fileMetadata);
272306
metaListToPush.back().srcFileName = fullPath;
273307
metaListToPush.back().destFileName = dstFileName;
308+
metaListToPush.back().destPath = dstPath;
274309
if (encMat)
275310
{
276311
EncryptionProvider::decryptFileKey(&(metaListToPush.back()), encMat, getRandomDev());

cpp/FileMetadataInitializer.hpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,37 @@ class FileMetadataInitializer
3030
IStatementPutGet *stmtPutGet);
3131

3232
/**
33-
* Given a source locations, find all files that match the location pattern,
33+
* Given a source location, find all files that match the location pattern,
3434
* init file metadata, and divide them into different vectors according to size
3535
*/
3636
void populateSrcLocUploadMetadata(std::string &sourceLocation, size_t putThreshold);
3737

38+
39+
/**
40+
* Utility function to replace all matching instances in a string.
41+
*/
42+
static void replaceStrAll(std::string& stringToReplace, std::string const& oldValue,
43+
std::string const& newValue);
44+
/**
45+
* Given a source location, find all files recursively in subfolders that match the
46+
* location pattern. Utility function called from populateSrcLocUploadMetadata.
47+
*
48+
* @param sourceLocation The source location could have pattern at the end.
49+
* @param fileList Output the files with the full path.
50+
*
51+
* @return True when succeeded, false when no file matches with the source location.
52+
* @throw SnowflakeTransferException on unexpected error.
53+
*/
54+
bool listFilesRecursive(const std::string &sourceLocation, std::vector<std::string> & fileList);
55+
56+
/**
57+
* Given a full path of a folder, add all files in the folder recursively including subfolders.
58+
*
59+
* @param folderPath The full path of a folder.
60+
* @param fileList Output the files in the folder recursively including subfolders.
61+
*/
62+
void includeSubfolderFilesRecursive(const std::string &folderPath, std::vector<std::string> & fileList);
63+
3864
/**
3965
* Given a source location, find out file size to determine use parallel
4066
* download or not.
@@ -79,7 +105,8 @@ class FileMetadataInitializer
79105
* Given file name, populate metadata
80106
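* @param fileNameFull full local path of the source file
* @param destPath path prefix recorded as the file's destination path
* @param fileName file name recorded as the destination file name
* @param fileSize size recorded as the source file size
* @param threshold size threshold (presumably decides whether the file is treated as small or large; confirm against the implementation)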
81107
*/
82-
void initUploadFileMetadata(const std::string &fileDir, const char *fileName, size_t fileSize, size_t threshold);
108+
void initUploadFileMetadata(const std::string &fileNameFull, const std::string &destPath,
109+
const std::string &fileName, size_t fileSize, size_t threshold);
83110

84111
/**
85112
* init compression metadata

cpp/FileTransferAgent.cpp

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,12 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::uploadSingleFi
468468
fileMetadata->srcFileToUpload = fileMetadata->srcFileName;
469469
fileMetadata->srcFileToUploadSize = fileMetadata->srcFileSize;
470470
}
471+
472+
// after compress replace PATH_SEP with / in destPath as that's
473+
// what is needed on the stage side
474+
replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/");
475+
fileMetadata->destFileName = fileMetadata->destPath + fileMetadata->destFileName;
476+
471477
CXX_LOG_TRACE("Update File digest metadata start");
472478

473479
// calculate digest
@@ -614,6 +620,24 @@ void Snowflake::Client::FileTransferAgent::compressSourceFile(
614620
}
615621

616622
std::string stagingFile(tempDir);
623+
624+
if (!fileMetadata->destPath.empty())
625+
{
626+
std::string subfolder = fileMetadata->destPath;
627+
replaceStrAll(subfolder, "/", std::string() + PATH_SEP);
628+
std::string subfolderPlatform = m_stmtPutGet->UTF8ToPlatformString(subfolder);
629+
subfolder = std::string(tempDir) + subfolder;
630+
subfolderPlatform = std::string(tempDir) + subfolderPlatform;
631+
632+
int ret = sf_create_directory_if_not_exists_recursive(subfolderPlatform.c_str());
633+
if (ret != 0)
634+
{
635+
CXX_LOG_ERROR("Failed to create temporary folder %s. Errno: %d", subfolder, errno);
636+
throw SnowflakeTransferException(TransferError::FILE_OPEN_ERROR, subfolder, -1);
637+
}
638+
stagingFile = subfolderPlatform;
639+
}
640+
617641
stagingFile += m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destFileName);
618642
std::string srcFileNamePlatform = m_stmtPutGet->UTF8ToPlatformString(fileMetadata->srcFileName);
619643

@@ -647,12 +671,13 @@ void Snowflake::Client::FileTransferAgent::download(string *command)
647671
m_executionResults = new FileTransferExecutionResult(CommandType::DOWNLOAD,
648672
m_largeFilesMeta.size() + m_smallFilesMeta.size());
649673

650-
int ret = sf_create_directory_if_not_exists((const char *)response.localLocation);
674+
std::string localLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(response.localLocation);
675+
int ret = sf_create_directory_if_not_exists_recursive((const char *)localLocationPlatform.c_str());
651676
if (ret != 0)
652677
{
653678
CXX_LOG_ERROR("Filed to create directory %s", response.localLocation);
654679
throw SnowflakeTransferException(TransferError::MKDIR_ERROR,
655-
response.localLocation, ret);
680+
localLocationPlatform, ret);
656681
}
657682

658683
if (m_largeFilesMeta.size() > 0)
@@ -829,11 +854,28 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle
829854
FileMetadata *fileMetadata,
830855
size_t resultIndex)
831856
{
857+
RemoteStorageRequestOutcome outcome = RemoteStorageRequestOutcome::FAILED;
858+
859+
// create subfolder first before adding file name
860+
replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP);
832861
fileMetadata->destPath = std::string(response.localLocation) + PATH_SEP +
833-
fileMetadata->destFileName;
862+
fileMetadata->destPath;
834863
std::string destPathPlatform = m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destPath);
864+
int ret = sf_create_directory_if_not_exists_recursive(destPathPlatform.c_str());
865+
if (ret != 0)
866+
{
867+
char* str_error = sf_strerror(errno);
868+
CXX_LOG_DEBUG("Filed to create directory: %s",
869+
fileMetadata->destPath.c_str(), str_error);
870+
sf_free_s(str_error);
871+
m_executionResults->SetTransferOutCome(outcome, resultIndex);
872+
873+
return outcome;
874+
}
875+
876+
fileMetadata->destPath += fileMetadata->destFileName;
877+
destPathPlatform += m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destFileName);
835878

836-
RemoteStorageRequestOutcome outcome = RemoteStorageRequestOutcome::FAILED;
837879
RetryContext getRetryCtx(fileMetadata->srcFileName, m_maxGetRetries);
838880
do
839881
{
@@ -846,7 +888,6 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle
846888
std::ios_base::out | std::ios_base::binary);
847889
}
848890
catch (...) {
849-
std::string err = "Could not open file " + fileMetadata->destPath + " to downoad";
850891
char* str_error = sf_strerror(errno);
851892
CXX_LOG_DEBUG("Could not open file %s to downoad: %s",
852893
fileMetadata->destPath.c_str(), str_error);
@@ -856,7 +897,6 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle
856897
}
857898
if (!dstFile.is_open())
858899
{
859-
std::string err = "Could not open file " + fileMetadata->destPath + " to downoad";
860900
char* str_error = sf_strerror(errno);
861901
CXX_LOG_DEBUG("Could not open file %s to downoad: %s",
862902
fileMetadata->destPath.c_str(), str_error);

0 commit comments

Comments
 (0)