Skip to content

Commit

Permalink
vineyard fuse: add serialization and deserialization factory (#889)
Browse files Browse the repository at this point in the history
* Vineyard fuse development and testing.
* Fixes the syntax error in the CI script

Signed-off-by: sitan liu <[email protected]>
Co-authored-by: Tao He <[email protected]>
  • Loading branch information
liusitan and sighingnow authored Aug 24, 2022
1 parent 8e5643a commit 2f2c7f7
Show file tree
Hide file tree
Showing 25 changed files with 891 additions and 156 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,19 @@ jobs:
rm -rf default.etcd
rm -rf /dev/shm/etcd*
python3 test/runner.py --with-io --with-migration
- name: Run FUSE Tests
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/local/lib64:/usr/local/lib/x86_64-linux-gnu
export VINEYARD_DEVELOP=TRUE
export VINEYARD_DATA_DIR=`pwd`/gstest
export TMPDIR="${TMPDIR:-$(dirname $(mktemp))}"
rm -rf default.etcd
rm -rf /dev/shm/etcd*
python3 test/runner.py --with-fuse
- name: Find vineyard using CMake
run: |
cmake -S test/vineyard-cmake-example -B build/vineyard-cmake-example
Expand Down
3 changes: 2 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@
[submodule "modules/hosseinmoein-dataframe/thirdparty/DataFrame"]
path = modules/hosseinmoein-dataframe/thirdparty/DataFrame
url = https://github.com/hosseinmoein/DataFrame.git
shallow = true
shallow = true

126 changes: 126 additions & 0 deletions cmake/FindParquet.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# - Find Parquet (parquet/api/reader.h, libparquet.a, libparquet.so)
#
# This module requires Arrow from which it uses
# arrow_find_package()
#
# This module defines
# PARQUET_FOUND, whether Parquet has been found
# PARQUET_IMPORT_LIB, path to libparquet's import library (Windows only)
# PARQUET_INCLUDE_DIR, directory containing headers
# PARQUET_LIBS, deprecated. Use PARQUET_LIB_DIR instead
# PARQUET_LIB_DIR, directory containing Parquet libraries
# PARQUET_SHARED_IMP_LIB, deprecated. Use PARQUET_IMPORT_LIB instead
# PARQUET_SHARED_LIB, path to libparquet's shared library
# PARQUET_SO_VERSION, shared object version of found Parquet such as "100"
# PARQUET_STATIC_LIB, path to libparquet.a

# A previous run of this module already recorded a result (found or not);
# nothing to do.
if(DEFINED PARQUET_FOUND)
  return()
endif()

# Locate Arrow first (the module header notes that arrow_find_package() comes
# from Arrow). The version / REQUIRED / QUIET settings requested for the
# package currently being searched are forwarded to the Arrow lookup.
set(find_package_arguments)

if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION)
  list(APPEND find_package_arguments
       "${${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION}")
endif()

if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
  list(APPEND find_package_arguments REQUIRED)
endif()

if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
  list(APPEND find_package_arguments QUIET)
endif()

find_package(Arrow ${find_package_arguments})

# Choose the prefix to search. A non-empty PARQUET_HOME environment variable
# is converted to a CMake-style path and stored in PARQUET_HOME.
if(NOT "$ENV{PARQUET_HOME}" STREQUAL "")
  file(TO_CMAKE_PATH "$ENV{PARQUET_HOME}" PARQUET_HOME)
endif()

# Without a usable PARQUET_HOME, fall back to the Arrow prefix when one is set.
if(ARROW_HOME AND NOT PARQUET_HOME)
  set(PARQUET_HOME "${ARROW_HOME}")
endif()

# Look up Parquet only when Arrow itself was found. arrow_find_package() and
# arrow_extract_macro_value() are not defined in this file; per the module
# header they are provided by Arrow's CMake support.
if(ARROW_FOUND)
  arrow_find_package(PARQUET
                     "${PARQUET_HOME}"
                     parquet
                     parquet/api/reader.h
                     Parquet
                     parquet)
  if(PARQUET_HOME)
    # An explicit prefix was given: derive the version information from the
    # installed parquet_version.h header. The EXISTS check keeps a missing
    # header from turning file(READ) into a hard configure error; in that case
    # the version variables stay unset and the package is reported as not
    # found by the REQUIRED_VARS check at the end of this module.
    if(PARQUET_INCLUDE_DIR
       AND EXISTS "${PARQUET_INCLUDE_DIR}/parquet/parquet_version.h")
      file(READ "${PARQUET_INCLUDE_DIR}/parquet/parquet_version.h"
           PARQUET_VERSION_H_CONTENT)
      arrow_extract_macro_value(PARQUET_VERSION_MAJOR "PARQUET_VERSION_MAJOR"
                                "${PARQUET_VERSION_H_CONTENT}")
      arrow_extract_macro_value(PARQUET_VERSION_MINOR "PARQUET_VERSION_MINOR"
                                "${PARQUET_VERSION_H_CONTENT}")
      arrow_extract_macro_value(PARQUET_VERSION_PATCH "PARQUET_VERSION_PATCH"
                                "${PARQUET_VERSION_H_CONTENT}")
      # Fall back to 0.0.0 when any version component could not be extracted.
      if("${PARQUET_VERSION_MAJOR}" STREQUAL ""
         OR "${PARQUET_VERSION_MINOR}" STREQUAL ""
         OR "${PARQUET_VERSION_PATCH}" STREQUAL "")
        set(PARQUET_VERSION "0.0.0")
      else()
        set(PARQUET_VERSION
            "${PARQUET_VERSION_MAJOR}.${PARQUET_VERSION_MINOR}.${PARQUET_VERSION_PATCH}")
      endif()

      # The SO version macros are extracted as quoted strings; strip the
      # surrounding double quotes.
      arrow_extract_macro_value(PARQUET_SO_VERSION_QUOTED "PARQUET_SO_VERSION"
                                "${PARQUET_VERSION_H_CONTENT}")
      string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION
                           "${PARQUET_SO_VERSION_QUOTED}")
      arrow_extract_macro_value(PARQUET_FULL_SO_VERSION_QUOTED "PARQUET_FULL_SO_VERSION"
                                "${PARQUET_VERSION_H_CONTENT}")
      string(REGEX REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION
                           "${PARQUET_FULL_SO_VERSION_QUOTED}")
    endif()
  else()
    # No explicit prefix: use the package's own CMake config or its pkg-config
    # variables, depending on which approach is flagged.
    # NOTE(review): PARQUET_USE_CMAKE_PACKAGE_CONFIG / PARQUET_USE_PKG_CONFIG
    # are presumably set by arrow_find_package() - confirm.
    if(PARQUET_USE_CMAKE_PACKAGE_CONFIG)
      find_package(Parquet CONFIG)
    elseif(PARQUET_USE_PKG_CONFIG)
      pkg_get_variable(PARQUET_SO_VERSION parquet so_version)
      pkg_get_variable(PARQUET_FULL_SO_VERSION parquet full_so_version)
    endif()
  endif()
  set(PARQUET_ABI_VERSION "${PARQUET_SO_VERSION}")
endif()

# Mark the result variables of this module as advanced: first the
# library/include locations, then the version information.
mark_as_advanced(
  PARQUET_IMPORT_LIB
  PARQUET_INCLUDE_DIR
  PARQUET_LIBS
  PARQUET_LIB_DIR
  PARQUET_SHARED_IMP_LIB
  PARQUET_SHARED_LIB
  PARQUET_STATIC_LIB)
mark_as_advanced(
  PARQUET_ABI_VERSION
  PARQUET_SO_VERSION
  PARQUET_VERSION)

# Include the standard helper explicitly so this module does not depend on
# another find module having loaded it beforehand.
include(FindPackageHandleStandardArgs)

# Parquet counts as found only when the include directory, the library
# directory and the shared-object version are all known; PARQUET_VERSION is
# used for version checks.
find_package_handle_standard_args(
  Parquet
  REQUIRED_VARS PARQUET_INCLUDE_DIR PARQUET_LIB_DIR PARQUET_SO_VERSION
  VERSION_VAR PARQUET_VERSION)

# Mirror the result into the upper-case variable documented in the header.
set(PARQUET_FOUND ${Parquet_FOUND})

# Print a summary of what was located, unless the caller asked for a quiet
# lookup or Parquet was not found.
if(NOT Parquet_FIND_QUIETLY AND Parquet_FOUND)
  message(STATUS
          "Parquet version: ${PARQUET_VERSION} (${PARQUET_FIND_APPROACH})")
  message(STATUS
          "Found the Parquet shared library: ${PARQUET_SHARED_LIB}")
  message(STATUS
          "Found the Parquet import library: ${PARQUET_IMPORT_LIB}")
  message(STATUS
          "Found the Parquet static library: ${PARQUET_STATIC_LIB}")
endif()
9 changes: 9 additions & 0 deletions modules/basic/ds/dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ const std::shared_ptr<arrow::RecordBatch> DataFrame::AsBatch(bool copy) const {

columns[i] = arrow::MakeArray(arrow::ArrayData::Make(
FromAnyType(df_col->value_type()), num_rows, {nullptr, copied_buffer}));

std::shared_ptr<arrow::Scalar> sca;
CHECK_ARROW_ERROR_AND_ASSIGN(sca, columns[i]->GetScalar(0));

DLOG(INFO) << "at column" << i << " start element : " << sca->ToString()
<< " value type: " << df_col->value_type()
<< " meta data type name:" << df_col->meta().GetTypeName()
<< std::endl;

fields[i] = std::make_shared<arrow::Field>(
field_name, FromAnyType(df_col->value_type()));
}
Expand Down
2 changes: 2 additions & 0 deletions modules/basic/ds/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ AnyType ParseAnyType(const std::string& type_name) {
return AnyType::UInt64;
} else if (type_name == "float") {
return AnyType::Float;
} else if (type_name == "float64") {
return AnyType::Double;
} else if (type_name == "double") {
return AnyType::Double;
} else if (type_name == "string") {
Expand Down
19 changes: 13 additions & 6 deletions modules/fuse/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
# build vineyard-fuse
set(FUSE_SRC_FILES)
list(APPEND FUSE_SRC_FILES "fused.cc")
list(APPEND FUSE_SRC_FILES "adaptors/arrow.cc")
list(APPEND FUSE_SRC_FILES "adaptors/orc.cc")
list(APPEND FUSE_SRC_FILES "adaptors/parquet.cc")

list(APPEND FUSE_SRC_FILES "adaptors/arrow_ipc/deserializer_registry.cc")
list(APPEND FUSE_SRC_FILES "adaptors/arrow_ipc/serializer_registry.cc")
list(APPEND FUSE_SRC_FILES "fuse_impl.cc")

add_library(vineyard_fuse ${FUSE_SRC_FILES})
target_link_libraries(vineyard_fuse PUBLIC vineyard_client
vineyard_basic
${ARROW_SHARED_LIB}
)
set_target_properties(vineyard_fuse PROPERTIES CXX_STANDARD 14)
target_link_libraries(vineyard_fuse PUBLIC FUSE3::FUSE3)
target_compile_options(vineyard_fuse PUBLIC -DWITH_ARROW_IPC)
if(BUILD_VINEYARD_FUSE_PARQUET)
target_compile_options(vineyard_fuse PUBLIC -DWITH_PARQUET)
if(TARGET parquet_shared)
Expand All @@ -19,12 +21,17 @@ if(BUILD_VINEYARD_FUSE_PARQUET)
target_link_libraries(vineyard_fuse PUBLIC parquet_static)
endif()
endif()
target_include_directories(vineyard_fuse PRIVATE ${CMAKE_SOURCE_DIR})

install_vineyard_target(vineyard_fuse)
install_vineyard_headers("${CMAKE_CURRENT_SOURCE_DIR}")

add_executable(vineyard-fusermount fusermount.cc)
target_link_libraries(vineyard-fusermount PRIVATE vineyard_fuse)
set(FUSE_MOUNT_SRC_FILES)

add_executable(vineyard-fusermount fusermount.cc)
target_include_directories(vineyard-fusermount PRIVATE ${CMAKE_SOURCE_DIR})

target_link_libraries(vineyard-fusermount PUBLIC vineyard_fuse)
install_vineyard_target(vineyard-fusermount)

if(BUILD_VINEYARD_TESTS)
Expand Down
Loading

0 comments on commit 2f2c7f7

Please sign in to comment.