Skip to content

Commit

Permalink
[Build] Refactor project structure and refactor release tools
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzelin committed Nov 27, 2024
1 parent 33d5253 commit a705fff
Show file tree
Hide file tree
Showing 30 changed files with 232 additions and 349 deletions.
23 changes: 23 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
include README.md
include LICENSE
include NOTICE
3 changes: 0 additions & 3 deletions dev/lint-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,6 @@ function tox_check() {
# Ensure the permission of the scripts set correctly
chmod +x $PAIMON_PYTHON_DIR/dev/*

# tox runs codes in virtual env, set var to avoid error
export _PYPAIMON_TOX_TEST="true"

if [[ -n "$GITHUB_ACTION" ]]; then
# Run tests in all versions triggered by a Git push (tests aren't so many currently)
$TOX_PATH -vv -c $PAIMON_PYTHON_DIR/tox.ini --recreate 2>&1 | tee -a $LOG_FILE
Expand Down
File renamed without changes.
File renamed without changes.
18 changes: 11 additions & 7 deletions pypaimon/py4j/gateway_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# limitations under the License.
################################################################################

import importlib
import importlib.resources
import os
import platform
import signal
Expand Down Expand Up @@ -74,17 +74,21 @@ def preexec_func():
stdin=PIPE, stderr=PIPE, preexec_fn=preexec_fn, env=env)


_JAVA_IMPL_MODULE = 'pypaimon.py4j'
_JAVA_DEPS = 'java_dependencies'
_JAVA_BRIDGE = 'paimon-python-java-bridge'
_JAVA_DEPS_PACKAGE = 'pypaimon.jars'


def _get_classpath(env):
classpath = []

module = importlib.import_module(_JAVA_IMPL_MODULE)
builtin_java_bridge = os.path.join(*module.__path__, _JAVA_DEPS, _JAVA_BRIDGE + '.jar')
classpath.append(builtin_java_bridge)
# note that jars are not packaged in test
test_mode = os.environ.get(constants.PYPAIMON4J_TEST_MODE)
if not test_mode or test_mode.lower() != "true":
jars = importlib.resources.files(_JAVA_DEPS_PACKAGE)
one_jar = next(iter(jars.iterdir()), None)
if not one_jar:
raise ValueError("Haven't found necessary python-java-bridge jar, this is unexpected.")
builtin_java_classpath = os.path.join(os.path.dirname(str(one_jar)), '*')
classpath.append(builtin_java_classpath)

# user defined
if constants.PYPAIMON_JAVA_CLASSPATH in env:
Expand Down
85 changes: 85 additions & 0 deletions pypaimon/py4j/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,88 @@
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import shutil
import subprocess
import tempfile
import unittest
import urllib.request

from pypaimon.py4j import constants, Catalog
from xml.etree import ElementTree


def _setup_hadoop_bundle_jar(hadoop_dir):
    """Download the shaded Hadoop uber jar into ``hadoop_dir`` and register it.

    The jar is stored as ``bundled-hadoop.jar`` and its path is exported through
    the environment variable named by ``constants.PYPAIMON_HADOOP_CLASSPATH``.

    :param hadoop_dir: directory that receives the jar; created when missing.
    """
    url = 'https://repo.maven.apache.org/maven2/org/apache/flink/' \
          'flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar'

    # The response is used as a context manager so the connection is always
    # closed, even when writing the jar fails.
    with urllib.request.urlopen(url) as response:
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(hadoop_dir, exist_ok=True)

        jar_path = os.path.join(hadoop_dir, "bundled-hadoop.jar")
        with open(jar_path, 'wb') as file:
            # Stream in chunks instead of holding the whole jar in memory.
            shutil.copyfileobj(response, file)

    os.environ[constants.PYPAIMON_HADOOP_CLASSPATH] = jar_path


def _setup_bridge_jar(bridge_dir):
    """Build the Java bridge with Maven and register the resulting jar.

    Runs ``mvn clean package`` inside the bridge module, copies the built jar to
    ``bridge_dir`` as ``paimon-python-java-bridge.jar`` and exports that path
    through the environment variable named by ``constants.PYPAIMON_JAVA_CLASSPATH``.

    :param bridge_dir: directory that receives the jar; created when missing.
    :raises RuntimeError: if the Maven build exits with a non-zero status.
    """
    java_bridge_module = _find_java_bridge_module()
    build = subprocess.run(
        ["mvn", "clean", "package"],
        cwd=java_bridge_module,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    # Fail right here with the build log; otherwise a broken build only shows
    # up later as a missing-file error from the copy below.
    if build.returncode != 0:
        raise RuntimeError(
            "'mvn clean package' failed in {} with exit code {}.\n"
            "stdout:\n{}\nstderr:\n{}".format(
                java_bridge_module,
                build.returncode,
                build.stdout.decode(errors='replace'),
                build.stderr.decode(errors='replace')))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(bridge_dir, exist_ok=True)

    jar_path = os.path.join(bridge_dir, "paimon-python-java-bridge.jar")
    shutil.copy(
        os.path.join(java_bridge_module, 'target/{}-{}.jar'
                     .format('paimon-python-java-bridge', _extract_bridge_version())),
        jar_path
    )

    os.environ[constants.PYPAIMON_JAVA_CLASSPATH] = jar_path


def _extract_bridge_version():
    """Return the version declared in the Java bridge module's ``pom.xml``.

    Only a ``<version>`` element that is a direct child of the POM root is
    considered.

    :raises ValueError: if that element is missing or empty.
    """
    pom_path = os.path.join(_find_java_bridge_module(), 'pom.xml')
    version = ElementTree.parse(pom_path).getroot().find(
        'POM:version',
        namespaces={
            'POM': 'http://maven.apache.org/POM/4.0.0'
        })
    # find() returns None when nothing matches; compare with None explicitly
    # rather than relying on Element truthiness.
    text = version.text.strip() if version is not None and version.text else ''
    if not text:
        raise ValueError(
            "No <version> element found directly under the root of {}".format(pom_path))
    return text


def _find_java_bridge_module():
    """Return the path of the ``paimon-python-java-bridge`` module.

    The module is expected three directory levels above the directory that
    contains this file.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    # Climb three directory levels from this file's directory.
    for _ in range(3):
        base_dir = os.path.dirname(base_dir)
    return os.path.join(base_dir, "paimon-python-java-bridge")


class PypaimonTestBase(unittest.TestCase):
    """
    Base class for unit tests.

    ``setUpClass`` creates a temporary directory, places the Hadoop and Java
    bridge jars in it, and creates a catalog whose warehouse lives in that
    directory together with a ``default`` database. The fixture is exposed as
    ``cls.tempdir``, ``cls.warehouse`` and ``cls.catalog``.
    """

    @classmethod
    def setUpClass(cls):
        cls.tempdir = tempfile.mkdtemp()
        try:
            os.environ[constants.PYPAIMON4J_TEST_MODE] = 'true'

            _setup_hadoop_bundle_jar(cls.tempdir)
            _setup_bridge_jar(cls.tempdir)

            cls.warehouse = os.path.join(cls.tempdir, 'warehouse')
            cls.catalog = Catalog.create({'warehouse': cls.warehouse})
            cls.catalog.create_database('default', False)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so
            # release the partially built fixture here before re-raising.
            cls._release_fixture()
            raise

    @classmethod
    def tearDownClass(cls):
        cls._release_fixture()

    @classmethod
    def _release_fixture(cls):
        # Remove the temporary directory and clear the test-mode flag.
        shutil.rmtree(cls.tempdir, ignore_errors=True)
        # pop() with a default tolerates the variable having been removed already.
        os.environ.pop(constants.PYPAIMON4J_TEST_MODE, None)
25 changes: 3 additions & 22 deletions pypaimon/py4j/tests/test_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,24 @@
# limitations under the License.
################################################################################

import os
import random
import shutil
import string
import tempfile
import pyarrow as pa
import unittest

from pypaimon import Schema
from pypaimon.py4j import Catalog
from pypaimon.py4j.tests import utils
from pypaimon.py4j.tests import PypaimonTestBase
from pypaimon.py4j.util import java_utils
from setup_utils import java_setuputils


class DataTypesTest(unittest.TestCase):
class DataTypesTest(PypaimonTestBase):

@classmethod
def setUpClass(cls):
java_setuputils.setup_java_bridge()
cls.hadoop_path = tempfile.mkdtemp()
utils.setup_hadoop_bundle_jar(cls.hadoop_path)
cls.warehouse = tempfile.mkdtemp()
super().setUpClass()
cls.simple_pa_schema = pa.schema([
('f0', pa.int32()),
('f1', pa.string())
])
cls.catalog = Catalog.create({'warehouse': cls.warehouse})
cls.catalog.create_database('default', False)

@classmethod
def tearDownClass(cls):
java_setuputils.clean()
if os.path.exists(cls.hadoop_path):
shutil.rmtree(cls.hadoop_path)
if os.path.exists(cls.warehouse):
shutil.rmtree(cls.warehouse)

def test_int(self):
pa_schema = pa.schema([
Expand Down
48 changes: 13 additions & 35 deletions pypaimon/py4j/tests/test_preicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,12 @@
# limitations under the License.
################################################################################

import os
import shutil
import tempfile
import unittest
import random
import pandas as pd
import pyarrow as pa

from pypaimon import Schema
from pypaimon.py4j import Catalog
from pypaimon.py4j.tests import utils
from setup_utils import java_setuputils
from pypaimon.py4j.tests import PypaimonTestBase


def _check_filtered_result(read_builder, expected_df):
Expand All @@ -38,41 +32,34 @@ def _check_filtered_result(read_builder, expected_df):
actual_df.reset_index(drop=True), expected_df.reset_index(drop=True))


# TODO: parquet has bug now
# TODO: Parquet currently has a bug; it is fixed in 1.0.
def _random_format():
return random.choice(['avro', 'orc'])


class PredicateTest(unittest.TestCase):
class PredicateTest(PypaimonTestBase):

@classmethod
def setUpClass(cls):
java_setuputils.setup_java_bridge()
cls.hadoop_path = tempfile.mkdtemp()
utils.setup_hadoop_bundle_jar(cls.hadoop_path)
cls.warehouse = tempfile.mkdtemp()

catalog = Catalog.create({'warehouse': cls.warehouse})
catalog.create_database('default', False)

super().setUpClass()
pa_schema = pa.schema([
('f0', pa.int64()),
('f1', pa.string()),
])
catalog.create_table('default.test_append',
Schema(pa_schema, options={'file.format': _random_format()}),
False)
catalog.create_table('default.test_pk',
Schema(pa_schema, primary_keys=['f0'],
options={'bucket': '1', 'file.format': _random_format()}),
False)
cls.catalog.create_table('default.test_append',
Schema(pa_schema, options={'file.format': _random_format()}),
False)
cls.catalog.create_table('default.test_pk',
Schema(pa_schema, primary_keys=['f0'],
options={'bucket': '1', 'file.format': _random_format()}),
False)

df = pd.DataFrame({
'f0': [1, 2, 3, 4, 5],
'f1': ['abc', 'abbc', 'bc', 'd', None],
})

append_table = catalog.get_table('default.test_append')
append_table = cls.catalog.get_table('default.test_append')
write_builder = append_table.new_batch_write_builder()
write = write_builder.new_write()
commit = write_builder.new_commit()
Expand All @@ -81,7 +68,7 @@ def setUpClass(cls):
write.close()
commit.close()

pk_table = catalog.get_table('default.test_pk')
pk_table = cls.catalog.get_table('default.test_pk')
write_builder = pk_table.new_batch_write_builder()
write = write_builder.new_write()
commit = write_builder.new_commit()
Expand All @@ -90,17 +77,8 @@ def setUpClass(cls):
write.close()
commit.close()

cls.catalog = catalog
cls.df = df

@classmethod
def tearDownClass(cls):
java_setuputils.clean()
if os.path.exists(cls.hadoop_path):
shutil.rmtree(cls.hadoop_path)
if os.path.exists(cls.warehouse):
shutil.rmtree(cls.warehouse)

def testWrongFieldName(self):
table = self.catalog.get_table('default.test_append')
predicate_builder = table.new_read_builder().new_predicate_builder()
Expand Down
23 changes: 3 additions & 20 deletions pypaimon/py4j/tests/test_write_and_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,27 @@
# limitations under the License.
################################################################################

import os
import shutil
import tempfile
import unittest
import pandas as pd
import pyarrow as pa
from py4j.protocol import Py4JJavaError

from pypaimon import Schema
from pypaimon.py4j import Catalog
from pypaimon.py4j.java_gateway import get_gateway
from pypaimon.py4j.tests import utils
from pypaimon.py4j.tests import PypaimonTestBase
from pypaimon.py4j.util import java_utils
from setup_utils import java_setuputils


class TableWriteReadTest(unittest.TestCase):
class TableWriteReadTest(PypaimonTestBase):

@classmethod
def setUpClass(cls):
java_setuputils.setup_java_bridge()
cls.hadoop_path = tempfile.mkdtemp()
utils.setup_hadoop_bundle_jar(cls.hadoop_path)
cls.warehouse = tempfile.mkdtemp()
super().setUpClass()
cls.simple_pa_schema = pa.schema([
('f0', pa.int32()),
('f1', pa.string())
])
cls.catalog = Catalog.create({'warehouse': cls.warehouse})
cls.catalog.create_database('default', False)

@classmethod
def tearDownClass(cls):
java_setuputils.clean()
if os.path.exists(cls.hadoop_path):
shutil.rmtree(cls.hadoop_path)
if os.path.exists(cls.warehouse):
shutil.rmtree(cls.warehouse)

def testReadEmptyAppendTable(self):
schema = Schema(self.simple_pa_schema)
Expand Down
3 changes: 3 additions & 0 deletions pypaimon/py4j/util/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@

# ------------------------ for catalog options ------------------------
MAX_WORKERS = "max-workers"

# ------------------ for tests (Please don't use it) ------------------
PYPAIMON4J_TEST_MODE = '_PYPAIMON4J_TEST_MODE'
File renamed without changes.
3 changes: 2 additions & 1 deletion setup_utils/__init__.py → setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@
# limitations under the License.
################################################################################

"""This module only contains utils for setup and won't be packaged."""
[bdist_wheel]
universal = 1
Loading

0 comments on commit a705fff

Please sign in to comment.