Skip to content

Commit 165ad9b

Browse files
authored
java_based_implementation packages java-bridge jar by default (#13)
1 parent 583bf96 commit 165ad9b

9 files changed

Lines changed: 184 additions & 78 deletions

File tree

README.md

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,6 @@ and running "External Tools" → "flake8".
2828

2929
We can use `py4j` to leverage Java code to read Paimon data. This section describes how to use this implementation.
3030

31-
### Build paimon-python-java-bridge
32-
33-
```bash
34-
cd java_based_implementation/paimon-python-java-bridge/
35-
mvn clean install -DskipTests
36-
```
37-
The built target is java-based-implementation/paimon-python-java-bridge/target/paimon-python-java-bridge-<version>.jar
38-
3931
### Set Environment Variables
4032

4133
`py4j` needs to access a JVM, so we should set the JVM arguments (optional) and the Java classpath. A convenient way is using
@@ -44,10 +36,14 @@ The built target is java-based-implementation/paimon-python-java-bridge/target/p
4436
```python
4537
import os
4638

47-
os.environ['PYPAIMON_JAVA_CLASSPATH'] = '/path/to/paimon-python-java-bridge-<version>.jar'
39+
os.environ['_PYPAIMON_JAVA_CLASSPATH'] = '/path/to/dependent_jars/*'
4840
os.environ['_PYPAIMON_JVM_ARGS'] = 'jvm_arg1 jvm_arg2 ...'
4941
```
5042

43+
NOTE: the package already includes the Paimon core and Hadoop dependencies. If you only test locally or run the code in Hadoop, you don't
44+
need to set the classpath. If you need other dependencies, such as OSS/S3 filesystem jars or a special catalog that isn't implemented
45+
in Paimon core, please download the jars and set the classpath.
46+
5147
# API Reference
5248
TODO
5349

dev/build-wheels.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ for ((i=0;i<${#py_env[@]};i++)) do
3333
# force the linker to use the older glibc version in Linux
3434
export CFLAGS="-I. -include dev/glibc_version_fix.h"
3535
fi
36-
${PY_ENV_DIR}/${py_env[i]}/bin/python setup.py bdist_wheel
36+
${PY_ENV_DIR}/${py_env[i]}/bin/python setup.py clean bdist_wheel
3737
done
3838

3939
## 4. convert linux_x86_64 wheel to manylinux1 wheel in Linux

dev/lint-python.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,9 @@ function tox_check() {
578578
# Ensure the permission of the scripts set correctly
579579
chmod +x $PAIMON_PYTHON_DIR/dev/*
580580

581+
# tox runs the code in a virtual env; set this variable so packaging skips building the Java bridge and avoids errors
582+
export _PYPAIMON_TOX_TEST="true"
583+
581584
if [[ ${BUILD_REASON} = 'IndividualCI' ]]; then
582585
# Only run test in latest python version triggered by a Git push
583586
$TOX_PATH -vv -c $PAIMON_PYTHON_DIR/tox.ini -e ${LATEST_PYTHON} --recreate 2>&1 | tee -a $LOG_FILE

java_based_implementation/gateway_server.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
import signal
2222

2323
from subprocess import Popen, PIPE
24-
from java_based_implementation.util.constants import (PYPAIMON_JVM_ARGS, PYPAIMON_JAVA_CLASSPATH,
25-
PYPAIMON_MAIN_ARGS, PYPAIMON_MAIN_CLASS)
24+
from java_based_implementation.util.constants import (PYPAIMON_JVM_ARGS, PYPAIMON_MAIN_ARGS,
25+
PYPAIMON_MAIN_CLASS)
26+
from java_based_implementation.util.setup_utils import get_classpath
2627

2728

2829
def on_windows():
@@ -47,7 +48,7 @@ def launch_gateway_server_process(env):
4748
# TODO construct Java module log settings
4849
log_settings = []
4950
jvm_args = env.get(PYPAIMON_JVM_ARGS, '').split()
50-
classpath = env.get(PYPAIMON_JAVA_CLASSPATH)
51+
classpath = get_classpath(env)
5152
main_args = env.get(PYPAIMON_MAIN_ARGS, '').split()
5253
command = [
5354
java_executable,

java_based_implementation/tests/test_write_and_read.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,29 @@
1616
# limitations under the License.
1717
################################################################################
1818

19-
import os
2019
import tempfile
2120
import unittest
2221
import pandas as pd
2322
import pyarrow as pa
2423

2524
from java_based_implementation.api_impl import Catalog, Table
2625
from java_based_implementation.java_gateway import get_gateway
27-
from java_based_implementation.tests.utils import set_bridge_jar, create_simple_table
28-
from java_based_implementation.util import constants, java_utils
26+
from java_based_implementation.tests.utils import create_simple_table
27+
from java_based_implementation.util import java_utils, setup_utils
2928
from py4j.protocol import Py4JJavaError
3029

3130

3231
class TableWriteReadTest(unittest.TestCase):
3332

3433
@classmethod
3534
def setUpClass(cls):
36-
classpath = set_bridge_jar()
37-
os.environ[constants.PYPAIMON_JAVA_CLASSPATH] = classpath
35+
setup_utils.setup_java_bridge()
3836
cls.warehouse = tempfile.mkdtemp()
3937

38+
@classmethod
39+
def tearDownClass(cls):
40+
setup_utils.clean()
41+
4042
def testReadEmptyAppendTable(self):
4143
create_simple_table(self.warehouse, 'default', 'empty_append_table', False)
4244
catalog = Catalog.create({'warehouse': self.warehouse})

java_based_implementation/tests/utils.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,35 +16,10 @@
1616
# limitations under the License.
1717
################################################################################
1818

19-
import os
20-
import shutil
21-
import subprocess
22-
import tempfile
23-
2419
from java_based_implementation.java_gateway import get_gateway
2520
from java_based_implementation.util.java_utils import to_j_catalog_context
2621

2722

28-
def set_bridge_jar() -> str:
29-
current_file_path = os.path.abspath(__file__)
30-
current_dir = os.path.dirname(current_file_path)
31-
parent_dir = os.path.dirname(current_dir)
32-
java_module = os.path.join(parent_dir, 'paimon-python-java-bridge')
33-
# build paimon-python-java-bridge
34-
subprocess.run(
35-
["mvn", "clean", "package"],
36-
cwd=java_module,
37-
stdout=subprocess.PIPE,
38-
stderr=subprocess.PIPE
39-
)
40-
jar_name = 'paimon-python-java-bridge-0.9-SNAPSHOT.jar'
41-
jar_file = os.path.join(java_module, 'target', jar_name)
42-
# move to temp dir
43-
temp_dir = tempfile.mkdtemp()
44-
shutil.move(jar_file, temp_dir)
45-
return os.path.join(temp_dir, jar_name)
46-
47-
4823
def create_simple_table(warehouse, database, table_name, has_pk, options=None):
4924
if options is None:
5025
options = {

java_based_implementation/util/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@
2222
PYPAIMON_JAVA_CLASSPATH = '_PYPAIMON_JAVA_CLASSPATH'
2323
PYPAIMON_MAIN_CLASS = 'org.apache.paimon.python.PythonGatewayServer'
2424
PYPAIMON_MAIN_ARGS = '_PYPAIMON_MAIN_ARGS'
25+
PYPAIMON_TOX_TEST = '_PYPAIMON_TOX_TEST'
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
################################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
################################################################################
18+
19+
import importlib
20+
import os
21+
import shutil
22+
import subprocess
23+
24+
from java_based_implementation.util import constants
25+
from java_based_implementation.util.constants import PYPAIMON_JAVA_CLASSPATH
26+
27+
_JAVA_IMPL_MODULE = 'java_based_implementation'
28+
_JAVA_DEPS = 'java_dependencies'
29+
_JAVA_BRIDGE = 'paimon-python-java-bridge'
30+
# TODO configure version
31+
_JAVA_BRIDGE_VERSION = '0.9-SNAPSHOT'
32+
33+
34+
def get_package_data():
    """Return the ``package_data`` patterns for the Java based implementation.

    When the environment variable named by ``constants.PYPAIMON_TOX_TEST`` is
    set to "true" (case-insensitive), the Java bridge is not built and a
    single empty pattern is returned. Otherwise the Java bridge jar is
    prepared first and a wildcard covering the java dependencies directory
    is returned.
    """
    tox_flag = os.environ.get(constants.PYPAIMON_TOX_TEST)
    running_under_tox = bool(tox_flag) and tox_flag.lower() == "true"
    if not running_under_tox:
        # the wildcard only matches something once the jar has been prepared
        setup_java_bridge()
        return [os.path.join(_JAVA_DEPS, '*')]
    return ['']
41+
42+
43+
def clean():
    """Delete the java dependencies directory of the package if it exists."""
    deps_path = os.path.join(_find_java_impl_dir(), _JAVA_DEPS)
    if not os.path.exists(deps_path):
        return
    shutil.rmtree(deps_path)
47+
48+
49+
def get_classpath(env):
    """Build the Java classpath used to launch the gateway JVM.

    The jar bundled inside the package always comes first; a user-defined
    classpath read from ``env`` (key ``PYPAIMON_JAVA_CLASSPATH``) is appended
    after it.

    :param env: mapping of environment variables, queried with ``env.get``.
    :return: the classpath string, entries joined by ``os.pathsep``.
    """
    module = importlib.import_module(_JAVA_IMPL_MODULE)
    # ``__path__`` may hold more than one entry. Splatting all of them into
    # os.path.join lets a later absolute entry discard the earlier ones (or
    # nests them into a bogus path), so anchor on the first entry only.
    package_dir = next(iter(module.__path__))
    builtin_java_bridge = os.path.join(package_dir, _JAVA_DEPS, _JAVA_BRIDGE + '.jar')

    user_defined = env.get(PYPAIMON_JAVA_CLASSPATH)
    # An unset or blank value must not contribute an empty classpath entry;
    # an empty entry is commonly treated by the JVM as the current directory.
    if user_defined is None or not user_defined.strip():
        return builtin_java_bridge
    return os.pathsep.join([builtin_java_bridge, user_defined])
59+
60+
61+
def setup_java_bridge():
    """Build the Java bridge with Maven and copy its jar into the package.

    The jar is placed in the java dependencies directory next to the Python
    sources. If the jar is already there, nothing is built.

    :raises RuntimeError: if the Maven build exits with a non-zero status.
    """
    java_impl_dir = _find_java_impl_dir()

    java_deps_dir = os.path.join(java_impl_dir, _JAVA_DEPS)
    # exist_ok avoids the check-then-create race of exists() followed by mkdir()
    os.makedirs(java_deps_dir, exist_ok=True)

    java_bridge_dst = os.path.join(java_deps_dir, _JAVA_BRIDGE + '.jar')
    if os.path.exists(java_bridge_dst):
        # jar already prepared; skip the build
        return

    java_bridge_module = os.path.join(java_impl_dir, _JAVA_BRIDGE)
    try:
        subprocess.run(
            ["mvn", "clean", "package"],
            cwd=java_bridge_module,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True
        )
    except subprocess.CalledProcessError as err:
        # Surface the captured Maven output; otherwise a failed build would
        # only show up later as an unrelated error while copying the jar.
        output = b'\n'.join(filter(None, (err.stdout, err.stderr)))
        raise RuntimeError(
            'Failed to build {} (mvn exited with code {}):\n{}'.format(
                _JAVA_BRIDGE, err.returncode, output.decode(errors='replace'))
        ) from err

    built_jar = os.path.join(
        java_bridge_module,
        'target',
        '{}-{}.jar'.format(_JAVA_BRIDGE, _JAVA_BRIDGE_VERSION)
    )
    shutil.copy(built_jar, java_bridge_dst)
85+
86+
87+
def _find_java_impl_dir():
    """Return the absolute path of the parent of the directory holding this file."""
    containing_dir, _ = os.path.split(os.path.abspath(__file__))
    return os.path.dirname(containing_dir)

setup.py

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,38 +16,77 @@
1616
# limitations under the License.
1717
################################################################################
1818

19-
from setuptools import setup
20-
21-
PACKAGES = [
22-
'paimon_python_api',
23-
'java_based_implementation',
24-
'java_based_implementation.util'
25-
]
26-
27-
install_requires = [
28-
'py4j==0.10.9.7',
29-
'python-dateutil>=2.8.0,<3',
30-
'pytz>=2018.3',
31-
'numpy>=1.22.4',
32-
'pandas>=1.3.0',
33-
'pyarrow>=5.0.0'
34-
]
35-
36-
setup(
37-
name='paimon_python',
38-
version='0.9.0.dev1',
39-
packages=PACKAGES,
40-
install_requires=install_requires,
41-
description='Apache Paimon Python API',
42-
author='Apache Software Foundation',
43-
author_email='dev@paimon.apache.org',
44-
url='https://paimon.apache.org',
45-
classifiers=[
46-
'Development Status :: 5 - Production/Stable',
47-
'License :: OSI Approved :: Apache Software License',
48-
'Programming Language :: Python :: 3.8',
49-
'Programming Language :: Python :: 3.9',
50-
'Programming Language :: Python :: 3.10',
51-
'Programming Language :: Python :: 3.11'],
52-
python_requires='>=3.8'
53-
)
19+
import fnmatch
20+
import java_based_implementation.util.setup_utils
21+
import os
22+
import shutil
23+
24+
from setuptools import Command, setup
25+
26+
27+
class CleanCommand(Command):
    """Custom ``clean`` command that removes artifacts of a previous build."""

    description = 'Clean up temporary files and directories of last build.'
    user_options = []

    def initialize_options(self):
        """No options to initialize."""

    def finalize_options(self):
        """No options to finalize."""

    def run(self):
        """Remove ``build``, ``dist`` and ``*.egg-info`` directories from the current directory."""
        for pattern in ('build', 'dist', '*.egg-info'):
            if '*' not in pattern:
                # plain name: remove it when present
                if os.path.exists(pattern):
                    shutil.rmtree(pattern)
                continue

            # wildcard: remove every matching directory in the current directory
            for candidate in fnmatch.filter(os.listdir('.'), pattern):
                if os.path.isdir(candidate):
                    shutil.rmtree(candidate)
48+
49+
50+
# Packaging entry point. Computing the package data may build the Java bridge
# into the package directory, so that step and setup() run inside try/finally:
# the generated java dependencies are removed again whether packaging succeeds
# or fails.
_setup_utils = java_based_implementation.util.setup_utils

PACKAGES = [
    'paimon_python_api',
    'java_based_implementation',
    'java_based_implementation.util'
]

install_requires = [
    'py4j==0.10.9.7',
    'python-dateutil>=2.8.0,<3',
    'pytz>=2018.3',
    'numpy>=1.22.4',
    'pandas>=1.3.0',
    'pyarrow>=5.0.0'
]

_CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'License :: OSI Approved :: Apache Software License',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: 3.11'
]

try:
    PACKAGE_DATA = {
        'java_based_implementation': _setup_utils.get_package_data()
    }

    setup(
        name='paimon_python',
        version='0.9.0.dev1',
        packages=PACKAGES,
        include_package_data=True,
        package_data=PACKAGE_DATA,
        cmdclass={'clean': CleanCommand},
        install_requires=install_requires,
        description='Apache Paimon Python API',
        author='Apache Software Foundation',
        author_email='dev@paimon.apache.org',
        url='https://paimon.apache.org',
        classifiers=_CLASSIFIERS,
        python_requires='>=3.8'
    )
finally:
    _setup_utils.clean()

0 commit comments

Comments
 (0)