1616# limitations under the License.
1717################################################################################
1818
19+ import os
20+ import shutil
1921import tempfile
2022import unittest
2123import pandas as pd
2224import pyarrow as pa
2325import setup_utils .java_setuputils as setuputils
2426
25- from paimon_python_java import Catalog , Table
27+ from paimon_python_api import Schema
28+ from paimon_python_java import Catalog
2629from paimon_python_java .java_gateway import get_gateway
27- from paimon_python_java .tests .utils import create_simple_table
2830from paimon_python_java .util import java_utils
2931from py4j .protocol import Py4JJavaError
3032
@@ -35,15 +37,23 @@ class TableWriteReadTest(unittest.TestCase):
3537 def setUpClass (cls ):
3638 setuputils .setup_java_bridge ()
3739 cls .warehouse = tempfile .mkdtemp ()
40+ cls .simple_pa_schema = pa .schema ([
41+ ('f0' , pa .int32 ()),
42+ ('f1' , pa .string ())
43+ ])
44+ cls .catalog = Catalog .create ({'warehouse' : cls .warehouse })
45+ cls .catalog .create_database ('default' , False )
3846
3947 @classmethod
4048 def tearDownClass (cls ):
4149 setuputils .clean ()
50+ if os .path .exists (cls .warehouse ):
51+ shutil .rmtree (cls .warehouse )
4252
4353 def testReadEmptyAppendTable (self ):
44- create_simple_table (self .warehouse , 'default' , 'empty_append_table' , False )
45- catalog = Catalog . create ({ 'warehouse' : self . warehouse } )
46- table = catalog .get_table ('default.empty_append_table' )
54+ schema = Schema (self .simple_pa_schema )
55+ self . catalog . create_table ( 'default.empty_append_table' , schema , False )
56+ table = self . catalog .get_table ('default.empty_append_table' )
4757
4858 # read data
4959 read_builder = table .new_read_builder ()
@@ -53,7 +63,10 @@ def testReadEmptyAppendTable(self):
5363 self .assertTrue (len (splits ) == 0 )
5464
5565 def testReadEmptyPkTable (self ):
56- create_simple_table (self .warehouse , 'default' , 'empty_pk_table' , True )
66+ schema = Schema (self .simple_pa_schema , primary_keys = ['f0' ], options = {'bucket' : '1' })
67+ self .catalog .create_table ('default.empty_pk_table' , schema , False )
68+
69+ # use Java API to generate data
5770 gateway = get_gateway ()
5871 j_catalog_context = java_utils .to_j_catalog_context ({'warehouse' : self .warehouse })
5972 j_catalog = gateway .jvm .CatalogFactory .createCatalog (j_catalog_context )
@@ -84,7 +97,7 @@ def testReadEmptyPkTable(self):
8497 table_commit .close ()
8598
8699 # read data
87- table = Table ( j_table , {} )
100+ table = self . catalog . get_table ( 'default.empty_pk_table' )
88101 read_builder = table .new_read_builder ()
89102 table_scan = read_builder .new_scan ()
90103 table_read = read_builder .new_read ()
@@ -98,19 +111,17 @@ def testReadEmptyPkTable(self):
98111 self .assertEqual (len (data_frames ), 0 )
99112
100113 def testWriteReadAppendTable (self ):
101- create_simple_table (self .warehouse , 'default' , 'simple_append_table' , False )
102-
103- catalog = Catalog .create ({'warehouse' : self .warehouse })
104- table = catalog .get_table ('default.simple_append_table' )
114+ schema = Schema (self .simple_pa_schema )
115+ self .catalog .create_table ('default.simple_append_table' , schema , False )
116+ table = self .catalog .get_table ('default.simple_append_table' )
105117
106118 # prepare data
107119 data = {
108120 'f0' : [1 , 2 , 3 ],
109121 'f1' : ['a' , 'b' , 'c' ],
110122 }
111123 df = pd .DataFrame (data )
112- df ['f0' ] = df ['f0' ].astype ('int32' )
113- record_batch = pa .RecordBatch .from_pandas (df )
124+ record_batch = pa .RecordBatch .from_pandas (df , schema = self .simple_pa_schema )
114125
115126 # write and commit data
116127 write_builder = table .new_batch_write_builder ()
@@ -138,13 +149,15 @@ def testWriteReadAppendTable(self):
138149 result = pd .concat (data_frames )
139150
140151 # check data (ignore index)
141- pd .testing .assert_frame_equal (result .reset_index (drop = True ), df .reset_index (drop = True ))
152+ expected = df
153+ expected ['f0' ] = df ['f0' ].astype ('int32' )
154+ pd .testing .assert_frame_equal (
155+ result .reset_index (drop = True ), expected .reset_index (drop = True ))
142156
143157 def testWriteWrongSchema (self ):
144- create_simple_table (self .warehouse , 'default' , 'test_wrong_schema' , False )
145-
146- catalog = Catalog .create ({'warehouse' : self .warehouse })
147- table = catalog .get_table ('default.test_wrong_schema' )
158+ schema = Schema (self .simple_pa_schema )
159+ self .catalog .create_table ('default.test_wrong_schema' , schema , False )
160+ table = self .catalog .get_table ('default.test_wrong_schema' )
148161
149162 data = {
150163 'f0' : [1 , 2 , 3 ],
@@ -155,7 +168,7 @@ def testWriteWrongSchema(self):
155168 ('f0' , pa .int64 ()),
156169 ('f1' , pa .string ())
157170 ])
158- record_batch = pa .RecordBatch .from_pandas (df , schema )
171+ record_batch = pa .RecordBatch .from_pandas (df , schema = schema )
159172
160173 write_builder = table .new_batch_write_builder ()
161174 table_write = write_builder .new_write ()
@@ -169,16 +182,9 @@ def testWriteWrongSchema(self):
169182\t Input schema is: [f0: Int(64, true), f1: Utf8]''' )
170183
171184 def testCannotWriteDynamicBucketTable (self ):
172- create_simple_table (
173- self .warehouse ,
174- 'default' ,
175- 'test_dynamic_bucket' ,
176- True ,
177- {'bucket' : '-1' }
178- )
179-
180- catalog = Catalog .create ({'warehouse' : self .warehouse })
181- table = catalog .get_table ('default.test_dynamic_bucket' )
185+ schema = Schema (self .simple_pa_schema , primary_keys = ['f0' ])
186+ self .catalog .create_table ('default.test_dynamic_bucket' , schema , False )
187+ table = self .catalog .get_table ('default.test_dynamic_bucket' )
182188
183189 with self .assertRaises (TypeError ) as e :
184190 table .new_batch_write_builder ()
@@ -187,9 +193,9 @@ def testCannotWriteDynamicBucketTable(self):
187193 "Doesn't support writing dynamic bucket or cross partition table." )
188194
189195 def testParallelRead (self ):
190- create_simple_table (self .warehouse , 'default' , 'test_parallel_read' , False )
191-
192196 catalog = Catalog .create ({'warehouse' : self .warehouse , 'max-workers' : '2' })
197+ schema = Schema (self .simple_pa_schema )
198+ catalog .create_table ('default.test_parallel_read' , schema , False )
193199 table = catalog .get_table ('default.test_parallel_read' )
194200
195201 # prepare data
@@ -207,8 +213,7 @@ def testParallelRead(self):
207213 expected_data ['f1' ].append (str (i * 2 ))
208214
209215 df = pd .DataFrame (data )
210- df ['f0' ] = df ['f0' ].astype ('int32' )
211- record_batch = pa .RecordBatch .from_pandas (df )
216+ record_batch = pa .RecordBatch .from_pandas (df , schema = self .simple_pa_schema )
212217
213218 # write and commit data
214219 write_builder = table .new_batch_write_builder ()
0 commit comments