3636
3737pytestmark = [
3838 pytest .mark .single_cpu ,
39- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
4039]
4140
4241
4342@pytest .mark .parametrize ("mode" , ["r" , "r+" , "a" , "w" ])
44- def test_mode (setup_path , tmp_path , mode ):
43+ def test_mode (setup_path , tmp_path , mode , using_infer_string ):
4544 df = DataFrame (
4645 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
4746 columns = Index (list ("ABCD" ), dtype = object ),
@@ -90,10 +89,12 @@ def test_mode(setup_path, tmp_path, mode):
9089 read_hdf (path , "df" , mode = mode )
9190 else :
9291 result = read_hdf (path , "df" , mode = mode )
92+ if using_infer_string :
93+ df .columns = df .columns .astype ("str" )
9394 tm .assert_frame_equal (result , df )
9495
9596
96- def test_default_mode (tmp_path , setup_path ):
97+ def test_default_mode (tmp_path , setup_path , using_infer_string ):
9798 # read_hdf uses default mode
9899 df = DataFrame (
99100 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
@@ -103,7 +104,10 @@ def test_default_mode(tmp_path, setup_path):
103104 path = tmp_path / setup_path
104105 df .to_hdf (path , key = "df" , mode = "w" )
105106 result = read_hdf (path , "df" )
106- tm .assert_frame_equal (result , df )
107+ expected = df .copy ()
108+ if using_infer_string :
109+ expected .columns = expected .columns .astype ("str" )
110+ tm .assert_frame_equal (result , expected )
107111
108112
109113def test_reopen_handle (tmp_path , setup_path ):
@@ -162,7 +166,7 @@ def test_reopen_handle(tmp_path, setup_path):
162166 assert not store .is_open
163167
164168
165- def test_open_args (setup_path ):
169+ def test_open_args (setup_path , using_infer_string ):
166170 with tm .ensure_clean (setup_path ) as path :
167171 df = DataFrame (
168172 1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -177,8 +181,13 @@ def test_open_args(setup_path):
177181 store ["df" ] = df
178182 store .append ("df2" , df )
179183
180- tm .assert_frame_equal (store ["df" ], df )
181- tm .assert_frame_equal (store ["df2" ], df )
184+ expected = df .copy ()
185+ if using_infer_string :
186+ expected .index = expected .index .astype ("str" )
187+ expected .columns = expected .columns .astype ("str" )
188+
189+ tm .assert_frame_equal (store ["df" ], expected )
190+ tm .assert_frame_equal (store ["df2" ], expected )
182191
183192 store .close ()
184193
@@ -193,7 +202,7 @@ def test_flush(setup_path):
193202 store .flush (fsync = True )
194203
195204
196- def test_complibs_default_settings (tmp_path , setup_path ):
205+ def test_complibs_default_settings (tmp_path , setup_path , using_infer_string ):
197206 # GH15943
198207 df = DataFrame (
199208 1.1 * np .arange (120 ).reshape ((30 , 4 )),
@@ -206,7 +215,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
206215 tmpfile = tmp_path / setup_path
207216 df .to_hdf (tmpfile , key = "df" , complevel = 9 )
208217 result = read_hdf (tmpfile , "df" )
209- tm .assert_frame_equal (result , df )
218+ expected = df .copy ()
219+ if using_infer_string :
220+ expected .index = expected .index .astype ("str" )
221+ expected .columns = expected .columns .astype ("str" )
222+ tm .assert_frame_equal (result , expected )
210223
211224 with tables .open_file (tmpfile , mode = "r" ) as h5file :
212225 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -217,7 +230,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
217230 tmpfile = tmp_path / setup_path
218231 df .to_hdf (tmpfile , key = "df" , complib = "zlib" )
219232 result = read_hdf (tmpfile , "df" )
220- tm .assert_frame_equal (result , df )
233+ expected = df .copy ()
234+ if using_infer_string :
235+ expected .index = expected .index .astype ("str" )
236+ expected .columns = expected .columns .astype ("str" )
237+ tm .assert_frame_equal (result , expected )
221238
222239 with tables .open_file (tmpfile , mode = "r" ) as h5file :
223240 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -228,7 +245,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
228245 tmpfile = tmp_path / setup_path
229246 df .to_hdf (tmpfile , key = "df" )
230247 result = read_hdf (tmpfile , "df" )
231- tm .assert_frame_equal (result , df )
248+ expected = df .copy ()
249+ if using_infer_string :
250+ expected .index = expected .index .astype ("str" )
251+ expected .columns = expected .columns .astype ("str" )
252+ tm .assert_frame_equal (result , expected )
232253
233254 with tables .open_file (tmpfile , mode = "r" ) as h5file :
234255 for node in h5file .walk_nodes (where = "/df" , classname = "Leaf" ):
@@ -302,6 +323,7 @@ def test_complibs(tmp_path, lvl, lib, request):
302323 assert node .filters .complib == lib
303324
304325
326+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
305327@pytest .mark .skipif (
306328 not is_platform_little_endian (), reason = "reason platform is not little endian"
307329)
@@ -319,6 +341,7 @@ def test_encoding(setup_path):
319341 tm .assert_frame_equal (result , expected )
320342
321343
344+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
322345@pytest .mark .parametrize (
323346 "val" ,
324347 [
0 commit comments