From 9a7bf20d049d63b15f81438ab1f6d30f9a99c490 Mon Sep 17 00:00:00 2001 From: teancake Date: Wed, 6 Mar 2024 23:11:59 +0800 Subject: [PATCH] 1. pandas.Panel has been deprecated since version 0.20.0 and was removed in 0.25.0; replace it with DataFrame. 2. Swap months_start and months_end in create_month_tdays_begin_end method. 3. Tested with pandas 2.0.2 on Python 3.10 --- raw_data_fetch.py | 18 +++++++++--------- single_factor_test.py | 11 ++++------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/raw_data_fetch.py b/raw_data_fetch.py index bcf906d..52964d0 100644 --- a/raw_data_fetch.py +++ b/raw_data_fetch.py @@ -155,8 +155,8 @@ def create_indicator_m_by_q(self, raw_data_dir, raw_data_field, indicator_name, del dat['Unnamed: 0'] panel[d] = dat print(d) - datpanel = pd.Panel(panel) - datpanel = datpanel.to_frame().stack().unstack(level=(0,1)) #貌似某些情况下会有BUG,有索引但是没数据 + datpanel = pd.concat(panel, axis=0) + datpanel = datpanel.stack().unstack(level=(1, -1)) #开始计算结果指标(月频),在每个时间截面逐个处理每只股票 df = pd.DataFrame(index=all_stocks_info.index, columns=mdays) for d in df.columns: #每月最后一天 @@ -191,6 +191,7 @@ def create_indicator_m_by_q_ex(self, raw_data_dir, raw_data_field, indicator_nam del dat['Unnamed: 0'] panel[d] = dat print(d) + # no access to wind, no way to test, thus not changed. 
datpanel = pd.Panel(panel) datpanel = datpanel.swapaxes(0, 1) #开始计算结果指标(月频),在每个时间截面逐个处理每只股票 @@ -655,8 +656,8 @@ def create_month_tdays_begin_end(self, latest_month_end_tradeday=None): ''' 每月第一个和最后一个交易日映射 ''' tdays = self.tradedays - months_start = tdays[0:1] + list(after_d for before_d, after_d in zip(tdays[:-1], tdays[1:]) if before_d.month != after_d.month) - months_end = list(before_d for before_d, after_d in zip(tdays[:-1], tdays[1:]) if before_d.month != after_d.month) + tdays[-1:] + months_end = tdays[0:1] + list(after_d for before_d, after_d in zip(tdays[:-1], tdays[1:]) if before_d.month != after_d.month) + months_start = list(before_d for before_d, after_d in zip(tdays[:-1], tdays[1:]) if before_d.month != after_d.month) + tdays[-1:] if latest_month_end_tradeday is None: latest_month_end_tradeday = self.month_map.index[-1] if months_end[-1] > latest_month_end_tradeday: @@ -929,9 +930,8 @@ def create_pcf_ocf_ttm_m(self): del dat['Unnamed: 0'] panel[d] = dat print(d) - panel = pd.Panel(panel) - panel = panel.to_frame() - panel = panel.stack().unstack(level=(0,1)) + panel = pd.concat(panel, axis=0) + panel = panel.stack().unstack(level=(1, -1)) #------------------------------------------------------- #开始计算结果指标(月频) df_result = pd.DataFrame(index=all_stocks_info.index, columns=new_caldays) @@ -1001,8 +1001,8 @@ def create_profit_ttm_G_m(self): del dat['Unnamed: 0'] panel[d] = dat print(d) - panel = pd.Panel(panel) - panel = panel.to_frame() + panel = pd.concat(panel, axis=0) + panel = panel.stack().unstack(level=(1, -1)) ''' 2009-03-31 2009-06-30 2009-09-30 2009-12-31 2010-03-31 2010-06-30 2010-09-30 2010-12-31 2011-03-31 2011-06-30 2011-09-30 2011-12-31 2012-03-31 2012-06-30 2012-09-30 2012-12-31 2013-03-31 2013-06-30 2013-09-30 2013-12-31 2014-03-31 2014-06-30 2014-09-30 2014-12-31 2015-03-31 2015-06-30 2015-09-30 2015-12-31 2016-03-31 2016-06-30 2016-09-30 2016-12-31 2017-03-31 2017-06-30 2017-09-30 2017-12-31 2018-03-31 2018-06-30 2018-09-30 2018-12-31 
2019-03-31 2019-06-30 2019-09-30 2019-12-31 major minor diff --git a/single_factor_test.py b/single_factor_test.py index 9a0fb19..52eab1c 100644 --- a/single_factor_test.py +++ b/single_factor_test.py @@ -198,9 +198,7 @@ def test_yearly(factors=None, start_year=2012, end_year=2019): df.to_csv(os.path.join(sf_test_save_path, save_name+'.csv'), encoding='gbk') #存储检验结果表格 - test_result = pd.Panel(test_result) - test_result = test_result.swapaxes(2, 0) - test_result = test_result.swapaxes(1, 2) + test_result = pd.concat(test_result) test_result.to_excel(os.path.join(sf_test_save_path, 'T检验&IC检验结果.xlsx'), encoding='gbk') #绘制单因子检验图,并进行存储 @@ -676,8 +674,8 @@ def get_stock_weight(self, equal_weight=True): dates = self.factor_data.columns stk_weights = [self.get_stock_weight_by_group(self.factor_data[date], equal_weight) for date in dates] result = {date: stk_weight for date, stk_weight in zip(dates, stk_weights)} - result = pd.Panel.from_dict(result) - result = [result.minor_xs(group) for group in result.minor_axis] + result = pd.concat(result) + result = [result[col].unstack(level=0) for col in result.columns] return result def get_stock_weight_by_group(self, factor, equal_weight=False): @@ -749,8 +747,7 @@ def panel_to_matrix(factors, factor_path=factor_path, save_path=sf_test_save_pat date = pd.to_datetime(f.split('.')[0]) datpanel[date] = datdf[factors] - datpanel = pd.Panel(datpanel) - datpanel = datpanel.swapaxes(0, 2) + datpanel = pd.concat(datpanel) for factor in datpanel.items: dat = datpanel.loc[factor] save_name = factor.replace('/', '_div_') if '/' in factor else factor