# CTA_Factor_Analysis/runtime/analysis.py

# 65 lines
# 3.4 KiB
# Python

import os
import re
import colorama
import pandas as pd
from rich.progress import track
import settings
import common.helpers as helpers
_PARAMETER_PATTERN = re.compile(r"\d+")


def extract_parameter(name):
    """Return the first run of digits found in *name* as an int.

    Args:
        name: Factor file stem (file name without the ``.pkl`` suffix).

    Returns:
        The integer value of the first digit run in *name*.

    Raises:
        ValueError: If *name* contains no digits at all.
    """
    match = _PARAMETER_PATTERN.search(name)
    if match is None:
        raise ValueError(
            f"Cannot extract a numeric parameter from factor file name {name!r}")
    return int(match.group())


def summarize_rolling_ic(name, rolling_ic_df):
    """Build the one-row rolling-IC report for a single factor parameter.

    Args:
        name: Label stored under the ``'name'`` key of the report.
        rolling_ic_df: Frame of rolling IC values; only its first column is
            summarised.

    Returns:
        A dict with the name plus the mean, standard deviation and their ratio
        (mean / std), each rounded to 4 decimals.
    """
    ic_mean = rolling_ic_df.mean().iloc[0]
    ic_std = rolling_ic_df.std().iloc[0]
    return {
        'name': name,
        'rolling IC mean': round(ic_mean, 4),
        'rolling IC std': round(ic_std, 4),
        'rolling IR': round(ic_mean / ic_std, 4),
    }


if __name__ == '__main__':
    # Script entry point. For every cached factor file of the configured
    # factor/symbol it computes a rolling IC against the cached returns,
    # summarises it, plots IC mean against the numeric parameter, keeps the
    # parameters whose |rolling IC mean| reaches the threshold, and runs the
    # grouping helper on each survivor.
    colorama.init(autoreset=True)

    cache_dir = f"../cache/{settings.factor_name}"
    symbol_dir = f"{cache_dir}/{settings.symbol}"
    ic_threshold = 0.05  # minimum |rolling IC mean| a parameter must reach

    ret = pd.read_pickle(f"{cache_dir}/ret.pkl")

    # Factor files are matched on the snake_case form of the factor name
    # ("MyFactor" -> "my_factor"). Computed once, not once per directory entry.
    factor_tag = re.sub(r'(?<!^)([A-Z])', r'_\1', settings.factor_name).lower()
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and everything derived from it) reproducible.
    filenames = sorted(
        f for f in os.listdir(symbol_dir)
        if f.endswith('.pkl') and factor_tag in f
    )
    if not filenames:
        raise SystemExit(
            f"No cached factor files matching '{factor_tag}' found in {symbol_dir}")

    reports = []
    rolling_ic_df_collection = pd.DataFrame()
    factor_dict = {}
    for _filename in track(filenames, description="[green]整理因子数据和收益率数据,逐参数IC检验"):
        _factor_value = pd.read_pickle(f"{symbol_dir}/{_filename}")
        factor_name = _filename.replace('.pkl', '')
        factor_dict[factor_name] = _factor_value

        # Restrict returns to the span covered by this factor. Only the slice
        # is copied, so the helper still receives an object independent of `ret`.
        _ret = ret.loc[_factor_value.index[0]:_factor_value.index[-1]].copy()

        # NOTE(review): 24 * 30 is presumably the rolling window length
        # (30 days of hourly bars?) — confirm against helpers.fast_ts_rolling_IC.
        _rolling_ic_df = helpers.fast_ts_rolling_IC(_factor_value, _ret, factor_name, 24 * 30)
        reports.append(summarize_rolling_ic(factor_name, _rolling_ic_df))

        # NOTE(review): once the frame has an index, pandas aligns each new
        # column to it, so rows outside the first factor's index are not kept —
        # confirm this is intended.
        rolling_ic_df_collection[factor_name] = _rolling_ic_df

        # NOTE(review): 100 is presumably the histogram bin count — confirm.
        helpers.factor_exposure_hist(_factor_value, factor_name, 100)

    # Build the summary table once instead of re-concatenating inside the loop.
    ic_performance_collection = pd.DataFrame(reports)

    # Extract the numeric parameter from each name, sort by it, and plot the
    # rolling IC mean as a function of the parameter.
    ic_performance_collection["parameter"] = ic_performance_collection["name"].apply(
        extract_parameter)
    ic_performance_collection = ic_performance_collection.sort_values("parameter")
    parameter_ic_df = ic_performance_collection[['parameter', 'rolling IC mean']].copy()
    helpers.parameters_IC_figure(parameter_ic_df, "rolling IC mean")
    ic_performance_collection = ic_performance_collection.drop(columns='parameter')

    # Rank by absolute IC (strongest first), then keep only the parameters
    # that reach the threshold.
    ic_performance_collection = (
        ic_performance_collection
        .assign(**{'IC abs': ic_performance_collection['rolling IC mean'].abs()})
        .sort_values('IC abs', ascending=False)
        .drop(columns='IC abs')
    )
    ic_performance_collection = ic_performance_collection[
        ic_performance_collection['rolling IC mean'].abs() >= ic_threshold
    ]
    print(ic_performance_collection)

    if ic_performance_collection.empty:
        # "No parameter passed the factor IC test" — nothing left to plot or group.
        print("未有通过因子IC检验的参数")
    else:
        selected_names = ic_performance_collection['name'].tolist()
        rolling_ic_df_collection = rolling_ic_df_collection[selected_names]
        helpers.cum_ic_merge_figure_v2(rolling_ic_df_collection)

        selected_ic_means = ic_performance_collection['rolling IC mean'].values
        for i, _ic_mean in zip(selected_names, selected_ic_means):
            _factor_value = factor_dict[i]
            # Fresh copy per call so the helper cannot modify the shared `ret`.
            _ret = ret.copy()
            # helpers.cta_ts_group_by_ret was previously called here with the
            # same arguments. NOTE(review): 5 is presumably the number of
            # groups — confirm against helpers.cta_ts_group_v2.
            helpers.cta_ts_group_v2(_factor_value, _ret, f"{i}_{settings.symbol}", _ic_mean, 5)