# CTA_Factor_Analysis/runtime/analysis.py

import os
import re

import colorama
import pandas as pd
from rich.progress import track

import settings
import common.helpers as helpers

# Rolling window length passed to helpers.fast_ts_rolling_IC.
# NOTE(review): 24 * 30 presumably means 30 days of hourly bars -- confirm.
ROLLING_IC_WINDOW = 24 * 30

# Minimum |mean rolling IC| a parameter needs in order to be kept for the
# cumulative-IC and grouping plots.
IC_THRESHOLD = 0.05


def camel_to_snake(name):
    """Convert a CamelCase identifier to snake_case.

    An underscore is inserted before every uppercase letter except one at the
    very start of the string, then the whole result is lowercased.
    """
    return re.sub(r'(?<!^)([A-Z])', r'_\1', name).lower()


def extract_parameter(factor_name):
    """Return the first run of decimal digits in *factor_name* as an int.

    Raises:
        ValueError: if *factor_name* contains no digits.
    """
    match = re.search(r"\d+", factor_name)
    if match is None:
        raise ValueError(f"no numeric parameter found in factor name {factor_name!r}")
    return int(match.group())


def summarize_rolling_ic(factor_name, rolling_ic_df):
    """Build one report row (mean, std and IR) from a rolling-IC frame.

    Only the first column of *rolling_ic_df* is summarised. All statistics
    are rounded to four decimals; the IR is the unrounded mean divided by the
    unrounded standard deviation.
    """
    ic_mean = rolling_ic_df.mean().iloc[0]
    ic_std = rolling_ic_df.std().iloc[0]
    return {
        'name': factor_name,
        'rolling IC mean': round(ic_mean, 4),
        'rolling IC std': round(ic_std, 4),
        'rolling IR': round(ic_mean / ic_std, 4),
    }


if __name__ == '__main__':
    colorama.init(autoreset=True)

    # Cache paths are relative to the current working directory.
    # NOTE: pickle files must come from a trusted source; unpickling can
    # execute arbitrary code.
    factor_cache_dir = f"../cache/{settings.factor_name}"
    symbol_cache_dir = f"{factor_cache_dir}/{settings.symbol}"
    ret = pd.read_pickle(f"{factor_cache_dir}/ret.pkl")

    # The snake_case name is loop-invariant, so compute it once. Sorting makes
    # the processing order deterministic (os.listdir order is arbitrary).
    snake_factor_name = camel_to_snake(settings.factor_name)
    filenames = sorted(
        f for f in os.listdir(symbol_cache_dir)
        if f.endswith('.pkl') and snake_factor_name in f
    )
    if not filenames:
        raise SystemExit(
            f"no cached factor files matching {snake_factor_name!r} in {symbol_cache_dir}")

    # Collect report rows in a list and build the frame once after the loop;
    # concatenating inside the loop re-copies all previous rows every time.
    reports = []
    rolling_ic_df_collection = pd.DataFrame()
    factor_dict = {}
    for _filename in track(filenames, description="[green]整理因子数据和收益率数据,逐参数IC检验"):
        _factor_value = pd.read_pickle(f"{symbol_cache_dir}/{_filename}")
        factor_name = _filename.replace('.pkl', '')
        factor_dict[factor_name] = _factor_value
        # Restrict returns to the span covered by this factor; copy only the
        # slice so the helper cannot modify the shared `ret` frame.
        _ret = ret.loc[_factor_value.index[0]:_factor_value.index[-1]].copy()
        _rolling_ic_df = helpers.fast_ts_rolling_IC(
            _factor_value, _ret, factor_name, ROLLING_IC_WINDOW)
        reports.append(summarize_rolling_ic(factor_name, _rolling_ic_df))
        rolling_ic_df_collection[factor_name] = _rolling_ic_df
        helpers.factor_exposure_hist(_factor_value, factor_name, 100)

    ic_performance_collection = pd.DataFrame(reports)

    # Extract the numeric parameter from each factor name and plot the mean
    # rolling IC against it, ordered by parameter.
    ic_performance_collection["parameter"] = ic_performance_collection["name"].apply(
        extract_parameter)
    ic_performance_collection = ic_performance_collection.sort_values("parameter")
    parameter_ic_df = ic_performance_collection[['parameter', 'rolling IC mean']].copy()
    helpers.parameters_IC_figure(parameter_ic_df, "rolling IC mean")
    ic_performance_collection = ic_performance_collection.drop(columns='parameter')

    # Keep only parameters whose |mean rolling IC| reaches the threshold,
    # strongest first.
    abs_ic = ic_performance_collection['rolling IC mean'].abs()
    ranked_index = abs_ic[abs_ic >= IC_THRESHOLD].sort_values(ascending=False).index
    ic_performance_collection = ic_performance_collection.loc[ranked_index]
    if ic_performance_collection.empty:
        # Nothing passed the IC test, so there is nothing left to plot.
        print("未有通过因子IC检验的参数")
        raise SystemExit(0)
    print(ic_performance_collection)

    passed_names = ic_performance_collection['name'].tolist()
    rolling_ic_df_collection = rolling_ic_df_collection[passed_names]
    helpers.cum_ic_merge_figure_v2(rolling_ic_df_collection)

    # An earlier revision called helpers.cta_ts_group_by_ret here with the
    # same arguments.
    passed_ic_means = ic_performance_collection['rolling IC mean'].values
    for _name, _ic_mean in zip(passed_names, passed_ic_means):
        # Each call gets its own copy of the full return frame.
        helpers.cta_ts_group_v2(
            factor_dict[_name], ret.copy(), f"{_name}_{settings.symbol}", _ic_mean, 5)