# CTA_Factor_Analysis/runtime/data_clean.py
#
# 59 lines
# 2.1 KiB
# Python

import importlib
import inspect
from copy import copy
import pandas as pd
import colorama
import settings
from rich.progress import track
from rich import print
from common.helpers import create_dir_not_exist
# --- Load the kline history for settings.symbol and derive forward returns ---
klines_folder_path = '../data/klines/'
filename = ''.join([klines_folder_path, settings.symbol, '-USDT.csv'])
start_date = pd.to_datetime(settings.start_time)
end_date = pd.to_datetime(settings.end_time)
colorama.init(autoreset=True)

# skiprows=1 drops the first physical line of the CSV before the header row is
# read (presumably a banner/notice line in the data file -- confirm).
df = pd.read_csv(filename, encoding='gbk', skiprows=1, parse_dates=['candle_begin_time'])

# Move every timestamp forward by one hour minus one second.
# NOTE(review): this looks like relabelling 1-hour candles from their open time
# to their close time; the 1-hour width is hard-coded here -- confirm.
_bar_end_offset = pd.Timedelta(hours=1) - pd.Timedelta(seconds=1)
df['candle_begin_time'] = df['candle_begin_time'] + _bar_end_offset
df = (
    df.rename(columns={'candle_begin_time': 'timestamp'})
    .set_index('timestamp')
    .sort_index()
)

# Forward return: row t holds the percentage change of `close` from row t to
# the row `signal_hold_hours` rows later (pct_change looks back, shift(-n)
# re-aligns it onto the starting row). Trailing rows without a future value
# are dropped. It is computed on the full history *before* trimming to the
# analysis window, so rows near end_date can still use prices after end_date.
_hold_periods = settings.signal_hold_hours
_forward_change = df['close'].pct_change(_hold_periods).shift(-_hold_periods)
ret = _forward_change.dropna(how='all').to_frame()

# Restrict both frames to the [start_date, end_date] window.
df = df[start_date:end_date]
ret = ret[start_date:end_date]
ret = ret.rename(columns={'close': 'ret'})
# --- Locate the factor implementation and its parameter grid -----------------
# The factor module is selected at runtime through settings.factor_name and is
# expected to expose:
#   * `param_traversal()` -- called with no arguments; its return value is
#     iterated later, one entry per factor computation;
#   * a function defined in that module itself, which is the factor
#     implementation.
_factor_module_path = f'factors.{settings.factor_name}'
module = importlib.import_module(_factor_module_path)

# Keep only functions defined in the factor module itself (not ones it merely
# imported), excluding the parameter generator.
_factor_candidates = [
    (name, obj)
    for name, obj in inspect.getmembers(module, inspect.isfunction)
    if obj.__module__ == _factor_module_path and name != 'param_traversal'
]
if not _factor_candidates:
    # Fail fast with a descriptive message instead of leaving an empty tuple
    # behind that only blows up later with an opaque IndexError.
    raise LookupError(
        f"No factor implementation found in module '{_factor_module_path}': "
        "expected a function other than 'param_traversal' defined in that module."
    )

# inspect.getmembers returns members sorted by name, so when the module defines
# several functions the alphabetically last one is used (unchanged behaviour).
# NOTE(review): consider enforcing exactly one implementation per factor module.
factor_implement = _factor_candidates[-1]  # (function name, function object)

factor_params_list = module.param_traversal()
# --- Persist forward returns and one factor file per parameter set -----------
# Create the per-factor cache directory first, then the per-symbol directory
# nested inside it.
for _cache_dir in (
    f'../cache/{settings.factor_name}',
    f'../cache/{settings.factor_name}/{settings.symbol}',
):
    create_dir_not_exist(_cache_dir)

ret.to_pickle(f'../cache/{settings.factor_name}/ret.pkl')

_param_set_count = len(factor_params_list)
print(_param_set_count)

# The progress-bar label below is a runtime string (roughly: "computing factor
# exposure for each parameter set...") and is kept verbatim.
for params in track(factor_params_list, description='[green]逐参数计算因子暴露度...', total=_param_set_count):
    factor_name, implementation = factor_implement[0], factor_implement[1]

    # Each call gets its own copy of df (presumably so a factor function that
    # mutates its input cannot affect later parameter sets); the parameter
    # entry is unpacked into positional arguments.
    # NOTE(review): `.to_frame()` implies the implementation returns a Series.
    factor_value = (
        implementation(copy(df), *params)
        .dropna(axis=0, how='all')
        .to_frame(name='factorVal')
    )

    # The file name embeds the string form of the parameter entry.
    _file_name = f"{factor_name}_{params}.pkl"
    factor_value.to_pickle(f'../cache/{settings.factor_name}/{settings.symbol}/{_file_name}')