# 台股選股因子實測(2018~) — 本文「五單因子 vs 0050」所有數字的可追溯來源
# 環境:finlab(conda)。對照基準 0050 一律用 etl:adj_close(含息,品質引擎鐵則)。
# 跑法:conda activate finlab && python factor-comparison.py
import warnings; warnings.filterwarnings("ignore")
import numpy as np, pandas as pd
from finlab import data
from finlab.backtest import sim

# Daily panels pulled from finlab; `close` also supplies the calendar that the
# lower-frequency series below are aligned to.
close = data.get('price:收盤價')
adj = data.get('etl:adj_close')
pe = data.get('price_earning_ratio:本益比')
pb = data.get('price_earning_ratio:股價淨值比')

# ROE: the string index is converted to dates with finlab's
# index_str_to_date() (presumably period labels -> dates; confirm against the
# finlab docs), then forward-filled onto the row index of `close`.
roe = (
    data.get('fundamental_features:ROE稅後')
    .index_str_to_date()
    .reindex(close.index, method='ffill')
)

# Monthly revenue YoY change (%), forward-filled onto the row index of `close`.
rev = (
    data.get('monthly_revenue:去年同月增減(%)')
    .reindex(close.index, method='ffill')
)

# First date (inclusive) of every comparison in this script.
START = '2018-01-01'

# Benchmark: buy & hold of 0050 on the etl:adj_close series, from START on.
e = adj['0050'].dropna()
e = e[e.index >= START]

# Compound annual growth rate over the calendar span of the series.
elapsed_days = (e.index[-1] - e.index[0]).days
yrs = elapsed_days / 365.25
growth_multiple = e.iloc[-1] / e.iloc[0]
cagr0050 = growth_multiple ** (1 / yrs) - 1

# Annualised Sharpe from row-to-row returns (252 periods per year, no
# risk-free rate subtracted).
ret0050 = e.pct_change().dropna()
sharpe0050 = ret0050.mean() / ret0050.std() * (252 ** 0.5)

# Maximum drawdown: worst decline relative to the running peak.
drawdown0050 = (e / e.cummax()) - 1
mdd0050 = drawdown0050.min()

def topN(df, n=30, largest=True, lo=None, hi=None):
    """Return a boolean frame marking, per row, the entries ranked within ``n``.

    Args:
        df: Table of factor values; it is copied into a plain float DataFrame.
        n: Rank cutoff; an entry is selected when its row-wise rank is <= n.
        largest: If True the largest values rank first, otherwise the smallest.
        lo: Optional exclusive lower bound; values not strictly above it are
            blanked to NaN before ranking.
        hi: Optional exclusive upper bound; values not strictly below it are
            blanked to NaN before ranking.

    Returns:
        A boolean DataFrame with the same shape as ``df``. NaN entries are
        never selected. Ranking uses pandas' default tie handling (average
        rank), so ties at the cutoff can make a row select slightly fewer or
        more than ``n`` entries.
    """
    values = pd.DataFrame(df).astype(float)

    # Apply the optional exclusive bounds; anything outside becomes NaN.
    if lo is not None:
        above_lo = values > lo
        values = values.where(above_lo)
    if hi is not None:
        below_hi = values < hi
        values = values.where(below_hi)

    # Rank across each row; rank 1 is the best entry for the chosen direction.
    rank_ascending = not largest
    row_ranks = values.rank(axis=1, ascending=rank_ascending)
    return row_ranks <= n

def run(name, pos):
    """Backtest one position table and print its headline statistics.

    Args:
        name: Label printed at the start of the result line.
        pos: Position table indexed by date; rows before START are dropped.

    The table is passed to finlab's ``sim`` with ``resample='M'`` and
    ``upload=False``. The CAGR, daily Sharpe and max drawdown entries of the
    resulting stats are printed on a single line.
    """
    from_start = pos.index >= START
    report = sim(pos[from_start], resample='M', upload=False)
    s = report.get_stats()
    print(f"{name:8s} CAGR={s['cagr']:.4f} Sharpe={s.get('daily_sharpe'):.4f} MDD={s['max_drawdown']:.4f}")

print("== 五單因子(各選 30 檔、月頻換股、2018~)==")

# 60-row price return used as the momentum factor.
# NOTE(review): this is computed on `close` (unadjusted price:收盤價), so
# ex-dividend / capital-change price gaps feed into the return — confirm this
# is intended rather than etl:adj_close.
momentum_60 = close / close.shift(60) - 1

# (label, factor table, topN options); each is ranked and backtested in order.
single_factors = [
    ('低本益比', pe, dict(largest=False, lo=0, hi=100)),
    ('低股價淨值比', pb, dict(largest=False, lo=0, hi=10)),
    ('高ROE', roe, dict(largest=True)),
    ('價格動能', momentum_60, dict(largest=True)),
    ('營收動能', rev, dict(largest=True, lo=0)),
]
for label, factor, options in single_factors:
    run(label, topN(factor, n=30, **options))

# Benchmark line, printed last for side-by-side comparison.
print(f"\n0050(含息) CAGR={cagr0050:.4f} Sharpe={sharpe0050:.4f} MDD={mdd0050:.4f}")
