#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:mj time:2020/7/28
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Render sans-serif text with the WenQuanYi Micro Hei font; the charts produced
# below carry Chinese titles and labels (presumably the font is installed on
# the target host -- TODO confirm).
matplotlib.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei']
# Draw negative tick labels with the ASCII hyphen rather than the Unicode minus.
matplotlib.rcParams['axes.unicode_minus'] = False
import json
import warnings
# NOTE: this silences every warning raised after this point, process-wide.
warnings.filterwarnings('ignore')

# 数据预处理函数
def data_pre(df):
    """Fill missing spot/futures prices from neighbouring rows and renumber the index.

    Every missing value in the '现货价格' (spot) and '期货价格' (futures) columns
    is replaced by the NaN-ignoring mean of the rows whose index labels lie
    within three of it on either side (labels ``item-3..item-1`` and
    ``item+1..item+3``).  Gaps are filled in index order, so an earlier fill
    may contribute to a later one.  A gap whose whole neighbourhood is missing
    stays NaN.

    Args:
        df: DataFrame with a sorted integer index and the two price columns.
            It is not modified.

    Returns:
        A filled copy of ``df`` with a fresh 0..n-1 index.
    """
    # Work on a copy: callers may hand in a filtered slice of a larger frame.
    filled = df.copy()
    for column in ('现货价格', '期货价格'):
        missing_labels = filled.index[filled[column].isnull().values]
        for item in missing_labels:
            before = filled.loc[(item - 3):(item - 1), column].values.tolist()
            after = filled.loc[(item + 1):(item + 3), column].values.tolist()
            # nanmean: a neighbour that is itself missing must not poison the fill.
            filled.loc[item, column] = np.nanmean(before + after)
    # Drop the old labels so later positional (iloc) access lines up with them.
    return filled.reset_index(drop=True)

# 特征工程函数
def feature_processing(data, index, high_level, low_level, product_type):
    """Build the feature vector for the row at position ``index``.

    Columns are read by position: 0 = date, 1 = spot price, 2 = futures price.
    The returned list holds, in order:

      0-1    spot price, futures price at ``index``
      2-4    mean (3 dp), std (3 dp) and 75% quantile of the spot price over
             the 60 rows before ``index``
      5-7    the same three statistics for the futures price
      8      basis (spot - futures) at ``index``
      (9)    spot/futures correlation over the previous 29 rows, 3 dp -- only
             when ``product_type`` is '螺纹钢', '热卷' or '铁矿石'
      next   1 if the basis is lower than it was 60 rows ago, else 0
      next   1 if the spot price is higher than it was 60 rows ago, else 0
      next   1 if the futures price is higher than it was 60 rows ago, else 0
      last 4 one-hot calendar quarter (Q1, Q2, Q3, Q4) of the row's date

    Args:
        data: DataFrame laid out as described above.
        index: Position of the row to describe; must be at least 60.
        high_level: Unused; kept for interface compatibility.
        low_level: Unused; kept for interface compatibility.
        product_type: Product name; decides whether the correlation is added.

    Returns:
        list: the feature values in the order given above.

    Raises:
        ValueError: if ``index`` is below 60 (a negative position would wrap
            around to the end of the frame and yield meaningless features).
    """
    window = 60
    i = index
    if i < window:
        raise ValueError(
            'feature_processing needs at least %d rows of history before index %r'
            % (window, index))

    spot_now = data.iloc[i, 1]
    futures_now = data.iloc[i, 2]
    spot_before = data.iloc[i - window, 1]
    futures_before = data.iloc[i - window, 2]
    basis_now = spot_now - futures_now

    # describe() -> [count, mean, std, min, 25%, 50%, 75%, max]
    spot_stats = data.iloc[(i - window):i, 1].describe().values
    futures_stats = data.iloc[(i - window):i, 2].describe().values

    feature = [
        spot_now,
        futures_now,
        np.round(spot_stats[1], 3),
        np.round(spot_stats[2], 3),
        spot_stats[6],
        # Rounded to 3 decimals like the spot mean (was rounded to an integer).
        np.round(futures_stats[1], 3),
        np.round(futures_stats[2], 3),
        futures_stats[6],
        basis_now,
    ]

    if product_type in ['螺纹钢', '热卷', '铁矿石']:
        recent_spot = data.iloc[(i - 29):i, 1]
        recent_futures = data.iloc[(i - 29):i, 2]
        feature.append(np.round(recent_spot.corr(recent_futures), 3))

    # Trend flags against the values 60 rows earlier.
    feature.append(1 if (spot_before - futures_before) - basis_now > 0 else 0)
    feature.append(1 if spot_now - spot_before > 0 else 0)
    feature.append(1 if futures_now - futures_before > 0 else 0)

    # Quarter is encoded by hand so training and prediction share one layout.
    quarter = (data.iloc[i, 0].month - 1) // 3
    feature.extend(1 if q == quarter else 0 for q in range(4))
    return feature

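# input_path names the Excel workbook holding the price history up to and including
# today; its columns are, in order: 日期 (date), 现货价格 (spot price), 期货价格 (futures price).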
def Model_BuyTaobao(input_path, out_path, days, product_type):
    """Train the buy-hedge classifier and publish the recommendation for the latest day.

    Steps:
      1. Read the price history (columns by position: date, spot, futures)
         and clean it according to ``product_type``.
      2. Score every row that has ``days`` rows after it by how far the basis
         (spot - futures) falls at 1/3, 2/3 and the end of the holding period,
         and turn the score into a label 0-3 using the 30th/80th percentiles
         of the positive scores.
      3. Build features for every row from position 60 on, fit a random
         forest on the labelled rows and predict all rows.
      4. Plot prices since 2019 with the historical buy signals, save the
         chart as ``<out_path>/tksmr.png`` and write the recommendation to
         ``<out_path>/<product_type>买入套保推荐结果.json``.

    Args:
        input_path: Path of the Excel workbook with the price history.
        out_path: Directory that receives the PNG and JSON outputs.
        days: Holding period in rows (int).
        product_type: One of '螺纹钢', '热卷', '铁矿石', '焦煤', '焦炭'.

    Returns:
        str: the latest date followed by the recommendation text.

    Raises:
        ValueError: for an unsupported ``product_type``, for a history too
            short to build features or labelled samples, or when the latest
            row is not part of the plotted range (dates after 2018-12-31).
    """
    import string
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    window = 60  # rows of history feature_processing looks back over

    df = pd.read_excel(input_path)
    if product_type == '螺纹钢':
        # Keep only 2017 onwards plus 2010-2013; other products use the full history.
        recent = df.日期 > '2017-1-1'
        early = (df.日期 > '2009-12-31') & (df.日期 <= '2013-12-31')
        data = data_pre(df[recent | early])
    elif product_type in ['热卷', '铁矿石']:
        data = data_pre(df)
    elif product_type in ['焦煤', '焦炭']:
        data = df.dropna().reset_index(drop=True)
    else:
        raise ValueError('Unsupported product_type: %r' % (product_type,))

    n_rows = len(data)
    if n_rows <= window:
        raise ValueError(
            'Need more than %d rows of price history, got %d' % (window, n_rows))

    # Basis-weakening score for every row that has a full holding period after it.
    basis = (data.iloc[:, 1] - data.iloc[:, 2]).values
    first_third = int(days / 3) - 1
    second_third = int(days * 2 / 3) - 1
    period_end = days - 1
    guancha = {}
    for i in range(n_rows - days):
        start = basis[i]
        guancha[i] = ((start - basis[i + first_third])
                      + (start - basis[i + second_third])
                      + (start - basis[i + period_end]))

    biaoxing_list = [score for score in guancha.values() if score > 0]
    low_level = np.percentile(biaoxing_list, 30)
    high_level = np.percentile(biaoxing_list, 80)

    # Label: 3 above the 80th percentile, 2 between the two percentiles,
    # 1 positive but below the 30th percentile, 0 otherwise.
    dict_label = {}
    for i, panduan in guancha.items():
        if panduan > high_level:
            label = 3
        elif low_level <= panduan <= high_level:
            label = 2
        elif 0 < panduan < low_level:
            label = 1
        else:
            label = 0
        dict_label[i] = label

    # Features for every row with a full look-back window (all are predicted later).
    train = {}
    time_index_z = []
    for i in range(window, n_rows):
        train[i] = feature_processing(data, i, high_level, low_level, product_type)
        time_index_z.append(data.iloc[i, 0])
    feature = train[n_rows - 1]  # features of the latest day, quoted in the chart
    XX = list(train.values())
    DF_Z = pd.DataFrame(XX, columns=list(string.ascii_letters[0:len(XX[0])]), index=time_index_z)

    # Only rows that also carry a label can be used for fitting.
    labelled = [key for key in train if key in dict_label]
    if not labelled:
        raise ValueError(
            'No labelled samples: need more than %d rows, got %d' % (window + days, n_rows))
    X = [train[key] for key in labelled]
    y = [dict_label[key] for key in labelled]
    time_index = [data.iloc[key, 0] for key in labelled]
    df_X = pd.DataFrame(X, columns=list(string.ascii_letters[0:len(X[0])]), index=time_index)
    X = df_X.values

    # Random 90/10 split, then fit the random forest.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.1, random_state=1)
    tree_model = RandomForestClassifier(max_depth=10, criterion='entropy', random_state=1)
    tree_model.fit(train_X, train_y)
    print('决策树模型训练得分:', tree_model.score(train_X, train_y))
    print('决策树模型测试得分:', tree_model.score(test_X, test_y))
    result_Z = tree_model.predict(XX)
    DF_Z['所有最终结果'] = list(result_Z)

    today = data.iloc[-1, 0]
    today_text = today.strftime("%Y-%m-%d")
    today_level = result_Z[-1]

    # Plot data: an independent copy, so adding columns never touches ``data``.
    hui_data = data[data.日期 > '2018-12-31'].copy()
    if hui_data.empty:
        raise ValueError('No rows dated after 2018-12-31 to plot')
    hui_data.set_index(["日期"], inplace=True)
    plot_days = hui_data.index.tolist()
    # After set_index, positional column 1 is the futures price.
    hui_data['结果'] = [
        hui_data.iloc[i, 1] if item.strftime("%Y-%m-%d") == today_text else np.nan
        for i, item in enumerate(plot_days)]
    # Days inside the first look-back window have no prediction and get no marker.
    tianjialie1 = [
        hui_data.iloc[i, 1]
        if item in DF_Z.index and DF_Z.loc[item, '所有最终结果'] > 0 else np.nan
        for i, item in enumerate(plot_days)]
    tianjialie1[-1] = np.nan
    hui_data['历史结果'] = tianjialie1

    today_rows = [(a, b) for a, b in zip(hui_data.index, hui_data['结果'])
                  if a.strftime("%Y-%m-%d") == today_text]
    if not today_rows:
        raise ValueError('Latest date %s is not within the plotted range' % today_text)
    marker_day, marker_price = today_rows[0]

    if today_level == 0:
        annatation = '不适合对' + product_type + '当前活跃合约进行买入操作'
        marker_text = '不买入'
    else:
        stars = int(today_level)
        annatation = ('适合对' + product_type + '当前活跃合约进行买入操作,买入等级'
                      + str(stars) + '颗星,持仓时间为' + str(days) + '天')
        marker_text = '买入\n' + '*' * stars

    quarter = (today.month - 1) // 3 + 1
    info = '参考要素:基差' + str(np.round(feature[8], 2)) + ',现期绝对价格' + str(np.round(feature[0], 2)) + '/' + str(
        np.round(feature[1], 3)) + ',当天为第' + str(quarter) + '季度'

    fig = plt.figure(figsize=(8, 5))
    try:
        plt.plot(hui_data.index, hui_data['现货价格'], label='现货价格')
        plt.plot(hui_data.index, hui_data['期货价格'], label='期货价格')
        plt.xticks(rotation=30)
        plt.title('期现买入套保模型策略（' + product_type + ')', fontsize=20)
        plt.xlabel(info)
        plt.plot(hui_data.index, hui_data['历史结果'], 'o', ms=2, c='r', label='适合买入套保点')
        if today_level != 0:
            plt.plot(hui_data.index, hui_data['结果'], 'o', ms=10, c='r')
        plt.text(marker_day, marker_price + 15, marker_text, ha='center', va='bottom', fontsize=20, c='r')
        plt.gcf().autofmt_xdate()
        plt.ylabel('期/现货价格', fontsize=10)
        plt.legend(loc='upper left')
        plt.savefig(out_path + '/tksmr.png', dpi=100)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    # e.g. "2020-04-23不适合对XX当前活跃合约进行买入操作"
    result_output = today_text + annatation
    result_json = [result_output, info]
    with open(out_path + '/' + product_type + '买入套保推荐结果.json', 'w', encoding='utf-8') as f:
        json.dump(result_json, f, ensure_ascii=False)
    return result_output

if __name__ == "__main__":
    # Run the buy-hedge model once with the deployment settings.
    # Supported product types: 螺纹钢, 热卷, 铁矿石, 焦煤, 焦炭.
    result_output = Model_BuyTaobao(
        input_path="/www/wwwroot/file.jsfsdata.com/tks_mr.xls",  # absolute path of the input workbook
        out_path="/www/wwwroot/file.jsfsdata.com/rs",  # absolute directory for the PNG/JSON outputs
        days=30,  # holding period
        product_type='铁矿石',
    )
    print(result_output)