import os
import sys

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
import numpy as np
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, date, timedelta
from tqdm import tqdm

sns.set()
import json

tf.compat.v1.random.set_random_seed(1234)
from matplotlib import style

# import matplotlib.backends.backend_qt5agg
# %matplotlib qt
style.use('ggplot')
import math
import yfinance as yf
import time

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # force CPU
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import cProfile
import pstats

# In[34]:

symbols = os.listdir()
symbols

tf.compat.v1.disable_eager_execution()

# INITIAL VARS
test_size = 14
simulation_size = 1

# MODEL VARS
num_layers = 2
size_layer = 128
timestamp = 7
epoch = 20
dropout_rate = 0.8
prediction_gap = sys.argv[2]  # tolerance band in tenths of a price unit; see checkaccuracy()
future_day = test_size
learning_rate = 0.01
graph_loss = []

# In[35]:

# Necessary Dirs
# def date_manage(date1, date2=None):
#     if date2 is None:
#         date2 = date1 + timedelta(days=365)
#     date_col = []
#     for n in range(int((date2 - date1).days) + 1):
#         date_col.append(date1 + timedelta(n))
#     weekdays = [5, 6]
#     date_result = []
#     for dt in date_col:
#         if dt.weekday() not in weekdays:
#             date_result.append(dt.strftime("%Y-%m-%d"))
#     return date_result

# In[36]:

def loss_animate(ax, i):
    # NOTE: relies on a global total_loss; unused in the current pipeline.
    json_loss = pd.DataFrame(total_loss)
    ax.plot(i)
    return ax


def loader(symbol, test_size, date):
    # dateparse = lambda dates: pd.datetime.strptime(dates, '%Y-%m')
    # df = pd.read_csv('../dataset/IBMCUT.csv', parse_dates=['Date'], index_col='Date', date_parser=dateparse)
    # Pull three years of daily bars ending at `date` from Yahoo Finance.
    df = yf.Ticker(symbol)
    # df = df.history(period="1y", interval="1d")
    # df = df.history(start=date - timedelta(days=365), end=date, interval="1d")
    df = df.history(start=date - timedelta(days=365 * 3), end=date, interval="1d")
    df = df.reset_index(level=0)
    # df = df.drop(columns=['Dividends'], axis=1)
    df = df.drop(columns=['Stock Splits'], axis=1)
    # Bollinger-style envelopes: EWM of High/Low +/- 2 rolling standard deviations.
    df['Up'] = df['High'].ewm(span=6, adjust=False).mean() + 2 * df['High'].rolling(window=6).std()
    df['Down'] = df['Low'].ewm(span=8, adjust=False).mean() - 2 * df['Low'].rolling(window=8).std()
    df = df.dropna()
    df = df.drop(df.tail(5).index)
    date_ori = pd.to_datetime(df.iloc[:, 0]).tolist()
    # for i in range(test_size):
    #     # date_ori.append(date_ori[-1] + timedelta(days=1))
    #     add = 1
    #     while ((date_ori[-1]) + timedelta(days=add)).weekday() in [5, 6]:
    #         add = add + 1
    #     date_ori.append(date_ori[-1] + timedelta(days=add))
    date_ori = pd.Series(date_ori).dt.strftime(date_format='%Y-%m-%d').tolist()
    print(len(df), len(date_ori))
    return df, date_ori


def trueloader(symbol, test_size, date):
    # Same window as loader(), kept unscaled to serve as the ground truth.
    # df2 = pd.read_csv(symbol)
    df2 = yf.Ticker(symbol)
    # df2 = df2.history(start=date - timedelta(days=365), end=date, interval="1d")
    df2 = df2.history(start=date - timedelta(days=365 * 3), end=date, interval="1d")
    df2 = df2.reset_index(level=0)
    df2 = df2.drop(columns=['Dividends'], axis=1)
    df2 = df2.drop(columns=['Stock Splits'], axis=1)
    df2 = df2.drop(df2.head(7).index)
    # df2 = df2.drop(df2.tail(test_size).index)
    return df2

# In[38]:

def preproc(df):
    minmax = MinMaxScaler().fit(df.iloc[:, 1:9].astype('float32'))  # Close, Volume, and all
    df_log = minmax.transform(df.iloc[:, 1:9].astype('float32'))
    df_log = pd.DataFrame(df_log)
    return df_log, minmax
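# A quick sanity check (kept commented out) for the scaling round trip above:
# preproc() fits a MinMaxScaler on columns 1:9 of the price frame, and
# forecast() later maps predictions back with minmax.inverse_transform(). The
# toy frame below is hypothetical, just to show that the transform/inverse
# pair recovers the original values.
#
# toy = pd.DataFrame({
#     'Date': pd.date_range('2020-01-01', periods=4),
#     'Open': [10.0, 11.0, 12.0, 13.0],
#     'Close': [10.5, 11.5, 12.5, 13.5],
# })
# scaler = MinMaxScaler().fit(toy.iloc[:, 1:3].astype('float32'))
# scaled = scaler.transform(toy.iloc[:, 1:3].astype('float32'))  # values in [0, 1]
# restored = scaler.inverse_transform(scaled)                    # back to price space
# assert np.allclose(restored, toy.iloc[:, 1:3].values, atol=1e-4)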
# In[39]:

class Model:
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias=0.1,
    ):
        def lstm_cell(size_layer):
            return tf.compat.v1.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

        rnn_cells = tf.compat.v1.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        self.X = tf.compat.v1.placeholder(tf.float32, (None, None, size))
        self.Y = tf.compat.v1.placeholder(tf.float32, (None, output_size))
        drop = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
            rnn_cells, output_keep_prob=forget_bias
        )
        # Flattened LSTM state: (c, h) per layer, hence num_layers * 2 * size_layer.
        self.hidden_layer = tf.compat.v1.placeholder(
            tf.float32, (None, num_layers * 2 * size_layer)
        )
        self.outputs, self.last_state = tf.compat.v1.nn.dynamic_rnn(
            drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32
        )
        self.logits = tf.compat.v1.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )


def calculate_accuracy(real, predict):
    real = np.array(real) + 1
    predict = np.array(predict) + 1
    percentage = 1 - np.sqrt(np.mean(np.square((real - predict) / real)))
    return percentage * 100


def anchor(signal, weight):
    # Exponential smoothing: blend each raw value with the previous smoothed one.
    buffer = []
    last = signal[0]
    for i in signal:
        smoothed_val = last * weight + (1 - weight) * i
        buffer.append(smoothed_val)
        last = smoothed_val
    return buffer

# In[40]:

def main_train(df_beta, df_train, df, minmax):
    modelnn = Model(
        learning_rate,
        num_layers,
        df_beta.shape[1],
        size_layer,
        df_beta.shape[1],
        dropout_rate,
    )
    sess = tf.compat.v1.Session(
        config=tf.compat.v1.ConfigProto(
            intra_op_parallelism_threads=64, inter_op_parallelism_threads=64
        )
    )
    sess.run(tf.compat.v1.global_variables_initializer())
    pbar = tqdm(range(10), desc='Main train loop')  # Default 500 range
    for i in pbar:
        init_value = np.zeros((1, num_layers * 2 * size_layer))
        total_loss, total_acc = [], []
        for k in range(0, df_train.shape[0] - 1, timestamp):
            index = min(k + timestamp, df_train.shape[0] - 1)
            batch_x = np.expand_dims(df_train.iloc[k:index, :].values, axis=0)
            batch_y = df_train.iloc[k + 1 : index + 1, :].values
            logits, last_state, _, loss = sess.run(
                [modelnn.logits, modelnn.last_state, modelnn.optimizer, modelnn.cost],
                feed_dict={
                    modelnn.X: batch_x,
                    modelnn.Y: batch_y,
                    modelnn.hidden_layer: init_value,
                },
            )
            init_value = last_state
            total_loss.append(loss)
            total_acc.append(calculate_accuracy(batch_y[:, 0], logits[:, 0]))
        graph_loss.append(np.mean(total_loss))
        # np.save(loss_file, np.array(graph_loss))
        pbar.set_postfix(
            cost=np.mean(total_loss),
            min_acc=np.min(total_acc),
            mean_acc=np.mean(total_acc),
        )
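# The training loop above uses truncated backprop through time with
# one-step-shifted windows: batch_x covers rows [k, k + timestamp) and batch_y
# covers [k + 1, k + timestamp + 1), so each row is trained to predict the next
# one. A commented sketch of that slicing on a hypothetical toy frame:
#
# toy = pd.DataFrame(np.arange(20).reshape(10, 2))
# k = 0
# index = min(k + timestamp, toy.shape[0] - 1)
# batch_x = np.expand_dims(toy.iloc[k:index, :].values, axis=0)  # shape (1, 7, 2)
# batch_y = toy.iloc[k + 1 : index + 1, :].values                # shape (7, 2)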
def forecast(df_beta, df_train, df, minmax):
    tf.compat.v1.reset_default_graph()
    modelnn = Model(
        learning_rate,
        num_layers,
        df_beta.shape[1],
        size_layer,
        df_beta.shape[1],
        dropout_rate,
    )
    sess = tf.compat.v1.Session(
        config=tf.compat.v1.ConfigProto(
            intra_op_parallelism_threads=64, inter_op_parallelism_threads=64
        )
    )
    sess.run(tf.compat.v1.global_variables_initializer())
    date_ori = pd.to_datetime(df.iloc[:, 0]).tolist()

    pbar = tqdm(range(epoch), desc='train loop')
    for i in pbar:
        total_loss, total_acc = [], []
        for k in range(0, df_train.shape[0] - 1, timestamp):
            init_value = np.zeros((1, num_layers * 2 * size_layer))
            index = min(k + timestamp, df_train.shape[0] - 1)
            batch_x = np.expand_dims(df_train.iloc[k:index, :].values, axis=0)
            batch_y = df_train.iloc[k + 1 : index + 1, :].values
            logits, last_state, _, loss = sess.run(
                [modelnn.logits, modelnn.last_state, modelnn.optimizer, modelnn.cost],
                feed_dict={
                    modelnn.X: batch_x,
                    modelnn.Y: batch_y,
                    modelnn.hidden_layer: init_value,
                },
            )
            init_value = last_state
            total_loss.append(loss)
            total_acc.append(calculate_accuracy(batch_y[:, 0], logits[:, 0]))
        graph_loss.append(np.mean(total_loss))
        # np.save(loss_file, np.array(graph_loss))
        pbar.set_postfix(
            cost=np.mean(total_loss),
            min_acc=np.min(total_acc),
            mean_acc=np.mean(total_acc),
        )

    # Run the fitted model over the training span, then roll forward
    # recursively to fill the future_day horizon.
    future_day = test_size
    output_predict = np.zeros((df_train.shape[0] + future_day, df_train.shape[1]))
    output_predict[0] = df_train.iloc[0]
    upper_b = (df_train.shape[0] // timestamp) * timestamp
    init_value = np.zeros((1, num_layers * 2 * size_layer))

    for k in range(0, (df_train.shape[0] // timestamp) * timestamp, timestamp):
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict={
                modelnn.X: np.expand_dims(df_train.iloc[k : k + timestamp], axis=0),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[k + 1 : k + timestamp + 1] = out_logits

    if upper_b != df_train.shape[0]:
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict={
                modelnn.X: np.expand_dims(df_train.iloc[upper_b:], axis=0),
                modelnn.hidden_layer: init_value,
            },
        )
        output_predict[upper_b + 1 : df_train.shape[0] + 1] = out_logits
        future_day -= 1
        date_ori.append(date_ori[-1] + timedelta(days=1))

    init_value = last_state

    for i in range(future_day):
        # Feed the last `timestamp` predictions back in to predict the next day.
        o = output_predict[-future_day - timestamp + i : -future_day + i]
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict={
                modelnn.X: np.expand_dims(o, axis=0),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[-future_day + i] = out_logits[-1]
        date_ori.append(date_ori[-1] + timedelta(days=1))

    output_predict = minmax.inverse_transform(output_predict)
    deep_future = anchor(output_predict[:, 0], 0.3)
    sess.close()
    return deep_future
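# forecast() ends by passing the first feature (Close) through anchor(), an
# exponential smoother: smoothed[t] = weight * smoothed[t-1] + (1 - weight) * raw[t].
# With weight = 0.3 most of each raw value is kept, so the jagged recursive
# predictions are only lightly softened. Worked toy numbers (hypothetical):
#
# anchor([100, 110, 90], 0.3)
# # step 1: 0.3 * 100 + 0.7 * 100 = 100.0
# # step 2: 0.3 * 100 + 0.7 * 110 = 107.0
# # step 3: 0.3 * 107 + 0.7 * 90  = 95.1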
# In[41]:

def newaccuration(accepted_results, truetrend):
    # Score each candidate forecast by how many day-over-day moves (up/down)
    # match the true trend over the test window, and keep the best one.
    hasilutama = 0
    indexbagus = 0
    truest = 0
    predictest = 0
    for i, x in enumerate(accepted_results):
        a = x[-(test_size + 2):]
        print("a", a)
        b = truetrend[-(test_size + 1):]
        # print("b", b)
        hasil = 0
        true = []
        predict = []
        for xy in range(1, len(b)):
            predict.append(a[xy] > a[xy - 1])
            true.append(b[xy] > b[xy - 1])
            if predict[-1] == true[-1]:
                hasil += 1
        if hasil > hasilutama:
            hasilutama = hasil
            indexbagus = i
            truest = true
            predictest = predict
    salah = []
    for xz in range(len(truest)):
        if truest[xz] != predictest[xz]:
            salah.append(xz)
            # if xz != 0:
            #     salah.append(xz - 1)
    print("TRUEST", truest)
    print("predictest", predictest)
    return hasilutama, indexbagus, salah

# In[42]:

def betaforecast(simulationsize, dfx, dftrain, df, df2, minmax):
    results = []
    for i in range(simulationsize):
        forecast_res = forecast(df, dftrain, dfx, minmax)
        results.append(forecast_res)
    # Re-run the forecast until the predicted tail is plausible: no value below
    # the historical minimum close and none above twice the historical maximum.
    while not (
        (np.array(results[0][-test_size:]) < np.min(dfx['Close'])).sum() == 0
        and (np.array(results[0][-test_size:]) > np.max(dfx['Close']) * 2).sum() == 0
    ):
        print("++++++++++++++++++++++++")
        print("Forecast Recalled...")
        results[0] = forecast(df, dftrain, dfx, minmax)
    return results[0]

# In[43]:

def interval(p1, p2):
    return abs(p1 - p2)

# In[44]:

def checkaccuracy2(true):
    avg = []
    for x in range(len(true) - 7):
        avg.append(interval(true[x], true[x + 1]))
    average = sum(avg) / len(avg)
    return average

# In[45]:

def checkaccuracy(predict, true, filterx, test_size):
    print("True Length: ", len(true))
    print("Predict Length: ", len(predict))
    temp_predict = predict[-test_size:]
    temp_true = true[-test_size:]
    count = 0
    print("------------------------------------")
    for x in range(test_size):
        # A day is a hit when the prediction lies within +/- filterx/10 of the
        # true close.
        # acc_var1 = temp_true[x] - (1 / filterx * temp_true[x])
        acc_var1 = temp_true[x] - (filterx / 10)
        acc_var2 = temp_predict[x]
        # acc_var3 = temp_true[x] + (1 / filterx * temp_true[x])
        acc_var3 = temp_true[x] + (filterx / 10)
        acc_condition = acc_var1 <= acc_var2 <= acc_var3
        print("Day " + str(x + 1) + ", Price : " + str(int(temp_true[x]))
              + " ,Gap = " + str(int(abs(temp_predict[x] - temp_true[x]))) + " : ",
              acc_condition)
        if acc_condition:
            count = count + 1
    print("------------------------------------")
    # Accept the forecast when more than half of the 14 test days are hits.
    if count > 7:
        print("Result True")
        return True
    else:
        print("Result False")
        return False

# In[46]:

def findthreshold(simulationsize, dfx, dftrain, df, df2, minmax):
    results = []
    for i in range(simulationsize):
        results.append(forecast(df, dftrain, dfx, minmax))
    accepted_results = []
    for r in results:
        if (np.array(r[-test_size:]) < np.min(dfx['Close'])).sum() == 0 and (
            np.array(r[-test_size:]) > np.max(dfx['Close']) * 2
        ).sum() == 0:
            accepted_results.append(r)
    finalavg = 999999
    for o in accepted_results:
        avg = []
        for x in range(len(o) - 5):
            avg.append(interval(o[x], df2[x]))
        average = sum(avg) / len(avg)
        if average <= finalavg:
            finalavg = average
    return finalavg
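# checkaccuracy() accepts a forecast when more than 7 of the last test_size
# days land inside a fixed band of +/- filterx/10 price units around the true
# close. Commented illustration with hypothetical numbers (filterx = 50, i.e.
# a +/- 5.0 band):
#
# temp_true, filterx = 120.0, 50
# low, high = temp_true - filterx / 10, temp_true + filterx / 10  # 115.0 .. 125.0
# low <= 118.7 <= high  # True: a prediction of 118.7 counts as a hit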
") if os.path.isdir("TempData/") == False: os.mkdir("TempData/") if os.path.isdir("TempData/%s"%x) == False: os.mkdir("TempData/%s"%x) if os.path.isdir("TempData/%s/"%x+str(date)) == False: os.mkdir("TempData/%s/"%x+str(date)) with open("TempData/%s/"%x+str(date)+"/"+x+str(xi)+".vezpal2","w+") as oop: main=[] main.append(resultfinal) # prediction main.append(list(df2['Close'])) main.append(date_col) # main.append(3) # main.append([0]) json.dump(main,oop) def automaton(simulationsize,date): # symbols=["AAPL"] symbols = sys.argv[1] times=[] x=symbols # for x in symbols: temp_time=[] temp_time.append(x) counter=0 validity=0 df,date_col=loader(x,test_size,date) # print(type(df)) dfx=df # print("ASDSAD") df2=trueloader(x,test_size,date) df,minmax=preproc(df) dftrain=df wrong=[1,2,3,4,5] # avg=checkaccuracy2(list(df2["Close"])) # start=time.time() # avg=findthreshold(50,dfx,dftrain,df,list(df2["Close"]),minmax) # temp_time.append(time.time()-start) start=time.time() filterx = int(prediction_gap) able=False print("============== || Initial Train || =============") main_train(df,dftrain,dfx,minmax) for xi in range(5): decision=False while (decision==False): print() print("====== [ Foreacasting Attempt : "+str(counter+1)+" ] ===========") print("====== [ Progress : "+str(xi)+"/5 ] ") resultfinal=betaforecast(simulationsize,dfx,dftrain,df,df2,minmax) # validity=valid decision=checkaccuracy(resultfinal,list(df2["Close"]),filterx, test_size) # wrong=invalid if decision==True: able=True print("ABLE") print(str(filterx)) if counter > 10 and decision != True: counter = 0 filterx=filterx+10 print("Filter X new value : "+str(filterx)) print("Decision Status : ", decision) print("**************************************") # avg=avg+(1/3*avg) if filterx>1000: print("====== [ GG, we gave up] =====") continue counter=counter+1 temp_data(date, xi, resultfinal, df2, date_col, x) print("[ Loop : "+x+" done ] =========================") print() if os.path.isdir("Backtest/") == False: os.mkdir("Backtest/") if os.path.isdir("Backtest/%s"%x) == False: os.mkdir("Backtest/%s"%x) if os.path.isdir("Backtest/%s/"%x+str(date)) == False: os.mkdir("Backtest/%s/"%x+str(date)) with open("Backtest/%s/"%x+str(date)+"/"+x+str(xi)+".vezpal2","w+") as oop: main=[] main.append(resultfinal) #prediction main.append(list(df2['Close'])) main.append(date_col) # main.append(3) # main.append([0]) json.dump(main,oop) print("Time for %s :"%x,time.time()-start) temp_time.append(time.time()-start) times.append(temp_time) return times def predictor(simulationsize,current): for x in range(52): tf.compat.v1.reset_default_graph() current+=timedelta(days=7) automaton(simulationsize,current) current_date=date(2020,1,1) if os.path.isdir("Loss/") == False: os.mkdir("Loss/") if os.path.isdir("Loss/"+str(current_date)) == False: os.mkdir("Loss/"+str(current_date)) # loss_file = time.strftime("%Y%m%d-%H%M%S") # loss_file = "Loss/"+str(date.today())+"/"+loss_file global_start = time.time() # profile = cProfile.Profile() # main_func = "predictor(simulation_size,current_date)" predictor(simulation_size,current_date) # ps = pstats.Stats(profile.run(main_func)) print("Overall time consumption ", str(time.time()-global_start)) # ps.dump_stats("./Cprofile_model_01.ps")