from __future__ import absolute_import from __future__ import division from __future__ import print_function from statsmodels.tsa.stattools import adfuller import numpy as np import pandas as pd import matplotlib.pyplot as plt import statsmodels as sm from statsmodels.tsa.stattools import acf, pacf from statsmodels.tsa.arima_model import ARIMA from statsmodels.tsa.seasonal import seasonal_decompose import argparse import sys # Import data import tensorflow as tf FLAGS = None def test_stationarity(timeseries): #Determing rolling statistics rolmean = pd.rolling_mean(timeseries, window=12) rolstd = pd.rolling_std(timeseries, window=12) #Plot rolling statistics: orig = plt.plot(timeseries, color='blue',label='Original') mean = plt.plot(rolmean, color='red', label='Rolling Mean') std = plt.plot(rolstd, color='black', label = 'Rolling Std') plt.legend(loc='best') plt.title('Rolling Mean & Standard Deviation') plt.show() #Perform Dickey-Fuller test: print('Results of Dickey-Fuller Test:') dftest = adfuller(timeseries, autolag='AIC') dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) for key,value in dftest[4].items(): dfoutput['Critical Value (%s)'%key] = value print(dfoutput) def weight_variable(shape): initial = tf.truncated_normal(shape, stddev = 0.1) return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.1, shape=shape) return tf.Variable(initial) def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') def max_pool_2x2(x): return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') def predict(src2month): df = dict() pkg_num = len(src2month) training_num = len(src2month['linux'])-12 data = src2month['linux'] past = data[:len(data)-12] print(data) print(past) data_rol = pd.rolling_mean(data, window=12) rolmean = pd.rolling_mean(past, window=12) print(rolmean) print(len(rolmean)) past = rolmean[12:] decomposition = seasonal_decompose(past, freq=12) # fig = plt.figure() # fig = decomposition.plot() # fig.set_size_inches(15,8) # print(np.roll(past, 1)) df['first_difference'] = past - np.roll(past, 1) df['first_difference'] = df['first_difference'][1:] df['seasonal_difference'] = past - np.roll(past, 12) df['seasonal_difference'] = df['seasonal_difference'][12:] df['seasonal_first_difference'] = df['first_difference'] - np.roll(df['first_difference'], 12) df['seasonal_first_difference'] = df['seasonal_first_difference'][12:] print(len(df['first_difference'])) # test_stationarity(past) # test_stationarity(df['first_difference']) # test_stationarity(df['seasonal_difference']) # test_stationarity(df['seasonal_first_difference']) # fig = plt.figure(figsize=(12,8)) # ax1 = fig.add_subplot(211) # fig = sm.graphics.tsaplots.plot_acf(df['seasonal_first_difference'], lags=40, ax=ax1) # ax2 = fig.add_subplot(212) # fig = sm.graphics.tsaplots.plot_pacf(df['seasonal_first_difference'], lags=40, ax=ax2) lag_acf = acf(df['seasonal_first_difference'], nlags=20) lag_pacf = pacf(df['seasonal_first_difference'], nlags=20, method='ols') #Plot ACF: # plt.subplot(121) # plt.plot(lag_acf) # plt.axhline(y=0,linestyle='--',color='gray') # plt.axhline(y=-1.96/np.sqrt(len(df['seasonal_first_difference'])),linestyle='--',color='gray') # plt.axhline(y=1.96/np.sqrt(len(df['seasonal_first_difference'])),linestyle='--',color='gray') # plt.title('Autocorrelation Function') #Plot PACF: # plt.subplot(122) # plt.plot(lag_pacf) # plt.axhline(y=0,linestyle='--',color='gray') # plt.axhline(y=-1.96/np.sqrt(len(df['seasonal_first_difference'])),linestyle='--',color='gray') # plt.axhline(y=1.96/np.sqrt(len(df['seasonal_first_difference'])),linestyle='--',color='gray') # plt.title('Partial Autocorrelation Function') # plt.tight_layout() mod = sm.tsa.statespace.sarimax.SARIMAX(past, trend='n', order=(0,1,0), seasonal_order=(2,1,1,12)) results = mod.fit() print(results.summary()) df['forecast'] = results.predict(start = len(past) + 1, end = len(past) + 102, dynamic= True) pred = np.concatenate((np.zeros(180), df['forecast'])) fitted = results.predict(start = 24, end = len(past) + 102, dynamic= True) fig = plt.figure(figsize=(12,8)) fig = plt.plot(data_rol, color='blue') pred = np.concatenate((np.zeros(12), pred)) fig = plt.plot(pred, color='green') fig = plt.plot(fitted, color='red') print(len(data), len(past), len(pred)) reality = sum(data[193:205]) average = sum(data[181:193]) predicted = pred[203] print('Actual vulnerabilities in 2016: ' + str(reality)) print('Number of vulnerabilities in 2015: ' + str(average)) print('Predicted vulnerabilities for 2016: ' + str(predicted * 12)) print('Prediction error: ' + str(reality - predicted * 12)) print('Difference from previous year: ' + str(reality - average)) plt.show()