import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# fix the random seed for reproducibility
numpy.random.seed(7)


# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        # input window of look_back-6 values (must match the LSTM input_dim below);
        # the target is the value look_back steps after the window start
        a = dataset[i:(i + look_back - 6)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return numpy.array(dataX), numpy.array(dataY)


def predict(src2month):
    pkg_num = len(src2month)
    training_num = len(src2month['linux']) - 12
    look_back = 24

    # create the LSTM network (legacy Keras 1 API: dropout_W/dropout_U, init, nb_epoch)
    model = Sequential()
    model.add(LSTM(128, input_dim=look_back - 6, activation='relu',
                   dropout_W=0.6, dropout_U=0.6))
    model.add(Dense(12, init='uniform', activation='relu'))
    model.add(Dense(8, init='uniform', activation='relu'))
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    # model.add(LSTM(4, input_dim=look_back-6, dropout_W=0.2, dropout_U=0.1))
    # model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    flag = True
    ###############################################################################################
    for pkg_name in ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark', 'wordpress',
                     'iceape', 'xen', 'asterisk', 'tomcat7', 'phpmyadmin', 'asterisk',
                     'mariadb-10.0', 'libxml2', 'apache2', 'cups', 'samba', 'freetype',
                     'tiff', 'clamav', 'bind9', 'squid', 'openssl', 'moodle', 'cacti',
                     'krb5', 'ffmpeg', 'mantis', 'xpdf', 'imagemagick', 'typo3-src',
                     'firefox', 'chromium-browser']:
    # for pkg_name in ['chromium-browser']:
        pkg_num = len(src2month)
        training_num = len(src2month['linux']) - 12

        # smooth the monthly counts with a 12-month rolling mean and drop the NaN head
        dataset = src2month[pkg_name]
        dataset = pandas.rolling_mean(dataset, window=12)
        dataset = dataset[12:]

        # normalize the dataset
        scaler = MinMaxScaler(feature_range=(0, 1))
        dataset = scaler.fit_transform(dataset)

        # split into train and test sets (80/20)
        train_size = int(len(dataset) * 0.80)
        test_size = len(dataset) - train_size
        train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
        print(len(train), len(test))

        # reshape into X=t and Y=t+1
        trainX, trainY = create_dataset(train, look_back)
        testX, testY = create_dataset(test, look_back)

        # concatenate into one big training and test array across all packages
        if flag:
            trainX_long = trainX
            trainY_long = trainY
            testX_long = testX
            testY_long = testY
        else:
            trainX_long = numpy.concatenate((trainX_long, trainX), axis=0)
            trainY_long = numpy.concatenate((trainY_long, trainY), axis=0)
            testX_long = numpy.concatenate((testX_long, testX), axis=0)
            testY_long = numpy.concatenate((testY_long, testY), axis=0)
        flag = False

    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    trainX_long = numpy.reshape(trainX_long, (trainX_long.shape[0], 1, trainX_long.shape[1]))
    testX_long = numpy.reshape(testX_long, (testX_long.shape[0], 1, testX_long.shape[1]))
    print(len(trainX_long))

    # fit the LSTM network on the combined data of all packages
    model.fit(trainX_long, trainY_long, nb_epoch=50, batch_size=1, verbose=2)
    ###############################################################################################

    # make predictions for the last package processed in the loop
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    print(type(testPredict))

    # invert predictions back to the original scale
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])

    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict) + look_back] = trainPredict[:, 0]

    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:] = numpy.nan
    testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1] = testPredict[:, 0]

    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset))
    plt.plot(trainPredictPlot)
    plt.plot(testPredictPlot)
    plt.show()
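

# ------------------------------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original script). It assumes the same
# legacy library versions the code above targets (Keras 1.x, pandas with rolling_mean,
# scikit-learn that still accepts 1-D input to MinMaxScaler). `src2month` is assumed to map
# each package name used in the training loop to a 1-D array of monthly vulnerability
# counts; the synthetic Poisson counts and the name `src2month_example` are placeholders.
if __name__ == '__main__':
    pkgs = ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark', 'wordpress', 'iceape',
            'xen', 'asterisk', 'tomcat7', 'phpmyadmin', 'mariadb-10.0', 'libxml2', 'apache2',
            'cups', 'samba', 'freetype', 'tiff', 'clamav', 'bind9', 'squid', 'openssl',
            'moodle', 'cacti', 'krb5', 'ffmpeg', 'mantis', 'xpdf', 'imagemagick', 'typo3-src',
            'firefox', 'chromium-browser']
    # 20 years of synthetic monthly counts per package
    src2month_example = {p: numpy.random.poisson(lam=3.0, size=240).astype(float) for p in pkgs}
    predict(src2month_example)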