123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- import numpy
- import matplotlib.pyplot as plt
- import pandas
- import math
- from keras.models import Sequential
- from keras.layers import Dense
- from keras.layers import LSTM
- from keras.layers import Activation
- from sklearn.preprocessing import MinMaxScaler
- from sklearn.metrics import mean_squared_error
- numpy.random.seed(7)
def create_dataset(dataset, look_back=1):
    """Turn a sequence into sliding input windows and next-step targets.

    For each start index ``s`` the input is ``dataset[s:s + look_back]`` and
    the target is ``dataset[s + look_back]``.

    Start indices run over ``range(len(dataset) - look_back - 1)``, so the
    last window that would still have a target is not emitted. When that
    bound is zero or negative, both returned arrays are empty.

    Args:
        dataset: Sliceable, indexable sequence of values.
        look_back: Number of consecutive values in each input window.

    Returns:
        A ``(inputs, targets)`` pair of numpy arrays.
    """
    window_starts = range(len(dataset) - look_back - 1)
    inputs = [dataset[start:start + look_back] for start in window_starts]
    targets = [dataset[start + look_back] for start in window_starts]
    return numpy.array(inputs), numpy.array(targets)
def _smooth_and_scale(series, scaler, window=12):
    """Smooth *series* with a rolling mean, trim it, and rescale it.

    The first ``window`` entries of the smoothed series are dropped before
    scaling. ``scaler`` is refit on every call, so after the call its
    ``inverse_transform`` maps values back to this series' range.
    """
    # NOTE: pandas.rolling_mean is a legacy pandas API; it is kept so this
    # code keeps targeting the library versions the rest of the file uses.
    smoothed = pandas.rolling_mean(series, window=window)
    smoothed = smoothed[window:]
    return scaler.fit_transform(smoothed)


def predict(src2month):
    """Train one shared LSTM across packages, then report and plot its fit.

    Training: for every key in ``src2month`` whose series sums to more than
    20, the series is smoothed (12-step rolling mean, first 12 entries
    dropped), min-max scaled to (0, 1), split 80/20 into train/test, and
    turned into ``look_back``-sized windows with ``create_dataset``. The
    same model instance is then fit on each package's training windows in
    turn (10 epochs, batch size 1).

    Evaluation: for a fixed list of package names, the model's train/test
    predictions are mapped back to the original scale, the RMSE of each is
    printed, and the smoothed series is plotted together with both
    prediction curves. Packages in that list that were not trained on
    (absent from ``src2month``, filtered out by the sum threshold, or too
    short to produce a window) are skipped with a message instead of
    raising ``KeyError``.

    Side effects: prints progress, writes the trained model to
    ``all_packages_test.h5`` and opens one matplotlib window per evaluated
    package.

    Args:
        src2month: Mapping from package name to a numeric series.
            Presumably per-month counts, judging by the name -- confirm
            against the caller.

    Returns:
        None.
    """
    look_back = 4
    # A package is used for training only if its series sums above this.
    min_total = 20
    report_packages = ['icedove', 'mysql', 'xulrunner', 'wireshark', 'firefox', 'openjdk', 'php5', 'iceape', 'wordpress', 'xen', 'openssl', 'chromium-browser', 'linux']

    trainXdict = {}
    trainYdict = {}
    testXdict = {}
    testYdict = {}

    # create the LSTM network
    # NOTE: input_dim / dropout_W / dropout_U (and nb_epoch below) are the
    # Keras 1 spellings; they are kept to match the stack this file targets.
    model = Sequential()
    model.add(LSTM(32, input_dim=look_back, activation='relu', dropout_W=0.1, dropout_U=0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # One scaler object is shared and refit for every package.
    scaler = MinMaxScaler(feature_range=(0, 1))

    for pkg_name in src2month:
        dataset = src2month[pkg_name]
        # Written as "not >" so a NaN total is skipped, as in the original test.
        if not sum(dataset) > min_total:
            continue

        dataset = _smooth_and_scale(dataset, scaler)
        train_size = int(len(dataset) * 0.80)
        train, test = dataset[:train_size], dataset[train_size:]
        print(len(train), len(test))

        # reshape into X=t and Y=t+1
        trainX, trainY = create_dataset(train, look_back)
        testX, testY = create_dataset(test, look_back)
        if len(trainX) == 0 or len(testX) == 0:
            # An empty window array has no second axis to reshape on.
            print('Package: ' + pkg_name + ' is too short for look_back=%d; skipping' % look_back)
            continue

        # reshape input to be [samples, time steps, features]
        trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

        # save to dict for the evaluation pass
        trainXdict[pkg_name], trainYdict[pkg_name] = trainX, trainY
        testXdict[pkg_name], testYdict[pkg_name] = testX, testY

        # fit the LSTM network (the same model keeps training across packages)
        model.fit(trainX, trainY, nb_epoch=10, batch_size=1, verbose=2)

    model.save('all_packages_test.h5')

    for pkg_name in report_packages:
        if pkg_name not in trainXdict:
            print('Package: ' + pkg_name + ' was not trained on; skipping evaluation')
            continue

        trainX, trainY = trainXdict[pkg_name], trainYdict[pkg_name]
        testX, testY = testXdict[pkg_name], testYdict[pkg_name]

        # Refit the shared scaler on this package so the inverse transforms
        # below use this package's range rather than the last trained one.
        dataset = _smooth_and_scale(src2month[pkg_name], scaler)

        # make predictions
        trainPredict = model.predict(trainX)
        testPredict = model.predict(testX)

        # invert predictions
        trainPredict = scaler.inverse_transform(trainPredict)
        trainY = scaler.inverse_transform([trainY])
        testPredict = scaler.inverse_transform(testPredict)
        testY = scaler.inverse_transform([testY])

        # calculate root mean squared error
        print('Package: ' + pkg_name)
        trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
        print('Train Score: %.2f RMSE' % (trainScore))
        testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
        print('Test Score: %.2f RMSE' % (testScore))

        # shift train predictions for plotting
        trainPredictPlot = numpy.empty_like(dataset)
        trainPredictPlot[:] = numpy.nan
        trainPredictPlot[look_back:len(trainPredict) + look_back] = trainPredict[:, 0]

        # shift test predictions for plotting; the offsets account for the
        # look_back lead-in of each split and the sample create_dataset drops
        testPredictPlot = numpy.empty_like(dataset)
        testPredictPlot[:] = numpy.nan
        testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1] = testPredict[:, 0]

        # plot baseline and predictions
        plt.plot(scaler.inverse_transform(dataset))
        plt.plot(trainPredictPlot)
        plt.plot(testPredictPlot)
        plt.show()
|