123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- import numpy
- import matplotlib.pyplot as plt
- import pandas
- import math
- from keras.models import Sequential
- from keras.layers import Dense
- from keras.layers import LSTM
- from keras.layers import Activation
- from sklearn.preprocessing import MinMaxScaler
- from sklearn.metrics import mean_squared_error
- numpy.random.seed(7)
- # convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Slice a sequence into sliding-window inputs and next-step targets.

    Window ``i`` is ``dataset[i:i + look_back]`` and its target is
    ``dataset[i + look_back]``.  Windows are generated for
    ``i in range(len(dataset) - look_back - 1)``, so the final element of
    ``dataset`` is never used as a target.

    Returns a ``(windows, targets)`` pair of numpy arrays; both are empty
    when ``dataset`` is too short to yield a single window.
    """
    sample_count = len(dataset) - look_back - 1
    starts = range(sample_count)
    windows = [dataset[start:start + look_back] for start in starts]
    targets = [dataset[start + look_back] for start in starts]
    return numpy.array(windows), numpy.array(targets)
def predict(src2month):
    """Train one LSTM on smoothed per-package series, then score and plot it.

    ``src2month`` is indexed by package name; each entry is treated as a
    one-dimensional numeric series (presumably per-month values -- confirm
    with the caller).  For every package in the hard-coded list below, the
    series is smoothed with a 12-point rolling mean, min-max scaled to
    [0, 1], split 80/20 into train and test parts, cut into ``look_back``
    wide windows, and used to fit the same model for five more epochs.

    After the loop the model is evaluated on the split of the LAST package
    processed: train and test RMSE are printed in the original units and
    the series is plotted together with both sets of predictions.

    NOTE(review): the source this was recovered from had lost its
    indentation; evaluating once after the loop (rather than once per
    package) is the reading suggested by the section separators -- confirm.

    Raises KeyError if a listed package is missing from ``src2month``.
    Returns None.
    """
    look_back = 4
    smoothing_window = 12
    pkg_names = ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark',
                 'firefox', 'openjdk', 'php5', 'iceape', 'wordpress', 'xen',
                 'openssl', 'chromium-browser']

    # create the LSTM network; each sample is a single time step carrying
    # `look_back` lagged values as its features
    model = Sequential()
    model.add(LSTM(64, input_shape=(1, look_back), activation='relu',
                   dropout=0.1, recurrent_dropout=0.1))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    scaler = MinMaxScaler(feature_range=(0, 1))

    for pkg_name in pkg_names:
        # Series.rolling() replaces the removed pandas.rolling_mean()
        series = pandas.Series(src2month[pkg_name])
        smoothed = series.rolling(window=smoothing_window).mean().values
        # skip the warm-up region of the rolling mean (its leading values
        # are NaN until a full window is available)
        smoothed = smoothed[smoothing_window:]

        # normalize the dataset; the scaler wants a 2-D column, the rest of
        # the pipeline works on the flattened 1-D result
        dataset = scaler.fit_transform(smoothed.reshape(-1, 1)).ravel()

        train_size = int(len(dataset) * 0.80)
        train, test = dataset[:train_size], dataset[train_size:]
        print(len(train), len(test))

        # build X = previous `look_back` values, Y = next value
        trainX, trainY = create_dataset(train, look_back)
        testX, testY = create_dataset(test, look_back)

        # reshape input to be [samples, time steps, features]
        trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

        # keep training the shared model on this package
        model.fit(trainX, trainY, epochs=5, batch_size=1, verbose=2)

    # make predictions for the last package and map them back to the
    # original units (the scaler is still fitted on that package)
    trainPredict = scaler.inverse_transform(model.predict(trainX))
    testPredict = scaler.inverse_transform(model.predict(testX))
    trainY = scaler.inverse_transform(trainY.reshape(-1, 1))[:, 0]
    testY = scaler.inverse_transform(testY.reshape(-1, 1))[:, 0]

    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    # shift train predictions for plotting: the first prediction belongs to
    # position `look_back` of the series
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict) + look_back] = trainPredict[:, 0]

    # shift test predictions for plotting; the offsets account for the
    # look_back + 1 samples create_dataset() leaves out of each split
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:] = numpy.nan
    testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1] = testPredict[:, 0]

    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset.reshape(-1, 1)))
    plt.plot(trainPredictPlot)
    plt.plot(testPredictPlot)
    plt.show()
|