lstm_reg.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. import numpy
  2. import matplotlib.pyplot as plt
  3. import pandas
  4. import math
  5. from keras.models import Sequential
  6. from keras.layers import Dense
  7. from keras.layers import LSTM
  8. from keras.layers import Activation
  9. from sklearn.preprocessing import MinMaxScaler
  10. from sklearn.metrics import mean_squared_error
# Seed NumPy's global random number generator with a fixed value so that
# anything drawing from it produces the same sequence on every run.
numpy.random.seed(7)
  12. # convert an array of values into a dataset matrix
  13. def create_dataset(dataset, look_back=1):
  14. dataX, dataY = [], []
  15. for i in range(len(dataset)-look_back-1):
  16. a = dataset[i:(i+look_back)]
  17. dataX.append(a)
  18. dataY.append(dataset[i + look_back])
  19. return numpy.array(dataX), numpy.array(dataY)
  20. def predict(src2month):
  21. pkg_num = len(src2month)
  22. training_num = len(src2month['linux'])
  23. look_back = 4
  24. # create the LSTM network
  25. model = Sequential()
  26. model.add(LSTM(64, input_dim=look_back, activation ='relu', dropout_W =0.1, dropout_U =0.1))
  27. # model.add(Dense(12, init='uniform', activation='relu'))
  28. # model.add(Dense(8, init='uniform', activation='relu'))
  29. # model.add(Dense(1, init='uniform', activation='sigmoid'))
  30. # model.add(LSTM(4, input_dim=look_back-6, dropout_W = 0.2, dropout_U = 0.1))
  31. model.add(Dense(1))
  32. model.compile(loss='mean_squared_error', optimizer='adam')
  33. scaler = MinMaxScaler(feature_range=(0, 1))
  34. flag = True
  35. ###################################################################################################
  36. for pkg_name in ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark', 'firefox', 'openjdk', 'php5', 'iceape', 'wordpress', 'xen', 'openssl', 'chromium-browser']:
  37. # for pkg_name in ['chromium-browser']:
  38. pkg_num = len(src2month)
  39. dataset = src2month[pkg_name]
  40. dataset = pandas.rolling_mean(dataset, window=12)
  41. dataset = dataset[12:]
  42. # normalize the dataset
  43. dataset = scaler.fit_transform(dataset)
  44. train_size = int(len(dataset) * 0.80)
  45. test_size = len(dataset) - train_size
  46. train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
  47. print(len(train), len(test))
  48. # reshape into X=t and Y=t+1
  49. trainX, trainY = create_dataset(train, look_back)
  50. testX, testY = create_dataset(test, look_back)
  51. # reshape input to be [samples, time steps, features]
  52. trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
  53. testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
  54. # fit the LSTM network
  55. model.fit(trainX, trainY, nb_epoch=5, batch_size=1, verbose=2)
  56. ###################################################################################################
  57. # make predictions
  58. trainPredict = model.predict(trainX)
  59. testPredict = model.predict(testX)
  60. print(type(testPredict))
  61. # invert predictions
  62. trainPredict = scaler.inverse_transform(trainPredict)
  63. trainY = scaler.inverse_transform([trainY])
  64. testPredict = scaler.inverse_transform(testPredict)
  65. testY = scaler.inverse_transform([testY])
  66. # calculate root mean squared error
  67. trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
  68. print('Train Score: %.2f RMSE' % (trainScore))
  69. testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
  70. print('Test Score: %.2f RMSE' % (testScore))
  71. # shift train predictions for plotting
  72. trainPredictPlot = numpy.empty_like(dataset)
  73. trainPredictPlot[:] = numpy.nan
  74. trainPredictPlot[look_back:len(trainPredict)+look_back] = trainPredict[:, 0]
  75. # shift test predictions for plotting
  76. testPredictPlot = numpy.empty_like(dataset)
  77. testPredictPlot[:] = numpy.nan
  78. testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1] = testPredict[:, 0]
  79. # plot baseline and predictions
  80. plt.plot(scaler.inverse_transform(dataset))
  81. plt.plot(trainPredictPlot)
  82. plt.plot(testPredictPlot)
  83. plt.show()