lstm_reg.py

import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

numpy.random.seed(7)
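
# NOTE: this script targets the legacy APIs it was written against -- Keras 1.x
# (nb_epoch=, init=, input_dim=, dropout_W=/dropout_U=), pandas.rolling_mean,
# and a scikit-learn version that still accepts 1-D input to MinMaxScaler.
# In current versions of those libraries the equivalent calls have been
# renamed or removed.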
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back-6)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return numpy.array(dataX), numpy.array(dataY)
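
# Note on the window arithmetic above: each input sample contains look_back-6
# consecutive values while the target stays at offset look_back, so with
# look_back=24 the network sees an 18-month window and forecasts the smoothed
# count roughly half a year ahead rather than the next month.
#
# predict() below assumes src2month is a dict mapping Debian source package
# names (it must contain every package listed in the training loop, including
# 'linux') to a 1-D array of monthly vulnerability counts. A single network is
# fit on windows pooled from all of those packages, and the final evaluation
# and plot use the train/test split of the last package processed.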
def predict(src2month):
    pkg_num = len(src2month)
    training_num = len(src2month['linux'])-12
    look_back = 24
    # create the LSTM network
    # (dropout_W / dropout_U are the Keras 1.x names for input and recurrent dropout)
    model = Sequential()
    model.add(LSTM(128, input_dim=look_back-6, activation='relu', dropout_W=0.6, dropout_U=0.6))
    model.add(Dense(12, init='uniform', activation='relu'))
    model.add(Dense(8, init='uniform', activation='relu'))
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    # model.add(LSTM(4, input_dim=look_back-6, dropout_W=0.2, dropout_U=0.1))
    # model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    flag = True
    ###################################################################################################
    for pkg_name in ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark', 'wordpress',
                     'iceape', 'xen', 'asterisk', 'tomcat7', 'phpmyadmin', 'mariadb-10.0',
                     'libxml2', 'apache2', 'cups', 'samba', 'freetype', 'tiff', 'clamav',
                     'bind9', 'squid', 'openssl', 'moodle', 'cacti', 'krb5', 'ffmpeg',
                     'mantis', 'xpdf', 'imagemagick', 'typo3-src', 'firefox', 'chromium-browser']:
    # for pkg_name in ['chromium-browser']:
        pkg_num = len(src2month)
        training_num = len(src2month['linux'])-12
        # smooth the monthly counts with a 12-month rolling mean and drop the warm-up period
        dataset = src2month[pkg_name]
        dataset = pandas.rolling_mean(dataset, window=12)
        dataset = dataset[12:]
        # normalize the dataset
        scaler = MinMaxScaler(feature_range=(0, 1))
        dataset = scaler.fit_transform(dataset)
        # split into train and test sets
        train_size = int(len(dataset) * 0.80)
        test_size = len(dataset) - train_size
        train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
        print(len(train), len(test))
        # build input windows and forecast targets (see create_dataset above)
        trainX, trainY = create_dataset(train, look_back)
        testX, testY = create_dataset(test, look_back)
        # concatenate into big training and test arrays spanning all packages
        if flag:
            trainX_long = trainX
            trainY_long = trainY
            testX_long = testX
            testY_long = testY
        else:
            trainX_long = numpy.concatenate((trainX_long, trainX), axis=0)
            trainY_long = numpy.concatenate((trainY_long, trainY), axis=0)
            testX_long = numpy.concatenate((testX_long, testX), axis=0)
            testY_long = numpy.concatenate((testY_long, testY), axis=0)
        flag = False
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    trainX_long = numpy.reshape(trainX_long, (trainX_long.shape[0], 1, trainX_long.shape[1]))
    testX_long = numpy.reshape(testX_long, (testX_long.shape[0], 1, testX_long.shape[1]))
    print(len(trainX_long))
    # fit the LSTM network on the pooled training data
    model.fit(trainX_long, trainY_long, nb_epoch=50, batch_size=1, verbose=2)
    ###################################################################################################
    # make predictions for the last package processed in the loop above
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    print(type(testPredict))
    # invert predictions back to the original scale
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    print('Test Score: %.2f RMSE' % (testScore))
    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back] = trainPredict[:, 0]
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:] = numpy.nan
    testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1] = testPredict[:, 0]
    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset))
    plt.plot(trainPredictPlot)
    plt.plot(testPredictPlot)
    plt.show()
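
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original script): it only illustrates
# the input format predict() expects, and assumes src2month maps every package
# name used in the training loop to a 1-D array of monthly vulnerability
# counts. The counts below are synthetic Poisson draws, so the resulting fit
# is meaningless; with real data each array would hold the per-month number of
# published vulnerabilities for that package.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    pkg_names = ['icedove', 'linux', 'mysql', 'xulrunner', 'wireshark', 'wordpress',
                 'iceape', 'xen', 'asterisk', 'tomcat7', 'phpmyadmin', 'mariadb-10.0',
                 'libxml2', 'apache2', 'cups', 'samba', 'freetype', 'tiff', 'clamav',
                 'bind9', 'squid', 'openssl', 'moodle', 'cacti', 'krb5', 'ffmpeg',
                 'mantis', 'xpdf', 'imagemagick', 'typo3-src', 'firefox', 'chromium-browser']
    # 20 years of fake monthly counts per package
    src2month = {name: numpy.random.poisson(lam=3.0, size=240).astype(float)
                 for name in pkg_names}
    predict(src2month)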