
small changes to machine_learning: add a first TensorFlow model in machine_learning.py and feed it per-package monthly CVE counts (src2month) collected in apt-sec.py

Nikolaos Alexopoulos, 7 years ago
commit 9cd78ac5db
3 changed files with 159 additions and 24 deletions
  1. apt-sec.py (+53 -14)
  2. machine_learning.py (+81 -0)
  3. output.txt (+25 -10)

+ 53 - 14
apt-sec.py

@@ -19,6 +19,7 @@ import numpy as np
 from dateutil import parser
 import plotly.plotly as py
 import plotly.graph_objs as go
+import machine_learning as ml
 
 logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
 ## Increase the recursion limit by much to allow bs to parse large files
@@ -367,7 +368,7 @@ def updateCVETables(myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
 
 ###############################################################################
 ## Check for updates on Package information
-def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve, cvetable):
+def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable):
     args = sys.argv
 #    if not('--offline' in args):
 #        fetchMeta('Packages')
@@ -402,7 +403,7 @@ def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve, cvetable):
     
     # recompute all pkg statistics
     for srcpkg in src2dsa:
-        processCVEs(srcpkg, now, src2dsa, dsa2cve, cvetable, config)
+        processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable, config)
     
     return 0
 ###############################################################################
@@ -423,7 +424,7 @@ def resolvePkg2Src(pkglist, pkg2src):
 ## compute and store MTBF, MTBR and Scores of each src pkg
 ## output: %src2mtbf:
 ##  (srcpkg=> ())
-def processCVEs(pkg, now, src2dsa, dsa2cve, cvetable, config):
+def processCVEs(pkg, now, src2dsa, dsa2cve, src2month, cvetable, config):
     stats = [now, 0, 0, 0, 0, 0, 0]
     mylambda = config['TRUST']['lambda']
     cvestats = dict()
@@ -465,17 +466,53 @@ def processCVEs(pkg, now, src2dsa, dsa2cve, cvetable, config):
     count = sum(cvestats.values())
 
     print(pkg + ' ' + str(count))
-    if pkg == 'chromium-browser':
-        print(src2dsa[pkg])
-        pkg_plot(pkg, cvestats)
+#    if pkg == 'chromium-browser':
+#        print(src2dsa[pkg])
+#        pkg_plot(pkg, cvestats)
+
+    format_data(pkg, cvestats, src2month)
 
-    for date in dates:
-        pass
-        ## Need to do compute value
 
     ##TODO Code to compute trust goes here
 
 
+###############################################################################
+## format vulnerability data into monthly intervals, suitable for tensorflow
+def format_data(pkg, cvestats, src2month):
+    
+    x = []
+    y = []
+    monthyear = []
+    year = []
+
+    # sort the (date, count) pairs chronologically
+    items = list(cvestats.items())
+    items.sort(key=lambda tup: tup[0])
+
+    # split into parsed dates (x) and vulnerability counts (y)
+    for data_dict in items:
+        x.append(parser.parse(data_dict[0]))
+        y.append(int(data_dict[1]))
+
+    # one bucket per month for the years 2000-2016
+    for i in range(2000, 2017):
+        temp = []
+        for j in range(12):
+            temp.append(0)
+        monthyear.append(temp)
+
+    # accumulate each count into its (year, month) bucket
+    for i in range(len(x)):
+        monthyear[x[i].year-2000][x[i].month-1] += y[i]
+
+    # flatten into a single chronological list of monthly totals
+    months_list = [item for sublist in monthyear for item in sublist]
+
+    temp_months = np.zeros(len(months_list))
+    i = 0
+    for element in months_list:
+        temp_months[i] = np.float32(element)
+        i += 1
+
+    src2month[pkg] = temp_months
+    return
+    
+
 ###############################################################################
 ## plot vulnerability time distribution for a single package
 def pkg_plot(pkg, cvestats):
@@ -494,7 +531,7 @@ def pkg_plot(pkg, cvestats):
     monthyear = []
     year = []
     # initialize list
-    for i in range(1995,2017):
+    for i in range(2000,2017):
         temp = []
         for j in range(12):
             temp.append(0)
@@ -502,7 +539,7 @@ def pkg_plot(pkg, cvestats):
 
     for i in range(len(x)):
 #        print(str(x[i].year) + str(x[i].month))
-        monthyear[x[i].year-1995][x[i].month-1] += y[i]
+        monthyear[x[i].year-2000][x[i].month-1] += y[i]
     newx = []
     yearsx = []
     year = []
@@ -534,9 +571,9 @@ def pkg_plot(pkg, cvestats):
 
 
     for i in range(len(year)):
-        yearsx.append(i + 1995)
+        yearsx.append(i + 2000)
 
-    k = 1995
+    k = 2000
     datapoints = []
     for i in range(len(month)):
         datapoints.append(i+1)
@@ -666,6 +703,7 @@ cve_db = client.cvedb
 src2dsa = dict()
 dsa2cve = dict()
 cvetable = dict()
+src2month = dict()
 
 (state, err) = load_state()
 state['vendor'] = 'debian'
@@ -678,10 +716,11 @@ state['vendor'] = 'debian'
 if action == 'update':
     (dsatable, src2dsa, dsa2cve, cvetable) = load_DBs()
 #    loadsha1lists()
-    aptsec_update(state,config, dsatable, client, src2dsa, dsa2cve, cvetable)
+    aptsec_update(state,config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable)
 #    save_sha1lists()
     save_DBs(dsatable, src2dsa, dsa2cve, cvetable)
     save_state(state)
+    ml.predict(src2month)
 elif action == 'status':
     load_DBs or exit(1)
     #handle errors more gracefully

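For reference, a minimal sketch of what the new format_data helper stores per package (the package name and CVE dates below are invented; cvestats is assumed to map date strings to counts, as the parser.parse call suggests). Each package ends up with a flat vector of 17 * 12 = 204 monthly CVE counts covering 2000-2016:

    import numpy as np
    from dateutil import parser

    # hypothetical input: CVE publication date -> number of CVEs on that date
    cvestats = {'2015-01-28': 2, '2016-03-02': 1}

    monthly = np.zeros(17 * 12)          # Jan 2000 .. Dec 2016, one bucket per month
    for date_str, count in cvestats.items():
        d = parser.parse(date_str)
        monthly[(d.year - 2000) * 12 + (d.month - 1)] += count

    src2month = {'openssl': monthly}     # what format_data writes into src2month
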
+ 81 - 0
machine_learning.py

@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+
+import argparse
+import sys
+
+# Import data
+
+import tensorflow as tf
+
+FLAGS = None
+
+def weight_variable(shape):
+    initial = tf.truncated_normal(shape, stddev = 0.1)
+    return tf.Variable(initial)
+
+def bias_variable(shape):
+    initial = tf.constant(0.1, shape=shape)
+    return tf.Variable(initial)
+
+def conv2d(x, W):
+    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+
+def max_pool_2x2(x):
+    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+
+def predict(src2month):
+#    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
+
+    # one row per package; all but the last 12 months are used for training
+    pkg_num = len(src2month)
+    training_num = len(src2month['linux'])-12
+
+    # features: the first training_num monthly counts of each package
+    # target: the total number of vulnerabilities in the remaining 12 months
+    training_table = np.zeros((pkg_num, training_num))
+    test_values = np.zeros((pkg_num, 1))
+
+    i = 0
+    for key, value in src2month.items():
+        training_table[i] = value[0:training_num]
+        test_values[i, 0] = np.sum(value[training_num:])
+        i += 1
+
+    
+
+
+    # Create the model
+    x = tf.placeholder(tf.float32, [None, training_num])
+    W = tf.Variable(tf.zeros([training_num, 1]))
+    b = tf.Variable(tf.zeros([1]))
+    y = tf.matmul(x, W) + b
+
+    # Define loss and optimizer
+    y_ = tf.placeholder(tf.float32, [None, 1])
+
+    # The raw formulation of cross-entropy,
+    #
+    #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
+    #                                 reduction_indices=[1]))
+    #
+    # can be numerically unstable.
+    #
+    # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
+    # outputs of 'y', and then average across the batch.
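+    # For example, if the softmax saturates to exactly 0 for some class,
+    # tf.log(0.) is -inf and the resulting 0 * -inf term becomes NaN; the
+    # fused op works on the logits directly and avoids this.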
+    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
+    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
+
+    sess = tf.InteractiveSession()  # needed by the .run() calls below
+    # Train
+    tf.global_variables_initializer().run()
+    for _ in range(1000):
+        sess.run(train_step, feed_dict={x: training_table, y_: test_values})
+#    for _ in range(1000):
+#        batch_xs, batch_ys = mnist.train.next_batch(100)
+#        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
+    print(sess.run(y, feed_dict={x: training_table}))  # predictions, not just the tensor object
+    # Test trained model
+    correct_prediction = tf.equal(y, y_)
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+    print(sess.run(accuracy, feed_dict={x: training_table, y_: test_values}))

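The apt-sec.py hunk above calls this as ml.predict(src2month) after the update run. A minimal standalone sketch of that contract (the package names and counts are invented; the dictionary must contain a 'linux' entry, since its length defines training_num, and every vector needs more than 12 months of data):

    import numpy as np
    import machine_learning as ml

    months = 17 * 12   # as produced by format_data (Jan 2000 .. Dec 2016)

    # hypothetical monthly CVE counts per source package
    src2month = {
        'linux':   np.random.poisson(3.0, months).astype(np.float32),
        'openssl': np.random.poisson(1.0, months).astype(np.float32),
    }

    # trains on the first months-12 values of each package; the summed count of
    # the last 12 months is used as the regression target
    ml.predict(src2month)
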
+ 25 - 10
output.txt

File diff suppressed because it is too large

