relativeness tests done

Maksim Melnik 7 years ago
commit 58da4d51b4

+ 0 - 8
M-star.py

@@ -31,16 +31,8 @@ def __main__(configfile='config_default.txt', vendorname='debian', action='help'
 
         #for norm_param in range(1, 15):
         # get model as set of opinions
-        # TODO: FIX THE CVSREADER dependency
-        # TODO: for random subsets of packages (that exist in our model), generate prediction_error_values and compute the average error value for each of the three approaches: (addition from model , addition of separate predictions for opinions from a model, ANDed prediction)
 
 
-        """
-        # TEST AN INPUT AGAINST OUR MODEL
-        # read packages and create a set of opinions for those packages
-        
-        """
-
     else:
         print("Only debian vendors are supported for now.")
         sys.exit(1)

+ 2 - 5
vendors/debian/CSVReader.py

@@ -3,8 +3,7 @@ from CertainTrust import Opinion
 
 
 class CSVReader:
-
-    prediction_read = 0;
+    gathered_predictions = dict()
 
     @staticmethod
     def read_csv_prediction_errorcompl(inputfile, vendormodel, months, f=0.5, norm_param=4):
@@ -18,7 +17,6 @@ class CSVReader:
         :return: dictionary with package names as keys and opinions as values
         '''
         result = {}
-        summ = 0
         with open(inputfile, newline="") as csvfile:
             reader = csv.reader(csvfile, delimiter=':')
 
@@ -26,7 +24,7 @@ class CSVReader:
                 if not len(row) == 0:
                     package = vendormodel.unifySrcName(row [0])
                     prediction = float(row [1])
-                    summ = summ + prediction
+                    CSVReader.gathered_predictions[package] = prediction
                     errorCompl = float(row [2])
                     resT = 1 - prediction / (30 * months * norm_param)
                     if len(row)==4:
@@ -35,7 +33,6 @@ class CSVReader:
                         result[package]=Opinion(resT, errorCompl, newf)
                     else:
                         result[package] = Opinion(resT, errorCompl, f)
-        CSVReader.prediction_read = summ
         return result
 
     @staticmethod

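A note on the change above: the running total prediction_read is replaced by the class-level dict gathered_predictions, which keeps the CSV prediction per package so the tests can later sum it over arbitrary package subsets (see read_model_prediction below). The normalization applied here and inverted in the test helpers forms a simple round trip; a minimal sketch in plain Python, using a made-up prediction value and assuming an opinion whose expectation value equals resT:

    # Round trip between a raw prediction and the trust value resT
    # (hypothetical numbers; no CertainTrust import needed for the arithmetic).
    months, norm_param = 9, 4
    scale = 30 * months * norm_param   # 1080 with these defaults
    prediction = 54.0                  # as read from column 1 of the CSV
    resT = 1 - prediction / scale      # 0.95, stored as the opinion's t-value
    # acc_AND_prediction and separate_prediction invert the mapping:
    recovered = (1 - resT) * scale     # 54.0 again when the expectation equals resT
    assert abs(recovered - prediction) < 1e-9
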
+ 4 - 1
vendors/debian/DebianModel.py

@@ -372,7 +372,10 @@ class DebianModel(VendorModel):
         return DebianAdvisory.unifySrcName(name)
 
     def performTests(self):
-        Tests.input_prediction_error_test(self)
+        #Tests.system_input_prediction_error_test(self)
+        #Tests.random_input_prediction_error_test(self)
+        Tests.relativity_of_expectations_test(self)
+
 
     def gen_model_opinion_set(self, filename, month, norm_param):
         """

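performTests now selects a single test by commenting the others out. If all three should run in one pass, a loop avoids the comment juggling; a sketch (not part of the commit), using only the test methods visible in this diff:

    def performTests(self):
        # run every test from this commit in sequence
        for test in (Tests.system_input_prediction_error_test,
                     Tests.random_input_prediction_error_test,
                     Tests.relativity_of_expectations_test):
            test(self)
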
+ 153 - 22
vendors/debian/Tests.py

@@ -1,12 +1,20 @@
 import os
+import random
+import math
 
 from CertainTrust import Opinion
 from vendors.debian.CSVReader import CSVReader
 
 class Tests:
     @staticmethod
-    def input_prediction_error_test(model):
+    def system_input_prediction_error_test(model):
+        """
+        Compare the errors of three predictions: the cumulative AND prediction, the sum of separate predictions, and the prediction from the CSV file
+        :param model: model to run the tests against
+        :return: None
+        """
         norm_param = 4
+        months = 9
 
         model_filename = os.path.join(model.module_path, "models", "dummy_input_package_prediction_errorcompl.csv")
         model_set = model.gen_model_opinion_set(model_filename, 9, norm_param)
@@ -17,7 +25,6 @@ class Tests:
         # get system dict
         system_dict = dict()
         for key in system:
-            print(key)
             new_key = model.unifySrcName(key)
             if new_key in model_set:
                 system_dict[new_key] = model_set[new_key]
@@ -25,43 +32,167 @@ class Tests:
         print("Evaluating on: " + str(len(system_dict)) + " out of " + str(
             len(system)) + " packages; the rest are not present in our model.")
 
-        Tests.evaluate_expectation(system_dict, model, 9, norm_param)
+        and_prediction = Tests.acc_AND_prediction(system_dict, months, norm_param)
+        sep_prediction = Tests.separate_prediction(system_dict, months, norm_param)
+        csv_prediction = Tests.read_model_prediction(system_dict)
+        s2m_prediction = Tests.get_src2month_data(system_dict, model, months)
+
+
+        and_error = abs(and_prediction - s2m_prediction) / s2m_prediction
+        sep_error = abs(sep_prediction - s2m_prediction) / s2m_prediction
+        csv_error = abs(csv_prediction - s2m_prediction) / s2m_prediction
+
+        print("and_error = " + str(and_error))
+        print("sep_error = " + str(sep_error))
+        print("csv_error = " + str(csv_error))
+
+        return
+
+
+    @staticmethod
+    def random_input_prediction_error_test(model):
+        """
+        Compare the errors of three predictions: the cumulative AND prediction, the sum of separate predictions, and the prediction from the model
+        :param model: model to run the tests against
+        :return: None
+        """
+
+        norm_param = 4
+        months = 9
+
+        model_filename = os.path.join(model.module_path, "models", "dummy_input_package_prediction_errorcompl.csv")
+        model_set = model.gen_model_opinion_set(model_filename, months, norm_param)
+
+        errors_dict = dict()
+        errors_dict["and"] = []
+        errors_dict["sep"] = []
+        errors_dict["csv"] = []
+
+        # compute errors for 100 random subsets
+        for i in range(0, 100):
+            # to compute the errors, we need a random package_list dict of size 100
+            subset = dict()
+            while len(subset) != 100:
+                package = random.choice(list(model_set))
+                subset[package] = model_set[package]
+
+            and_prediction = Tests.acc_AND_prediction(subset, months, norm_param)
+            sep_prediction = Tests.separate_prediction(subset, months, norm_param)
+            csv_prediction = Tests.read_model_prediction(subset)
+            s2m_prediction = Tests.get_src2month_data(subset, model, months)
+
+            errors_dict["and"].append(abs(and_prediction - s2m_prediction) / s2m_prediction)
+            errors_dict["sep"].append(abs(sep_prediction - s2m_prediction) / s2m_prediction)
+            errors_dict["csv"].append(abs(csv_prediction - s2m_prediction) / s2m_prediction)
+
+
+        # given the error lists, we can compute the mean errors
+        avg_and_error_normal = sum(errors_dict["and"]) / len(errors_dict["and"])
+        avg_sep_error_normal = sum(errors_dict["sep"]) / len(errors_dict["sep"])
+        avg_csv_error_normal = sum(errors_dict["csv"]) / len(errors_dict["csv"])
+        print("Normal errors: " + str(avg_and_error_normal) + " : " + str(avg_sep_error_normal) + " : " + str(avg_csv_error_normal))
+
+        # quadratic errors
+        avg_and_error_quadr = math.sqrt(sum(math.pow(i, 2) for i in errors_dict["and"]) / len(errors_dict["and"]))
+        avg_sep_error_quadr = math.sqrt(sum(math.pow(i, 2) for i in errors_dict["sep"]) / len(errors_dict["sep"]))
+        avg_csv_error_quadr = math.sqrt(sum(math.pow(i, 2) for i in errors_dict["csv"]) / len(errors_dict["csv"]))
+        print("Quadratic errors: " + str(avg_and_error_quadr) + " : " + str(avg_sep_error_quadr) + " : " + str(avg_csv_error_quadr))
+
+
+    @staticmethod
+    def relativity_of_expectations_test(model):
+        """
+        Compares the ratio between the ANDed predictions of two random subsets to the ratio seen in the real data
+        :param model: model to run the test against
+        :return: None
+        """
+        norm_param = 4
+        months = 9
+
+        model_filename = os.path.join(model.module_path, "models", "dummy_input_package_prediction_errorcompl.csv")
+        model_set = model.gen_model_opinion_set(model_filename, months, norm_param)
 
+        computed_rel_list = []
+        real_rel_list = []
 
+        for i in range(0, 100):
+            # get two subsets
+            subset1 = dict()
+            while len(subset1) != 100:
+                package = random.choice(list(model_set))
+                subset1[package] = model_set[package]
+            subset2 = dict()
+            while len(subset2) != 100:
+                package = random.choice(list(model_set))
+                subset2[package] = model_set[package]
 
+            # for these two subsets, compute the ratio of their ANDed predictions and of their real data
+            computed_rel_prediction = Tests.acc_AND_prediction(subset1, months, norm_param) / Tests.acc_AND_prediction(subset2, months, norm_param)
+            computed_rel_list.append(computed_rel_prediction)
+
+            real_rel_prediction = Tests.get_src2month_data(subset1, model, months) / Tests.get_src2month_data(subset2, model, months)
+            real_rel_list.append(real_rel_prediction)
+
+        # at this point we have two lists of ratios; let's see how similar they are
+        similarities = []
+        for i in range(0, 100):
+            similarity = abs(real_rel_list[i] - computed_rel_list[i]) / real_rel_list[i]
+            similarities.append(similarity)
+
+        avg_normal_relativity = sum(similarities) / len(similarities)
+        avg_quadratic_relativity = math.sqrt(sum(math.pow(i, 2) for i in similarities) / len(similarities))
+        print("Average normal relativity: " + str(avg_normal_relativity))
+        print("Average quadratic relativity: " + str(avg_quadratic_relativity))
+
+
+    ## helper methods
     @staticmethod
-    def evaluate_expectation(package_list, vendormodel, months, norm_val):
+    def acc_AND_prediction(package_list, months, norm_val):
         """
-        TODO:
-        :param package_list: dictionary of opinions with names as key and opinion as value
-        :param vendormodel: the vendor-model to compare the prediction with
-        :return: TODO:
+        Returns the accumulated AND prediction for a list of packages
+        :param package_list: dictionary with package names as keys and opinions as values
+        :param months: length of the prediction period in months
+        :param norm_val: normalization value
+        :return: prediction
         """
-        print("For normalization value = " + str(norm_val))
-        # ANDed prediction
         system_and = Opinion._cum_and(list(package_list.values()))
         expectation = system_and.expectation_value()
         AND_prediction = (1 - expectation) * (norm_val * months * 30)
+        return AND_prediction
+        #and_error = abs(summ - AND_prediction) / summ
+        #print("ANDed prediction = " + str(AND_prediction) + ", ( error = " + str(and_error) + " )")
 
-        # separate prediction
+    @staticmethod
+    def separate_prediction(package_list, months, norm_val):
+        """
+        Returns the sum of separate predictions for a list of packages
+        :param package_list: dictionary with package names as keys and opinions as values
+        :param months: length of the prediction period in months
+        :param norm_val: normalization value
+        :return: prediction
+        """
         sep_pred = 0
         for k in package_list:
             sep_pred = sep_pred + ((1 - package_list[k].expectation_value()) * (norm_val * months * 30))
+        return sep_pred
+
+    @staticmethod
+    def read_model_prediction(package_list):
+        """
+        :param package_list: dictionary with package names as keys and opinions as values
+        :return: sum of the model predictions
+        """
+        summ = 0
+        for k in package_list:
+            summ = summ + CSVReader.gathered_predictions[k]
+        return summ
 
-        # src2month prediction gathering
+    @staticmethod
+    def get_src2month_data(package_list, vendormodel, months):
         summ = 0
         for package in package_list:
             unified_package = vendormodel.unifySrcName(package)
             src2month = vendormodel.get_src2month()
             if unified_package in src2month:
                 summ = summ + sum(src2month[unified_package][-months - 3:-3])
-
-        file_error = abs(summ - CSVReader.prediction_read) / summ
-        print("Input file prediction = " + str(CSVReader.prediction_read) + ", ( error = " + str(file_error) + " )")
-
-        sep_error = abs(summ - sep_pred) / summ
-        print("Separate prediction = " + str(sep_pred) + ", ( error = " + str(sep_error) + " )")
-
-        and_error = abs(summ - AND_prediction) / summ
-        print("ANDed prediction = " + str(AND_prediction) + ", ( error = " + str(and_error) + " )")
-        print("Src2month data = " + str(summ))
+        return summ

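Both random tests aggregate their per-subset values the same way: an arithmetic mean ("normal") and a root-mean-square ("quadratic") that weighs outliers more heavily; the relativity test applies this to abs(real_ratio - computed_ratio) / real_ratio per subset pair. A self-contained sketch with hypothetical error values:

    import math

    # hypothetical per-subset relative errors
    errors = [0.12, 0.08, 0.30, 0.05]

    mean_error = sum(errors) / len(errors)                            # 0.1375
    rms_error = math.sqrt(sum(e ** 2 for e in errors) / len(errors))  # ~0.168

    # the quadratic error is never below the mean and grows when a few
    # subsets are predicted much worse than the rest
    print("normal:", mean_error, "quadratic:", rms_error)
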
File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/cvetable


File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/dsa2cve


File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/dsatable


File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/src2dsa


File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/src2month


File diff suppressed because it is too large
+ 0 - 0
vendors/debian/cache/oldcache/src2sum


+ 1 - 1
vendors/debian/cache/oldcache/state

@@ -1 +1 @@
-{"Sources": "", "vendor": "debian", "Sha1Sums": "", "next_adv": 4021, "cache_dir": "cache/state", "Packages": "", "next_fsa": 0}
+{"Sources": "", "next_adv": 4051, "Packages": "", "cache_dir": "cache/state", "next_fsa": 0, "vendor": "debian", "Sha1Sums": ""}

Some files were not shown because too many files changed in this diff