123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
- # -*- coding: utf-8 -*-
- import csv
- """Utilities for creating/reading/writing generalization hierarchies
- Hierarchies stored in csv files have the following structure:
- level0;level1;level2;level3;...
- e.g.
- Bachelors;Undergraduate;Higher education
- 12th;High School;Secondary education
- Hierarchies used in code have the following structure:
- [ level1 dictionary,
- level2 dictionary,
- .
- .
- .
- ]
- e.g. (Each key represents a generalization of the values inside the list; 1.dict=level1, 2.dict=level2)
- education = [
- {"Undergraduate": [" Bachelors", " Some-college"],
- "High School":[" 11th", " HS-grad", " 9th", " 7th-8th", " 12th", " 10th"],
- "Professional Education":[" Prof-school", " Assoc-acdm", " Assoc-voc"],
- "Graduate":[" Masters", " Doctorate"]},
- {"Higher education": [" Bachelors", " Some-college", " Prof-school", " Assoc-acdm", " Assoc-voc", " Masters", " Doctorate"],
- "Secondary education":[" 11th", " HS-grad", " 9th", " 7th-8th", " 12th", " 10th"]}
- ]
- """
def create_gen_hierarchy(path):
    """Interactively build a generalization hierarchy for one column (QI).

    The dataset at ``path`` is loaded with ``read_data`` using ``;`` as the
    delimiter, and the first cell of every column is used as its header.
    The user selects a column either by its 1-based position or by its
    header name, states how many generalization levels it should have, and
    is then prompted for the generalized value of every distinct cell value
    at every level.

    :param path: Path to the ``;``-delimited csv file containing the dataset
    :return: Tuple ``(gen_hier, header)``; ``gen_hier`` is a list with one
        dictionary per level mapping a generalized value to the list of
        original values it covers, ``header`` is the selected column name
    :rtype: tuple
    :raises ValueError: If the entered column name is not a header or the
        number of levels is not an integer
    :raises IndexError: If the entered column number is out of range
    """
    raw_data = read_data(path, ";")
    headers = [col[0] for col in raw_data]

    column_name = input('Select QI: \n'+str(headers)+'\n')
    if column_name.isdigit():
        # Numeric answers are 1-based positions in the printed header list.
        index = int(column_name) - 1
    else:
        index = headers.index(column_name)
    header = headers[index]

    # Everything below the header cell is data. The previous slice [1:-1]
    # silently dropped the last row, so a value occurring only there never
    # made it into the hierarchy. Sorting gives a stable prompting order
    # instead of the arbitrary iteration order of a set.
    distinct_values = sorted(set(raw_data[index][1:]))

    levels = int(input('How many generalization level for QI '+header+'?\n'))

    gen_hier = []
    for value in distinct_values:
        for level in range(levels):
            gval = input('Level '+str(level)+' generalization for QI '+value+'?\n')
            # Levels are visited in ascending order, so a missing level is
            # always exactly the next one to be appended.
            if level == len(gen_hier):
                gen_hier.append({})
            gen_hier[level].setdefault(gval, []).append(value)
    return gen_hier, header
def write_gen_hierarchy(path, gen_hier, header):
    """Write a generalization hierarchy to a ``;``-separated csv file.

    Each output line describes one original value:
    ``value;level1 generalization;level2 generalization;...``.
    Rows appear in the order in which the values are listed in the first
    level dictionary.

    The file is created at ``path + "gen_hier_" + str(header) + ".csv"``.
    NOTE(review): ``read_gen_hierarchy`` opens
    ``path + "_hierarchy_" + header + ".csv"``, so the same ``path`` does not
    address the same file in both functions -- confirm the intended naming
    with the callers.

    :param path: Prefix of the output file name
    :param gen_hier: Generalization hierarchy (list of dictionaries, one per
        level, mapping a generalized value to the original values it covers)
    :param header: Name of QI
    :return: None
    :raises ValueError: If a value of a higher level is not part of the
        first level
    """
    rows = []
    # Maps an original value to its output row so that higher levels can be
    # appended without scanning a list for every value.
    row_by_value = {}
    for level, groups in enumerate(gen_hier):
        for generalization, originals in groups.items():
            for value in originals:
                if level == 0:
                    row = [value, generalization]
                    rows.append(row)
                    # If a value is listed twice in the first level, later
                    # levels extend its first row only.
                    row_by_value.setdefault(value, row)
                elif value in row_by_value:
                    row_by_value[value].append(generalization)
                else:
                    raise ValueError(
                        repr(value) + " is not part of the first generalization level"
                    )

    with open(path + "gen_hier_" + str(header) + ".csv", "w") as output:
        # Join the cells of each row; joining str(row) would put a ';'
        # between every single character of the list's repr.
        output.writelines(
            ';'.join(str(cell) for cell in row) + '\n' for row in rows
        )
def read_gen_hierarchy(path, header):
    """Read the generalization hierarchy of one QI from a csv file.

    The file ``path + '_hierarchy_' + header + '.csv'`` is read line by line.
    Each line is split at ``;``: the first field is the original value, and
    every following field is its generalization at the next higher level.

    :param path: Prefix of the file name containing the generalization
        hierarchy
    :param header: Name of the QI; part of the file name
    :return: Generalization hierarchy for one QI: a list with one dictionary
        per level, mapping a generalized value to the list of original
        values it covers
    """
    gen_hier = []
    with open(path+'_hierarchy_'+header+'.csv', "r") as hierarchy_file:
        for line in hierarchy_file:
            original, *generalizations = line.rstrip().split(";")
            for level, generalized in enumerate(generalizations):
                # Create level dictionaries on demand. Sizing the list from
                # the first line only made a blank or shorter first line
                # fail with IndexError on the lines that follow.
                if level == len(gen_hier):
                    gen_hier.append({})
                gen_hier[level].setdefault(generalized, []).append(original)
    return gen_hier
def read_data(file_name: str, delimiter: str) -> list:
    """Reads dataset from a csv file and returns it column by column

    The first non-blank row determines the number of columns. Every cell is
    stripped of surrounding whitespace. Completely blank lines are skipped,
    and cells beyond the column count of the first row are ignored.

    :param file_name: Path to the csv file
    :param delimiter: Character that is used as delimiter in the csv file
    :return: 2d list: A Cell can be accessed with data[col][row]; an empty
        list if the file contains no rows
    :raises IndexError: If a non-blank row has fewer cells than the first row
    """
    # Bound up front so that an empty file yields [] instead of failing with
    # UnboundLocalError at the return statement.
    data = []
    col_count = None
    # newline="" lets the csv module do its own newline handling, as
    # recommended by the csv documentation.
    with open(file_name, newline="") as csv_file:
        for row in csv.reader(csv_file, delimiter=delimiter):
            if not row:
                # csv.reader reports a blank line as an empty list.
                continue
            if col_count is None:
                col_count = len(row)
                data = [[] for _ in range(col_count)]
            for col in range(col_count):
                data[col].append(row[col].strip())
    return data
|