# hierarchy_utilities.py
  1. # -*- coding: utf-8 -*-
  2. import csv
  3. """Utilities for creating/reading/writing generalization hierarchies

Hierarchies stored in csv files have the following structure:
    level0;level1;level2;level3;...
e.g.
    Bachelors;Undergraduate;Higher education
    12th;High School;Secondary education

Hierarchies used in code have the following structure:
    [ level1 dictionary,
      level2 dictionary,
      .
      .
      .
    ]
e.g. (Each key represents a generalization of the values inside the list;
1st dict = level1, 2nd dict = level2)
    education = [
        {"Undergraduate": [" Bachelors", " Some-college"],
         "High School": [" 11th", " HS-grad", " 9th", " 7th-8th", " 12th", " 10th"],
         "Professional Education": [" Prof-school", " Assoc-acdm", " Assoc-voc"],
         "Graduate": [" Masters", " Doctorate"]},
        {"Higher education": [" Bachelors", " Some-college", " Prof-school", " Assoc-acdm", " Assoc-voc", " Masters", " Doctorate"],
         "Secondary education": [" 11th", " HS-grad", " 9th", " 7th-8th", " 12th", " 10th"]}
    ]
  25. """
  26. def create_gen_hierarchy(path):
  27. """Simple Textbased guide to create new generalization hierarchies
  28. :return: Created generalization hierarchy
  29. :rtype: list
  30. """
  31. raw_data = read_data(path, ";")
  32. headers = []
  33. for col in raw_data:
  34. headers.append(col[0])
  35. column_name = input('Select QI: \n'+str(headers)+'\n')
  36. if column_name.isdigit():
  37. index = int(column_name)-1
  38. else:
  39. index = headers.index(column_name)
  40. column = raw_data[index]
  41. column_set = set(column[1:-1])
  42. levels = int(input('How many generalization level for QI '+headers[index]+'?\n'))
  43. gen_hier = []
  44. for value in column_set:
  45. for level in range(levels):
  46. gval = input('Level '+str(level)+' generalization for QI '+value+'?\n')
  47. try:
  48. gen_hier[level][gval].append(value)
  49. except KeyError:
  50. gen_hier[level].update({gval: [value]})
  51. except IndexError:
  52. gen_hier.append({gval: [value]})
  53. return gen_hier, headers[index]
  54. def write_gen_hierarchy(path, gen_hier, header):
  55. """Write generalization hierarchy from variable into csv file
  56. :param gen_hier: Generalization hierarchy
  57. :param header: Name of QI
  58. :return: None
  59. """
  60. first_level = True
  61. index_list = []
  62. rows = []
  63. for di in gen_hier:
  64. for key, lists in di.items():
  65. for value in lists:
  66. if first_level:
  67. index_list.append(value)
  68. rows.append([value, key])
  69. else:
  70. rows[index_list.index(value)].append(key)
  71. first_level = False
  72. with open(path+"gen_hier_" + str(header) + ".csv", "w") as output:
  73. for r in rows:
  74. output.write(';'.join(str(r)) + '\n')
  75. output.close()
  76. def read_gen_hierarchy(path, header):
  77. """
  78. :param path: Path to the file containing the generalization hierarchy
  79. :return: Generalization hierarchy for one QI
  80. """
  81. gen_hier = []
  82. linecount = 0
  83. with open(path+'_hierarchy_'+header+'.csv', "r") as input:
  84. for line in input.readlines():
  85. values = line.rstrip().split(";")
  86. firstval = values[0]
  87. values = values[1:]
  88. colnum = 0
  89. for val in values:
  90. if linecount == 0:
  91. gen_hier.append({})
  92. try:
  93. gen_hier[colnum][val].append(firstval)
  94. except KeyError:
  95. gen_hier[colnum].update({val: [firstval]})
  96. colnum += 1
  97. linecount += 1
  98. input.close()
  99. return gen_hier
  100. def read_data(file_name: str, delimiter: str) -> list:
  101. """Reads dataset from a csv file
  102. :param file_name: Path to the csv file
  103. :param delimiter: Character that is used as delimiter in the csv file
  104. :return: 2d list: A Cell can be accessed with data[col][row]
  105. """
  106. with open(file_name) as csv_file:
  107. csv_reader = csv.reader(csv_file, delimiter=delimiter)
  108. line_count = 0
  109. for row in csv_reader:
  110. if line_count == 0:
  111. col_count = len(row)
  112. data = [[] for _ in range(col_count)]
  113. for col in range(col_count):
  114. data[col].append(row[col].strip())
  115. line_count += 1
  116. csv_file.close()
  117. return data