Counter.java 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package analyzer;
  2. import analyzer.models.Hashtag;
  3. import analyzer.models.Round;
  4. import analyzer.models.User;
  5. import java.io.*;
  6. import java.util.*;
  7. import java.util.concurrent.atomic.AtomicReference;
  8. import java.util.stream.Collectors;
  9. public class Counter {
  10. private static Map<String, Integer> hashtagsCounter = new HashMap<>();
  11. private static Map<String, Integer> clientsCounter = new HashMap<>();
  12. private static String path = "C:\\Users\\Admin\\Desktop\\Skripts\\Thesis\\Repo\\local\\round_based_data_" + Analyzer.roundLength + ".txt";
  13. public static void main(String[] args) throws IOException, InterruptedException {
  14. countHashtag(10);
  15. System.out.println();
  16. countClient(10000);
  17. //datasetStatistics();
  18. //trial();
  19. }
  20. public static void countHashtag(int num) throws IOException {
  21. FileReader fr = new FileReader(Analyzer.serverLogPath);
  22. BufferedReader br= new BufferedReader(fr);
  23. String line;
  24. while((line = br.readLine()) != null) {
  25. String[] elements = line.trim().split("\t");//timestamp user_id hashtags
  26. String[] hashtags = elements[2].split(" ");
  27. for(String h : hashtags) {
  28. if(hashtagsCounter.containsKey(h))
  29. hashtagsCounter.replace(h, hashtagsCounter.get(h) + 1);
  30. else
  31. hashtagsCounter.put(h, 1);
  32. }
  33. }
  34. List<Map.Entry<String, Integer>> list = new ArrayList<>(hashtagsCounter.entrySet());
  35. list.sort(Map.Entry.comparingByValue((v1, v2) -> Integer.compare(v2, v1)));
  36. System.out.println(hashtagsCounter.size() + " hashtags");
  37. System.out.println("Hashtag" + "\t" + "Times");
  38. for(int i=0; i<num; i++)
  39. System.out.println(list.get(i).getKey() + "\t" + list.get(i).getValue());
  40. }
  41. public static void countClient(int num) throws IOException {
  42. FileReader fr = new FileReader(Analyzer.clientLogPath);
  43. BufferedReader br= new BufferedReader(fr);
  44. String line;
  45. while((line = br.readLine()) != null) {
  46. String[] elements = line.trim().split("\t");//client-id destination timestamp
  47. if (clientsCounter.containsKey(elements[0]))
  48. clientsCounter.replace(elements[0], clientsCounter.get(elements[0]) + 1);
  49. else
  50. clientsCounter.put(elements[0], 1);
  51. }
  52. List<Map.Entry<String, Integer>> list = new ArrayList<>(clientsCounter.entrySet());
  53. list.sort(Map.Entry.comparingByValue((v1, v2) -> Integer.compare(v2, v1)));
  54. System.out.println(clientsCounter.size() + " clients");
  55. System.out.println("Client" + "\t" + "Posts");
  56. for(int i=0; i<num; i++)
  57. System.out.println(list.get(i).getKey() + "\t" + list.get(i).getValue());
  58. }
  59. private static void datasetStatistics() throws InterruptedException, IOException {
  60. Analyzer.serverLogPath = path;
  61. new Analyzer();
  62. //hashtagsPerUser();
  63. //postsPerUserPerRound();
  64. //messagesPerHashtagPerRound();
  65. //popularHashtagsOvertime();
  66. //hashtagTable(50);
  67. //distribution1(new int[] {2, 3, 4, 10, 100, 1000});
  68. //distribution2();
  69. //exportDataPopularity(new int[] {1, 10, 100, 1000, 10000, 100000});
  70. //exportDataLifetime(new int[] {1, 10, 100, 1000, 10000, 100000});
  71. //numOfPostsEachRound();
  72. activeTime();
  73. }
  74. private static void hashtagsPerUser() {
  75. int min = Integer.MAX_VALUE;
  76. long total = 0L;
  77. int max = Integer.MIN_VALUE;
  78. int allUsers = ServerLogParser.users.size();
  79. for(User u : ServerLogParser.users.values()) {
  80. int noOfHashtags = u.getHashtags().size();
  81. if(min > noOfHashtags)
  82. min = noOfHashtags;
  83. if(max < noOfHashtags)
  84. max = noOfHashtags;
  85. total += noOfHashtags;
  86. }
  87. double avg = ((double)total / (double)allUsers);
  88. System.out.println();
  89. System.out.println("hashtagsPerUser");
  90. System.out.println("Min: " + min);
  91. System.out.println("Max: " + max);
  92. System.out.println("Avg: " + avg);
  93. }
  94. private static void postsPerUserPerRound() {
  95. int min = Integer.MAX_VALUE;
  96. int max = Integer.MIN_VALUE;
  97. double avg;
  98. double avgSigma = 0;
  99. for(Round r : ServerLogParser.rounds.values()) {
  100. int total_i = r.getUserMap().values().stream().mapToInt(v -> v).sum();
  101. double avg_i = (double) total_i / (double) r.getUserMap().size();
  102. avgSigma += avg_i;
  103. }
  104. avg = avgSigma / (double) ServerLogParser.rounds.size();
  105. for(Round r : ServerLogParser.rounds.values()) {
  106. for(Integer postNo : r.getUserMap().values()) {
  107. if(max < postNo)
  108. max = postNo;
  109. if(min > postNo)
  110. min = postNo;
  111. }
  112. }
  113. System.out.println();
  114. System.out.println("postsPerUserPerRound");
  115. System.out.println("Min: " + min);
  116. System.out.println("Max: " + max);
  117. System.out.println("Avg: " + avg);
  118. }
  119. private static void messagesPerHashtagPerRound() {
  120. int min = Integer.MAX_VALUE;
  121. int max = Integer.MIN_VALUE;
  122. double avg;
  123. double avgSigma = 0;
  124. for(Round r : ServerLogParser.rounds.values()) {
  125. int total_i = r.getHashtags().values().stream().mapToInt(v -> v).sum();
  126. double avg_i = (double) total_i / (double) r.getHashtags().size();
  127. avgSigma += avg_i;
  128. }
  129. avg = avgSigma / (double) ServerLogParser.rounds.size();
  130. for(Round r : ServerLogParser.rounds.values()) {
  131. for(Integer postNo : r.getHashtags().values()) {
  132. if(max < postNo)
  133. max = postNo;
  134. if(min > postNo)
  135. min = postNo;
  136. }
  137. }
  138. System.out.println();
  139. System.out.println("messagesPerHashtagPerRound");
  140. System.out.println("Min: " + min);
  141. System.out.println("Max: " + max);
  142. System.out.println("Avg: " + avg);
  143. }
  144. private static void popularHashtagsOvertime() {
  145. List<Hashtag>[] arrOfLst = new List[ServerLogParser.rounds.size()];
  146. int[] max = new int[ServerLogParser.rounds.size()];
  147. Arrays.fill(max, Integer.MIN_VALUE);
  148. for(Round r : ServerLogParser.rounds.values()) {
  149. for(Integer v : r.getHashtags().values()) {
  150. if(v > max[r.getNo() - 1]) {
  151. max[r.getNo() - 1] = v;
  152. }
  153. }
  154. List<Hashtag> lst = new ArrayList<>();
  155. for(Map.Entry<Hashtag, Integer> e : r.getHashtags().entrySet()) {
  156. if(e.getValue().equals(max[r.getNo() - 1])) {
  157. lst.add(e.getKey());
  158. }
  159. }
  160. arrOfLst[r.getNo() - 1] = lst;
  161. }
  162. List<Hashtag> lstOfPopularHashtags = new ArrayList<>();
  163. for(List<Hashtag> l : arrOfLst) {
  164. for(Hashtag h : l) {
  165. if(!lstOfPopularHashtags.contains(h))
  166. lstOfPopularHashtags.add(h);
  167. }
  168. }
  169. System.out.println();
  170. System.out.println("popularHashtagsOvertime " + lstOfPopularHashtags.size());
  171. System.out.println("Most popular hashtags of round ");
  172. for(int i = 0; i < arrOfLst.length; i++) {
  173. Round thisRound = ServerLogParser.rounds.get(i+1);
  174. System.out.print(thisRound.getNo() + "\t");
  175. for (Hashtag h : arrOfLst[i]) {
  176. System.out.print(h.getName() + "|" + h.getRoundMap().get(thisRound) + " ");
  177. }
  178. System.out.println();
  179. }
  180. }
  181. private static void hashtagTable(int maxRound) throws IOException {
  182. String wPath = "C:\\Users\\Admin\\Desktop\\hiwi tud\\";
  183. File file = new File(wPath + "hashtag_table_" + Analyzer.roundLength + ".txt");
  184. FileWriter fw = new FileWriter(file, true);
  185. BufferedWriter bw = new BufferedWriter(fw);
  186. Map<Integer, Round> map = ServerLogParser.rounds;
  187. int num = Math.min(maxRound, map.size());
  188. bw.write("Round:[Hashtag|NumOfMessages...]");
  189. bw.newLine();
  190. for(int i = 0; i < maxRound; i++) {
  191. Round r = map.get(i+1);
  192. List<Map.Entry<Hashtag, Integer>> sortedList = new ArrayList<>(r.getHashtags().entrySet());
  193. sortedList.sort(Comparator.comparingInt(Map.Entry::getValue));
  194. bw.write(r.getNo() + ":");
  195. for(Map.Entry<Hashtag, Integer> e : sortedList) {
  196. bw.write(e.getKey().getName() + "|" + e.getValue() + " ");
  197. }
  198. bw.newLine();
  199. }
  200. bw.close();
  201. }
  202. private static void distribution1(int[] intervals) throws IOException {
  203. int[] counters = new int[intervals.length + 1];
  204. for(Round r : ServerLogParser.rounds.values()) {
  205. r.getHashtags().values().forEach(v -> {
  206. int indexMax = -1;
  207. for(int i = 0; i < intervals.length; i++) {
  208. if(v < intervals[i]) {
  209. indexMax = i;
  210. break;
  211. }
  212. }
  213. if(indexMax == -1)
  214. counters[counters.length - 1]++;
  215. else
  216. counters[indexMax]++;
  217. });
  218. }
  219. int totalCount = Arrays.stream(counters).sum();
  220. for(int i = 0; i < counters.length; i++) {
  221. String printedString;
  222. if(i == 0)
  223. printedString = "1 - " + intervals[i] + ":";
  224. else if (i == counters.length - 1)
  225. printedString = intervals[i-1] + " - MAX:";
  226. else
  227. printedString = intervals[i-1] + " - " + intervals[i] + ":";
  228. double share = ((double)counters[i]/(double) totalCount) * 100;
  229. System.out.println(printedString + "\t" + counters[i] + " " + share);
  230. }
  231. System.out.println();
  232. }
  233. private static void distribution2() {
  234. Map<Integer, Integer> map = new Hashtable<>();
  235. for(Round r : ServerLogParser.rounds.values()) {
  236. r.getHashtags().values().forEach(v -> {
  237. if(map.containsKey(v))
  238. map.replace(v, map.get(v) + 1);
  239. else
  240. map.put(v, 1);
  241. });
  242. }
  243. int kinds = map.values().stream().mapToInt(v -> v).sum();
  244. AtomicReference<Double> betterAvg = new AtomicReference<>((double) 0);
  245. map.forEach((k, v) -> {
  246. double weight = (double) v / (double) kinds;
  247. betterAvg.updateAndGet(v1 -> (v1 + k * weight));
  248. });
  249. System.out.println("betterAvg: " + betterAvg.get());
  250. }
  251. public static void trial() throws InterruptedException {
  252. Analyzer.serverLogPath = path;
  253. new Analyzer();
  254. Round round149 = ServerLogParser.rounds.get(149);
  255. round149.getHashtags().forEach((k, v) -> {
  256. System.out.println(k.getName() + " " + v);
  257. });
  258. }
  259. public static void exportDataLifetime(int[] intervals) {
  260. int maxAvgPostNo = 0;
  261. Map<Hashtag, Integer> map = new Hashtable<>();
  262. for(Hashtag h : ServerLogParser.hashtags.values()) {
  263. int totalPosts = h.getTotalPosts();
  264. //double avgPostsPerRound = (double) totalPosts / (double) roundNo;
  265. //double avgPostsPerRound = (double) totalPosts / (double) h.getRoundMap().size();
  266. if(maxAvgPostNo < totalPosts)
  267. maxAvgPostNo = (int) totalPosts + 1;
  268. map.put(h, totalPosts);
  269. }
  270. int maxAvg = maxAvgPostNo;
  271. List<Map.Entry<Hashtag, Integer>>[] arrOfLists = new List[intervals.length + 1];
  272. for(int i = 0; i < arrOfLists.length; i++) {
  273. if(i == 0) {
  274. int finalRightLimit = intervals[i];
  275. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= 0 && e.getValue() < finalRightLimit).collect(Collectors.toList());
  276. } else if(i == arrOfLists.length - 1) {
  277. int finalLeftLimit = intervals[i - 1];
  278. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < maxAvg).collect(Collectors.toList());
  279. }
  280. else {
  281. int finalLeftLimit = intervals[i-1];
  282. int finalRightLimit = intervals[i];
  283. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < finalRightLimit).collect(Collectors.toList());
  284. }
  285. }
  286. double[] avgs = new double[arrOfLists.length];
  287. for(int i = 0; i < arrOfLists.length; i++) {
  288. List<Map.Entry<Hashtag, Integer>> l = arrOfLists[i];
  289. avgs[i] = l.stream().mapToInt(e -> e.getKey().getRoundMap().size()).sum() / (double) l.size();
  290. }
  291. for(double v : avgs)
  292. System.out.println(v);
  293. }
  294. public static void exportDataPopularity(int[] intervals) {
  295. int roundNo = ServerLogParser.rounds.size();
  296. int maxAvgPostNo = 0;
  297. Map<Hashtag, Integer> map = new Hashtable<>();
  298. for(Hashtag h : ServerLogParser.hashtags.values()) {
  299. int totalPosts = h.getTotalPosts();
  300. //double avgPostsPerRound = (double) totalPosts / (double) roundNo;
  301. //double avgPostsPerRound = (double) totalPosts / (double) h.getRoundMap().size();
  302. if(maxAvgPostNo < totalPosts)
  303. maxAvgPostNo = (int) totalPosts + 1;
  304. map.put(h, totalPosts);
  305. }
  306. int maxAvg = maxAvgPostNo;
  307. List<Map.Entry<Hashtag, Integer>>[] arrOfLists = new List[intervals.length + 1];
  308. for(int i = 0; i < arrOfLists.length; i++) {
  309. if(i == 0) {
  310. int finalRightLimit = intervals[i];
  311. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= 0 && e.getValue() < finalRightLimit).collect(Collectors.toList());
  312. } else if(i == arrOfLists.length - 1) {
  313. int finalLeftLimit = intervals[i - 1];
  314. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < maxAvg).collect(Collectors.toList());
  315. }
  316. else {
  317. int finalLeftLimit = intervals[i-1];
  318. int finalRightLimit = intervals[i];
  319. arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < finalRightLimit).collect(Collectors.toList());
  320. }
  321. }
  322. for(int i = 0; i < arrOfLists.length; i++) {
  323. int lstSize = arrOfLists[i].size();
  324. double percentage = ((double) lstSize / (double) ServerLogParser.hashtags.size()) * 100.00;
  325. if(i == 0)
  326. System.out.println(0 + " - " + intervals[i] + ":\t" + lstSize + " " + percentage);
  327. else if(i == arrOfLists.length - 1)
  328. System.out.println(intervals[i-1] + " - " + (maxAvg + 1) + ":\t" + lstSize + " " + percentage);
  329. else
  330. System.out.println(intervals[i-1] + " - " + intervals[i] + ":\t" + lstSize + " " + percentage);
  331. }
  332. double[][] arrOfsums = new double[arrOfLists.length][ServerLogParser.rounds.size()];
  333. for(int i = 0; i < arrOfLists.length; i++) {
  334. List<Map.Entry<Hashtag, Integer>> l = arrOfLists[i];
  335. int size = l.size();
  336. if(size == 0)
  337. continue;
  338. System.out.print("Interval " + i + "\t");
  339. for(int j = 0; j < ServerLogParser.rounds.size(); j++){
  340. Round r = ServerLogParser.rounds.get(j+1);
  341. int sum = l.stream().mapToInt(e -> {
  342. Integer temp = e.getKey().getRoundMap().get(r);
  343. return Objects.requireNonNullElse(temp, 0);
  344. }).sum();
  345. arrOfsums[i][j] = sum;
  346. System.out.print(j+1 + "|" + arrOfsums[i][j] + " ");
  347. }
  348. System.out.println();
  349. }
  350. }
  351. public static void numOfPostsEachRound() {
  352. List<Map.Entry<Integer, Round>> entries = new ArrayList<>(ServerLogParser.rounds.entrySet());
  353. entries.sort(Comparator.comparingInt(Map.Entry::getKey));
  354. for(Map.Entry<Integer, Round> e : entries) {
  355. int totalPosts = e.getValue().getUserMap().values().stream().mapToInt(v -> v).sum();
  356. System.out.print(totalPosts + " ");
  357. }
  358. }
  359. public static void activeTime() {
  360. int sumActive = ServerLogParser.rounds.values().stream().mapToInt(r -> r.getHashtags().size()).sum();
  361. double activeTime = (((double) sumActive / (double) ServerLogParser.hashtags.size()) / ServerLogParser.rounds.size()) * 100.00;
  362. System.out.println(activeTime + " active time");
  363. }
  364. }