123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- package analyzer;
- import analyzer.models.Hashtag;
- import analyzer.models.Round;
- import analyzer.models.User;
- import java.io.*;
- import java.util.*;
- import java.util.concurrent.atomic.AtomicReference;
- import java.util.stream.Collectors;
- public class Counter {
- private static Map<String, Integer> hashtagsCounter = new HashMap<>();
- private static Map<String, Integer> clientsCounter = new HashMap<>();
- private static String path = "C:\\Users\\Admin\\Desktop\\Skripts\\Thesis\\Repo\\local\\round_based_data_" + Analyzer.roundLength + ".txt";
- public static void main(String[] args) throws IOException, InterruptedException {
- countHashtag(10);
- System.out.println();
- countClient(10000);
- //datasetStatistics();
- //trial();
- }
- public static void countHashtag(int num) throws IOException {
- FileReader fr = new FileReader(Analyzer.serverLogPath);
- BufferedReader br= new BufferedReader(fr);
- String line;
- while((line = br.readLine()) != null) {
- String[] elements = line.trim().split("\t");//timestamp user_id hashtags
- String[] hashtags = elements[2].split(" ");
- for(String h : hashtags) {
- if(hashtagsCounter.containsKey(h))
- hashtagsCounter.replace(h, hashtagsCounter.get(h) + 1);
- else
- hashtagsCounter.put(h, 1);
- }
- }
- List<Map.Entry<String, Integer>> list = new ArrayList<>(hashtagsCounter.entrySet());
- list.sort(Map.Entry.comparingByValue((v1, v2) -> Integer.compare(v2, v1)));
- System.out.println(hashtagsCounter.size() + " hashtags");
- System.out.println("Hashtag" + "\t" + "Times");
- for(int i=0; i<num; i++)
- System.out.println(list.get(i).getKey() + "\t" + list.get(i).getValue());
- }
- public static void countClient(int num) throws IOException {
- FileReader fr = new FileReader(Analyzer.clientLogPath);
- BufferedReader br= new BufferedReader(fr);
- String line;
- while((line = br.readLine()) != null) {
- String[] elements = line.trim().split("\t");//client-id destination timestamp
- if (clientsCounter.containsKey(elements[0]))
- clientsCounter.replace(elements[0], clientsCounter.get(elements[0]) + 1);
- else
- clientsCounter.put(elements[0], 1);
- }
- List<Map.Entry<String, Integer>> list = new ArrayList<>(clientsCounter.entrySet());
- list.sort(Map.Entry.comparingByValue((v1, v2) -> Integer.compare(v2, v1)));
- System.out.println(clientsCounter.size() + " clients");
- System.out.println("Client" + "\t" + "Posts");
- for(int i=0; i<num; i++)
- System.out.println(list.get(i).getKey() + "\t" + list.get(i).getValue());
- }
- private static void datasetStatistics() throws InterruptedException, IOException {
- Analyzer.serverLogPath = path;
- new Analyzer();
- //hashtagsPerUser();
- //postsPerUserPerRound();
- //messagesPerHashtagPerRound();
- //popularHashtagsOvertime();
- //hashtagTable(50);
- //distribution1(new int[] {2, 3, 4, 10, 100, 1000});
- //distribution2();
- //exportDataPopularity(new int[] {1, 10, 100, 1000, 10000, 100000});
- //exportDataLifetime(new int[] {1, 10, 100, 1000, 10000, 100000});
- //numOfPostsEachRound();
- activeTime();
- }
- private static void hashtagsPerUser() {
- int min = Integer.MAX_VALUE;
- long total = 0L;
- int max = Integer.MIN_VALUE;
- int allUsers = ServerLogParser.users.size();
- for(User u : ServerLogParser.users.values()) {
- int noOfHashtags = u.getHashtags().size();
- if(min > noOfHashtags)
- min = noOfHashtags;
- if(max < noOfHashtags)
- max = noOfHashtags;
- total += noOfHashtags;
- }
- double avg = ((double)total / (double)allUsers);
- System.out.println();
- System.out.println("hashtagsPerUser");
- System.out.println("Min: " + min);
- System.out.println("Max: " + max);
- System.out.println("Avg: " + avg);
- }
- private static void postsPerUserPerRound() {
- int min = Integer.MAX_VALUE;
- int max = Integer.MIN_VALUE;
- double avg;
- double avgSigma = 0;
- for(Round r : ServerLogParser.rounds.values()) {
- int total_i = r.getUserMap().values().stream().mapToInt(v -> v).sum();
- double avg_i = (double) total_i / (double) r.getUserMap().size();
- avgSigma += avg_i;
- }
- avg = avgSigma / (double) ServerLogParser.rounds.size();
- for(Round r : ServerLogParser.rounds.values()) {
- for(Integer postNo : r.getUserMap().values()) {
- if(max < postNo)
- max = postNo;
- if(min > postNo)
- min = postNo;
- }
- }
- System.out.println();
- System.out.println("postsPerUserPerRound");
- System.out.println("Min: " + min);
- System.out.println("Max: " + max);
- System.out.println("Avg: " + avg);
- }
- private static void messagesPerHashtagPerRound() {
- int min = Integer.MAX_VALUE;
- int max = Integer.MIN_VALUE;
- double avg;
- double avgSigma = 0;
- for(Round r : ServerLogParser.rounds.values()) {
- int total_i = r.getHashtags().values().stream().mapToInt(v -> v).sum();
- double avg_i = (double) total_i / (double) r.getHashtags().size();
- avgSigma += avg_i;
- }
- avg = avgSigma / (double) ServerLogParser.rounds.size();
- for(Round r : ServerLogParser.rounds.values()) {
- for(Integer postNo : r.getHashtags().values()) {
- if(max < postNo)
- max = postNo;
- if(min > postNo)
- min = postNo;
- }
- }
- System.out.println();
- System.out.println("messagesPerHashtagPerRound");
- System.out.println("Min: " + min);
- System.out.println("Max: " + max);
- System.out.println("Avg: " + avg);
- }
- private static void popularHashtagsOvertime() {
- List<Hashtag>[] arrOfLst = new List[ServerLogParser.rounds.size()];
- int[] max = new int[ServerLogParser.rounds.size()];
- Arrays.fill(max, Integer.MIN_VALUE);
- for(Round r : ServerLogParser.rounds.values()) {
- for(Integer v : r.getHashtags().values()) {
- if(v > max[r.getNo() - 1]) {
- max[r.getNo() - 1] = v;
- }
- }
- List<Hashtag> lst = new ArrayList<>();
- for(Map.Entry<Hashtag, Integer> e : r.getHashtags().entrySet()) {
- if(e.getValue().equals(max[r.getNo() - 1])) {
- lst.add(e.getKey());
- }
- }
- arrOfLst[r.getNo() - 1] = lst;
- }
- List<Hashtag> lstOfPopularHashtags = new ArrayList<>();
- for(List<Hashtag> l : arrOfLst) {
- for(Hashtag h : l) {
- if(!lstOfPopularHashtags.contains(h))
- lstOfPopularHashtags.add(h);
- }
- }
- System.out.println();
- System.out.println("popularHashtagsOvertime " + lstOfPopularHashtags.size());
- System.out.println("Most popular hashtags of round ");
- for(int i = 0; i < arrOfLst.length; i++) {
- Round thisRound = ServerLogParser.rounds.get(i+1);
- System.out.print(thisRound.getNo() + "\t");
- for (Hashtag h : arrOfLst[i]) {
- System.out.print(h.getName() + "|" + h.getRoundMap().get(thisRound) + " ");
- }
- System.out.println();
- }
- }
- private static void hashtagTable(int maxRound) throws IOException {
- String wPath = "C:\\Users\\Admin\\Desktop\\hiwi tud\\";
- File file = new File(wPath + "hashtag_table_" + Analyzer.roundLength + ".txt");
- FileWriter fw = new FileWriter(file, true);
- BufferedWriter bw = new BufferedWriter(fw);
- Map<Integer, Round> map = ServerLogParser.rounds;
- int num = Math.min(maxRound, map.size());
- bw.write("Round:[Hashtag|NumOfMessages...]");
- bw.newLine();
- for(int i = 0; i < maxRound; i++) {
- Round r = map.get(i+1);
- List<Map.Entry<Hashtag, Integer>> sortedList = new ArrayList<>(r.getHashtags().entrySet());
- sortedList.sort(Comparator.comparingInt(Map.Entry::getValue));
- bw.write(r.getNo() + ":");
- for(Map.Entry<Hashtag, Integer> e : sortedList) {
- bw.write(e.getKey().getName() + "|" + e.getValue() + " ");
- }
- bw.newLine();
- }
- bw.close();
- }
- private static void distribution1(int[] intervals) throws IOException {
- int[] counters = new int[intervals.length + 1];
- for(Round r : ServerLogParser.rounds.values()) {
- r.getHashtags().values().forEach(v -> {
- int indexMax = -1;
- for(int i = 0; i < intervals.length; i++) {
- if(v < intervals[i]) {
- indexMax = i;
- break;
- }
- }
- if(indexMax == -1)
- counters[counters.length - 1]++;
- else
- counters[indexMax]++;
- });
- }
- int totalCount = Arrays.stream(counters).sum();
- for(int i = 0; i < counters.length; i++) {
- String printedString;
- if(i == 0)
- printedString = "1 - " + intervals[i] + ":";
- else if (i == counters.length - 1)
- printedString = intervals[i-1] + " - MAX:";
- else
- printedString = intervals[i-1] + " - " + intervals[i] + ":";
- double share = ((double)counters[i]/(double) totalCount) * 100;
- System.out.println(printedString + "\t" + counters[i] + " " + share);
- }
- System.out.println();
- }
- private static void distribution2() {
- Map<Integer, Integer> map = new Hashtable<>();
- for(Round r : ServerLogParser.rounds.values()) {
- r.getHashtags().values().forEach(v -> {
- if(map.containsKey(v))
- map.replace(v, map.get(v) + 1);
- else
- map.put(v, 1);
- });
- }
- int kinds = map.values().stream().mapToInt(v -> v).sum();
- AtomicReference<Double> betterAvg = new AtomicReference<>((double) 0);
- map.forEach((k, v) -> {
- double weight = (double) v / (double) kinds;
- betterAvg.updateAndGet(v1 -> (v1 + k * weight));
- });
- System.out.println("betterAvg: " + betterAvg.get());
- }
- public static void trial() throws InterruptedException {
- Analyzer.serverLogPath = path;
- new Analyzer();
- Round round149 = ServerLogParser.rounds.get(149);
- round149.getHashtags().forEach((k, v) -> {
- System.out.println(k.getName() + " " + v);
- });
- }
- public static void exportDataLifetime(int[] intervals) {
- int maxAvgPostNo = 0;
- Map<Hashtag, Integer> map = new Hashtable<>();
- for(Hashtag h : ServerLogParser.hashtags.values()) {
- int totalPosts = h.getTotalPosts();
- //double avgPostsPerRound = (double) totalPosts / (double) roundNo;
- //double avgPostsPerRound = (double) totalPosts / (double) h.getRoundMap().size();
- if(maxAvgPostNo < totalPosts)
- maxAvgPostNo = (int) totalPosts + 1;
- map.put(h, totalPosts);
- }
- int maxAvg = maxAvgPostNo;
- List<Map.Entry<Hashtag, Integer>>[] arrOfLists = new List[intervals.length + 1];
- for(int i = 0; i < arrOfLists.length; i++) {
- if(i == 0) {
- int finalRightLimit = intervals[i];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= 0 && e.getValue() < finalRightLimit).collect(Collectors.toList());
- } else if(i == arrOfLists.length - 1) {
- int finalLeftLimit = intervals[i - 1];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < maxAvg).collect(Collectors.toList());
- }
- else {
- int finalLeftLimit = intervals[i-1];
- int finalRightLimit = intervals[i];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < finalRightLimit).collect(Collectors.toList());
- }
- }
- double[] avgs = new double[arrOfLists.length];
- for(int i = 0; i < arrOfLists.length; i++) {
- List<Map.Entry<Hashtag, Integer>> l = arrOfLists[i];
- avgs[i] = l.stream().mapToInt(e -> e.getKey().getRoundMap().size()).sum() / (double) l.size();
- }
- for(double v : avgs)
- System.out.println(v);
- }
- public static void exportDataPopularity(int[] intervals) {
- int roundNo = ServerLogParser.rounds.size();
- int maxAvgPostNo = 0;
- Map<Hashtag, Integer> map = new Hashtable<>();
- for(Hashtag h : ServerLogParser.hashtags.values()) {
- int totalPosts = h.getTotalPosts();
- //double avgPostsPerRound = (double) totalPosts / (double) roundNo;
- //double avgPostsPerRound = (double) totalPosts / (double) h.getRoundMap().size();
- if(maxAvgPostNo < totalPosts)
- maxAvgPostNo = (int) totalPosts + 1;
- map.put(h, totalPosts);
- }
- int maxAvg = maxAvgPostNo;
- List<Map.Entry<Hashtag, Integer>>[] arrOfLists = new List[intervals.length + 1];
- for(int i = 0; i < arrOfLists.length; i++) {
- if(i == 0) {
- int finalRightLimit = intervals[i];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= 0 && e.getValue() < finalRightLimit).collect(Collectors.toList());
- } else if(i == arrOfLists.length - 1) {
- int finalLeftLimit = intervals[i - 1];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < maxAvg).collect(Collectors.toList());
- }
- else {
- int finalLeftLimit = intervals[i-1];
- int finalRightLimit = intervals[i];
- arrOfLists[i] = map.entrySet().stream().filter(e -> e.getValue() >= finalLeftLimit && e.getValue() < finalRightLimit).collect(Collectors.toList());
- }
- }
- for(int i = 0; i < arrOfLists.length; i++) {
- int lstSize = arrOfLists[i].size();
- double percentage = ((double) lstSize / (double) ServerLogParser.hashtags.size()) * 100.00;
- if(i == 0)
- System.out.println(0 + " - " + intervals[i] + ":\t" + lstSize + " " + percentage);
- else if(i == arrOfLists.length - 1)
- System.out.println(intervals[i-1] + " - " + (maxAvg + 1) + ":\t" + lstSize + " " + percentage);
- else
- System.out.println(intervals[i-1] + " - " + intervals[i] + ":\t" + lstSize + " " + percentage);
- }
- double[][] arrOfsums = new double[arrOfLists.length][ServerLogParser.rounds.size()];
- for(int i = 0; i < arrOfLists.length; i++) {
- List<Map.Entry<Hashtag, Integer>> l = arrOfLists[i];
- int size = l.size();
- if(size == 0)
- continue;
- System.out.print("Interval " + i + "\t");
- for(int j = 0; j < ServerLogParser.rounds.size(); j++){
- Round r = ServerLogParser.rounds.get(j+1);
- int sum = l.stream().mapToInt(e -> {
- Integer temp = e.getKey().getRoundMap().get(r);
- return Objects.requireNonNullElse(temp, 0);
- }).sum();
- arrOfsums[i][j] = sum;
- System.out.print(j+1 + "|" + arrOfsums[i][j] + " ");
- }
- System.out.println();
- }
- }
- public static void numOfPostsEachRound() {
- List<Map.Entry<Integer, Round>> entries = new ArrayList<>(ServerLogParser.rounds.entrySet());
- entries.sort(Comparator.comparingInt(Map.Entry::getKey));
- for(Map.Entry<Integer, Round> e : entries) {
- int totalPosts = e.getValue().getUserMap().values().stream().mapToInt(v -> v).sum();
- System.out.print(totalPosts + " ");
- }
- }
- public static void activeTime() {
- int sumActive = ServerLogParser.rounds.values().stream().mapToInt(r -> r.getHashtags().size()).sum();
- double activeTime = (((double) sumActive / (double) ServerLogParser.hashtags.size()) / ServerLogParser.rounds.size()) * 100.00;
- System.out.println(activeTime + " active time");
- }
- }
|