
first commit

Huixijin C 1 year ago
commit
261bb70772
16 changed files with 823 additions and 0 deletions
  1. .gitignore (+138 −0)
  2. .pre-commit-config.yaml (+24 −0)
  3. Makefile (+4 −0)
  4. README.md (+12 −0)
  5. __init__.py (+6 −0)
  6. charting.py (+61 −0)
  7. client.py (+54 −0)
  8. cluster.py (+207 −0)
  9. clustering.bat (+3 −0)
  10. encrypt_message.py (+34 −0)
  11. kshape_filter.py (+102 −0)
  12. models.py (+32 −0)
  13. scheduler.py (+24 −0)
  14. server.py (+43 −0)
  15. update_ts.py (+55 −0)
  16. util.py (+24 −0)

+ 138 - 0
.gitignore

@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Mac OS file
+.DS_Store
+
+# Pycharm file
+.idea/
+
+# Emacs auto save cache
+*.*~

+ 24 - 0
.pre-commit-config.yaml

@@ -0,0 +1,24 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.4.0
+    hooks:
+    - id: check-yaml
+    - id: check-added-large-files
+      args: [--maxkb=20480]
+    - id: check-ast
+    - id: check-case-conflict
+    - id: debug-statements
+    - id: detect-private-key
+    - id: end-of-file-fixer
+    - id: mixed-line-ending
+    - id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 20.8b1
+    hooks:
+    - id: black
+      additional_dependencies: ['click==8.0.4']
+-   repo: https://github.com/kynan/nbstripout
+    rev: 0.3.9
+    hooks:
+    # strip output from Jupyter and IPython notebooks
+    - id: nbstripout

+ 4 - 0
Makefile

@@ -0,0 +1,4 @@
+.PHONY: format
+format:
+	pre-commit run --all-files
+	python -m black .

+ 12 - 0
README.md

@@ -0,0 +1,12 @@
+# Traffic-Analysis_Resistant_Anonymity_Sets
+
+## Run the server
+```
+$ flask run --host=0.0.0.0
+```
+
+## Run the client
+```
+$ python client.py http://127.0.0.1:5000/api/user_info dataset
+```

+ 6 - 0
__init__.py

@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+from flask import Flask
+
+app = Flask(__name__)
+app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db.sqlite"

+ 61 - 0
charting.py

@@ -0,0 +1,61 @@
+import pandas as pd
+import sqlite3
+import os
+from matplotlib import pyplot as plt
+import seaborn as sns
+
+conn = sqlite3.connect('db.sqlite')
+
+
+def check_folder():
+    path = './charts'
+    if not os.path.exists(path):
+        os.mkdir(path)
+        print('create charts directory...')
+
+def time_series_chart(x, y, y_label, title):
+    sns.set()
+    plt.figure(figsize=(18,7))
+    sns.lineplot(x=x, y=y)
+    plt.xlabel('Round')
+    plt.ylabel(y_label)
+    plt.title(title)
+    plt.savefig(os.path.join('charts', title + '.png'))
+
+
+def main():
+    check_folder()
+    # loading records
+    res = pd.read_sql_query(
+        '''select timestamp, 
+                  normal_user_count,
+                  abnormal_user_count,
+                  nyms_count
+            from cluster_result''',
+        con=conn)
+
+    # plot time-series key indicator fig
+
+    time_series_chart(res.index, res.loc[:, 'nyms_count'],
+                      'Size of Pseudonym',
+                      'Attack Anonymity Set')
+    time_series_chart(res.index, res.loc[:, 'normal_user_count'],
+                      'Forwarded User Counts',
+                      'Normal Active User')
+    time_series_chart(res.index, res.loc[:, 'abnormal_user_count'],
+                      'Delayed User Counts',
+                      'Abnormal Active User')
+    print('save finished.')
+
+
+if __name__ == '__main__':
+    main()
+
+
+
+
+
+
+
+
+

+ 54 - 0
client.py

@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+import json
+import os
+import sys
+
+import requests
+
+from encrypt_message import encrypt_message
+from util import Constants
+
+
+class Client(object):
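+    """Upload user activity records to the server in encrypted batches."""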
+    def __init__(self, server, text_file):
+        self.server = server
+        if not os.path.exists(text_file):
+            raise IOError
+        self.text_file = text_file
+
+    def batch_upload_user_info(self, batch_size=100, gen_cover=0):
+
+        with open(self.text_file, "r") as f:
+            lines = f.readlines()
+            batches = [
+                lines[x : x + batch_size] for x in range(0, len(lines), batch_size)
+            ]
+            for batch in batches:
+                body = json.dumps(batch)
+                try:
+                    encrypted_body = encrypt_message(body)
+                    r = requests.post(self.server, data=encrypted_body)
+                    print(r.text)
+                    if gen_cover:
+                        for _ in range(Constants.NUM_COVER):
+                            # cover traffic: post a dummy message built from a real record,
+                            # wrapped in the same batch format the server expects
+                            cover_msg = self.cover_message_gen(batch)
+                            cover_body = encrypt_message(json.dumps([json.dumps(cover_msg)]))
+                            r = requests.post(self.server, data=cover_body)
+                            print(r.text)
+                except Exception as e:
+                    print(e)
+
+    def cover_message_gen(self, batch):
+        """Build a cover message from the last record of a plaintext batch."""
+        record = json.loads(batch[-1])
+        return {"timestamp": record["timestamp"], "user_id": record["user_id"]}
+
+
+if __name__ == "__main__":
+
+    server = sys.argv[1]
+    text_file = sys.argv[2]
+    client = Client(server, text_file)
+
+    client.batch_upload_user_info(batch_size=10000)

+ 207 - 0
cluster.py

@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+
+import json
+import pandas as pd
+from datetime import datetime, timedelta
+from random import choice
+import hashlib
+
+import click
+from sqlalchemy import delete, func, select, update
+
+from . import app
+from .kshape_filter import KShapeFilter
+from .models import Cluster_result, Nym_info, User_info, db
+from .util import Constants, cnt_cluster
+from .update_ts import fill_ts
+
+class Clustering:
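+    """Round-based k-shape clustering of user activity; users in small clusters are delayed to the next round."""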
+
+    def __init__(self):
+        self.user_list = {}                  # user names
+        self.time_series = pd.DataFrame()    # time series dataset, update every round
+        self.delay_user_list = []
+
+    def get_user(self, df):
+        """Extract username from received dataframe"""
+        users = df['user_id'].drop_duplicates().values.tolist()             # users per round
+        return users
+
+    def update_user(self, user_list):
+        """Add new user to user_list"""
+        user = dict(zip(user_list, [0]*len(user_list)))
+        self.user_list.update(user)
+
+    def update_time_series(self, df):
+        """Add new round of data to the time series"""
+        self.time_series = df.combine_first(self.time_series).fillna(0)     # fill NaN data for k-shape
+
+    def rnd_time_series(self, df, users):
+        """Filter out the time series of active users in this round"""
+        rnd_df = df.loc[users]
+        print(rnd_df.shape)
+        return rnd_df
+
+    def delay_user(self, df, model, cnt_cluster):
+        """
+        Filter out users who in small cluster
+        :param cnt_cluster:  count the amount of users in each cluster
+        :param model: cluster model
+        """
+        cluster_s = cnt_cluster.loc[cnt_cluster['count'] <= Constants.CLUSTER_SIZE].index.tolist()
+        new_df = pd.DataFrame()
+        for i in cluster_s:
+            filter_df = df[model.labels_ == i]
+            new_df = pd.concat([filter_df, new_df], ignore_index=False)
+        delay_user = new_df.index.tolist()
+        if delay_user:
+            print(f"{len(delay_user)} messages been delayed, join in next round")
+        return delay_user
+
+    def ts_cluster(self, data):
+
+        users = self.get_user(data)
+        rnd_users = self.delay_user_list + users
+        print(f"Participate users: {len(users)} active, {len(self.delay_user_list)} delayed")
+        df = fill_ts(data)
+        self.update_time_series(df)
+        rnd_df = self.rnd_time_series(self.time_series, rnd_users)
+
+        ks, y_pred = KShapeFilter.k_shape(rnd_df, Constants.CLUSTER_NUM)
+        cnt = cnt_cluster(ks)
+
+        self.delay_user_list = self.delay_user(rnd_df, ks, cnt)
+
+
+    def anonymity_simulation(self, date, hour, random=True):
+        """
+        Simulate an anonymity attack: select one user as the attack target
+        :param date: date
+        :param hour: hour
+        :return: user_id of the attack target
+        """
+        print("attack:", date, hour)
+        start_time = datetime.strptime(f"{date} {hour}", "%Y-%m-%d %H")
+
+        # Two attack modes: a specified (fixed) target or a randomly chosen target
+
+        # Specified target, random=0
+        if not random:
+            attack_user_target = "537073336dcd41ff4f362d111888907c"
+
+        # Random attack, random=1
+        else:
+            res = db.session.execute(
+                select(User_info.user_id)
+                .where(User_info.timestamp <= start_time.strftime("%Y-%m-%d %H:%M:%S"))
+                .group_by(User_info.user_id)
+            )
+
+            user_list = res.scalars().all()
+            attack_user_target = choice(user_list)
+
+        return attack_user_target
+
+
+    def clustering(self, date, hour, with_attack=0, with_random=1):
+        """clustering function"""
+        print(date, hour)
+        start_time = datetime.strptime(f"{date} {hour}", "%Y-%m-%d %H")
+        end_time = start_time + timedelta(hours=1)
+        res = db.session.execute(
+            select(User_info)
+            .where(User_info.timestamp >= start_time.strftime("%Y-%m-%d %H:%M:%S"))
+            .where(User_info.timestamp < end_time.strftime("%Y-%m-%d %H:%M:%S"))
+        )
+        # print(res.scalars().all())
+        user_list = list(
+            map(
+                lambda x: {
+                    "user_id": x.user_id,
+                    "timestamp": x.timestamp,
+                },
+                res.scalars().all(),
+            )
+        )
+        # print(user_list)
+        print(f"{len(user_list)} users are selected")
+
+        for user in user_list:
+            user_id = user["user_id"]
+            # user_id = hashlib.md5(int(user_id).to_bytes(8, 'big')).hexdigest()      # For tweet dataset
+            nym = hashlib.md5(user_id.encode("utf-8")).hexdigest()
+            user_nym = Nym_info(timestamp=user["timestamp"], nym=nym)
+            db.session.merge(user_nym)
+        db.session.commit()
+
+        k_filter = KShapeFilter(iter_hour=f"{date} {hour}")
+        k_filter.load_data(user_list)
+        k_filter.feature_extract()
+        normal_user, abnormal_user = k_filter.train_predict()
+
+        if with_attack == 1:
+            target_user = self.anonymity_simulation(date, hour, random=with_random)
+
+            if target_user not in normal_user:
+                print(f"[Warning!] Anonymity Attack, user:{target_user}")
+                normal_user.append(target_user)
+                abnormal_user = [item for item in abnormal_user if item != target_user]
+                # remove the normal users' nyms from the anonymity set; its remaining size is recorded below
+                stmt = delete(Nym_info).where(Nym_info.nym.in_(normal_user))
+                db.session.execute(stmt)
+                db.session.commit()
+            else:
+                # delete every nym that is not in the normal user list
+                stmt = delete(Nym_info).where(
+                    Nym_info.nym.notin_(normal_user)
+                )
+                db.session.execute(stmt)
+                db.session.commit()
+
+        # Assumption: this update is meant to carry the delayed (abnormal) users' nyms
+        # forward into the next round, so their timestamps are advanced to the round end.
+        stmt = (
+            update(Nym_info)
+            .where(Nym_info.nym.in_(abnormal_user))
+            .values(timestamp=end_time)
+        )
+        db.session.execute(stmt)
+        db.session.commit()
+
+        nyms_count = db.session.query(
+            func.count(Nym_info.nym)
+        ).scalar()
+        print(f"fictitious user count is: {nyms_count}")
+        cluster_result = Cluster_result(
+            timestamp=start_time,
+            normal_user=json.dumps(normal_user),
+            abnormal_user=json.dumps(abnormal_user),
+            normal_user_count=len(normal_user),
+            abnormal_user_count=len(abnormal_user),
+            nyms_count=nyms_count,
+        )
+
+        db.session.merge(cluster_result)
+        db.session.commit()
+        print(f"{len(abnormal_user)} abnormal users")
+        return {"status": 1}
+
+
+    @app.cli.command("clustering")
+    @click.argument("date")
+    @click.argument("hour")
+    @click.option("-a", "--attack", required=True, type=int)
+    @click.option("-r", "--random", required=True, type=int)
+    def clustering_by_date_hour(self, date, hour, attack=0, random=1):
+
+        print(date, hour)
+        print(f"is attack?:{attack}")
+        self.clustering(date, hour, with_attack=attack, with_random=random)
+        return None

+ 3 - 0
clustering.bat

@@ -0,0 +1,3 @@
+for %%d in (1 2 3 4 5) do (
+	for  %%h in (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) do flask clustering 2021-10-%%d %%h -a 1 -r 1
+)

+ 34 - 0
encrypt_message.py

@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+import os
+
+from cryptography.fernet import Fernet
+
+SECRET_KEY = "secret.key"
+
+
+def generate_key():
+    key = Fernet.generate_key()
+    with open(SECRET_KEY, "wb") as key_file:
+        key_file.write(key)
+
+
+def load_key():
+    with open(SECRET_KEY, "rb") as key_file:
+        return key_file.read()
+
+
+def encrypt_message(message):
+    """Encrypts a message string"""
+    if not os.path.exists(SECRET_KEY):
+        generate_key()
+    key = load_key()
+    encoded_message = message.encode("UTF-8")
+    f = Fernet(key)
+    encrypted_message = f.encrypt(encoded_message)
+    return encrypted_message
+
+
+def decrypt_message(encrypted_message):
+    key = load_key()
+    f = Fernet(key)
+    decrypted_message = f.decrypt(encrypted_message)
+    return decrypted_message.decode()

+ 102 - 0
kshape_filter.py

@@ -0,0 +1,102 @@
+import time
+
+import numpy as np
+import pandas as pd
+from tslearn.clustering import KShape
+from .util import Constants
+
+
+# json to df
+def json2df(json_list):
+    """
+    Process incoming json and convert to dataframe for clustering
+    Input:
+    json_list: Each element is a list of Json, which is used to store the current processed data
+    """
+    return pd.DataFrame(json_list)
+
+
+class KShapeFilter:
+    def __init__(self, iter_hour: str):
+        """
+        Initialize the KShape algorithm, pass in json to process data, need to re-initialize each round
+        :param iter_hour: string, %Y-%m-%d %H
+        """
+        self.start_time = time.time()
+        self.hour = iter_hour
+
+    def load_data(self, json_list):
+
+        self.train_data = pd.DataFrame(json_list)
+        self.train_data.loc[:, "hour"] = self.hour
+
+    def feature_extract(self):
+
+        self.train_data.loc[:, "time"] = pd.to_datetime(
+            self.train_data.loc[:, "timestamp"], unit="s"
+        )
+        # store user information
+        self.user_list = self.train_data.loc[:, "user_id"].unique()
+
+        df = self.train_data.reset_index(drop=True)
+        df.loc[:, "event"] = 1
+        start_time = df.loc[0, "hour"] + ":00:00"
+        end_time = df.loc[0, "hour"] + ":59:59"
+
+        feature_list = []
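+        # build one 3600-element per-second event vector per user for the hour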
+        for i, user in enumerate(self.user_list):
+            time_df = pd.DataFrame(
+                {"time": pd.date_range(start=start_time, end=end_time, freq="s")},
+                index=range(0, 3600),
+            )
+            time_df = pd.merge(
+                time_df,
+                df.loc[df.loc[:, "user_id"] == user, ["time", "event"]],
+                on="time",
+                how="left",
+            )
+            time_df = time_df.fillna(0)
+            time_array = np.array(time_df.loc[:, "event"])
+            feature_list.append(time_array)
+
+        self.train_X = np.array(feature_list).reshape([-1, 3600, 1])
+
+    def train_predict(self, thresholds=0.05, num_cluster=Constants.CLUSTER_NUM):
+
+        ks, y_pred = self.k_shape(self.train_X, num_cluster)
+        self.res = pd.DataFrame({"user_id": self.user_list})
+        self.res.loc[:, "predict_label"] = y_pred
+
+        cluster_count = self.res.loc[:, "predict_label"].value_counts()
+        print(cluster_count)
+
+        judging_df = self.res.loc[:, "predict_label"].value_counts() / len(
+            self.user_list
+        )
+        abnormal_label_list = judging_df[judging_df <= thresholds].index.tolist()
+
+        self.abnormal_user = self.res.loc[
+            self.res.loc[:, "predict_label"].isin(abnormal_label_list), "user_id"
+        ].tolist()
+
+        self.normal_user = self.res.loc[
+            ~self.res.loc[:, "predict_label"].isin(abnormal_label_list), "user_id"
+        ].tolist()
+
+        return self.normal_user, self.abnormal_user
+
+    @staticmethod
+    def k_shape(data, num_cluster):
+        """
+        k-shape clustering
+        :param df: time series dataset
+        :param num_cluster:
+        :return:cluster label
+        """
+        ks = KShape(
+            n_clusters=num_cluster, verbose=True, random_state=np.random.seed(0)
+        )
+        y_pred = ks.fit_predict(data)
+        return ks, y_pred
+
+

+ 32 - 0
models.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+from datetime import datetime
+
+from flask_sqlalchemy import SQLAlchemy
+
+from . import app
+
+db = SQLAlchemy(app)
+
+
+class User_info(db.Model):
+
+    user_id = db.Column(db.String, primary_key=True)
+    timestamp = db.Column(db.DateTime, primary_key=True)
+
+
+
+class Cluster_result(db.Model):
+
+    timestamp = db.Column(db.DateTime, unique=True, primary_key=True)
+    normal_user = db.Column(db.Text)
+    abnormal_user = db.Column(db.Text)
+    normal_user_count = db.Column(db.Integer, default=0)
+    abnormal_user_count = db.Column(db.Integer, default=0)
+    nyms_count = db.Column(db.Integer, default=0)
+
+
+class Nym_info(db.Model):
+
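+    # nym is the md5 digest of a user_id (generated in cluster.py)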
+    nym = db.Column(db.String, primary_key=True, unique=True)
+    timestamp = db.Column(db.DateTime, primary_key=True)

+ 24 - 0
scheduler.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+from apscheduler.schedulers.background import BackgroundScheduler
+from flask_apscheduler import APScheduler
+
+from . import app
+
+
+class SchedulerConfig(object):
+    JOBS = [
+        {
+            "id": "",
+            "func": None,
+            "args": None,
+            "trigger": {
+                "type": "cron",
+                # 'day_of_week': "0-6", # Define specific days to execute
+                "hour": "*/1",
+                # "minute": "*/15",
+                # 'second': '*/5'  # "*/3" ,execute every 3 seconds
+            },
+        }
+    ]
+    SCHEDULER_API_ENABLED = True

+ 43 - 0
server.py

@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+import datetime
+import json
+
+from apscheduler.schedulers.background import BackgroundScheduler
+from flask import request
+
+from . import app
+from .models import User_info, db
+from .encrypt_message import decrypt_message
+
+db.create_all()
+
+
+def sensor():
+    """ Function for test purposes. """
+    print("Scheduler is alive!")
+
+
+sched = BackgroundScheduler(daemon=True)
+sched.add_job(sensor, "interval", minutes=1)
+sched.start()
+
+
+@app.route("/api/user_info", methods=["POST"])
+def user_info():
+    """ Function for parsing user info"""
+    data = request.data
+    try:
+        data = decrypt_message(data)
+        batches = json.loads(data)
+        for batch in batches:
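+            # each element is a JSON string with "timestamp" (unix seconds) and "user_id"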
+            line = json.loads(batch)
+            timestamp = datetime.datetime.fromtimestamp(line["timestamp"])
+            user_id = line["user_id"]
+            user = User_info(timestamp=timestamp, user_id=user_id)
+            db.session.merge(user)
+        db.session.commit()
+    except Exception as e:
+        return {"status": 0, "error": str(e)}
+    return {"status": 1}
+

+ 55 - 0
update_ts.py

@@ -0,0 +1,55 @@
+import json
+import csv
+import pandas as pd
+import numpy as np
+import warnings
+warnings.filterwarnings("ignore")
+
+pd.set_option('display.max_columns', None)
+pd.set_option('display.max_rows', None)
+pd.set_option('display.width', 1000)
+
+
+def convert_to_csv(filepath):
+    """
+    convert txt file to csv
+    run only once at the beginning
+    """
+    with open(filepath, 'r') as f, open('docs/reddit_100000.csv', 'w', encoding='utf-8') as csvfile:
+        fieldnames = ['timestamp', 'user_id']
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+        for line in f:
+            writer.writerow(json.loads(line))
+        print('Successfully converted to CSV')
+
+
+def read_data(filepath):
+    """
+    read CSV data and parse timestamp
+    :return: dataframe named df
+    """
+    parser = lambda x: pd.to_datetime(x, unit='s')
+    df = pd.read_csv(filepath, parse_dates=['timestamp'], date_parser=parser)
+
+    return df
+
+def fill_ts(df):
+    """
+    set values for time series and fill N/A values
+    :param data: time series dataframe
+    :return: filled dataframe
+    """
+    df['timestamp'] = df['timestamp'].dt.floor('H')                     # floor timestamp to hour
+    df_cnt = df.loc[:, "user_id"].value_counts().to_frame().rename_axis('user_id')
+    df_cnt.columns = ['count']
+
+    df.reset_index(drop=True, inplace=True)
+    df_cnt.reset_index(drop=True, inplace=True)
+    df = pd.merge(df_cnt, df, left_index=True, right_index=True, how='outer').set_index('user_id')
+
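+    # pivot to a user-by-hour matrix: rows = user_id, columns = hourly timestamps, values = summed counts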
+    df_res = df.pivot_table(index='user_id', columns='timestamp', values='count', aggfunc=np.sum)
+    print(df_res.shape)
+
+    return df_res
+

+ 24 - 0
util.py

@@ -0,0 +1,24 @@
+
+import pandas as pd
+
+class Constants:
+    CLUSTER_NUM = 100
+    ROUND_LENGTH = 3600        # in seconds
+    CLUSTER_SIZE = 30       # minimum cluster size that must be satisfied by forwarding messages
+    NUM_COVER = 1
+    MAX_DELAY = 3
+    ENCODING = 'UTF-8'
+
+def cnt_cluster(model):
+    """Count number of users in each cluster"""
+    cnt = pd.Series(model.labels_).value_counts().to_frame()
+    cnt.columns = ['count']
+    print('Non-empty cluster number:', len(cnt))
+    print('The number of users in each cluster is:\n', cnt)
+    return cnt
+
+
+
+
+
+