# test2.py 1.1 KB
  1. import json
  2. import matplotlib.pyplot as plt
  3. import matplotlib.patches as mpatches
  4. #from pprint import pprint
  5. import csv
  6. from collections import Counter
  7. from sklearn.metrics.pairwise import cosine_similarity
  8. from mlxtend.frequent_patterns import apriori
  9. from mlxtend.preprocessing import TransactionEncoder
  10. import pandas as pd
  11. from scipy import sparse
  12. import numpy as np
  13. import time
  14. import random
  15. from scipy.interpolate import make_interp_spline, BSpline
  16. data = pd.read_csv('lastfm.csv')
  17. df = data.drop('user', 1)
  18. conv_df = df.astype(bool)
  19. start_time = time.time()
  20. d = apriori(conv_df, min_support=0.01, use_colnames=True, max_len=2)
  21. print((d['itemsets']))
  22. print("--- %s seconds ---" % (time.time() - start_time))
  23. interest_group_centroids = [] # cluster centriods on which the interest groups are formed
  24. interest_groups = [] # Most similar items for each centroid in the interest group
  25. items_len = len(df.columns) # lengh of the items in the dataset
  26. length = [] # stores the index of the centroids
  27. print(items_len)