12345678910111213141516171819202122232425262728293031323334353637 |
- import json
- import matplotlib.pyplot as plt
- import matplotlib.patches as mpatches
- #from pprint import pprint
- import csv
- from collections import Counter
- from sklearn.metrics.pairwise import cosine_similarity
- from mlxtend.frequent_patterns import apriori
- from mlxtend.preprocessing import TransactionEncoder
- import pandas as pd
- from scipy import sparse
- import numpy as np
- import time
- import random
- from scipy.interpolate import make_interp_spline, BSpline
# Load the dataset and drop the 'user' column so that only the remaining
# columns are handed to the frequent-itemset mining below.
data = pd.read_csv('lastfm.csv')
# The column is dropped by keyword: passing ``axis`` positionally
# (``drop('user', 1)``) was removed in pandas 2.0 and raises a TypeError there.
df = data.drop(columns='user')
conv_df = df.astype(bool)  # cast every remaining column to bool before mining

# Time the apriori run (min_support=0.01, itemsets of at most 2 items).
start_time = time.time()
d = apriori(conv_df, min_support=0.01, use_colnames=True, max_len=2)
print(d['itemsets'])
print("--- %s seconds ---" % (time.time() - start_time))

# Module-level state; all four start empty / are computed once here.
interest_group_centroids = []  # cluster centroids on which the interest groups are formed
interest_groups = []  # most similar items for each centroid in the interest group
items_len = len(df.columns)  # number of columns left after dropping 'user'
length = []  # stores the indices of the centroids
print(items_len)
|