# NOTE(review): this file is damaged and is not valid Python as stored.
#   * All statements are collapsed onto two physical lines (e.g.
#     `import numpy as np import glob ...`), so the original line breaks and
#     indentation are lost; where `filter()` ends cannot be told from here.
#   * Text is missing in at least two places: `while i=threshold_1:` and
#     `while k=threshold_2:`. Presumably everything between a `<` and the
#     next `>` was stripped -- TODO confirm against the original file. The
#     loop conditions, the inner `j` loop header, the computation of `m1_num`
#     and the value compared with `threshold_2` are therefore not visible.
#   * The last statement (`while i`) is cut off.
#
# What the visible code does:
#   * filter(): loads an integer name list (good_name_list.txt) and a value
#     table (value.csv, no header); fills two len(X) x len(X) int matrices
#     symmetrically -- t1 with `m1_num`, t2 with 1 ("close") when the hidden
#     comparison against threshold_1 (19) holds, else 0 ("far"); marks each
#     row k in `outlier` as 0 ("NOT outlier") or 1 ("outlier") using
#     threshold_2 (20); deletes the flagged entries from the name list and
#     the flagged rows from X; writes the results to
#     filtered_good_name_list.txt and filtered_valus.csv.
#   * Then: re-reads filtered_valus.csv, keeps copies, normalises with
#     TimeSeriesScalerMeanVariance(mu=0, std=1), and clusters with
#     KShape(n_clusters=13, n_init=10, max_iter=200, tol=1e-8, random_state=2)
#     via fit_predict.
#   * Finally opens filtered_good_name_list.txt as `f4`.
#
# Review notes (to address once the file is restored):
#   * `def filter()` shadows the builtin `filter` in this module.
#   * `metrics` is bound to tslearn.metrics twice and then rebound by
#     `from sklearn import metrics`; the last import wins.
#   * `pattern_middle` is assigned but not used in the visible code.
#   * `f4` is opened with mode "a+" although its trailing comment says to
#     open in `w` mode; no close() is visible in this view.
#   * NOTE(review): if no row is flagged as an outlier, np.concatenate on the
#     empty argwhere result likely raises -- confirm.
import numpy as np import glob from tslearn.clustering import KShape from sklearn.cluster import KMeans import tslearn.metrics as metrics from sklearn.metrics import calinski_harabasz_score from tslearn.preprocessing import TimeSeriesScalerMeanVariance from tslearn.clustering import TimeSeriesKMeans import matplotlib.pyplot as plt #from tslearn.generators import random_walks import tslearn.metrics as metrics import pandas as pd from sklearn.model_selection import train_test_split import warnings from tslearn.clustering import silhouette_score import os from sklearn import metrics import random def filter(): GoodNameList = np.loadtxt('/home/it/middle_data/good_name_list.txt',dtype=int) X=np.array(pd.read_csv('/home/it/middle_data/value.csv',header=None)) threshold_1=19# threshold_2=20 t1 = np.empty([len(X),len(X)], dtype = int) t2 = np.empty([len(X),len(X)], dtype = int) i=0 while i=threshold_1:#close t2[i][j]=t2[j][i]=1 else: t2[i][j]=t2[j][i]=0#far t1[i][j]=t1[j][i]=m1_num j=j+1 i=i+1 outlier = np.empty([len(X)], dtype = int) k=0 while k=threshold_2: outlier[k]=0#NOT outlier else: outlier[k]=1#outlier k=k+1 need_be_filtered=np.argwhere(outlier>=1).copy() filtered_index=np.concatenate(need_be_filtered) np.savetxt('/home/it/middle_data/filtered_good_name_list.txt',np.delete(GoodNameList,filtered_index),fmt="%i") pattern_middle=np.delete(GoodNameList,filtered_index).copy() df_result = pd.DataFrame(np.delete(X,filtered_index,axis=0).copy()) df_result.to_csv('/home/it/middle_data/filtered_valus.csv',header=None,index=False) X=pd.read_csv('/home/it/middle_data/filtered_valus.csv',header=None) x_original=X.copy() x_pattern=np.copy(X) seed = 2 num_cluster = 13 X = TimeSeriesScalerMeanVariance(mu= 0.0 ,std= 1.0 ).fit_transform(X) ks = KShape(n_clusters= num_cluster ,n_init= 10 ,max_iter=200,tol=1e-8,verbose= True ,random_state=seed) y_pred = ks.fit_predict(X) y_pattern=y_pred.copy() final_y_pred=y_pred.copy() f4 = 
open("/home/it/middle_data/filtered_good_name_list.txt","a+") # open in `w` mode to write i=0 while i