# final.py
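# Pipeline (as implemented below): drop series that have too few
# near-duplicate neighbours, cluster the remainder with KShape, and write
# one binary pattern per cluster for a downstream scheduler.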

import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import calinski_harabasz_score
from tslearn.clustering import KShape, TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
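

# Stage 1: pairwise-similarity outlier filter, KShape clustering, and
# per-cluster pattern extraction.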
def filter():
    GoodNameList = np.loadtxt('/home/it/middle_data/good_name_list.txt', dtype=int)
    X = np.array(pd.read_csv('/home/it/middle_data/value.csv', header=None))
    threshold_1 = 19  # minimum matching positions for two series to count as close
    threshold_2 = 20  # minimum close neighbours for a series to be kept
    t1 = np.empty([len(X), len(X)], dtype=int)  # pairwise match counts
    t2 = np.empty([len(X), len(X)], dtype=int)  # pairwise close/far flags
    for i in range(len(X)):
        for j in range(i, len(X)):
            a = X[i]
            b = X[j]
            m1_num = np.sum(a == b)
            if m1_num >= threshold_1:  # close
                t2[i][j] = t2[j][i] = 1
            else:  # far
                t2[i][j] = t2[j][i] = 0
            t1[i][j] = t1[j][i] = m1_num
    # A series with too few close neighbours is treated as an outlier.
    outlier = np.empty([len(X)], dtype=int)
    for k in range(len(X)):
        if np.sum(t2[k]) >= threshold_2:
            outlier[k] = 0  # not an outlier
        else:
            outlier[k] = 1  # outlier
    need_be_filtered = np.argwhere(outlier >= 1).copy()
    filtered_index = np.concatenate(need_be_filtered)
    np.savetxt('/home/it/middle_data/filtered_good_name_list.txt',
               np.delete(GoodNameList, filtered_index), fmt="%i")
    pattern_middle = np.delete(GoodNameList, filtered_index).copy()
    df_result = pd.DataFrame(np.delete(X, filtered_index, axis=0).copy())
    df_result.to_csv('/home/it/middle_data/filtered_valus.csv', header=None, index=False)
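    # Reload the filtered series and cluster them with KShape after
    # z-normalising each one.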
    X = pd.read_csv('/home/it/middle_data/filtered_valus.csv', header=None)
    x_original = X.copy()
    x_pattern = np.copy(X)
    seed = 2
    num_cluster = 13
    X = TimeSeriesScalerMeanVariance(mu=0.0, std=1.0).fit_transform(X)
    ks = KShape(n_clusters=num_cluster, n_init=10, max_iter=200, tol=1e-8,
                verbose=True, random_state=seed)
    y_pred = ks.fit_predict(X)
    y_pattern = y_pred.copy()
    final_y_pred = y_pred.copy()
    # Append the filtered-out names back to the list and give them an extra
    # label (num_cluster), so every series ends up with a group.
    f4 = open("/home/it/middle_data/filtered_good_name_list.txt", "a+")  # append mode
    for i in range(len(filtered_index)):
        f4.write(str(GoodNameList[filtered_index[i]]))
        final_y_pred = np.append(final_y_pred, num_cluster)
        f4.write("\n")
    f4.close()
    np.savetxt('/home/it/middle_data/groupingResult.txt', final_y_pred, fmt="%i")
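    # Evaluate cluster separation with the Calinski-Harabasz index on the
    # unscaled data and append the score to a log file.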
    score_result = calinski_harabasz_score(x_original, y_pred)
    score_result_path = "/home/it/middle_data/score_result_kshape.txt"
    f = open(score_result_path, "a+")  # append mode
    f.write("\n")
    f.write("k=" + str(num_cluster))
    f.write("\n")
    f.write("CA=" + str(score_result))
    f.close()
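    # Build one representative pattern per cluster: sum the members'
    # original series, divide by the member count, then binarise.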
    pattern = np.zeros([num_cluster, len(X[0])], dtype=int)
    count = np.zeros([1, num_cluster], dtype=int)
    for i in range(len(y_pattern)):
        pattern[y_pattern[i]] = pattern[y_pattern[i]] + x_pattern[i]
        count[0][y_pattern[i]] = count[0][y_pattern[i]] + 1
    print("count=", count, "pattern=", pattern)
    final_result = np.zeros([num_cluster, len(X[0])], dtype=float)
    for i in range(num_cluster):
        final_result[i] = np.true_divide(pattern[i], float(count[0][i]))
    print("final_result=", final_result)
    # Keep a position if at least 30% of the cluster's members are active there.
    for i in range(len(final_result)):
        for j in range(len(final_result[0])):
            if final_result[i][j] >= 0.3:
                final_result[i][j] = 1
            else:
                final_result[i][j] = 0
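    # Tile the per-cluster patterns along the time axis so the scheduler
    # sees a longer repeating horizon.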
    # Concatenate seven copies of the pattern block side by side (assumes
    # the repeated assignments here were meant to accumulate onto `c`).
    middle_final_result = np.copy(final_result)
    c = middle_final_result
    for _ in range(6):
        c = np.concatenate((c, final_result), axis=1)
    np.savetxt('/home/it/middle_data/pattern.txt', c, fmt="%i")
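

# Alternative grouping: DTW k-means over the raw (unfiltered) series, with
# its own Calinski-Harabasz score log.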
def group_dtw():
    print("group_dtw")
    X = pd.read_csv('/home/it/middle_data/value.csv', header=None)
    x_original = X.copy()
    seed = 9
    num_cluster = 14
    X = TimeSeriesScalerMeanVariance(mu=0.0, std=1.0).fit_transform(X)
    km = TimeSeriesKMeans(n_clusters=num_cluster, n_init=1, metric="dtw", verbose=True,
                          max_iter_barycenter=10, random_state=seed)
    y_pred = km.fit_predict(X)
    np.savetxt('/home/it/middle_data/groupingResult.txt', y_pred, fmt="%i")
    score_result = calinski_harabasz_score(x_original, y_pred)
    score_result_path = "/home/it/middle_data/score_result_dtw.txt"
    f4 = open(score_result_path, "a+")  # append mode
    f4.write("\n")
    f4.write(str(score_result))
    f4.close()
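

# Entry point: suppress a noisy sklearn FutureWarning, then run the KShape
# pipeline; the DTW variant is left disabled.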
warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn", lineno=931)
filter()
# group_dtw()