소스 검색

change training loop to depend on test accuracy

wesleyjtann 4 년 전
부모
커밋
185a6197ef

BIN
save/fed_mnist_cnn_50_C0.1_iid1_acc.png


BIN
save/fed_mnist_cnn_50_C0.1_iid1_loss.png


BIN
save/nn_cifar_mlp_1.png


BIN
save/objects/cifar_cnn_1_C[0.1]_iid[1]_E[10]_B[10].pkl


BIN
save/objects/mnist_mlp_1_C[0.1]_iid[1]_E[10]_B[10].pkl


파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 1036 - 0
src/.ipynb_checkpoints/federated-hierarchical_v1_twoclusters-changeEval-checkpoint.ipynb


BIN
src/__pycache__/options.cpython-37.pyc


BIN
src/__pycache__/update.cpython-37.pyc


+ 23 - 9
src/federated-hierarchical_main.py

@@ -50,12 +50,12 @@ def fl_train(args, train_dataset, cluster_global_model, cluster, usergrp, epochs
     cluster_train_loss, cluster_train_accuracy = [], []
     cluster_val_acc_list, cluster_net_list = [], []
     cluster_cv_loss, cluster_cv_acc = [], []
-    print_every = 2
+    # print_every = 1
     cluster_val_loss_pre, counter = 0, 0
 
-    for epoch in tqdm(range(epochs)):
+    for epoch in range(epochs):
         cluster_local_weights, cluster_local_losses = [], []
-        print(f'\n | Cluster Training Round : {epoch+1} |\n')
+        # print(f'\n | Cluster Training Round : {epoch+1} |\n')
 
         cluster_global_model.train()
         m = max(int(args.frac * len(cluster)), 1)
@@ -67,6 +67,7 @@ def fl_train(args, train_dataset, cluster_global_model, cluster, usergrp, epochs
             cluster_w, cluster_loss = cluster_local_model.update_weights(model=copy.deepcopy(cluster_global_model), global_round=epoch)
             cluster_local_weights.append(copy.deepcopy(cluster_w))
             cluster_local_losses.append(copy.deepcopy(cluster_loss))
+            print('| Global Round : {} | User : {} | \tLoss: {:.6f}'.format(epoch, idx, cluster_loss))
 
         # averaging global weights
         cluster_global_weights = average_weights(cluster_local_weights)
@@ -151,8 +152,10 @@ if __name__ == '__main__':
     cv_loss, cv_acc = [], []
     print_every = 1
     val_loss_pre, counter = 0, 0
+    testacc_check, epoch = 0, 0
 
-    for epoch in tqdm(range(args.epochs)):
+    # for epoch in tqdm(range(args.epochs)):
+    while testacc_check < args.test_acc:
         local_weights, local_losses, local_accuracies= [], [], []
         print(f'\n | Global Training Round : {epoch+1} |\n')
         
@@ -160,12 +163,12 @@ if __name__ == '__main__':
         global_model.train()
         
         # Cluster A
-        A_weights, A_losses = fl_train(args, train_dataset, cluster_modelA, A1, user_groupsA, 2)
+        A_weights, A_losses = fl_train(args, train_dataset, cluster_modelA, A1, user_groupsA, args.epochs)
         local_weights.append(copy.deepcopy(A_weights))
         local_losses.append(copy.deepcopy(A_losses))
         
         # Cluster B
-        B_weights, B_losses = fl_train(args, train_dataset, cluster_modelB, B1, user_groupsB, 2)
+        B_weights, B_losses = fl_train(args, train_dataset, cluster_modelB, B1, user_groupsB, args.epochs)
         local_weights.append(copy.deepcopy(B_weights))
         local_losses.append(copy.deepcopy(B_losses))
         
@@ -190,20 +193,31 @@ if __name__ == '__main__':
             list_acc.append(acc)
             list_loss.append(loss)
         train_accuracy.append(sum(list_acc)/len(list_acc))
-        
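+        # Refresh the while-loop stopping value (taken from the latest train accuracy) and advance the round counter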
+        testacc_check = 100*train_accuracy[-1]
+        epoch = epoch + 1
+
         # print global training loss after every 'i' rounds
         if (epoch+1) % print_every == 0:
             print(f' \nAvg Training Stats after {epoch+1} global rounds:')
             print(f'Training Loss : {np.mean(np.array(train_loss))}')
             print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))
-    #         print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1][0]))
+            
 
     print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
 
     # Test inference after completion of training
     test_acc, test_loss = test_inference(args, global_model, test_dataset)
 
-    print(f' \n Results after {args.epochs} global rounds of training:')
+    # print(f' \n Results after {args.epochs} global rounds of training:')
+    print(f"\nAvg Training Stats after {epoch} global rounds:")
     print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
     print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
 
+    # Saving the objects train_loss and train_accuracy:
+    file_name = '../save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.\
+    format(args.dataset, args.model, epoch, args.frac, args.iid,
+           args.local_ep, args.local_bs)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([train_loss, train_accuracy], f)

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 1036 - 0
src/federated-hierarchical_v1_twoclusters-changeEval.ipynb


+ 3 - 2
src/options.py

@@ -9,7 +9,7 @@ def args_parser():
     parser = argparse.ArgumentParser()
 
     # federated arguments (Notation for the arguments followed from paper)
-    parser.add_argument('--epochs', type=int, default=10,
+    parser.add_argument('--epochs', type=int, default=5,
                         help="number of rounds of training")
     parser.add_argument('--num_users', type=int, default=100,
                         help="number of users: K")
@@ -62,7 +62,8 @@ def args_parser():
     parser.add_argument('--seed', type=int, default=1, help='random seed')
 
     # Add arguments
-    parser.add_argument('--num_clusters', type=int, default=2, help='verbose')
+    parser.add_argument('--num_clusters', type=int, default=2, help='the number of clusters')
+    parser.add_argument('--test_acc', type=int, default=95, help='target test accuracy')
 
     args = parser.parse_args()
     return args

+ 5 - 5
src/update.py

@@ -75,11 +75,11 @@ class LocalUpdate(object):
                 loss.backward()
                 optimizer.step()
 
-                if self.args.verbose and (batch_idx % 10 == 0):
-                    print('| Global Round : {} | Local Epoch : {} | [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
-                        global_round, iter, batch_idx * len(images),
-                        len(self.trainloader.dataset),
-                        100. * batch_idx / len(self.trainloader), loss.item()))
+                # if self.args.verbose and (batch_idx % 10 == 0):
+                #     print('| Global Round : {} | Local Epoch : {} | [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                #         global_round, iter, batch_idx * len(images),
+                #         len(self.trainloader.dataset),
+                #         100. * batch_idx / len(self.trainloader), loss.item()))
                 self.logger.add_scalar('loss', loss.item())
                 batch_loss.append(loss.item())
             epoch_loss.append(sum(batch_loss)/len(batch_loss))

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.