
Added FP16 experiments and results

tanyksg, 5 years ago
Parent
Current commit
42e7e9d52e
58 files changed, with 1369 additions and 51 deletions
  1. Binary  save/objects_fp16/BaseSGD_cifar_cnn_epoch[9]_lr[0.01]_iid[1]_FP16.pkl
  2. Binary  save/objects_fp16/BaseSGD_mnist_cnn_epoch[9]_lr[0.01]_iid[1]_FP16.pkl
  3. Binary  save/objects_fp16/BaseSGD_mnist_mlp_epoch[9]_lr[0.01]_iid[1].pkl
  4. Binary  save/objects_fp16/BaseSGD_mnist_mlp_epoch[9]_lr[0.01]_iid[1]_FP16.pkl
  5. Binary  save/objects_fp16/FL_cifar_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50].pkl
  6. Binary  save/objects_fp16/FL_cifar_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  7. Binary  save/objects_fp16/FL_cifar_cnn_100_lr[0.01]_C[0.5]_iid[1]_E[5]_B[50].pkl
  8. Binary  save/objects_fp16/FL_cifar_cnn_200_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  9. Binary  save/objects_fp16/FL_cifar_cnn_300_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  10. Binary  save/objects_fp16/FL_mnist_cnn_100_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  11. Binary  save/objects_fp16/FL_mnist_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  12. Binary  save/objects_fp16/FL_mnist_cnn_261_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  13. Binary  save/objects_fp16/FL_mnist_mlp_1196_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  14. Binary  save/objects_fp16/FL_mnist_mlp_200_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10].pkl
  15. Binary  save/objects_fp16/FL_mnist_mlp_200_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  16. Binary  save/objects_fp16/FL_mnist_mlp_200_lr[0.1]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  17. Binary  save/objects_fp16/FL_mnist_mlp_300_lr[0.1]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  18. Binary  save/objects_fp16/FL_mnist_mlp_468_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  19. Binary  save/objects_fp16/HFL2_cifar_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  20. Binary  save/objects_fp16/HFL2_mnist_cnn_100_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  21. Binary  save/objects_fp16/HFL2_mnist_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  22. Binary  save/objects_fp16/HFL2_mnist_mlp_100_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  23. Binary  save/objects_fp16/HFL2_mnist_mlp_100_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  24. Binary  save/objects_fp16/HFL2_mnist_mlp_100_lr[0.05]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  25. Binary  save/objects_fp16/HFL4_cifar_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  26. Binary  save/objects_fp16/HFL4_mnist_cnn_100_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  27. Binary  save/objects_fp16/HFL4_mnist_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  28. Binary  save/objects_fp16/HFL4_mnist_mlp_100_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  29. Binary  save/objects_fp16/HFL4_mnist_mlp_100_lr[0.05]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  30. Binary  save/objects_fp16/HFL4_mnist_mlp_100_lr[0.05]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  31. Binary  save/objects_fp16/HFL4_mnist_mlp_100_lr[0.1]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  32. Binary  save/objects_fp16/HFL4_mnist_mlp_150_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  33. Binary  save/objects_fp16/HFL4_mnist_mlp_150_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  34. Binary  save/objects_fp16/HFL4_mnist_mlp_150_lr[0.05]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  35. Binary  save/objects_fp16/HFL4_mnist_mlp_30_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  36. Binary  save/objects_fp16/HFL8_cifar_cnn_100_lr[0.01]_C[0.1]_iid[1]_E[5]_B[50]_FP16.pkl
  37. Binary  save/objects_fp16/HFL8_mnist_cnn_30_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  38. Binary  save/objects_fp16/HFL8_mnist_cnn_30_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  39. Binary  save/objects_fp16/HFL8_mnist_mlp_30_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]_FP16.pkl
  40. Binary  save/objects_fp16/HFL8_mnist_mlp_30_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl
  41. Binary  save/objects_fp16/mnist_mlp_10_C[0.1]_iid[1]_E[10]_B[10].pkl
  42. Binary  save/objects_fp16/mnist_mlp_10_C[0.2]_iid[1]_E[10]_B[10].pkl
  43. Binary  save/objects_fp16/mnist_mlp_10_C[1.0]_iid[1]_E[10]_B[10].pkl
  44. Binary  save/objects_fp16/mnist_mlp_2_C[0.1]_iid[1]_E[10]_B[10].pkl
  45. +22 -0  src/Eval_fp16.ipynb
  46. +126 -0  src/baseline_main_fp16.py
  47. +193 -0  src/federated-hierarchical2_main_fp16.py
  48. +228 -0  src/federated-hierarchical4_main_fp16.py
  49. +295 -0  src/federated-hierarchical8_main_fp16.py
  50. +169 -0  src/federated_main_fp16.py
  51. +19 -18  src/models.py
  52. +27 -19  src/options.py
  53. +15 -0  src/script_bash_FL_diffFP.sh
  54. +32 -0  src/script_bash_FL_diffFP_cifar.sh
  55. +37 -0  src/script_bash_FL_diffFP_mnist_cnn.sh
  56. +71 -0  src/script_bash_FL_diffFP_mnist_mlp.sh
  57. +25 -8  src/update.py
  58. +110 -6  src/utils.py
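Every *_fp16 entry point listed above applies the same half-precision recipe: build the model in FP32, cast its parameters (and the criterion) to torch.float16, and cast every input batch to the same dtype before the forward pass. A condensed, self-contained sketch of that pattern follows; the names are illustrative, and it assumes a CUDA device, as the scripts below do, since several half-precision ops are not implemented on CPU.

import torch

device = torch.device('cuda')
model = torch.nn.Sequential(torch.nn.Linear(784, 200), torch.nn.ReLU(),
                            torch.nn.Linear(200, 10), torch.nn.LogSoftmax(dim=1)).to(device)
model.to(dtype=torch.float16)        # cast parameters and buffers to half precision

criterion = torch.nn.NLLLoss().to(device)
criterion.to(dtype=torch.float16)    # no-op for a parameter-free loss, kept for symmetry with the scripts

images = torch.randn(4, 784, device=device, dtype=torch.float16)  # inputs must match the model dtype
labels = torch.randint(0, 10, (4,), device=device)
loss = criterion(model(images), labels)
loss.backward()                      # gradients are computed in FP16 as well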


+ 22 - 0
src/Eval_fp16.ipynb
(File diff suppressed because it is too large)


+ 126 - 0
src/baseline_main_fp16.py

@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python version: 3.6
+
+
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+
+import torch
+from torch.utils.data import DataLoader
+
+from utils import get_dataset, set_device, build_model
+from options import args_parser
+from update import test_inference
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+import pickle
+import time
+
+from sys import exit
+from torchsummary import summary
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+    
+    args = args_parser()
+    
+    # Select CPU or GPU
+    device = set_device(args)
+        
+ 
+    # load datasets
+    train_dataset, test_dataset, _ = get_dataset(args)
+
+    # BUILD MODEL
+    global_model = build_model(args, train_dataset)
+
+    # Set the model to train and send it to device.
+    global_model.to(device)
+    # Set model to use Floating Point 16
+    global_model.to(dtype=torch.float16)  ##########
+    global_model.train()
+    print(global_model)
+    #img_size = train_dataset[0][0].shape
+    #summary(global_model, img_size)  ####
+    #print(global_model.parameters())
+
+    # Training
+    # Set optimizer and criterion
+    if args.optimizer == 'sgd':
+        optimizer = torch.optim.SGD(global_model.parameters(), lr=args.lr,
+                                    momentum=0.5)
+    elif args.optimizer == 'adam':
+        optimizer = torch.optim.Adam(global_model.parameters(), lr=args.lr,
+                                     weight_decay=1e-4)
+    elif args.optimizer == 'adagrad':
+        optimizer = torch.optim.Adagrad(global_model.parameters(), lr=args.lr,
+                                     weight_decay=1e-4)
+    elif args.optimizer == 'adamax':
+        optimizer = torch.optim.Adamax(global_model.parameters(), lr=args.lr,
+                                     weight_decay=1e-4)
+    elif args.optimizer == 'rmsprop':
+        optimizer = torch.optim.RMSprop(global_model.parameters(), lr=args.lr,
+                                     weight_decay=1e-4)
+    else:
+        exit('Error: unrecognized optimizer: ' + args.optimizer)
+    
+    # look under optim for more info on scheduler
+    #scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
+    
+    trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
+    
+    criterion = torch.nn.NLLLoss().to(device)
+    criterion.to(dtype = torch.float16)  ################
+    
+    epoch_loss = []
+
+    for epoch in tqdm(range(args.epochs)):
+        batch_loss = []
+
+        for batch_idx, (images, labels) in enumerate(trainloader):
+            images=images.to(dtype=torch.float16)  #################
+            images, labels = images.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+            outputs = global_model(images)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            if batch_idx % 50 == 0:
+                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                    epoch+1, batch_idx * len(images), len(trainloader.dataset),
+                    100. * batch_idx / len(trainloader), loss.item()))
+            batch_loss.append(loss.item())
+
+        loss_avg = sum(batch_loss)/len(batch_loss)
+        print('\nTrain loss:', loss_avg)
+        epoch_loss.append(loss_avg)
+
+
+    # testing
+    test_acc, test_loss = test_inference(args, global_model, test_dataset, dtype=torch.float16)  ############
+    print('Test on', len(test_dataset), 'samples')
+    print("Test Accuracy: {:.2f}%".format(100*test_acc))
+
+
+    # Saving the objects train_loss, test_acc, test_loss:
+    file_name = '../save/objects_fp16/BaseSGD_{}_{}_epoch[{}]_lr[{}]_iid[{}]_FP16.pkl'.\
+        format(args.dataset, args.model, epoch, args.lr, args.iid)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([epoch_loss, test_acc, test_loss], f)
+
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+
+
+    # # Plot loss
+    # plt.figure()
+    # plt.plot(range(len(epoch_loss)), epoch_loss)
+    # plt.xlabel('epochs')
+    # plt.ylabel('Train loss')
+    # plt.savefig('../save/nn_{}_{}_{}.png'.format(args.dataset, args.model,
+    #                                              args.epochs))
+
+
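baseline_main_fp16.py (and the other scripts below) imports set_device and build_model from src/utils.py, whose diff is not reproduced in this section. A minimal sketch of what those helpers plausibly look like, inferred from the call sites above; the function bodies and the MLP constructor arguments are assumptions and may differ from the actual code.

import sys
import torch
from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar

def set_device(args):
    # Use the CUDA device named by --gpu_id when --gpu is set and CUDA is available, else the CPU.
    if args.gpu and torch.cuda.is_available():
        return torch.device(args.gpu_id)
    return torch.device('cpu')

def build_model(args, train_dataset):
    # Pick a network matching the --model / --dataset flags used by these scripts.
    if args.model == 'cnn':
        if args.dataset == 'mnist':
            return CNNMnist(args=args)
        if args.dataset == 'fmnist':
            return CNNFashion_Mnist(args=args)
        if args.dataset == 'cifar':
            return CNNCifar(args=args)
    elif args.model == 'mlp':
        img_size = train_dataset[0][0].shape   # flatten one input image into a feature vector
        len_in = 1
        for dim in img_size:
            len_in *= dim
        return MLP(dim_in=len_in, dim_hidden=args.mlpdim, dim_out=args.num_classes)
    sys.exit('Error: unrecognized model or dataset')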

+ 193 - 0
src/federated-hierarchical2_main_fp16.py

@@ -0,0 +1,193 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python version: 3.6
+
+import os
+import copy
+import time
+import pickle
+import numpy as np
+from tqdm import tqdm
+
+import torch
+from tensorboardX import SummaryWriter
+
+from options import args_parser
+from update import LocalUpdate, test_inference
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+from utils import get_dataset, average_weights, exp_details, set_device, build_model, fl_train
+import math
+import random
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+
+    # define paths
+    path_project = os.path.abspath('..')
+    logger = SummaryWriter('../logs')
+
+    args = args_parser()
+    exp_details(args)
+
+    # Select CPU or GPU
+    device = set_device(args)
+
+    # load dataset and user groups
+    train_dataset, test_dataset, user_groupsold = get_dataset(args)
+
+    # user_groups = user_groupsold
+    # keylist = list(user_groups.keys())
+    # ======= Shuffle dataset ======= 
+    keys =  list(user_groupsold.keys())
+    random.shuffle(keys)
+    user_groups = dict()
+    for key in keys:
+        user_groups.update({key:user_groupsold[key]})
+    # print(user_groups.keys()) 
+    keylist = list(user_groups.keys())
+    print("keylist: ", keylist)
+    # ======= Splitting into clusters. FL groups ======= 
+    if args.num_clusters != 2:
+        exit("Confirm that the number of clusters is 2?")
+    cluster_size = int(args.num_users / args.num_clusters)
+
+    # Cluster 1
+    # A1 = np.arange(cluster_size, dtype=int)
+    A1 = keylist[:cluster_size]
+    # A1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("A1: ", A1)
+    user_groupsA = {k:user_groups[k] for k in A1 if k in user_groups}
+    print("Size of cluster 1: ", len(user_groupsA))
+    # Cluster 2
+    # B1 = np.arange(cluster_size, cluster_size+cluster_size, dtype=int)
+    B1 = keylist[cluster_size:2*cluster_size]
+    # B1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("B1: ", B1)
+    user_groupsB = {k:user_groups[k] for k in B1 if k in user_groups}
+    print("Size of cluster 2: ", len(user_groupsB))
+
+    # MODEL PARAM SUMMARY
+    global_model = build_model(args, train_dataset)
+    pytorch_total_params = sum(p.numel() for p in global_model.parameters())
+    print("Model total number of parameters: ", pytorch_total_params)
+
+    # from torchsummary import summary
+    # summary(global_model, (1, 28, 28))
+    # global_model.parameters()
+
+    # Set the model to train and send it to device.
+    global_model.to(device)
+    # Set model to use Floating Point 16
+    global_model.to(dtype=torch.float16)  ##########################
+    global_model.train()
+    print(global_model)
+
+    # copy weights
+    global_weights = global_model.state_dict()
+
+
+    # ======= Set the cluster models to train and send it to device. =======
+    # Cluster A
+    cluster_modelA = build_model(args, train_dataset)
+    cluster_modelA.to(device)
+    cluster_modelA.to(dtype=torch.float16)    ######################
+    cluster_modelA.train()
+    # copy weights
+    cluster_modelA_weights = cluster_modelA.state_dict()
+    
+    # Cluster B
+    cluster_modelB = build_model(args, train_dataset)
+    cluster_modelB.to(device)
+    cluster_modelB.to(dtype=torch.float16)    ######################
+    cluster_modelB.train()
+    # copy weights
+    cluster_modelB_weights = cluster_modelB.state_dict()
+
+
+    train_loss, train_accuracy = [], []
+    val_acc_list, net_list = [], []
+    cv_loss, cv_acc = [], []
+    print_every = 1
+    val_loss_pre, counter = 0, 0
+    testacc_check, epoch = 0, 0 
+    # idx = np.random.randint(0,99)
+
+    # for epoch in tqdm(range(args.epochs)):
+    for epoch in range(args.epochs):
+    # while testacc_check < args.test_acc or epoch < args.epochs:
+    # while epoch < args.epochs: 
+        local_weights, local_losses, local_accuracies= [], [], []
+        print(f'\n | Global Training Round : {epoch+1} |\n')
+        
+        # ============== TRAIN ==============
+        global_model.train()
+        
+        # ===== Cluster A ===== 
+        A_model, A_weights, A_losses = fl_train(args, train_dataset, cluster_modelA, A1, user_groupsA, args.Cepochs, logger, cluster_dtype=torch.float16)        
+        local_weights.append(copy.deepcopy(A_weights))
+        local_losses.append(copy.deepcopy(A_losses))    
+        cluster_modelA = global_model# = A_model    
+        # ===== Cluster B ===== 
+        B_model, B_weights, B_losses = fl_train(args, train_dataset, cluster_modelB, B1, user_groupsB, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(B_weights))
+        local_losses.append(copy.deepcopy(B_losses))
+        cluster_modelB = global_model# = B_model 
+        
+        
+        # averaging global weights
+        global_weights = average_weights(local_weights)
+
+        # update global weights
+        global_model.load_state_dict(global_weights)
+
+        loss_avg = sum(local_losses) / len(local_losses)
+        train_loss.append(loss_avg)
+        
+        # ============== EVAL ============== 
+        # Calculate avg training accuracy over all users at every epoch
+        list_acc, list_loss = [], []
+        global_model.eval()
+        # print("========== idx ========== ", idx)
+        for c in range(args.num_users):
+        # for c in range(cluster_size):
+        # C = np.random.choice(keylist, int(args.frac * args.num_users), replace=False) # random set of clients
+        # print("C: ", C)
+        # for c in C:
+            local_model = LocalUpdate(args=args, dataset=train_dataset,
+                                      idxs=user_groups[c], logger=logger)
+            acc, loss = local_model.inference(model=global_model, dtype=torch.float16)
+            list_acc.append(acc)
+            list_loss.append(loss)
+        train_accuracy.append(sum(list_acc)/len(list_acc))
+        # Add
+        testacc_check = 100*train_accuracy[-1]
+        epoch = epoch + 1
+
+        # print global training loss after every 'i' rounds
+        if (epoch+1) % print_every == 0:
+            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
+            print(f'Training Loss : {np.mean(np.array(train_loss))}')
+            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))
+            
+
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+
+    # Test inference after completion of training
+    test_acc, test_loss = test_inference(args, global_model, test_dataset, dtype=torch.float16)
+
+    # print(f' \n Results after {args.epochs} global rounds of training:')
+    print(f"\nAvg Training Stats after {epoch} global rounds:")
+    print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
+    print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
+
+    # Saving the objects train_loss and train_accuracy:
+    file_name = '../save/objects_fp16/HFL2_{}_{}_{}_lr[{}]_C[{}]_iid[{}]_E[{}]_B[{}]_FP16.pkl'.\
+    format(args.dataset, args.model, epoch, args.lr, args.frac, args.iid,
+           args.local_ep, args.local_bs)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([train_loss, train_accuracy], f)
+        
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+    
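The cluster training loop above delegates to fl_train from src/utils.py, which is also not shown in this section. Judging from the call sites, it runs args.Cepochs rounds of FedAvg over the clients of one cluster and returns the cluster model, its averaged weights, and the mean local loss. A hedged sketch under those assumptions:

import copy
import numpy as np
import torch
from update import LocalUpdate
from utils import average_weights   # standard FedAvg weight averaging is assumed

def fl_train(args, train_dataset, cluster_global_model, cluster, usergrp, epochs, logger,
             cluster_dtype=torch.float32):
    cluster_train_loss = []
    cluster_global_weights = cluster_global_model.state_dict()
    for rnd in range(epochs):
        local_weights, local_losses = [], []
        cluster_global_model.train()
        m = max(int(args.frac * len(cluster)), 1)                  # clients sampled per round
        idxs_users = np.random.choice(cluster, m, replace=False)
        for idx in idxs_users:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=usergrp[idx], logger=logger)
            w, loss = local_model.update_weights(model=copy.deepcopy(cluster_global_model),
                                                 global_round=rnd, dtype=cluster_dtype)
            local_weights.append(copy.deepcopy(w))
            local_losses.append(copy.deepcopy(loss))
        cluster_global_weights = average_weights(local_weights)    # FedAvg inside the cluster
        cluster_global_model.load_state_dict(cluster_global_weights)
        cluster_train_loss.append(sum(local_losses) / len(local_losses))
    avg_loss = sum(cluster_train_loss) / len(cluster_train_loss)
    return cluster_global_model, cluster_global_weights, avg_loss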

+ 228 - 0
src/federated-hierarchical4_main_fp16.py

@@ -0,0 +1,228 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python version: 3.6
+
+import os
+import copy
+import time
+import pickle
+import numpy as np
+from tqdm import tqdm
+
+import torch
+from tensorboardX import SummaryWriter
+
+from options import args_parser
+from update import LocalUpdate, test_inference
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+from utils import get_dataset, average_weights, exp_details, set_device, build_model, fl_train
+import math
+import random
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+
+    # define paths
+    path_project = os.path.abspath('..')
+    logger = SummaryWriter('../logs')
+
+    args = args_parser()
+    exp_details(args)
+
+    # Select CPU or GPU
+    device = set_device(args)
+
+    # load dataset and user groups
+    train_dataset, test_dataset, user_groupsold = get_dataset(args)
+
+    # user_groups = user_groupsold
+    # keylist = list(user_groups.keys())
+    # ======= Shuffle dataset ======= 
+    keys =  list(user_groupsold.keys())
+    random.shuffle(keys)
+    user_groups = dict()
+    for key in keys:
+        user_groups.update({key:user_groupsold[key]})
+    # print(user_groups.keys()) 
+    keylist = list(user_groups.keys())
+    print("keylist: ", keylist)
+    # ======= Splitting into clusters. FL groups ======= 
+    if args.num_clusters != 4:
+        exit("Confirm that the number of clusters is 4?")
+    cluster_size = int(args.num_users / args.num_clusters)    
+    # cluster_size = 50
+    print("Each cluster size: ", cluster_size)
+
+    # Cluster 1
+    A1 = keylist[:cluster_size]
+    # A1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("A1: ", A1)
+    user_groupsA = {k:user_groups[k] for k in A1 if k in user_groups}
+    print("Size of cluster 1: ", len(user_groupsA))
+    # Cluster 2
+    B1 = keylist[cluster_size:2*cluster_size]
+    # B1 = np.random.choice(keylist, cluster_size, replace=False)    
+    print("B1: ", B1)
+    user_groupsB = {k:user_groups[k] for k in B1 if k in user_groups}
+    print("Size of cluster 2: ", len(user_groupsB))
+    # Cluster 3
+    C1 = keylist[2*cluster_size:3*cluster_size]
+    # C1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("C1: ", C1)
+    user_groupsC = {k:user_groups[k] for k in C1 if k in user_groups}
+    print("Size of cluster 3: ", len(user_groupsC))
+    # Cluster 4
+    D1 = keylist[3*cluster_size:4*cluster_size]
+    # D1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("D1: ", D1)
+    user_groupsD = {k:user_groups[k] for k in D1 if k in user_groups}
+    print("Size of cluster 4: ", len(user_groupsD))
+
+    # MODEL PARAM SUMMARY
+    global_model = build_model(args, train_dataset)
+    pytorch_total_params = sum(p.numel() for p in global_model.parameters())
+    print("Model total number of parameters: ", pytorch_total_params)
+
+    # from torchsummary import summary
+    # summary(global_model, (1, 28, 28))
+    # global_model.parameters()
+
+    # Set the model to train and send it to device.
+    global_model.to(device)
+    # Set model to use Floating Point 16
+    global_model.to(dtype=torch.float16)  ##########################
+    global_model.train()
+    print(global_model)
+
+    # copy weights
+    global_weights = global_model.state_dict()
+
+
+    # ======= Set the cluster models to train and send it to device. =======
+    # Cluster A
+    cluster_modelA = build_model(args, train_dataset)
+    cluster_modelA.to(device)
+    cluster_modelA.to(dtype=torch.float16)
+    cluster_modelA.train()
+    # copy weights
+    cluster_modelA_weights = cluster_modelA.state_dict()
+    
+    # Cluster B
+    cluster_modelB = build_model(args, train_dataset)
+    cluster_modelB.to(device)
+    cluster_modelB.to(dtype=torch.float16)
+    cluster_modelB.train()
+    cluster_modelB_weights = cluster_modelB.state_dict()
+    
+    # Cluster C
+    cluster_modelC = build_model(args, train_dataset)
+    cluster_modelC.to(device)
+    cluster_modelC.to(dtype=torch.float16)
+    cluster_modelC.train()
+    cluster_modelC_weights = cluster_modelC.state_dict()
+    
+    # Cluster D
+    cluster_modelD = build_model(args, train_dataset)
+    cluster_modelD.to(device)
+    cluster_modelD.to(dtype=torch.float16)
+    cluster_modelD.train()
+    cluster_modelD_weights = cluster_modelD.state_dict()
+
+
+    train_loss, train_accuracy = [], []
+    val_acc_list, net_list = [], []
+    cv_loss, cv_acc = [], []
+    print_every = 1
+    val_loss_pre, counter = 0, 0
+    testacc_check, epoch = 0, 0 
+    idx = np.random.randint(0,99)
+
+    # for epoch in tqdm(range(args.epochs)):
+    for epoch in range(args.epochs):
+    # while testacc_check < args.test_acc or epoch < args.epochs:
+    # while epoch < args.epochs:        
+        local_weights, local_losses, local_accuracies= [], [], []
+        print(f'\n | Global Training Round : {epoch+1} |\n')
+        
+        # ============== TRAIN ==============
+        global_model.train()
+        
+        # ===== Cluster A ===== 
+        _, A_weights, A_losses = fl_train(args, train_dataset, cluster_modelA, A1, user_groupsA, args.Cepochs, logger, cluster_dtype=torch.float16)        
+        local_weights.append(copy.deepcopy(A_weights))
+        local_losses.append(copy.deepcopy(A_losses))    
+        cluster_modelA = global_model #= A_model        
+        # ===== Cluster B ===== 
+        B_model, B_weights, B_losses = fl_train(args, train_dataset, cluster_modelB, B1, user_groupsB, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(B_weights))
+        local_losses.append(copy.deepcopy(B_losses))
+        cluster_modelB = global_model #= B_model 
+        # ===== Cluster C ===== 
+        C_model, C_weights, C_losses = fl_train(args, train_dataset, cluster_modelC, C1, user_groupsC, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(C_weights))
+        local_losses.append(copy.deepcopy(C_losses))   
+        cluster_modelC = global_model #= C_model      
+        # ===== Cluster D ===== 
+        D_model, D_weights, D_losses = fl_train(args, train_dataset, cluster_modelD, D1, user_groupsD, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(D_weights))
+        local_losses.append(copy.deepcopy(D_losses))
+        cluster_modelD= global_model #= D_model 
+        
+        
+        # averaging global weights
+        global_weights = average_weights(local_weights)
+
+        # update global weights
+        global_model.load_state_dict(global_weights)
+
+        loss_avg = sum(local_losses) / len(local_losses)
+        train_loss.append(loss_avg)
+        
+        # ============== EVAL ============== 
+        # Calculate avg training accuracy over all users at every epoch
+        list_acc, list_loss = [], []
+        global_model.eval()
+        # print("========== idx ========== ", idx)
+        for c in range(args.num_users):
+        # for c in range(cluster_size):
+        # C = np.random.choice(keylist, int(args.frac * args.num_users), replace=False) # random set of clients
+        # print("C: ", C)
+        # for c in C:
+            local_model = LocalUpdate(args=args, dataset=train_dataset,
+                                      idxs=user_groups[c], logger=logger)
+            acc, loss = local_model.inference(model=global_model, dtype=torch.float16)
+            list_acc.append(acc)
+            list_loss.append(loss)
+        train_accuracy.append(sum(list_acc)/len(list_acc))
+        # Add
+        testacc_check = 100*train_accuracy[-1]
+        epoch = epoch + 1
+
+        # print global training loss after every 'i' rounds
+        if (epoch+1) % print_every == 0:
+            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
+            print(f'Training Loss : {np.mean(np.array(train_loss))}')
+            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))
+            
+
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+
+    # Test inference after completion of training
+    test_acc, test_loss = test_inference(args, global_model, test_dataset, dtype=torch.float16)
+
+    # print(f' \n Results after {args.epochs} global rounds of training:')
+    print(f"\nAvg Training Stats after {epoch} global rounds:")
+    print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
+    print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
+
+    # Saving the objects train_loss and train_accuracy:
+    file_name = '../save/objects_fp16/HFL4_{}_{}_{}_lr[{}]_C[{}]_iid[{}]_E[{}]_B[{}]_FP16.pkl'.\
+    format(args.dataset, args.model, epoch, args.lr, args.frac, args.iid,
+           args.local_ep, args.local_bs)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([train_loss, train_accuracy], f)
+        
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+    

+ 295 - 0
src/federated-hierarchical8_main_fp16.py

@@ -0,0 +1,295 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python version: 3.6
+
+import os
+import copy
+import time
+import pickle
+import numpy as np
+from tqdm import tqdm
+
+import torch
+from tensorboardX import SummaryWriter
+
+from options import args_parser
+from update import LocalUpdate, test_inference
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+from utils import get_dataset, average_weights, exp_details, set_device, build_model, fl_train
+import math
+import random
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+
+    # define paths
+    path_project = os.path.abspath('..')
+    logger = SummaryWriter('../logs')
+
+    args = args_parser()
+    exp_details(args)
+
+    # Select CPU or GPU
+    device = set_device(args)
+
+    # load dataset and user groups
+    train_dataset, test_dataset, user_groupsold = get_dataset(args)
+
+    # user_groups = user_groupsold
+    # keylist = list(user_groups.keys())
+    # ======= Shuffle dataset ======= 
+    keys =  list(user_groupsold.keys())
+    random.shuffle(keys)
+    user_groups = dict()
+    for key in keys:
+        user_groups.update({key:user_groupsold[key]})
+    # print(user_groups.keys()) 
+    keylist = list(user_groups.keys())
+    print("keylist: ", keylist)
+    # ======= Splitting into clusters. FL groups ======= 
+    if args.num_clusters != 8:
+        exit("Confirm that the number of clusters is 8?")
+    cluster_size = int(args.num_users / args.num_clusters)    
+    print("Each cluster size: ", cluster_size)
+
+    # Cluster 1
+    A1 = keylist[:cluster_size]
+    # A1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("A1: ", A1)
+    user_groupsA = {k:user_groups[k] for k in A1 if k in user_groups}
+    print("Size of cluster 1: ", len(user_groupsA))
+    # Cluster 2
+    B1 = keylist[cluster_size:2*cluster_size]
+    # B1 = np.random.choice(keylist, cluster_size, replace=False)    
+    print("B1: ", B1)
+    user_groupsB = {k:user_groups[k] for k in B1 if k in user_groups}
+    print("Size of cluster 2: ", len(user_groupsB))
+    # Cluster 3
+    C1 = keylist[2*cluster_size:3*cluster_size]
+    # C1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("C1: ", C1)
+    user_groupsC = {k:user_groups[k] for k in C1 if k in user_groups}
+    print("Size of cluster 3: ", len(user_groupsC))
+    # Cluster 4
+    D1 = keylist[3*cluster_size:4*cluster_size]
+    # D1 = np.random.choice(keylist, cluster_size, replace=False)
+    print("D1: ", D1)
+    user_groupsD = {k:user_groups[k] for k in D1 if k in user_groups}
+    print("Size of cluster 4: ", len(user_groupsD))
+    # Cluster 5    
+    E1 = keylist[4*cluster_size:5*cluster_size] #np.random.choice(keylist, cluster_size, replace=False)
+    print("E1: ", E1)
+    user_groupsE = {k:user_groups[k] for k in E1 if k in user_groups}
+    print("Size of cluster 5: ", len(user_groupsE))
+    # Cluster 6
+    F1 = keylist[5*cluster_size:6*cluster_size] #np.random.choice(keylist, cluster_size, replace=False)    
+    print("F1: ", F1)
+    user_groupsF = {k:user_groups[k] for k in F1 if k in user_groups}
+    print("Size of cluster 6: ", len(user_groupsF))
+    # Cluster 7    
+    G1 = keylist[6*cluster_size:7*cluster_size] #np.random.choice(keylist, cluster_size, replace=False)
+    print("G1: ", G1)
+    user_groupsG = {k:user_groups[k] for k in G1 if k in user_groups}
+    print("Size of cluster 7: ", len(user_groupsC))
+    # Cluster 8
+    H1 = keylist[7*cluster_size:] #np.random.choice(keylist, cluster_size, replace=False)
+    print("H1: ", H1)
+    user_groupsH = {k:user_groups[k] for k in H1 if k in user_groups}
+    print("Size of cluster 8: ", len(user_groupsH))
+
+    # MODEL PARAM SUMMARY
+    global_model = build_model(args, train_dataset)
+    pytorch_total_params = sum(p.numel() for p in global_model.parameters())
+    print("Model total number of parameters: ", pytorch_total_params)
+
+    # from torchsummary import summary
+    # summary(global_model, (1, 28, 28))
+    # global_model.parameters()
+
+    # Set the model to train and send it to device.
+    global_model.to(device)
+    # Set model to use Floating Point 16
+    global_model.to(dtype=torch.float16)  ##########################
+    global_model.train()
+    print(global_model)
+
+    # copy weights
+    global_weights = global_model.state_dict()
+
+
+    # ======= Set the cluster models to train and send it to device. =======
+    # Cluster A
+    cluster_modelA = build_model(args, train_dataset)
+    cluster_modelA.to(device)
+    cluster_modelA.to(dtype=torch.float16)
+    cluster_modelA.train()
+    # copy weights
+    cluster_modelA_weights = cluster_modelA.state_dict()
+    
+    # Cluster B
+    cluster_modelB = build_model(args, train_dataset)
+    cluster_modelB.to(device)
+    cluster_modelB.to(dtype=torch.float16)
+    cluster_modelB.train()
+    cluster_modelB_weights = cluster_modelB.state_dict()
+    
+    # Cluster C
+    cluster_modelC = build_model(args, train_dataset)
+    cluster_modelC.to(device)
+    cluster_modelC.to(dtype=torch.float16)
+    cluster_modelC.train()
+    cluster_modelC_weights = cluster_modelC.state_dict()
+    
+    # Cluster D
+    cluster_modelD = build_model(args, train_dataset)
+    cluster_modelD.to(device)
+    cluster_modelD.to(dtype=torch.float16)
+    cluster_modelD.train()
+    cluster_modelD_weights = cluster_modelD.state_dict()
+    
+    # Cluster E
+    cluster_modelE = build_model(args, train_dataset)
+    cluster_modelE.to(device)
+    cluster_modelE.to(dtype=torch.float16)
+    cluster_modelE.train()
+    cluster_modelE_weights = cluster_modelE.state_dict()
+    
+    # Cluster F
+    cluster_modelF = build_model(args, train_dataset)
+    cluster_modelF.to(device)
+    cluster_modelF.to(dtype=torch.float16)
+    cluster_modelF.train()
+    cluster_modelF_weights = cluster_modelF.state_dict()
+    
+    # Cluster G
+    cluster_modelG = build_model(args, train_dataset)
+    cluster_modelG.to(device)
+    cluster_modelG.to(dtype=torch.float16)
+    cluster_modelG.train()
+    cluster_modelG_weights = cluster_modelG.state_dict()
+    
+    # Cluster H
+    cluster_modelH = build_model(args, train_dataset)
+    cluster_modelH.to(device)
+    cluster_modelH.to(dtype=torch.float16)
+    cluster_modelH.train()
+    cluster_modelH_weights = cluster_modelH.state_dict()
+
+
+    train_loss, train_accuracy = [], []
+    val_acc_list, net_list = [], []
+    cv_loss, cv_acc = [], []
+    print_every = 1
+    val_loss_pre, counter = 0, 0
+    testacc_check, epoch = 0, 0 
+    idx = np.random.randint(0,99)
+
+    # for epoch in tqdm(range(args.epochs)):
+    for epoch in range(args.epochs):
+    # while testacc_check < args.test_acc or epoch < args.epochs:
+    # while epoch < args.epochs:        
+        local_weights, local_losses, local_accuracies= [], [], []
+        print(f'\n | Global Training Round : {epoch+1} |\n')
+        
+        # ============== TRAIN ==============
+        global_model.train()
+        
+        # ===== Cluster A =====
+        A_model, A_weights, A_losses = fl_train(args, train_dataset, cluster_modelA, A1, user_groupsA, args.Cepochs, logger, cluster_dtype=torch.float16)        
+        local_weights.append(copy.deepcopy(A_weights))
+        local_losses.append(copy.deepcopy(A_losses))    
+        cluster_modelA = global_model# = A_model    
+        # ===== Cluster B ===== 
+        B_model, B_weights, B_losses = fl_train(args, train_dataset, cluster_modelB, B1, user_groupsB, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(B_weights))
+        local_losses.append(copy.deepcopy(B_losses))
+        cluster_modelB = global_model# = B_model 
+        # ===== Cluster C ===== 
+        C_model, C_weights, C_losses = fl_train(args, train_dataset, cluster_modelC, C1, user_groupsC, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(C_weights))
+        local_losses.append(copy.deepcopy(C_losses))   
+        cluster_modelC = global_model# = C_model      
+        # ===== Cluster D ===== 
+        D_model, D_weights, D_losses = fl_train(args, train_dataset, cluster_modelD, D1, user_groupsD, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(D_weights))
+        local_losses.append(copy.deepcopy(D_losses))
+        cluster_modelD = global_model# = D_model 
+        # ===== Cluster E ===== 
+        E_model, E_weights, E_losses = fl_train(args, train_dataset, cluster_modelE, E1, user_groupsE, args.Cepochs, logger, cluster_dtype=torch.float16)        
+        local_weights.append(copy.deepcopy(E_weights))
+        local_losses.append(copy.deepcopy(E_losses))    
+        cluster_modelE = global_model# = E_model    
+        # ===== Cluster F ===== 
+        F_model, F_weights, F_losses = fl_train(args, train_dataset, cluster_modelF, F1, user_groupsF, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(F_weights))
+        local_losses.append(copy.deepcopy(F_losses))
+        cluster_modelF = global_model# = F_model 
+        # ===== Cluster G ===== 
+        G_model, G_weights, G_losses = fl_train(args, train_dataset, cluster_modelG, G1, user_groupsG, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(G_weights))
+        local_losses.append(copy.deepcopy(G_losses))   
+        cluster_modelG = global_model# = G_model      
+        # ===== Cluster H ===== 
+        H_model, H_weights, H_losses = fl_train(args, train_dataset, cluster_modelH, H1, user_groupsH, args.Cepochs, logger, cluster_dtype=torch.float16)
+        local_weights.append(copy.deepcopy(H_weights))
+        local_losses.append(copy.deepcopy(H_losses))
+        cluster_modelH = global_model# = H_model 
+        
+        
+        # averaging global weights
+        global_weights = average_weights(local_weights)
+
+        # update global weights
+        global_model.load_state_dict(global_weights)
+
+        loss_avg = sum(local_losses) / len(local_losses)
+        train_loss.append(loss_avg)
+        
+        # ============== EVAL ============== 
+        # Calculate avg training accuracy over all users at every epoch
+        list_acc, list_loss = [], []
+        global_model.eval()
+        # print("========== idx ========== ", idx)
+        for c in range(args.num_users):
+        # for c in range(cluster_size):
+        # C = np.random.choice(keylist, int(args.frac * args.num_users), replace=False) # random set of clients
+        # print("C: ", C)
+        # for c in C:
+            local_model = LocalUpdate(args=args, dataset=train_dataset,
+                                      idxs=user_groups[c], logger=logger)
+            acc, loss = local_model.inference(model=global_model, dtype=torch.float16)
+            list_acc.append(acc)
+            list_loss.append(loss)
+        train_accuracy.append(sum(list_acc)/len(list_acc))
+        # Add
+        testacc_check = 100*train_accuracy[-1]
+        epoch = epoch + 1
+
+        # print global training loss after every 'i' rounds
+        if (epoch+1) % print_every == 0:
+            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
+            print(f'Training Loss : {np.mean(np.array(train_loss))}')
+            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))
+            
+
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+
+    # Test inference after completion of training
+    test_acc, test_loss = test_inference(args, global_model, test_dataset, dtype=torch.float16)
+
+    # print(f' \n Results after {args.epochs} global rounds of training:')
+    print(f"\nAvg Training Stats after {epoch} global rounds:")
+    print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
+    print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
+
+    # Saving the objects train_loss and train_accuracy:
+    file_name = '../save/objects_fp16/HFL8_{}_{}_{}_lr[{}]_C[{}]_iid[{}]_E[{}]_B[{}]_FP16.pkl'.\
+    format(args.dataset, args.model, epoch, args.lr, args.frac, args.iid,
+           args.local_ep, args.local_bs)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([train_loss, train_accuracy], f)
+    
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+    

+ 169 - 0
src/federated_main_fp16.py

@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python version: 3.6
+
+
+import os
+import copy
+import time
+import pickle
+import numpy as np
+from tqdm import tqdm
+
+import torch
+from tensorboardX import SummaryWriter 
+#from torch.utils.tensorboard import SummaryWriter
+
+from options import args_parser
+from update import LocalUpdate, test_inference
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+from utils import get_dataset, average_weights, exp_details, set_device, build_model
+# os.environ['CUDA_VISIBLE_DEVICES'] ='0'
+
+from sys import exit
+#from torchsummary import summary
+
+if __name__ == '__main__':
+    start_time = time.time()
+
+    # define paths
+    path_project = os.path.abspath('..')
+    logger = SummaryWriter('../logs')
+
+    args = args_parser()
+    exp_details(args)
+
+    # Select CPU or GPU
+    device = set_device(args)
+
+    # load dataset and user groups
+    train_dataset, test_dataset, user_groups = get_dataset(args)
+
+    # BUILD MODEL
+    global_model = build_model(args, train_dataset)
+
+    # Set the model to train and send it to device.
+    global_model.to(device)
+    # Set model to use Floating Point 16
+    global_model.to(dtype=torch.float16)  ##########
+    global_model.train()
+    print(global_model)
+    
+    # MODEL PARAM SUMMARY
+    pytorch_total_params = sum(p.numel() for p in global_model.parameters())
+    print("Model total number of parameters: ", pytorch_total_params)
+    # print(global_model.parameters())
+
+    # copy weights
+    global_weights = global_model.state_dict()
+
+    # Training
+    train_loss, train_accuracy = [], []
+    val_acc_list, net_list = [], []
+    cv_loss, cv_acc = [], []
+    print_every = 1
+    val_loss_pre, counter = 0, 0
+    testacc_check, epoch = 0, 0 
+
+    # for epoch in tqdm(range(args.epochs)):  # global training epochs
+    for epoch in range(args.epochs):
+    # while testacc_check < args.test_acc or epoch < args.epochs:
+    # while testacc_check < args.test_acc:
+        local_weights, local_losses = [], [] # init empty local weights and local losses
+        print(f'\n | Global Training Round : {epoch+1} |\n') # starting with | Global Training Round : 1 |
+
+        """
+        model.train() tells your model that you are training it, so layers like dropout and batchnorm, which behave differently during training and evaluation, know what is going on and can act accordingly.
+
+        More details: it only sets the mode to train (see the source code). Call model.eval() or model.train(mode=False) to indicate that you are testing; it is tempting to expect train() to actually train the model, but it does not do that, it just sets the mode.
+        """
+        # ============== TRAIN ============== 
+        global_model.train()
+        m = max(int(args.frac * args.num_users), 1) # C = args.frac. Setting number of clients m for training
+        idxs_users = np.random.choice(range(args.num_users), m, replace=False) # args.num_users=100 total clients. Choosing a random array of indices. Subset of clients.
+
+        for idx in idxs_users: # For each client in the subset.
+            local_model = LocalUpdate(args=args, dataset=train_dataset,
+                                      idxs=user_groups[idx], logger=logger)
+            w, loss = local_model.update_weights( # update_weights() contain multiple prints
+                model=copy.deepcopy(global_model), global_round=epoch,dtype=torch.float16) 
+                # w = local model weights
+            local_weights.append(copy.deepcopy(w))
+            local_losses.append(copy.deepcopy(loss))
+
+        # Averaging m local client weights
+        global_weights = average_weights(local_weights)
+
+        # update global weights
+        global_model.load_state_dict(global_weights)
+
+        loss_avg = sum(local_losses) / len(local_losses)
+        train_loss.append(loss_avg) # Performance measure
+
+        # ============== EVAL ============== 
+        # Calculate avg training accuracy over all users at every epoch
+        list_acc, list_loss = [], []
+        global_model.eval() # the model must be in evaluation mode when computing outputs if dropout or batch norm were used during training.
+
+        for c in range(args.num_users): # 0 to 99
+            # local_model = LocalUpdate(args=args, dataset=train_dataset,
+            #                           idxs=user_groups[idx], logger=logger)
+            # Fix error idxs=user_groups[idx] to idxs=user_groups[c]                                      
+            local_model = LocalUpdate(args=args, dataset=train_dataset,
+                                      idxs=user_groups[c], logger=logger)
+            acc, loss = local_model.inference(model=global_model, dtype=torch.float16)
+            list_acc.append(acc)
+            list_loss.append(loss)
+        train_accuracy.append(sum(list_acc)/len(list_acc)) # Performance measure
+
+        # Add
+        testacc_check = 100*train_accuracy[-1]
+        epoch = epoch + 1
+
+        # print global training loss after every 'i' rounds
+        if (epoch+1) % print_every == 0: # If print_every=2, => print every 2 rounds
+            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
+            print(f'Training Loss : {np.mean(np.array(train_loss))}')
+            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))
+
+    # Test inference after completion of training
+    test_acc, test_loss = test_inference(args, global_model, test_dataset, dtype=torch.float16)
+
+    print(f' \n Results after {epoch} global rounds of training:')
+    print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
+    print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
+
+    # Saving the objects train_loss and train_accuracy:
+    file_name = '../save/objects_fp16/FL_{}_{}_{}_lr[{}]_C[{}]_iid[{}]_E[{}]_B[{}]_FP16.pkl'.\
+        format(args.dataset, args.model, epoch, args.lr, args.frac, args.iid,
+               args.local_ep, args.local_bs)
+
+    with open(file_name, 'wb') as f:
+        pickle.dump([train_loss, train_accuracy], f)
+
+    print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
+
+    # PLOTTING (optional)
+    # import matplotlib
+    # import matplotlib.pyplot as plt
+    # matplotlib.use('Agg')
+
+    # Plot Loss curve
+    # plt.figure()
+    # plt.title('Training Loss vs Communication rounds')
+    # plt.plot(range(len(train_loss)), train_loss, color='r')
+    # plt.ylabel('Training loss')
+    # plt.xlabel('Communication Rounds')
+    # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_loss.png'.
+    #             format(args.dataset, args.model, args.epochs, args.frac,
+    #                    args.iid, args.local_ep, args.local_bs))
+    #
+    # # Plot Average Accuracy vs Communication rounds
+    # plt.figure()
+    # plt.title('Average Accuracy vs Communication rounds')
+    # plt.plot(range(len(train_accuracy)), train_accuracy, color='k')
+    # plt.ylabel('Average Accuracy')
+    # plt.xlabel('Communication Rounds')
+    # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_acc.png'.
+    #             format(args.dataset, args.model, args.epochs, args.frac,
+    #                    args.iid, args.local_ep, args.local_bs))
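average_weights, called after every global round above, also comes from src/utils.py, whose updated code is not shown here. The standard FedAvg version is a coordinate-wise mean of the client state_dicts, sketched below under that assumption; with FP16 weights the accumulation and the division happen in half precision, which is one place rounding error can build up.

import copy
import torch

def average_weights(w):
    # Element-wise average of a list of model state_dicts (plain FedAvg).
    w_avg = copy.deepcopy(w[0])
    for key in w_avg.keys():
        for i in range(1, len(w)):
            w_avg[key] += w[i][key]
        w_avg[key] = torch.div(w_avg[key], len(w))
    return w_avg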

+ 19 - 18
src/models.py

@@ -109,6 +109,25 @@ class CNNFashion_Mnist(nn.Module):
         return out
 
 
+class CNNCifar(nn.Module):
+    def __init__(self, args):
+        super(CNNCifar, self).__init__()
+        self.conv1 = nn.Conv2d(3, 32, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(32, 64, 5)
+        self.fc1 = nn.Linear(64 * 5 * 5, 512)
+        self.fc2 = nn.Linear(512, 84)
+        self.fc3 = nn.Linear(84, args.num_classes)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = x.view(-1, 64 * 5 * 5)
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return F.log_softmax(x, dim=1)
+
 # class CNNCifar(nn.Module):
 #     def __init__(self, args):
 #         super(CNNCifar, self).__init__()
@@ -129,22 +148,4 @@ class CNNFashion_Mnist(nn.Module):
 #         return F.log_softmax(x, dim=1)
 
 # Change CNNCifar model to 917350 params
-class CNNCifar(nn.Module):
-    def __init__(self, args):
-        super(CNNCifar, self).__init__()
-        self.conv1 = nn.Conv2d(3, 32, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(32, 64, 5)
-        self.fc1 = nn.Linear(64 * 5 * 5, 512)
-        self.fc2 = nn.Linear(512, 84)
-        self.fc3 = nn.Linear(84, args.num_classes)
-
-    def forward(self, x):
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = x.view(-1, 64 * 5 * 5)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return F.log_softmax(x, dim=1)
 

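A quick check of the "917350 params" figure mentioned in the comment above: with num_classes = 10, the layers of the relocated CNNCifar class do add up to 917,350 parameters. Args below is a hypothetical stand-in for the argparse namespace the real scripts pass in.

import torch
from models import CNNCifar

class Args:
    num_classes = 10

model = CNNCifar(args=Args())
model.to(dtype=torch.float16)                       # same cast the FP16 scripts apply
print(sum(p.numel() for p in model.parameters()))   # 917350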
+ 27 - 19
src/options.py

@@ -6,8 +6,9 @@ import argparse
 
 
 def args_parser():
-    parser = argparse.ArgumentParser()
-
+    #parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(description="Arguments for Neural Net")
+    
     # federated arguments (Notation for the arguments followed from paper)
     parser.add_argument('--epochs', type=int, default=5,
                         help="number of rounds of training")
@@ -20,17 +21,17 @@ def args_parser():
     parser.add_argument('--local_bs', type=int, default=10,
                         help="local batch size: B")
     parser.add_argument('--lr', type=float, default=0.01,
-                        help='learning rate')
+                        help="learning rate")
     parser.add_argument('--momentum', type=float, default=0.5,
-                        help='SGD momentum (default: 0.5)')
+                        help="SGD momentum (default: 0.5)")
 
     # model arguments
-    parser.add_argument('--model', type=str, default='mlp', help='model name')
+    parser.add_argument('--model', type=str, default='mlp', help="model name")
     parser.add_argument('--kernel_num', type=int, default=9,
-                        help='number of each kind of kernel')
+                        help="number of each kind of kernel")
     parser.add_argument('--kernel_sizes', type=str, default='3,4,5',
-                        help='comma-separated kernel size to \
-                        use for convolution')
+                        help="comma-separated kernel size to \
+                        use for convolution")
     parser.add_argument('--num_channels', type=int, default=1, help="number \
                         of channels of imgs")
     parser.add_argument('--norm', type=str, default='batch_norm',
@@ -44,28 +45,35 @@ def args_parser():
 
     # other arguments
     parser.add_argument('--dataset', type=str, default='mnist', help="name \
-                        of dataset")
+                        of dataset")
     parser.add_argument('--num_classes', type=int, default=10, help="number \
                         of classes")
     parser.add_argument('--gpu', type=int, default=0, help="To use cuda, set \
-                        to a specific GPU ID. Default set to use CPU.")
+                        to 1. Default set to use CPU.")
     parser.add_argument('--optimizer', type=str, default='sgd', help="type \
                         of optimizer")
     parser.add_argument('--iid', type=int, default=1,
-                        help='Default set to IID. Set to 0 for non-IID.')
+                        help="Default set to IID. Set to 0 for non-IID.")
     parser.add_argument('--unequal', type=int, default=0,
-                        help='whether to use unequal data splits for  \
-                        non-i.i.d setting (use 0 for equal splits)')
+                        help="whether to use unequal data splits for  \
+                        non-i.i.d setting (use 0 for equal splits)")
     parser.add_argument('--stopping_rounds', type=int, default=10,
-                        help='rounds of early stopping')
-    parser.add_argument('--verbose', type=int, default=1, help='verbose')
-    parser.add_argument('--seed', type=int, default=1, help='random seed')
+                        help="rounds of early stopping")
+    parser.add_argument('--verbose', type=int, default=1, help="verbose")
+    parser.add_argument('--seed', type=int, default=1, help="random seed")
 
     # Add arguments
-    parser.add_argument('--num_clusters', type=int, default=2, help='the number of clusters')
-    parser.add_argument('--test_acc', type=int, default=95, help='target test accuracy')
+    parser.add_argument('--num_clusters', type=int, default=2, help="the number of clusters")
+    parser.add_argument('--test_acc', type=int, default=95, help="target test accuracy")
     parser.add_argument('--Cepochs', type=int, default=5,help="number of rounds of training in each cluster")
     parser.add_argument('--mlpdim', type=int, default=200,help="MLP model hidden dimension")
-
+    parser.add_argument('--gpu_id', default='cuda:0', help="To set GPU device \
+                        ID if CUDA is available")
+    parser.add_argument('--model_dtype', default='torch.float32', help="Dtype \
+                        for model")
+    parser.add_argument('--loss_dtype', default='torch.float32', help="Dtype \
+                        for loss or criterion")
+    
+    
     args = parser.parse_args()
     return args

+ 15 - 0
src/script_bash_FL_diffFP.sh

@@ -0,0 +1,15 @@
+#!/bin/bash
+# Comment lines start with a #
+# Commands are surrounded by ()
+# A guide to writing bash scripts: https://hackernoon.com/know-shell-scripting-202b2fbe03a8
+
+# This is the baseline without FL for 16-bit floating point.
+python ./baseline_main_fp16.py --epochs=10 --model="mlp" --dataset="mnist" --num_classes=10 --gpu=1 --gpu_id="cuda:0" --mlpdim=200 | tee -a ../logs/terminal_output1.txt &
+
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=1 --gpu=1 --lr=0.01 --test_acc=95 --mlpdim=200 --epochs=200 | tee -a ../logs/terminal_output2.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=2 --gpu=1 --lr=0.01 --mlpdim=200 --epochs=100 --test_acc=94 | tee -a ../logs/terminal_output3.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --lr=0.1 --mlpdim=200 --epochs=100 --test_acc=95 | tee -a ../logs/terminal_output4.txt
+
+python ./federated-hierarchical8_main_fp16.py --local_ep=1 --local_bs=10 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --gpu=1 --lr=0.01 --mlpdim=200 --epochs=30 --num_cluster=8 --test_acc=95 | tee -a ../logs/terminal_output5.txt
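Each run above appends its console output to a log under ../logs and pickles its curves into save/objects_fp16/ (the .pkl files listed at the top of this commit). A small, hypothetical snippet for reading one of them back, similar in spirit to what src/Eval_fp16.ipynb presumably does; note that the FL/HFL scripts dump [train_loss, train_accuracy] while the BaseSGD baseline dumps [epoch_loss, test_acc, test_loss].

import pickle

with open('../save/objects_fp16/FL_mnist_mlp_200_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]_FP16.pkl', 'rb') as f:
    train_loss, train_accuracy = pickle.load(f)
print('final train accuracy: {:.2f}%'.format(100 * train_accuracy[-1]))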

+ 32 - 0
src/script_bash_FL_diffFP_cifar.sh

@@ -0,0 +1,32 @@
+#!/bin/bash
+# Comment lines start with a #
+# Commands are surrounded by ()
+# Website on how to write bash scripts: https://hackernoon.com/know-shell-scripting-202b2fbe03a8
+
+# Set GPU device
+GPU_ID="cuda:1"
+
+# This is the baseline without FL for 16-bit floating point.
+python ./baseline_main_fp16.py --epochs=10 --model=cnn --dataset=cifar --num_classes=10 --gpu=1 --gpu_id=$GPU_ID | tee -a ../logs/terminaloutput_cifar_fp16_baseline.txt &
+
+
+# This is for 1 cluster FL for 16-bit floating point
+python ./federated_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --model=cnn --dataset=cifar --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --test_acc=85 --epochs=100 | tee -a ../logs/terminaloutput_cifar_fp16_1c_10ep_ta85.txt &
+
+python ./federated_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --model=cnn --dataset=cifar --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=200 | tee -a ../logs/terminaloutput_cifar_fp16_1c_200ep_ta95.txt &
+
+python ./federated_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --model=cnn --dataset=cifar --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=300 | tee -a ../logs/terminaloutput_cifar_fp16_1c_300ep_ta95.txt &
+
+
+# This is for 2 clusters FL for 16-bit floating point
+python ./federated-hierarchical2_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --Cepochs=10 --model=cnn --dataset=cifar --iid=1 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 --test_acc=85 | tee -a ../logs/terminaloutput_cifar_fp16_2c_100ep_ta85.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --Cepochs=10 --model=cnn --dataset=cifar --iid=1 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 | tee -a ../logs/terminaloutput_cifar_fp16_2c_100ep_t95.txt &
+
+
+# This is for 4 clusters FL for 16-bit floating point
+python ./federated-hierarchical4_main_fp16.py --local_ep=5 --local_bs=50 --frac=0.1 --Cepochs=10 --model=cnn --dataset=cifar --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 --num_cluster=4 | tee -a ../logs/terminaloutput_cifar_fp16_4c_100ep_t95.txt &
+
+
+# This is for 8 clusters FL for 16-bit floating point
+python ./federated-hierarchical8_main_fp16.py --local_ep=5 --local_bs=50 --Cepochs=10 --model=cnn --dataset=cifar --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 --num_cluster=8 | tee -a ../logs/terminaloutput_cifar_fp16_8c_100ep_t95.txt &

+ 37 - 0
src/script_bash_FL_diffFP_mnist_cnn.sh

@@ -0,0 +1,37 @@
+#!/bin/bash
+# Comment lines start with a #
+# Commands are surrounded by ()
+# Website on how to write bash scripts: https://hackernoon.com/know-shell-scripting-202b2fbe03a8
+
+# Set GPU device
+GPU_ID="cuda:1"
+
+# This is the baseline without FL for 16-bit floating point.
+python ./baseline_main_fp16.py --epochs=10 --model=cnn --dataset=mnist --num_classes=10 --gpu=1  --gpu_id=$GPU_ID | tee -a ../logs/terminaloutput_mnist_CNN_fp16_baseline.txt &
+
+
+# This is for 1 cluster FL for 16-bit floating point
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=cnn --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --test_acc=97 --epochs=100 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_1c1.txt &
+
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=cnn --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 --test_acc=97 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_1c2.txt &
+
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=cnn --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=261 --test_acc=97 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_1c3.txt &
+
+
+# This is for 2 clusters FL for 16-bit floating point
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=cnn --dataset=mnist --iid=1 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_2c1.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=cnn --dataset=mnist --iid=0 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_2c2.txt &
+
+
+
+
+# This is for 4 clusters FL for 16-bit floating point
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=cnn --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100  --num_cluster=4 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_4c1.txt &
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=cnn --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=100  --num_cluster=4 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_4c2.txt &
+
+# This is for 8 clusters FL for 16-bit floating point
+python ./federated-hierarchical8_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=cnn --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --epochs=30  --num_cluster=8 | tee -a ../logs/terminaloutput_mnist_CNN_fp16_8c1.txt &
+
+

+ 71 - 0
src/script_bash_FL_diffFP_mnist_mlp.sh

@@ -0,0 +1,71 @@
+#!/bin/bash
+# Comment lines start with a #
+# Commands are surrounded by ()
+# Website on how to write bash scripts: https://hackernoon.com/know-shell-scripting-202b2fbe03a8
+
+# Set GPU device
+GPU_ID="cuda:1"
+
+# This is the baseline without FL for 16-bit floating point.
+python ./baseline_main_fp16.py --epochs=10 --model=mlp --dataset=mnist --num_classes=10 --gpu=1 --gpu_id=$GPU_ID | tee -a ../logs/terminaloutput_mnist_fp16_baseline.txt &
+
+
+# This is for 1 cluster FL for 16-bit floating point
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --test_acc=95 --mlpdim=200 --epochs=200 | tee -a ../logs/terminaloutput_mnist_fp16_1c.txt &
+
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.1 --test_acc=95 --mlpdim=200 --epochs=300 | tee -a ../logs/terminaloutput_mnist_fp16_1c.txt &
+
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.1 --test_acc=95 --mlpdim=250 --epochs=200 | tee -a ../logs/terminaloutput_mnist_fp16_1c.txt &
+
+# FL_mnist_mlp_468_C[0.1]_iid[1]_E[1]_B[10]
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --test_acc=95 --mlpdim=200 --epochs=468 | tee -a ../logs/terminaloutput_mnist_fp16_1c_468epoch.txt &
+
+# FL_mnist_mlp_1196_lr[0.01]_C[0.1]_iid[0]_E[1]_B[10]
+python ./federated_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --model=mlp --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --test_acc=95 --mlpdim=200 --epochs=1196 | tee -a ../logs/terminaloutput_mnist_fp16_1c_1196epoch.txt &
+
+
+
+# This is for 2 clusters FL for 16-bit floating point
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 --test_acc=94 | tee -a ../logs/terminaloutput_mnist_fp16_2c.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.05 --mlpdim=200 --epochs=100 --test_acc=94 | tee -a ../logs/terminaloutput_mnist_fp16_2c.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_2c.txt &
+
+python ./federated-hierarchical2_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --num_cluster=2 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_2c.txt &
+
+
+
+
+# This is for 4 clusters FL for 16-bit floating point
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.1 --mlpdim=200 --epochs=100 --test_acc=95 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.05 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.05 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=150 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.05 --mlpdim=200 --epochs=150 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=150 --optimizer='adam' | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=100  --num_cluster=4 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=150  --num_cluster=4 | tee -a ../logs/terminaloutput_mnist_fp16_4c.txt
+
+# HFL4_mnist_mlp_30_lr[0.01]_C[0.1]_iid[1]_E[1]_B[10]
+python ./federated-hierarchical4_main_fp16.py --local_ep=1 --local_bs=10 --frac=0.1 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --num_cluster=4 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=30 --test_acc=95 | tee -a ../logs/terminaloutput_mnist_fp16_4c_30epoch.txt &
+
+
+# This is for 8 clusters FL for 16-bit floating point
+python ./federated-hierarchical8_main_fp16.py --local_ep=1 --local_bs=10 --Cepochs=10 --model=mlp --dataset=mnist --iid=1 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=30 --num_cluster=8 --test_acc=95 | tee -a ../logs/terminaloutput_mnist_fp16_8c.txt
+
+python ./federated-hierarchical8_main_fp16.py --local_ep=1 --local_bs=10 --Cepochs=10 --model=mlp --dataset=mnist --iid=0 --gpu=1 --gpu_id=$GPU_ID --lr=0.01 --mlpdim=200 --epochs=30 --num_cluster=8 --test_acc=95 | tee -a ../logs/terminaloutput_mnist_fp16_8c.txt
+

+ 25 - 8
src/update.py

@@ -5,6 +5,8 @@
 import torch
 from torch import nn
 from torch.utils.data import DataLoader, Dataset
+import utils
+#from utils import set_device
 
 
 class DatasetSplit(Dataset):
@@ -21,15 +23,15 @@ class DatasetSplit(Dataset):
     def __getitem__(self, item):
         image, label = self.dataset[self.idxs[item]]
         return torch.tensor(image), torch.tensor(label)
-
-
+       
 class LocalUpdate(object):
     def __init__(self, args, dataset, idxs, logger):
         self.args = args
         self.logger = logger
         self.trainloader, self.validloader, self.testloader = self.train_val_test(
             dataset, list(idxs))
-        self.device = 'cuda' if args.gpu else 'cpu'
+        # Select CPU or GPU
+        self.device = utils.set_device(args)
         # Default criterion set to NLL loss function
         self.criterion = nn.NLLLoss().to(self.device)
 
@@ -51,10 +53,12 @@ class LocalUpdate(object):
                                 batch_size=int(len(idxs_test)/10), shuffle=False)
         return trainloader, validloader, testloader
 
-    def update_weights(self, model, global_round):
+    def update_weights(self, model, global_round, dtype=torch.float32):
         # Set mode to train model
         model.train()
         epoch_loss = []
+        # Set dtype for criterion
+        self.criterion.to(dtype)
 
         # Set optimizer for the local updates
         if self.args.optimizer == 'sgd':
@@ -68,6 +72,8 @@ class LocalUpdate(object):
             batch_loss = []
             for batch_idx, (images, labels) in enumerate(self.trainloader):
                 images, labels = images.to(self.device), labels.to(self.device)
+                images = images.to(dtype)
+                # labels must not be cast to the model/criterion dtype; they stay torch.long for NLLLoss
 
                 model.zero_grad()
                 log_probs = model(images)
@@ -86,15 +92,19 @@ class LocalUpdate(object):
 
         return model.state_dict(), sum(epoch_loss) / len(epoch_loss)
 
-    def inference(self, model):
+    def inference(self, model, dtype=torch.float32):
         """ Returns the inference accuracy and loss.
         """
 
         model.eval()
         loss, total, correct = 0.0, 0.0, 0.0
 
+        # Set dtype for criterion
+        self.criterion.to(dtype)
+        
         for batch_idx, (images, labels) in enumerate(self.testloader):
             images, labels = images.to(self.device), labels.to(self.device)
+            images = images.to(dtype)
 
             # Inference
             outputs = model(images)
@@ -111,21 +121,28 @@ class LocalUpdate(object):
         return accuracy, loss
 
 
-def test_inference(args, model, test_dataset):
+def test_inference(args, model, test_dataset, dtype=torch.float32):
     """ Returns the test accuracy and loss.
     """
 
     model.eval()
+    model.to(dtype)
     loss, total, correct = 0.0, 0.0, 0.0
 
-    device = 'cuda' if args.gpu else 'cpu'
+    # Select CPU or GPU
+    device = utils.set_device(args)
+    
     criterion = nn.NLLLoss().to(device)
+    # Set dtype for criterion
+    criterion.to(dtype)
+    
     testloader = DataLoader(test_dataset, batch_size=128,
                             shuffle=False)
 
     for batch_idx, (images, labels) in enumerate(testloader):
         images, labels = images.to(device), labels.to(device)
-
+        images = images.to(dtype)
+            
         # Inference
         outputs = model(images)
         batch_loss = criterion(outputs, labels)
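
The thread running through these changes is that update_weights, inference, and test_inference now take a dtype argument and cast the criterion and each image batch to it, while labels stay torch.long. A hedged usage sketch, assuming the src/ layout of this commit; the surrounding driver code is illustrative, not taken from the FP16 mains:

import torch
from update import test_inference

def evaluate_in_dtype(args, model, test_dataset):
    # Pick half precision when the new --model_dtype flag asks for it.
    dtype = torch.float16 if args.model_dtype == 'torch.float16' else torch.float32
    # test_inference casts the model and each image batch to dtype internally;
    # labels remain torch.long so NLLLoss keeps working.
    return test_inference(args, model, test_dataset, dtype=dtype)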

+ 110 - 6
src/utils.py

@@ -4,9 +4,15 @@
 
 import copy
 import torch
+import numpy as np
+from sys import exit
 from torchvision import datasets, transforms
 from sampling import mnist_iid, mnist_noniid, mnist_noniid_unequal
 from sampling import cifar_iid, cifar_noniid
+from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
+import update
+#from update import LocalUpdate, test_inference
+
 
 
 def get_dataset(args):
@@ -46,12 +52,9 @@ def get_dataset(args):
                 print("Dataset: CIFAR10 equal Non-IID")
                 user_groups = cifar_noniid(train_dataset, args.num_users)
 
-    elif args.dataset == 'mnist' or 'fmnist':
-        if args.dataset == 'mnist':
-            data_dir = '../data/mnist/'
-        else:
-            data_dir = '../data/fmnist/'
 
+    elif args.dataset == 'mnist': 
+        data_dir = '../data/mnist/'
         apply_transform = transforms.Compose([
             transforms.ToTensor(),
             transforms.Normalize((0.1307,), (0.3081,))])
@@ -77,7 +80,10 @@ def get_dataset(args):
                 # Chose equal splits for every user
                 print("Dataset: MNIST equal Non-IID")
                 user_groups = mnist_noniid(train_dataset, args.num_users)
-
+                
+    else:
+        exit("No such dataset: " + args.dataset)
+        
     return train_dataset, test_dataset, user_groups
 
 
@@ -109,3 +115,101 @@ def exp_details(args):
     print(f'    Local Batch size   : {args.local_bs}')
     print(f'    Local Epochs       : {args.local_ep}\n')
     return
+
+
+def set_device(args):
+    # Select CPU or GPU
+    if not args.gpu or not torch.cuda.is_available():
+        device=torch.device('cpu')
+    else:
+        # CUDA is available, so use the GPU specified by --gpu_id
+        device = torch.device(args.gpu_id)
+    
+    return device
+    
+    
+def build_model(args, train_dataset):
+    if args.model == 'cnn':
+        # Convolutional neural network
+        if args.dataset == 'mnist':
+            model = CNNMnist(args=args)
+        elif args.dataset == 'fmnist':
+            model = CNNFashion_Mnist(args=args)
+        elif args.dataset == 'cifar':
+            model = CNNCifar(args=args)
+
+    elif args.model == 'mlp':
+        # Multi-layer perceptron
+        img_size = train_dataset[0][0].shape
+        len_in = 1
+        for x in img_size:
+            len_in *= x
+        model = MLP(dim_in=len_in, dim_hidden=args.mlpdim,
+                               dim_out=args.num_classes)
+    else:
+        exit('Error- unrecognized model: ' + args.model)
+        
+    return model
+    
+    
+def fl_train(args, train_dataset, cluster_global_model, cluster, usergrp, epochs, logger, cluster_dtype=torch.float32):
+    """
+    Defining the training function.
+    """
+    
+    cluster_train_loss, cluster_train_acc = [], []
+    cluster_val_acc_list, cluster_net_list = [], []
+    cluster_cv_loss, cluster_cv_acc = [], []
+    # print_every = 1
+    cluster_val_loss_pre, counter = 0, 0
+
+    for epoch in range(epochs):
+        cluster_local_weights, cluster_local_losses = [], []
+        # print(f'\n | Cluster Training Round : {epoch+1} |\n')
+
+        cluster_global_model.train()
+        # m = max(int(args.frac * len(cluster)), 1)
+        # m = max(int(math.ceil(args.frac * len(cluster))), 1)
+        m = min(int(len(cluster)), 10)
+        # print("=== m ==== ", m)
+        # m = 10
+        idxs_users = np.random.choice(cluster, m, replace=False)
+
+
+        for idx in idxs_users:
+            cluster_local_model = update.LocalUpdate(args=args, dataset=train_dataset, idxs=usergrp[idx], logger=logger)
+            cluster_w, cluster_loss = cluster_local_model.update_weights(model=copy.deepcopy(cluster_global_model), global_round=epoch, dtype=cluster_dtype)
+            cluster_local_weights.append(copy.deepcopy(cluster_w))
+            cluster_local_losses.append(copy.deepcopy(cluster_loss))
+            # print('| Global Round : {} | User : {} | \tLoss: {:.6f}'.format(epoch, idx, cluster_loss))
+
+        # averaging global weights
+        cluster_global_weights = average_weights(cluster_local_weights)
+
+        # update global weights
+        cluster_global_model.load_state_dict(cluster_global_weights)
+
+        cluster_loss_avg = sum(cluster_local_losses) / len(cluster_local_losses)
+        cluster_train_loss.append(cluster_loss_avg)
+
+        # ============== EVAL ============== 
+        # Calculate avg training accuracy over all users at every epoch
+        list_acc, list_loss = [], []
+        cluster_global_model.eval()
+        # C = np.random.choice(cluster, m, replace=False) # random set of clients
+        # print("C: ", C)
+        # for c in C:
+        # for c in range(len(cluster)):  
+        for c in idxs_users:      
+            cluster_local_model = update.LocalUpdate(args=args, dataset=train_dataset, idxs=usergrp[c], logger=logger)
+            # local_model = LocalUpdate(args=args, dataset=train_dataset,idxs=user_groups[idx], logger=logger)
+            acc, loss = cluster_local_model.inference(model=cluster_global_model, dtype=cluster_dtype)
+            list_acc.append(acc)
+            list_loss.append(loss)
+        # cluster_train_acc.append(sum(list_acc)/len(list_acc))
+        # Add
+    # print("Cluster accuracy: ", 100*cluster_train_acc[-1]) 
+    print("Cluster accuracy: ", 100*sum(list_acc)/len(list_acc)) 
+
+    return cluster_global_model, cluster_global_weights, cluster_loss_avg
+
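
fl_train encapsulates one cluster's FedAvg loop: each round it samples up to 10 of the cluster's clients, runs LocalUpdate.update_weights in cluster_dtype, averages the resulting weights, and finally prints the cluster accuracy. A sketch of how a hierarchical FP16 main might drive it per cluster, under the assumption that clusters and cluster_models are lists built elsewhere (these names are illustrative, not taken from the mains omitted from this diff):

import torch
import utils

def train_all_clusters(args, train_dataset, user_groups, cluster_models, clusters, logger):
    # Half precision when --model_dtype asks for it, float32 otherwise.
    cluster_dtype = torch.float16 if args.model_dtype == 'torch.float16' else torch.float32
    cluster_weights = []
    for model, members in zip(cluster_models, clusters):
        model, weights, loss = utils.fl_train(
            args, train_dataset, model, members, user_groups,
            args.Cepochs, logger, cluster_dtype=cluster_dtype)
        cluster_weights.append(weights)
    # The outer tier then averages the per-cluster weights, mirroring the
    # per-client averaging inside fl_train.
    return utils.average_weights(cluster_weights)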

Some files were not shown because too many files changed in this diff