# federated_main.py
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Python version: 3.6
  4. import os
  5. import copy
  6. import time
  7. import pickle
  8. import numpy as np
  9. from tqdm import tqdm
  10. import torch
  11. from tensorboardX import SummaryWriter
  12. from options import args_parser
  13. from update import LocalUpdate, test_inference
  14. from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
  15. from utils import get_dataset, average_weights
  16. if __name__ == '__main__':
  17. start_time = time.time()
  18. # define paths
  19. path_project = os.path.abspath('..')
  20. logger = SummaryWriter('../logs')
  21. args = args_parser()
  22. if args.gpu:
  23. torch.cuda.set_device(args.gpu)
  24. device = 'cuda' if args.gpu else 'cpu'
  25. # load dataset and user groups
  26. train_dataset, test_dataset, user_groups = get_dataset(args)
  27. # BUILD MODEL
  28. if args.model == 'cnn':
  29. # Convolutional neural netork
  30. if args.dataset == 'mnist':
  31. global_model = CNNMnist(args=args)
  32. elif args.dataset == 'fmnist':
  33. global_model = CNNFashion_Mnist(args=args)
  34. elif args.dataset == 'cifar':
  35. global_model = CNNCifar(args=args)
  36. elif args.model == 'mlp':
  37. # Multi-layer preceptron
  38. img_size = train_dataset[0][0].shape
  39. len_in = 1
  40. for x in img_size:
  41. len_in *= x
  42. global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes)
  43. else:
  44. exit('Error: unrecognized model')
  45. # Set the model to train and send it to device.
  46. global_model.to(device)
  47. global_model.train()
  48. print(global_model)
  49. # copy weights
  50. global_weights = global_model.state_dict()
  51. # Training
  52. train_loss, train_accuracy = [], []
  53. val_acc_list, net_list = [], []
  54. cv_loss, cv_acc = [], []
  55. print_every = 2
  56. val_loss_pre, counter = 0, 0
  57. for epoch in tqdm(range(args.epochs)):
  58. local_weights, local_losses = [], []
  59. print(f'\n | Global Training Round : {epoch+1} |\n')
  60. global_model.train()
  61. m = max(int(args.frac * args.num_users), 1)
  62. idxs_users = np.random.choice(range(args.num_users), m, replace=False)
  63. for idx in idxs_users:
  64. local_model = LocalUpdate(args=args, dataset=train_dataset,
  65. idxs=user_groups[idx], logger=logger)
  66. w, loss = local_model.update_weights(
  67. model=copy.deepcopy(global_model), global_round=epoch)
  68. local_weights.append(copy.deepcopy(w))
  69. local_losses.append(copy.deepcopy(loss))
  70. # update global weights
  71. global_weights = average_weights(local_weights)
  72. # copy weight to global model
  73. global_model.load_state_dict(global_weights)
  74. loss_avg = sum(local_losses) / len(local_losses)
  75. train_loss.append(loss_avg)
  76. # Calculate avg training accuracy over all users at every epoch
  77. list_acc, list_loss = [], []
  78. global_model.eval()
  79. for c in range(args.num_users):
  80. local_model = LocalUpdate(args=args, dataset=train_dataset,
  81. idxs=user_groups[idx], logger=logger)
  82. acc, loss = local_model.inference(model=global_model)
  83. list_acc.append(acc)
  84. list_loss.append(loss)
  85. train_accuracy.append(sum(list_acc)/len(list_acc))
  86. # print global training loss after every 'i' rounds
  87. if (epoch+1) % print_every == 0:
  88. print(f' \nAvg Training Stats after {epoch+1} global rounds:')
  89. print(f'Training Loss : {np.mean(np.array(train_loss))}')
  90. print('Train Accuracy: {:.2f}% \n'.format(
  91. 100.*(np.mean(np.array(train_accuracy)))))
  92. # Test inference after completion of training
  93. test_acc, test_loss = test_inference(args, global_model, test_dataset)
  94. print(f' \n Results after {args.epochs} global rounds of training:')
  95. print("|---- Avg Train Accuracy: {:.2f}%".format(
  96. 100.*(np.mean(np.array(train_accuracy)))))
  97. print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))
  98. # Saving the objects train_loss and train_accuracy:
  99. file_name = '../save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.\
  100. format(args.dataset, args.model, args.epochs, args.frac, args.iid,
  101. args.local_ep, args.local_bs)
  102. with open(file_name, 'wb') as f:
  103. pickle.dump([train_loss, train_accuracy], f)
  104. print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))
  105. # PLOTTING (optional)
  106. # import matplotlib
  107. # import matplotlib.pyplot as plt
  108. # matplotlib.use('Agg')
  109. # Plot Loss curve
  110. # plt.figure()
  111. # plt.title('Training Loss vs Communication rounds')
  112. # plt.plot(range(len(train_loss)), train_loss, color='r')
  113. # plt.ylabel('Training loss')
  114. # plt.xlabel('Communication Rounds')
  115. # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_loss.png'.format(args.dataset,
  116. # args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs))
  117. #
  118. # # Plot Average Accuracy vs Communication rounds
  119. # plt.figure()
  120. # plt.title('Average Accuracy vs Communication rounds')
  121. # plt.plot(range(len(train_accuracy)), train_accuracy, color='k')
  122. # plt.ylabel('Average Accuracy')
  123. # plt.xlabel('Communication Rounds')
  124. # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_acc.png'.format(args.dataset,
  125. # args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs))