  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Python version: 3.6
  4. import os
  5. import copy
  6. import time
  7. import pickle
  8. import numpy as np
  9. from tqdm import tqdm
  10. import torch
  11. from tensorboardX import SummaryWrepoch
  12. from options import args_parser
  13. from Update import LocalUpdate
  14. from FedNets import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
  15. from averaging import average_weights
  16. from utils import get_dataset
  17. if __name__ == '__main__':
  18. start_time = time.time()
  19. # define paths
  20. path_project = os.path.abspath('..')
  21. summary = SummaryWrepoch('local')
  22. args = args_parser()
  23. if args.gpu:
  24. torch.cuda.set_device(args.gpu)
  25. device = 'cuda' if args.gpu else 'cpu'
  26. # load dataset and user groups
  27. train_dataset, test_dataset, user_groups = get_dataset(args)
  28. # BUILD MODEL
  29. if args.model == 'cnn':
  30. # Convolutional neural netork
  31. if args.dataset == 'mnist':
  32. global_model = CNNMnist(args=args)
  33. elif args.dataset == 'fmnist':
  34. global_model = CNNFashion_Mnist(args=args)
  35. elif args.dataset == 'cifar':
  36. global_model = CNNCifar(args=args)
  37. elif args.model == 'mlp':
  38. # Multi-layer preceptron
  39. img_size = train_dataset[0][0].shape
  40. len_in = 1
  41. for x in img_size:
  42. len_in *= x
  43. global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes)
  44. else:
  45. exit('Error: unrecognized model')
  46. # Set the model to train and send it to device.
  47. global_model.to(device)
  48. global_model.train()
  49. print(global_model)
  50. # copy weights
  51. global_weights = global_model.state_dict()
  52. # training
  53. train_loss, train_accuracy = [], []
  54. val_acc_list, net_list = [], []
  55. cv_loss, cv_acc = [], []
  56. print_every = 20
  57. val_loss_pre, counter = 0, 0
  58. for epoch in tqdm(range(args.epochs)):
  59. global_model.train()
  60. local_weights, local_losses = [], []
  61. m = max(int(args.frac * args.num_users), 1)
  62. idxs_users = np.random.choice(range(args.num_users), m, replace=False)
  63. for idx in idxs_users:
  64. local_model = LocalUpdate(args=args, dataset=train_dataset,
  65. idxs=user_groups[idx], logger=summary)
  66. w, loss = local_model.update_weights(net=copy.deepcopy(global_model))
  67. local_weights.append(copy.deepcopy(w))
  68. local_losses.append(copy.deepcopy(loss))
  69. # update global weights
  70. global_weights = average_weights(local_weights)
  71. # copy weight to global model
  72. global_model.load_state_dict(global_weights)
  73. # print loss after every 20 rounds
  74. loss_avg = sum(local_losses) / len(local_losses)
  75. if (epoch+1) % print_every == 0:
  76. print('\nTrain loss:', loss_avg)
  77. train_loss.append(loss_avg)
  78. # Calculate avg training accuracy over all users at every epoch
  79. list_acc, list_loss = [], []
  80. global_model.eval()
  81. for c in range(args.num_users):
  82. local_model = LocalUpdate(args=args, dataset=train_dataset,
  83. idxs=user_groups[idx], logger=summary)
  84. acc, loss = local_model.inference(net=global_model)
  85. list_acc.append(acc)
  86. list_loss.append(loss)
  87. train_accuracy.append(sum(list_acc)/len(list_acc))
  88. # Test inference after completion of training
  89. test_acc, test_loss = [], []
  90. for c in tqdm(range(args.num_users)):
  91. local_model = LocalUpdate(args=args, dataset=test_dataset,
  92. idxs=user_groups[idx], logger=summary)
  93. acc, loss = local_model.test(net=global_model)
  94. test_acc.append(acc)
  95. test_loss.append(loss)
  96. print("Final Average Train Accuracy after {} epochs: {:.2f}%".format(
  97. args.epochs, 100.*train_accuracy[-1]))
  98. print("Final Average Test Accuracy after {} epochs: {:.2f}%".format(
  99. args.epochs, (100.*sum(test_acc)/len(test_acc))))
  100. # # Saving the objects train_loss and train_accuracy:
  101. file_name = '../save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.format(args.dataset,
  102. args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs)
  103. with open(file_name, 'wb') as f:
  104. pickle.dump([train_loss, train_accuracy], f)
  105. print('Total Time: {0:0.4f}'.format(time.time()-start_time))
  106. # PLOTTING (optional)
  107. # import matplotlib
  108. # import matplotlib.pyplot as plt
  109. # matplotlib.use('Agg')
  110. # Plot Loss curve
  111. # plt.figure()
  112. # plt.title('Training Loss vs Communication rounds')
  113. # plt.plot(range(len(train_loss)), train_loss, color='r')
  114. # plt.ylabel('Training loss')
  115. # plt.xlabel('Communication Rounds')
  116. # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_loss.png'.format(args.dataset,
  117. # args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs))
  118. #
  119. # # Plot Average Accuracy vs Communication rounds
  120. # plt.figure()
  121. # plt.title('Average Accuracy vs Communication rounds')
  122. # plt.plot(range(len(train_accuracy)), train_accuracy, color='k')
  123. # plt.ylabel('Average Accuracy')
  124. # plt.xlabel('Communication Rounds')
  125. # plt.savefig('../save/fed_{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}]_acc.png'.format(args.dataset,
  126. # args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs))