|
@@ -29,6 +29,7 @@ def mnist_noniid(dataset, num_users):
|
|
|
:param num_users:
|
|
|
:return:
|
|
|
"""
|
|
|
+ # 60,000 training imgs --> 300 imgs/shard X 200 shards
|
|
|
num_shards, num_imgs = 200, 300
|
|
|
idx_shard = [i for i in range(num_shards)]
|
|
|
dict_users = {i: np.array([]) for i in range(num_users)}
|
|
@@ -40,7 +41,7 @@ def mnist_noniid(dataset, num_users):
|
|
|
idxs_labels = idxs_labels[:, idxs_labels[1, :].argsort()]
|
|
|
idxs = idxs_labels[0, :]
|
|
|
|
|
|
- # divide and assign
|
|
|
+ # divide and assign 2 shards/client
|
|
|
for i in range(num_users):
|
|
|
rand_set = set(np.random.choice(idx_shard, 2, replace=False))
|
|
|
idx_shard = list(set(idx_shard) - rand_set)
|
|
@@ -50,6 +51,86 @@ def mnist_noniid(dataset, num_users):
|
|
|
return dict_users
|
|
|
|
|
|
|
|
|
def mnist_noniid_unequal(dataset, num_users):
    """
    Sample non-I.I.D client data from MNIST dataset s.t clients
    have unequal amount of data

    The 60,000 training images are sorted by label and cut into 1200
    shards of 50 images each. Every client receives a randomly sized
    group of shards.

    :param dataset: object exposing ``train_labels.numpy()``, which must
        return one label per training image (60,000 in total)
    :param num_users: number of clients to split the data between
    :returns: a dict mapping each client id (``0 .. num_users-1``) to a
        1-D numpy array holding the indices of the training imgs assigned
        to that client
    """
    # 60,000 training imgs --> 50 imgs/shard X 1200 shards
    num_shards, num_imgs = 1200, 50
    idx_shard = list(range(num_shards))
    # Seeded with an empty (float) array, so the index arrays returned are
    # float-typed after concatenation; kept as-is for existing callers.
    dict_users = {i: np.array([]) for i in range(num_users)}
    idxs = np.arange(num_shards * num_imgs)
    labels = dataset.train_labels.numpy()

    # Sort the image indices by label so that each shard is label-homogeneous
    idxs_labels = np.vstack((idxs, labels))
    idxs_labels = idxs_labels[:, idxs_labels[1, :].argsort()]
    idxs = idxs_labels[0, :]

    def assign(client, count):
        """Move up to `count` random unassigned shards to `client`."""
        nonlocal idx_shard
        # Never request more shards than remain, and ignore non-positive
        # requests (a quota can become -1 after the "one shard each" pass).
        count = min(int(count), len(idx_shard))
        if count <= 0:
            return
        picked = set(np.random.choice(idx_shard, count, replace=False))
        idx_shard = list(set(idx_shard) - picked)
        chunks = [idxs[s * num_imgs:(s + 1) * num_imgs] for s in picked]
        dict_users[client] = np.concatenate(
            [dict_users[client]] + chunks, axis=0)

    # Minimum and maximum shards assigned per client:
    min_shard = 1
    max_shard = 30

    # Divide the shards into random chunks for every client
    # s.t the sum of these chunks is approximately num_shards
    # (rounding can push the total slightly above or below it)
    random_shard_size = np.random.randint(
        min_shard, max_shard + 1, size=num_users)
    random_shard_size = np.around(
        random_shard_size / sum(random_shard_size) * num_shards)
    random_shard_size = random_shard_size.astype(int)

    # Assign the shards randomly to each client
    if sum(random_shard_size) > num_shards:
        # Quotas over-subscribe the pool: first give each client 1 shard
        # so that nobody is starved when the pool runs dry ...
        for i in range(num_users):
            assign(i, 1)

        random_shard_size = random_shard_size - 1

        # ... then hand out the rest of each quota until no shard is left
        for i in range(num_users):
            if len(idx_shard) == 0:
                break
            assign(i, random_shard_size[i])
    else:
        # Quotas fit in the pool: give every client its full quota
        for i in range(num_users):
            assign(i, random_shard_size[i])

        # Add the leftover shards to the client with the least data
        if idx_shard:
            k = min(dict_users, key=lambda x: len(dict_users.get(x)))
            assign(k, len(idx_shard))

    return dict_users
|
|
|
+
|
|
|
+
|
|
|
def cifar_iid(dataset, num_users):
|
|
|
"""
|
|
|
Sample I.I.D. client data from CIFAR10 dataset
|
|
@@ -76,7 +157,8 @@ def cifar_noniid(dataset, num_users):
|
|
|
idx_shard = [i for i in range(num_shards)]
|
|
|
dict_users = {i: np.array([]) for i in range(num_users)}
|
|
|
idxs = np.arange(num_shards*num_imgs)
|
|
|
- labels = dataset.train_labels.numpy()
|
|
|
+ # labels = dataset.train_labels.numpy()
|
|
|
+ labels = np.array(dataset.train_labels)
|
|
|
|
|
|
# sort labels
|
|
|
idxs_labels = np.vstack((idxs, labels))
|