commit f2e5997fe7107f055bbc6a6f9264136370c2b71f
parent 96133a310b582f5770891f5896460b5b3e0df041
Author: Cody Lewis <cody@codymlewis.com>
Date: Tue, 24 Nov 2020 16:27:50 +1100
Added interrupt handling and updated README
Diffstat:
9 files changed, 275 insertions(+), 157 deletions(-)
diff --git a/README.md b/README.md
@@ -2,3 +2,23 @@
A pytorch based implementation of federated learning and demonstration of a new
attack against sybil mitigating FL systems.
+
+## Setup
+```
+pip install -r requirements.txt
+```
+
+## Configuration
+Edit `options.json` to configure the simulation.
+
+## Running
+Run the simulation with:
+```
+python3 main.py
+```
+
+Then once finished, run the following to get plots from an average of the
+simulations:
+```
+python3 plot.py
+```
diff --git a/adversaries.py b/adversaries.py
@@ -1,5 +1,5 @@
"""
-Defines the adversaries within the system
+Defines the adversaries within the system and a function to load them
Author: Cody Lewis
"""
@@ -61,6 +61,7 @@ class OnOff(Client):
def load_adversary(adversary_name):
+ """Load the class of the specified adversary"""
adversaries = {
"label flip": Flipper,
"on off": OnOff,
diff --git a/datasets.py b/datasets.py
@@ -1,3 +1,9 @@
+"""
+Module for dataset classes and a function to load them
+
+Author: Cody Lewis
+"""
+
from math import floor
from abc import abstractmethod
@@ -13,6 +19,7 @@ import errors
class DatasetWrapper(Dataset):
+ """Wrapper class for torch datasets to allow for easy non-iid splitting"""
def __init__(self):
self.targets = torch.tensor([])
self.y_dim = 0
@@ -25,24 +32,28 @@ class DatasetWrapper(Dataset):
pass
def get_dims(self):
+ """Get the x and y dimensions of the dataset"""
if len(self) < 1:
return (0, 0)
x, _ = self[0]
return (x.shape[0], self.y_dim)
def get_idx(self, classes):
+ """Get the ids of data belonging to the specified classes"""
return torch.arange(len(self.targets))[
sum([(self.targets == i).long() for i in classes]).bool()
]
def assign_to_classes(self, classes):
+ """Leave only data belonging to the classes within this set"""
idx = self.get_idx(classes)
self.data = self.data[idx]
self.targets = self.targets[idx]
class MNIST(DatasetWrapper):
- def __init__(self, ds_path, train=True, download=False, classes=[]):
+ """The MNIST dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
super().__init__()
ds = torchvision.datasets.MNIST(
ds_path,
@@ -60,7 +71,8 @@ class MNIST(DatasetWrapper):
class FashionMNIST(DatasetWrapper):
- def __init__(self, ds_path, train=True, download=False, classes=[]):
+ """The Fashion MNIST dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
super().__init__()
ds = torchvision.datasets.MNIST(
ds_path,
@@ -78,7 +90,9 @@ class FashionMNIST(DatasetWrapper):
class KDD99(DatasetWrapper):
- def __init__(self, ds_path, train=True, download=False, classes=[]):
+ """The KDD Cup99 dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
self.data = torch.tensor([])
self.targets = torch.tensor([])
df = pd.read_csv(
@@ -107,7 +121,9 @@ class KDD99(DatasetWrapper):
class Amazon(DatasetWrapper):
- def __init__(self, ds_path, train=True, download=False, classes=[]):
+ """The Amazon dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
df = pd.read_csv(
f"{ds_path}/{'train' if train else 'test'}/amazon.data",
header=None
@@ -124,7 +140,9 @@ class Amazon(DatasetWrapper):
class VGGFace(DatasetWrapper):
- def __init__(self, ds_path, train=True, download=False, classes=[]):
+ """The VGGFace dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
self.ds_path = f"{ds_path}/data"
self.data_paths = []
self.targets = []
@@ -168,14 +186,15 @@ class VGGFace(DatasetWrapper):
return (X, self.targets[idx].long())
-def load_data(options, train=True, shuffle=True, classes=[]):
+def load_data(options, train=True, shuffle=True, classes=None):
"""
Load the specified dataset in a form suitable for the model
Keyword arguments:
options -- options for the simulation
train -- load the training dataset if true otherwise load the validation
- classes -- use only the classes in list, use all classes if empty list
+ classes -- use only the classes in list, use all classes if empty list or
+ None
"""
datasets = {
"mnist": MNIST,
diff --git a/main.py b/main.py
@@ -29,8 +29,8 @@ def index_match(arr):
def find_shards(num_users, num_classes, classes_per_user):
"""Find data class shards according to the parameters"""
- end_halves = [[i for i in range(num_classes)]
- for _ in range(classes_per_user - 1)]
+ end_halves = [list(range(num_classes))
+ for _ in range(classes_per_user - 1)]
if num_classes / classes_per_user < num_users:
for end_half in end_halves:
while index_match(end_half):
@@ -47,104 +47,171 @@ def find_shards(num_users, num_classes, classes_per_user):
]
-if __name__ == '__main__':
+def run(program_flow, current, run_data):
try:
- options = utils.load_options()
- if options.verbosity > 0:
- print("Options set as:")
- print(options)
- if 'cuda' in (dev_name := options.model_params['device']):
- p = not torch.cuda.is_available()
- c = int(dev_name[dev_name.find(':') + 1:]) + 1
- q = c > torch.cuda.device_count()
- if p or q:
- raise errors.MisconfigurationError(
- f"Device '{dev_name}' is not available on this machine"
- )
- train_data = load_data(
- options,
- train=True,
- shuffle=False,
- )
- val_data = load_data(
- options,
- train=False,
- shuffle=False,
- )
- user_classes = [
- Client if i <= options.users * (
- 1 - options.adversaries['percent_adv'])
- else load_adversary(options.adversaries['type'])
- for i in range(1, options.users + 1)
- ]
- if options.class_shards:
- class_shards = options.class_shards
- else:
- class_shards = find_shards(
- options.users,
- val_data['y_dim'],
- options.classes_per_user
+ program_flow[current](run_data)
+ return run_data
+ except errors.MisconfigurationError as e:
+ print(f"Miconfiguratation Error: {e}")
+ except KeyboardInterrupt:
+ print()
+ decision = input('Are you sure you want to quit? ')
+ if decision.lower().find('y') >= 0:
+ run_data['quit'] = True
+ return run_data
+ return run(program_flow, current, data)
+
+
+def system_setup(run_data):
+ run_data["options"] = utils.load_options()
+ if run_data["options"].verbosity > 0:
+ print("Options set as:")
+ print(run_data["options"])
+ if 'cuda' in (dev_name := run_data["options"].model_params['device']):
+ p = not torch.cuda.is_available()
+ c = int(dev_name[dev_name.find(':') + 1:]) + 1
+ q = c > torch.cuda.device_count()
+ if p or q:
+ raise errors.MisconfigurationError(
+ f"Device '{dev_name}' is not available on this machine"
)
- if options.class_shards is None and options.verbosity > 0:
- print("Assigned class shards:")
- print(class_shards)
- print()
- sim_confusion_matrices = torch.tensor([], dtype=int)
- for i in range(options.num_sims):
- print(f"Simulation {i + 1}/{options.num_sims}")
- server = Server(
- max(train_data['x_dim'], val_data['x_dim']),
- max(train_data['y_dim'], val_data['y_dim']),
- options
+ run_data["train_data"] = load_data(
+ run_data["options"],
+ train=True,
+ shuffle=False,
+ )
+ run_data["val_data"] = load_data(
+ run_data["options"],
+ train=False,
+ shuffle=False,
+ )
+ run_data['sim_number'] = 0
+ return run_data
+
+def setup_users(run_data):
+ run_data["user_classes"] = [
+ Client if i <= run_data["options"].users * (
+ 1 - run_data["options"].adversaries['percent_adv'])
+ else load_adversary(run_data["options"].adversaries['type'])
+ for i in range(1, run_data["options"].users + 1)
+ ]
+ if run_data["options"].class_shards:
+ run_data["class_shards"] = run_data["options"].class_shards
+ else:
+ run_data["class_shards"] = find_shards(
+ run_data["options"].users,
+ run_data["val_data"]['y_dim'],
+ run_data["options"].classes_per_user
+ )
+ if run_data["options"].class_shards is None and \
+ run_data["options"].verbosity > 0:
+ print("Assigned class shards:")
+ print(run_data["class_shards"])
+ print()
+ return run_data
+
+
+def run_simulations(run_data):
+ run_data["sim_confusion_matrices"] = torch.tensor([], dtype=int)
+ for i in range(run_data['sim_number'], run_data["options"].num_sims):
+ print(f"Simulation {i + 1}/{run_data['options'].num_sims}")
+ if not run_data.get('sim_setup'):
+ run_data["server"] = Server(
+ max(
+ run_data["train_data"]['x_dim'],
+ run_data["val_data"]['x_dim']
+ ),
+ max(
+ run_data["train_data"]['y_dim'],
+ run_data["val_data"]['y_dim']
+ ),
+ run_data["options"]
)
- server.add_clients(
+ run_data["server"].add_clients(
[
u(
- options,
- class_shards[i]
- ) for i, u in enumerate(user_classes)
+ run_data["options"],
+ run_data["class_shards"][i]
+ ) for i, u in enumerate(run_data["user_classes"])
]
)
- print("Starting training...")
- confusion_matrices = server.fit(
- val_data['dataloader'], options.server_epochs
- )
- sim_confusion_matrices = torch.cat(
- (sim_confusion_matrices, confusion_matrices.unsqueeze(dim=0))
- )
- if options.verbosity > 0:
- print("Done training.")
- criterion = torch.nn.CrossEntropyLoss()
- loss, conf_mat = utils.gen_confusion_matrix(
- server.net,
- train_data['dataloader'],
- criterion,
- server.nb_classes,
- options
+ run_data['sim_setup'] = True
+ run_data['epoch'] = 0
+ print("Starting training...")
+ for run_data['epoch'] in range(run_data['epoch'],
+ run_data['options'].server_epochs):
+ run_data["server"].fit(
+ run_data["val_data"]['dataloader'],
+ run_data['epoch'],
+ run_data["options"].server_epochs
)
- stats = utils.gen_conf_stats(conf_mat, options)
- loss_val, conf_mat = utils.gen_confusion_matrix(
- server.net,
- val_data['dataloader'],
- criterion,
- server.nb_classes,
- options
- )
- stats_val = utils.gen_conf_stats(conf_mat, options)
- print(f"Loss: t: {loss}, v: {loss_val}")
- print(f"Accuracy: t: {stats['accuracy'] * 100}%, ", end="")
- print(f"v: {stats_val['accuracy'] * 100}%")
- print(
- f"Attack success rate: t: {stats['attack_success'] * 100}%, ",
- end=""
- )
- print(f"v: {stats_val['attack_success'] * 100}%")
+ confusion_matrices = run_data['server'].get_conf_matrices()
+ if run_data["options"].verbosity > 0:
print()
- if options.verbosity > 0:
- print(f"Writing confusion matrices to {options.result_file}...")
- utils.write_results(options.result_file, sim_confusion_matrices)
- # print(utils.gen_experiment_stats(sim_confusion_matrices, options))
- if options.verbosity > 0:
- print("Done.")
- except errors.MisconfigurationError as e:
- print(f"Miconfiguratation Error: {e}")
+ run_data["sim_confusion_matrices"] = torch.cat(
+ (
+ run_data["sim_confusion_matrices"],
+ confusion_matrices.unsqueeze(dim=0)
+ )
+ )
+ print()
+ run_data['sim_setup'] = False
+ run_data['sim_number'] += 1
+ if run_data["options"].verbosity > 0:
+ print("Done training.")
+ criterion = torch.nn.CrossEntropyLoss()
+ loss, conf_mat = utils.gen_confusion_matrix(
+ run_data["server"].net,
+ run_data["train_data"]['dataloader'],
+ criterion,
+ run_data["server"].nb_classes,
+ run_data["options"]
+ )
+ stats = utils.gen_conf_stats(conf_mat, run_data["options"])
+ loss_val, conf_mat = utils.gen_confusion_matrix(
+ run_data["server"].net,
+ run_data["val_data"]['dataloader'],
+ criterion,
+ run_data["server"].nb_classes,
+ run_data["options"]
+ )
+ stats_val = utils.gen_conf_stats(conf_mat, run_data["options"])
+ print(f"Loss: t: {loss}, v: {loss_val}")
+ print(f"Accuracy: t: {stats['accuracy'] * 100}%, ", end="")
+ print(f"v: {stats_val['accuracy'] * 100}%")
+ print(f"MCC: t: {stats['MCC']}, v: {stats_val['MCC']}")
+ print(
+ f"Attack success rate: t: {stats['attack_success'] * 100}%, ",
+ end=""
+ )
+ print(f"v: {stats_val['attack_success'] * 100}%")
+ return run_data
+
+
+def write_results(run_data):
+ if run_data["options"].verbosity > 0:
+ print()
+ print(f"Writing confusion matrices to {run_data['options'].result_file}...")
+ utils.write_results(
+ run_data["options"].result_file,
+ run_data["sim_confusion_matrices"]
+ )
+ if run_data["options"].verbosity > 0:
+ print("Done.")
+ return run_data
+
+
+
+if __name__ == '__main__':
+ program_flow = {
+ "system_setup": system_setup,
+ "setup_users": setup_users,
+ "run_simulations": run_simulations,
+ "write_results": write_results
+ }
+ data = {"quit": False}
+ for k in program_flow.keys():
+ data = run(program_flow, k, data)
+ if data['quit']:
+ print("bye.")
+ break
diff --git a/models.py b/models.py
@@ -1,9 +1,11 @@
"""
-Pytorch implementation of a softmax perceptron
+A module for ML models and a function to load them
Author: Cody Lewis
"""
+from abc import abstractmethod
+
import torch.nn as nn
import torch.optim as optim
import torchvision
@@ -21,6 +23,10 @@ class Model(nn.Module):
self.lr_changes = params['lr_changes'].copy()
self.epoch_count = 0
+ @abstractmethod
+ def forward(self, *x):
+ pass
+
def fit(self, data, epochs=1, verbose=True):
"""
Fit the model for some epochs, return history of loss values and the
@@ -99,6 +105,7 @@ class SoftMaxModel(Model):
class SqueezeNet(Model):
+ """The SqueezeNet DNN Class"""
def __init__(self, params):
super().__init__(params)
net = torchvision.models.__dict__["squeezenet1_1"](pretrained=True)
@@ -118,6 +125,7 @@ class SqueezeNet(Model):
def load_model(params):
+ """Load the model specified in params"""
models = {
"softmax": SoftMaxModel,
"squeeze": SqueezeNet,
diff --git a/options.json b/options.json
@@ -1,7 +1,7 @@
{
"dataset": "mnist",
"num_sims": 5,
- "server_epochs": 3000,
+ "server_epochs": 5,
"user_epochs": 1,
"users": 10,
"model_params": {
@@ -20,12 +20,12 @@
"adversaries": {
"percent_adv": 0.5,
"type": "on off",
- "from": 4,
- "to": 8,
- "toggle_times": [100, 100],
- "delay": null
+ "from": 5,
+ "to": 0,
+ "toggle_times": [500, 150],
+ "delay": 1000
},
- "class_shards": [[0, 9],[1, 8],[2, 7],[3, 6],[4, 5],[5, 7],[6, 2],[7, 5],[8, 9],[9, 4]],
+ "class_shards": null,
"classes_per_user": 2,
"verbosity": 1,
"result_file": "./results.pt"
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,8 @@
-pytorch
-torchvision
-ggplot
-pandas
+numpy==1.19.4
+torchvision==0.5.0
+torch==1.7.0
+matplotlib==3.3.3
+scipy==1.5.4
+pandas==1.1.3
+Pillow==8.0.1
+scikit_learn==0.23.2
diff --git a/server.py b/server.py
@@ -26,44 +26,43 @@ class Server:
self.clients = []
self.nb_classes = num_out
self.options = options
+ self.confusion_matrices = torch.tensor([], dtype=int)
+ self.criterion = nn.CrossEntropyLoss()
- def fit(self, dataloader, epochs):
- confusion_matrices = torch.tensor([], dtype=int)
- criterion = nn.CrossEntropyLoss()
- for e in range(epochs):
- start = time.time()
- grads = []
- for c in self.clients:
- c.net.copy_params(self.net.get_params())
- grads.append(c.fit()[1])
- self.net.fit(grads, self.options.params)
- loss, confusion_matrix = utils.gen_confusion_matrix(
- self.net,
- dataloader,
- criterion,
- self.nb_classes,
- self.options
- )
- confusion_matrices = torch.cat(
- (confusion_matrices, confusion_matrix.unsqueeze(dim=0))
- )
- stats = utils.gen_conf_stats(confusion_matrix, self.options)
- if self.options.verbosity > 0:
- print(
- f"[ E: {e + 1}/{epochs}, " +
- f"L: {loss:.6f}, " +
- f"Acc: {stats['accuracy']:.6f}, " +
- f"MCC: {stats['MCC']:.6f}, " +
- f"ASR: {stats['attack_success']:.6f}, " +
- f"T: {time.time() - start:.6f}s ]",
- end="\r" if self.options.verbosity < 2 else "\n"
- )
- del grads
+ def fit(self, dataloader, e, epochs):
+ start = time.time()
+ grads = []
+ for c in self.clients:
+ c.net.copy_params(self.net.get_params())
+ grads.append(c.fit()[1])
+ self.net.fit(grads, self.options.params)
+ loss, confusion_matrix = utils.gen_confusion_matrix(
+ self.net,
+ dataloader,
+ self.criterion,
+ self.nb_classes,
+ self.options
+ )
+ self.confusion_matrices = torch.cat(
+ (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0))
+ )
+ stats = utils.gen_conf_stats(confusion_matrix, self.options)
if self.options.verbosity > 0:
- print()
- return confusion_matrices
+ print(
+ f"[ E: {e + 1}/{epochs}, " +
+ f"L: {loss:.6f}, " +
+ f"Acc: {stats['accuracy']:.6f}, " +
+ f"MCC: {stats['MCC']:.6f}, " +
+ f"ASR: {stats['attack_success']:.6f}, " +
+ f"T: {time.time() - start:.6f}s ]",
+ end="\r" if self.options.verbosity < 2 else "\n"
+ )
+ del grads
def add_clients(self, clients):
"""Add clients to the server"""
self.num_clients += len(clients)
self.clients.extend(clients)
+
+ def get_conf_matrices(self):
+ return self.confusion_matrices
diff --git a/utils.py b/utils.py
@@ -15,6 +15,9 @@ import numpy as np
def gen_confusion_matrix(model, dataloader, criterion, nb_classes, options):
+ """
+ Give the loss of the model across the data and generate a confusion matrix
+ """
with torch.no_grad():
loss = 0
denom = 0
@@ -33,6 +36,7 @@ def gen_confusion_matrix(model, dataloader, criterion, nb_classes, options):
def gen_conf_stats(confusion_matrix, options):
+ """Find some statistics based on the given confusion_matrix"""
accuracy = 0
total = 0
attack_success_n = 0
@@ -54,7 +58,7 @@ def gen_conf_stats(confusion_matrix, options):
attack_success_d += cell
total += cell
class_acc[y][1] += cell
- f = lambda x, y: x / y if y > 0 else 0
+ f = lambda x, y: x / y if y > 0 else 0.
stats = {
"accuracy": f(accuracy, total),
"attack_success": f(attack_success_n, attack_success_d),
@@ -65,7 +69,9 @@ def gen_conf_stats(confusion_matrix, options):
stats[f"accuracy_{i}"] = f(acc[0], acc[1])
return stats
+
def gen_experiment_stats(sim_confusion_matrices, options):
+ """Find the statistics across multiple simulations"""
stats = merge_dicts(
[gen_sim_stats(c, options) for c in sim_confusion_matrices]
)
@@ -75,12 +81,14 @@ def gen_experiment_stats(sim_confusion_matrices, options):
def gen_sim_stats(confusion_matrices, options):
+ """Find the statistics of one simulation"""
return merge_dicts(
[gen_conf_stats(c, options) for c in confusion_matrices]
)
def merge_dicts(dict_list):
+ """Merge a list of dictionaries into one"""
merged = {k: [] for k in dict_list[0].keys()}
for d in dict_list:
for k, v in d.items():
@@ -109,16 +117,8 @@ def flatten_params(params, options):
return flat_params
-def write_log(log_file_name, stats):
- accuracies = np.mean(np.array(stats['accuracies']), axis=0)
- attack_successes = np.mean(np.array(stats['attack_successes']), axis=0)
- with open(log_file_name, "w") as f:
- f.write("epoch,accuracy,attack_success\n")
- for i, (a, b) in enumerate(zip(accuracies, attack_successes)):
- f.write(f"{i},{a},{b}\n")
-
-
def write_results(result_file, confusion_matrices):
+ """Write the results of an experiment to a pickle file"""
torch.save(confusion_matrices, result_file)