viceroy

git clone git://git.codymlewis.com/viceroy.git
Log | Files | Refs | README

commit f2e5997fe7107f055bbc6a6f9264136370c2b71f
parent 96133a310b582f5770891f5896460b5b3e0df041
Author: Cody Lewis <cody@codymlewis.com>
Date:   Tue, 24 Nov 2020 16:27:50 +1100

Added interrupt handling and updated README

Diffstat:
M README.md | 20 ++++++++++++++++++++
M adversaries.py | 3 ++-
M datasets.py | 33 ++++++++++++++++++++++++++-------
M main.py | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M models.py | 10 +++++++++-
M options.json | 12 ++++++------
M requirements.txt | 12 ++++++++----
M server.py | 67 +++++++++++++++++++++++++++++++++----------------------------------
M utils.py | 20 ++++++++++----------
9 files changed, 275 insertions(+), 157 deletions(-)

diff --git a/README.md b/README.md @@ -2,3 +2,23 @@ A pytorch based implementation of federated learning and demonstration of a new attack against sybil mitigating FL systems. + +## Setup +``` +pip install -r requirements.txt +``` + +## Configuration +Simply edit `options.json` + +## Running +Run the simulation with: +``` +python3 main.py +``` + +Then once finished, run the following to get plots from an average of the +simulations: +``` +python3 plot.py +``` diff --git a/adversaries.py b/adversaries.py @@ -1,5 +1,5 @@ """ -Defines the adversaries within the system +Defines the adversaries within the system and a function to load them Author: Cody Lewis """ @@ -61,6 +61,7 @@ class OnOff(Client): def load_adversary(adversary_name): + """Load the class of the specified adversary""" adversaries = { "label flip": Flipper, "on off": OnOff, diff --git a/datasets.py b/datasets.py @@ -1,3 +1,9 @@ +""" +Module for dataset classes and a function to load them + +Author: Cody Lewis +""" + from math import floor from abc import abstractmethod @@ -13,6 +19,7 @@ import errors class DatasetWrapper(Dataset): + """Wrapper class for torch datasets to allow for easy non-iid splitting""" def __init__(self): self.targets = torch.tensor([]) self.y_dim = 0 @@ -25,24 +32,28 @@ class DatasetWrapper(Dataset): pass def get_dims(self): + """Get the x and y dimensions of the dataset""" if len(self) < 1: return (0, 0) x, _ = self[0] return (x.shape[0], self.y_dim) def get_idx(self, classes): + """Get the ids of data belong to the specified classes""" return torch.arange(len(self.targets))[ sum([(self.targets == i).long() for i in classes]).bool() ] def assign_to_classes(self, classes): + """Leave only data belonging to the classes within this set""" idx = self.get_idx(classes) self.data = self.data[idx] self.targets = self.targets[idx] class MNIST(DatasetWrapper): - def __init__(self, ds_path, train=True, download=False, classes=[]): + """The MNIST dataset in torch readable form""" + def 
__init__(self, ds_path, train=True, download=False, classes=None): super().__init__() ds = torchvision.datasets.MNIST( ds_path, @@ -60,7 +71,8 @@ class MNIST(DatasetWrapper): class FashionMNIST(DatasetWrapper): - def __init__(self, ds_path, train=True, download=False, classes=[]): + """The Fashion MNIST dataset in torch readable form""" + def __init__(self, ds_path, train=True, download=False, classes=None): super().__init__() ds = torchvision.datasets.MNIST( ds_path, @@ -78,7 +90,9 @@ class FashionMNIST(DatasetWrapper): class KDD99(DatasetWrapper): - def __init__(self, ds_path, train=True, download=False, classes=[]): + """The KDD Cup99 dataset in torch readable form""" + def __init__(self, ds_path, train=True, download=False, classes=None): + super().__init__() self.data = torch.tensor([]) self.targets = torch.tensor([]) df = pd.read_csv( @@ -107,7 +121,9 @@ class KDD99(DatasetWrapper): class Amazon(DatasetWrapper): - def __init__(self, ds_path, train=True, download=False, classes=[]): + """The Amazon dataset in torch readable form""" + def __init__(self, ds_path, train=True, download=False, classes=None): + super().__init__() df = pd.read_csv( f"{ds_path}/{'train' if train else 'test'}/amazon.data", header=None @@ -124,7 +140,9 @@ class Amazon(DatasetWrapper): class VGGFace(DatasetWrapper): - def __init__(self, ds_path, train=True, download=False, classes=[]): + """The VGGFace dataset in torch readable form""" + def __init__(self, ds_path, train=True, download=False, classes=None): + super().__init__() self.ds_path = f"{ds_path}/data" self.data_paths = [] self.targets = [] @@ -168,14 +186,15 @@ class VGGFace(DatasetWrapper): return (X, self.targets[idx].long()) -def load_data(options, train=True, shuffle=True, classes=[]): +def load_data(options, train=True, shuffle=True, classes=None): """ Load the specified dataset in a form suitable for the model Keyword arguments: options -- options for the simulation train -- load the training dataset if true otherwise load 
the validation - classes -- use only the classes in list, use all classes if empty list + classes -- use only the classes in list, use all classes if empty list or + None """ datasets = { "mnist": MNIST, diff --git a/main.py b/main.py @@ -29,8 +29,8 @@ def index_match(arr): def find_shards(num_users, num_classes, classes_per_user): """Find data class shards according to the parameters""" - end_halves = [[i for i in range(num_classes)] - for _ in range(classes_per_user - 1)] + end_halves = [list(range(num_classes)) + for _ in range(classes_per_user - 1)] if num_classes / classes_per_user < num_users: for end_half in end_halves: while index_match(end_half): @@ -47,104 +47,171 @@ def find_shards(num_users, num_classes, classes_per_user): ] -if __name__ == '__main__': +def run(program_flow, current, run_data): try: - options = utils.load_options() - if options.verbosity > 0: - print("Options set as:") - print(options) - if 'cuda' in (dev_name := options.model_params['device']): - p = not torch.cuda.is_available() - c = int(dev_name[dev_name.find(':') + 1:]) + 1 - q = c > torch.cuda.device_count() - if p or q: - raise errors.MisconfigurationError( - f"Device '{dev_name}' is not available on this machine" - ) - train_data = load_data( - options, - train=True, - shuffle=False, - ) - val_data = load_data( - options, - train=False, - shuffle=False, - ) - user_classes = [ - Client if i <= options.users * ( - 1 - options.adversaries['percent_adv']) - else load_adversary(options.adversaries['type']) - for i in range(1, options.users + 1) - ] - if options.class_shards: - class_shards = options.class_shards - else: - class_shards = find_shards( - options.users, - val_data['y_dim'], - options.classes_per_user + program_flow[current](run_data) + return run_data + except errors.MisconfigurationError as e: + print(f"Miconfiguratation Error: {e}") + except KeyboardInterrupt: + print() + decision = input('Are you sure you want to quit? 
') + if decision.lower().find('y') >= 0: + run_data['quit'] = True + return run_data + return run(program_flow, current, data) + + +def system_setup(run_data): + run_data["options"] = utils.load_options() + if run_data["options"].verbosity > 0: + print("Options set as:") + print(run_data["options"]) + if 'cuda' in (dev_name := run_data["options"].model_params['device']): + p = not torch.cuda.is_available() + c = int(dev_name[dev_name.find(':') + 1:]) + 1 + q = c > torch.cuda.device_count() + if p or q: + raise errors.MisconfigurationError( + f"Device '{dev_name}' is not available on this machine" ) - if options.class_shards is None and options.verbosity > 0: - print("Assigned class shards:") - print(class_shards) - print() - sim_confusion_matrices = torch.tensor([], dtype=int) - for i in range(options.num_sims): - print(f"Simulation {i + 1}/{options.num_sims}") - server = Server( - max(train_data['x_dim'], val_data['x_dim']), - max(train_data['y_dim'], val_data['y_dim']), - options + run_data["train_data"] = load_data( + run_data["options"], + train=True, + shuffle=False, + ) + run_data["val_data"] = load_data( + run_data["options"], + train=False, + shuffle=False, + ) + run_data['sim_number'] = 0 + return run_data + +def setup_users(run_data): + run_data["user_classes"] = [ + Client if i <= run_data["options"].users * ( + 1 - run_data["options"].adversaries['percent_adv']) + else load_adversary(run_data["options"].adversaries['type']) + for i in range(1, run_data["options"].users + 1) + ] + if run_data["options"].class_shards: + run_data["class_shards"] = run_data["options"].class_shards + else: + run_data["class_shards"] = find_shards( + run_data["options"].users, + run_data["val_data"]['y_dim'], + run_data["options"].classes_per_user + ) + if run_data["options"].class_shards is None and \ + run_data["options"].verbosity > 0: + print("Assigned class shards:") + print(run_data["class_shards"]) + print() + return run_data + + +def run_simulations(run_data): + 
run_data["sim_confusion_matrices"] = torch.tensor([], dtype=int) + for i in range(run_data['sim_number'], run_data["options"].num_sims): + print(f"Simulation {i + 1}/{run_data['options'].num_sims}") + if not run_data.get('sim_setup'): + run_data["server"] = Server( + max( + run_data["train_data"]['x_dim'], + run_data["val_data"]['x_dim'] + ), + max( + run_data["train_data"]['y_dim'], + run_data["val_data"]['y_dim'] + ), + run_data["options"] ) - server.add_clients( + run_data["server"].add_clients( [ u( - options, - class_shards[i] - ) for i, u in enumerate(user_classes) + run_data["options"], + run_data["class_shards"][i] + ) for i, u in enumerate(run_data["user_classes"]) ] ) - print("Starting training...") - confusion_matrices = server.fit( - val_data['dataloader'], options.server_epochs - ) - sim_confusion_matrices = torch.cat( - (sim_confusion_matrices, confusion_matrices.unsqueeze(dim=0)) - ) - if options.verbosity > 0: - print("Done training.") - criterion = torch.nn.CrossEntropyLoss() - loss, conf_mat = utils.gen_confusion_matrix( - server.net, - train_data['dataloader'], - criterion, - server.nb_classes, - options + run_data['sim_setup'] = True + run_data['epoch'] = 0 + print("Starting training...") + for run_data['epoch'] in range(run_data['epoch'], + run_data['options'].server_epochs): + run_data["server"].fit( + run_data["val_data"]['dataloader'], + run_data['epoch'], + run_data["options"].server_epochs ) - stats = utils.gen_conf_stats(conf_mat, options) - loss_val, conf_mat = utils.gen_confusion_matrix( - server.net, - val_data['dataloader'], - criterion, - server.nb_classes, - options - ) - stats_val = utils.gen_conf_stats(conf_mat, options) - print(f"Loss: t: {loss}, v: {loss_val}") - print(f"Accuracy: t: {stats['accuracy'] * 100}%, ", end="") - print(f"v: {stats_val['accuracy'] * 100}%") - print( - f"Attack success rate: t: {stats['attack_success'] * 100}%, ", - end="" - ) - print(f"v: {stats_val['attack_success'] * 100}%") + confusion_matrices = 
run_data['server'].get_conf_matrices() + if run_data["options"].verbosity > 0: print() - if options.verbosity > 0: - print(f"Writing confusion matrices to {options.result_file}...") - utils.write_results(options.result_file, sim_confusion_matrices) - # print(utils.gen_experiment_stats(sim_confusion_matrices, options)) - if options.verbosity > 0: - print("Done.") - except errors.MisconfigurationError as e: - print(f"Miconfiguratation Error: {e}") + run_data["sim_confusion_matrices"] = torch.cat( + ( + run_data["sim_confusion_matrices"], + confusion_matrices.unsqueeze(dim=0) + ) + ) + print() + run_data['sim_setup'] = False + run_data['sim_number'] += 1 + if run_data["options"].verbosity > 0: + print("Done training.") + criterion = torch.nn.CrossEntropyLoss() + loss, conf_mat = utils.gen_confusion_matrix( + run_data["server"].net, + run_data["train_data"]['dataloader'], + criterion, + run_data["server"].nb_classes, + run_data["options"] + ) + stats = utils.gen_conf_stats(conf_mat, run_data["options"]) + loss_val, conf_mat = utils.gen_confusion_matrix( + run_data["server"].net, + run_data["val_data"]['dataloader'], + criterion, + run_data["server"].nb_classes, + run_data["options"] + ) + stats_val = utils.gen_conf_stats(conf_mat, run_data["options"]) + print(f"Loss: t: {loss}, v: {loss_val}") + print(f"Accuracy: t: {stats['accuracy'] * 100}%, ", end="") + print(f"v: {stats_val['accuracy'] * 100}%") + print(f"MCC: t: {stats['MCC']}, v: {stats_val['MCC']}") + print( + f"Attack success rate: t: {stats['attack_success'] * 100}%, ", + end="" + ) + print(f"v: {stats_val['attack_success'] * 100}%") + return run_data + + +def write_results(run_data): + if run_data["options"].verbosity > 0: + print() + print(f"Writing confusion matrices to {run_data['options'].result_file}...") + utils.write_results( + run_data["options"].result_file, + run_data["sim_confusion_matrices"] + ) + if run_data["options"].verbosity > 0: + print("Done.") + return run_data + + + +if __name__ == 
'__main__': + program_flow = { + "system_setup": system_setup, + "setup_users": setup_users, + "run_simulations": run_simulations, + "write_results": write_results + } + data = {"quit": False} + for k in program_flow.keys(): + data = run(program_flow, k, data) + if data['quit']: + print("bye.") + break diff --git a/models.py b/models.py @@ -1,9 +1,11 @@ """ -Pytorch implementation of a softmax perceptron +A model for ML Models and a function to load them Author: Cody Lewis """ +from abc import abstractmethod + import torch.nn as nn import torch.optim as optim import torchvision @@ -21,6 +23,10 @@ class Model(nn.Module): self.lr_changes = params['lr_changes'].copy() self.epoch_count = 0 + @abstractmethod + def forward(self, *x): + pass + def fit(self, data, epochs=1, verbose=True): """ Fit the model for some epochs, return history of loss values and the @@ -99,6 +105,7 @@ class SoftMaxModel(Model): class SqueezeNet(Model): + """The SqueezeNet DNN Class""" def __init__(self, params): super().__init__(params) net = torchvision.models.__dict__["squeezenet1_1"](pretrained=True) @@ -118,6 +125,7 @@ class SqueezeNet(Model): def load_model(params): + """Load the model specified in params""" models = { "softmax": SoftMaxModel, "squeeze": SqueezeNet, diff --git a/options.json b/options.json @@ -1,7 +1,7 @@ { "dataset": "mnist", "num_sims": 5, - "server_epochs": 3000, + "server_epochs": 5, "user_epochs": 1, "users": 10, "model_params": { @@ -20,12 +20,12 @@ "adversaries": { "percent_adv": 0.5, "type": "on off", - "from": 4, - "to": 8, - "toggle_times": [100, 100], - "delay": null + "from": 5, + "to": 0, + "toggle_times": [500, 150], + "delay": 1000 }, - "class_shards": [[0, 9],[1, 8],[2, 7],[3, 6],[4, 5],[5, 7],[6, 2],[7, 5],[8, 9],[9, 4]], + "class_shards": null, "classes_per_user": 2, "verbosity": 1, "result_file": "./results.pt" diff --git a/requirements.txt b/requirements.txt @@ -1,4 +1,8 @@ -pytorch -torchvision -ggplot -pandas +numpy==1.19.4 +torchvision==0.5.0 
+torch==1.7.0 +matplotlib==3.3.3 +scipy==1.5.4 +pandas==1.1.3 +Pillow==8.0.1 +scikit_learn==0.23.2 diff --git a/server.py b/server.py @@ -26,44 +26,43 @@ class Server: self.clients = [] self.nb_classes = num_out self.options = options + self.confusion_matrices = torch.tensor([], dtype=int) + self.criterion = nn.CrossEntropyLoss() - def fit(self, dataloader, epochs): - confusion_matrices = torch.tensor([], dtype=int) - criterion = nn.CrossEntropyLoss() - for e in range(epochs): - start = time.time() - grads = [] - for c in self.clients: - c.net.copy_params(self.net.get_params()) - grads.append(c.fit()[1]) - self.net.fit(grads, self.options.params) - loss, confusion_matrix = utils.gen_confusion_matrix( - self.net, - dataloader, - criterion, - self.nb_classes, - self.options - ) - confusion_matrices = torch.cat( - (confusion_matrices, confusion_matrix.unsqueeze(dim=0)) - ) - stats = utils.gen_conf_stats(confusion_matrix, self.options) - if self.options.verbosity > 0: - print( - f"[ E: {e + 1}/{epochs}, " + - f"L: {loss:.6f}, " + - f"Acc: {stats['accuracy']:.6f}, " + - f"MCC: {stats['MCC']:.6f}, " + - f"ASR: {stats['attack_success']:.6f}, " + - f"T: {time.time() - start:.6f}s ]", - end="\r" if self.options.verbosity < 2 else "\n" - ) - del grads + def fit(self, dataloader, e, epochs): + start = time.time() + grads = [] + for c in self.clients: + c.net.copy_params(self.net.get_params()) + grads.append(c.fit()[1]) + self.net.fit(grads, self.options.params) + loss, confusion_matrix = utils.gen_confusion_matrix( + self.net, + dataloader, + self.criterion, + self.nb_classes, + self.options + ) + self.confusion_matrices = torch.cat( + (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0)) + ) + stats = utils.gen_conf_stats(confusion_matrix, self.options) if self.options.verbosity > 0: - print() - return confusion_matrices + print( + f"[ E: {e + 1}/{epochs}, " + + f"L: {loss:.6f}, " + + f"Acc: {stats['accuracy']:.6f}, " + + f"MCC: {stats['MCC']:.6f}, " + + f"ASR: 
{stats['attack_success']:.6f}, " + + f"T: {time.time() - start:.6f}s ]", + end="\r" if self.options.verbosity < 2 else "\n" + ) + del grads def add_clients(self, clients): """Add clients to the server""" self.num_clients += len(clients) self.clients.extend(clients) + + def get_conf_matrices(self): + return self.confusion_matrices diff --git a/utils.py b/utils.py @@ -15,6 +15,9 @@ import numpy as np def gen_confusion_matrix(model, dataloader, criterion, nb_classes, options): + """ + Give the loss of the model across the data and Generate a confusion matrix + """ with torch.no_grad(): loss = 0 denom = 0 @@ -33,6 +36,7 @@ def gen_confusion_matrix(model, dataloader, criterion, nb_classes, options): def gen_conf_stats(confusion_matrix, options): + """Find some statistics based on the given confusion_matrix""" accuracy = 0 total = 0 attack_success_n = 0 @@ -54,7 +58,7 @@ def gen_conf_stats(confusion_matrix, options): attack_success_d += cell total += cell class_acc[y][1] += cell - f = lambda x, y: x / y if y > 0 else 0 + f = lambda x, y: x / y if y > 0 else 0. 
stats = { "accuracy": f(accuracy, total), "attack_success": f(attack_success_n, attack_success_d), @@ -65,7 +69,9 @@ def gen_conf_stats(confusion_matrix, options): stats[f"accuracy_{i}"] = f(acc[0], acc[1]) return stats + def gen_experiment_stats(sim_confusion_matrices, options): + """Find the statistics across multiple simulations""" stats = merge_dicts( [gen_sim_stats(c, options) for c in sim_confusion_matrices] ) @@ -75,12 +81,14 @@ def gen_experiment_stats(sim_confusion_matrices, options): def gen_sim_stats(confusion_matrices, options): + """Find the stastics of one simulation""" return merge_dicts( [gen_conf_stats(c, options) for c in confusion_matrices] ) def merge_dicts(dict_list): + """Merge two dictionaries""" merged = {k: [] for k in dict_list[0].keys()} for d in dict_list: for k, v in d.items(): @@ -109,16 +117,8 @@ def flatten_params(params, options): return flat_params -def write_log(log_file_name, stats): - accuracies = np.mean(np.array(stats['accuracies']), axis=0) - attack_successes = np.mean(np.array(stats['attack_successes']), axis=0) - with open(log_file_name, "w") as f: - f.write("epoch,accuracy,attack_success\n") - for i, (a, b) in enumerate(zip(accuracies, attack_successes)): - f.write(f"{i},{a},{b}\n") - - def write_results(result_file, confusion_matrices): + """Write the results of an experiment to a pickle file""" torch.save(confusion_matrices, result_file)