viceroy

git clone git://git.codymlewis.com/viceroy.git
Log | Files | Refs | README

commit a32c34595393986741b42bfc03aae74ef0350c20
parent f2e5997fe7107f055bbc6a6f9264136370c2b71f
Author: Cody Lewis <cody@codymlewis.com>
Date:   Tue, 24 Nov 2020 16:58:22 +1100

Made the code's folder setup neater

Diffstat:
D adversaries.py | 74 --------------------------------------------------------------------------
D client.py | 27 ---------------------------
D datasets.py | 227 -------------------------------------------------------------------------------
D errors.py | 2 --
D global_model.py | 106 -------------------------------------------------------------------------------
M main.py | 20 ++++++++++++--------
D models.py | 139 -------------------------------------------------------------------------------
D server.py | 68 --------------------------------------------------------------------
A server/__init__.py | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A server/global_model.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A users/__init__.py | 5 +++++
A users/adversaries.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A users/client.py | 27 +++++++++++++++++++++++++++
R utils.py -> utils/__init__.py | 0
A utils/datasets.py | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A utils/errors.py | 9 +++++++++
A utils/models.py | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
17 files changed, 667 insertions(+), 651 deletions(-)

diff --git a/adversaries.py b/adversaries.py @@ -1,74 +0,0 @@ -""" -Defines the adversaries within the system and a function to load them - -Author: Cody Lewis -""" - - -from itertools import cycle - -from client import Client -from datasets import load_data -import errors - - -class Flipper(Client): - """A simple label-flipping model poisoner""" - def __init__(self, options, classes): - super().__init__(options, classes) - self.shadow_data = load_data(options, [options.adversaries['from']]) - self.shadow_data['dataloader'].dataset.targets[:] = \ - options.adversaries['to'] - self.epochs = 0 - if options.adversaries['delay'] is None: - self.delay_time = 0 - else: - self.delay_time = options.adversaries['delay'] - - def fit(self, verbose=False): - if self.epochs == self.delay_time: - self.data = self.shadow_data - self.epochs += 1 - return super().fit(verbose=verbose) - - -class OnOff(Client): - """ - Label flipping poisoner that switches its attack on and off every few - epochs - """ - def __init__(self, options, classes): - super().__init__(options, classes) - self.shadow_data = load_data(options, [options.adversaries['from']]) - self.shadow_data['dataloader'].dataset.targets[:] = \ - options.adversaries['to'] - self.toggle_time = cycle(self.options.adversaries['toggle_times']) - self.epochs = 0 - if self.options.adversaries['delay'] is None: - self.next_switch = self.epochs + next(self.toggle_time) - else: - self.next_switch = self.epochs + self.options.adversaries['delay'] - next(self.toggle_time) - - def fit(self, verbose=False): - if self.epochs == self.next_switch: - temp = self.data - self.data = self.shadow_data - self.shadow_data = temp - self.next_switch += next(self.toggle_time) - self.epochs += 1 - return super().fit(verbose=verbose) - - -def load_adversary(adversary_name): - """Load the class of the specified adversary""" - adversaries = { - "label flip": Flipper, - "on off": OnOff, - } - if (chosen_adversary := adversaries.get(adversary_name)) is 
None: - raise errors.MisconfigurationError( - f"Model '{adversary_name}' does not exist, " + - f"possible options: {set(adversaries.keys())}" - ) - return chosen_adversary diff --git a/client.py b/client.py @@ -1,27 +0,0 @@ -""" -Classes and functions for the client networking aspect of federated learning - -Author: Cody Lewis -""" - -from models import load_model -from datasets import load_data - - -class Client: - """Federated learning client""" - def __init__(self, options, classes): - self.data = load_data(options, train=True, classes=classes) - params = options.model_params - params['num_in'] = self.data['x_dim'] - params['num_out'] = self.data['y_dim'] - self.net = load_model(params).to(params['device']) - self.options = options - - def fit(self, verbose=False): - """Fit the client to its own copy of data""" - return self.net.fit( - self.data['dataloader'], - self.options.user_epochs, - verbose=verbose - ) diff --git a/datasets.py b/datasets.py @@ -1,227 +0,0 @@ -""" -Module for dataset classes and a function to load them - -Author: Cody Lewis -""" - -from math import floor -from abc import abstractmethod - -import numpy as np -import torch -import torchvision -from torchvision import transforms -from torch.utils.data.dataset import Dataset -import pandas as pd -from PIL import Image - -import errors - - -class DatasetWrapper(Dataset): - """Wrapper class for torch datasets to allow for easy non-iid splitting""" - def __init__(self): - self.targets = torch.tensor([]) - self.y_dim = 0 - - def __len__(self): - return len(self.targets) - - @abstractmethod - def __getitem__(self, i): - pass - - def get_dims(self): - """Get the x and y dimensions of the dataset""" - if len(self) < 1: - return (0, 0) - x, _ = self[0] - return (x.shape[0], self.y_dim) - - def get_idx(self, classes): - """Get the ids of data belong to the specified classes""" - return torch.arange(len(self.targets))[ - sum([(self.targets == i).long() for i in classes]).bool() - ] - - def 
assign_to_classes(self, classes): - """Leave only data belonging to the classes within this set""" - idx = self.get_idx(classes) - self.data = self.data[idx] - self.targets = self.targets[idx] - - -class MNIST(DatasetWrapper): - """The MNIST dataset in torch readable form""" - def __init__(self, ds_path, train=True, download=False, classes=None): - super().__init__() - ds = torchvision.datasets.MNIST( - ds_path, - train=train, - download=download - ) - self.data = ds.data.flatten(1).float() - self.targets = ds.targets - self.y_dim = len(self.targets.unique()) - if classes: - self.assign_to_classes(classes) - - def __getitem__(self, i): - return (self.data[i], self.targets[i]) - - -class FashionMNIST(DatasetWrapper): - """The Fashion MNIST dataset in torch readable form""" - def __init__(self, ds_path, train=True, download=False, classes=None): - super().__init__() - ds = torchvision.datasets.MNIST( - ds_path, - train=train, - download=download - ) - self.data = ds.data.flatten(1).float() - self.targets = ds.targets - self.y_dim = len(self.targets.unique()) - if classes: - self.assign_to_classes(classes) - - def __getitem__(self, i): - return (self.data[i], self.targets[i]) - - -class KDD99(DatasetWrapper): - """The KDD Cup99 dataset in torch readable form""" - def __init__(self, ds_path, train=True, download=False, classes=None): - super().__init__() - self.data = torch.tensor([]) - self.targets = torch.tensor([]) - df = pd.read_csv( - f"{ds_path}/{'train' if train else 'test'}/kddcup.data", - header=None, - iterator=True - ) - nl = 0 - data_len = round(494021 * (0.7 if train else 0.3)) - read_amount = 100_000 - marker = floor(data_len / read_amount) * read_amount - while read_amount > 0 and (nl := nl + read_amount) <= marker: - line = df.read(read_amount) - line = torch.from_numpy(line.to_numpy(np.dtype('float32'))) - self.data = torch.cat((self.data, line[:, 1:-1])) - self.targets = torch.cat((self.targets, line[:, -1])) - if nl == marker: - marker = data_len - 
read_amount = data_len % read_amount - self.y_dim = len(self.targets.unique()) - if classes: - self.assign_to_classes(classes) - - def __getitem__(self, i): - return (self.data[i], self.targets[i].long()) - - -class Amazon(DatasetWrapper): - """The Amazon dataset in torch readable form""" - def __init__(self, ds_path, train=True, download=False, classes=None): - super().__init__() - df = pd.read_csv( - f"{ds_path}/{'train' if train else 'test'}/amazon.data", - header=None - ) - data = df.to_numpy(np.dtype('float32')) - self.data = torch.from_numpy(data[:, :-1]) - self.targets = torch.from_numpy(data[:, -1]) - self.y_dim = len(self.targets.unique()) - if classes: - self.assign_to_classes(classes) - - def __getitem__(self, i): - return (self.data[i], self.targets[i].long()) - - -class VGGFace(DatasetWrapper): - """The VGGFace dataset in torch readable form""" - def __init__(self, ds_path, train=True, download=False, classes=None): - super().__init__() - self.ds_path = f"{ds_path}/data" - self.data_paths = [] - self.targets = [] - normalize = transforms.Normalize( - mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225] - ) - self.train = train - if train: - self.transform = transforms.Compose([ - transforms.Resize(256), - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ]) - else: - self.transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ]) - file_info = pd.read_csv(f"{ds_path}/top10_files.csv") - unique_classes = set() - for _, r in file_info[file_info['train_flag'] == int(not train)].iterrows(): - if r['Class_ID'] not in unique_classes: - unique_classes = unique_classes.union({r['Class_ID']}) - if not classes or r['Class_ID'] in classes: - self.data_paths.append(f"{self.ds_path}/{r['Class_ID']}/{r['file']}") - self.targets.append(r['Class_ID']) - self.y_dim = len(unique_classes) - self.data_paths = np.array(self.data_paths) - 
self.targets = torch.tensor(self.targets) - - def __getitem__(self, idx): - if torch.is_tensor(idx): - idx = idx.tolist() - X = Image.open(self.data_paths[idx]) - X = self.transform(X) - return (X, self.targets[idx].long()) - - -def load_data(options, train=True, shuffle=True, classes=None): - """ - Load the specified dataset in a form suitable for the model - - Keyword arguments: - options -- options for the simulation - train -- load the training dataset if true otherwise load the validation - classes -- use only the classes in list, use all classes if empty list or - None - """ - datasets = { - "mnist": MNIST, - "fmnist": FashionMNIST, - "kddcup99": KDD99, - "amazon": Amazon, - "vggface": VGGFace, - } - if (chosen_set := datasets.get(options.dataset)) is None: - raise errors.MisconfigurationError( - f"Dataset '{options.dataset}' does not exist, " + - f"possible options: {set(datasets.keys())}" - ) - data = chosen_set( - f"./data/{options.dataset}", - train=train, - download=True, - classes=classes - ) - x_dim, y_dim = data.get_dims() - return { - "dataloader": torch.utils.data.DataLoader( - data, - batch_size=options.model_params['batch_size'], - shuffle=shuffle, - pin_memory=True, - ), - "x_dim": x_dim, - "y_dim": y_dim, - } diff --git a/errors.py b/errors.py @@ -1,2 +0,0 @@ -class MisconfigurationError(Exception): - pass diff --git a/global_model.py b/global_model.py @@ -1,106 +0,0 @@ -""" -Classes and functions for a global model for use within federated learning - -Author: Cody Lewis -""" - -import torch - -from models import load_model -import utils - - -class GlobalModel: - """The central global model for use within federated learning""" - def __init__(self, num_in, num_out, options): - self.params = options.model_params - self.params['num_in'] = num_in - self.params['num_out'] = num_out - self.net = load_model(self.params).to(self.params['device']) - self.histories = dict() - self.fit_fun = { - "federated averaging": fed_avg, - "foolsgold": foolsgold - 
}[options.fit_fun] - - def fit(self, grads, params): - """Fit the model to some client gradients""" - self.fit_fun(self, grads, params) - - def predict(self, x): - """Predict the classes of the data x""" - return self.net(x) - - def get_params(self): - """Get the tensor form parameters of this model""" - return self.net.get_params() - - -def fed_avg(net, grads, params): - """Perform federated averaging across the client gradients""" - num_clients = len(grads) - total_dc = sum([grads[i]["data_count"] for i in range(num_clients)]) - for k, p in enumerate(net.net.parameters()): - for i in range(num_clients): - with torch.no_grad(): - p.data.add_( - (grads[i]["data_count"] / total_dc) * - grads[i]["params"][k] - ) - - -def find_feature_importance(net): - """Get a vector indicating the importance of features in the network""" - with torch.no_grad(): - w_t = utils.flatten_params(net.get_params(), net.params) - return abs(w_t - w_t.mean()) / sum(abs(w_t)) - - -def foolsgold(net, grads, params): - """Perform FoolsGold learning across the client gradients""" - with torch.no_grad(): - flat_grads = utils.flatten_grads(grads, net.params) - num_clients = len(grads) - cs = torch.tensor( - [[0 for _ in range(num_clients)] for _ in range(num_clients)], - dtype=torch.float32 - ) - v = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32) - alpha = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32) - if len(net.histories) < num_clients: - while len(net.histories) < num_clients: - net.histories[len(net.histories)] = flat_grads[len(net.histories)] - else: - for i in range(num_clients): - net.histories[i] += flat_grads[i] - if params['importance']: - feature_importance = find_feature_importance(net) - else: - feature_importance = torch.tensor([1]).to(net.params['device']) - for i in range(num_clients): - for j in {x for x in range(num_clients)} - {i}: - cs[i][j] = torch.cosine_similarity( - net.histories[i] * feature_importance, - net.histories[j] * 
feature_importance, - dim=0 - ) - v[i] = max(cs[i]) - del feature_importance - for i in range(num_clients): - for j in range(num_clients): - if (v[j] > v[i]) and (v[j] != 0): - cs[i][j] *= v[i] / v[j] - alpha[i] = 1 - max(cs[i]) - alpha = alpha / max(alpha) - ids = alpha != 1 - alpha[ids] = params['kappa'] * ( - torch.log(alpha[ids] / (1 - alpha[ids])) + 0.5) - alpha[alpha > 1] = 1 - alpha[alpha < 0] = 0 - alpha_sum = alpha.sum() - for k, p in enumerate(net.net.parameters()): - for i in range(num_clients): - p.data.add_( - (alpha[i] / alpha_sum) * - grads[i]['params'][k] - ) diff --git a/main.py b/main.py @@ -9,14 +9,13 @@ Author: Cody Lewis import random import torch -import numpy as np -from adversaries import load_adversary -from client import Client -import errors +from users.adversaries import load_adversary +from users.client import Client +import utils.errors from server import Server import utils -from datasets import load_data +from utils.datasets import load_data def index_match(arr): @@ -48,10 +47,11 @@ def find_shards(num_users, num_classes, classes_per_user): def run(program_flow, current, run_data): + """Run a part of the program""" try: program_flow[current](run_data) return run_data - except errors.MisconfigurationError as e: + except utils.errors.MisconfigurationError as e: print(f"Miconfiguratation Error: {e}") except KeyboardInterrupt: print() @@ -63,6 +63,7 @@ def run(program_flow, current, run_data): def system_setup(run_data): + """Setup the system""" run_data["options"] = utils.load_options() if run_data["options"].verbosity > 0: print("Options set as:") @@ -72,7 +73,7 @@ def system_setup(run_data): c = int(dev_name[dev_name.find(':') + 1:]) + 1 q = c > torch.cuda.device_count() if p or q: - raise errors.MisconfigurationError( + raise utils.errors.MisconfigurationError( f"Device '{dev_name}' is not available on this machine" ) run_data["train_data"] = load_data( @@ -88,7 +89,9 @@ def system_setup(run_data): run_data['sim_number'] = 0 return 
run_data + def setup_users(run_data): + """Setup the users/clients for the system""" run_data["user_classes"] = [ Client if i <= run_data["options"].users * ( 1 - run_data["options"].adversaries['percent_adv']) @@ -112,6 +115,7 @@ def setup_users(run_data): def run_simulations(run_data): + """Run the simulations""" run_data["sim_confusion_matrices"] = torch.tensor([], dtype=int) for i in range(run_data['sim_number'], run_data["options"].num_sims): print(f"Simulation {i + 1}/{run_data['options'].num_sims}") @@ -189,6 +193,7 @@ def run_simulations(run_data): def write_results(run_data): + """Write all of the recorded results from the experiments""" if run_data["options"].verbosity > 0: print() print(f"Writing confusion matrices to {run_data['options'].result_file}...") @@ -201,7 +206,6 @@ def write_results(run_data): return run_data - if __name__ == '__main__': program_flow = { "system_setup": system_setup, diff --git a/models.py b/models.py @@ -1,139 +0,0 @@ -""" -A model for ML Models and a function to load them - -Author: Cody Lewis -""" - -from abc import abstractmethod - -import torch.nn as nn -import torch.optim as optim -import torchvision - -import errors - - -class Model(nn.Module): - def __init__(self, params): - super().__init__() - self.params = params - self.lr = params['learning_rate'][0] - self.learning_rates = params['learning_rate'].copy() - del self.learning_rates[0] - self.lr_changes = params['lr_changes'].copy() - self.epoch_count = 0 - - @abstractmethod - def forward(self, *x): - pass - - def fit(self, data, epochs=1, verbose=True): - """ - Fit the model for some epochs, return history of loss values and the - gradients of the changed parameters - - Keyword arguments: - x -- training data - y -- training labels - epochs -- number of epochs to train for - verbose -- output training stats if True - """ - optimizer = optim.SGD( - self.parameters(), - lr=self.lr, - momentum=0.9, - weight_decay=0.0001 - ) - criterion = nn.CrossEntropyLoss() - 
data_count = 0 - for i in range(epochs): - optimizer.zero_grad() - x, y = next(iter(data)) - x = x.to(self.params['device']) - y = y.to(self.params['device']) - output = self(x) - loss = criterion(output, y) - if verbose: - print( - f"Epoch {i + 1}/{epochs} loss: {loss}", - end="\r" - ) - loss.backward() - optimizer.step() - data_count += len(y) - self.epoch_count += 1 - if self.lr_changes and self.epoch_count > self.lr_changes[0]: - self.lr = self.learning_rates[0] - del self.learning_rates[0] - del self.lr_changes[0] - if verbose: - print() - return loss, { - "params": [-self.lr * p.grad for p in self.parameters()], - "data_count": data_count - } - - def get_params(self): - """Get the tensor form parameters of this model""" - return [p.data for p in self.parameters()] - - def copy_params(self, params): - """Copy input parameters into self""" - for p, t in zip(params, self.parameters()): - t.data.copy_(p) - - -class SoftMaxModel(Model): - """The softmax perceptron class""" - def __init__(self, params): - super().__init__(params) - self.features = nn.ModuleList([ - nn.Linear( - params['num_in'], params['num_in'] * params['params_mul'] - ), - nn.Sigmoid(), - nn.Linear( - params['num_in'] * params['params_mul'], params['num_out'] - ), - nn.Softmax(dim=1) - ]).eval() - - def forward(self, x): - for feature in self.features: - x = feature(x) - return x - - -class SqueezeNet(Model): - """The SqueezeNet DNN Class""" - def __init__(self, params): - super().__init__(params) - net = torchvision.models.__dict__["squeezenet1_1"](pretrained=True) - net.classifier[1] = nn.Conv2d( - 512, params['num_out'], kernel_size=(1, 1), stride=(1, 1) - ) - self.features = nn.ModuleList( - [f for f in net.features] + - [f for f in net.classifier] - ).eval() - super().copy_params([p.data for p in net.parameters()]) - - def forward(self, x): - for feature in self.features: - x = feature(x) - return x.flatten(1) - - -def load_model(params): - """Load the model specified in params""" - models = 
{ - "softmax": SoftMaxModel, - "squeeze": SqueezeNet, - } - model_name = params['architecture'] - if (chosen_model := models.get(model_name)) is None: - raise errors.MisconfigurationError( - f"Model '{model_name}' does not exist, " + - f"possible options: {set(models.keys())}" - ) - return chosen_model(params) diff --git a/server.py b/server.py @@ -1,68 +0,0 @@ -""" -Classes and functions for the server networking aspect of federated learning - -Author: Cody Lewis -""" - - -import time - -import torch -import torch.nn as nn - -from global_model import GlobalModel -import utils - - -class Server: - """Federated learning server class""" - def __init__(self, num_in, num_out, options): - self.net = GlobalModel( - num_in, - num_out, - options, - ) - self.num_clients = 0 - self.clients = [] - self.nb_classes = num_out - self.options = options - self.confusion_matrices = torch.tensor([], dtype=int) - self.criterion = nn.CrossEntropyLoss() - - def fit(self, dataloader, e, epochs): - start = time.time() - grads = [] - for c in self.clients: - c.net.copy_params(self.net.get_params()) - grads.append(c.fit()[1]) - self.net.fit(grads, self.options.params) - loss, confusion_matrix = utils.gen_confusion_matrix( - self.net, - dataloader, - self.criterion, - self.nb_classes, - self.options - ) - self.confusion_matrices = torch.cat( - (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0)) - ) - stats = utils.gen_conf_stats(confusion_matrix, self.options) - if self.options.verbosity > 0: - print( - f"[ E: {e + 1}/{epochs}, " + - f"L: {loss:.6f}, " + - f"Acc: {stats['accuracy']:.6f}, " + - f"MCC: {stats['MCC']:.6f}, " + - f"ASR: {stats['attack_success']:.6f}, " + - f"T: {time.time() - start:.6f}s ]", - end="\r" if self.options.verbosity < 2 else "\n" - ) - del grads - - def add_clients(self, clients): - """Add clients to the server""" - self.num_clients += len(clients) - self.clients.extend(clients) - - def get_conf_matrices(self): - return self.confusion_matrices diff --git 
a/server/__init__.py b/server/__init__.py @@ -0,0 +1,68 @@ +""" +Classes and functions for the server networking aspect of federated learning + +Author: Cody Lewis +""" + + +import time + +import torch +import torch.nn as nn + +from server.global_model import GlobalModel +import utils + + +class Server: + """Federated learning server class""" + def __init__(self, num_in, num_out, options): + self.net = GlobalModel( + num_in, + num_out, + options, + ) + self.num_clients = 0 + self.clients = [] + self.nb_classes = num_out + self.options = options + self.confusion_matrices = torch.tensor([], dtype=int) + self.criterion = nn.CrossEntropyLoss() + + def fit(self, dataloader, e, epochs): + start = time.time() + grads = [] + for c in self.clients: + c.net.copy_params(self.net.get_params()) + grads.append(c.fit()[1]) + self.net.fit(grads, self.options.params) + loss, confusion_matrix = utils.gen_confusion_matrix( + self.net, + dataloader, + self.criterion, + self.nb_classes, + self.options + ) + self.confusion_matrices = torch.cat( + (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0)) + ) + stats = utils.gen_conf_stats(confusion_matrix, self.options) + if self.options.verbosity > 0: + print( + f"[ E: {e + 1}/{epochs}, " + + f"L: {loss:.6f}, " + + f"Acc: {stats['accuracy']:.6f}, " + + f"MCC: {stats['MCC']:.6f}, " + + f"ASR: {stats['attack_success']:.6f}, " + + f"T: {time.time() - start:.6f}s ]", + end="\r" if self.options.verbosity < 2 else "\n" + ) + del grads + + def add_clients(self, clients): + """Add clients to the server""" + self.num_clients += len(clients) + self.clients.extend(clients) + + def get_conf_matrices(self): + return self.confusion_matrices diff --git a/server/global_model.py b/server/global_model.py @@ -0,0 +1,106 @@ +""" +Classes and functions for a global model for use within federated learning + +Author: Cody Lewis +""" + +import torch + +from utils.models import load_model +import utils + + +class GlobalModel: + """The central global model 
for use within federated learning""" + def __init__(self, num_in, num_out, options): + self.params = options.model_params + self.params['num_in'] = num_in + self.params['num_out'] = num_out + self.net = load_model(self.params).to(self.params['device']) + self.histories = dict() + self.fit_fun = { + "federated averaging": fed_avg, + "foolsgold": foolsgold + }[options.fit_fun] + + def fit(self, grads, params): + """Fit the model to some client gradients""" + self.fit_fun(self, grads, params) + + def predict(self, x): + """Predict the classes of the data x""" + return self.net(x) + + def get_params(self): + """Get the tensor form parameters of this model""" + return self.net.get_params() + + +def fed_avg(net, grads, params): + """Perform federated averaging across the client gradients""" + num_clients = len(grads) + total_dc = sum([grads[i]["data_count"] for i in range(num_clients)]) + for k, p in enumerate(net.net.parameters()): + for i in range(num_clients): + with torch.no_grad(): + p.data.add_( + (grads[i]["data_count"] / total_dc) * + grads[i]["params"][k] + ) + + +def find_feature_importance(net): + """Get a vector indicating the importance of features in the network""" + with torch.no_grad(): + w_t = utils.flatten_params(net.get_params(), net.params) + return abs(w_t - w_t.mean()) / sum(abs(w_t)) + + +def foolsgold(net, grads, params): + """Perform FoolsGold learning across the client gradients""" + with torch.no_grad(): + flat_grads = utils.flatten_grads(grads, net.params) + num_clients = len(grads) + cs = torch.tensor( + [[0 for _ in range(num_clients)] for _ in range(num_clients)], + dtype=torch.float32 + ) + v = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32) + alpha = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32) + if len(net.histories) < num_clients: + while len(net.histories) < num_clients: + net.histories[len(net.histories)] = flat_grads[len(net.histories)] + else: + for i in range(num_clients): + net.histories[i] 
+= flat_grads[i] + if params['importance']: + feature_importance = find_feature_importance(net) + else: + feature_importance = torch.tensor([1]).to(net.params['device']) + for i in range(num_clients): + for j in {x for x in range(num_clients)} - {i}: + cs[i][j] = torch.cosine_similarity( + net.histories[i] * feature_importance, + net.histories[j] * feature_importance, + dim=0 + ) + v[i] = max(cs[i]) + del feature_importance + for i in range(num_clients): + for j in range(num_clients): + if (v[j] > v[i]) and (v[j] != 0): + cs[i][j] *= v[i] / v[j] + alpha[i] = 1 - max(cs[i]) + alpha = alpha / max(alpha) + ids = alpha != 1 + alpha[ids] = params['kappa'] * ( + torch.log(alpha[ids] / (1 - alpha[ids])) + 0.5) + alpha[alpha > 1] = 1 + alpha[alpha < 0] = 0 + alpha_sum = alpha.sum() + for k, p in enumerate(net.net.parameters()): + for i in range(num_clients): + p.data.add_( + (alpha[i] / alpha_sum) * + grads[i]['params'][k] + ) diff --git a/users/__init__.py b/users/__init__.py @@ -0,0 +1,5 @@ +""" +Module for various types of users within the federated learning system + +Author: Cody Lewis +""" diff --git a/users/adversaries.py b/users/adversaries.py @@ -0,0 +1,74 @@ +""" +Defines the adversaries within the system and a function to load them + +Author: Cody Lewis +""" + + +from itertools import cycle + +from users.client import Client +from utils.datasets import load_data +import utils.errors + + +class Flipper(Client): + """A simple label-flipping model poisoner""" + def __init__(self, options, classes): + super().__init__(options, classes) + self.shadow_data = load_data(options, [options.adversaries['from']]) + self.shadow_data['dataloader'].dataset.targets[:] = \ + options.adversaries['to'] + self.epochs = 0 + if options.adversaries['delay'] is None: + self.delay_time = 0 + else: + self.delay_time = options.adversaries['delay'] + + def fit(self, verbose=False): + if self.epochs == self.delay_time: + self.data = self.shadow_data + self.epochs += 1 + return 
super().fit(verbose=verbose) + + +class OnOff(Client): + """ + Label flipping poisoner that switches its attack on and off every few + epochs + """ + def __init__(self, options, classes): + super().__init__(options, classes) + self.shadow_data = load_data(options, [options.adversaries['from']]) + self.shadow_data['dataloader'].dataset.targets[:] = \ + options.adversaries['to'] + self.toggle_time = cycle(self.options.adversaries['toggle_times']) + self.epochs = 0 + if self.options.adversaries['delay'] is None: + self.next_switch = self.epochs + next(self.toggle_time) + else: + self.next_switch = self.epochs + self.options.adversaries['delay'] + next(self.toggle_time) + + def fit(self, verbose=False): + if self.epochs == self.next_switch: + temp = self.data + self.data = self.shadow_data + self.shadow_data = temp + self.next_switch += next(self.toggle_time) + self.epochs += 1 + return super().fit(verbose=verbose) + + +def load_adversary(adversary_name): + """Load the class of the specified adversary""" + adversaries = { + "label flip": Flipper, + "on off": OnOff, + } + if (chosen_adversary := adversaries.get(adversary_name)) is None: + raise utils.errors.MisconfigurationError( + f"Model '{adversary_name}' does not exist, " + + f"possible options: {set(adversaries.keys())}" + ) + return chosen_adversary diff --git a/users/client.py b/users/client.py @@ -0,0 +1,27 @@ +""" +Classes and functions for the client networking aspect of federated learning + +Author: Cody Lewis +""" + +from utils.models import load_model +from utils.datasets import load_data + + +class Client: + """Federated learning client""" + def __init__(self, options, classes): + self.data = load_data(options, train=True, classes=classes) + params = options.model_params + params['num_in'] = self.data['x_dim'] + params['num_out'] = self.data['y_dim'] + self.net = load_model(params).to(params['device']) + self.options = options + + def fit(self, verbose=False): + """Fit the client to its own copy of data""" 
# ---------------------------------------------------------------------------
# utils/datasets.py
#
# Module for dataset classes and a function to load them
#
# Author: Cody Lewis
# ---------------------------------------------------------------------------

from math import floor
from abc import abstractmethod

import numpy as np
import torch
from torch.utils.data.dataset import Dataset
import pandas as pd

# torchvision, PIL and utils.errors are imported inside the functions that
# use them, so the tabular datasets and the error paths do not require the
# optional image stack to be installed.


class DatasetWrapper(Dataset):
    """
    Wrapper class for torch datasets to allow for easy non-iid splitting

    Subclasses are expected to fill in `targets` and `y_dim`, and `data` if
    they want to use `assign_to_classes`, and to implement `__getitem__`.
    """
    def __init__(self):
        self.targets = torch.tensor([])
        self.y_dim = 0

    def __len__(self):
        return len(self.targets)

    @abstractmethod
    def __getitem__(self, i):
        pass

    def get_dims(self):
        """
        Get the x and y dimensions of the dataset

        Returns (0, 0) for an empty dataset, otherwise the size of the first
        axis of the first sample together with `y_dim`.
        """
        if len(self) < 1:
            return (0, 0)
        x, _ = self[0]
        return (x.shape[0], self.y_dim)

    def get_idx(self, classes):
        """
        Get the ids of data belonging to the specified classes

        Keyword arguments:
        classes -- iterable of labels to select; an empty iterable selects
                   nothing
        """
        # Build the mask explicitly: summing an empty list of masks yields the
        # int 0, which has no `.bool()`, so an empty `classes` used to crash.
        mask = torch.zeros_like(self.targets, dtype=torch.bool)
        for label in classes:
            mask |= (self.targets == label)
        return torch.arange(len(self.targets))[mask]

    def assign_to_classes(self, classes):
        """Leave only data belonging to the classes within this set"""
        idx = self.get_idx(classes)
        self.data = self.data[idx]
        self.targets = self.targets[idx]


class MNIST(DatasetWrapper):
    """The MNIST dataset in torch readable form"""
    def __init__(self, ds_path, train=True, download=False, classes=None):
        import torchvision

        super().__init__()
        ds = torchvision.datasets.MNIST(
            ds_path,
            train=train,
            download=download
        )
        # Each image is flattened into a single float vector
        self.data = ds.data.flatten(1).float()
        self.targets = ds.targets
        # y_dim is computed before filtering, so it counts every label
        self.y_dim = len(self.targets.unique())
        if classes:
            self.assign_to_classes(classes)

    def __getitem__(self, i):
        return (self.data[i], self.targets[i])


class FashionMNIST(DatasetWrapper):
    """The Fashion MNIST dataset in torch readable form"""
    def __init__(self, ds_path, train=True, download=False, classes=None):
        import torchvision

        super().__init__()
        # This previously instantiated torchvision.datasets.MNIST, which made
        # the "fmnist" option load the handwritten digits instead.
        ds = torchvision.datasets.FashionMNIST(
            ds_path,
            train=train,
            download=download
        )
        self.data = ds.data.flatten(1).float()
        self.targets = ds.targets
        self.y_dim = len(self.targets.unique())
        if classes:
            self.assign_to_classes(classes)

    def __getitem__(self, i):
        return (self.data[i], self.targets[i])


class KDD99(DatasetWrapper):
    """The KDD Cup99 dataset in torch readable form"""
    def __init__(self, ds_path, train=True, download=False, classes=None):
        super().__init__()
        self.data = torch.tensor([])
        self.targets = torch.tensor([])
        split = 'train' if train else 'test'
        reader = pd.read_csv(
            f"{ds_path}/{split}/kddcup.data",
            header=None,
            iterator=True
        )
        # Number of rows to load: 70% of 494021 for training, 30% otherwise
        remaining = round(494021 * (0.7 if train else 0.3))
        chunk_size = 100_000
        features, labels = [], []
        try:
            # Read in bounded chunks; the last chunk holds the remainder
            while remaining > 0:
                n_rows = min(chunk_size, remaining)
                frame = reader.read(n_rows)
                chunk = torch.from_numpy(frame.to_numpy(np.dtype('float32')))
                # First column is dropped, last column is the label
                features.append(chunk[:, 1:-1])
                labels.append(chunk[:, -1])
                remaining -= n_rows
        finally:
            reader.close()
        if features:
            # Concatenate once instead of re-copying the tensor per chunk
            self.data = torch.cat(features)
            self.targets = torch.cat(labels)
        self.y_dim = len(self.targets.unique())
        if classes:
            self.assign_to_classes(classes)

    def __getitem__(self, i):
        return (self.data[i], self.targets[i].long())


class Amazon(DatasetWrapper):
    """The Amazon dataset in torch readable form"""
    def __init__(self, ds_path, train=True, download=False, classes=None):
        super().__init__()
        split = 'train' if train else 'test'
        df = pd.read_csv(f"{ds_path}/{split}/amazon.data", header=None)
        table = df.to_numpy(np.dtype('float32'))
        # Every column but the last is a feature, the last is the label
        self.data = torch.from_numpy(table[:, :-1])
        self.targets = torch.from_numpy(table[:, -1])
        self.y_dim = len(self.targets.unique())
        if classes:
            self.assign_to_classes(classes)

    def __getitem__(self, i):
        # Labels are stored as float32, so they are cast on access
        return (self.data[i], self.targets[i].long())


class VGGFace(DatasetWrapper):
    """The VGGFace dataset in torch readable form"""
    def __init__(self, ds_path, train=True, download=False, classes=None):
        from torchvision import transforms

        super().__init__()
        self.ds_path = f"{ds_path}/data"
        self.data_paths = []
        self.targets = []
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
        self.train = train
        if train:
            # Random crop and flip are only applied to the training split
            self.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])
        file_info = pd.read_csv(f"{ds_path}/top10_files.csv")
        # Rows are selected where train_flag == int(not train), i.e. flag 0
        # for the training split.
        # NOTE(review): confirm this matches the CSV's flag convention.
        selected = file_info[file_info['train_flag'] == int(not train)]
        unique_classes = set()
        for _, row in selected.iterrows():
            class_id = row['Class_ID']
            # y_dim counts every class of the split, not only the kept ones
            unique_classes.add(class_id)
            if not classes or class_id in classes:
                self.data_paths.append(
                    f"{self.ds_path}/{class_id}/{row['file']}"
                )
                self.targets.append(class_id)
        self.y_dim = len(unique_classes)
        self.data_paths = np.array(self.data_paths)
        self.targets = torch.tensor(self.targets)

    def __getitem__(self, idx):
        from PIL import Image

        if torch.is_tensor(idx):
            idx = idx.tolist()
        # Close the file handle once the transform has produced the tensor
        with Image.open(self.data_paths[idx]) as image:
            X = self.transform(image)
        return (X, self.targets[idx].long())


def load_data(options, train=True, shuffle=True, classes=None):
    """
    Load the specified dataset in a form suitable for the model

    Keyword arguments:
    options -- options for the simulation
    train -- load the training dataset if true otherwise load the validation
    shuffle -- passed to the DataLoader as its shuffle flag
    classes -- use only the classes in list, use all classes if empty list or
    None

    Returns a dict with the keys "dataloader", "x_dim" and "y_dim".
    Raises MisconfigurationError when options.dataset is not a known name.
    """
    datasets = {
        "mnist": MNIST,
        "fmnist": FashionMNIST,
        "kddcup99": KDD99,
        "amazon": Amazon,
        "vggface": VGGFace,
    }
    if (chosen_set := datasets.get(options.dataset)) is None:
        import utils.errors

        raise utils.errors.MisconfigurationError(
            f"Dataset '{options.dataset}' does not exist, " +
            f"possible options: {set(datasets.keys())}"
        )
    data = chosen_set(
        f"./data/{options.dataset}",
        train=train,
        download=True,
        classes=classes
    )
    x_dim, y_dim = data.get_dims()
    return {
        "dataloader": torch.utils.data.DataLoader(
            data,
            batch_size=options.model_params['batch_size'],
            shuffle=shuffle,
            pin_memory=True,
        ),
        "x_dim": x_dim,
        "y_dim": y_dim,
    }


# ---------------------------------------------------------------------------
# utils/errors.py
#
# Collection of various custom errors that may be raised
#
# Author: Cody Lewis
# ---------------------------------------------------------------------------


class MisconfigurationError(Exception):
    """For when there is a user mistake in a configuration file"""


# ---------------------------------------------------------------------------
# utils/models.py
#
# A model for ML Models and a function to load them
#
# Author: Cody Lewis
# ---------------------------------------------------------------------------

import torch.nn as nn
import torch.optim as optim


class Model(nn.Module):
    """
    Base class of the models, holds the training loop and the learning rate
    schedule

    Keyword arguments:
    params -- dict of model parameters; 'learning_rate' (sequence, the first
              entry is the starting rate), 'lr_changes' (sequence of step
              counts after which the next rate is taken) and 'device' are
              read by this class
    """
    def __init__(self, params):
        super().__init__()
        self.params = params
        rates = list(params['learning_rate'])
        self.lr = rates[0]
        # Rates still to be applied, in order
        self.learning_rates = rates[1:]
        self.lr_changes = list(params['lr_changes'])
        # Total number of optimisation steps taken over all fit calls
        self.epoch_count = 0

    @abstractmethod
    def forward(self, *x):
        pass

    def fit(self, data, epochs=1, verbose=True):
        """
        Fit the model for some epochs, return the last loss value and the
        gradients of the changed parameters

        Each epoch here is a single optimisation step on one batch.

        Keyword arguments:
        data -- iterable of (x, y) batches, e.g. a DataLoader
        epochs -- number of steps to train for
        verbose -- output training stats if True

        Returns (loss, update) where loss is the loss of the last step (None
        when epochs is 0) and update is a dict with "params", the last
        gradients scaled by the negative learning rate, and "data_count", the
        number of samples seen.
        """
        optimizer = optim.SGD(
            self.parameters(),
            lr=self.lr,
            momentum=0.9,
            weight_decay=0.0001
        )
        criterion = nn.CrossEntropyLoss()
        data_count = 0
        loss = None
        # Keep one iterator alive across steps. Rebuilding it every step made
        # an unshuffled loader return its first batch over and over.
        batches = iter(())
        for i in range(epochs):
            try:
                x, y = next(batches)
            except StopIteration:
                # Start (or restart) a pass over the data
                batches = iter(data)
                x, y = next(batches)
            optimizer.zero_grad()
            x = x.to(self.params['device'])
            y = y.to(self.params['device'])
            output = self(x)
            loss = criterion(output, y)
            if verbose:
                print(
                    f"Epoch {i + 1}/{epochs} loss: {loss}",
                    end="\r"
                )
            loss.backward()
            optimizer.step()
            data_count += len(y)
            self.epoch_count += 1
            if self.lr_changes and self.epoch_count > self.lr_changes[0]:
                self.lr = self.learning_rates.pop(0)
                del self.lr_changes[0]
                # Push the new rate into the live optimizer as well, it was
                # otherwise only picked up by the next call to fit
                for group in optimizer.param_groups:
                    group['lr'] = self.lr
        if verbose:
            print()
        return loss, {
            "params": [
                # A parameter without a gradient contributes a zero update
                -self.lr * p.grad if p.grad is not None
                else p.new_zeros(p.shape)
                for p in self.parameters()
            ],
            "data_count": data_count
        }

    def get_params(self):
        """
        Get the tensor form parameters of this model

        The returned tensors are the parameters' own data, not copies.
        """
        return [p.data for p in self.parameters()]

    def copy_params(self, params):
        """Copy input parameters into self"""
        for source, target in zip(params, self.parameters()):
            target.data.copy_(source)


class SoftMaxModel(Model):
    """The softmax perceptron class"""
    def __init__(self, params):
        super().__init__(params)
        hidden = params['num_in'] * params['params_mul']
        # NOTE(review): Model.fit trains with nn.CrossEntropyLoss, which
        # applies log-softmax itself, so the Softmax layer here means softmax
        # is applied twice during training. Left unchanged because removing
        # it changes what forward returns.
        self.features = nn.ModuleList([
            nn.Linear(params['num_in'], hidden),
            nn.Sigmoid(),
            nn.Linear(hidden, params['num_out']),
            nn.Softmax(dim=1)
        ]).eval()

    def forward(self, x):
        for feature in self.features:
            x = feature(x)
        return x


class SqueezeNet(Model):
    """The SqueezeNet DNN Class"""
    def __init__(self, params):
        import torchvision

        super().__init__(params)
        net = torchvision.models.squeezenet1_1(pretrained=True)
        # Replace the final convolution so it outputs num_out classes
        net.classifier[1] = nn.Conv2d(
            512, params['num_out'], kernel_size=(1, 1), stride=(1, 1)
        )
        self.features = nn.ModuleList(
            list(net.features) + list(net.classifier)
        ).eval()
        # The layers in self.features are the same objects as in net, so this
        # copies each tensor onto itself
        self.copy_params([p.data for p in net.parameters()])

    def forward(self, x):
        for feature in self.features:
            x = feature(x)
        return x.flatten(1)


def load_model(params):
    """
    Load the model specified in params

    Keyword arguments:
    params -- dict of model parameters, 'architecture' selects the class and
              the whole dict is passed to its constructor

    Raises MisconfigurationError when the architecture is not a known name.
    """
    models = {
        "softmax": SoftMaxModel,
        "squeeze": SqueezeNet,
    }
    model_name = params['architecture']
    if (chosen_model := models.get(model_name)) is None:
        import utils.errors

        raise utils.errors.MisconfigurationError(
            f"Model '{model_name}' does not exist, " +
            f"possible options: {set(models.keys())}"
        )
    return chosen_model(params)