commit a32c34595393986741b42bfc03aae74ef0350c20
parent f2e5997fe7107f055bbc6a6f9264136370c2b71f
Author: Cody Lewis <cody@codymlewis.com>
Date: Tue, 24 Nov 2020 16:58:22 +1100
Made code a folder setup neater
Diffstat:
17 files changed, 667 insertions(+), 651 deletions(-)
diff --git a/adversaries.py b/adversaries.py
@@ -1,74 +0,0 @@
-"""
-Defines the adversaries within the system and a function to load them
-
-Author: Cody Lewis
-"""
-
-
-from itertools import cycle
-
-from client import Client
-from datasets import load_data
-import errors
-
-
-class Flipper(Client):
- """A simple label-flipping model poisoner"""
- def __init__(self, options, classes):
- super().__init__(options, classes)
- self.shadow_data = load_data(options, [options.adversaries['from']])
- self.shadow_data['dataloader'].dataset.targets[:] = \
- options.adversaries['to']
- self.epochs = 0
- if options.adversaries['delay'] is None:
- self.delay_time = 0
- else:
- self.delay_time = options.adversaries['delay']
-
- def fit(self, verbose=False):
- if self.epochs == self.delay_time:
- self.data = self.shadow_data
- self.epochs += 1
- return super().fit(verbose=verbose)
-
-
-class OnOff(Client):
- """
- Label flipping poisoner that switches its attack on and off every few
- epochs
- """
- def __init__(self, options, classes):
- super().__init__(options, classes)
- self.shadow_data = load_data(options, [options.adversaries['from']])
- self.shadow_data['dataloader'].dataset.targets[:] = \
- options.adversaries['to']
- self.toggle_time = cycle(self.options.adversaries['toggle_times'])
- self.epochs = 0
- if self.options.adversaries['delay'] is None:
- self.next_switch = self.epochs + next(self.toggle_time)
- else:
- self.next_switch = self.epochs + self.options.adversaries['delay']
- next(self.toggle_time)
-
- def fit(self, verbose=False):
- if self.epochs == self.next_switch:
- temp = self.data
- self.data = self.shadow_data
- self.shadow_data = temp
- self.next_switch += next(self.toggle_time)
- self.epochs += 1
- return super().fit(verbose=verbose)
-
-
-def load_adversary(adversary_name):
- """Load the class of the specified adversary"""
- adversaries = {
- "label flip": Flipper,
- "on off": OnOff,
- }
- if (chosen_adversary := adversaries.get(adversary_name)) is None:
- raise errors.MisconfigurationError(
- f"Model '{adversary_name}' does not exist, " +
- f"possible options: {set(adversaries.keys())}"
- )
- return chosen_adversary
diff --git a/client.py b/client.py
@@ -1,27 +0,0 @@
-"""
-Classes and functions for the client networking aspect of federated learning
-
-Author: Cody Lewis
-"""
-
-from models import load_model
-from datasets import load_data
-
-
-class Client:
- """Federated learning client"""
- def __init__(self, options, classes):
- self.data = load_data(options, train=True, classes=classes)
- params = options.model_params
- params['num_in'] = self.data['x_dim']
- params['num_out'] = self.data['y_dim']
- self.net = load_model(params).to(params['device'])
- self.options = options
-
- def fit(self, verbose=False):
- """Fit the client to its own copy of data"""
- return self.net.fit(
- self.data['dataloader'],
- self.options.user_epochs,
- verbose=verbose
- )
diff --git a/datasets.py b/datasets.py
@@ -1,227 +0,0 @@
-"""
-Module for dataset classes and a function to load them
-
-Author: Cody Lewis
-"""
-
-from math import floor
-from abc import abstractmethod
-
-import numpy as np
-import torch
-import torchvision
-from torchvision import transforms
-from torch.utils.data.dataset import Dataset
-import pandas as pd
-from PIL import Image
-
-import errors
-
-
-class DatasetWrapper(Dataset):
- """Wrapper class for torch datasets to allow for easy non-iid splitting"""
- def __init__(self):
- self.targets = torch.tensor([])
- self.y_dim = 0
-
- def __len__(self):
- return len(self.targets)
-
- @abstractmethod
- def __getitem__(self, i):
- pass
-
- def get_dims(self):
- """Get the x and y dimensions of the dataset"""
- if len(self) < 1:
- return (0, 0)
- x, _ = self[0]
- return (x.shape[0], self.y_dim)
-
- def get_idx(self, classes):
- """Get the ids of data belong to the specified classes"""
- return torch.arange(len(self.targets))[
- sum([(self.targets == i).long() for i in classes]).bool()
- ]
-
- def assign_to_classes(self, classes):
- """Leave only data belonging to the classes within this set"""
- idx = self.get_idx(classes)
- self.data = self.data[idx]
- self.targets = self.targets[idx]
-
-
-class MNIST(DatasetWrapper):
- """The MNIST dataset in torch readable form"""
- def __init__(self, ds_path, train=True, download=False, classes=None):
- super().__init__()
- ds = torchvision.datasets.MNIST(
- ds_path,
- train=train,
- download=download
- )
- self.data = ds.data.flatten(1).float()
- self.targets = ds.targets
- self.y_dim = len(self.targets.unique())
- if classes:
- self.assign_to_classes(classes)
-
- def __getitem__(self, i):
- return (self.data[i], self.targets[i])
-
-
-class FashionMNIST(DatasetWrapper):
- """The Fashion MNIST dataset in torch readable form"""
- def __init__(self, ds_path, train=True, download=False, classes=None):
- super().__init__()
- ds = torchvision.datasets.MNIST(
- ds_path,
- train=train,
- download=download
- )
- self.data = ds.data.flatten(1).float()
- self.targets = ds.targets
- self.y_dim = len(self.targets.unique())
- if classes:
- self.assign_to_classes(classes)
-
- def __getitem__(self, i):
- return (self.data[i], self.targets[i])
-
-
-class KDD99(DatasetWrapper):
- """The KDD Cup99 dataset in torch readable form"""
- def __init__(self, ds_path, train=True, download=False, classes=None):
- super().__init__()
- self.data = torch.tensor([])
- self.targets = torch.tensor([])
- df = pd.read_csv(
- f"{ds_path}/{'train' if train else 'test'}/kddcup.data",
- header=None,
- iterator=True
- )
- nl = 0
- data_len = round(494021 * (0.7 if train else 0.3))
- read_amount = 100_000
- marker = floor(data_len / read_amount) * read_amount
- while read_amount > 0 and (nl := nl + read_amount) <= marker:
- line = df.read(read_amount)
- line = torch.from_numpy(line.to_numpy(np.dtype('float32')))
- self.data = torch.cat((self.data, line[:, 1:-1]))
- self.targets = torch.cat((self.targets, line[:, -1]))
- if nl == marker:
- marker = data_len
- read_amount = data_len % read_amount
- self.y_dim = len(self.targets.unique())
- if classes:
- self.assign_to_classes(classes)
-
- def __getitem__(self, i):
- return (self.data[i], self.targets[i].long())
-
-
-class Amazon(DatasetWrapper):
- """The Amazon dataset in torch readable form"""
- def __init__(self, ds_path, train=True, download=False, classes=None):
- super().__init__()
- df = pd.read_csv(
- f"{ds_path}/{'train' if train else 'test'}/amazon.data",
- header=None
- )
- data = df.to_numpy(np.dtype('float32'))
- self.data = torch.from_numpy(data[:, :-1])
- self.targets = torch.from_numpy(data[:, -1])
- self.y_dim = len(self.targets.unique())
- if classes:
- self.assign_to_classes(classes)
-
- def __getitem__(self, i):
- return (self.data[i], self.targets[i].long())
-
-
-class VGGFace(DatasetWrapper):
- """The VGGFace dataset in torch readable form"""
- def __init__(self, ds_path, train=True, download=False, classes=None):
- super().__init__()
- self.ds_path = f"{ds_path}/data"
- self.data_paths = []
- self.targets = []
- normalize = transforms.Normalize(
- mean=[0.485, 0.456, 0.406],
- std=[0.229, 0.224, 0.225]
- )
- self.train = train
- if train:
- self.transform = transforms.Compose([
- transforms.Resize(256),
- transforms.RandomResizedCrop(224),
- transforms.RandomHorizontalFlip(),
- transforms.ToTensor(),
- normalize,
- ])
- else:
- self.transform = transforms.Compose([
- transforms.Resize(256),
- transforms.CenterCrop(224),
- transforms.ToTensor(),
- normalize,
- ])
- file_info = pd.read_csv(f"{ds_path}/top10_files.csv")
- unique_classes = set()
- for _, r in file_info[file_info['train_flag'] == int(not train)].iterrows():
- if r['Class_ID'] not in unique_classes:
- unique_classes = unique_classes.union({r['Class_ID']})
- if not classes or r['Class_ID'] in classes:
- self.data_paths.append(f"{self.ds_path}/{r['Class_ID']}/{r['file']}")
- self.targets.append(r['Class_ID'])
- self.y_dim = len(unique_classes)
- self.data_paths = np.array(self.data_paths)
- self.targets = torch.tensor(self.targets)
-
- def __getitem__(self, idx):
- if torch.is_tensor(idx):
- idx = idx.tolist()
- X = Image.open(self.data_paths[idx])
- X = self.transform(X)
- return (X, self.targets[idx].long())
-
-
-def load_data(options, train=True, shuffle=True, classes=None):
- """
- Load the specified dataset in a form suitable for the model
-
- Keyword arguments:
- options -- options for the simulation
- train -- load the training dataset if true otherwise load the validation
- classes -- use only the classes in list, use all classes if empty list or
- None
- """
- datasets = {
- "mnist": MNIST,
- "fmnist": FashionMNIST,
- "kddcup99": KDD99,
- "amazon": Amazon,
- "vggface": VGGFace,
- }
- if (chosen_set := datasets.get(options.dataset)) is None:
- raise errors.MisconfigurationError(
- f"Dataset '{options.dataset}' does not exist, " +
- f"possible options: {set(datasets.keys())}"
- )
- data = chosen_set(
- f"./data/{options.dataset}",
- train=train,
- download=True,
- classes=classes
- )
- x_dim, y_dim = data.get_dims()
- return {
- "dataloader": torch.utils.data.DataLoader(
- data,
- batch_size=options.model_params['batch_size'],
- shuffle=shuffle,
- pin_memory=True,
- ),
- "x_dim": x_dim,
- "y_dim": y_dim,
- }
diff --git a/errors.py b/errors.py
@@ -1,2 +0,0 @@
-class MisconfigurationError(Exception):
- pass
diff --git a/global_model.py b/global_model.py
@@ -1,106 +0,0 @@
-"""
-Classes and functions for a global model for use within federated learning
-
-Author: Cody Lewis
-"""
-
-import torch
-
-from models import load_model
-import utils
-
-
-class GlobalModel:
- """The central global model for use within federated learning"""
- def __init__(self, num_in, num_out, options):
- self.params = options.model_params
- self.params['num_in'] = num_in
- self.params['num_out'] = num_out
- self.net = load_model(self.params).to(self.params['device'])
- self.histories = dict()
- self.fit_fun = {
- "federated averaging": fed_avg,
- "foolsgold": foolsgold
- }[options.fit_fun]
-
- def fit(self, grads, params):
- """Fit the model to some client gradients"""
- self.fit_fun(self, grads, params)
-
- def predict(self, x):
- """Predict the classes of the data x"""
- return self.net(x)
-
- def get_params(self):
- """Get the tensor form parameters of this model"""
- return self.net.get_params()
-
-
-def fed_avg(net, grads, params):
- """Perform federated averaging across the client gradients"""
- num_clients = len(grads)
- total_dc = sum([grads[i]["data_count"] for i in range(num_clients)])
- for k, p in enumerate(net.net.parameters()):
- for i in range(num_clients):
- with torch.no_grad():
- p.data.add_(
- (grads[i]["data_count"] / total_dc) *
- grads[i]["params"][k]
- )
-
-
-def find_feature_importance(net):
- """Get a vector indicating the importance of features in the network"""
- with torch.no_grad():
- w_t = utils.flatten_params(net.get_params(), net.params)
- return abs(w_t - w_t.mean()) / sum(abs(w_t))
-
-
-def foolsgold(net, grads, params):
- """Perform FoolsGold learning across the client gradients"""
- with torch.no_grad():
- flat_grads = utils.flatten_grads(grads, net.params)
- num_clients = len(grads)
- cs = torch.tensor(
- [[0 for _ in range(num_clients)] for _ in range(num_clients)],
- dtype=torch.float32
- )
- v = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32)
- alpha = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32)
- if len(net.histories) < num_clients:
- while len(net.histories) < num_clients:
- net.histories[len(net.histories)] = flat_grads[len(net.histories)]
- else:
- for i in range(num_clients):
- net.histories[i] += flat_grads[i]
- if params['importance']:
- feature_importance = find_feature_importance(net)
- else:
- feature_importance = torch.tensor([1]).to(net.params['device'])
- for i in range(num_clients):
- for j in {x for x in range(num_clients)} - {i}:
- cs[i][j] = torch.cosine_similarity(
- net.histories[i] * feature_importance,
- net.histories[j] * feature_importance,
- dim=0
- )
- v[i] = max(cs[i])
- del feature_importance
- for i in range(num_clients):
- for j in range(num_clients):
- if (v[j] > v[i]) and (v[j] != 0):
- cs[i][j] *= v[i] / v[j]
- alpha[i] = 1 - max(cs[i])
- alpha = alpha / max(alpha)
- ids = alpha != 1
- alpha[ids] = params['kappa'] * (
- torch.log(alpha[ids] / (1 - alpha[ids])) + 0.5)
- alpha[alpha > 1] = 1
- alpha[alpha < 0] = 0
- alpha_sum = alpha.sum()
- for k, p in enumerate(net.net.parameters()):
- for i in range(num_clients):
- p.data.add_(
- (alpha[i] / alpha_sum) *
- grads[i]['params'][k]
- )
diff --git a/main.py b/main.py
@@ -9,14 +9,13 @@ Author: Cody Lewis
import random
import torch
-import numpy as np
-from adversaries import load_adversary
-from client import Client
-import errors
+from users.adversaries import load_adversary
+from users.client import Client
+import utils.errors
from server import Server
import utils
-from datasets import load_data
+from utils.datasets import load_data
def index_match(arr):
@@ -48,10 +47,11 @@ def find_shards(num_users, num_classes, classes_per_user):
def run(program_flow, current, run_data):
+ """Run a part of the program"""
try:
program_flow[current](run_data)
return run_data
- except errors.MisconfigurationError as e:
+ except utils.errors.MisconfigurationError as e:
print(f"Miconfiguratation Error: {e}")
except KeyboardInterrupt:
print()
@@ -63,6 +63,7 @@ def run(program_flow, current, run_data):
def system_setup(run_data):
+ """Setup the system"""
run_data["options"] = utils.load_options()
if run_data["options"].verbosity > 0:
print("Options set as:")
@@ -72,7 +73,7 @@ def system_setup(run_data):
c = int(dev_name[dev_name.find(':') + 1:]) + 1
q = c > torch.cuda.device_count()
if p or q:
- raise errors.MisconfigurationError(
+ raise utils.errors.MisconfigurationError(
f"Device '{dev_name}' is not available on this machine"
)
run_data["train_data"] = load_data(
@@ -88,7 +89,9 @@ def system_setup(run_data):
run_data['sim_number'] = 0
return run_data
+
def setup_users(run_data):
+ """Setup the users/clients for the system"""
run_data["user_classes"] = [
Client if i <= run_data["options"].users * (
1 - run_data["options"].adversaries['percent_adv'])
@@ -112,6 +115,7 @@ def setup_users(run_data):
def run_simulations(run_data):
+ """Run the simulations"""
run_data["sim_confusion_matrices"] = torch.tensor([], dtype=int)
for i in range(run_data['sim_number'], run_data["options"].num_sims):
print(f"Simulation {i + 1}/{run_data['options'].num_sims}")
@@ -189,6 +193,7 @@ def run_simulations(run_data):
def write_results(run_data):
+ """Write all of the recorded results from the experiments"""
if run_data["options"].verbosity > 0:
print()
print(f"Writing confusion matrices to {run_data['options'].result_file}...")
@@ -201,7 +206,6 @@ def write_results(run_data):
return run_data
-
if __name__ == '__main__':
program_flow = {
"system_setup": system_setup,
diff --git a/models.py b/models.py
@@ -1,139 +0,0 @@
-"""
-A model for ML Models and a function to load them
-
-Author: Cody Lewis
-"""
-
-from abc import abstractmethod
-
-import torch.nn as nn
-import torch.optim as optim
-import torchvision
-
-import errors
-
-
-class Model(nn.Module):
- def __init__(self, params):
- super().__init__()
- self.params = params
- self.lr = params['learning_rate'][0]
- self.learning_rates = params['learning_rate'].copy()
- del self.learning_rates[0]
- self.lr_changes = params['lr_changes'].copy()
- self.epoch_count = 0
-
- @abstractmethod
- def forward(self, *x):
- pass
-
- def fit(self, data, epochs=1, verbose=True):
- """
- Fit the model for some epochs, return history of loss values and the
- gradients of the changed parameters
-
- Keyword arguments:
- x -- training data
- y -- training labels
- epochs -- number of epochs to train for
- verbose -- output training stats if True
- """
- optimizer = optim.SGD(
- self.parameters(),
- lr=self.lr,
- momentum=0.9,
- weight_decay=0.0001
- )
- criterion = nn.CrossEntropyLoss()
- data_count = 0
- for i in range(epochs):
- optimizer.zero_grad()
- x, y = next(iter(data))
- x = x.to(self.params['device'])
- y = y.to(self.params['device'])
- output = self(x)
- loss = criterion(output, y)
- if verbose:
- print(
- f"Epoch {i + 1}/{epochs} loss: {loss}",
- end="\r"
- )
- loss.backward()
- optimizer.step()
- data_count += len(y)
- self.epoch_count += 1
- if self.lr_changes and self.epoch_count > self.lr_changes[0]:
- self.lr = self.learning_rates[0]
- del self.learning_rates[0]
- del self.lr_changes[0]
- if verbose:
- print()
- return loss, {
- "params": [-self.lr * p.grad for p in self.parameters()],
- "data_count": data_count
- }
-
- def get_params(self):
- """Get the tensor form parameters of this model"""
- return [p.data for p in self.parameters()]
-
- def copy_params(self, params):
- """Copy input parameters into self"""
- for p, t in zip(params, self.parameters()):
- t.data.copy_(p)
-
-
-class SoftMaxModel(Model):
- """The softmax perceptron class"""
- def __init__(self, params):
- super().__init__(params)
- self.features = nn.ModuleList([
- nn.Linear(
- params['num_in'], params['num_in'] * params['params_mul']
- ),
- nn.Sigmoid(),
- nn.Linear(
- params['num_in'] * params['params_mul'], params['num_out']
- ),
- nn.Softmax(dim=1)
- ]).eval()
-
- def forward(self, x):
- for feature in self.features:
- x = feature(x)
- return x
-
-
-class SqueezeNet(Model):
- """The SqueezeNet DNN Class"""
- def __init__(self, params):
- super().__init__(params)
- net = torchvision.models.__dict__["squeezenet1_1"](pretrained=True)
- net.classifier[1] = nn.Conv2d(
- 512, params['num_out'], kernel_size=(1, 1), stride=(1, 1)
- )
- self.features = nn.ModuleList(
- [f for f in net.features] +
- [f for f in net.classifier]
- ).eval()
- super().copy_params([p.data for p in net.parameters()])
-
- def forward(self, x):
- for feature in self.features:
- x = feature(x)
- return x.flatten(1)
-
-
-def load_model(params):
- """Load the model specified in params"""
- models = {
- "softmax": SoftMaxModel,
- "squeeze": SqueezeNet,
- }
- model_name = params['architecture']
- if (chosen_model := models.get(model_name)) is None:
- raise errors.MisconfigurationError(
- f"Model '{model_name}' does not exist, " +
- f"possible options: {set(models.keys())}"
- )
- return chosen_model(params)
diff --git a/server.py b/server.py
@@ -1,68 +0,0 @@
-"""
-Classes and functions for the server networking aspect of federated learning
-
-Author: Cody Lewis
-"""
-
-
-import time
-
-import torch
-import torch.nn as nn
-
-from global_model import GlobalModel
-import utils
-
-
-class Server:
- """Federated learning server class"""
- def __init__(self, num_in, num_out, options):
- self.net = GlobalModel(
- num_in,
- num_out,
- options,
- )
- self.num_clients = 0
- self.clients = []
- self.nb_classes = num_out
- self.options = options
- self.confusion_matrices = torch.tensor([], dtype=int)
- self.criterion = nn.CrossEntropyLoss()
-
- def fit(self, dataloader, e, epochs):
- start = time.time()
- grads = []
- for c in self.clients:
- c.net.copy_params(self.net.get_params())
- grads.append(c.fit()[1])
- self.net.fit(grads, self.options.params)
- loss, confusion_matrix = utils.gen_confusion_matrix(
- self.net,
- dataloader,
- self.criterion,
- self.nb_classes,
- self.options
- )
- self.confusion_matrices = torch.cat(
- (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0))
- )
- stats = utils.gen_conf_stats(confusion_matrix, self.options)
- if self.options.verbosity > 0:
- print(
- f"[ E: {e + 1}/{epochs}, " +
- f"L: {loss:.6f}, " +
- f"Acc: {stats['accuracy']:.6f}, " +
- f"MCC: {stats['MCC']:.6f}, " +
- f"ASR: {stats['attack_success']:.6f}, " +
- f"T: {time.time() - start:.6f}s ]",
- end="\r" if self.options.verbosity < 2 else "\n"
- )
- del grads
-
- def add_clients(self, clients):
- """Add clients to the server"""
- self.num_clients += len(clients)
- self.clients.extend(clients)
-
- def get_conf_matrices(self):
- return self.confusion_matrices
diff --git a/server/__init__.py b/server/__init__.py
@@ -0,0 +1,68 @@
+"""
+Classes and functions for the server networking aspect of federated learning
+
+Author: Cody Lewis
+"""
+
+
+import time
+
+import torch
+import torch.nn as nn
+
+from server.global_model import GlobalModel
+import utils
+
+
+class Server:
+ """Federated learning server class"""
+ def __init__(self, num_in, num_out, options):
+ self.net = GlobalModel(
+ num_in,
+ num_out,
+ options,
+ )
+ self.num_clients = 0
+ self.clients = []
+ self.nb_classes = num_out
+ self.options = options
+ self.confusion_matrices = torch.tensor([], dtype=int)
+ self.criterion = nn.CrossEntropyLoss()
+
+ def fit(self, dataloader, e, epochs):
+ start = time.time()
+ grads = []
+ for c in self.clients:
+ c.net.copy_params(self.net.get_params())
+ grads.append(c.fit()[1])
+ self.net.fit(grads, self.options.params)
+ loss, confusion_matrix = utils.gen_confusion_matrix(
+ self.net,
+ dataloader,
+ self.criterion,
+ self.nb_classes,
+ self.options
+ )
+ self.confusion_matrices = torch.cat(
+ (self.confusion_matrices, confusion_matrix.unsqueeze(dim=0))
+ )
+ stats = utils.gen_conf_stats(confusion_matrix, self.options)
+ if self.options.verbosity > 0:
+ print(
+ f"[ E: {e + 1}/{epochs}, " +
+ f"L: {loss:.6f}, " +
+ f"Acc: {stats['accuracy']:.6f}, " +
+ f"MCC: {stats['MCC']:.6f}, " +
+ f"ASR: {stats['attack_success']:.6f}, " +
+ f"T: {time.time() - start:.6f}s ]",
+ end="\r" if self.options.verbosity < 2 else "\n"
+ )
+ del grads
+
+ def add_clients(self, clients):
+ """Add clients to the server"""
+ self.num_clients += len(clients)
+ self.clients.extend(clients)
+
+ def get_conf_matrices(self):
+ return self.confusion_matrices
diff --git a/server/global_model.py b/server/global_model.py
@@ -0,0 +1,106 @@
+"""
+Classes and functions for a global model for use within federated learning
+
+Author: Cody Lewis
+"""
+
+import torch
+
+from utils.models import load_model
+import utils
+
+
+class GlobalModel:
+ """The central global model for use within federated learning"""
+ def __init__(self, num_in, num_out, options):
+ self.params = options.model_params
+ self.params['num_in'] = num_in
+ self.params['num_out'] = num_out
+ self.net = load_model(self.params).to(self.params['device'])
+ self.histories = dict()
+ self.fit_fun = {
+ "federated averaging": fed_avg,
+ "foolsgold": foolsgold
+ }[options.fit_fun]
+
+ def fit(self, grads, params):
+ """Fit the model to some client gradients"""
+ self.fit_fun(self, grads, params)
+
+ def predict(self, x):
+ """Predict the classes of the data x"""
+ return self.net(x)
+
+ def get_params(self):
+ """Get the tensor form parameters of this model"""
+ return self.net.get_params()
+
+
+def fed_avg(net, grads, params):
+ """Perform federated averaging across the client gradients"""
+ num_clients = len(grads)
+ total_dc = sum([grads[i]["data_count"] for i in range(num_clients)])
+ for k, p in enumerate(net.net.parameters()):
+ for i in range(num_clients):
+ with torch.no_grad():
+ p.data.add_(
+ (grads[i]["data_count"] / total_dc) *
+ grads[i]["params"][k]
+ )
+
+
+def find_feature_importance(net):
+ """Get a vector indicating the importance of features in the network"""
+ with torch.no_grad():
+ w_t = utils.flatten_params(net.get_params(), net.params)
+ return abs(w_t - w_t.mean()) / sum(abs(w_t))
+
+
+def foolsgold(net, grads, params):
+ """Perform FoolsGold learning across the client gradients"""
+ with torch.no_grad():
+ flat_grads = utils.flatten_grads(grads, net.params)
+ num_clients = len(grads)
+ cs = torch.tensor(
+ [[0 for _ in range(num_clients)] for _ in range(num_clients)],
+ dtype=torch.float32
+ )
+ v = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32)
+ alpha = torch.tensor([0 for _ in range(num_clients)], dtype=torch.float32)
+ if len(net.histories) < num_clients:
+ while len(net.histories) < num_clients:
+ net.histories[len(net.histories)] = flat_grads[len(net.histories)]
+ else:
+ for i in range(num_clients):
+ net.histories[i] += flat_grads[i]
+ if params['importance']:
+ feature_importance = find_feature_importance(net)
+ else:
+ feature_importance = torch.tensor([1]).to(net.params['device'])
+ for i in range(num_clients):
+ for j in {x for x in range(num_clients)} - {i}:
+ cs[i][j] = torch.cosine_similarity(
+ net.histories[i] * feature_importance,
+ net.histories[j] * feature_importance,
+ dim=0
+ )
+ v[i] = max(cs[i])
+ del feature_importance
+ for i in range(num_clients):
+ for j in range(num_clients):
+ if (v[j] > v[i]) and (v[j] != 0):
+ cs[i][j] *= v[i] / v[j]
+ alpha[i] = 1 - max(cs[i])
+ alpha = alpha / max(alpha)
+ ids = alpha != 1
+ alpha[ids] = params['kappa'] * (
+ torch.log(alpha[ids] / (1 - alpha[ids])) + 0.5)
+ alpha[alpha > 1] = 1
+ alpha[alpha < 0] = 0
+ alpha_sum = alpha.sum()
+ for k, p in enumerate(net.net.parameters()):
+ for i in range(num_clients):
+ p.data.add_(
+ (alpha[i] / alpha_sum) *
+ grads[i]['params'][k]
+ )
diff --git a/users/__init__.py b/users/__init__.py
@@ -0,0 +1,5 @@
+"""
+Module for various types of users within the federated learning system
+
+Author: Cody Lewis
+"""
diff --git a/users/adversaries.py b/users/adversaries.py
@@ -0,0 +1,74 @@
+"""
+Defines the adversaries within the system and a function to load them
+
+Author: Cody Lewis
+"""
+
+
+from itertools import cycle
+
+from users.client import Client
+from utils.datasets import load_data
+import utils.errors
+
+
+class Flipper(Client):
+    """A simple label-flipping model poisoner"""
+    def __init__(self, options, classes):
+        super().__init__(options, classes)
+        # Poisoned copy: only the 'from' class, every label rewritten to 'to'
+        source_classes = [options.adversaries['from']]
+        self.shadow_data = load_data(options, classes=source_classes)
+        self.shadow_data['dataloader'].dataset.targets[:] = \
+            options.adversaries['to']
+        self.epochs = 0
+        self.delay_time = options.adversaries['delay'] or 0  # None -> no delay
+
+    def fit(self, verbose=False):
+        """Switch to the poisoned data once the delay is reached, then fit"""
+        if self.epochs == self.delay_time:
+            self.data = self.shadow_data
+        self.epochs += 1
+        return super().fit(verbose=verbose)
+
+
+class OnOff(Client):
+    """
+    Label flipping poisoner that switches its attack on and off every few
+    epochs
+    """
+    def __init__(self, options, classes):
+        super().__init__(options, classes)
+        source_classes = [options.adversaries['from']]
+        self.shadow_data = load_data(options, classes=source_classes)
+        self.shadow_data['dataloader'].dataset.targets[:] = \
+            options.adversaries['to']
+        self.toggle_time = cycle(self.options.adversaries['toggle_times'])
+        self.epochs = 0
+        if self.options.adversaries['delay'] is None:
+            self.next_switch = self.epochs + next(self.toggle_time)
+        else:
+            self.next_switch = self.epochs + self.options.adversaries['delay']
+            next(self.toggle_time)  # discard first interval (presumably replaced by the delay)
+
+    def fit(self, verbose=False):
+        """Exchange the active and dormant data on schedule, then fit"""
+        if self.epochs == self.next_switch:
+            self.data, self.shadow_data = self.shadow_data, self.data
+            self.next_switch += next(self.toggle_time)
+        self.epochs += 1
+        return super().fit(verbose=verbose)
+
+
+def load_adversary(adversary_name):
+    """Load the class of the specified adversary"""
+    adversaries = {
+        "label flip": Flipper,
+        "on off": OnOff,
+    }
+    if (chosen_adversary := adversaries.get(adversary_name)) is None:
+        raise utils.errors.MisconfigurationError(
+            f"Adversary '{adversary_name}' does not exist, " +
+            f"possible options: {set(adversaries.keys())}"
+        )
+    return chosen_adversary
diff --git a/users/client.py b/users/client.py
@@ -0,0 +1,27 @@
+"""
+Classes and functions for the client networking aspect of federated learning
+
+Author: Cody Lewis
+"""
+
+from utils.models import load_model
+from utils.datasets import load_data
+
+
+class Client:
+ """Federated learning client"""
+ def __init__(self, options, classes):
+ self.data = load_data(options, train=True, classes=classes)
+ params = options.model_params
+ params['num_in'] = self.data['x_dim']
+ params['num_out'] = self.data['y_dim']
+ self.net = load_model(params).to(params['device'])
+ self.options = options
+
+ def fit(self, verbose=False):
+ """Fit the client to its own copy of data"""
+ return self.net.fit(
+ self.data['dataloader'],
+ self.options.user_epochs,
+ verbose=verbose
+ )
diff --git a/utils.py b/utils/__init__.py
diff --git a/utils/datasets.py b/utils/datasets.py
@@ -0,0 +1,227 @@
+"""
+Module for dataset classes and a function to load them
+
+Author: Cody Lewis
+"""
+
+from math import floor
+from abc import abstractmethod
+
+import numpy as np
+import torch
+import torchvision
+from torchvision import transforms
+from torch.utils.data.dataset import Dataset
+import pandas as pd
+from PIL import Image
+
+import utils.errors
+
+
+class DatasetWrapper(Dataset):
+ """Wrapper class for torch datasets to allow for easy non-iid splitting"""
+ def __init__(self):
+ self.targets = torch.tensor([])
+ self.y_dim = 0
+
+ def __len__(self):
+ return len(self.targets)
+
+ @abstractmethod
+ def __getitem__(self, i):
+ pass
+
+ def get_dims(self):
+ """Get the x and y dimensions of the dataset"""
+ if len(self) < 1:
+ return (0, 0)
+ x, _ = self[0]
+ return (x.shape[0], self.y_dim)
+
+ def get_idx(self, classes):
+ """Get the ids of data belonging to the specified classes"""
+ return torch.arange(len(self.targets))[
+ sum([(self.targets == i).long() for i in classes]).bool()
+ ]
+
+ def assign_to_classes(self, classes):
+ """Leave only data belonging to the classes within this set"""
+ idx = self.get_idx(classes)
+ self.data = self.data[idx]
+ self.targets = self.targets[idx]
+
+
+class MNIST(DatasetWrapper):
+ """The MNIST dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
+ ds = torchvision.datasets.MNIST(
+ ds_path,
+ train=train,
+ download=download
+ )
+ self.data = ds.data.flatten(1).float()
+ self.targets = ds.targets
+ self.y_dim = len(self.targets.unique())
+ if classes:
+ self.assign_to_classes(classes)
+
+ def __getitem__(self, i):
+ return (self.data[i], self.targets[i])
+
+
+class FashionMNIST(DatasetWrapper):
+    """The Fashion MNIST dataset in torch readable form"""
+    def __init__(self, ds_path, train=True, download=False, classes=None):
+        super().__init__()
+        ds = torchvision.datasets.FashionMNIST(  # Fashion-MNIST, not digits
+            ds_path,
+            train=train,
+            download=download
+        )
+        self.data = ds.data.flatten(1).float()  # one flat float row per image
+        self.targets = ds.targets
+        self.y_dim = len(self.targets.unique())  # counted before filtering
+        if classes:
+            self.assign_to_classes(classes)
+
+    def __getitem__(self, i):
+        return (self.data[i], self.targets[i])
+
+
+class KDD99(DatasetWrapper):
+ """The KDD Cup99 dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
+ self.data = torch.tensor([])
+ self.targets = torch.tensor([])
+ df = pd.read_csv(
+ f"{ds_path}/{'train' if train else 'test'}/kddcup.data",
+ header=None,
+ iterator=True
+ )
+ nl = 0
+ data_len = round(494021 * (0.7 if train else 0.3))
+ read_amount = 100_000
+ marker = floor(data_len / read_amount) * read_amount
+ while read_amount > 0 and (nl := nl + read_amount) <= marker:
+ line = df.read(read_amount)
+ line = torch.from_numpy(line.to_numpy(np.dtype('float32')))
+ self.data = torch.cat((self.data, line[:, 1:-1]))
+ self.targets = torch.cat((self.targets, line[:, -1]))
+ if nl == marker:
+ marker = data_len
+ read_amount = data_len % read_amount
+ self.y_dim = len(self.targets.unique())
+ if classes:
+ self.assign_to_classes(classes)
+
+ def __getitem__(self, i):
+ return (self.data[i], self.targets[i].long())
+
+
+class Amazon(DatasetWrapper):
+ """The Amazon dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
+ df = pd.read_csv(
+ f"{ds_path}/{'train' if train else 'test'}/amazon.data",
+ header=None
+ )
+ data = df.to_numpy(np.dtype('float32'))
+ self.data = torch.from_numpy(data[:, :-1])
+ self.targets = torch.from_numpy(data[:, -1])
+ self.y_dim = len(self.targets.unique())
+ if classes:
+ self.assign_to_classes(classes)
+
+ def __getitem__(self, i):
+ return (self.data[i], self.targets[i].long())
+
+
+class VGGFace(DatasetWrapper):
+ """The VGGFace dataset in torch readable form"""
+ def __init__(self, ds_path, train=True, download=False, classes=None):
+ super().__init__()
+ self.ds_path = f"{ds_path}/data"
+ self.data_paths = []
+ self.targets = []
+ normalize = transforms.Normalize(
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225]
+ )
+ self.train = train
+ if train:
+ self.transform = transforms.Compose([
+ transforms.Resize(256),
+ transforms.RandomResizedCrop(224),
+ transforms.RandomHorizontalFlip(),
+ transforms.ToTensor(),
+ normalize,
+ ])
+ else:
+ self.transform = transforms.Compose([
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ normalize,
+ ])
+ file_info = pd.read_csv(f"{ds_path}/top10_files.csv")
+ unique_classes = set()
+ for _, r in file_info[file_info['train_flag'] == int(not train)].iterrows():
+ if r['Class_ID'] not in unique_classes:
+ unique_classes = unique_classes.union({r['Class_ID']})
+ if not classes or r['Class_ID'] in classes:
+ self.data_paths.append(f"{self.ds_path}/{r['Class_ID']}/{r['file']}")
+ self.targets.append(r['Class_ID'])
+ self.y_dim = len(unique_classes)
+ self.data_paths = np.array(self.data_paths)
+ self.targets = torch.tensor(self.targets)
+
+ def __getitem__(self, idx):
+ if torch.is_tensor(idx):
+ idx = idx.tolist()
+ X = Image.open(self.data_paths[idx])
+ X = self.transform(X)
+ return (X, self.targets[idx].long())
+
+
+def load_data(options, train=True, shuffle=True, classes=None):
+ """
+ Load the specified dataset in a form suitable for the model
+
+ Keyword arguments:
+ options -- options for the simulation
+ train -- load the training dataset if true otherwise load the validation
+ classes -- use only the classes in list, use all classes if empty list or
+ None
+ """
+ datasets = {
+ "mnist": MNIST,
+ "fmnist": FashionMNIST,
+ "kddcup99": KDD99,
+ "amazon": Amazon,
+ "vggface": VGGFace,
+ }
+ if (chosen_set := datasets.get(options.dataset)) is None:
+ raise utils.errors.MisconfigurationError(
+ f"Dataset '{options.dataset}' does not exist, " +
+ f"possible options: {set(datasets.keys())}"
+ )
+ data = chosen_set(
+ f"./data/{options.dataset}",
+ train=train,
+ download=True,
+ classes=classes
+ )
+ x_dim, y_dim = data.get_dims()
+ return {
+ "dataloader": torch.utils.data.DataLoader(
+ data,
+ batch_size=options.model_params['batch_size'],
+ shuffle=shuffle,
+ pin_memory=True,
+ ),
+ "x_dim": x_dim,
+ "y_dim": y_dim,
+ }
diff --git a/utils/errors.py b/utils/errors.py
@@ -0,0 +1,9 @@
+"""
+Collection of various custom errors that may be raised
+
+Author: Cody Lewis
+"""
+
+
+class MisconfigurationError(Exception):
+ """For when there is a user mistake in a configuration file"""
diff --git a/utils/models.py b/utils/models.py
@@ -0,0 +1,139 @@
+"""
+A base class for ML models, the available models, and a function to load them
+
+Author: Cody Lewis
+"""
+
+from abc import abstractmethod
+
+import torch.nn as nn
+import torch.optim as optim
+import torchvision
+
+import utils.errors
+
+
+class Model(nn.Module):
+ """
+ Base class for the models, holds the training step, the learning rate
+ schedule and the parameter accessors that the models share
+ """
+ def __init__(self, params):
+ """
+ Keyword arguments:
+ params -- model parameters, 'learning_rate' and 'lr_changes' are read
+ here and 'device' is read in fit
+ """
+ super().__init__()
+ self.params = params
+ # The first learning rate is used straight away, the remaining ones are
+ # kept as the rates to switch to later
+ self.lr = params['learning_rate'][0]
+ self.learning_rates = params['learning_rate'].copy()
+ del self.learning_rates[0]
+ # Copied so that the deletions made in fit do not alter params
+ self.lr_changes = params['lr_changes'].copy()
+ self.epoch_count = 0
+
+ # NOTE(review): nn.Module presumably does not use ABCMeta, in which case
+ # this decorator is not enforced on instantiation -- confirm
+ @abstractmethod
+ def forward(self, *x):
+ """Compute the output of the model, to be defined by the subclasses"""
+ pass
+
+ def fit(self, data, epochs=1, verbose=True):
+ """
+ Fit the model for some epochs, return the last loss value and the
+ gradients of the parameters scaled by the negative learning rate
+
+ Keyword arguments:
+ data -- iterable that yields (x, y) batches, e.g. a DataLoader
+ epochs -- number of epochs to train for
+ verbose -- output training stats if True
+
+ Returns a tuple of the loss and a dictionary with "params", the list of
+ -lr * gradient for each parameter, and "data_count", the sum of len(y)
+ over the batches that were used
+ """
+ # The optimizer takes the value of self.lr at this point, a later
+ # assignment to self.lr is not passed to this optimizer object
+ optimizer = optim.SGD(
+ self.parameters(),
+ lr=self.lr,
+ momentum=0.9,
+ weight_decay=0.0001
+ )
+ criterion = nn.CrossEntropyLoss()
+ data_count = 0
+ for i in range(epochs):
+ optimizer.zero_grad()
+ # A new iterator is made over data each time, so only the first batch
+ # that it yields is used
+ x, y = next(iter(data))
+ x = x.to(self.params['device'])
+ y = y.to(self.params['device'])
+ output = self(x)
+ loss = criterion(output, y)
+ if verbose:
+ print(
+ f"Epoch {i + 1}/{epochs} loss: {loss}",
+ end="\r"
+ )
+ loss.backward()
+ optimizer.step()
+ data_count += len(y)
+ self.epoch_count += 1
+ # Move to the next learning rate once the epoch count has passed the
+ # next change point, then drop both from their lists
+ # NOTE(review): learning_rates[0] raises an IndexError if that list runs
+ # out before lr_changes does -- confirm the configuration keeps them in
+ # step
+ if self.lr_changes and self.epoch_count > self.lr_changes[0]:
+ self.lr = self.learning_rates[0]
+ del self.learning_rates[0]
+ del self.lr_changes[0]
+ if verbose:
+ print()
+ # NOTE(review): loss looks to be assigned only in the loop, so epochs=0
+ # would reach this return without it -- confirm callers never pass 0
+ # The gradients are scaled with the current self.lr, which is the new
+ # rate if the schedule changed it above
+ return loss, {
+ "params": [-self.lr * p.grad for p in self.parameters()],
+ "data_count": data_count
+ }
+
+ def get_params(self):
+ """Get the tensor form parameters of this model"""
+ return [p.data for p in self.parameters()]
+
+ def copy_params(self, params):
+ """
+ Copy input parameters into self
+
+ Keyword arguments:
+ params -- tensors in the same order as self.parameters()
+ """
+ # zip pairs the tensors by position and stops at the shorter sequence
+ for p, t in zip(params, self.parameters()):
+ t.data.copy_(p)
+
+
+class SoftMaxModel(Model):
+ """The softmax perceptron class"""
+ def __init__(self, params):
+ super().__init__(params)
+ self.features = nn.ModuleList([
+ nn.Linear(
+ params['num_in'], params['num_in'] * params['params_mul']
+ ),
+ nn.Sigmoid(),
+ nn.Linear(
+ params['num_in'] * params['params_mul'], params['num_out']
+ ),
+ nn.Softmax(dim=1)
+ ]).eval()
+
+ def forward(self, x):
+ for feature in self.features:
+ x = feature(x)
+ return x
+
+
+class SqueezeNet(Model):
+ """The SqueezeNet DNN Class"""
+ def __init__(self, params):
+ super().__init__(params)
+ net = torchvision.models.__dict__["squeezenet1_1"](pretrained=True)
+ net.classifier[1] = nn.Conv2d(
+ 512, params['num_out'], kernel_size=(1, 1), stride=(1, 1)
+ )
+ self.features = nn.ModuleList(
+ [f for f in net.features] +
+ [f for f in net.classifier]
+ ).eval()
+ super().copy_params([p.data for p in net.parameters()])
+
+ def forward(self, x):
+ for feature in self.features:
+ x = feature(x)
+ return x.flatten(1)
+
+
+def load_model(params):
+ """Load the model specified in params"""
+ models = {
+ "softmax": SoftMaxModel,
+ "squeeze": SqueezeNet,
+ }
+ model_name = params['architecture']
+ if (chosen_model := models.get(model_name)) is None:
+ raise utils.errors.MisconfigurationError(
+ f"Model '{model_name}' does not exist, " +
+ f"possible options: {set(models.keys())}"
+ )
+ return chosen_model(params)