viceroy

git clone git://git.codymlewis.com/viceroy.git
Log | Files | Refs | README

commit 06cee6e59611f8a26dc3f340921fb66f9511d8d0
parent c5a5e56303d6d233df5ea6df76f312dba90fd327
Author: Cody Lewis <cody@codymlewis.com>
Date:   Fri, 20 Nov 2020 14:24:50 +1100

Changed results to be a collection of confusion matrices

Diffstat:
M.gitignore | 2+-
Mmain.py | 35++++++++++++++++++++++++-----------
Moptions.json | 4++--
Dplot.R | 116-------------------------------------------------------------------------------
Aplot.py | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dplot.sh | 4----
Mserver.py | 33+++++++++++++++++++++++----------
Mutils.py | 84++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
8 files changed, 182 insertions(+), 166 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -5,5 +5,5 @@ Session.vim
 tags
 data
 data/*
-results.log
+results.pt
 *.png
diff --git a/main.py b/main.py
@@ -9,6 +9,7 @@ Author: Cody Lewis
 import random
 
 import torch
+import numpy as np
 
 from adversaries import load_adversary
 from client import Client
@@ -88,7 +89,7 @@ if __name__ == '__main__':
             print("Assigned class shards:")
             print(class_shards)
             print()
-        experiment_stats = {"accuracies": [], "attack_successes": []}
+        sim_confusion_matrices = torch.tensor([], dtype=int)
         for i in range(options.num_sims):
             print(f"Simulation {i + 1}/{options.num_sims}")
             server = Server(
@@ -105,21 +106,32 @@
                 ]
             )
             print("Starting training...")
-            accuracies, attack_successes = server.fit(
+            confusion_matrices = server.fit(
                 val_data['dataloader'], options.server_epochs
             )
-            experiment_stats['accuracies'].append(accuracies)
-            experiment_stats['attack_successes'].append(attack_successes)
+            sim_confusion_matrices = torch.cat(
+                (sim_confusion_matrices, confusion_matrices.unsqueeze(dim=0))
+            )
             if options.verbosity > 0:
                 print("Done training.")
                 criterion = torch.nn.CrossEntropyLoss()
-                stats = utils.find_stats(
-                    server.net, train_data['dataloader'], criterion, options
+                loss, conf_mat = utils.gen_confusion_matrix(
+                    server.net,
+                    train_data['dataloader'],
+                    criterion,
+                    server.nb_classes,
+                    options
                 )
-                stats_val = utils.find_stats(
-                    server.net, val_data['dataloader'], criterion, options
+                stats = utils.gen_conf_stats(conf_mat, options)
+                loss_val, conf_mat = utils.gen_confusion_matrix(
+                    server.net,
+                    val_data['dataloader'],
+                    criterion,
+                    server.nb_classes,
+                    options
                 )
-                print(f"Loss: t: {stats['loss']}, v: {stats_val['loss']}")
+                stats_val = utils.gen_conf_stats(conf_mat, options)
+                print(f"Loss: t: {loss}, v: {loss_val}")
                 print(f"Accuracy: t: {stats['accuracy'] * 100}%, ", end="")
                 print(f"v: {stats_val['accuracy'] * 100}%")
                 print(
@@ -129,8 +141,9 @@ if __name__ == '__main__':
                 print(f"v: {stats_val['attack_success'] * 100}%")
                 print()
         if options.verbosity > 0:
-            print(f"Writing averaged results to {options.result_log_file}...")
-            utils.write_log(options.result_log_file, experiment_stats)
+            print(f"Writing confusion matrices to {options.result_file}...")
+            utils.write_results(options.result_file, sim_confusion_matrices)
+            # print(utils.gen_experiment_stats(sim_confusion_matrices, options))
         if options.verbosity > 0:
             print("Done.")
     except errors.MisconfigurationError as e:
diff --git a/options.json b/options.json
@@ -1,6 +1,6 @@
 {
     "dataset": "kddcup99",
-    "num_sims": 1,
+    "num_sims": 5,
     "server_epochs": 300,
     "user_epochs": 1,
     "users": 23,
@@ -27,5 +27,5 @@
     "class_shards": [[0, 22], [1, 21], [2, 20], [3, 19], [4, 18], [5, 17], [6, 16], [7, 15], [8, 14], [9, 13], [10, 12], [11, 11], [12, 1], [13, 10], [14, 7], [15, 11], [16, 9], [17, 13], [18, 6], [19, 21], [20, 3], [21, 14], [22, 19]],
     "classes_per_user": 2,
     "verbosity": 1,
-    "result_log_file": "./results.log"
+    "result_file": "./results.pt"
 }
diff --git a/plot.R b/plot.R
@@ -1,116 +0,0 @@
-#!/usr/bin/env Rscript
-
-library(ggplot2)
-library(jsonlite)
-library(stringr)
-library(itertools)
-
-
-main <- function(results) {
-    options <- fromJSON("options.json")
-    attack <- `if`(options$adversaries$type == "on off",
-        sprintf(
-            "On-Off Attack with %d Epoch Toggle",
-            options$adversaries$toggle_time
-        ),
-        sprintf(
-            "%s Attack",
-            str_to_title(options$adversaries$type)
-        )
-    )
-    attack <- `if`(options$adversaries$percent_adv > 0,
-        sprintf(
-            "%d%% %d->%d %s",
-            options$adversaries$percent_adv * 100,
-            options$adversaries$from,
-            options$adversaries$to,
-            attack
-        ),
-        "No Attack"
-    )
-    title <- sprintf(
-        "Performance of %s under %s",
-        str_to_title(options$fit_fun),
-        attack
-    )
-    df <- read.csv(results)
-    df$epoch <- 1:length(df$accuracy)
-    gp <- ggplot(data=df, aes(x=epoch))
-    if(options$adversaries$percent_adv > 0 &&
-       options$adversaries$type == "on off") {
-        vals <- c(0)
-        attacking <- c()
-        toggles <- recycle(options$adversaries$toggle_times)
-        while(tail(vals, 1) < length(df$epoch)) {
-            toggle <- nextElem(toggles)
-            vals <- c(vals, tail(vals, 1) + toggle)
-            attacking <- c(attacking, rep(
-                `if`(
-                    length(attacking) == 0,
-                    0,
-                    `if`(tail(attacking, 1) == 1, 0, 1)
-                ),
-                toggle
-            ))
-        }
-        vals <- c(vals[c(-1, -length(vals))], length(df$epoch))
-        start <- df$epoch[attacking == 0]
-        end <- df$epoch[attacking == 1]
-        ids <- c(1:length(vals) %% 2, `if`(length(vals) %% 2 == 1, 0, NULL))
-        rects <- data.frame(
-            start=vals[ids == 1],
-            end=vals[ids == 0],
-            group=rep(c("on", "off"), length(ids) / 2)
-        )
-        gp <- gp + geom_rect(
-            data=rects,
-            inherit.aes=FALSE,
-            aes(
-                xmin=start,
-                xmax=end,
-                ymin=0,
-                ymax=1,
-            ),
-            color="transparent",
-            fill="orange",
-            alpha=0.3
-        )
-    }
-    gp +
-        `if`(options$adversaries$percent_adv > 0,
-            geom_line(
-                aes(
-                    y=attack_success,
-                    colour="Attack Success Rate"
-                )
-            ),
-            NULL
-        ) +
-        geom_line(aes(y=accuracy, colour="Accuracy")) +
-        labs(
-            title=title,
-            x="Epochs",
-            y="Rate",
-            colour=NULL
-        ) +
-        scale_y_continuous(lim=c(0,1)) +
-        theme(
-            legend.position="bottom",
-            plot.title=element_text(size=11)
-        )
-    plot_img <- str_replace_all(
-        sprintf(
-            "%s %d %s.png",
-            options$fit_fun,
-            options$adversaries$percent_adv * 100,
-            options$adversaries$type),
-        " ",
-        "_"
-    )
-    ggsave(plot_img)
-    return(plot_img)
-}
-
-RESULTS <- "results.log"
-PLOT <- main(RESULTS)
-cat(sprintf("Done. Saved plot to %s\n", PLOT))
diff --git a/plot.py b/plot.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+from itertools import cycle
+
+import matplotlib.pyplot as plt
+from matplotlib.patches import Rectangle
+
+import torch
+
+import utils
+
+
+if __name__ == '__main__':
+    print("Calculating statistics and generating plot...")
+    options = utils.load_options()
+    sim_confusion_matrices = torch.load(options.result_file)
+    stats = utils.gen_experiment_stats(sim_confusion_matrices, options)
+    epochs = torch.arange(len(stats['accuracy'][0]))
+    fig, ax = plt.subplots()
+    if options.adversaries['percent_adv'] > 0:
+        if options.adversaries['type'] == "on off":
+            title = "On-Off Attack with {} Epoch Toggle".format(
+                options.adversaries['toggle_times']
+            )
+            i = 0
+            toggles = cycle(options.adversaries['toggle_times'])
+            on = False
+            nb_epochs = max(epochs)
+            while i < nb_epochs:
+                toggle = next(toggles)
+                if on:
+                    rect_width = min(nb_epochs - i, toggle)
+                    ax.add_patch(
+                        Rectangle(
+                            (i, 0),
+                            rect_width,
+                            1,
+                            color="red",
+                            alpha=0.2
+                        ),
+                    )
+                i += toggle
+                on = not on
+        else:
+            title = f"{options.adversaries['type']} Attack"
+        title = "{}% {}->{} {}".format(
+            options.adversaries['percent_adv'] * 100,
+            options.adversaries['from'],
+            options.adversaries['to'],
+            title
+        )
+    else:
+        title = "No Attack"
+    title = f"Performance of {options.fit_fun} under {title}"
+    for k in stats.keys():
+        ax.plot(epochs, stats[k].mean(dim=0), label=k.replace('_', ' '))
+    plt.xlabel("Epochs")
+    plt.ylabel("Rate")
+    plt.title(title.title(), fontdict={'fontsize': 8})
+    plt.legend(loc=1, fontsize=5, framealpha=0.4)
+    img_name = "{}_{}_{}_{}.png".format(
+        options.dataset,
+        options.fit_fun,
+        options.adversaries['percent_adv'] * 100,
+        options.adversaries['type']
+    ).replace(' ', '_')
+    plt.savefig(img_name, dpi=320, metadata={'comment': str(options)})
+    print(f"Done. Saved plot as {img_name}")
diff --git a/plot.sh b/plot.sh
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-Rscript plot.R && \
-    mogrify -comment "$(cat options.json)" plot.png
diff --git a/server.py b/server.py
@@ -4,6 +4,10 @@ Classes and functions for the server networking aspect of federated learning
 Author: Cody Lewis
 """
+
+import time
+
+import torch
 
 import torch.nn as nn
 
 from global_model import GlobalModel
@@ -20,34 +24,43 @@
         )
         self.num_clients = 0
         self.clients = []
+        self.nb_classes = num_out
         self.options = options
 
     def fit(self, dataloader, epochs):
-        accuracies, attack_successes = [], []
+        confusion_matrices = torch.tensor([], dtype=int)
        criterion = nn.CrossEntropyLoss()
        for e in range(epochs):
+            start = time.time()
            grads = []
            for c in self.clients:
                c.net.copy_params(self.net.get_params())
                grads.append(c.fit()[1])
            self.net.fit(grads, self.options.params)
-            stats = utils.find_stats(
-                self.net, dataloader, criterion, self.options
+            loss, confusion_matrix = utils.gen_confusion_matrix(
+                self.net,
+                dataloader,
+                criterion,
+                self.nb_classes,
+                self.options
+            )
+            confusion_matrices = torch.cat(
+                (confusion_matrices, confusion_matrix.unsqueeze(dim=0))
            )
-            accuracies.append(stats['accuracy'])
-            attack_successes.append(stats['attack_success'])
+            stats = utils.gen_conf_stats(confusion_matrix, self.options)
            if self.options.verbosity > 0:
                print(
-                    f"Epoch: {e + 1}/{epochs}, " +
-                    f"Loss: {stats['loss']:.6f}, " +
-                    f"Accuracy: {stats['accuracy']:.6f}, " +
-                    f"Attack Success Rate: {stats['attack_success']:.6f}",
+                    f"[ E: {e + 1}/{epochs}, " +
+                    f"L: {loss:.6f}, " +
+                    f"Acc: {stats['accuracy']:.6f}, " +
+                    f"ASR: {stats['attack_success']:.6f}, " +
+                    f"T: {time.time() - start:.6f}s ]",
                    end="\r" if self.options.verbosity < 2 else "\n"
                )
            del grads
        if self.options.verbosity > 0:
            print()
-        return accuracies, attack_successes
+        return confusion_matrices
 
     def add_clients(self, clients):
         """Add clients to the server"""
diff --git a/utils.py b/utils.py
@@ -8,38 +8,74 @@
 from typing import NamedTuple
 import json
 
 import torch
+from sklearn.metrics import confusion_matrix as cm
 import numpy as np
 
 
-def find_stats(model, dataloader, criterion, options):
-    """Find statistics on the model based on validation data"""
-    denom = 0
-    as_denom = 0
-    accuracy = 0
-    attack_success = 0
-    loss = 0
-    for x, y in dataloader:
-        with torch.no_grad():
+def gen_confusion_matrix(model, dataloader, criterion, nb_classes, options):
+    with torch.no_grad():
+        loss = 0
+        denom = 0
+        confusion_matrix = torch.zeros(nb_classes, nb_classes, dtype=int)
+        for x, y in dataloader:
            x = x.to(options.model_params['device'])
            y = y.to(options.model_params['device'])
            predictions = model.predict(x)
            loss += criterion(predictions, y)
-            predictions = torch.argmax(predictions, dim=1)
-            accuracy += (predictions == y).sum().item()
-            ids = y == options.adversaries['from']
-            attack_success += (
-                predictions[ids] == options.adversaries['to']
-            ).sum().item()
            denom += len(y)
-            as_denom += ids.sum().item()
+            predictions = torch.argmax(predictions, dim=1)
+            confusion_matrix += torch.from_numpy(
+                cm(predictions.cpu(), y.cpu(), labels=np.arange(nb_classes))
+            )
+    return loss / denom * 100, confusion_matrix
+
+
+def gen_conf_stats(confusion_matrix, options):
+    accuracy = 0
+    total = 0
+    attack_success_n = 0
+    attack_success_d = 0
+    for x, row in enumerate(confusion_matrix):
+        for y, cell in enumerate(row):
+            cell = int(cell)
+            if x == y:
+                accuracy += cell
+            if y == options.adversaries['from']:
+                if x == options.adversaries['to']:
+                    attack_success_n += cell
+                attack_success_d += cell
+            total += cell
+    f = lambda x, y: x / y if y > 0 else 0
     return {
-        "accuracy": accuracy / denom,
-        "attack_success": attack_success / as_denom,
-        "loss": loss / denom * 100,
+        "accuracy": f(accuracy, total),
+        "attack_success": f(attack_success_n, attack_success_d)
     }
 
 
+def gen_experiment_stats(sim_confusion_matrices, options):
+    stats = merge_dicts(
+        [gen_sim_stats(c, options) for c in sim_confusion_matrices]
+    )
+    for k, v in stats.items():
+        stats[k] = torch.tensor(v)
+    return stats
+
+
+def gen_sim_stats(confusion_matrices, options):
+    return merge_dicts(
+        [gen_conf_stats(c, options) for c in confusion_matrices]
+    )
+
+
+def merge_dicts(dict_list):
+    merged = {k: [] for k in dict_list[0].keys()}
+    for d in dict_list:
+        for k, v in d.items():
+            merged[k].append(v)
+    return merged
+
+
 def flatten_grads(grads, params):
     """Flatten gradients into vectors"""
     with torch.no_grad():
@@ -70,6 +106,10 @@ def write_log(log_file_name, stats):
            f.write(f"{i},{a},{b}\n")
 
 
+def write_results(result_file, confusion_matrices):
+    torch.save(confusion_matrices, result_file)
+
+
 class Options(NamedTuple):
     """Structure out the data from the options file"""
     dataset: str
@@ -84,7 +124,7 @@ class Options(NamedTuple):
     class_shards: list
     classes_per_user: int
     verbosity: int
-    result_log_file: str
+    result_file: str
 
     def __str__(self):
         new_line = '\n'
@@ -97,7 +137,7 @@ class Options(NamedTuple):
 Dataset: {self.dataset}
 Number of simulations: {self.num_sims}
 Verbosity: {self.verbosity}
-Log file: {self.result_log_file}
+Results file: {self.result_file}
 
 -----[ Model ]-----
 {new_line.join([f"{k}: {v}" for k, v in self.model_params.items()])}
@@ -138,6 +178,6 @@ def load_options():
            options['class_shards'],
            options['classes_per_user'],
            options['verbosity'],
-            options['result_log_file']
+            options['result_file']
        )
    return None
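
The new results format can also be consumed outside plot.py. A minimal, hypothetical sketch (not part of the commit) of loading results.pt and averaging accuracy over simulations, assuming the (num_sims, epochs, nb_classes, nb_classes) integer tensor produced by server.fit and saved by utils.write_results above:

# Illustrative sketch only; assumes the tensor layout described above.
import torch

sim_confusion_matrices = torch.load("./results.pt")
conf = sim_confusion_matrices.double()
# Accuracy per simulation and epoch: diagonal (correct predictions) over the matrix total.
correct = conf.diagonal(dim1=-2, dim2=-1).sum(dim=-1)  # shape (num_sims, epochs)
total = conf.sum(dim=(-2, -1))                         # shape (num_sims, epochs)
print((correct / total).mean(dim=0))                   # mean accuracy curve across simulations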