import json import os import sys import numpy as np import pandas as pd import torch from matplotlib import pyplot as plt def get_stats(l): assert len(l) > 0 mean_dict, stdev_dict, min_dict, max_dict = {}, {}, {}, {} for key in l[0].keys(): all_nodes = [i[key] for i in l] all_nodes = np.array(all_nodes) mean = np.mean(all_nodes) std = np.std(all_nodes) min = np.min(all_nodes) max = np.max(all_nodes) mean_dict[int(key)] = mean stdev_dict[int(key)] = std min_dict[int(key)] = min max_dict[int(key)] = max return mean_dict, stdev_dict, min_dict, max_dict def plot(means, stdevs, mins, maxs, title, label, loc): plt.title(title) plt.xlabel("communication rounds") x_axis = list(means.keys()) y_axis = list(means.values()) err = list(stdevs.values()) plt.errorbar(x_axis, y_axis, yerr=err, label=label) plt.legend(loc=loc) def plot_results(path): """ plots the percentiles Based on plot.py Parameters ---------- path path to the folders from which to create the percentiles plots """ folders = os.listdir(path) folders.sort() print("Reading folders from: ", path) print("Folders: ", folders) for folder in folders: folder_path = os.path.join(path, folder) if not os.path.isdir(folder_path): continue results = [] all_shared_params = [] machine_folders = os.listdir(folder_path) for machine_folder in machine_folders: mf_path = os.path.join(folder_path, machine_folder) if not os.path.isdir(mf_path): continue files = os.listdir(mf_path) shared_params = [f for f in files if f.endswith("_shared_parameters.json")] files = [f for f in files if f.endswith("_results.json")] for f in files: filepath = os.path.join(mf_path, f) with open(filepath, "r") as inf: results.append(json.load(inf)) for sp in shared_params: filepath = os.path.join(mf_path, sp) with open(filepath, "r") as spf: all_shared_params.append(np.array(json.load(spf), dtype=np.int32)) # Plot Training loss plt.figure(1) # Average of the shared parameters mean = np.mean(all_shared_params, axis=0) std = np.std(all_shared_params, axis=0) with open( os.path.join(path, "shared_params_avg_" + folder + ".json"), "w" ) as mf: json.dump(mean.tolist(), mf) with open( os.path.join(path, "shared_params_std_" + folder + ".json"), "w" ) as sf: json.dump(std.tolist(), sf) # copy jupyter notebook code percentile = np.percentile(mean, np.arange(0, 100, 1)) plt.plot(np.arange(0, 100, 1), percentile, label=folder) plt.title("Shared parameters Percentiles") # plt.ylabel("Absolute frequency value") plt.xlabel("Percentiles") plt.xticks(np.arange(0, 110, 10)) plt.legend(loc="lower right") plt.figure(2) sort = torch.sort(torch.tensor(mean)).values print(sort) length = sort.shape[0] length = int(length / 20) bins = [ torch.sum(sort[length * i : length * (i + 1)]).item() for i in range(20) ] total = np.sum(bins) perc = bins / total # np.divide(bins, total) print(perc) plt.bar(np.arange(0, 97.5, 5), perc, width=5, align="edge", label=folder) plt.title("Shared parameters Percentiles") # plt.ylabel("Absolute frequency value") plt.xlabel("Percentiles") plt.legend(loc="lower right") plt.savefig(os.path.join(path, f"percentiles_histogram_{folder}.png"), dpi=300) plt.clf() plt.cla() plt.figure(1) plt.savefig(os.path.join(path, "percentiles.png"), dpi=300) if __name__ == "__main__": assert len(sys.argv) == 2 plot_results(sys.argv[1])