From d05d893442dda3065acf154078f224112b3d2748 Mon Sep 17 00:00:00 2001
From: Rishi Sharma <rishi.sharma@epfl.ch>
Date: Thu, 10 Feb 2022 12:42:07 +0100
Subject: [PATCH] Bugfix ChangeAccumulator

The Training constructor and its subclasses ChangeAccumulator and
GradientAccumulator now take the node identity (rank, machine_id, and
the rank <--> uid mapping) as their leading parameters, and Node passes
these when constructing the trainer. Two over-long plot() calls in
eval/plot.py are also reformatted.
---
 eval/plot.py                                  | 14 +++++++++++--
 src/decentralizepy/node/Node.py               |  9 +++++++-
 .../training/ChangeAccumulator.py             | 21 ++++++++++++++++++-
 .../training/GradientAccumulator.py           | 21 ++++++++++++++++++-
 src/decentralizepy/training/Training.py       | 12 +++++++++++
 5 files changed, 72 insertions(+), 5 deletions(-)
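
Note (informational; text between the "---" marker and the first diff
header is ignored by git am): after this change, a trainer must be
constructed with the node identity first. A minimal sketch of the new
call order follows. It assumes the Linear mapping constructor takes
(n_machines, procs_per_machine), that model, optimizer, and loss_fn
already exist, and that the trailing parameters (rounds, full_epochs,
batch_size, shuffle) keep their defaults; the names are illustrative,
not part of this commit:

    from decentralizepy.mappings.Linear import Linear
    from decentralizepy.training.Training import Training

    # Hypothetical setup: 1 machine running 2 processes. model is a
    # torch.nn.Module, optimizer a torch.optim optimizer, loss_fn a
    # loss callable.
    mapping = Linear(1, 2)  # assumed: (n_machines, procs_per_machine)
    trainer = Training(
        0,           # rank: process rank local to the machine
        0,           # machine_id
        mapping,     # rank <--> uid mapping object
        model,
        optimizer,
        loss_fn,
        "./logs",    # log_dir
    )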

diff --git a/eval/plot.py b/eval/plot.py
index d15db46..d3c3a39 100644
--- a/eval/plot.py
+++ b/eval/plot.py
@@ -71,11 +71,21 @@ def plot_results(path):
         plot(means, stdevs, mins, maxs, "Testing Accuracy", folder, "lower right")
         plt.figure(6)
         means, stdevs, mins, maxs = get_stats([x["grad_std"] for x in results])
-        plot(means, stdevs, mins, maxs, "Gradient Variation over Nodes", folder, "upper right")
+        plot(
+            means,
+            stdevs,
+            mins,
+            maxs,
+            "Gradient Variation over Nodes",
+            folder,
+            "upper right",
+        )
         # Plot Testing loss
         plt.figure(7)
         means, stdevs, mins, maxs = get_stats([x["grad_mean"] for x in results])
-        plot(means, stdevs, mins, maxs, "Gradient Magnitude Mean", folder, "upper right")
+        plot(
+            means, stdevs, mins, maxs, "Gradient Magnitude Mean", folder, "upper right"
+        )
         # Collect total_bytes shared
         bytes_list = []
         for x in results:
diff --git a/src/decentralizepy/node/Node.py b/src/decentralizepy/node/Node.py
index 24c3168..e7aa916 100644
--- a/src/decentralizepy/node/Node.py
+++ b/src/decentralizepy/node/Node.py
@@ -177,7 +177,14 @@ class Node:
             ],
         )
         self.trainer = train_class(
-            self.model, self.optimizer, self.loss, self.log_dir, **train_params
+            self.rank,
+            self.machine_id,
+            self.mapping,
+            self.model,
+            self.optimizer,
+            self.loss,
+            self.log_dir,
+            **train_params
         )
 
     def init_comm(self, comm_configs):
diff --git a/src/decentralizepy/training/ChangeAccumulator.py b/src/decentralizepy/training/ChangeAccumulator.py
index cf9641f..2e10572 100644
--- a/src/decentralizepy/training/ChangeAccumulator.py
+++ b/src/decentralizepy/training/ChangeAccumulator.py
@@ -16,6 +16,9 @@ class ChangeAccumulator(Training):
 
     def __init__(
         self,
+        rank,
+        machine_id,
+        mapping,
         model,
         optimizer,
         loss,
@@ -31,6 +34,12 @@ class ChangeAccumulator(Training):
 
         Parameters
         ----------
+        rank : int
+            Rank of process local to the machine
+        machine_id : int
+            Machine ID on which the process is running
+        mapping : decentralizepy.mappings
+            The object containing the mapping rank <--> uid
         model : torch.nn.Module
             Neural Network for training
         optimizer : torch.optim
@@ -52,7 +61,17 @@ class ChangeAccumulator(Training):
 
         """
         super().__init__(
-            model, optimizer, loss, log_dir, rounds, full_epochs, batch_size, shuffle
+            rank,
+            machine_id,
+            mapping,
+            model,
+            optimizer,
+            loss,
+            log_dir,
+            rounds,
+            full_epochs,
+            batch_size,
+            shuffle,
         )
         self.save_accumulated = conditional_value(save_accumulated, "", True)
         self.communication_round = 0
diff --git a/src/decentralizepy/training/GradientAccumulator.py b/src/decentralizepy/training/GradientAccumulator.py
index 3171019..fcff8e6 100644
--- a/src/decentralizepy/training/GradientAccumulator.py
+++ b/src/decentralizepy/training/GradientAccumulator.py
@@ -11,6 +11,9 @@ class GradientAccumulator(Training):
 
     def __init__(
         self,
+        rank,
+        machine_id,
+        mapping,
         model,
         optimizer,
         loss,
@@ -25,6 +28,12 @@ class GradientAccumulator(Training):
 
         Parameters
         ----------
+        rank : int
+            Rank of process local to the machine
+        machine_id : int
+            Machine ID on which the process is running
+        mapping : decentralizepy.mappings
+            The object containing the mapping rank <--> uid
         model : torch.nn.Module
             Neural Network for training
         optimizer : torch.optim
@@ -44,7 +53,17 @@ class GradientAccumulator(Training):
 
         """
         super().__init__(
-            model, optimizer, loss, log_dir, rounds, full_epochs, batch_size, shuffle
+            rank,
+            machine_id,
+            mapping,
+            model,
+            optimizer,
+            loss,
+            log_dir,
+            rounds,
+            full_epochs,
+            batch_size,
+            shuffle,
         )
 
     def trainstep(self, data, target):
diff --git a/src/decentralizepy/training/Training.py b/src/decentralizepy/training/Training.py
index 47c8f77..3b99bef 100644
--- a/src/decentralizepy/training/Training.py
+++ b/src/decentralizepy/training/Training.py
@@ -13,6 +13,9 @@ class Training:
 
     def __init__(
         self,
+        rank,
+        machine_id,
+        mapping,
         model,
         optimizer,
         loss,
@@ -27,6 +30,12 @@ class Training:
 
         Parameters
         ----------
+        rank : int
+            Rank of process local to the machine
+        machine_id : int
+            Machine ID on which the process is running
+        mapping : decentralizepy.mappings
+            The object containing the mapping rank <--> uid
         model : torch.nn.Module
             Neural Network for training
         optimizer : torch.optim
@@ -49,6 +58,9 @@ class Training:
         self.optimizer = optimizer
         self.loss = loss
         self.log_dir = log_dir
+        self.rank = rank
+        self.machine_id = machine_id
+        self.mapping = mapping
         self.rounds = utils.conditional_value(rounds, "", int(1))
         self.full_epochs = utils.conditional_value(full_epochs, "", False)
         self.batch_size = utils.conditional_value(batch_size, "", int(1))
-- 
GitLab