diff --git a/eval/ip_addr_4Machines.json b/eval/ip_addr_4Machines.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b7765a7a3cf576a239f047b0b80d725a4969a0b
--- /dev/null
+++ b/eval/ip_addr_4Machines.json
@@ -0,0 +1,6 @@
+{
+    "0": "10.90.41.130",
+    "1": "10.90.41.131",
+    "2": "10.90.41.132",
+    "3": "10.90.41.133"
+}
\ No newline at end of file
diff --git a/eval/ip_addr_5Machines.json b/eval/ip_addr_5Machines.json
new file mode 100644
index 0000000000000000000000000000000000000000..55012428f3647b7cccd2c876cb3cf391bfcef898
--- /dev/null
+++ b/eval/ip_addr_5Machines.json
@@ -0,0 +1,7 @@
+{
+    "0": "10.90.41.129",
+    "1": "10.90.41.130",
+    "2": "10.90.41.131",
+    "3": "10.90.41.132",
+    "4": "10.90.41.133"
+}
\ No newline at end of file
diff --git a/src/decentralizepy/sharing/PartialModel.py b/src/decentralizepy/sharing/PartialModel.py
index 69875ada11a65be9fce5dd17cef8eeeb03b6497c..424fdcbf7c0f362d211059dc058f9dc3f82154d9
--- a/src/decentralizepy/sharing/PartialModel.py
+++ b/src/decentralizepy/sharing/PartialModel.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 
 import numpy
 import torch
@@ -28,6 +29,10 @@ class PartialModel(Sharing):
         self.alpha = alpha
         self.dict_ordered = dict_ordered
         self.communication_round = 0
+        self.folder_path = os.path.join(
+            self.log_dir, "shared_params/{}".format(self.rank)
+        )
+        Path(self.folder_path).mkdir(parents=True, exist_ok=True)
 
     def extract_top_gradients(self):
         logging.info("Summing up gradients")
@@ -48,26 +53,20 @@
         with torch.no_grad():
             _, G_topk = self.extract_top_gradients()
 
-            if self.communication_round:
-                with open(
-                    os.path.join(
-                        self.log_dir, "{}_shared_params.json".format(self.rank)
-                    ),
-                    "r",
-                ) as inf:
-                    shared_params = json.load(inf)
-            else:
-                shared_params = dict()
-                shared_params["order"] = list(self.model.state_dict().keys())
-                shapes = dict()
-                for k, v in self.model.state_dict().items():
-                    shapes[k] = list(v.shape)
-                shared_params["shapes"] = shapes
+            shared_params = dict()
+            shared_params["order"] = list(self.model.state_dict().keys())
+            shapes = dict()
+            for k, v in self.model.state_dict().items():
+                shapes[k] = list(v.shape)
+            shared_params["shapes"] = shapes
 
             shared_params[self.communication_round] = G_topk.tolist()
 
             with open(
-                os.path.join(self.log_dir, "{}_shared_params.json".format(self.rank)),
+                os.path.join(
+                    self.folder_path,
+                    "{}_shared_params.json".format(self.communication_round + 1),
+                ),
                 "w",
             ) as of:
                 json.dump(shared_params, of)
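
Note on the PartialModel change: `serialized_model` now writes the shared top-k indices of each communication round to its own file under `<log_dir>/shared_params/<rank>/<round + 1>_shared_params.json`, instead of re-reading and appending to a single `<rank>_shared_params.json`. The sketch below shows one way downstream analysis could collect those per-round dumps; it assumes only the file layout introduced in this diff, and the helper name `load_shared_params` plus the `logs/experiment_1` path are illustrative, not part of decentralizepy's API.

```python
# Hypothetical post-processing sketch (not part of this diff): read the
# per-round JSON files written by the modified PartialModel.serialized_model().
import json
import os
from glob import glob


def load_shared_params(log_dir, rank):
    """Return {communication_round: list of shared top-k indices} for one rank."""
    folder = os.path.join(log_dir, "shared_params", str(rank))
    rounds = {}
    for path in glob(os.path.join(folder, "*_shared_params.json")):
        with open(path, "r") as inf:
            data = json.load(inf)
        # Besides "order" and "shapes", each file holds exactly one round's
        # indices, keyed by the (stringified) communication round counter.
        for key, value in data.items():
            if key not in ("order", "shapes"):
                rounds[int(key)] = value
    return rounds


if __name__ == "__main__":
    shared = load_shared_params("logs/experiment_1", rank=0)
    for rnd, indices in sorted(shared.items()):
        print("round", rnd, "shared", len(indices), "parameter indices")
```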