From dcbe550ab8648203c445ceb165e094836d4430d8 Mon Sep 17 00:00:00 2001
From: Rishi Sharma <rishi.sharma@epfl.ch>
Date: Thu, 20 Jan 2022 13:00:56 +0100
Subject: [PATCH] Modularize node, add step training

---
 eval/{ => epoch_configs}/config_celeba.ini    |  3 +-
 .../{ => epoch_configs}/config_celeba_100.ini |  3 +-
 .../config_celeba_grow.ini                    |  3 +-
 eval/{ => epoch_configs}/config_femnist.ini   |  3 +-
 .../config_femnist_100.ini                    |  3 +-
 .../config_femnist_grow.ini                   |  3 +-
 eval/ip_addr_7Machines.json                   |  9 +++++
 eval/step_configs/config_celeba.ini           | 34 +++++++++++++++++
 eval/step_configs/config_celeba_100.ini       | 34 +++++++++++++++++
 eval/step_configs/config_celeba_grow.ini      | 38 +++++++++++++++++++
 eval/step_configs/config_femnist.ini          | 33 ++++++++++++++++
 eval/step_configs/config_femnist_100.ini      | 33 ++++++++++++++++
 eval/step_configs/config_femnist_grow.ini     | 37 ++++++++++++++++++
 13 files changed, 230 insertions(+), 6 deletions(-)
 rename eval/{ => epoch_configs}/config_celeba.ini (96%)
 rename eval/{ => epoch_configs}/config_celeba_100.ini (96%)
 rename eval/{ => epoch_configs}/config_celeba_grow.ini (96%)
 rename eval/{ => epoch_configs}/config_femnist.ini (96%)
 rename eval/{ => epoch_configs}/config_femnist_100.ini (96%)
 rename eval/{ => epoch_configs}/config_femnist_grow.ini (96%)
 create mode 100644 eval/ip_addr_7Machines.json
 create mode 100644 eval/step_configs/config_celeba.ini
 create mode 100644 eval/step_configs/config_celeba_100.ini
 create mode 100644 eval/step_configs/config_celeba_grow.ini
 create mode 100644 eval/step_configs/config_femnist.ini
 create mode 100644 eval/step_configs/config_femnist_100.ini
 create mode 100644 eval/step_configs/config_femnist_grow.ini

diff --git a/eval/config_celeba.ini b/eval/epoch_configs/config_celeba.ini
similarity index 96%
rename from eval/config_celeba.ini
rename to eval/epoch_configs/config_celeba.ini
index 61d1b26..4087828 100644
--- a/eval/config_celeba.ini
+++ b/eval/epoch_configs/config_celeba.ini
@@ -17,7 +17,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 512
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/config_celeba_100.ini b/eval/epoch_configs/config_celeba_100.ini
similarity index 96%
rename from eval/config_celeba_100.ini
rename to eval/epoch_configs/config_celeba_100.ini
index dcaff4f..910fb2d 100644
--- a/eval/config_celeba_100.ini
+++ b/eval/epoch_configs/config_celeba_100.ini
@@ -17,7 +17,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.Training
 training_class = Training
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 512
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/config_celeba_grow.ini b/eval/epoch_configs/config_celeba_grow.ini
similarity index 96%
rename from eval/config_celeba_grow.ini
rename to eval/epoch_configs/config_celeba_grow.ini
index 5ac10c1..28eaaaa 100644
--- a/eval/config_celeba_grow.ini
+++ b/eval/epoch_configs/config_celeba_grow.ini
@@ -17,7 +17,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 512
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/config_femnist.ini b/eval/epoch_configs/config_femnist.ini
similarity index 96%
rename from eval/config_femnist.ini
rename to eval/epoch_configs/config_femnist.ini
index ec213c7..63ad462 100644
--- a/eval/config_femnist.ini
+++ b/eval/epoch_configs/config_femnist.ini
@@ -16,7 +16,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 1024
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/config_femnist_100.ini b/eval/epoch_configs/config_femnist_100.ini
similarity index 96%
rename from eval/config_femnist_100.ini
rename to eval/epoch_configs/config_femnist_100.ini
index c3f0ae2..63c7f4f 100644
--- a/eval/config_femnist_100.ini
+++ b/eval/epoch_configs/config_femnist_100.ini
@@ -16,7 +16,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.Training
 training_class = Training
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 1024
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/config_femnist_grow.ini b/eval/epoch_configs/config_femnist_grow.ini
similarity index 96%
rename from eval/config_femnist_grow.ini
rename to eval/epoch_configs/config_femnist_grow.ini
index b9b0abe..e91b3aa 100644
--- a/eval/config_femnist_grow.ini
+++ b/eval/epoch_configs/config_femnist_grow.ini
@@ -16,7 +16,8 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-epochs_per_round = 5
+rounds = 5
+full_epochs = True
 batch_size = 1024
 shuffle = True
 loss_package = torch.nn
diff --git a/eval/ip_addr_7Machines.json b/eval/ip_addr_7Machines.json
new file mode 100644
index 0000000..889afa0
--- /dev/null
+++ b/eval/ip_addr_7Machines.json
@@ -0,0 +1,9 @@
+{
+    "0": "10.90.41.127",
+    "1": "10.90.41.128",
+    "2": "10.90.41.129",
+    "3": "10.90.41.130",
+    "4": "10.90.41.131",
+    "5": "10.90.41.132",
+    "6": "10.90.41.133"
+}
\ No newline at end of file
diff --git a/eval/step_configs/config_celeba.ini b/eval/step_configs/config_celeba.ini
new file mode 100644
index 0000000..d58052e
--- /dev/null
+++ b/eval/step_configs/config_celeba.ini
@@ -0,0 +1,34 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Celeba
+dataset_class = Celeba
+model_class = CNN
+n_procs = 96
+images_dir = /home/risharma/leaf/data/celeba/data/raw/img_align_celeba
+train_dir = /home/risharma/leaf/data/celeba/per_user_data/train
+test_dir = /home/risharma/leaf/data/celeba/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.GradientAccumulator
+training_class = GradientAccumulator
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_6Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.PartialModel
+sharing_class = PartialModel
diff --git a/eval/step_configs/config_celeba_100.ini b/eval/step_configs/config_celeba_100.ini
new file mode 100644
index 0000000..2e351a4
--- /dev/null
+++ b/eval/step_configs/config_celeba_100.ini
@@ -0,0 +1,34 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Celeba
+dataset_class = Celeba
+model_class = CNN
+n_procs = 96
+images_dir = /home/risharma/leaf/data/celeba/data/raw/img_align_celeba
+train_dir = /home/risharma/leaf/data/celeba/per_user_data/train
+test_dir = /home/risharma/leaf/data/celeba/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.Training
+training_class = Training
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_6Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.Sharing
+sharing_class = Sharing
diff --git a/eval/step_configs/config_celeba_grow.ini b/eval/step_configs/config_celeba_grow.ini
new file mode 100644
index 0000000..a9cd92c
--- /dev/null
+++ b/eval/step_configs/config_celeba_grow.ini
@@ -0,0 +1,38 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Celeba
+dataset_class = Celeba
+model_class = CNN
+n_procs = 96
+images_dir = /home/risharma/leaf/data/celeba/data/raw/img_align_celeba
+train_dir = /home/risharma/leaf/data/celeba/per_user_data/train
+test_dir = /home/risharma/leaf/data/celeba/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.GradientAccumulator
+training_class = GradientAccumulator
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_6Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.GrowingAlpha
+sharing_class = GrowingAlpha
+init_alpha=0.10
+max_alpha=0.75
+k=6
+metadata_cap=0.65
diff --git a/eval/step_configs/config_femnist.ini b/eval/step_configs/config_femnist.ini
new file mode 100644
index 0000000..0bd4a55
--- /dev/null
+++ b/eval/step_configs/config_femnist.ini
@@ -0,0 +1,33 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Femnist
+dataset_class = Femnist
+model_class = CNN
+n_procs = 16
+train_dir = /home/risharma/leaf/data/femnist/per_user_data/train
+test_dir = /home/risharma/leaf/data/femnist/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.GradientAccumulator
+training_class = GradientAccumulator
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_6Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.PartialModel
+sharing_class = PartialModel
diff --git a/eval/step_configs/config_femnist_100.ini b/eval/step_configs/config_femnist_100.ini
new file mode 100644
index 0000000..aafe7d6
--- /dev/null
+++ b/eval/step_configs/config_femnist_100.ini
@@ -0,0 +1,33 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Femnist
+dataset_class = Femnist
+model_class = CNN
+n_procs = 16
+train_dir = /home/risharma/leaf/data/femnist/per_user_data/train
+test_dir = /home/risharma/leaf/data/femnist/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.Training
+training_class = Training
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_7Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.Sharing
+sharing_class = Sharing
diff --git a/eval/step_configs/config_femnist_grow.ini b/eval/step_configs/config_femnist_grow.ini
new file mode 100644
index 0000000..018cdc7
--- /dev/null
+++ b/eval/step_configs/config_femnist_grow.ini
@@ -0,0 +1,37 @@
+[DATASET]
+dataset_package = decentralizepy.datasets.Femnist
+dataset_class = Femnist
+model_class = CNN
+n_procs = 16
+train_dir = /home/risharma/leaf/data/femnist/per_user_data/train
+test_dir = /home/risharma/leaf/data/femnist/data/test
+; python list of fractions below
+sizes = 
+
+[OPTIMIZER_PARAMS]
+optimizer_package = torch.optim
+optimizer_class = Adam
+lr = 0.001
+
+[TRAIN_PARAMS]
+training_package = decentralizepy.training.GradientAccumulator
+training_class = GradientAccumulator
+rounds = 20
+full_epochs = False
+batch_size = 64
+shuffle = True
+loss_package = torch.nn
+loss_class = CrossEntropyLoss
+
+[COMMUNICATION]
+comm_package = decentralizepy.communication.TCP
+comm_class = TCP
+addresses_filepath = ip_addr_6Machines.json
+
+[SHARING]
+sharing_package = decentralizepy.sharing.GrowingAlpha
+sharing_class = GrowingAlpha
+init_alpha=0.10
+max_alpha=0.75
+k=8
+metadata_cap=0.65
\ No newline at end of file
-- 
GitLab
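
A note on the new TRAIN_PARAMS: this patch replaces the single
`epochs_per_round = 5` knob with the pair `rounds` / `full_epochs`, so a local
training round is measured either in full passes over the node's data
(`full_epochs = True`, the old behaviour) or in individual mini-batch steps
(`full_epochs = False`, the "step training" named in the subject). The sketch
below illustrates how a trainer could consume these options; it is not the
actual decentralizepy Training implementation, and `train_round` / `train_step`
are hypothetical names introduced only for illustration.

    # Hypothetical sketch, assuming a plain PyTorch training loop; not the
    # actual decentralizepy.training.Training class.
    import configparser

    from torch.utils.data import DataLoader


    def train_step(model, batch, loss_fn, optimizer):
        """One mini-batch gradient step."""
        data, target = batch
        optimizer.zero_grad()
        loss = loss_fn(model(data), target)
        loss.backward()
        optimizer.step()


    def train_round(model, dataset, loss_fn, optimizer,
                    rounds, full_epochs, batch_size, shuffle):
        """One communication round of local training.

        full_epochs = True  -> `rounds` counts full passes over the local
                               data (the old `epochs_per_round` behaviour).
        full_epochs = False -> `rounds` counts individual mini-batch steps.
        """
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
        if full_epochs:
            for _ in range(rounds):
                for batch in loader:
                    train_step(model, batch, loss_fn, optimizer)
        else:
            it = iter(loader)
            for _ in range(rounds):
                try:
                    batch = next(it)
                except StopIteration:  # data exhausted mid-round: restart
                    it = iter(loader)
                    batch = next(it)
                train_step(model, batch, loss_fn, optimizer)


    # Reading the options back out of one of the new step configs:
    config = configparser.ConfigParser()
    config.read("eval/step_configs/config_femnist.ini")
    params = config["TRAIN_PARAMS"]
    rounds = params.getint("rounds")                # 20
    full_epochs = params.getboolean("full_epochs")  # False -> step training
    batch_size = params.getint("batch_size")        # 64

Under this reading, the renamed epoch_configs keep the previous semantics
(rounds = 5, full_epochs = True), while the new step_configs trade fewer
samples per round (20 steps of batch size 64) for more frequent sharing.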