From 39f344415c5df3d751feb47d8a83155b14c37e40 Mon Sep 17 00:00:00 2001
From: Rishi Sharma <33762894+rishi-s8@users.noreply.github.com>
Date: Mon, 14 Feb 2022 17:19:38 +0100
Subject: [PATCH] Add script and run.sh

---
 README.rst                                 | 9 +++++++++
 eval/epoch_configs/config_celeba.ini       | 4 ++--
 eval/epoch_configs/config_celeba_100.ini   | 4 ++--
 eval/epoch_configs/config_celeba_grow.ini  | 4 ++--
 eval/epoch_configs/config_femnist.ini      | 4 ++--
 eval/epoch_configs/config_femnist_100.ini  | 4 ++--
 eval/epoch_configs/config_femnist_grow.ini | 4 ++--
 7 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index 5acb1c4..7a77286 100644
--- a/README.rst
+++ b/README.rst
@@ -20,6 +20,15 @@ Setting up decentralizepy
 
     pip3 install --editable .\[dev\]
 
+----------------
+Running the code
+----------------
+
+* Choose and modify one of the config files in ``eval/{step,epoch}_configs``.
+* Modify the dataset paths and ``addresses_filepath`` in the chosen config file.
+* In ``eval/run.sh``, modify ``first_machine`` (used to compute the ``machine_id`` of every machine), ``original_config``, and other arguments as required.
+* Execute ``eval/run.sh`` on all machines simultaneously. A synchronization barrier at the start ensures that all processes begin training together.
+
 Node
 ----
 * The Manager. Optimizations at process level.
diff --git a/eval/epoch_configs/config_celeba.ini b/eval/epoch_configs/config_celeba.ini
index 4087828..782de8a 100644
--- a/eval/epoch_configs/config_celeba.ini
+++ b/eval/epoch_configs/config_celeba.ini
@@ -17,9 +17,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 512
+batch_size = 64
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
diff --git a/eval/epoch_configs/config_celeba_100.ini b/eval/epoch_configs/config_celeba_100.ini
index 910fb2d..3a8ea47 100644
--- a/eval/epoch_configs/config_celeba_100.ini
+++ b/eval/epoch_configs/config_celeba_100.ini
@@ -17,9 +17,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.Training
 training_class = Training
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 512
+batch_size = 64
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
diff --git a/eval/epoch_configs/config_celeba_grow.ini b/eval/epoch_configs/config_celeba_grow.ini
index 28eaaaa..8f0b718 100644
--- a/eval/epoch_configs/config_celeba_grow.ini
+++ b/eval/epoch_configs/config_celeba_grow.ini
@@ -17,9 +17,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 512
+batch_size = 64
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
diff --git a/eval/epoch_configs/config_femnist.ini b/eval/epoch_configs/config_femnist.ini
index 63ad462..b16bd9a 100644
--- a/eval/epoch_configs/config_femnist.ini
+++ b/eval/epoch_configs/config_femnist.ini
@@ -16,9 +16,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 1024
+batch_size = 128
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
diff --git a/eval/epoch_configs/config_femnist_100.ini b/eval/epoch_configs/config_femnist_100.ini
index 63c7f4f..0f7dd02 100644
--- a/eval/epoch_configs/config_femnist_100.ini
+++ b/eval/epoch_configs/config_femnist_100.ini
@@ -16,9 +16,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.Training
 training_class = Training
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 1024
+batch_size = 128
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
diff --git a/eval/epoch_configs/config_femnist_grow.ini b/eval/epoch_configs/config_femnist_grow.ini
index e91b3aa..b240c84 100644
--- a/eval/epoch_configs/config_femnist_grow.ini
+++ b/eval/epoch_configs/config_femnist_grow.ini
@@ -16,9 +16,9 @@ lr = 0.001
 [TRAIN_PARAMS]
 training_package = decentralizepy.training.GradientAccumulator
 training_class = GradientAccumulator
-rounds = 5
+rounds = 1
 full_epochs = True
-batch_size = 1024
+batch_size = 128
 shuffle = True
 loss_package = torch.nn
 loss_class = CrossEntropyLoss
--
GitLab
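
The README instructions above reference ``eval/run.sh``, but the script itself is not included in this diff, so its exact contents are unknown. A minimal sketch of how ``first_machine`` might be used to derive a per-host ``machine_id``, assuming each machine's id is its position relative to the first machine's IP address (the variable names ``first_machine`` and ``original_config`` come from the README; the address lookup and launch step are assumptions, not the actual script):

    #!/bin/bash
    # Hypothetical sketch only -- the real eval/run.sh is not part of this patch.
    first_machine=10                                   # assumed: last octet of the first machine's IP
    original_config=epoch_configs/config_celeba.ini    # assumed: any config from eval/{step,epoch}_configs

    # Derive this host's machine_id from its IP address relative to the first machine.
    ip_last_octet=$(hostname -I | awk '{print $1}' | cut -d. -f4)
    machine_id=$((ip_last_octet - first_machine))

    echo "Starting with machine_id=${machine_id} and config ${original_config}"
    # The actual training launch (and the synchronization barrier mentioned in the
    # README) would follow here in the real script.

Run the same script on every machine at roughly the same time; per the README, the barrier at startup keeps the processes from training until all of them have joined.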