diff --git a/README.rst b/README.rst index 5acb1c43daadf40a8d6e0e06a61d0bcfa354b0b5..7a77286800b0ebcc5b574f441a6bf8d55cfe1369 100644 --- a/README.rst +++ b/README.rst @@ -20,6 +20,15 @@ Setting up decentralizepy pip3 install --editable .\[dev\] +---------------- +Running the code +---------------- + +* Choose and modify one of the config files in ``eval/{step,epoch}_configs``. +* Modify the dataset paths and ``addresses_filepath`` in the config file. +* In ``eval/run.sh``, modify ``first_machine`` (used to calculate the ``machine_id`` of all machines), ``original_config``, and other arguments as required. +* Execute ``eval/run.sh`` on all the machines simultaneously. There is a synchronization barrier mechanism at the start so that all processes start training together. + Node ---- * The Manager. Optimizations at process level. diff --git a/eval/epoch_configs/config_celeba.ini b/eval/epoch_configs/config_celeba.ini index 408782804c812ad5c3fbab0183f396dee1a24c65..782de8ac9d7c9dae2821169125df8aea633eaa05 100644 --- a/eval/epoch_configs/config_celeba.ini +++ b/eval/epoch_configs/config_celeba.ini @@ -17,9 +17,9 @@ lr = 0.001 [TRAIN_PARAMS] training_package = decentralizepy.training.GradientAccumulator training_class = GradientAccumulator -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 512 +batch_size = 64 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss diff --git a/eval/epoch_configs/config_celeba_100.ini b/eval/epoch_configs/config_celeba_100.ini index 910fb2d63f93fae94e876e28e0de71cf5c7349e1..3a8ea4727e906521cf4dd833ad971ff7396d727b 100644 --- a/eval/epoch_configs/config_celeba_100.ini +++ b/eval/epoch_configs/config_celeba_100.ini @@ -17,9 +17,9 @@ lr = 0.001 [TRAIN_PARAMS] training_package = decentralizepy.training.Training training_class = Training -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 512 +batch_size = 64 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss diff --git a/eval/epoch_configs/config_celeba_grow.ini 
b/eval/epoch_configs/config_celeba_grow.ini index 28eaaaaa16fb06a07156df227afd96f3dd77aae1..8f0b7180a02deb8bd3c17176915ee03eedcf8dae 100644 --- a/eval/epoch_configs/config_celeba_grow.ini +++ b/eval/epoch_configs/config_celeba_grow.ini @@ -17,9 +17,9 @@ lr = 0.001 [TRAIN_PARAMS] training_package = decentralizepy.training.GradientAccumulator training_class = GradientAccumulator -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 512 +batch_size = 64 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss diff --git a/eval/epoch_configs/config_femnist.ini b/eval/epoch_configs/config_femnist.ini index 63ad4622d2259555d22eff710a21ca26a7f8bee6..b16bd9aa25f9302fd16e0f9e7117d1b5ba273390 100644 --- a/eval/epoch_configs/config_femnist.ini +++ b/eval/epoch_configs/config_femnist.ini @@ -16,9 +16,9 @@ lr = 0.001 [TRAIN_PARAMS] training_package = decentralizepy.training.GradientAccumulator training_class = GradientAccumulator -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 1024 +batch_size = 128 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss diff --git a/eval/epoch_configs/config_femnist_100.ini b/eval/epoch_configs/config_femnist_100.ini index 63c7f4f9aaf3b629ecb8cd96b7e8b82ce543c698..0f7dd023b31b6556818962fb9c57fec6fa37ad56 100644 --- a/eval/epoch_configs/config_femnist_100.ini +++ b/eval/epoch_configs/config_femnist_100.ini @@ -16,9 +16,9 @@ lr = 0.001 [TRAIN_PARAMS] training_package = decentralizepy.training.Training training_class = Training -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 1024 +batch_size = 128 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss diff --git a/eval/epoch_configs/config_femnist_grow.ini b/eval/epoch_configs/config_femnist_grow.ini index e91b3aa6adb36346c07a7143ffa079c95f71e7eb..b240c8410c86266f3f8388cdcaea805ad5535891 100644 --- a/eval/epoch_configs/config_femnist_grow.ini +++ b/eval/epoch_configs/config_femnist_grow.ini @@ -16,9 +16,9 @@ lr = 0.001 [TRAIN_PARAMS] 
training_package = decentralizepy.training.GradientAccumulator training_class = GradientAccumulator -rounds = 5 +rounds = 1 full_epochs = True -batch_size = 1024 +batch_size = 128 shuffle = True loss_package = torch.nn loss_class = CrossEntropyLoss