Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing with 1205 additions and 99 deletions
[DATASET]
dataset_package = decentralizepy.datasets.Reddit
dataset_class = Reddit
random_seed = 97
model_class = RNN
train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.001
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 47
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.Sharing
sharing_class = Sharing
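The sections above are wired together by dotted-path lookups: each *_package key names a module and each *_class key a class inside it. A minimal sketch of that resolution using only importlib (an illustration of the pattern, not decentralizepy's actual loader; it assumes torch and decentralizepy are installed):

# Illustration of the dotted-path pattern used by the *_package / *_class
# keys above; a sketch, not the library's actual config loader.
import importlib


def load_class(package_name, class_name):
    # e.g. load_class("torch.optim", "SGD") -> the torch.optim.SGD class
    module = importlib.import_module(package_name)
    return getattr(module, class_name)


if __name__ == "__main__":
    optimizer_cls = load_class("torch.optim", "SGD")
    dataset_cls = load_class("decentralizepy.datasets.Reddit", "Reddit")
    print(optimizer_cls, dataset_cls)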
[DATASET]
dataset_package = decentralizepy.datasets.Reddit
dataset_class = Reddit
random_seed = 97
model_class = RNN
train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.001
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 4
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.SubSampling
sharing_class = SubSampling
alpha = 0.1
[DATASET]
dataset_package = decentralizepy.datasets.Reddit
dataset_class = Reddit
random_seed = 97
model_class = RNN
train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.001
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 47
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.PartialModel
sharing_class = PartialModel
alpha = 0.1
accumulation = True
accumulate_averaging_changes = True
\ No newline at end of file
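In the [SHARING] sections above, alpha = 0.1 is the fraction of the model shared each round, and the accumulation flags keep a running total of unsent changes. A rough, hypothetical sketch of alpha-fraction top-k selection by change magnitude (an illustration of the idea, not the library's PartialModel implementation):

# Hypothetical sketch of alpha-fraction sharing: keep only the parameters
# whose change since the last shared model is largest in magnitude.
import torch


def topk_changes(prev_flat, curr_flat, alpha=0.1):
    change = curr_flat - prev_flat
    k = max(1, int(alpha * change.numel()))
    _, indices = torch.topk(change.abs(), k)
    return indices, change[indices]


if __name__ == "__main__":
    prev = torch.zeros(1000)
    curr = torch.randn(1000)
    idx, vals = topk_changes(prev, curr, alpha=0.1)
    print(idx.numel())  # 100 of 1000 entries selected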
[DATASET]
dataset_package = decentralizepy.datasets.Reddit
dataset_class = Reddit
random_seed = 97
model_class = RNN
train_dir = /mnt/nfs/shared/leaf/data/reddit_new/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/reddit_new/new_small_data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.001
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 47
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.Wavelet
sharing_class = Wavelet
change_based_selection = True
alpha = 0.1
wavelet = sym2
level = 4
accumulation = True
accumulate_averaging_changes = True
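The Wavelet sharer transforms the flattened model before selecting coefficients (change_based_selection), using the sym2 wavelet at level 4 and keeping an alpha fraction. An illustrative PyWavelets sketch of decompose / keep top-alpha / reconstruct; the function name and structure here are invented for illustration, not taken from the library:

# Illustrative only: decompose a flat parameter vector with sym2/level=4,
# keep the top alpha fraction of coefficients, and reconstruct.
import numpy as np
import pywt


def wavelet_topk_roundtrip(data, alpha=0.1, wavelet="sym2", level=4):
    coeffs = pywt.wavedec(data, wavelet, level=level)
    flat, slices = pywt.coeffs_to_array(coeffs)
    k = max(1, int(alpha * flat.size))
    keep = np.argsort(np.abs(flat))[-k:]  # indices of largest coefficients
    sparse = np.zeros_like(flat)
    sparse[keep] = flat[keep]
    rec_coeffs = pywt.array_to_coeffs(sparse, slices, output_format="wavedec")
    return pywt.waverec(rec_coeffs, wavelet)


if __name__ == "__main__":
    x = np.random.randn(1024)
    x_rec = wavelet_topk_roundtrip(x, alpha=0.1)
    print(np.linalg.norm(x - x_rec[: x.size]))  # reconstruction error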
[DATASET]
dataset_package = decentralizepy.datasets.Shakespeare
dataset_class = Shakespeare
random_seed = 97
model_class = LSTM
train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.1
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 10
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.PartialModel
sharing_class = PartialModel
alpha = 0.1
[DATASET]
dataset_package = decentralizepy.datasets.Shakespeare
dataset_class = Shakespeare
model_class = LSTM
train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.1
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 10
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.Sharing
sharing_class = Sharing
[DATASET]
dataset_package = decentralizepy.datasets.Shakespeare
dataset_class = Shakespeare
random_seed = 97
model_class = LSTM
train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.1
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 10
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.SubSampling
sharing_class = SubSampling
alpha = 0.1
[DATASET]
dataset_package = decentralizepy.datasets.Shakespeare
dataset_class = Shakespeare
random_seed = 97
model_class = LSTM
train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.1
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 10
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.PartialModel
sharing_class = PartialModel
alpha = 0.1
accumulation = True
accumulate_averaging_changes = True
\ No newline at end of file
[DATASET]
dataset_package = decentralizepy.datasets.Shakespeare
dataset_class = Shakespeare
random_seed = 97
model_class = LSTM
train_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/per_user_data/train
test_dir = /mnt/nfs/shared/leaf/data/shakespeare_sub96/data/test
; python list of fractions below
sizes =
[OPTIMIZER_PARAMS]
optimizer_package = torch.optim
optimizer_class = SGD
lr = 0.1
[TRAIN_PARAMS]
training_package = decentralizepy.training.Training
training_class = Training
rounds = 10
full_epochs = False
batch_size = 16
shuffle = True
loss_package = torch.nn
loss_class = CrossEntropyLoss
[COMMUNICATION]
comm_package = decentralizepy.communication.TCP
comm_class = TCP
addresses_filepath = ip_addr_6Machines.json
[SHARING]
sharing_package = decentralizepy.sharing.Wavelet
sharing_class = Wavelet
change_based_selection = True
alpha = 0.1
wavelet = sym2
level = 4
accumulation = True
accumulate_averaging_changes = True
@@ -8,7 +8,7 @@ from torch import multiprocessing as mp
from decentralizepy import utils
from decentralizepy.graphs.Graph import Graph
from decentralizepy.mappings.Linear import Linear
-from decentralizepy.node.Node import Node
+from decentralizepy.node.DPSGDNode import DPSGDNode
def read_ini(file_path):
@@ -50,17 +50,30 @@ if __name__ == "__main__":
    l = Linear(n_machines, procs_per_machine)
    m_id = args.machine_id

-    mp.spawn(
-        fn=Node,
-        nprocs=procs_per_machine,
-        args=[
-            m_id,
-            l,
-            g,
-            my_config,
-            args.iterations,
-            args.log_dir,
-            log_level[args.log_level],
-            args.test_after,
-        ],
-    )
+    processes = []
+    for r in range(procs_per_machine):
+        processes.append(
+            mp.Process(
+                target=DPSGDNode,
+                args=[
+                    r,
+                    m_id,
+                    l,
+                    g,
+                    my_config,
+                    args.iterations,
+                    args.log_dir,
+                    args.weights_store_dir,
+                    log_level[args.log_level],
+                    args.test_after,
+                    args.train_evaluate_after,
+                    args.reset_optimizer,
+                ],
+            )
+        )
+
+    for p in processes:
+        p.start()
+
+    for p in processes:
+        p.join()
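Each worker process gets a local rank r plus the machine id m_id, which the Linear mapping turns into a global uid. A sketch of that arithmetic, assuming uids are assigned consecutively per machine (an assumption inferred from Linear(n_machines, procs_per_machine), not verified against the mapping's source):

# Assumed sketch of a linear (machine_id, rank) -> uid mapping with
# procs_per_machine = 16, matching the Linear(6, 16) used elsewhere below.
def get_uid(rank, machine_id, procs_per_machine=16):
    # machine 0 would hold uids 0..15, machine 1 uids 16..31, and so on
    return machine_id * procs_per_machine + rank


def get_machine_and_rank(uid, procs_per_machine=16):
    return uid // procs_per_machine, uid % procs_per_machine


if __name__ == "__main__":
    assert get_uid(3, 2) == 35
    assert get_machine_and_rank(35) == (2, 3)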
import logging
from pathlib import Path
from shutil import copy

from localconfig import LocalConfig
from torch import multiprocessing as mp

from decentralizepy import utils
from decentralizepy.graphs.Graph import Graph
from decentralizepy.mappings.Linear import Linear
from decentralizepy.node.DPSGDNodeFederated import DPSGDNodeFederated
from decentralizepy.node.FederatedParameterServer import FederatedParameterServer


def read_ini(file_path):
    config = LocalConfig(file_path)
    for section in config:
        print("Section: ", section)
        for key, value in config.items(section):
            print((key, value))
    print(dict(config.items("DATASET")))
    return config


if __name__ == "__main__":
    args = utils.get_args()

    Path(args.log_dir).mkdir(parents=True, exist_ok=True)

    log_level = {
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }

    config = read_ini(args.config_file)
    my_config = dict()
    for section in config:
        my_config[section] = dict(config.items(section))

    copy(args.config_file, args.log_dir)
    copy(args.graph_file, args.log_dir)
    utils.write_args(args, args.log_dir)

    g = Graph()
    g.read_graph_from_file(args.graph_file, args.graph_type)
    n_machines = args.machines
    procs_per_machine = args.procs_per_machine
    l = Linear(n_machines, procs_per_machine)
    m_id = args.machine_id

    sm = args.server_machine
    sr = args.server_rank

    processes = []
    if sm == m_id:
        processes.append(
            mp.Process(
                target=FederatedParameterServer,
                args=[
                    sr,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    args.weights_store_dir,
                    log_level[args.log_level],
                    args.test_after,
                    args.train_evaluate_after,
                    args.working_rate,
                ],
            )
        )

    for r in range(0, procs_per_machine):
        processes.append(
            mp.Process(
                target=DPSGDNodeFederated,
                args=[
                    r,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    args.weights_store_dir,
                    log_level[args.log_level],
                    args.test_after,
                    args.train_evaluate_after,
                    args.reset_optimizer,
                ],
            )
        )

    for p in processes:
        p.start()

    for p in processes:
        p.join()
import logging
from pathlib import Path
from shutil import copy

from localconfig import LocalConfig
from torch import multiprocessing as mp

from decentralizepy import utils
from decentralizepy.graphs.Graph import Graph
from decentralizepy.mappings.Linear import Linear
from decentralizepy.node.KNN import KNN


def read_ini(file_path):
    config = LocalConfig(file_path)
    for section in config:
        print("Section: ", section)
        for key, value in config.items(section):
            print((key, value))
    print(dict(config.items("DATASET")))
    return config


if __name__ == "__main__":
    args = utils.get_args()

    Path(args.log_dir).mkdir(parents=True, exist_ok=True)

    log_level = {
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }

    config = read_ini(args.config_file)
    my_config = dict()
    for section in config:
        my_config[section] = dict(config.items(section))

    copy(args.config_file, args.log_dir)
    copy(args.graph_file, args.log_dir)
    utils.write_args(args, args.log_dir)

    g = Graph()
    g.read_graph_from_file(args.graph_file, args.graph_type)
    n_machines = args.machines
    procs_per_machine = args.procs_per_machine
    l = Linear(n_machines, procs_per_machine)
    m_id = args.machine_id

    processes = []
    for r in range(procs_per_machine):
        processes.append(
            mp.Process(
                target=KNN,
                args=[
                    r,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    args.weights_store_dir,
                    log_level[args.log_level],
                    args.test_after,
                    args.train_evaluate_after,
                    args.reset_optimizer,
                ],
            )
        )

    for p in processes:
        p.start()

    for p in processes:
        p.join()
import logging
from pathlib import Path
from shutil import copy

from localconfig import LocalConfig
from torch import multiprocessing as mp

from decentralizepy import utils
from decentralizepy.graphs.Graph import Graph
from decentralizepy.mappings.Linear import Linear
from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler
from decentralizepy.node.PeerSampler import PeerSampler


def read_ini(file_path):
    config = LocalConfig(file_path)
    for section in config:
        print("Section: ", section)
        for key, value in config.items(section):
            print((key, value))
    print(dict(config.items("DATASET")))
    return config


if __name__ == "__main__":
    args = utils.get_args()

    Path(args.log_dir).mkdir(parents=True, exist_ok=True)

    log_level = {
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }

    config = read_ini(args.config_file)
    my_config = dict()
    for section in config:
        my_config[section] = dict(config.items(section))

    copy(args.config_file, args.log_dir)
    copy(args.graph_file, args.log_dir)
    utils.write_args(args, args.log_dir)

    g = Graph()
    g.read_graph_from_file(args.graph_file, args.graph_type)
    n_machines = args.machines
    procs_per_machine = args.procs_per_machine
    l = Linear(n_machines, procs_per_machine)
    m_id = args.machine_id

    sm = args.server_machine
    sr = args.server_rank

    processes = []
    if sm == m_id:
        processes.append(
            mp.Process(
                # target=PeerSamplerDynamic,
                target=PeerSampler,
                args=[
                    sr,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    log_level[args.log_level],
                ],
            )
        )

    for r in range(0, procs_per_machine):
        processes.append(
            mp.Process(
                target=DPSGDWithPeerSampler,
                args=[
                    r,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    args.weights_store_dir,
                    log_level[args.log_level],
                    args.test_after,
                    args.train_evaluate_after,
                    args.reset_optimizer,
                ],
            )
        )

    for p in processes:
        p.start()

    for p in processes:
        p.join()
import logging
from pathlib import Path
from shutil import copy

from localconfig import LocalConfig
from torch import multiprocessing as mp

from decentralizepy import utils
from decentralizepy.graphs.Graph import Graph
from decentralizepy.mappings.Linear import Linear
from decentralizepy.node.DPSGDWithPeerSampler import DPSGDWithPeerSampler
from decentralizepy.node.PeerSamplerDynamic import PeerSamplerDynamic


def read_ini(file_path):
    config = LocalConfig(file_path)
    for section in config:
        print("Section: ", section)
        for key, value in config.items(section):
            print((key, value))
    print(dict(config.items("DATASET")))
    return config


if __name__ == "__main__":
    args = utils.get_args()

    Path(args.log_dir).mkdir(parents=True, exist_ok=True)

    log_level = {
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }

    config = read_ini(args.config_file)
    my_config = dict()
    for section in config:
        my_config[section] = dict(config.items(section))

    copy(args.config_file, args.log_dir)
    copy(args.graph_file, args.log_dir)
    utils.write_args(args, args.log_dir)

    g = Graph()
    g.read_graph_from_file(args.graph_file, args.graph_type)
    n_machines = args.machines
    procs_per_machine = args.procs_per_machine
    l = Linear(n_machines, procs_per_machine)
    m_id = args.machine_id

    sm = args.server_machine
    sr = args.server_rank

    processes = []
    if sm == m_id:
        processes.append(
            mp.Process(
                target=PeerSamplerDynamic,
                args=[
                    sr,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    log_level[args.log_level],
                ],
            )
        )

    for r in range(0, procs_per_machine):
        processes.append(
            mp.Process(
                target=DPSGDWithPeerSampler,
                args=[
                    r,
                    m_id,
                    l,
                    g,
                    my_config,
                    args.iterations,
                    args.log_dir,
                    args.weights_store_dir,
                    log_level[args.log_level],
                    args.test_after,
                    args.train_evaluate_after,
                    args.reset_optimizer,
                ],
            )
        )

    for p in processes:
        p.start()

    for p in processes:
        p.join()
@@ -40,16 +40,21 @@ install_requires =
    zmq
    jsonlines
    pillow
    pickle
    smallworld
    localconfig
    PyWavelets
    pandas
    crudini
    sklearn
    lz4
    fpzip

include_package_data = True
python_requires = >=3.6

[options.packages.find]
where = src

[options.extras_require]
dev =
-    black
+    black>22.3.0
    coverage
    isort
    pytest
-from decentralizepy.datasets.Femnist import Femnist
+import sys
+
+from decentralizepy.datasets.Reddit import Reddit
+from decentralizepy.mappings import Linear

-f = Femnist()
-f.file_per_user("leaf/data/femnist/data/train", "leaf/data/femnist/per_user_data/train")
+if __name__ == "__main__":
+    mapping = Linear(6, 16)
+    f = Reddit(0, 0, mapping)
+
+    assert len(sys.argv) == 3
+    frm = sys.argv[1]
+    to = sys.argv[2]
+
+    f.file_per_user(frm, to)
class Communication:
    """
    Communication API
    """

    def __init__(self, rank, machine_id, mapping, total_procs):
        """
        Constructor

        Parameters
        ----------
        rank : int
            Local rank of the process
        machine_id : int
            Machine id of the process
        mapping : decentralizepy.mappings.Mapping
            uid, rank, machine_id invertible mapping
        total_procs : int
            Total number of processes
        """
        self.total_procs = total_procs
        self.rank = rank
        self.machine_id = machine_id
@@ -12,19 +28,82 @@ class Communication:
        self.total_bytes = 0

    def encrypt(self, data):
        """
        Encode/Encrypt data.

        Parameters
        ----------
        data : dict
            Data dict to send

        Returns
        -------
        byte
            Encoded data
        """
        raise NotImplementedError

    def decrypt(self, sender, data):
        """
        Decodes received data.

        Parameters
        ----------
        sender : byte
            sender of the data
        data : byte
            Data received

        Returns
        -------
        tuple
            (sender: int, data: dict)
        """
        raise NotImplementedError

    def connect_neighbors(self, neighbors):
        """
        Connects all neighbors.

        Parameters
        ----------
        neighbors : list(int)
            List of neighbors
        """
        raise NotImplementedError

    def receive(self):
        """
        Returns ONE message received.

        Returns
        -------
        dict
            Received and decrypted data
        """
        raise NotImplementedError

    def send(self, uid, data):
        """
        Send a message to a process.

        Parameters
        ----------
        uid : int
            Neighbor's unique ID
        data : dict
            Message as a Python dictionary
        """
        raise NotImplementedError

    def disconnect_neighbors(self):
        """
        Disconnects all neighbors.
        """
        raise NotImplementedError
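Every method of this base class raises NotImplementedError; concrete transports such as the TCP class below override them. A toy in-memory subclass (purely illustrative, not library code; it assumes decentralizepy is importable) shows the minimal contract:

# Toy transport: messages sent to any uid are looped straight back into a
# local inbox, so the encrypt/send/receive/decrypt sequence can be
# exercised in a single process. Illustrative sketch only.
import pickle
from collections import deque

from decentralizepy.communication.Communication import Communication


class LoopbackCommunication(Communication):
    def __init__(self, rank, machine_id, mapping, total_procs):
        super().__init__(rank, machine_id, mapping, total_procs)
        self.inbox = deque()

    def encrypt(self, data):
        return pickle.dumps(data)

    def decrypt(self, sender, data):
        return sender, pickle.loads(data)

    def connect_neighbors(self, neighbors):
        self.neighbors = set(neighbors)

    def send(self, uid, data):
        # loop the encoded message back into our own inbox
        self.inbox.append((uid, self.encrypt(data)))

    def receive(self):
        sender, data = self.inbox.popleft()
        return self.decrypt(sender, data)

    def disconnect_neighbors(self):
        self.neighbors = set()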
-import json
import logging
+import pickle
from collections import deque

import zmq
@@ -13,14 +14,62 @@ BYE = b"BYE"
class TCP(Communication):
    """
    TCP Communication API
    """

    def addr(self, rank, machine_id):
        """
        Returns TCP address of the process.

        Parameters
        ----------
        rank : int
            Local rank of the process
        machine_id : int
            Machine id of the process

        Returns
        -------
        str
            Full address of the process using TCP
        """
        machine_addr = self.ip_addrs[str(machine_id)]
-        port = rank + 20000
+        port = (2 * rank + 1) + self.offset
+        assert port > 0
        return "tcp://{}:{}".format(machine_addr, port)
-    def __init__(self, rank, machine_id, mapping, total_procs, addresses_filepath):
+    def __init__(
+        self,
+        rank,
+        machine_id,
+        mapping,
+        total_procs,
+        addresses_filepath,
+        offset=9000,
+        recv_timeout=50,
+    ):
        """
        Constructor

        Parameters
        ----------
        rank : int
            Local rank of the process
        machine_id : int
            Machine id of the process
        mapping : decentralizepy.mappings.Mapping
            uid, rank, machine_id invertible mapping
        total_procs : int
            Total number of processes
        addresses_filepath : str
            JSON file with machine_id -> ip mapping
        offset : int
            Base value from which per-rank ports are derived
        recv_timeout : int
            Timeout (in ms) for receiving messages on the router socket
        """
        super().__init__(rank, machine_id, mapping, total_procs)

        with open(addresses_filepath) as addrs:
@@ -30,105 +79,149 @@ class TCP(Communication):
        self.rank = rank
        self.machine_id = machine_id
        self.mapping = mapping
        self.offset = offset
        self.recv_timeout = recv_timeout
        self.uid = mapping.get_uid(rank, machine_id)
        self.identity = str(self.uid).encode()
        self.context = zmq.Context()
        self.router = self.context.socket(zmq.ROUTER)
        self.router.setsockopt(zmq.IDENTITY, self.identity)
        self.router.setsockopt(zmq.RCVTIMEO, self.recv_timeout)
        self.router.setsockopt(zmq.ROUTER_MANDATORY, 1)
        self.router.bind(self.addr(rank, machine_id))

        self.sent_disconnections = False
        self.total_data = 0
        self.total_meta = 0

        self.peer_deque = deque()
        self.peer_sockets = dict()
        self.barrier = set()

    def __del__(self):
        """
        Destroys zmq context
        """
        self.context.destroy(linger=0)
    def encrypt(self, data):
-        return json.dumps(data).encode("utf8")
+        """
+        Encode data as python pickle.
+
+        Parameters
+        ----------
+        data : dict
+            Data dict to send
+
+        Returns
+        -------
+        byte
+            Encoded data
+        """
+        data_len = 0
+        if "params" in data:
+            data_len = len(pickle.dumps(data["params"]))
+        output = pickle.dumps(data)
+        self.total_meta += len(output) - data_len
+        self.total_data += data_len
+        return output
    def decrypt(self, sender, data):
        """
        Decode received pickle data.

        Parameters
        ----------
        sender : byte
            sender of the data
        data : byte
            Data received

        Returns
        -------
        tuple
            (sender: int, data: dict)
        """
        sender = int(sender.decode())
-        data = json.loads(data.decode("utf8"))
+        data = pickle.loads(data)
        return sender, data
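encrypt above splits each message's byte count into payload ("params") and metadata. A standalone check of that bookkeeping, mirroring the pickle calls shown:

# Standalone check of the meta/data split done in encrypt(): the pickled
# "params" payload counts as data, everything else as metadata.
import pickle

message = {"iteration": 4, "params": list(range(1000))}
data_len = len(pickle.dumps(message["params"]))
output = pickle.dumps(message)
print("data bytes:", data_len, "meta bytes:", len(output) - data_len)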
-    def connect_neighbors(self, neighbors):
-        logging.info("Sending connection request to neighbors")
-        for uid in neighbors:
-            logging.debug("Connecting to my neighbour: {}".format(uid))
-            id = str(uid).encode()
-            req = self.context.socket(zmq.DEALER)
-            req.setsockopt(zmq.IDENTITY, self.identity)
-            req.connect(self.addr(*self.mapping.get_machine_and_rank(uid)))
-            self.peer_sockets[id] = req
-            req.send(HELLO)
-
-        num_neighbors = len(neighbors)
-        while len(self.barrier) < num_neighbors:
-            sender, recv = self.router.recv_multipart()
-
-            if recv == HELLO:
-                logging.debug("Received {} from {}".format(HELLO, sender))
-                self.barrier.add(sender)
-            elif recv == BYE:
-                logging.debug("Received {} from {}".format(BYE, sender))
-                raise RuntimeError(
-                    "A neighbour wants to disconnect before training started!"
-                )
-            else:
-                logging.debug(
-                    "Received message from {} @ connect_neighbors".format(sender)
-                )
-                self.peer_deque.append(self.decrypt(sender, recv))
-
-        logging.info("Connected to all neighbors")
-
-    def receive(self):
-        if len(self.peer_deque) != 0:
-            resp = self.peer_deque.popleft()
-            return resp
-
-        sender, recv = self.router.recv_multipart()
-
-        if recv == HELLO:
-            logging.debug("Received {} from {}".format(HELLO, sender))
-            raise RuntimeError(
-                "A neighbour wants to connect when everyone is connected!"
-            )
-        elif recv == BYE:
-            logging.debug("Received {} from {}".format(BYE, sender))
-            self.barrier.remove(sender)
-            return self.receive()
-        else:
-            logging.debug("Received message from {}".format(sender))
-            return self.decrypt(sender, recv)
+    def init_connection(self, neighbor):
+        """
+        Initiates a socket to a given node.
+
+        Parameters
+        ----------
+        neighbor : int
+            neighbor to connect to
+        """
+        logging.debug("Connecting to my neighbour: {}".format(neighbor))
+        id = str(neighbor).encode()
+        req = self.context.socket(zmq.DEALER)
+        req.setsockopt(zmq.IDENTITY, self.identity)
+        req.connect(self.addr(*self.mapping.get_machine_and_rank(neighbor)))
+        self.peer_sockets[id] = req
+
+    def destroy_connection(self, neighbor, linger=None):
+        id = str(neighbor).encode()
+        if self.already_connected(neighbor):
+            self.peer_sockets[id].close(linger=linger)
+            del self.peer_sockets[id]
+
+    def already_connected(self, neighbor):
+        id = str(neighbor).encode()
+        return id in self.peer_sockets
+
+    def receive(self, block=True):
+        """
+        Returns ONE message received.
+
+        Returns
+        -------
+        dict
+            Received and decrypted data
+
+        Raises
+        ------
+        RuntimeError
+            If received HELLO
+        """
+        while True:
+            try:
+                sender, recv = self.router.recv_multipart()
+                s, r = self.decrypt(sender, recv)
+                return s, r
+            except zmq.ZMQError as exc:
+                if exc.errno == zmq.EAGAIN:
+                    if not block:
+                        return None
+                    else:
+                        continue
+                else:
+                    raise
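Because the constructor sets zmq.RCVTIMEO, recv_multipart raises a ZMQError with errno EAGAIN (zmq.Again) after recv_timeout milliseconds, which is exactly what the loop above catches. A standalone illustration of the same timeout pattern, independent of decentralizepy (the port is arbitrary):

# Standalone illustration of the timeout/EAGAIN pattern used in receive():
# a ROUTER socket with RCVTIMEO raises zmq.Again when no message arrives.
import zmq

context = zmq.Context()
router = context.socket(zmq.ROUTER)
router.setsockopt(zmq.RCVTIMEO, 50)  # milliseconds, like recv_timeout=50
router.bind("tcp://127.0.0.1:9001")

try:
    sender, payload = router.recv_multipart()
except zmq.Again:
    # no message within 50 ms; a non-blocking caller would return None here
    print("no message yet")
finally:
    router.close(linger=0)
    context.destroy(linger=0)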
-    def send(self, uid, data):
-        to_send = self.encrypt(data)
+    def send(self, uid, data, encrypt=True):
+        """
+        Send a message to a process.
+
+        Parameters
+        ----------
+        uid : int
+            Neighbor's unique ID
+        data : dict
+            Message as a Python dictionary
+        """
+        if encrypt:
+            to_send = self.encrypt(data)
+        else:
+            to_send = data
        data_size = len(to_send)
        self.total_bytes += data_size
        id = str(uid).encode()
        self.peer_sockets[id].send(to_send)
        logging.debug("{} sent the message to {}.".format(self.uid, uid))
-        logging.info("Sent this round: {}".format(data_size))
+        logging.info("Sent message size: {}".format(data_size))

    def disconnect_neighbors(self):
        if not self.sent_disconnections:
            logging.info("Disconnecting neighbors")
            for sock in self.peer_sockets.values():
                sock.send(BYE)
            self.sent_disconnections = True

            while len(self.barrier):
                sender, recv = self.router.recv_multipart()
                if recv == BYE:
                    logging.debug("Received {} from {}".format(BYE, sender))
                    self.barrier.remove(sender)
                else:
                    logging.critical(
                        "Received unexpected {} from {}".format(recv, sender)
                    )
                    raise RuntimeError(
                        "Received a message when expecting BYE from {}".format(sender)
                    )
class Compression:
    """
    Compression API
    """

    def __init__(self):
        """
        Constructor
        """

    def compress(self, arr):
        """
        compression function

        Parameters
        ----------
        arr : np.ndarray
            Data to compress

        Returns
        -------
        bytearray
            encoded data as bytes
        """
        raise NotImplementedError

    def decompress(self, bytes):
        """
        decompression function

        Parameters
        ----------
        bytes : bytearray
            compressed data

        Returns
        -------
        arr : np.ndarray
            decompressed data as array
        """
        raise NotImplementedError

    def compress_float(self, arr):
        """
        compression function for float arrays

        Parameters
        ----------
        arr : np.ndarray
            Data to compress

        Returns
        -------
        bytearray
            encoded data as bytes
        """
        raise NotImplementedError

    def decompress_float(self, bytes):
        """
        decompression function for compressed float arrays

        Parameters
        ----------
        bytes : bytearray
            compressed data

        Returns
        -------
        arr : np.ndarray
            decompressed data as array
        """
        raise NotImplementedError
# elias implementation: taken from this stack overflow post:
# https://stackoverflow.com/questions/62843156/python-fast-compression-of-large-amount-of-numbers-with-elias-gamma
import numpy as np
from decentralizepy.compression.Compression import Compression
class Elias(Compression):
    """
    Compression API
    """

    def __init__(self):
        """
        Constructor
        """

    def compress(self, arr):
        """
        compression function

        Parameters
        ----------
        arr : np.ndarray
            Data to compress

        Returns
        -------
        bytearray
            encoded data as bytes
        """
        arr.sort()
        first = arr[0]
        arr = np.diff(arr).astype(np.int32)
        arr = arr.view(f"u{arr.itemsize}")
        l = np.log2(arr).astype("u1")
        L = ((l << 1) + 1).cumsum()
        out = np.zeros(int(L[-1] + 128), "u1")
        for i in range(l.max() + 1):
            out[L - i - 1] += (arr >> i) & 1

        s = np.array([out.size], dtype=np.int64)
        size = np.ndarray(8, dtype="u1", buffer=s.data)
        packed = np.packbits(out)
        packed[-8:] = size

        s = np.array([first], dtype=np.int64)
        size = np.ndarray(8, dtype="u1", buffer=s.data)
        packed[-16:-8] = size
        return packed

    def decompress(self, bytes):
        """
        decompression function

        Parameters
        ----------
        bytes : bytearray
            compressed data

        Returns
        -------
        arr : np.ndarray
            decompressed data as array
        """
        n_arr = bytes[-8:]
        n = np.ndarray(1, dtype=np.int64, buffer=n_arr.data)[0]
        first = bytes[-16:-8]
        first = np.ndarray(1, dtype=np.int64, buffer=first.data)[0]
        b = bytes[:-16]
        b = np.unpackbits(b, count=n).view(bool)
        s = b.nonzero()[0]
        s = (s << 1).repeat(np.diff(s, prepend=-1))
        s -= np.arange(-1, len(s) - 1)
        s = s.tolist()  # list has faster __getitem__
        ns = len(s)

        def gen():
            idx = 0
            yield idx
            while idx < ns:
                idx = s[idx]
                yield idx

        offs = np.fromiter(gen(), int)
        sz = np.diff(offs) >> 1
        mx = sz.max() + 1
        out_fin = np.zeros(offs.size, int)
        out_fin[0] = first
        out = out_fin[1:]
        for i in range(mx):
            out[b[offs[1:] - i - 1] & (sz >= i)] += 1 << i
        out = np.cumsum(out_fin)
        return out
    def compress_float(self, arr):
        """
        compression function for float arrays

        Parameters
        ----------
        arr : np.ndarray
            Data to compress

        Returns
        -------
        bytearray
            encoded data as bytes
        """
        return arr

    def decompress_float(self, bytes):
        """
        decompression function for compressed float arrays

        Parameters
        ----------
        bytes : bytearray
            compressed data

        Returns
        -------
        arr : np.ndarray
            decompressed data as array
        """
        return bytes
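A quick way to see what the coder above buys: compress a set of distinct indices (distinct is required, since compress encodes successive gaps and log2 of a zero gap is undefined, an assumption inferred from the code rather than stated by the authors) and compare the packed size against the raw 8 bytes per int64 index. A minimal sketch assuming the Elias class above is in scope:

# Minimal sketch: Elias-gamma code the gaps between 512 distinct indices
# and compare against the raw 8-bytes-per-index representation.
import numpy as np

if __name__ == "__main__":
    codec = Elias()
    rng = np.random.default_rng(97)
    # distinct, non-negative indices: compress() stores the first value and
    # then gamma-codes the gaps, so duplicates (gap 0) would break log2
    indices = rng.choice(100_000, size=512, replace=False).astype(np.int64)

    packed = codec.compress(indices.copy())  # note: compress sorts in place
    print("raw bytes:", indices.size * 8, "packed bytes:", len(packed))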