Skip to content
Snippets Groups Projects
Commit 82cd0ae9 authored by Jeffrey Wigger's avatar Jeffrey Wigger
Browse files

gridsearch run file + 192 nodes regular graph

parent 46a35ec1
No related branches found
No related tags found
No related merge requests found
192
0 32
0 97
0 1
0 191
0 40
0 106
0 75
0 15
0 18
0 95
1 0
1 97
1 2
1 41
1 170
1 111
1 178
1 18
1 61
1 191
2 1
2 162
2 3
2 71
2 12
2 45
2 172
2 87
2 155
2 156
3 2
3 4
3 71
3 104
3 75
3 43
3 176
3 19
3 152
3 26
4 26
4 3
4 5
4 103
4 137
4 21
4 185
4 186
4 124
4 159
5 32
5 35
5 4
5 6
5 41
5 44
5 76
5 118
5 184
6 5
6 7
6 105
6 16
6 94
6 91
6 158
6 191
7 128
7 68
7 6
7 8
7 74
7 82
7 118
7 151
7 156
8 7
8 105
8 138
8 9
8 80
8 145
8 148
8 154
8 60
8 159
9 8
9 74
9 43
9 10
9 16
9 50
9 180
9 86
9 188
9 30
10 65
10 9
10 106
10 11
10 141
10 80
10 85
10 182
10 183
10 62
11 101
11 165
11 134
11 168
11 10
11 12
11 51
11 118
11 86
11 188
12 128
12 2
12 70
12 72
12 11
12 13
12 49
12 87
12 154
12 187
13 33
13 130
13 67
13 134
13 138
13 12
13 109
13 14
13 116
13 31
14 89
14 13
14 15
14 17
14 182
14 88
14 121
14 26
14 92
14 25
15 0
15 162
15 100
15 166
15 135
15 71
15 14
15 16
15 180
15 181
16 129
16 133
16 6
16 9
16 15
16 17
16 55
16 88
16 187
16 28
17 35
17 39
17 14
17 16
17 49
17 18
17 51
17 184
17 61
17 158
18 0
18 1
18 34
18 103
18 110
18 111
18 113
18 17
18 19
18 22
19 3
19 36
19 132
19 40
19 48
19 18
19 20
19 21
19 56
19 185
20 69
20 136
20 111
20 144
20 19
20 21
20 119
20 24
20 123
21 4
21 42
21 141
21 176
21 19
21 148
21 20
21 22
21 154
22 162
22 67
22 139
22 171
22 142
22 18
22 23
22 21
22 151
22 57
23 73
23 120
23 47
23 178
23 118
23 22
23 24
23 123
23 28
24 121
24 170
24 176
24 113
24 20
24 23
24 25
24 63
25 163
25 38
25 135
25 42
25 77
25 14
25 24
25 26
25 31
26 64
26 3
26 4
26 27
26 43
26 139
26 110
26 14
26 25
26 91
27 163
27 165
27 134
27 79
27 147
27 180
27 182
27 26
27 28
27 157
28 163
28 78
28 16
28 148
28 182
28 23
28 27
28 29
28 190
28 127
29 131
29 37
29 75
29 172
29 139
29 176
29 180
29 151
29 28
29 30
30 131
30 68
30 38
30 71
30 9
30 41
30 76
30 154
30 29
30 31
31 32
31 35
31 108
31 13
31 144
31 93
31 30
31 153
31 125
31 25
32 0
32 33
32 98
32 5
32 137
32 171
32 176
32 56
32 122
32 31
33 128
33 32
33 34
33 169
33 109
33 77
33 13
33 81
33 179
33 60
34 96
34 33
34 35
34 39
34 108
34 18
34 51
34 82
34 89
34 94
35 129
35 34
35 36
35 5
35 176
35 17
35 87
35 184
35 157
35 31
36 67
36 35
36 37
36 71
36 45
36 19
36 179
36 55
36 62
37 66
37 132
37 36
37 38
37 47
37 83
37 52
37 125
37 29
38 130
38 37
38 102
38 39
38 40
38 187
38 25
38 91
38 189
38 30
39 34
39 101
39 38
39 72
39 40
39 140
39 17
39 177
39 148
39 123
40 0
40 38
40 39
40 41
40 19
40 52
40 53
40 151
40 56
40 120
41 1
41 65
41 5
41 40
41 73
41 106
41 139
41 42
41 88
41 30
42 162
42 41
42 74
42 43
42 111
42 47
42 114
42 147
42 21
42 25
43 26
43 3
43 102
43 9
43 42
43 44
43 183
43 58
43 123
43 125
44 66
44 69
44 5
44 75
44 43
44 45
44 83
44 86
44 183
44 92
45 2
45 36
45 68
45 105
45 44
45 46
45 113
45 183
45 185
45 158
46 131
46 100
46 165
46 164
46 45
46 47
46 177
46 189
46 59
46 125
47 64
47 164
47 37
47 166
47 42
47 46
47 48
47 189
47 23
47 61
48 132
48 101
48 70
48 47
48 145
48 49
48 19
48 121
48 157
48 158
49 160
49 65
49 165
49 168
49 12
49 77
49 48
49 17
49 50
49 149
50 160
50 167
50 9
50 144
50 49
50 51
50 148
50 124
50 157
50 159
51 34
51 103
51 11
51 78
51 175
51 79
51 17
51 50
51 115
51 52
52 37
52 135
52 40
52 74
52 143
52 51
52 53
52 58
52 155
52 93
53 167
53 40
53 105
53 139
53 177
53 52
53 85
53 117
53 54
53 58
54 66
54 179
54 55
54 53
54 151
54 184
54 153
54 90
54 152
54 94
55 65
55 36
55 77
55 143
55 16
55 147
55 148
55 54
55 56
55 90
56 32
56 165
56 40
56 174
56 147
56 19
56 181
56 55
56 57
56 156
57 161
57 165
57 169
57 78
57 22
57 56
57 185
57 58
57 187
57 127
58 130
58 43
58 173
58 52
58 53
58 57
58 59
58 93
58 126
58 191
59 97
59 46
59 142
59 112
59 79
59 181
59 58
59 123
59 60
59 63
60 33
60 66
60 166
60 8
60 75
60 83
60 89
60 90
60 59
60 61
61 1
61 73
61 106
61 76
61 47
61 17
61 83
61 151
61 60
61 62
62 36
62 69
62 104
62 168
62 10
62 140
62 87
62 154
62 61
62 63
63 64
63 130
63 133
63 134
63 107
63 173
63 84
63 24
63 59
63 62
64 65
64 72
64 108
64 76
64 47
64 144
64 111
64 89
64 26
64 63
65 64
65 97
65 161
65 163
65 66
65 41
65 10
65 49
65 55
65 124
66 65
66 99
66 67
66 37
66 136
66 44
66 84
66 54
66 60
66 93
67 66
67 36
67 68
67 141
67 13
67 80
67 178
67 146
67 22
67 122
68 67
68 164
68 69
68 7
68 45
68 78
68 111
68 86
68 155
68 30
69 160
69 68
69 70
69 167
69 75
69 44
69 20
69 93
69 62
69 191
70 69
70 71
70 140
70 12
70 141
70 79
70 48
70 112
70 85
70 92
71 2
71 3
71 36
71 70
71 136
71 72
71 170
71 110
71 15
71 30
72 64
72 98
72 133
72 166
72 39
72 71
72 73
72 12
72 76
72 126
73 129
73 133
73 72
73 41
73 74
73 110
73 23
73 118
73 183
73 61
74 7
74 9
74 42
74 107
74 75
74 73
74 175
74 83
74 52
74 120
75 0
75 3
75 69
75 74
75 76
75 44
75 114
75 60
75 29
76 64
76 5
76 72
76 75
76 77
76 143
76 82
76 61
76 30
77 33
77 76
77 78
77 177
77 49
77 180
77 150
77 55
77 25
78 161
78 68
78 28
78 77
78 79
78 51
78 89
78 57
78 95
79 70
79 167
79 169
79 59
79 78
79 112
79 80
79 51
79 149
79 27
80 160
80 67
80 8
80 10
80 79
80 81
80 116
80 149
80 150
80 154
81 33
81 106
81 171
81 175
81 143
81 80
81 178
81 82
81 84
81 86
82 34
82 90
82 7
82 76
82 174
82 81
82 83
82 152
82 122
82 191
83 99
83 164
83 100
83 37
83 74
83 44
83 82
83 84
83 60
83 61
84 66
84 109
84 173
84 175
84 143
84 81
84 83
84 181
84 85
84 63
85 165
85 70
85 168
85 169
85 10
85 84
85 53
85 86
85 190
85 126
86 132
86 68
86 103
86 9
86 11
86 44
86 81
86 85
86 87
86 88
87 2
87 35
87 131
87 103
87 136
87 12
87 86
87 88
87 62
87 95
88 135
88 41
88 170
88 109
88 14
88 16
88 86
88 87
88 89
88 189
89 64
89 34
89 102
89 78
89 14
89 88
89 90
89 155
89 60
89 125
90 98
90 141
90 110
90 146
90 82
90 54
90 55
90 89
90 91
90 60
91 90
91 38
91 6
91 166
91 105
91 172
91 144
91 26
91 92
91 93
92 99
92 70
92 169
92 44
92 14
92 93
92 184
92 91
92 124
92 152
93 159
93 66
93 69
93 52
93 120
93 58
93 91
93 92
93 94
93 31
94 34
94 131
94 132
94 6
94 142
94 54
94 184
94 122
94 93
94 95
95 0
95 96
95 172
95 78
95 112
95 145
95 116
95 119
95 87
95 94
96 97
96 34
96 102
96 138
96 107
96 177
96 145
96 187
96 95
96 191
97 0
97 65
97 1
97 98
97 96
97 133
97 141
97 111
97 59
97 127
98 32
98 161
98 97
98 99
98 72
98 136
98 108
98 121
98 90
98 158
99 129
99 66
99 98
99 100
99 137
99 138
99 92
99 83
99 180
99 188
100 99
100 101
100 171
100 140
100 173
100 46
100 15
100 178
100 83
100 115
101 128
101 100
101 102
101 39
101 11
101 112
101 48
101 148
101 121
101 127
102 96
102 160
102 101
102 38
102 103
102 43
102 140
102 144
102 89
102 127
103 4
103 166
103 102
103 136
103 104
103 18
103 51
103 150
103 86
103 87
104 3
104 103
104 105
104 106
104 139
104 149
104 182
104 117
104 189
104 62
105 161
105 6
105 8
105 168
105 106
105 104
105 45
105 53
105 91
105 158
106 0
106 104
106 41
106 10
106 107
106 105
106 145
106 81
106 151
106 61
107 96
107 164
107 134
107 74
107 106
107 108
107 110
107 176
107 124
107 63
108 64
108 34
108 98
108 107
108 109
108 174
108 144
108 115
108 187
108 31
109 160
109 33
109 172
109 13
109 110
109 108
109 84
109 88
109 187
109 190
110 163
110 26
110 71
110 73
110 138
110 107
110 109
110 111
110 18
110 90
111 64
111 1
111 97
111 68
111 42
111 141
111 110
111 112
111 18
111 20
112 101
112 70
112 136
112 79
112 111
112 113
112 181
112 152
112 59
112 95
113 162
113 167
113 45
113 112
113 18
113 114
113 182
113 119
113 24
113 185
114 42
114 75
114 171
114 113
114 115
114 117
114 184
114 186
114 123
114 156
115 128
115 100
115 108
115 114
115 179
115 51
115 116
115 150
115 186
115 126
116 169
116 171
116 13
116 174
116 143
116 80
116 177
116 115
116 117
116 95
117 104
117 169
117 124
117 114
117 116
117 53
117 118
117 122
117 156
117 158
118 162
118 5
118 7
118 73
118 11
118 175
118 119
118 117
118 23
118 121
119 134
119 166
119 138
119 173
119 113
119 20
119 118
119 120
119 95
120 40
120 74
120 179
120 119
120 182
120 23
120 121
120 93
120 190
121 98
121 101
121 14
121 48
121 118
121 183
121 24
121 122
121 120
121 159
122 32
122 67
122 137
122 173
122 82
122 117
122 121
122 123
122 156
122 94
123 39
123 43
123 174
123 142
123 114
123 20
123 23
123 122
123 59
123 124
124 65
124 129
124 4
124 107
124 50
124 147
124 117
124 123
124 92
124 125
125 191
125 37
125 135
125 43
125 46
125 146
125 89
125 124
125 126
125 31
126 131
126 133
126 154
126 72
126 115
126 85
126 58
126 156
126 125
126 127
127 128
127 161
127 97
127 132
127 101
127 102
127 134
127 57
127 28
127 126
128 33
128 129
128 101
128 133
128 7
128 12
128 115
128 157
128 158
128 127
129 128
129 130
129 99
129 35
129 132
129 73
129 16
129 150
129 124
130 159
130 129
130 131
130 38
130 13
130 152
130 58
130 157
130 63
131 130
131 132
131 135
131 46
131 87
131 30
131 126
131 188
131 29
131 94
132 129
132 131
132 37
132 133
132 48
132 19
132 86
132 153
132 94
132 127
133 128
133 97
133 132
133 134
133 72
133 73
133 16
133 180
133 126
133 63
134 133
134 135
134 11
134 107
134 13
134 175
134 119
134 27
134 127
134 63
135 131
135 134
135 168
135 136
135 173
135 15
135 52
135 88
135 25
135 125
136 66
136 98
136 103
136 71
136 137
136 135
136 175
136 112
136 20
136 87
137 32
137 99
137 4
137 136
137 138
137 147
137 148
137 182
137 122
137 189
138 96
138 162
138 99
138 164
138 8
138 137
138 139
138 13
138 110
138 119
139 104
139 41
139 138
139 140
139 178
139 53
139 22
139 26
139 29
140 100
140 165
140 70
140 167
140 102
140 39
140 139
140 141
140 62
141 97
141 67
141 70
141 10
141 140
141 142
141 111
141 21
141 90
141 190
142 162
142 168
142 123
142 155
142 141
142 143
142 146
142 22
142 59
142 94
143 76
143 142
143 144
143 81
143 52
143 181
143 84
143 55
143 116
143 153
144 64
144 102
144 108
144 143
144 145
144 50
144 20
144 91
144 157
144 31
145 96
145 8
145 106
145 172
145 48
145 144
145 178
145 146
145 186
145 95
146 160
146 67
146 90
146 170
146 174
146 142
146 145
146 147
146 186
146 125
147 27
147 137
147 42
147 146
147 179
147 148
147 55
147 56
147 155
147 124
148 101
148 39
148 8
148 137
148 50
148 147
148 21
148 149
148 55
148 28
149 164
149 104
149 171
149 79
149 80
149 49
149 179
149 148
149 150
150 129
150 103
150 167
150 77
150 80
150 115
150 149
150 151
150 186
151 7
151 40
151 106
151 61
151 54
151 22
151 152
151 183
151 150
151 29
152 130
152 3
152 168
152 112
152 82
152 54
152 151
152 153
152 92
153 132
153 143
153 156
153 181
153 54
153 152
153 154
153 188
153 31
154 8
154 170
154 12
154 80
154 21
154 30
154 153
154 126
154 155
154 62
155 2
155 68
155 154
155 142
155 156
155 147
155 52
155 89
155 186
155 188
156 2
156 7
156 114
156 117
156 56
156 153
156 122
156 155
156 157
156 126
157 128
157 130
157 35
157 174
157 48
157 144
157 50
157 27
157 156
157 158
158 128
158 98
158 6
158 105
158 45
158 48
158 17
158 117
158 157
158 159
159 160
159 130
159 163
159 4
159 8
159 50
159 189
159 121
159 93
159 158
160 161
160 69
160 102
160 109
160 80
160 49
160 146
160 50
160 159
161 160
161 65
161 98
161 162
161 105
161 78
161 184
161 57
161 127
162 161
162 2
162 163
162 42
162 138
162 142
162 15
162 113
162 118
162 22
163 65
163 162
163 164
163 166
163 110
163 175
163 25
163 27
163 28
163 159
164 163
164 68
164 165
164 138
164 107
164 46
164 47
164 83
164 181
164 149
165 164
165 166
165 11
165 140
165 46
165 49
165 85
165 56
165 57
165 27
166 163
166 165
166 103
166 72
166 167
166 47
166 15
166 119
166 91
166 60
167 69
167 166
167 168
167 140
167 79
167 113
167 50
167 53
167 150
167 185
168 135
168 167
168 105
168 169
168 11
168 142
168 49
168 85
168 152
168 62
169 33
169 168
169 170
169 92
169 79
169 116
169 117
169 85
169 57
169 188
170 1
170 71
170 169
170 171
170 172
170 173
170 146
170 24
170 154
170 88
171 32
171 100
171 170
171 172
171 81
171 114
171 179
171 116
171 149
171 22
172 2
172 170
172 171
172 187
172 109
172 173
172 145
172 91
172 29
172 95
173 100
173 122
173 135
173 170
173 172
173 174
173 84
173 119
173 58
173 63
174 108
174 173
174 175
174 146
174 82
174 116
174 56
174 186
174 123
174 157
175 163
175 134
175 136
175 74
175 174
175 176
175 81
175 51
175 84
175 118
176 32
176 35
176 3
176 107
176 175
176 177
176 21
176 24
176 29
176 191
177 96
177 39
177 77
177 46
177 176
177 178
177 116
177 53
177 183
177 190
178 1
178 67
178 100
178 139
178 81
178 145
178 179
178 177
178 23
178 188
179 33
179 36
179 171
179 178
179 147
179 115
179 149
179 54
179 180
179 120
180 99
180 133
180 9
180 77
180 15
180 179
180 181
180 185
180 27
180 29
181 164
181 15
181 112
181 143
181 84
181 180
181 182
181 56
181 153
181 59
182 104
182 137
182 10
182 14
182 113
182 181
182 183
182 120
182 27
182 28
183 73
183 10
183 43
183 44
183 45
183 177
183 182
183 151
183 184
183 121
184 161
184 35
184 5
184 17
184 114
184 54
184 183
184 185
184 92
184 94
185 4
185 167
185 45
185 113
185 19
185 180
185 184
185 57
185 186
185 190
186 4
186 187
186 174
186 145
186 146
186 115
186 114
186 150
186 185
186 155
187 96
187 38
187 108
187 172
187 12
187 109
187 16
187 57
187 186
187 188
188 131
188 99
188 169
188 9
188 11
188 187
188 178
188 153
188 155
188 189
189 38
189 104
189 137
189 46
189 47
189 88
189 188
189 190
189 159
190 109
190 141
190 177
190 85
190 120
190 185
190 28
190 189
190 191
191 96
191 1
191 0
191 69
191 6
191 176
191 82
191 58
191 125
191 190
#!/bin/bash
# Documentation
# This bash file takes three inputs. The first argument (nfs_home) is the path to the nfs home directory.
# The second one (python_bin) is the path to the python bin folder.
# The last argument (logs_subfolder) is the path to the logs folder with respect to the nfs home directory.
#
# The nfs home directory should contain the code of this framework stored in $nfs_home/decentralizepy and a folder
# called configs which contains the file 'ip_addr_6Machines.json'
# The python bin folder needs to include all the dependencies of this project including crudini.
# The results will be stored in $nfs_home/$logs_subfolder
# Each of the experiments will be stored in its own folder inside the logs_subfolder. The folder of the experiment
# starts with the last part of the config name, i.e., for 'config_celeba_topkacc.ini' it will start with topkacc.
# The name further includes the learning rate, rounds and batchsize as well as the exact date at which the experiment
# was run.
# Example: ./run_grid.sh /mnt/nfs/wigger /mnt/nfs/wigger/anaconda3/envs/sacs39/bin /logs/celeba
#
# Additional requirements:
# Each node needs a folder called 'tmp' in the user's home directory
#
# Note:
# - The script does not change the optimizer. All configs are written to use Adam.
# For SGD, these need to be changed manually.
# - The script will set '--test_after' and '--train_evaluate_after' to comm_rounds_per_global_epoch, i.e., the evaluation
# on the train set and on the test set is carried out every global epoch.
# - The '--reset_optimizer' option is set to 0, i.e., the optimizer is not reset after a communication round (only
# relevant for Adam and other optimizers with internal state).
#
# Adapting the script to other datasets:
# Change the variable 'dataset_size' to reflect the dataset's size.
#
# Known issues:
# - If the script is started at the very end of a minute, then there is a chance that two folders are created, as not all
# machines may start running the script at the exact same moment.
# --- Command-line arguments ---------------------------------------------------
# All three positional arguments are required (see the usage example above).
if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <nfs_home> <python_bin> <logs_subfolder>" >&2
  exit 1
fi
nfs_home=$1
python_bin=$2
logs_subfolder=$3

# --- Paths and fixed experiment settings --------------------------------------
decpy_path=$nfs_home/decentralizepy/eval
# The config files and the evaluation script are referenced with relative
# paths further down, so continuing after a failed cd would run everything
# from the wrong directory.
cd "$decpy_path" || {
  echo "error: cannot change directory to $decpy_path" >&2
  exit 1
}
env_python=$python_bin/python3
graph=192_regular.edges
config_file=~/tmp/config.ini
procs_per_machine=32
machines=6
global_epochs=25
eval_file=testing.py
log_level=INFO
ip_machines=$nfs_home/configs/ip_addr_6Machines.json

# --- Machine id ---------------------------------------------------------------
# Look up the address of interface ens785 and find the line of the machines
# file that mentions it; the first double-quoted field on that line is taken
# as this machine's id.
local_ip=$(/sbin/ifconfig ens785 | awk '/inet /{print $2}')
if [ -z "$local_ip" ]; then
  echo "error: could not determine the inet address of interface ens785" >&2
  exit 1
fi
# -F: match the address literally (a '.' in a regex would match any character).
m=$(grep -F -- "$local_ip" "$ip_machines" | cut -d'"' -f2)
if [ -z "$m" ]; then
  echo "error: address $local_ip not found in $ip_machines" >&2
  exit 1
fi
export PYTHONFAULTHANDLER=1

# --- Grid definition ----------------------------------------------------------
# Base configs for which the grid search is done
tests=("step_configs/config_celeba_sharing.ini")
# Learning rates to test
# NOTE(review): "0.0001" is listed twice, so that setting is run twice;
# presumably one entry was meant to be a different value -- confirm.
lrs=("0.001" "0.0001" "0.0001")
# Batch sizes to test
batchsize=("8" "16")
# The number of communication rounds per global epoch to test
comm_rounds_per_global_epoch=("1" "10" "100")

procs=$((procs_per_machine * machines))
echo "procs: $procs"
# Celeba has 63741 samples
dataset_size=63741
# Calculating the number of samples that each user/proc will have on average
# (integer division, the remainder is dropped)
samples_per_user=$((dataset_size / procs))
echo "samples per user: $samples_per_user"
#######################################
# Number of communication rounds in the requested number of global epochs,
# before correcting for the rounding of batches per communication round.
# Arguments:
#   $1 - batches per global epoch (per user/proc)
#   $2 - number of global epochs
#   $3 - communication rounds per global epoch
# Outputs:
#   the number of rounds on stdout
#######################################
base_iterations() {
  local per_epoch=$1 epochs=$2 rounds=$3
  if ((rounds >= per_epoch)); then
    # Cannot communicate more often than once per batch.
    echo $((per_epoch * epochs))
  else
    echo $((epochs * rounds))
  fi
}

#######################################
# Number of batches each user/proc processes per communication round
# (rounded down, but never less than 1).
# Arguments:
#   $1 - batches per global epoch
#   $2 - communication rounds per global epoch
# Outputs:
#   the number of batches on stdout
#######################################
batches_per_comm_round() {
  local per_round=$(($1 / $2))
  if ((per_round == 0)); then
    per_round=1
  fi
  echo "$per_round"
}

#######################################
# Corrects the number of communication rounds for the fact that the batches
# per communication round were rounded down, so that the run still covers the
# intended number of global epochs.
#
# With x = floor(per_epoch / rounds) batches per round, one global epoch needs
# per_epoch / x rounds instead of 'rounds', i.e. the iteration count must be
# scaled by (per_epoch / rounds) / x. Pure integer arithmetic is used: the
# previous float computation scaled by (per_epoch / rounds) - x + 1, which
# overshoots whenever x > 1 and was additionally subject to float truncation
# (e.g. 41 batches, 10 rounds, 250 iterations gave 274).
# Arguments:
#   $1 - batches per global epoch
#   $2 - communication rounds per global epoch
#   $3 - uncorrected number of iterations (see base_iterations)
# Outputs:
#   the corrected number of iterations on stdout
#######################################
corrected_iterations() {
  local per_epoch=$1 rounds=$2 iterations=$3
  local per_round=$((per_epoch / rounds))
  if ((per_round == 0)); then
    # One batch per round is used; base_iterations already accounts for that.
    echo "$iterations"
  else
    echo $((iterations * per_epoch / (rounds * per_round)))
  fi
}

# Grid search: batch size x communication rounds x learning rate x base config.
for b in "${batchsize[@]}"; do
  echo "batchsize: $b"
  for r in "${comm_rounds_per_global_epoch[@]}"; do
    echo "communication rounds per global epoch: $r"
    # calculating how many batches there are in a global epoch for each user/proc
    batches_per_epoch=$((samples_per_user / b))
    echo "batches per global epoch: $batches_per_epoch"
    # the number of iterations in $global_epochs global epochs
    iterations=$(base_iterations "$batches_per_epoch" "$global_epochs" "$r")
    echo "iterations: $iterations"
    # calculating the number of batches each user/proc uses per communication step
    # (the actual number may be fractional, which we round down)
    batches_per_comm_round=$(batches_per_comm_round "$batches_per_epoch" "$r")
    # since the batches per communication round were rounded down we need to
    # change the number of iterations to reflect that
    new_iterations=$(corrected_iterations "$batches_per_epoch" "$r" "$iterations")
    echo "batches per communication round: $batches_per_comm_round"
    echo "corrected iterations: $new_iterations"
    for lr in "${lrs[@]}"; do
      for i in "${tests[@]}"; do
        echo "$i"
        # Experiment name: the part of the config name after the last '_',
        # up to its first '.', e.g. 'config_celeba_sharing.ini' -> 'sharing'.
        config_tail=${i##*_}
        experiment_name=${config_tail%%.*}
        log_dir=$nfs_home$logs_subfolder/$experiment_name:lr=$lr:r=$r:b=$b:$(date '+%Y-%m-%dT%H:%M')/machine$m
        echo "results are stored in: $log_dir"
        mkdir -p "$log_dir" || {
          echo "error: cannot create $log_dir" >&2
          exit 1
        }
        # Abort rather than silently reusing the config of the previous run.
        cp "$i" "$config_file" || {
          echo "error: cannot copy $i to $config_file" >&2
          exit 1
        }
        # changing the config files to reflect the values of the current grid search state
        {
          "$python_bin/crudini" --set "$config_file" COMMUNICATION addresses_filepath "$ip_machines" &&
            "$python_bin/crudini" --set "$config_file" OPTIMIZER_PARAMS lr "$lr" &&
            "$python_bin/crudini" --set "$config_file" TRAIN_PARAMS rounds "$batches_per_comm_round" &&
            "$python_bin/crudini" --set "$config_file" TRAIN_PARAMS batch_size "$b"
        } || {
          echo "error: failed to update $config_file" >&2
          exit 1
        }
        # A failing run does not stop the grid search; the next setting is tried.
        "$env_python" "$eval_file" -ro 0 -tea "$r" -ld "$log_dir" -mid "$m" \
          -ps "$procs_per_machine" -ms "$machines" -is "$new_iterations" \
          -gf "$graph" -ta "$r" -cf "$config_file" -ll "$log_level"
        echo "$i is done"
        sleep 1
        echo "end of sleep"
      done
    done
  done
done
#
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment