Skip to content

Commit 4a2f231

Browse files
committed
fixes
1 parent 9b5d6d1 commit 4a2f231

8 files changed

Lines changed: 59 additions & 89 deletions

File tree

config/pong.ini

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
env_name = pong
33

44
[vec]
5-
total_agents = 4096
5+
total_agents = 1024
6+
num_buffers = 1
7+
num_threads = 16
68

79
[env]
810
width = 500
@@ -17,28 +19,39 @@ ball_initial_speed_y = 1
1719
ball_speed_y_increment = 3
1820
ball_max_speed_y = 13
1921
max_score = 21
20-
frameskip = 4
22+
frameskip = 8
2123
continuous = 0
2224

25+
[policy]
26+
hidden_size = 32
27+
num_layers = 1
28+
expansion_factor = 1
29+
2330
[train]
24-
total_timesteps = 50_000_000
25-
beta1 = 0.8946507418260217
26-
beta2 = 0.9
31+
gpus = 1
32+
seed = 42
33+
total_timesteps = 5000000
34+
learning_rate = 0.1
35+
anneal_lr = 1
36+
min_lr_ratio = 0
37+
gamma = 0.934713
38+
gae_lambda = 0.991989
39+
replay_ratio = 3.05148
40+
clip_coef = 0.822764
41+
vf_coef = 5
42+
vf_clip_coef = 4.95789
43+
max_grad_norm = 0.752747
44+
ent_coef = 0.000402915
45+
beta1 = 0.5
46+
beta2 = 0.947709
2747
eps = 0.0001
28-
horizon = 64
29-
clip_coef = 0.19696765958267629
30-
ent_coef = 0.0005690816545012474
31-
gae_lambda = 0.747650023961198
32-
gamma = 0.9997053654668936
33-
learning_rate = 0.044482546441415506
34-
max_grad_norm = 2.2356112188495723
3548
minibatch_size = 32768
36-
prio_alpha = 0.98967001208896
37-
prio_beta0 = 0.09999999999999998
38-
vf_clip_coef = 2.178492167689251
39-
vf_coef = 1.6832989594296321
40-
vtrace_c_clip = 2.878171091654008
41-
vtrace_rho_clip = 0.7876748061547312
49+
horizon = 32
50+
vtrace_rho_clip = 4.87841
51+
vtrace_c_clip = 1.48608
52+
prio_alpha = 0.242089
53+
prio_beta0 = 0.807575
54+
use_rnn = 1
4255

4356
[sweep.train.total_timesteps]
4457
distribution = log_normal

constellation/cache_data.py

Lines changed: 13 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,10 @@
1+
# Merges log files + filters to pareto-optimal points wrt steps, wall-clock, and score. Comment that out if you want the full dataset. Also does TSNE, which is why I haven't bothered porting to C.
12
import numpy as np
23

34
import json
45
import glob
56
import os
67

7-
env_names = sorted([
8-
'breakout',
9-
#'impulse_wars',
10-
'pacman',
11-
'tetris',
12-
#'g2048',
13-
#'moba',
14-
'pong',
15-
#'tower_climb',
16-
#'grid',
17-
'freeway',
18-
'connect4',
19-
'nmmo3',
20-
#'snake',
21-
'tripletriad'
22-
])
23-
248
HYPERS = [
259
'train/learning_rate',
2610
'train/ent_coef',
@@ -147,14 +131,7 @@ def cached_load(path, env_name, cache):
147131

148132
for hyper in HYPERS:
149133
prefix, suffix = hyper.split('/')
150-
#if prefix not in sweep_metadata:
151-
# continue
152-
153134
group = sweep_metadata[prefix]
154-
#if suffix not in group:
155-
# continue
156-
157-
158135
key = f'{prefix}/{suffix}_norm'
159136
if key not in data:
160137
data[key] = []
@@ -184,15 +161,12 @@ def cached_load(path, env_name, cache):
184161
del data[k]
185162

186163
# Format in millions to avoid overflow in C
187-
try:
188-
data['agent_steps'] = [e/1e6 for e in data['agent_steps']]
189-
except:
190-
breakpoint()
164+
data['agent_steps'] = [e/1e6 for e in data['agent_steps']]
191165
data['train/total_timesteps'] = [e/1e6 for e in data['train/total_timesteps']]
192-
#data['metrics/agent_steps'] = [e/1e6 for e in data['metrics/agent_steps']]
193166
del data['metrics/agent_steps']
194167

195168
# Filter to pareto
169+
'''
196170
steps = data['agent_steps']
197171
costs = data['uptime']
198172
scores = data['env/score']
@@ -201,7 +175,8 @@ def cached_load(path, env_name, cache):
201175
try:
202176
data[k] = [data[k][i] for i in idxs]
203177
except IndexError:
204-
breakpoint()
178+
continue
179+
'''
205180

206181
data['sweep'] = sweep_metadata
207182
return data
@@ -215,8 +190,10 @@ def compute_tsne():
215190
if os.path.exists(cache_file):
216191
cache = json.load(open(cache_file, 'r'))
217192

193+
env_names = sorted(os.listdir('logs'))
218194
for env in env_names:
219-
all_data[env] = cached_load(f'logs/puffer_{env}/*.json', env, cache)
195+
print('Loading: ', env)
196+
all_data[env] = cached_load(f'logs/{env}/*.json', env, cache)
220197

221198
with open(cache_file, 'w') as f:
222199
json.dump(cache, f)
@@ -234,49 +211,31 @@ def compute_tsne():
234211

235212
from sklearn.manifold import TSNE
236213
proj = TSNE(n_components=2)
237-
reduced = None
238-
try:
239-
reduced = proj.fit_transform(normed)
240-
except ValueError:
241-
print('Warning: TSNE failed. Skipping TSNE')
214+
reduced = proj.fit_transform(normed)
242215

243216
row = 0
244217
for env in env_names:
245218
sz = len(all_data[env]['agent_steps'])
246219
all_data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
247220
all_data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
248221

249-
'''
250-
if reduced is not None:
251-
all_data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
252-
all_data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
253-
else:
254-
all_data[env]['tsne1'] = np.random.rand(sz).tolist()
255-
all_data[env]['tsne2'] = np.random.rand(sz).tolist()
256-
'''
257-
258222
row += sz
259-
print(f'Env {env} has {sz} points')
260223

261224
for env in all_data:
262225
dat = all_data[env]
263226
dat = {k: v for k, v in dat.items() if isinstance(v, list)
264227
and len(v) > 0 and isinstance(v[0], (int, float))
265228
and (k == 'train/max_grad_norm' or not k.endswith('_norm'))}
266229
all_data[env] = dat
230+
print(f'Env {env} has {len(dat['env/perf'])} points')
267231
for k, v in dat.items():
268-
try:
269-
print(f'{env}/{k}: {len(v), min(v), max(v)}')
270-
except:
271-
print(f'{env}/{k}: {len(v)}')
232+
if 'env/perf' in k or 'score' in k:
233+
print(f'{env}/{k}: min={min(v)}, max={max(v)}')
272234

273235
for env in all_data:
274236
for k, v in all_data[env].items():
275237
if isinstance(v, list):
276-
try:
277-
all_data[env][k] = ','.join([f'{x:.6g}' for x in v])
278-
except:
279-
breakpoint()
238+
all_data[env][k] = ','.join([f'{x:.6g}' for x in v])
280239

281240
json.dump(all_data, open('resources/constellation/experiments.json', 'w'))
282241

constellation/constellation.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -572,11 +572,13 @@ void copy_hypers_to_clipboard(Env *env, char* buffer, int ary_idx) {
572572
}
573573

574574
char* suffix = slash + 1;
575-
float val = hyper->ary[ary_idx];
576-
if ((int)val == val) {
577-
buffer += sprintf(buffer, "%s = %d\n", suffix, (int)val);
575+
double val = hyper->ary[ary_idx];
576+
if (strcmp(suffix, "agent_steps") == 0 || strcmp(suffix, "total_timesteps") == 0) {
577+
buffer += sprintf(buffer, "%s = %lld\n", suffix, (long long)(val * 1e6));
578+
} else if (val == (long long)val) {
579+
buffer += sprintf(buffer, "%s = %lld\n", suffix, (long long)val);
578580
} else {
579-
buffer += sprintf(buffer, "%s = %f\n", suffix, val);
581+
buffer += sprintf(buffer, "%s = %g\n", suffix, val);
580582
}
581583
}
582584
buffer[0] = '\0';
@@ -793,7 +795,7 @@ int main(void) {
793795
char fig_range1_min[32] = {0};
794796
char fig_range1_max[32] = {0};
795797
float fig_range1_min_val = 0;
796-
float fig_range1_max_val = 1;
798+
float fig_range1_max_val = FLT_MAX;
797799
bool fig_range2_active = false;
798800
int fig_range2_idx = 1;
799801
char fig_range2_min[32] = {0};

ocean/pong/binding.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#define NUM_ATNS 1
44
#define ACT_SIZES {3}
55
#define OBS_TENSOR_T FloatTensor
6-
#define ACT_TYPE DOUBLE
76

87
#define Env Pong
98
#include "vecenv.h"

ocean/pong/pong.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ struct Pong {
1818
Client* client;
1919
Log log;
2020
float* observations;
21-
double* actions;
21+
float* actions;
2222
float* rewards;
2323
float* terminals;
2424
int num_agents;
@@ -69,7 +69,7 @@ void init(Pong* env) {
6969
void allocate(Pong* env) {
7070
init(env);
7171
env->observations = (float*)calloc(8, sizeof(float));
72-
env->actions = (double*)calloc(1, sizeof(double));
72+
env->actions = (float*)calloc(1, sizeof(float));
7373
env->rewards = (float*)calloc(1, sizeof(float));
7474
env->terminals = (float*)calloc(1, sizeof(float));
7575
}
@@ -195,7 +195,7 @@ void c_step(Pong* env) {
195195
// collision with paddle
196196
env->ball_vx = -env->ball_vx;
197197
env->n_bounces += 1;
198-
env->rewards[0] = 0.1; // agent bounced the ball
198+
//env->rewards[0] = 0.1; // agent bounced the ball
199199
// ball speed change
200200
env->ball_vy += env->ball_speed_y_increment * env->paddle_dir;
201201
env->ball_vy = fminf(fmaxf(env->ball_vy, -env->ball_max_speed_y), env->ball_max_speed_y);

pufferlib/pufferl.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full detail0
1+
## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full details
22
# This is the same as python -m pufferlib.pufferl [train | eval | sweep] [env_name] [optional args]
3-
# Distributed example: torchrun --standalone --nnodes=1 --nproc-per-node=6 -m pufferlib.pufferl train puffer_nmmo3
43

54
import warnings
65
warnings.filterwarnings('error', category=RuntimeWarning)
@@ -484,10 +483,10 @@ def load_config(env_name):
484483
parsed = vars(parser.parse_args())
485484
args = defaultdict(dict)
486485
for key, value in parsed.items():
487-
next = args
486+
nxt = args
488487
for subkey in key.split('.'):
489-
prev = next
490-
next = next.setdefault(subkey, {})
488+
prev = nxt
489+
nxt = nxt.setdefault(subkey, {})
491490

492491
prev[subkey] = value
493492

pufferlib/sweep.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99
import pufferlib
10-
from pufferlib.pufferl import unroll_nested_dict
1110

1211
import torch
1312
import gpytorch
@@ -24,7 +23,6 @@
2423

2524
EPSILON = 1e-6
2625

27-
# TODO: move
2826
def unroll_nested_dict(d):
2927
if not isinstance(d, dict):
3028
return d

tests/profile_kernels.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ static int ini_handler_vec(void* user, const char* section,
614614

615615
EnvSpeedArgs* create_envspeed(int total_agents, int num_buffers, int num_threads, int horizon) {
616616
char ini_path[512];
617-
snprintf(ini_path, sizeof(ini_path), "config/ocean/%s.ini", TOSTRING(ENV_NAME));
617+
snprintf(ini_path, sizeof(ini_path), "config/%s.ini", TOSTRING(ENV_NAME));
618618

619619
VecDefaults defaults = {0};
620620
ini_parse(ini_path, ini_handler_vec, &defaults);

0 commit comments

Comments
 (0)