Skip to content

Commit 4a2f231

Browse files
committed
fixes
1 parent 9b5d6d1 commit 4a2f231

8 files changed

Lines changed: 59 additions & 89 deletions

File tree

config/pong.ini

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
env_name = pong
33

44
[vec]
5-
total_agents = 4096
5+
total_agents = 1024
6+
num_buffers = 1
7+
num_threads = 16
68

79
[env]
810
width = 500
@@ -17,28 +19,39 @@ ball_initial_speed_y = 1
1719
ball_speed_y_increment = 3
1820
ball_max_speed_y = 13
1921
max_score = 21
20-
frameskip = 4
22+
frameskip = 8
2123
continuous = 0
2224

25+
[policy]
26+
hidden_size = 32
27+
num_layers = 1
28+
expansion_factor = 1
29+
2330
[train]
24-
total_timesteps = 50_000_000
25-
beta1 = 0.8946507418260217
26-
beta2 = 0.9
31+
gpus = 1
32+
seed = 42
33+
total_timesteps = 5000000
34+
learning_rate = 0.1
35+
anneal_lr = 1
36+
min_lr_ratio = 0
37+
gamma = 0.934713
38+
gae_lambda = 0.991989
39+
replay_ratio = 3.05148
40+
clip_coef = 0.822764
41+
vf_coef = 5
42+
vf_clip_coef = 4.95789
43+
max_grad_norm = 0.752747
44+
ent_coef = 0.000402915
45+
beta1 = 0.5
46+
beta2 = 0.947709
2747
eps = 0.0001
28-
horizon = 64
29-
clip_coef = 0.19696765958267629
30-
ent_coef = 0.0005690816545012474
31-
gae_lambda = 0.747650023961198
32-
gamma = 0.9997053654668936
33-
learning_rate = 0.044482546441415506
34-
max_grad_norm = 2.2356112188495723
3548
minibatch_size = 32768
36-
prio_alpha = 0.98967001208896
37-
prio_beta0 = 0.09999999999999998
38-
vf_clip_coef = 2.178492167689251
39-
vf_coef = 1.6832989594296321
40-
vtrace_c_clip = 2.878171091654008
41-
vtrace_rho_clip = 0.7876748061547312
49+
horizon = 32
50+
vtrace_rho_clip = 4.87841
51+
vtrace_c_clip = 1.48608
52+
prio_alpha = 0.242089
53+
prio_beta0 = 0.807575
54+
use_rnn = 1
4255

4356
[sweep.train.total_timesteps]
4457
distribution = log_normal

constellation/cache_data.py

Lines changed: 13 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,10 @@
1+
# Merges log files + filters to pareto-optimal points wrt steps, wall-clock, and score. Comment that out if you want the full dataset. Also does TSNE, which is why I haven't bothered porting to C.
12
import numpy as np
23

34
import json
45
import glob
56
import os
67

7-
env_names = sorted([
8-
'breakout',
9-
#'impulse_wars',
10-
'pacman',
11-
'tetris',
12-
#'g2048',
13-
#'moba',
14-
'pong',
15-
#'tower_climb',
16-
#'grid',
17-
'freeway',
18-
'connect4',
19-
'nmmo3',
20-
#'snake',
21-
'tripletriad'
22-
])
23-
248
HYPERS = [
259
'train/learning_rate',
2610
'train/ent_coef',
@@ -147,14 +131,7 @@ def cached_load(path, env_name, cache):
147131

148132
for hyper in HYPERS:
149133
prefix, suffix = hyper.split('/')
150-
#if prefix not in sweep_metadata:
151-
# continue
152-
153134
group = sweep_metadata[prefix]
154-
#if suffix not in group:
155-
# continue
156-
157-
158135
key = f'{prefix}/{suffix}_norm'
159136
if key not in data:
160137
data[key] = []
@@ -184,15 +161,12 @@ def cached_load(path, env_name, cache):
184161
del data[k]
185162

186163
# Format in millions to avoid overflow in C
187-
try:
188-
data['agent_steps'] = [e/1e6 for e in data['agent_steps']]
189-
except:
190-
breakpoint()
164+
data['agent_steps'] = [e/1e6 for e in data['agent_steps']]
191165
data['train/total_timesteps'] = [e/1e6 for e in data['train/total_timesteps']]
192-
#data['metrics/agent_steps'] = [e/1e6 for e in data['metrics/agent_steps']]
193166
del data['metrics/agent_steps']
194167

195168
# Filter to pareto
169+
'''
196170
steps = data['agent_steps']
197171
costs = data['uptime']
198172
scores = data['env/score']
@@ -201,7 +175,8 @@ def cached_load(path, env_name, cache):
201175
try:
202176
data[k] = [data[k][i] for i in idxs]
203177
except IndexError:
204-
breakpoint()
178+
continue
179+
'''
205180

206181
data['sweep'] = sweep_metadata
207182
return data
@@ -215,8 +190,10 @@ def compute_tsne():
215190
if os.path.exists(cache_file):
216191
cache = json.load(open(cache_file, 'r'))
217192

193+
env_names = sorted(os.listdir('logs'))
218194
for env in env_names:
219-
all_data[env] = cached_load(f'logs/puffer_{env}/*.json', env, cache)
195+
print('Loading: ', env)
196+
all_data[env] = cached_load(f'logs/{env}/*.json', env, cache)
220197

221198
with open(cache_file, 'w') as f:
222199
json.dump(cache, f)
@@ -234,49 +211,31 @@ def compute_tsne():
234211

235212
from sklearn.manifold import TSNE
236213
proj = TSNE(n_components=2)
237-
reduced = None
238-
try:
239-
reduced = proj.fit_transform(normed)
240-
except ValueError:
241-
print('Warning: TSNE failed. Skipping TSNE')
214+
reduced = proj.fit_transform(normed)
242215

243216
row = 0
244217
for env in env_names:
245218
sz = len(all_data[env]['agent_steps'])
246219
all_data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
247220
all_data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
248221

249-
'''
250-
if reduced is not None:
251-
all_data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
252-
all_data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
253-
else:
254-
all_data[env]['tsne1'] = np.random.rand(sz).tolist()
255-
all_data[env]['tsne2'] = np.random.rand(sz).tolist()
256-
'''
257-
258222
row += sz
259-
print(f'Env {env} has {sz} points')
260223

261224
for env in all_data:
262225
dat = all_data[env]
263226
dat = {k: v for k, v in dat.items() if isinstance(v, list)
264227
and len(v) > 0 and isinstance(v[0], (int, float))
265228
and (k == 'train/max_grad_norm' or not k.endswith('_norm'))}
266229
all_data[env] = dat
230+
print(f'Env {env} has {len(dat['env/perf'])} points')
267231
for k, v in dat.items():
268-
try:
269-
print(f'{env}/{k}: {len(v), min(v), max(v)}')
270-
except:
271-
print(f'{env}/{k}: {len(v)}')
232+
if 'env/perf' in k or 'score' in k:
233+
print(f'{env}/{k}: min={min(v)}, max={max(v)}')
272234

273235
for env in all_data:
274236
for k, v in all_data[env].items():
275237
if isinstance(v, list):
276-
try:
277-
all_data[env][k] = ','.join([f'{x:.6g}' for x in v])
278-
except:
279-
breakpoint()
238+
all_data[env][k] = ','.join([f'{x:.6g}' for x in v])
280239

281240
json.dump(all_data, open('resources/constellation/experiments.json', 'w'))
282241

constellation/constellation.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -572,11 +572,13 @@ void copy_hypers_to_clipboard(Env *env, char* buffer, int ary_idx) {
572572
}
573573

574574
char* suffix = slash + 1;
575-
float val = hyper->ary[ary_idx];
576-
if ((int)val == val) {
577-
buffer += sprintf(buffer, "%s = %d\n", suffix, (int)val);
575+
double val = hyper->ary[ary_idx];
576+
if (strcmp(suffix, "agent_steps") == 0 || strcmp(suffix, "total_timesteps") == 0) {
577+
buffer += sprintf(buffer, "%s = %lld\n", suffix, (long long)(val * 1e6));
578+
} else if (val == (long long)val) {
579+
buffer += sprintf(buffer, "%s = %lld\n", suffix, (long long)val);
578580
} else {
579-
buffer += sprintf(buffer, "%s = %f\n", suffix, val);
581+
buffer += sprintf(buffer, "%s = %g\n", suffix, val);
580582
}
581583
}
582584
buffer[0] = '\0';
@@ -793,7 +795,7 @@ int main(void) {
793795
char fig_range1_min[32] = {0};
794796
char fig_range1_max[32] = {0};
795797
float fig_range1_min_val = 0;
796-
float fig_range1_max_val = 1;
798+
float fig_range1_max_val = FLT_MAX;
797799
bool fig_range2_active = false;
798800
int fig_range2_idx = 1;
799801
char fig_range2_min[32] = {0};

ocean/pong/binding.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#define NUM_ATNS 1
44
#define ACT_SIZES {3}
55
#define OBS_TENSOR_T FloatTensor
6-
#define ACT_TYPE DOUBLE
76

87
#define Env Pong
98
#include "vecenv.h"

ocean/pong/pong.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ struct Pong {
1818
Client* client;
1919
Log log;
2020
float* observations;
21-
double* actions;
21+
float* actions;
2222
float* rewards;
2323
float* terminals;
2424
int num_agents;
@@ -69,7 +69,7 @@ void init(Pong* env) {
6969
void allocate(Pong* env) {
7070
init(env);
7171
env->observations = (float*)calloc(8, sizeof(float));
72-
env->actions = (double*)calloc(1, sizeof(double));
72+
env->actions = (float*)calloc(1, sizeof(float));
7373
env->rewards = (float*)calloc(1, sizeof(float));
7474
env->terminals = (float*)calloc(1, sizeof(float));
7575
}
@@ -195,7 +195,7 @@ void c_step(Pong* env) {
195195
// collision with paddle
196196
env->ball_vx = -env->ball_vx;
197197
env->n_bounces += 1;
198-
env->rewards[0] = 0.1; // agent bounced the ball
198+
//env->rewards[0] = 0.1; // agent bounced the ball
199199
// ball speed change
200200
env->ball_vy += env->ball_speed_y_increment * env->paddle_dir;
201201
env->ball_vy = fminf(fmaxf(env->ball_vy, -env->ball_max_speed_y), env->ball_max_speed_y);

pufferlib/pufferl.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full detail0
1+
## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full details
22
# This is the same as python -m pufferlib.pufferl [train | eval | sweep] [env_name] [optional args]
3-
# Distributed example: torchrun --standalone --nnodes=1 --nproc-per-node=6 -m pufferlib.pufferl train puffer_nmmo3
43

54
import warnings
65
warnings.filterwarnings('error', category=RuntimeWarning)
@@ -484,10 +483,10 @@ def load_config(env_name):
484483
parsed = vars(parser.parse_args())
485484
args = defaultdict(dict)
486485
for key, value in parsed.items():
487-
next = args
486+
nxt = args
488487
for subkey in key.split('.'):
489-
prev = next
490-
next = next.setdefault(subkey, {})
488+
prev = nxt
489+
nxt = nxt.setdefault(subkey, {})
491490

492491
prev[subkey] = value
493492

pufferlib/sweep.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99
import pufferlib
10-
from pufferlib.pufferl import unroll_nested_dict
1110

1211
import torch
1312
import gpytorch
@@ -24,7 +23,6 @@
2423

2524
EPSILON = 1e-6
2625

27-
# TODO: move
2826
def unroll_nested_dict(d):
2927
if not isinstance(d, dict):
3028
return d

tests/profile_kernels.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ static int ini_handler_vec(void* user, const char* section,
614614

615615
EnvSpeedArgs* create_envspeed(int total_agents, int num_buffers, int num_threads, int horizon) {
616616
char ini_path[512];
617-
snprintf(ini_path, sizeof(ini_path), "config/ocean/%s.ini", TOSTRING(ENV_NAME));
617+
snprintf(ini_path, sizeof(ini_path), "config/%s.ini", TOSTRING(ENV_NAME));
618618

619619
VecDefaults defaults = {0};
620620
ini_parse(ini_path, ini_handler_vec, &defaults);

0 commit comments

Comments
 (0)