Skip to content

Commit a919ca7

Browse files
committed
PufferNet fixes. Pong, Breakout, and MOBA local policies. Pong fixes.
1 parent 8e51417 commit a919ca7

9 files changed

Lines changed: 246 additions & 390 deletions

File tree

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,10 @@
22
c_*.c
33
pufferlib/extensions.c
44
pufferlib/puffernet.c
5-
src/libstatic*
6-
75
logs/
86

97
# Build dir
108
build/
11-
build_web/
129

1310
# hipified cuda extensions dir [HIP/ROCM]
1411
pufferlib/extensions/hip/

README.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
![figure](https://pufferai.github.io/source/resource/header.png)
22

3-
[![PyPI version](https://badge.fury.io/py/pufferlib.svg)](https://badge.fury.io/py/pufferlib)
4-
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pufferlib)
53
![Github Actions](https://github.com/PufferAI/PufferLib/actions/workflows/install.yml/badge.svg)
64
[![](https://dcbadge.vercel.app/api/server/spT4huaGYV?style=plastic)](https://discord.gg/spT4huaGYV)
7-
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez5341)](https://twitter.com/jsuarez5341)
5+
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez)](https://twitter.com/jsuarez)
86

9-
PufferLib is the reinforcement learning library I wish existed during my PhD. It started as a compatibility layer to make working with complex environments a breeze. Now, it's a high-performance toolkit for research and industry with optimized parallel simulation, environments that run and train at 1M+ steps/second, and tons of quality of life improvements for practitioners. All our tools are free and open source. We also offer priority service for companies, startups, and labs!
7+
PufferLib is a fast and sane reinforcement learning library that can train tiny, super-human models in seconds. The included learning algorithm, hyperparameter tuning, and simulation methods are the product of our own research. All our tools are free and open source. Need a high-performance environment for your application? We build them professionally and offer training + extended support. Contact jsuarez🐡puffer🐡ai.
108

119
![Trailer](https://github.com/PufferAI/puffer.ai/blob/main/docs/assets/puffer_2.gif?raw=true)
1210

13-
All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib Documentation"). @jsuarez5341 on [Discord](https://discord.gg/puffer) for support -- post here before opening issues. We're always looking for new contributors, too!
11+
All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib Documentation"). Reach out to @jsuarez5341 on [Discord](https://discord.gg/puffer) for support. Post there before opening issues. We're always looking for new contributors!
1412

1513
## Star to puff up the project!
1614

build.sh

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -196,15 +196,7 @@ export CCACHE_BASEDIR="$(pwd)"
196196
export CCACHE_COMPILERCHECK=content
197197
NVCC="ccache $CUDA_HOME/bin/nvcc"
198198
CC="${CC:-$(command -v ccache >/dev/null && echo 'ccache clang' || echo 'clang')}"
199-
if [ -n "$NVCC_ARCH" ]; then
200-
ARCH=$NVCC_ARCH
201-
elif command -v nvidia-smi &>/dev/null; then
202-
GPU_CC=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '.')
203-
ARCH=${GPU_CC:+sm_$GPU_CC}
204-
ARCH=${ARCH:-native}
205-
else
206-
ARCH=native
207-
fi
199+
ARCH=${NVCC_ARCH:-native}
208200

209201
PYTHON_INCLUDE=$(python -c "import sysconfig; print(sysconfig.get_path('include'))")
210202
PYBIND_INCLUDE=$(python -c "import pybind11; print(pybind11.get_include())")

config/pong.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ continuous = 0
2525
[policy]
2626
hidden_size = 32
2727
num_layers = 1
28-
expansion_factor = 1
2928

3029
[train]
3130
gpus = 1

ocean/breakout/breakout.c

Lines changed: 5 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
#include "puffernet.h"
44

55
void demo() {
6-
Weights* weights = load_weights("resources/breakout/breakout_weights.bin", 147844);
6+
Weights* weights = load_weights("resources/breakout/breakout_weights.bin", 32384);
77
int logit_sizes[1] = {3};
8-
LinearLSTM* net = make_linearlstm(weights, 1, 118, logit_sizes, 1);
8+
PufferNet* net = make_puffernet(weights, 1, 118, 64, 2, logit_sizes, 1);
99

1010
Breakout env = {
1111
.frameskip = 1,
@@ -46,109 +46,19 @@ void demo() {
4646
}
4747
} else if (frame % 4 == 0) {
4848
// Apply frameskip outside the env for smoother rendering
49-
int* actions = (int*)env.actions;
50-
forward_linearlstm(net, env.observations, actions);
51-
env.actions[0] = actions[0];
49+
forward_puffernet(net, env.observations, env.actions);
5250
}
5351

5452
frame = (frame + 1) % 4;
5553
c_step(&env);
5654
c_render(&env);
5755
}
58-
free_linearlstm(net);
56+
free_puffernet(net);
5957
free(weights);
6058
free_allocated(&env);
6159
close_client(env.client);
6260
}
6361

64-
void test_performance(int timeout) {
65-
Breakout env = {
66-
.num_agents = 1,
67-
.frameskip = 4,
68-
.width = 576,
69-
.height = 330,
70-
.initial_paddle_width = 62,
71-
.paddle_width = 62,
72-
.paddle_height = 8,
73-
.ball_width = 32,
74-
.ball_height = 32,
75-
.brick_width = 32,
76-
.brick_height = 12,
77-
.brick_rows = 6,
78-
.brick_cols = 18,
79-
.initial_ball_speed = 256,
80-
.max_ball_speed = 448,
81-
.paddle_speed = 620,
82-
.continuous = 0,
83-
};
84-
allocate(&env);
85-
c_reset(&env);
86-
87-
int start = time(NULL);
88-
int num_steps = 0;
89-
while (time(NULL) - start < timeout) {
90-
for (int i = 0; i < 1000; i++) {
91-
//env.actions[0] = 1;//rand() % 3;
92-
c_step(&env);
93-
num_steps++;
94-
}
95-
}
96-
97-
int end = time(NULL);
98-
float sps = num_steps / (end - start);
99-
printf("Test Environment SPS: %f\n", sps);
100-
free_allocated(&env);
101-
}
102-
103-
void test_performance_multi(int num_envs, int timeout) {
104-
Breakout* envs = (Breakout*)calloc(num_envs, sizeof(Breakout));
105-
for (int i = 0; i < num_envs; i++) {
106-
envs[i] = (Breakout){
107-
.num_agents = 1,
108-
.frameskip = 4,
109-
.width = 576,
110-
.height = 330,
111-
.initial_paddle_width = 62,
112-
.paddle_width = 62,
113-
.paddle_height = 8,
114-
.ball_width = 32,
115-
.ball_height = 32,
116-
.brick_width = 32,
117-
.brick_height = 12,
118-
.brick_rows = 6,
119-
.brick_cols = 18,
120-
.initial_ball_speed = 256,
121-
.max_ball_speed = 448,
122-
.paddle_speed = 620,
123-
.continuous = 0,
124-
};
125-
allocate(&envs[i]);
126-
c_reset(&envs[i]);
127-
}
128-
129-
int start = time(NULL);
130-
int num_steps = 0;
131-
while (time(NULL) - start < timeout) {
132-
for (int i = 0; i < num_envs; i++) {
133-
envs[i].actions[0] = 1;
134-
c_step(&envs[i]);
135-
num_steps++;
136-
}
137-
}
138-
139-
int end = time(NULL);
140-
float sps = num_steps / (end - start);
141-
printf("Test Environment SPS: %f\n", sps);
142-
143-
for (int i = 0; i < num_envs; i++) {
144-
free_allocated(&envs[i]);
145-
}
146-
free(envs);
147-
}
148-
149-
15062
int main() {
151-
//demo();
152-
//test_performance(5);
153-
test_performance_multi(65536, 5);
63+
demo();
15464
}

0 commit comments

Comments
 (0)