PufferAI
diff --git a/.gitignore b/.gitignore
Lines changed: 0 additions & 3 deletions
diff --git a/README.md b/README.md
Lines changed: 3 additions & 5 deletions
diff --git a/build.sh b/build.sh
Lines changed: 1 addition & 9 deletions
diff --git a/config/pong.ini b/config/pong.ini
Lines changed: 0 additions & 1 deletion
diff --git a/ocean/breakout/breakout.c b/ocean/breakout/breakout.c
Lines changed: 5 additions & 95 deletions
@@ -2,13 +2,10 @@
 c_*.c
 pufferlib/extensions.c
 pufferlib/puffernet.c
-src/libstatic*
-
 logs/
 
 # Build dir
 build/
-build_web/
 
 # hipified cuda extensions dir [HIP/ROCM]
 pufferlib/extensions/hip/
 
@@ -1,16 +1,14 @@
 ![figure](https://pufferai.github.io/source/resource/header.png)
 
-[![PyPI version](https://badge.fury.io/py/pufferlib.svg)](https://badge.fury.io/py/pufferlib)
-![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pufferlib)
 ![Github Actions](https://github.com/PufferAI/PufferLib/actions/workflows/install.yml/badge.svg)
 [![](https://dcbadge.vercel.app/api/server/spT4huaGYV?style=plastic)](https://discord.gg/spT4huaGYV)
-[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez5341)](https://twitter.com/jsuarez5341)
+[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez)](https://twitter.com/jsuarez)
 
-PufferLib is the reinforcement learning library I wish existed during my PhD. It started as a compatibility layer to make working with complex environments a breeze. Now, it's a high-performance toolkit for research and industry with optimized parallel simulation, environments that run and train at 1M+ steps/second, and tons of quality of life improvements for practitioners. All our tools are free and open source. We also offer priority service for companies, startups, and labs!
+PufferLib is a fast and sane reinforcement learning library that can train tiny, super-human models in seconds. The included learning algorithm, hyperparameter tuning, and simulation methods are the product of our own research. All our tools are free and open source. Need a high performance environment for your application? We build them professionally and offer training + extended support. Contact jsuarez🐡puffer🐡ai.
 
 ![Trailer](https://github.com/PufferAI/puffer.ai/blob/main/docs/assets/puffer_2.gif?raw=true)
 
-All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib Documentation"). @jsuarez5341 on [Discord](https://discord.gg/puffer) for support -- post here before opening issues. We're always looking for new contributors, too!
+All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib Documentation"). @jsuarez5341 on [Discord](https://discord.gg/puffer) for support. Post there before opening issues. We're always looking for new contributors!
 
 ## Star to puff up the project!
 
 
@@ -196,15 +196,7 @@ export CCACHE_BASEDIR="$(pwd)"
 export CCACHE_COMPILERCHECK=content
 NVCC="ccache $CUDA_HOME/bin/nvcc"
 CC="${CC:-$(command -v ccache >/dev/null && echo 'ccache clang' || echo 'clang')}"
-if [ -n "$NVCC_ARCH" ]; then
-    ARCH=$NVCC_ARCH
-elif command -v nvidia-smi &>/dev/null; then
-    GPU_CC=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '.')
-    ARCH=${GPU_CC:+sm_$GPU_CC}
-    ARCH=${ARCH:-native}
-else
-    ARCH=native
-fi
+ARCH=${NVCC_ARCH:-native}
 
 PYTHON_INCLUDE=$(python -c "import sysconfig; print(sysconfig.get_path('include'))")
 PYBIND_INCLUDE=$(python -c "import pybind11; print(pybind11.get_include())")
 
@@ -25,7 +25,6 @@ continuous = 0
 [policy]
 hidden_size = 32
 num_layers = 1
-expansion_factor = 1
 
 [train]
 gpus = 1
 
@@ -3,9 +3,9 @@
 #include "puffernet.h"
 
 void demo() {
-    Weights* weights = load_weights("resources/breakout/breakout_weights.bin", 147844);
+    Weights* weights = load_weights("resources/breakout/breakout_weights.bin", 32384);
     int logit_sizes[1] = {3};
-    LinearLSTM* net = make_linearlstm(weights, 1, 118, logit_sizes, 1);
+    PufferNet* net = make_puffernet(weights, 1, 118, 64, 2, logit_sizes, 1);
 
     Breakout env = {
         .frameskip = 1,
@@ -46,109 +46,19 @@ void demo() {
             }
         } else if (frame % 4 == 0) {
             // Apply frameskip outside the env for smoother rendering
-            int* actions = (int*)env.actions;
-            forward_linearlstm(net, env.observations, actions);
-            env.actions[0] = actions[0];
+            forward_puffernet(net, env.observations, env.actions);
         }
 
         frame = (frame + 1) % 4;
         c_step(&env);
         c_render(&env);
     }
-    free_linearlstm(net);
+    free_puffernet(net);
     free(weights);
     free_allocated(&env);
     close_client(env.client);
 }
 
-void test_performance(int timeout) {
-    Breakout env = {
-        .num_agents = 1,
-        .frameskip = 4,
-        .width = 576,
-        .height = 330,
-        .initial_paddle_width = 62,
-        .paddle_width = 62,
-        .paddle_height = 8,
-        .ball_width = 32,
-        .ball_height = 32,
-        .brick_width = 32,
-        .brick_height = 12,
-        .brick_rows = 6,
-        .brick_cols = 18,
-        .initial_ball_speed = 256,
-        .max_ball_speed = 448,
-        .paddle_speed = 620,
-        .continuous = 0,
-    };
-    allocate(&env);
-    c_reset(&env);
-
-    int start = time(NULL);
-    int num_steps = 0;
-    while (time(NULL) - start < timeout) {
-        for (int i = 0; i < 1000; i++) {
-            //env.actions[0] = 1;//rand() % 3;
-            c_step(&env);
-            num_steps++;
-        }
-    }
-
-    int end = time(NULL);
-    float sps = num_steps / (end - start);
-    printf("Test Environment SPS: %f\n", sps);
-    free_allocated(&env);
-}
-
-void test_performance_multi(int num_envs, int timeout) {
-    Breakout* envs = (Breakout*)calloc(num_envs, sizeof(Breakout));
-    for (int i = 0; i < num_envs; i++) {
-        envs[i] = (Breakout){
-            .num_agents = 1,
-            .frameskip = 4,
-            .width = 576,
-            .height = 330,
-            .initial_paddle_width = 62,
-            .paddle_width = 62,
-            .paddle_height = 8,
-            .ball_width = 32,
-            .ball_height = 32,
-            .brick_width = 32,
-            .brick_height = 12,
-            .brick_rows = 6,
-            .brick_cols = 18,
-            .initial_ball_speed = 256,
-            .max_ball_speed = 448,
-            .paddle_speed = 620,
-            .continuous = 0,
-        };
-        allocate(&envs[i]);
-        c_reset(&envs[i]);
-    }
-
-    int start = time(NULL);
-    int num_steps = 0;
-    while (time(NULL) - start < timeout) {
-        for (int i = 0; i < num_envs; i++) {
-            envs[i].actions[0] = 1;
-            c_step(&envs[i]);
-            num_steps++;
-        }
-    }
-
-    int end = time(NULL);
-    float sps = num_steps / (end - start);
-    printf("Test Environment SPS: %f\n", sps);
-
-    for (int i = 0; i < num_envs; i++) {
-        free_allocated(&envs[i]);
-    }
-    free(envs);
-}
-
-
 int main() {
-    //demo();
-    //test_performance(5);
-    test_performance_multi(65536, 5);
+    demo();
 }