From b8a30f60590293f49f219ab80525e63a429b8d2d Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 18:53:17 -0700 Subject: [PATCH 01/51] feat: add bat environment --- BAT_SPEC.md | 451 ++++++++++++++++++++++ config/bat.ini | 61 +++ ocean/bat/bat.c | 57 +++ ocean/bat/bat.h | 637 ++++++++++++++++++++++++++++++++ ocean/bat/binding.c | 53 +++ ocean/bat/tests/run_all.sh | 13 + ocean/bat/tests/test_bat_core.c | 207 +++++++++++ 7 files changed, 1479 insertions(+) create mode 100644 BAT_SPEC.md create mode 100644 config/bat.ini create mode 100644 ocean/bat/bat.c create mode 100644 ocean/bat/bat.h create mode 100644 ocean/bat/binding.c create mode 100644 ocean/bat/tests/run_all.sh create mode 100644 ocean/bat/tests/test_bat_core.c diff --git a/BAT_SPEC.md b/BAT_SPEC.md new file mode 100644 index 0000000000..66a40ed953 --- /dev/null +++ b/BAT_SPEC.md @@ -0,0 +1,451 @@ +# Bat Environment Spec + +Status: draft baseline; ready for implementation planning after review + +Workspace: `/home/claude/pathfinder` + +Target branch: `bat` + +Target env name: `bat` + +## Intent + +Build a single-agent PufferLib Ocean environment inspired by bat echolocation. +The agent controls a bat flying in a 2D arena with walls, static obstacles, and +a moving bug target. The bat must avoid collisions and catch the bug using +binaural acoustic returns from self-generated chirps rather than direct map or +position observations. + +The first version should copy the small native-C env style used by Breakout: +fixed-size observations, a compact action space, simple deterministic physics, +and enough instrumentation to make training failures debuggable. + +The core challenge is active sensing. The policy must learn both how to move +and how to emit useful chirps. The environment should make chirping meaningful +without turning v1 into a full acoustic wave simulator. 
+ +## Research Grounding + +Range cue: + +- Echolocating bats primarily estimate target distance from the delay between + an emitted call and the returning echo. +- Source: https://pmc.ncbi.nlm.nih.gov/articles/PMC9157489/ +- Source: https://pmc.ncbi.nlm.nih.gov/articles/PMC7888678/ + +Binaural direction cue: + +- Left/right ear differences are biologically plausible and useful. Bats use + binaural and spectral cues, including head-related transfer effects, to infer + sound direction. +- Source: https://pubmed.ncbi.nlm.nih.gov/15658710/ +- Source: https://pmc.ncbi.nlm.nih.gov/articles/PMC4555857/ + +Chirp design: + +- Linear frequency-modulated chirps are standard in radar and sonar because + matched filtering can compress a long emitted pulse into a sharp return peak. + Bandwidth controls range resolution, and the time-bandwidth product controls + processing gain. +- Source: https://rfessentials.com/rf-knowledge-base/how-does-pulse-compression-improve-the-range-resolution-and-sensitivity-of-a-rad/ + +Doppler: + +- Doppler shift is a useful velocity cue, especially for moving targets and + insect-like prey. Some bat species actively compensate call frequency to keep + important echo bands in a sensitive range. +- Source: https://pmc.ncbi.nlm.nih.gov/articles/PMC2438418/ +- Source: https://www.nature.com/articles/s41598-018-22880-y + +Fast signal processing: + +- FFTW is the high-performance reference point for C FFT design, but v1 should + avoid adding FFTW as a dependency. A fixed-size radix-2 FFT or precomputed + analytic matched-filter bins are preferred. +- Source: https://www.fftw.org/fftw2_doc/fftw_1.html +- Source: https://web.stanford.edu/class/cme324/classics/cooley-tukey.pdf + +Reflection model: + +- Full wave acoustics is out of scope for v1. A geometric echo model is the + right first approximation: sound travels in straight paths, reflects from + objects, and returns with delay, angle-dependent ear gain, attenuation, and + optional Doppler. 
+- Source: https://au.mathworks.com/help/audio/ug/room-impulse-response-simulation-with-image-source-method-and-hrtf-interpolation.html + +## Environment Model + +World: + +- 2D continuous rectangular arena. +- Arena dimensions are fixed by config. +- Boundaries are solid walls. +- Static obstacles are axis-aligned rectangles. +- The bug is a moving circular target. +- The bat is a moving circular agent with heading, speed, turn rate, and + collision radius. + +Physics: + +- Fixed control/physics timestep, default `1/60` second. +- Bat motion is acceleration-limited and turn-rate-limited. +- Bug motion uses a simple deterministic or seeded random policy. +- The bug reflects from walls and obstacles. +- The bat collides with walls and obstacles. +- Catch success occurs when bat and bug circles overlap. +- The `1/60` second tick is not the acoustic sample rate. Echo delays are + computed analytically with fractional timing inside each env step. + +Acoustics: + +- Walls, obstacles, and the bug reflect chirps. +- Static reflectors provide range and direction cues. +- The bug is the only moving reflector, so it is the main Doppler source. +- The env computes compact acoustic features analytically instead of storing or + convolving high-rate audio samples. +- Sound speed is configurable and artificial. The default should be much slower + than real air acoustics so echo timing is learnable in a small game arena. +- Start with `sound_speed = 100.0` world units per second. +- Every echo contribution has: + - two-way distance from mouth/source to reflector to each ear, + - delay derived from speed of sound, + - amplitude falloff from distance and reflector strength, + - left/right ear gain from relative azimuth, + - Doppler shift from reflector radial velocity. + +Point-reflector renderer: + +- v1 should represent walls and obstacle surfaces as stationary point + reflectors. 
+- Sample each wall and obstacle edge at a fixed spacing, default + `reflector_spacing = 1.0` world unit. +- The bug contributes one moving circular/point reflector at its center. +- This avoids wavefront bookkeeping while preserving range, angle, and Doppler + learning signals. + +First-order echoes only: + +- v1 should include direct echo paths from visible surfaces and the bug. +- Multiple-bounce reverberation is out of scope for v1. +- Occlusion can be approximated by ray intersection against the nearest + obstacle along the bat-to-reflector path. +- Segment-level specular reflection and raw waveform propagation are later + variants, not the v1 baseline. + +## Chirp Model + +The policy controls chirp parameters rather than emitting arbitrary audio. + +Chirp parameters: + +- `chirp_start_freq` +- `chirp_end_freq` +- `chirp_duration` + +Derived fields: + +- `chirp_bandwidth = abs(chirp_end_freq - chirp_start_freq)` +- `chirp_slope = (chirp_end_freq - chirp_start_freq) / chirp_duration` +- `chirp_age_ticks = ticks since most recent emitted chirp` + +Defaults: + +- Frequency range is normalized in the policy/action interface and mapped to a + narrow ultrasonic band in the env. +- Duration is normalized in the policy/action interface and mapped to a small + tick/subtick window. +- Up-chirps and down-chirps are both legal. +- A zero-amplitude/no-chirp action should be available so the bat is not forced + to emit every tick. + +Implementation direction: + +- Start with analytic range/Doppler bins, not literal audio buffers. +- If an FFT is needed, use a fixed power-of-two size with precomputed twiddle + factors. +- Prefer precomputed chirp templates or direct bin accumulation for v1 because + this env will run thousands of agents in parallel. +- The v1 observation bins are not raw FFT bins. They are compact + matched-filter-like echo features derived from chirp parameters, delay, + amplitude, and normalized Doppler. 
+ +## Action Space + +Use a small multi-discrete action space. + +Recommended v1 action heads: + +- `move`: 5 values + - `0`: no thrust + - `1`: thrust forward + - `2`: brake/reverse + - `3`: strafe left + - `4`: strafe right +- `turn`: 3 values + - `0`: no turn + - `1`: turn left + - `2`: turn right +- `chirp_start_freq`: discrete bins, default `8` +- `chirp_end_freq`: discrete bins, default `8` +- `chirp_duration`: discrete bins, default `4` +- `chirp_emit`: 2 values + - `0`: do not emit a chirp this tick + - `1`: emit chirp using selected chirp parameters + +Initial action sizes: + +- `ACT_SIZES {5, 3, 8, 8, 4, 2}` +- `NUM_ATNS 6` + +Rationale: + +- Multi-discrete actions let the agent combine flight and active sensing. +- Discrete chirp bins keep the policy simple and cheap. +- Continuous actions can be a later variant after the first training baseline + is understood. + +## Observation Space + +Do not expose absolute position, absolute bug position, obstacle map, or global +heading. + +Observation layout: + +1. `left_range_energy[16]` +2. `left_doppler_energy[16]` +3. `right_range_energy[16]` +4. `right_doppler_energy[16]` +5. `chirp_age_norm` +6. `last_chirp_start_freq_norm` +7. `last_chirp_end_freq_norm` +8. `last_chirp_duration_norm` +9. `forward_speed_norm` +10. `turn_rate_norm` + +Initial observation size: + +- `OBS_SIZE = 70` + +Echo bins: + +- Each ear receives 16 range-energy bins and 16 Doppler-energy bins. +- Bins represent compact matched-filter-like range and Doppler energy, not raw + audio and not direct FFT bins. +- The frequency range is intentionally narrow, and bat/bug speeds are bounded, + so normalized Doppler can fit in a compact representation. +- Values are normalized to a bounded range before policy input. +- Nearer and stronger reflectors produce larger bin energy. +- Bug echoes can be distinguished statistically because the bug moves and + produces Doppler-shifted returns. + +Range bins: + +- Range bins accumulate echo energy by delay. 
+- Bin `0` represents the nearest useful echo distance. +- The last bin represents `max_echo_range`. +- Echoes beyond `max_echo_range` are ignored. + +Doppler bins: + +- Doppler energy is accumulated into the same range-indexed layout as range + energy. +- Approaching reflectors add positive normalized Doppler energy. +- Receding reflectors add negative normalized Doppler energy. +- Static reflectors contribute near-zero Doppler energy. + +Chirp metadata: + +- The agent receives the last emitted chirp start frequency, end frequency, and + duration because interpreting a return depends on knowing the transmitted + signal. +- `chirp_age_norm` lets the policy distinguish fresh echo windows from stale or + silent intervals. + +Self-motion: + +- `forward_speed_norm` and `turn_rate_norm` are proprioceptive signals. +- These do not reveal map coordinates or target location. +- They reduce unnecessary burden on recurrent policy memory. + +Model memory note: + +- PufferLib has recurrent policy support through `MinGRU`, `GRU`, and `LSTM`. +- The default config currently uses `MinGRU`, but v1 should not require the + policy to remember chirp identity just to interpret the current acoustic + observation. + +## Reward and Termination + +Reward shaping is intentionally simple in v1. It should make pursuit learnable +without leaking any privileged information through observations. + +Default reward model: + +- `+1.0` for catching the bug. +- Small negative step cost to encourage efficient pursuit. +- Dense progress reward based on reduction in true bat-to-bug distance. +- `-1.0` for hitting walls or obstacles, terminal. +- Optional chirp cost so constant chirping is not free. +- Optional silence bonus or energy budget should wait until the basic task + trains. + +Progress reward: + +- Track previous true bat-to-bug distance internally. +- Reward positive distance reduction. +- Penalize distance increase by the same or smaller scale. 
+- Do not expose the true distance in observations. +- Default formula: + - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` + - `reward -= step_cost` + - `reward -= chirp_cost` when a chirp is emitted +- Default starting values: + - `progress_reward_scale = 0.05` + - `step_cost = 0.001` + - `chirp_cost = 0.0005` + +Important caveat: + +- Dense distance reward is privileged training signal. It is acceptable for v1 + if the goal is to get learning started, but it should be easy to disable or + scale down once the acoustic policy learns basic pursuit. + +Termination: + +- Success: bat catches bug. +- Failure: bat collides with a wall or obstacle. +- Timeout: `tick >= max_steps`. + +Reset: + +- New episode samples arena layout, bat spawn, bug spawn, and bug velocity. +- Bat and bug should not spawn overlapping obstacles or each other. +- Initial bug distance should support curriculum. + +Logged metrics: + +- `perf` +- `score` +- `episode_return` +- `episode_length` +- `success` +- `collision` +- `timeout` +- `bug_distance_start` +- `bug_distance_final` +- `bug_distance_delta` +- `chirps_emitted` +- `mean_chirp_duration` +- `mean_chirp_bandwidth` +- `mean_echo_energy_left` +- `mean_echo_energy_right` +- `n` + +## Curriculum + +The first curriculum should keep obstacles present but make target behavior +simple before adding maneuvering. + +Recommended stages: + +- Stage 0: fixed arena, boundary walls, simple fixed obstacles, slow bug with + fixed velocity and bounce behavior. +- Stage 1: same layout class, faster bug with fixed velocity and bounce + behavior. +- Stage 2: randomized obstacles, slow bug with fixed velocity and bounce + behavior. +- Stage 3: randomized obstacles, faster bug with small seeded random turns. +- Stage 4: randomized obstacles, faster bug that can maneuver or flee. +- Stage 5: lower progress reward scale and higher chirp cost. 
+ +Config knobs: + +- `arena_width` +- `arena_height` +- `num_obstacles` +- `obstacle_min_size` +- `obstacle_max_size` +- `bat_radius` +- `bug_radius` +- `bat_max_speed` +- `bat_accel` +- `bat_turn_rate` +- `bug_speed` +- `max_steps` +- `range_bins_per_ear` +- `doppler_bins_per_ear` +- `max_echo_range` +- `sound_speed` +- `reflector_spacing` +- `chirp_freq_bins` +- `chirp_duration_bins` +- `chirp_cost` +- `step_cost` +- `progress_reward_scale` +- `collision_penalty` +- `curriculum_enabled` +- `curriculum_stage` + +## PufferLib Integration + +Expected files after spec approval: + +- `ocean/bat/bat.h` +- `ocean/bat/bat.c` +- `ocean/bat/binding.c` +- `ocean/bat/tests/` +- `config/bat.ini` + +Follow the Breakout-style native env shape: + +- Define `Log`. +- Define env struct `Bat`. +- Store required pointers: + - `float* observations` + - `float* actions` + - `float* rewards` + - `float* terminals` + - `int num_agents` + - `Log log` + - `unsigned int rng` +- In `binding.c`, start with: + - `OBS_SIZE 70` + - `NUM_ATNS 6` + - `ACT_SIZES {5, 3, 8, 8, 4, 2}` + - `OBS_TENSOR_T FloatTensor` + - `Env Bat` + +Testing expectations: + +- Unit tests for chirp parameter normalization. +- Unit tests for echo delay/range bin placement. +- Unit tests for left/right ear asymmetry from azimuth. +- Unit tests for Doppler sign on approaching vs receding bug. +- Unit tests for collision and catch termination. +- Unit tests for progress reward sign. +- Unit tests that wall collision returns `-1.0` and terminates. +- Unit tests that obstacle reflectors create boundary-approach signals. + +## Open Design Questions + +Reward shaping: + +- The first implementation should use the default shaping constants above. +- After the first trainability pass, decide whether to clip progress reward, + anneal privileged progress reward down, or increase chirp cost. + +Acoustic representation: + +- v1 uses 16 range-energy bins and 16 signed Doppler-energy bins per ear. 
+- A later variant can test a flattened range-Doppler grid or literal FFT bins. + +Bug behavior: + +- v1 starts with fixed-velocity bounce behavior. +- Later curriculum stages add seeded random turns and maneuvering. + +Obstacle reflections: + +- v1 samples walls and obstacle edges into point reflectors. +- Later variants can compare analytic segment reflections or multiple-bounce + reflections. diff --git a/config/bat.ini b/config/bat.ini new file mode 100644 index 0000000000..68cf3697d2 --- /dev/null +++ b/config/bat.ini @@ -0,0 +1,61 @@ +[base] +env_name = bat + +[vec] +total_agents = 4096 +num_buffers = 8 +num_threads = 8 + +[policy] +hidden_size = 128 +num_layers = 4 + +[torch] +network = MinGRU +encoder = DefaultEncoder +decoder = DefaultDecoder + +[env] +num_agents = 1 +frameskip = 1 +width = 64 +height = 64 +num_obstacles = 3 +bat_radius = 2.0 +bug_radius = 1.5 +bat_max_speed = 12.0 +bat_accel = 30.0 +bat_turn_rate = 3.1415926 +bug_speed = 4.0 +max_steps = 512 +range_bins_per_ear = 16 +doppler_bins_per_ear = 16 +max_echo_range = 80.0 +sound_speed = 100.0 +reflector_spacing = 8.0 +max_chirp_age_ticks = 30 +chirp_cost = 0.0005 +step_cost = 0.001 +progress_reward_scale = 0.05 +collision_penalty = 1.0 + +[train] +total_timesteps = 10_000_000 +learning_rate = 0.015 +gamma = 0.995 +gae_lambda = 0.90 +replay_ratio = 1.0 +clip_coef = 0.2 +vf_coef = 2.0 +vf_clip_coef = 0.2 +max_grad_norm = 1.5 +ent_coef = 0.001 +beta1 = 0.95 +beta2 = 0.999 +eps = 1e-12 +minibatch_size = 8192 +horizon = 64 +vtrace_rho_clip = 1.0 +vtrace_c_clip = 1.0 +prio_alpha = 0.8 +prio_beta0 = 0.2 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c new file mode 100644 index 0000000000..7be2b41308 --- /dev/null +++ b/ocean/bat/bat.c @@ -0,0 +1,57 @@ +#include +#include "bat.h" + +void demo() { + Bat env = { + .frameskip = 1, + .width = 64, + .height = 64, + .num_obstacles = 3, + .bat_radius = 2.0f, + .bug_radius = 1.5f, + .bat_max_speed = 12.0f, + .bat_accel = 30.0f, + .bat_turn_rate = BAT_PI, + 
.bug_speed = 4.0f, + .max_steps = 512, + .range_bins_per_ear = BAT_RANGE_BINS, + .doppler_bins_per_ear = BAT_DOPPLER_BINS, + .max_echo_range = 80.0f, + .sound_speed = 100.0f, + .reflector_spacing = 8.0f, + .chirp_cost = 0.0005f, + .step_cost = 0.001f, + .progress_reward_scale = 0.05f, + .collision_penalty = 1.0f, + .rng = (unsigned int)time(NULL), /* seed taken from the wall clock */ + }; + allocate(&env); + env.client = make_client(&env); + c_reset(&env); + + SetTargetFPS(60); + while (!WindowShouldClose()) { /* one c_step and one c_render per rendered frame */ + memset(env.actions, 0, sizeof(float) * BAT_NUM_ACTIONS); + if (IsKeyDown(KEY_W)) env.actions[0] = BAT_THRUST_FORWARD; /* later checks overwrite earlier ones: D beats A beats S beats W when held together */ + if (IsKeyDown(KEY_S)) env.actions[0] = BAT_BRAKE; + if (IsKeyDown(KEY_A)) env.actions[0] = BAT_STRAFE_LEFT; + if (IsKeyDown(KEY_D)) env.actions[0] = BAT_STRAFE_RIGHT; + if (IsKeyDown(KEY_LEFT)) env.actions[1] = BAT_TURN_LEFT; /* RIGHT wins if both arrows are held */ + if (IsKeyDown(KEY_RIGHT)) env.actions[1] = BAT_TURN_RIGHT; + env.actions[2] = 0; /* chirp start-frequency bin: first bin */ + env.actions[3] = 7; /* chirp end-frequency bin: last of BAT_CHIRP_FREQ_BINS */ + env.actions[4] = 1; /* chirp duration bin */ + env.actions[5] = IsKeyDown(KEY_SPACE) ? 1.0f : 0.0f; /* emit a chirp only while SPACE is held */ + c_step(&env); + c_render(&env); + } + + close_client(env.client); + free_allocated(&env); +} + +int main() { /* entry point of the interactive demo */ + demo(); + return 0; +} + diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h new file mode 100644 index 0000000000..250787da68 --- /dev/null +++ b/ocean/bat/bat.h @@ -0,0 +1,637 @@ +#pragma once + +#include +#include +#include +#include +#include + +#ifndef BAT_HEADLESS +#include "raylib.h" +#endif + +#define BAT_OBS_SIZE 70 /* four groups of 16 echo bins followed by six scalar features */ +#define BAT_NUM_ACTIONS 6 /* number of action heads read from env->actions */ +#define BAT_MOVE_ACTIONS 5 +#define BAT_TURN_ACTIONS 3 +#define BAT_CHIRP_FREQ_BINS 8 +#define BAT_CHIRP_DURATION_BINS 4 +#define BAT_CHIRP_EMIT_ACTIONS 2 + +#define BAT_RANGE_BINS 16 /* per-ear bin count assumed by the observation offsets below */ +#define BAT_DOPPLER_BINS 16 +#define BAT_LEFT_RANGE_OFFSET 0 /* start index of each 16-wide group inside observations */ +#define BAT_LEFT_DOPPLER_OFFSET 16 +#define BAT_RIGHT_RANGE_OFFSET 32 +#define BAT_RIGHT_DOPPLER_OFFSET 48 +#define BAT_CHIRP_AGE_OBS 64 /* indices of the scalar features */ +#define BAT_CHIRP_START_OBS 65 +#define BAT_CHIRP_END_OBS 66 +#define BAT_CHIRP_DURATION_OBS 67 +#define BAT_FORWARD_SPEED_OBS 68 +#define 
BAT_TURN_RATE_OBS 69 + +#define BAT_NOOP 0 /* values of the move action head, actions[0] */ +#define BAT_THRUST_FORWARD 1 +#define BAT_BRAKE 2 +#define BAT_STRAFE_LEFT 3 +#define BAT_STRAFE_RIGHT 4 + +#define BAT_TURN_NONE 0 /* values of the turn action head, actions[1] */ +#define BAT_TURN_LEFT 1 +#define BAT_TURN_RIGHT 2 + +#define BAT_MAX_OBSTACLES 16 /* init clamps num_obstacles to this */ +#define BAT_TICK_RATE (1.0f/60.0f) /* used as the per-tick dt in seconds, despite the name */ +#define BAT_PI 3.14159265358979323846f + +typedef struct Log { /* running sums over finished episodes, filled by add_log */ + float perf; + float score; + float episode_return; + float episode_length; + float success; + float collision; + float timeout; + float bug_distance_start; + float bug_distance_final; + float bug_distance_delta; + float chirps_emitted; + float mean_chirp_duration; + float mean_chirp_bandwidth; + float mean_echo_energy_left; + float mean_echo_energy_right; + float n; /* incremented by 1 per logged episode */ +} Log; + +typedef struct Client { + int width; + int height; +} Client; + +typedef struct Bat { /* full state of one environment instance */ + Client* client; + Log log; + float* observations; /* BAT_OBS_SIZE floats */ + float* actions; /* BAT_NUM_ACTIONS floats, each holding a discrete index */ + float* rewards; /* only element 0 is written in the code visible here */ + float* terminals; /* only element 0 is written in the code visible here */ + int num_agents; + + int frameskip; /* physics ticks per c_step call */ + int width; + int height; + int tick; + int max_steps; + int num_obstacles; + + float bat_x; + float bat_y; + float bat_vx; + float bat_vy; + float bat_heading; /* radians, kept within about [-pi, pi] by bat_update_motion */ + float bat_turn_velocity; + float bat_radius; + float bat_max_speed; + float bat_accel; + float bat_turn_rate; + + float bug_x; + float bug_y; + float bug_vx; + float bug_vy; + float bug_radius; + float bug_speed; + + float* obstacle_x; /* rectangle i spans x..x+w and y..y+h; arrays hold num_obstacles entries */ + float* obstacle_y; + float* obstacle_w; + float* obstacle_h; + + int range_bins_per_ear; + int doppler_bins_per_ear; + float max_echo_range; /* compared against the two-way path length in bat_add_echo */ + float sound_speed; + float reflector_spacing; + int max_chirp_age_ticks; + int chirp_age_ticks; + float last_chirp_start_freq; /* normalized bin value in [0, 1] */ + float last_chirp_end_freq; /* normalized bin value in [0, 1] */ + float last_chirp_duration; /* normalized bin value in [0, 1] */ + int chirps_emitted_episode; + float chirp_duration_sum; + float chirp_bandwidth_sum; + float echo_energy_left_sum; + float echo_energy_right_sum; + + float chirp_cost; + float step_cost; + float progress_reward_scale; + float collision_penalty; + float prev_bug_dist; + float start_bug_dist; /* bat-to-bug distance captured at reset */ + 
float episode_return; + + unsigned int rng; /* state of the generator advanced by bat_rand */ +} Bat; + +static inline unsigned int bat_rand(Bat* env) { /* one linear-congruential step; stores and returns the new state */ + env->rng = env->rng * 1664525u + 1013904223u; + return env->rng; +} + +static inline float bat_randf(Bat* env) { /* uniform float in [0, 1), assuming a 32-bit unsigned int: upper 24 bits scaled by 2^-24 */ + return (bat_rand(env) >> 8) * (1.0f / 16777216.0f); +} + +static inline float bat_clampf(float v, float lo, float hi) { /* clamp v into [lo, hi]; lo wins if lo > hi and v < lo */ + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} + +static inline int bat_action_index(float v, int n) { /* truncate a float action toward zero and clamp it into [0, n-1] */ + int idx = (int)v; + if (idx < 0) return 0; + if (idx >= n) return n - 1; + return idx; +} + +static inline float bat_norm_bin(int idx, int count) { /* map bin idx of count bins onto [0, 1]; 0 when count <= 1 */ + if (count <= 1) return 0.0f; + return idx / (float)(count - 1); +} + +static inline float bat_len(float x, float y) { /* Euclidean length of (x, y) */ + return sqrtf(x*x + y*y); +} + +static inline float bat_dist(float ax, float ay, float bx, float by) { /* Euclidean distance between points a and b */ + return bat_len(bx - ax, by - ay); +} + +static inline void bat_norm_vec(float x, float y, float* ox, float* oy) { /* writes the unit vector of (x, y) to *ox, *oy */ + float l = bat_len(x, y); + if (l <= 0.000001f) { /* near-zero input: fall back to (1, 0) instead of dividing */ + *ox = 1.0f; + *oy = 0.0f; + return; + } + *ox = x / l; + *oy = y / l; +} + +static inline bool bat_circle_rect_collision(float cx, float cy, float r, + float rx, float ry, float rw, float rh) { /* circle vs axis-aligned rect; exact touching counts as a hit */ + float px = bat_clampf(cx, rx, rx + rw); /* (px, py) is the rect point closest to the circle centre */ + float py = bat_clampf(cy, ry, ry + rh); + return bat_dist(cx, cy, px, py) <= r; +} + +static inline void generate_obstacles(Bat* env) { /* fills the obstacle arrays; consumes one bat_randf draw per obstacle. NOTE(review): nothing here checks overlap with the bat or bug spawn points */ + for (int i = 0; i < env->num_obstacles; i++) { + float w = 7.0f + 2.0f * (float)(i % 3); /* widths cycle 7, 9, 11 */ + float h = 7.0f + 2.0f * (float)((i + 1) % 3); /* heights cycle 9, 11, 7 */ + float lane = (i + 1.0f) / (env->num_obstacles + 1.0f); /* evenly spaced fraction of the arena width */ + float jitter = (bat_randf(env) - 0.5f) * 6.0f; /* in [-3, 3); added to x and subtracted from y */ + env->obstacle_w[i] = w; + env->obstacle_h[i] = h; + env->obstacle_x[i] = bat_clampf(env->width * lane - w * 0.5f + jitter, + env->bat_radius + 2.0f, env->width - w - env->bat_radius - 2.0f); /* keeps a bat_radius-plus-2 gap to the side walls */ + env->obstacle_y[i] = bat_clampf(env->height * (0.35f + 0.3f * (i % 2)) - h * 0.5f - jitter, + env->bat_radius + 2.0f, env->height - h - env->bat_radius - 2.0f); /* rows alternate between 35% and 65% of the height */ + } +} + +void 
init(Bat* env) { /* Resets the tick counter, replaces unset (<= 0) config fields with defaults, clamps bin and obstacle counts to their compile-time maxima, and allocates the four obstacle arrays (released by c_close). */ + env->tick = 0; + if (env->num_agents <= 0) env->num_agents = 1; + if (env->frameskip <= 0) env->frameskip = 1; + if (env->width <= 0) env->width = 64; + if (env->height <= 0) env->height = 64; + if (env->max_steps <= 0) env->max_steps = 512; + if (env->bat_radius <= 0.0f) env->bat_radius = 2.0f; + if (env->bug_radius <= 0.0f) env->bug_radius = 1.5f; + if (env->bat_max_speed <= 0.0f) env->bat_max_speed = 12.0f; + if (env->bat_accel <= 0.0f) env->bat_accel = 30.0f; + if (env->bat_turn_rate <= 0.0f) env->bat_turn_rate = BAT_PI; + if (env->bug_speed <= 0.0f) env->bug_speed = 4.0f; + if (env->range_bins_per_ear <= 0 || env->range_bins_per_ear > BAT_RANGE_BINS) env->range_bins_per_ear = BAT_RANGE_BINS; /* bat_add_echo indexes the fixed BAT_OBS_SIZE buffer with this count, so a larger value would write outside its 16-wide group and past the buffer */ + if (env->doppler_bins_per_ear <= 0 || env->doppler_bins_per_ear > BAT_DOPPLER_BINS) env->doppler_bins_per_ear = BAT_DOPPLER_BINS; /* same fixed 16-wide layout as the range bins */ + if (env->max_echo_range <= 0.0f) env->max_echo_range = 80.0f; + if (env->sound_speed <= 0.0f) env->sound_speed = 100.0f; + if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; + if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; + if (env->step_cost <= 0.0f) env->step_cost = 0.001f; /* NOTE(review): the <= 0 tests on the four reward knobs mean a value of exactly 0 cannot be configured */ + if (env->progress_reward_scale <= 0.0f) env->progress_reward_scale = 0.05f; + if (env->collision_penalty <= 0.0f) env->collision_penalty = 1.0f; + if (env->chirp_cost <= 0.0f) env->chirp_cost = 0.0005f; + if (env->rng == 0) env->rng = 1; + + if (env->num_obstacles < 0) env->num_obstacles = 0; + if (env->num_obstacles > BAT_MAX_OBSTACLES) env->num_obstacles = BAT_MAX_OBSTACLES; + env->obstacle_x = (float*)calloc(env->num_obstacles, sizeof(float)); /* may be NULL when num_obstacles is 0; every loop over obstacles is bounded by num_obstacles */ + env->obstacle_y = (float*)calloc(env->num_obstacles, sizeof(float)); + env->obstacle_w = (float*)calloc(env->num_obstacles, sizeof(float)); + env->obstacle_h = (float*)calloc(env->num_obstacles, sizeof(float)); +} + +void allocate(Bat* env) { /* standalone setup: init plus the observation/action/reward/terminal buffers */ + init(env); + env->observations = (float*)calloc(BAT_OBS_SIZE, sizeof(float)); + env->actions = (float*)calloc(BAT_NUM_ACTIONS, sizeof(float)); + env->rewards = (float*)calloc(1, sizeof(float)); + env->terminals = 
(float*)calloc(1, sizeof(float)); +} + +void c_close(Bat* env) { /* frees the four obstacle arrays allocated in init; pointers are not reset */ + free(env->obstacle_x); + free(env->obstacle_y); + free(env->obstacle_w); + free(env->obstacle_h); +} + +void free_allocated(Bat* env) { /* frees the buffers created by allocate, then the obstacle arrays via c_close */ + free(env->actions); + free(env->observations); + free(env->terminals); + free(env->rewards); + c_close(env); +} + +static inline void add_log(Bat* env, float success, float collision, float timeout) { /* adds the finished episode to env->log; the three arguments are added to the matching counters as given */ + float final_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + env->log.perf += success; + env->log.score += env->episode_return; + env->log.episode_return += env->episode_return; + env->log.episode_length += env->tick; + env->log.success += success; + env->log.collision += collision; + env->log.timeout += timeout; + env->log.bug_distance_start += env->start_bug_dist; + env->log.bug_distance_final += final_dist; + env->log.bug_distance_delta += env->start_bug_dist - final_dist; /* positive when the bat ended closer than it started */ + env->log.chirps_emitted += env->chirps_emitted_episode; + if (env->chirps_emitted_episode > 0) { /* episodes without chirps add nothing to the chirp means but still count in n */ + env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; + env->log.mean_chirp_bandwidth += env->chirp_bandwidth_sum / env->chirps_emitted_episode; + } + env->log.mean_echo_energy_left += env->echo_energy_left_sum / fmaxf(1.0f, (float)(env->tick + 1)); /* NOTE(review): tick + 1 presumably accounts for the observation computed at reset; confirm */ + env->log.mean_echo_energy_right += env->echo_energy_right_sum / fmaxf(1.0f, (float)(env->tick + 1)); + env->log.n += 1.0f; +} + +static inline void bat_add_echo(Bat* env, float rx, float ry, float rvx, float rvy, + float strength) { /* accumulates one point reflector at (rx, ry) moving at (rvx, rvy) into the left/right range and Doppler bins. NOTE(review): nothing in this function reads the chirp state */ + float fx = cosf(env->bat_heading); /* forward unit vector */ + float fy = sinf(env->bat_heading); + float lx = -sinf(env->bat_heading); /* forward rotated by 90 degrees; bat_update_motion uses the same vector as its strafe-right axis */ + float ly = cosf(env->bat_heading); + float ear_sep = env->bat_radius * 0.75f; /* ears sit half of this to either side of the centre */ + float left_ear_x = env->bat_x - lx * ear_sep * 0.5f; + float left_ear_y = env->bat_y - ly * ear_sep * 0.5f; + float right_ear_x = env->bat_x + lx * ear_sep * 0.5f; + float right_ear_y = env->bat_y + ly * ear_sep * 0.5f; + + float ux, uy; + bat_norm_vec(rx - env->bat_x, ry - 
env->bat_y, &ux, &uy); /* (ux, uy): unit direction from the bat centre to the reflector */ + float forward = ux * fx + uy * fy; /* cosine of the angle between heading and reflector */ + if (forward < -0.35f) return; /* reflectors more than roughly 110 degrees off the heading contribute nothing */ + + float left_dir_x = -lx; + float left_dir_y = -ly; + float right_dir_x = lx; + float right_dir_y = ly; + float left_gain = bat_clampf(0.75f + 0.25f * (ux * left_dir_x + uy * left_dir_y), 0.1f, 1.0f); /* 1.0 when the reflector is directly to that ear's side, 0.5 when directly opposite */ + float right_gain = bat_clampf(0.75f + 0.25f * (ux * right_dir_x + uy * right_dir_y), 0.1f, 1.0f); + + float left_path = bat_dist(env->bat_x, env->bat_y, rx, ry) + bat_dist(rx, ry, left_ear_x, left_ear_y); /* out from the centre, back to the ear */ + float right_path = bat_dist(env->bat_x, env->bat_y, rx, ry) + bat_dist(rx, ry, right_ear_x, right_ear_y); + float path = 0.5f * (left_path + right_path); /* both ears share one bin, chosen from the mean two-way path */ + if (path > env->max_echo_range) return; /* max_echo_range limits the two-way path, not the one-way distance */ + + int bin = (int)((path / env->max_echo_range) * env->range_bins_per_ear); + if (bin < 0) bin = 0; + if (bin >= env->range_bins_per_ear) bin = env->range_bins_per_ear - 1; + + float attenuation = strength / (1.0f + 0.02f * path * path); + float rel_vx = rvx - env->bat_vx; /* reflector velocity relative to the bat, so static reflectors still get Doppler from the bat's own motion */ + float rel_vy = rvy - env->bat_vy; + float distance_rate = rel_vx * ux + rel_vy * uy; /* positive when the gap is opening */ + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed + 0.0001f), -1.0f, 1.0f); /* positive for a closing gap, negative for an opening one */ + + float left_energy = attenuation * left_gain; + float right_energy = attenuation * right_gain; + env->observations[BAT_LEFT_RANGE_OFFSET + bin] += left_energy; + env->observations[BAT_RIGHT_RANGE_OFFSET + bin] += right_energy; + env->observations[BAT_LEFT_DOPPLER_OFFSET + bin] += left_energy * doppler; /* Doppler is stored energy-weighted in the same range-indexed bin */ + env->observations[BAT_RIGHT_DOPPLER_OFFSET + bin] += right_energy * doppler; +} + +static inline void bat_add_segment_reflectors(Bat* env, float x1, float y1, + float x2, float y2, float strength) { /* adds stationary point reflectors along the segment from (x1, y1) to (x2, y2), both endpoints included */ + float len = bat_dist(x1, y1, x2, y2); + int count = (int)(len / env->reflector_spacing) + 1; /* number of intervals; the loop emits count plus one points */ + if (count < 1) count = 1; + for (int i = 0; i <= count; i++) { + float t = count == 0 ? 
0.0f : i / (float)count; /* t runs from 0 to 1 inclusive */ + float x = x1 + (x2 - x1) * t; + float y = y1 + (y2 - y1) * t; + bat_add_echo(env, x, y, 0.0f, 0.0f, strength); /* zero reflector velocity */ + } +} + +static inline void bat_add_obstacle_echoes(Bat* env, int i) { /* samples the four edges of obstacle i; each corner is emitted by two edges */ + float x = env->obstacle_x[i]; + float y = env->obstacle_y[i]; + float w = env->obstacle_w[i]; + float h = env->obstacle_h[i]; + bat_add_segment_reflectors(env, x, y, x + w, y, 0.55f); + bat_add_segment_reflectors(env, x, y + h, x + w, y + h, 0.55f); + bat_add_segment_reflectors(env, x, y, x, y + h, 0.55f); + bat_add_segment_reflectors(env, x + w, y, x + w, y + h, 0.55f); +} + +void compute_observations(Bat* env) { /* rebuilds the whole observation vector from the current state and adds this frame's echo energy to the episode sums */ + memset(env->observations, 0, BAT_OBS_SIZE * sizeof(float)); + + bat_add_echo(env, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f); /* the bug is one moving reflector with the largest strength */ + bat_add_segment_reflectors(env, 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); /* the four arena walls */ + bat_add_segment_reflectors(env, 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.12f); + bat_add_segment_reflectors(env, 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); + bat_add_segment_reflectors(env, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); + for (int i = 0; i < env->num_obstacles; i++) { + bat_add_obstacle_echoes(env, i); + } + + float left_energy = 0.0f; + float right_energy = 0.0f; + for (int i = 0; i < BAT_RANGE_BINS; i++) { /* bound the accumulated bins: range to [0, 1], Doppler to [-1, 1] */ + env->observations[BAT_LEFT_RANGE_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_RANGE_OFFSET + i], 0.0f, 1.0f); + env->observations[BAT_RIGHT_RANGE_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_RANGE_OFFSET + i], 0.0f, 1.0f); + env->observations[BAT_LEFT_DOPPLER_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_DOPPLER_OFFSET + i], -1.0f, 1.0f); + env->observations[BAT_RIGHT_DOPPLER_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_DOPPLER_OFFSET + i], -1.0f, 1.0f); + left_energy += env->observations[BAT_LEFT_RANGE_OFFSET + i]; /* summed after clamping */ + right_energy += env->observations[BAT_RIGHT_RANGE_OFFSET + i]; + } + env->echo_energy_left_sum += 
left_energy; + env->echo_energy_right_sum += right_energy; + + env->observations[BAT_CHIRP_AGE_OBS] = bat_clampf(env->chirp_age_ticks / (float)env->max_chirp_age_ticks, 0.0f, 1.0f); + env->observations[BAT_CHIRP_START_OBS] = env->last_chirp_start_freq; + env->observations[BAT_CHIRP_END_OBS] = env->last_chirp_end_freq; + env->observations[BAT_CHIRP_DURATION_OBS] = env->last_chirp_duration; + float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); /* velocity component along the heading; sideways motion is not observed */ + env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, -1.0f, 1.0f); + env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); +} + +static inline void bat_reset_episode(Bat* env) { /* starts a new episode: fixed bat spawn, fresh obstacles, randomized bug, cleared per-episode accumulators, first observation */ + env->tick = 0; + env->bat_x = env->width * 0.25f; /* bat always starts at the left quarter, mid height, at rest, heading 0 */ + env->bat_y = env->height * 0.5f; + env->bat_vx = 0.0f; + env->bat_vy = 0.0f; + env->bat_heading = 0.0f; + env->bat_turn_velocity = 0.0f; + generate_obstacles(env); + env->bug_x = env->width * 0.75f; + env->bug_y = env->height * (0.35f + 0.30f * bat_randf(env)); /* between 35% and 65% of the height */ + env->bug_vx = -env->bug_speed; /* bug starts moving toward the bat's side of the arena */ + env->bug_vy = (bat_randf(env) - 0.5f) * env->bug_speed * 0.5f; + env->last_chirp_start_freq = 0.0f; /* chirp metadata starts as bins 0, 7 and 1, although no chirp has been emitted yet */ + env->last_chirp_end_freq = 1.0f; + env->last_chirp_duration = 0.33333334f; + env->chirp_age_ticks = 0; + env->chirps_emitted_episode = 0; + env->chirp_duration_sum = 0.0f; + env->chirp_bandwidth_sum = 0.0f; + env->echo_energy_left_sum = 0.0f; + env->echo_energy_right_sum = 0.0f; + env->episode_return = 0.0f; + env->start_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + env->prev_bug_dist = env->start_bug_dist; + compute_observations(env); +} + +void c_reset(Bat* env) { /* clears reward and terminal flag, then starts a fresh episode */ + env->rewards[0] = 0.0f; + env->terminals[0] = 0.0f; + bat_reset_episode(env); +} + +static inline bool bat_hits_obstacle(Bat* env) { /* true if the bat circle touches or overlaps any obstacle rectangle */ + for (int i = 0; i < env->num_obstacles; i++) { + if (bat_circle_rect_collision(env->bat_x, env->bat_y, env->bat_radius, + env->obstacle_x[i], 
env->obstacle_y[i], env->obstacle_w[i], env->obstacle_h[i])) { + return true; + } + } + return false; +} + +static inline bool bat_hits_wall(Bat* env) { + return env->bat_x - env->bat_radius < 0.0f || + env->bat_x + env->bat_radius > env->width || + env->bat_y - env->bat_radius < 0.0f || + env->bat_y + env->bat_radius > env->height; +} + +static inline void bat_update_bug(Bat* env, float dt) { + env->bug_x += env->bug_vx * dt; + env->bug_y += env->bug_vy * dt; + if (env->bug_x - env->bug_radius < 0.0f) { + env->bug_x = env->bug_radius; + env->bug_vx = fabsf(env->bug_vx); + } + if (env->bug_x + env->bug_radius > env->width) { + env->bug_x = env->width - env->bug_radius; + env->bug_vx = -fabsf(env->bug_vx); + } + if (env->bug_y - env->bug_radius < 0.0f) { + env->bug_y = env->bug_radius; + env->bug_vy = fabsf(env->bug_vy); + } + if (env->bug_y + env->bug_radius > env->height) { + env->bug_y = env->height - env->bug_radius; + env->bug_vy = -fabsf(env->bug_vy); + } +} + +static inline void bat_update_motion(Bat* env, float dt) { + int move = bat_action_index(env->actions[0], BAT_MOVE_ACTIONS); + int turn = bat_action_index(env->actions[1], BAT_TURN_ACTIONS); + float fx = cosf(env->bat_heading); + float fy = sinf(env->bat_heading); + float rx = -sinf(env->bat_heading); + float ry = cosf(env->bat_heading); + float ax = 0.0f; + float ay = 0.0f; + + if (move == BAT_THRUST_FORWARD) { + ax += fx * env->bat_accel; + ay += fy * env->bat_accel; + } else if (move == BAT_BRAKE) { + ax -= fx * env->bat_accel; + ay -= fy * env->bat_accel; + } else if (move == BAT_STRAFE_LEFT) { + ax -= rx * env->bat_accel; + ay -= ry * env->bat_accel; + } else if (move == BAT_STRAFE_RIGHT) { + ax += rx * env->bat_accel; + ay += ry * env->bat_accel; + } + + env->bat_turn_velocity = 0.0f; + if (turn == BAT_TURN_LEFT) env->bat_turn_velocity = -env->bat_turn_rate; + if (turn == BAT_TURN_RIGHT) env->bat_turn_velocity = env->bat_turn_rate; + env->bat_heading += env->bat_turn_velocity * dt; + if 
(env->bat_heading > BAT_PI) env->bat_heading -= 2.0f * BAT_PI; + if (env->bat_heading < -BAT_PI) env->bat_heading += 2.0f * BAT_PI; + + env->bat_vx += ax * dt; + env->bat_vy += ay * dt; + float speed = bat_len(env->bat_vx, env->bat_vy); + if (speed > env->bat_max_speed) { + env->bat_vx = env->bat_vx / speed * env->bat_max_speed; + env->bat_vy = env->bat_vy / speed * env->bat_max_speed; + } + env->bat_x += env->bat_vx * dt; + env->bat_y += env->bat_vy * dt; +} + +static inline void bat_update_chirp(Bat* env) { + int start_idx = bat_action_index(env->actions[2], BAT_CHIRP_FREQ_BINS); + int end_idx = bat_action_index(env->actions[3], BAT_CHIRP_FREQ_BINS); + int duration_idx = bat_action_index(env->actions[4], BAT_CHIRP_DURATION_BINS); + int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); + if (emit) { + env->last_chirp_start_freq = bat_norm_bin(start_idx, BAT_CHIRP_FREQ_BINS); + env->last_chirp_end_freq = bat_norm_bin(end_idx, BAT_CHIRP_FREQ_BINS); + env->last_chirp_duration = bat_norm_bin(duration_idx, BAT_CHIRP_DURATION_BINS); + env->chirp_age_ticks = 0; + env->chirps_emitted_episode += 1; + env->chirp_duration_sum += env->last_chirp_duration; + env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); + } else if (env->chirp_age_ticks < env->max_chirp_age_ticks) { + env->chirp_age_ticks += 1; + } +} + +static inline bool bat_caught_bug(Bat* env) { + return bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) <= env->bat_radius + env->bug_radius; +} + +void c_step(Bat* env) { + env->rewards[0] = 0.0f; + env->terminals[0] = 0.0f; + + bat_update_chirp(env); + if (bat_caught_bug(env)) { + env->rewards[0] = 1.0f; + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + add_log(env, 1.0f, 0.0f, 0.0f); + bat_reset_episode(env); + return; + } + + for (int i = 0; i < env->frameskip; i++) { + bat_update_motion(env, BAT_TICK_RATE); + bat_update_bug(env, BAT_TICK_RATE); + if (bat_hits_wall(env) || 
bat_hits_obstacle(env)) { + env->rewards[0] = -env->collision_penalty; + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + add_log(env, 0.0f, 1.0f, 0.0f); + bat_reset_episode(env); + return; + } + if (bat_caught_bug(env)) { + env->rewards[0] = 1.0f; + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + add_log(env, 1.0f, 0.0f, 0.0f); + bat_reset_episode(env); + return; + } + } + + env->tick += 1; + float bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + float progress = env->prev_bug_dist - bug_dist; + env->rewards[0] += env->progress_reward_scale * progress; + env->rewards[0] -= env->step_cost; + if (bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS)) { + env->rewards[0] -= env->chirp_cost; + } + env->prev_bug_dist = bug_dist; + + if (env->tick >= env->max_steps) { + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + add_log(env, 0.0f, 0.0f, 1.0f); + bat_reset_episode(env); + return; + } + + env->episode_return += env->rewards[0]; + compute_observations(env); +} + +#ifndef BAT_HEADLESS +Client* make_client(Bat* env) { + Client* client = (Client*)calloc(1, sizeof(Client)); + client->width = env->width * 10; + client->height = env->height * 10; + InitWindow(client->width, client->height, "Bat"); + return client; +} + +void close_client(Client* client) { + CloseWindow(); + free(client); +} + +void c_render(Bat* env) { + if (env->client == NULL) { + env->client = make_client(env); + } + float sx = env->client->width / (float)env->width; + float sy = env->client->height / (float)env->height; + BeginDrawing(); + ClearBackground((Color){18, 20, 24, 255}); + DrawRectangleLines(0, 0, env->client->width, env->client->height, GRAY); + for (int i = 0; i < env->num_obstacles; i++) { + DrawRectangle( + (int)(env->obstacle_x[i] * sx), + (int)(env->obstacle_y[i] * sy), + (int)(env->obstacle_w[i] * sx), + (int)(env->obstacle_h[i] * sy), + (Color){92, 92, 96, 255}); + } + 
DrawCircle((int)(env->bug_x * sx), (int)(env->bug_y * sy), + env->bug_radius * sx, GREEN); + DrawCircle((int)(env->bat_x * sx), (int)(env->bat_y * sy), + env->bat_radius * sx, BLUE); + float hx = env->bat_x + cosf(env->bat_heading) * env->bat_radius * 2.0f; + float hy = env->bat_y + sinf(env->bat_heading) * env->bat_radius * 2.0f; + DrawLine((int)(env->bat_x * sx), (int)(env->bat_y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); + DrawText(TextFormat("reward %.3f tick %d chirps %d", env->rewards[0], env->tick, + env->chirps_emitted_episode), 10, 10, 20, RAYWHITE); + EndDrawing(); +} +#else +Client* make_client(Bat* env) { + (void)env; + return NULL; +} + +void close_client(Client* client) { + (void)client; +} + +void c_render(Bat* env) { + (void)env; +} +#endif diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c new file mode 100644 index 0000000000..255aa3c3d7 --- /dev/null +++ b/ocean/bat/binding.c @@ -0,0 +1,53 @@ +#include "bat.h" +#define OBS_SIZE 70 +#define NUM_ATNS 6 +#define ACT_SIZES {5, 3, 8, 8, 4, 2} +#define OBS_TENSOR_T FloatTensor + +#define Env Bat +#include "vecenv.h" + +void my_init(Env* env, Dict* kwargs) { + env->num_agents = 1; + env->frameskip = dict_get(kwargs, "frameskip")->value; + env->width = dict_get(kwargs, "width")->value; + env->height = dict_get(kwargs, "height")->value; + env->num_obstacles = dict_get(kwargs, "num_obstacles")->value; + env->bat_radius = dict_get(kwargs, "bat_radius")->value; + env->bug_radius = dict_get(kwargs, "bug_radius")->value; + env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; + env->bat_accel = dict_get(kwargs, "bat_accel")->value; + env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; + env->bug_speed = dict_get(kwargs, "bug_speed")->value; + env->max_steps = dict_get(kwargs, "max_steps")->value; + env->range_bins_per_ear = dict_get(kwargs, "range_bins_per_ear")->value; + env->doppler_bins_per_ear = dict_get(kwargs, "doppler_bins_per_ear")->value; + env->max_echo_range = 
dict_get(kwargs, "max_echo_range")->value; + env->sound_speed = dict_get(kwargs, "sound_speed")->value; + env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; + env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; + env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; + env->step_cost = dict_get(kwargs, "step_cost")->value; + env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; + env->collision_penalty = dict_get(kwargs, "collision_penalty")->value; + init(env); +} + +void my_log(Log* log, Dict* out) { + dict_set(out, "perf", log->perf); + dict_set(out, "score", log->score); + dict_set(out, "episode_return", log->episode_return); + dict_set(out, "episode_length", log->episode_length); + dict_set(out, "success", log->success); + dict_set(out, "collision", log->collision); + dict_set(out, "timeout", log->timeout); + dict_set(out, "bug_distance_start", log->bug_distance_start); + dict_set(out, "bug_distance_final", log->bug_distance_final); + dict_set(out, "bug_distance_delta", log->bug_distance_delta); + dict_set(out, "chirps_emitted", log->chirps_emitted); + dict_set(out, "mean_chirp_duration", log->mean_chirp_duration); + dict_set(out, "mean_chirp_bandwidth", log->mean_chirp_bandwidth); + dict_set(out, "mean_echo_energy_left", log->mean_echo_energy_left); + dict_set(out, "mean_echo_energy_right", log->mean_echo_energy_right); +} + diff --git a/ocean/bat/tests/run_all.sh b/ocean/bat/tests/run_all.sh new file mode 100644 index 0000000000..f693ea1f27 --- /dev/null +++ b/ocean/bat/tests/run_all.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd "$(dirname "$0")/../../.." + +mkdir -p build/bat-tests +cc -std=c99 -O2 -Wall -Wextra -DBAT_HEADLESS \ + -I. 
-Iocean/bat -Ivendor -Iraylib-5.5_linux_amd64/include \ + ocean/bat/tests/test_bat_core.c \ + -lm \ + -o build/bat-tests/test_bat_core + +build/bat-tests/test_bat_core diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c new file mode 100644 index 0000000000..81a1ccf5a4 --- /dev/null +++ b/ocean/bat/tests/test_bat_core.c @@ -0,0 +1,207 @@ +#include +#include +#include + +#include "../bat.h" + +#define ASSERT_TRUE(cond) do { \ + if (!(cond)) { \ + printf("ASSERT_TRUE failed at %s:%d: %s\n", __FILE__, __LINE__, #cond); \ + return 1; \ + } \ +} while (0) + +#define ASSERT_FLOAT_NEAR(actual, expected, eps) do { \ + float _a = (actual); \ + float _e = (expected); \ + if (fabsf(_a - _e) > (eps)) { \ + printf("ASSERT_FLOAT_NEAR failed at %s:%d: got %.6f expected %.6f\n", \ + __FILE__, __LINE__, _a, _e); \ + return 1; \ + } \ +} while (0) + +static Bat make_test_env(void) { + Bat env = { + .num_agents = 1, + .frameskip = 1, + .width = 64, + .height = 64, + .num_obstacles = 1, + .bat_radius = 2.0f, + .bug_radius = 1.5f, + .bat_max_speed = 12.0f, + .bat_accel = 30.0f, + .bat_turn_rate = 3.1415926f, + .bug_speed = 4.0f, + .max_steps = 512, + .range_bins_per_ear = BAT_RANGE_BINS, + .doppler_bins_per_ear = BAT_DOPPLER_BINS, + .max_echo_range = 80.0f, + .sound_speed = 100.0f, + .reflector_spacing = 8.0f, + .chirp_cost = 0.0005f, + .step_cost = 0.001f, + .progress_reward_scale = 0.05f, + .collision_penalty = 1.0f, + .rng = 1, + }; + allocate(&env); + return env; +} + +static int test_chirp_metadata_and_observation_size(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.actions[0] = 0.0f; + env.actions[1] = 0.0f; + env.actions[2] = 7.0f; + env.actions[3] = 0.0f; + env.actions[4] = 3.0f; + env.actions[5] = 1.0f; + c_step(&env); + + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_START_OBS], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_END_OBS], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_DURATION_OBS], 1.0f, 
0.0001f); + ASSERT_TRUE(env.observations[BAT_CHIRP_AGE_OBS] <= 1.0f); + ASSERT_TRUE(env.observations[BAT_CHIRP_AGE_OBS] >= 0.0f); + + free_allocated(&env); + return 0; +} + +static int test_left_right_echo_asymmetry(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bat_heading = 0.0f; + env.bug_x = 35.0f; + env.bug_y = 10.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + compute_observations(&env); + + float left_energy = 0.0f; + float right_energy = 0.0f; + for (int i = 0; i < BAT_RANGE_BINS; i++) { + left_energy += env.observations[BAT_LEFT_RANGE_OFFSET + i]; + right_energy += env.observations[BAT_RIGHT_RANGE_OFFSET + i]; + } + + ASSERT_TRUE(left_energy > right_energy); + + free_allocated(&env); + return 0; +} + +static int test_doppler_sign_for_approaching_bug(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bug_x = 42.0f; + env.bug_y = 20.0f; + env.bug_vx = -3.0f; + env.bug_vy = 0.0f; + compute_observations(&env); + + float doppler = 0.0f; + for (int i = 0; i < BAT_DOPPLER_BINS; i++) { + doppler += env.observations[BAT_LEFT_DOPPLER_OFFSET + i]; + doppler += env.observations[BAT_RIGHT_DOPPLER_OFFSET + i]; + } + + ASSERT_TRUE(doppler > 0.0f); + + free_allocated(&env); + return 0; +} + +static int test_wall_collision_is_terminal_minus_one(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = env.width - env.bat_radius - 0.1f; + env.bat_y = env.height * 0.5f; + env.bat_heading = 0.0f; + env.bat_vx = env.bat_max_speed; + env.bat_vy = 0.0f; + env.actions[0] = 1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], -1.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int 
test_catch_bug_is_terminal_plus_one(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 1.0f, 0.0001f); + ASSERT_TRUE(env.rewards[0] > 0.9f); + + free_allocated(&env); + return 0; +} + +static int test_progress_reward_sign(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 40.0f; + env.bug_y = 20.0f; + env.prev_bug_dist = 25.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + + env.actions[0] = 1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + c_step(&env); + + ASSERT_TRUE(env.rewards[0] > 0.0f); + + free_allocated(&env); + return 0; +} + +int main(void) { + if (test_chirp_metadata_and_observation_size()) return 1; + if (test_left_right_echo_asymmetry()) return 1; + if (test_doppler_sign_for_approaching_bug()) return 1; + if (test_wall_collision_is_terminal_minus_one()) return 1; + if (test_catch_bug_is_terminal_plus_one()) return 1; + if (test_progress_reward_sign()) return 1; + + printf("bat core tests passed\n"); + return 0; +} + From 50ab5c2c66dbfeb928858ea4eadc988d8b51d785 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 19:21:08 -0700 Subject: [PATCH 02/51] feat: visualize bat chirps and reflections --- config/bat.ini | 1 + ocean/bat/bat.h | 224 ++++++++++++++++++++++++++++++-- ocean/bat/binding.c | 2 +- ocean/bat/tests/test_bat_core.c | 61 ++++++++- 4 files changed, 274 insertions(+), 14 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 68cf3697d2..0dd2bf907b 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -34,6 +34,7 @@ max_echo_range = 80.0 sound_speed = 100.0 reflector_spacing = 8.0 max_chirp_age_ticks = 30 +chirp_cooldown_ticks = 12 chirp_cost = 0.0005 step_cost = 0.001 progress_reward_scale = 0.05 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h 
index 250787da68..9822773e0a 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -44,6 +44,25 @@ #define BAT_MAX_OBSTACLES 16 #define BAT_TICK_RATE (1.0f/60.0f) #define BAT_PI 3.14159265358979323846f +#define BAT_CHIRP_HISTORY 4 +#define BAT_CHIRP_RINGS 5 + +typedef struct BatColor { + unsigned char r; + unsigned char g; + unsigned char b; + unsigned char a; +} BatColor; + +typedef struct ChirpEvent { + float x; + float y; + float start_freq; + float end_freq; + float duration; + int birth_tick; + int active; +} ChirpEvent; typedef struct Log { float perf; @@ -114,10 +133,14 @@ typedef struct Bat { float sound_speed; float reflector_spacing; int max_chirp_age_ticks; + int chirp_cooldown_ticks; int chirp_age_ticks; + int last_chirp_tick; float last_chirp_start_freq; float last_chirp_end_freq; float last_chirp_duration; + ChirpEvent chirps[BAT_CHIRP_HISTORY]; + int chirp_head; int chirps_emitted_episode; float chirp_duration_sum; float chirp_bandwidth_sum; @@ -157,6 +180,39 @@ static inline int bat_action_index(float v, int n) { return idx; } +static inline float bat_chirp_duration_seconds(float duration_norm) { + return 0.04f + 0.18f * bat_clampf(duration_norm, 0.0f, 1.0f); +} + +static inline float bat_chirp_ring_radius(float age_seconds, float slice, + float duration_seconds, float sound_speed) { + float ring_age = age_seconds - slice * duration_seconds; + if (ring_age < 0.0f) return 0.0f; + return sound_speed * ring_age; +} + +static inline float bat_echo_time_seconds(float distance, float sound_speed) { + if (sound_speed <= 0.0f) return 0.0f; + return 2.0f * distance / sound_speed; +} + +static inline bool bat_echo_is_arriving(float echo_time, float chirp_age, + float window) { + return fabsf(chirp_age - echo_time) <= window; +} + +static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { + float f = bat_clampf(freq_norm, 0.0f, 1.0f); + float mid = 1.0f - fabsf(2.0f * f - 1.0f); + BatColor color = { + .r = (unsigned char)(255.0f * (1.0f - f) + 
45.0f * f), + .g = (unsigned char)(45.0f + 180.0f * mid), + .b = (unsigned char)(45.0f * (1.0f - f) + 255.0f * f), + .a = (unsigned char)(255.0f * bat_clampf(alpha_norm, 0.0f, 1.0f)), + }; + return color; +} + static inline float bat_norm_bin(int idx, int count) { if (count <= 1) return 0.0f; return idx / (float)(count - 1); @@ -222,6 +278,7 @@ void init(Bat* env) { if (env->sound_speed <= 0.0f) env->sound_speed = 100.0f; if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; + if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; if (env->step_cost <= 0.0f) env->step_cost = 0.001f; if (env->progress_reward_scale <= 0.0f) env->progress_reward_scale = 0.05f; if (env->collision_penalty <= 0.0f) env->collision_penalty = 1.0f; @@ -403,6 +460,9 @@ static inline void bat_reset_episode(Bat* env) { env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; env->chirp_age_ticks = 0; + env->last_chirp_tick = -env->chirp_cooldown_ticks; + memset(env->chirps, 0, sizeof(env->chirps)); + env->chirp_head = 0; env->chirps_emitted_episode = 0; env->chirp_duration_sum = 0.0f; env->chirp_bandwidth_sum = 0.0f; @@ -500,22 +560,43 @@ static inline void bat_update_motion(Bat* env, float dt) { env->bat_y += env->bat_vy * dt; } -static inline void bat_update_chirp(Bat* env) { +static inline bool bat_try_emit_chirp(Bat* env) { int start_idx = bat_action_index(env->actions[2], BAT_CHIRP_FREQ_BINS); int end_idx = bat_action_index(env->actions[3], BAT_CHIRP_FREQ_BINS); int duration_idx = bat_action_index(env->actions[4], BAT_CHIRP_DURATION_BINS); + + if (env->tick - env->last_chirp_tick < env->chirp_cooldown_ticks) { + return false; + } + + env->last_chirp_start_freq = bat_norm_bin(start_idx, BAT_CHIRP_FREQ_BINS); + env->last_chirp_end_freq = bat_norm_bin(end_idx, BAT_CHIRP_FREQ_BINS); + env->last_chirp_duration = bat_norm_bin(duration_idx, BAT_CHIRP_DURATION_BINS); + env->chirp_age_ticks 
= 0; + env->last_chirp_tick = env->tick; + env->chirps_emitted_episode += 1; + env->chirp_duration_sum += env->last_chirp_duration; + env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); + ChirpEvent* chirp = &env->chirps[env->chirp_head]; + chirp->x = env->bat_x; + chirp->y = env->bat_y; + chirp->start_freq = env->last_chirp_start_freq; + chirp->end_freq = env->last_chirp_end_freq; + chirp->duration = bat_chirp_duration_seconds(env->last_chirp_duration); + chirp->birth_tick = env->tick; + chirp->active = 1; + env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; + return true; +} + +static inline bool bat_update_chirp(Bat* env) { int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); if (emit) { - env->last_chirp_start_freq = bat_norm_bin(start_idx, BAT_CHIRP_FREQ_BINS); - env->last_chirp_end_freq = bat_norm_bin(end_idx, BAT_CHIRP_FREQ_BINS); - env->last_chirp_duration = bat_norm_bin(duration_idx, BAT_CHIRP_DURATION_BINS); - env->chirp_age_ticks = 0; - env->chirps_emitted_episode += 1; - env->chirp_duration_sum += env->last_chirp_duration; - env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); + return bat_try_emit_chirp(env); } else if (env->chirp_age_ticks < env->max_chirp_age_ticks) { env->chirp_age_ticks += 1; } + return false; } static inline bool bat_caught_bug(Bat* env) { @@ -526,7 +607,7 @@ void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; - bat_update_chirp(env); + bool accepted_chirp = bat_update_chirp(env); if (bat_caught_bug(env)) { env->rewards[0] = 1.0f; env->terminals[0] = 1.0f; @@ -562,7 +643,7 @@ void c_step(Bat* env) { float progress = env->prev_bug_dist - bug_dist; env->rewards[0] += env->progress_reward_scale * progress; env->rewards[0] -= env->step_cost; - if (bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS)) { + if (accepted_chirp) { env->rewards[0] -= env->chirp_cost; } env->prev_bug_dist = bug_dist; @@ -580,6 
+661,118 @@ void c_step(Bat* env) { } #ifndef BAT_HEADLESS +static inline Color bat_ray_color(BatColor c) { + return (Color){c.r, c.g, c.b, c.a}; +} + +static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { + float scale = fminf(sx, sy); + for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { + ChirpEvent* chirp = &env->chirps[i]; + if (!chirp->active) continue; + + float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; + float max_age = env->max_echo_range / env->sound_speed + chirp->duration; + if (age_seconds < 0.0f || age_seconds > max_age) { + chirp->active = 0; + continue; + } + + for (int ring = 0; ring < BAT_CHIRP_RINGS; ring++) { + float slice = ring / (float)(BAT_CHIRP_RINGS - 1); + float freq = chirp->start_freq + slice * (chirp->end_freq - chirp->start_freq); + float radius = bat_chirp_ring_radius(age_seconds, slice, chirp->duration, env->sound_speed); + if (radius <= 0.0f || radius > env->max_echo_range) continue; + + float fade = 1.0f - radius / env->max_echo_range; + float alpha = 0.18f + 0.42f * bat_clampf(fade, 0.0f, 1.0f); + DrawCircleLines( + (int)(chirp->x * sx), + (int)(chirp->y * sy), + radius * scale, + bat_ray_color(bat_freq_color(freq, alpha))); + } + } +} + +static inline Color bat_doppler_ray_color(float doppler, float alpha) { + BatColor c; + if (doppler > 0.05f) { + c = bat_freq_color(1.0f, alpha); + } else if (doppler < -0.05f) { + c = bat_freq_color(0.0f, alpha); + } else { + c = (BatColor){210, 210, 220, (unsigned char)(255.0f * bat_clampf(alpha, 0.0f, 1.0f))}; + } + return bat_ray_color(c); +} + +static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, + float rx, float ry, float rvx, float rvy, float strength, + float sx, float sy) { + float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; + float distance = bat_dist(chirp->x, chirp->y, rx, ry); + float echo_time = bat_echo_time_seconds(distance, env->sound_speed); + if (!bat_echo_is_arriving(echo_time, age_seconds, 0.025f)) return; + 
+ float ux, uy; + bat_norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); + float rel_vx = rvx - env->bat_vx; + float rel_vy = rvy - env->bat_vy; + float distance_rate = rel_vx * ux + rel_vy * uy; + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed + 0.0001f), -1.0f, 1.0f); + float amp = strength / (1.0f + 0.02f * distance * distance); + float alpha = bat_clampf(0.20f + amp * 2.0f, 0.20f, 0.90f); + Color color = bat_doppler_ray_color(doppler, alpha); + + DrawLine((int)(chirp->x * sx), (int)(chirp->y * sy), + (int)(rx * sx), (int)(ry * sy), color); + DrawCircleLines((int)(rx * sx), (int)(ry * sy), + fmaxf(3.0f, 8.0f * alpha), color); +} + +static inline void bat_draw_segment_echoes(Bat* env, ChirpEvent* chirp, + float x1, float y1, float x2, float y2, float strength, + float sx, float sy) { + float len = bat_dist(x1, y1, x2, y2); + int count = (int)(len / env->reflector_spacing) + 1; + if (count < 1) count = 1; + for (int i = 0; i <= count; i++) { + float t = i / (float)count; + float x = x1 + (x2 - x1) * t; + float y = y1 + (y2 - y1) * t; + bat_draw_echo_flash(env, chirp, x, y, 0.0f, 0.0f, strength, sx, sy); + } +} + +static inline void bat_draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, + int i, float sx, float sy) { + float x = env->obstacle_x[i]; + float y = env->obstacle_y[i]; + float w = env->obstacle_w[i]; + float h = env->obstacle_h[i]; + bat_draw_segment_echoes(env, chirp, x, y, x + w, y, 0.55f, sx, sy); + bat_draw_segment_echoes(env, chirp, x, y + h, x + w, y + h, 0.55f, sx, sy); + bat_draw_segment_echoes(env, chirp, x, y, x, y + h, 0.55f, sx, sy); + bat_draw_segment_echoes(env, chirp, x + w, y, x + w, y + h, 0.55f, sx, sy); +} + +static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { + for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { + ChirpEvent* chirp = &env->chirps[i]; + if (!chirp->active) continue; + bat_draw_echo_flash(env, chirp, env->bug_x, env->bug_y, + env->bug_vx, env->bug_vy, 4.0f, sx, sy); + 
bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, (float)env->width, 0.0f, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)env->height, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.18f, sx, sy); + for (int j = 0; j < env->num_obstacles; j++) { + bat_draw_obstacle_echoes(env, chirp, j, sx, sy); + } + } +} + Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); client->width = env->width * 10; @@ -594,6 +787,9 @@ void close_client(Client* client) { } void c_render(Bat* env) { + if (IsKeyPressed(KEY_ESCAPE)) { + exit(0); + } if (env->client == NULL) { env->client = make_client(env); } @@ -601,6 +797,8 @@ void c_render(Bat* env) { float sy = env->client->height / (float)env->height; BeginDrawing(); ClearBackground((Color){18, 20, 24, 255}); + bat_draw_chirp_rings(env, sx, sy); + bat_draw_echo_reflections(env, sx, sy); DrawRectangleLines(0, 0, env->client->width, env->client->height, GRAY); for (int i = 0; i < env->num_obstacles; i++) { DrawRectangle( @@ -617,8 +815,10 @@ void c_render(Bat* env) { float hx = env->bat_x + cosf(env->bat_heading) * env->bat_radius * 2.0f; float hy = env->bat_y + sinf(env->bat_heading) * env->bat_radius * 2.0f; DrawLine((int)(env->bat_x * sx), (int)(env->bat_y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); - DrawText(TextFormat("reward %.3f tick %d chirps %d", env->rewards[0], env->tick, - env->chirps_emitted_episode), 10, 10, 20, RAYWHITE); + int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); + if (cooldown < 0) cooldown = 0; + DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, + env->chirps_emitted_episode, cooldown), 10, 10, 20, RAYWHITE); EndDrawing(); } #else diff --git a/ocean/bat/binding.c 
b/ocean/bat/binding.c index 255aa3c3d7..dfc46fd1c0 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -26,6 +26,7 @@ void my_init(Env* env, Dict* kwargs) { env->sound_speed = dict_get(kwargs, "sound_speed")->value; env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; + env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; @@ -50,4 +51,3 @@ void my_log(Log* log, Dict* out) { dict_set(out, "mean_echo_energy_left", log->mean_echo_energy_left); dict_set(out, "mean_echo_energy_right", log->mean_echo_energy_right); } - diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 81a1ccf5a4..75df15735b 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -193,6 +193,62 @@ static int test_progress_reward_sign(void) { return 0; } +static int test_chirp_ring_physical_ordering(void) { + float duration = bat_chirp_duration_seconds(1.0f); + float outer = bat_chirp_ring_radius(1.0f, 0.0f, duration, 100.0f); + float inner = bat_chirp_ring_radius(1.0f, 1.0f, duration, 100.0f); + + ASSERT_TRUE(outer > inner); + ASSERT_FLOAT_NEAR(outer, 100.0f, 0.0001f); + ASSERT_FLOAT_NEAR(inner, 100.0f * (1.0f - duration), 0.0001f); + + return 0; +} + +static int test_chirp_color_maps_low_to_red_high_to_blue(void) { + BatColor low = bat_freq_color(0.0f, 1.0f); + BatColor mid = bat_freq_color(0.5f, 1.0f); + BatColor high = bat_freq_color(1.0f, 1.0f); + + ASSERT_TRUE(low.r > low.b); + ASSERT_TRUE(high.b > high.r); + ASSERT_TRUE(mid.g >= low.g); + ASSERT_TRUE(mid.g >= high.g); + + return 0; +} + +static int test_chirp_cooldown_accepts_only_after_delay(void) { + Bat env = make_test_env(); + c_reset(&env); + env.chirp_cooldown_ticks = 
12; + + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 1.0f; + ASSERT_TRUE(bat_try_emit_chirp(&env)); + ASSERT_TRUE(!bat_try_emit_chirp(&env)); + + env.tick += 12; + ASSERT_TRUE(bat_try_emit_chirp(&env)); + + free_allocated(&env); + return 0; +} + +static int test_reflection_arrives_at_two_way_travel_time(void) { + float sound_speed = 100.0f; + float distance = 25.0f; + float echo_time = bat_echo_time_seconds(distance, sound_speed); + + ASSERT_FLOAT_NEAR(echo_time, 0.5f, 0.0001f); + ASSERT_TRUE(bat_echo_is_arriving(echo_time, echo_time + 0.005f, 0.02f)); + ASSERT_TRUE(!bat_echo_is_arriving(echo_time, echo_time + 0.050f, 0.02f)); + + return 0; +} + int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; if (test_left_right_echo_asymmetry()) return 1; @@ -200,8 +256,11 @@ int main(void) { if (test_wall_collision_is_terminal_minus_one()) return 1; if (test_catch_bug_is_terminal_plus_one()) return 1; if (test_progress_reward_sign()) return 1; + if (test_chirp_ring_physical_ordering()) return 1; + if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; + if (test_chirp_cooldown_accepts_only_after_delay()) return 1; + if (test_reflection_arrives_at_two_way_travel_time()) return 1; printf("bat core tests passed\n"); return 0; } - From b30a23baecd6166dca11f16049ddfe770ce1db2e Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 19:37:22 -0700 Subject: [PATCH 03/51] feat: improve bat randomization training --- config/bat.ini | 2 +- ocean/bat/bat.h | 111 +++++++++++++++++++++++++++----- ocean/bat/tests/test_bat_core.c | 95 +++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 18 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 0dd2bf907b..702e2fcd4e 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -41,7 +41,7 @@ progress_reward_scale = 0.05 collision_penalty = 1.0 [train] -total_timesteps = 10_000_000 +total_timesteps = 50_000_000 learning_rate = 0.015 gamma = 0.995 
gae_lambda = 0.90 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 9822773e0a..b4e3e9f48e 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -244,18 +244,97 @@ static inline bool bat_circle_rect_collision(float cx, float cy, float r, return bat_dist(cx, cy, px, py) <= r; } +static inline bool bat_rects_overlap(float ax, float ay, float aw, float ah, + float bx, float by, float bw, float bh, float margin) { + return ax - margin < bx + bw && + ax + aw + margin > bx && + ay - margin < by + bh && + ay + ah + margin > by; +} + +static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, + float* x, float* y) { + int east = quadrant & 1; + int south = (quadrant >> 1) & 1; + float margin = fmaxf(6.0f, radius + 3.0f); + float half_w = env->width * 0.5f; + float half_h = env->height * 0.5f; + float min_x = (east ? half_w : 0.0f) + margin; + float max_x = (east ? (float)env->width : half_w) - margin; + float min_y = (south ? half_h : 0.0f) + margin; + float max_y = (south ? 
(float)env->height : half_h) - margin; + if (max_x < min_x) max_x = min_x; + if (max_y < min_y) max_y = min_y; + *x = min_x + bat_randf(env) * (max_x - min_x); + *y = min_y + bat_randf(env) * (max_y - min_y); +} + +static inline void bat_sample_spawns(Bat* env) { + int bat_quadrant = (int)(bat_rand(env) & 3u); + int bug_quadrant = bat_quadrant ^ 3; + float min_sep = fminf(env->width, env->height) * 0.31f; + + for (int attempt = 0; attempt < 64; attempt++) { + bat_sample_in_quadrant(env, bat_quadrant, env->bat_radius, &env->bat_x, &env->bat_y); + bat_sample_in_quadrant(env, bug_quadrant, env->bug_radius, &env->bug_x, &env->bug_y); + if (bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) >= min_sep) { + return; + } + } + + float qx[4] = {0.25f, 0.75f, 0.25f, 0.75f}; + float qy[4] = {0.25f, 0.25f, 0.75f, 0.75f}; + env->bat_x = env->width * qx[bat_quadrant]; + env->bat_y = env->height * qy[bat_quadrant]; + env->bug_x = env->width * qx[bug_quadrant]; + env->bug_y = env->height * qy[bug_quadrant]; +} + +static inline bool bat_obstacle_clear(Bat* env, int idx, float x, float y, + float w, float h) { + if (bat_circle_rect_collision(env->bat_x, env->bat_y, env->bat_radius + 2.0f, x, y, w, h)) { + return false; + } + if (bat_circle_rect_collision(env->bug_x, env->bug_y, env->bug_radius + 2.0f, x, y, w, h)) { + return false; + } + for (int j = 0; j < idx; j++) { + if (bat_rects_overlap(x, y, w, h, + env->obstacle_x[j], env->obstacle_y[j], env->obstacle_w[j], env->obstacle_h[j], 3.0f)) { + return false; + } + } + return true; +} + static inline void generate_obstacles(Bat* env) { for (int i = 0; i < env->num_obstacles; i++) { - float w = 7.0f + 2.0f * (float)(i % 3); - float h = 7.0f + 2.0f * (float)((i + 1) % 3); - float lane = (i + 1.0f) / (env->num_obstacles + 1.0f); - float jitter = (bat_randf(env) - 0.5f) * 6.0f; - env->obstacle_w[i] = w; - env->obstacle_h[i] = h; - env->obstacle_x[i] = bat_clampf(env->width * lane - w * 0.5f + jitter, - env->bat_radius + 2.0f, 
env->width - w - env->bat_radius - 2.0f); - env->obstacle_y[i] = bat_clampf(env->height * (0.35f + 0.3f * (i % 2)) - h * 0.5f - jitter, - env->bat_radius + 2.0f, env->height - h - env->bat_radius - 2.0f); + bool placed = false; + for (int attempt = 0; attempt < 96; attempt++) { + float w = 3.0f + 5.0f * bat_randf(env); + float h = 3.0f + 5.0f * bat_randf(env); + float margin = 4.0f; + float x = margin + bat_randf(env) * (env->width - w - 2.0f * margin); + float y = margin + bat_randf(env) * (env->height - h - 2.0f * margin); + if (bat_obstacle_clear(env, i, x, y, w, h)) { + env->obstacle_x[i] = x; + env->obstacle_y[i] = y; + env->obstacle_w[i] = w; + env->obstacle_h[i] = h; + placed = true; + break; + } + } + if (!placed) { + float w = 6.0f; + float h = 6.0f; + float x = env->width * (0.30f + 0.20f * (i % 2)) - w * 0.5f; + float y = env->height * (0.30f + 0.20f * ((i + 1) % 2)) - h * 0.5f; + env->obstacle_x[i] = x; + env->obstacle_y[i] = y; + env->obstacle_w[i] = w; + env->obstacle_h[i] = h; + } } } @@ -445,17 +524,15 @@ void compute_observations(Bat* env) { static inline void bat_reset_episode(Bat* env) { env->tick = 0; - env->bat_x = env->width * 0.25f; - env->bat_y = env->height * 0.5f; env->bat_vx = 0.0f; env->bat_vy = 0.0f; - env->bat_heading = 0.0f; env->bat_turn_velocity = 0.0f; + env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + bat_sample_spawns(env); generate_obstacles(env); - env->bug_x = env->width * 0.75f; - env->bug_y = env->height * (0.35f + 0.30f * bat_randf(env)); - env->bug_vx = -env->bug_speed; - env->bug_vy = (bat_randf(env) - 0.5f) * env->bug_speed * 0.5f; + float bug_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + env->bug_vx = cosf(bug_heading) * env->bug_speed; + env->bug_vy = sinf(bug_heading) * env->bug_speed; env->last_chirp_start_freq = 0.0f; env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 75df15735b..6d714b7995 
100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -249,6 +249,97 @@ static int test_reflection_arrives_at_two_way_travel_time(void) { return 0; } +static int test_quadrant(float x, float y, float width, float height) { + int east = x >= width * 0.5f; + int south = y >= height * 0.5f; + return south * 2 + east; +} + +static int test_spawns_use_different_random_quadrants(void) { + Bat env = make_test_env(); + int seen_bat[4] = {0}; + int seen_bug[4] = {0}; + int bat_quadrants = 0; + int bug_quadrants = 0; + + for (int i = 0; i < 48; i++) { + c_reset(&env); + int bq = test_quadrant(env.bat_x, env.bat_y, env.width, env.height); + int gq = test_quadrant(env.bug_x, env.bug_y, env.width, env.height); + ASSERT_TRUE(bq != gq); + if (!seen_bat[bq]) { + seen_bat[bq] = 1; + bat_quadrants += 1; + } + if (!seen_bug[gq]) { + seen_bug[gq] = 1; + bug_quadrants += 1; + } + } + + ASSERT_TRUE(bat_quadrants >= 3); + ASSERT_TRUE(bug_quadrants >= 3); + + free_allocated(&env); + return 0; +} + +static int test_spawns_keep_minimum_separation_and_avoid_obstacles(void) { + Bat env = make_test_env(); + float min_sep = 20.0f; + + for (int reset = 0; reset < 32; reset++) { + c_reset(&env); + ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) >= min_sep); + for (int i = 0; i < env.num_obstacles; i++) { + ASSERT_TRUE(!bat_circle_rect_collision(env.bat_x, env.bat_y, env.bat_radius + 1.0f, + env.obstacle_x[i], env.obstacle_y[i], env.obstacle_w[i], env.obstacle_h[i])); + ASSERT_TRUE(!bat_circle_rect_collision(env.bug_x, env.bug_y, env.bug_radius + 1.0f, + env.obstacle_x[i], env.obstacle_y[i], env.obstacle_w[i], env.obstacle_h[i])); + } + } + + free_allocated(&env); + return 0; +} + +static int test_obstacles_move_substantially_across_resets(void) { + Bat env = make_test_env(); + c_reset(&env); + float first_x = env.obstacle_x[0]; + float first_y = env.obstacle_y[0]; + float max_delta = 0.0f; + + for (int i = 0; i < 32; i++) { + c_reset(&env); + float 
delta = bat_dist(first_x, first_y, env.obstacle_x[0], env.obstacle_y[0]); + if (delta > max_delta) max_delta = delta; + } + + ASSERT_TRUE(max_delta > 16.0f); + + free_allocated(&env); + return 0; +} + +static int test_obstacles_are_small_enough_for_trainability(void) { + Bat env = make_test_env(); + + for (int reset = 0; reset < 64; reset++) { + c_reset(&env); + for (int i = 0; i < env.num_obstacles; i++) { + ASSERT_TRUE(env.obstacle_w[i] >= 3.0f); + ASSERT_TRUE(env.obstacle_h[i] >= 3.0f); + ASSERT_TRUE(env.obstacle_w[i] <= 8.0f); + ASSERT_TRUE(env.obstacle_h[i] <= 8.0f); + ASSERT_TRUE(env.obstacle_w[i] * env.obstacle_h[i] <= 64.0f); + } + } + + free_allocated(&env); + return 0; +} + int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; if (test_left_right_echo_asymmetry()) return 1; @@ -260,6 +351,10 @@ int main(void) { if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; if (test_reflection_arrives_at_two_way_travel_time()) return 1; + if (test_spawns_use_different_random_quadrants()) return 1; + if (test_spawns_keep_minimum_separation_and_avoid_obstacles()) return 1; + if (test_obstacles_move_substantially_across_resets()) return 1; + if (test_obstacles_are_small_enough_for_trainability()) return 1; printf("bat core tests passed\n"); return 0; From 4bc4aa1b6034703d3faaf16da9ec3421b788fb2f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 23:07:14 -0700 Subject: [PATCH 04/51] feat: switch bat sonar to per-ear frequency bins --- BAT_SONAR_OBSERVATION_NOTES.md | 260 ++++++++++++++ BAT_SPEC.md | 123 ++++--- config/bat.ini | 156 ++++++++- ocean/bat/bat.h | 376 ++++++++++++++++----- ocean/bat/binding.c | 18 +- ocean/bat/tests/test_bat_core.c | 579 +++++++++++++++++++++++++++++++- 6 files changed, 1373 insertions(+), 139 deletions(-) create mode 100644 BAT_SONAR_OBSERVATION_NOTES.md diff --git a/BAT_SONAR_OBSERVATION_NOTES.md b/BAT_SONAR_OBSERVATION_NOTES.md new file 
mode 100644 index 0000000000..238138bdb3 --- /dev/null +++ b/BAT_SONAR_OBSERVATION_NOTES.md @@ -0,0 +1,260 @@ +# Bat Sonar Observation Notes + +Status: design note for current and future Bat agents + +Workspace: `/home/claude/pathfinder` + +Related spec: `BAT_SPEC.md` + +## Purpose + +This note records the intended next observation and echo model for the Bat +environment. The current implementation was deliberately simplified to get a +trainable baseline. The next rung should make active echolocation real: the bat +should hear frequency energy only when echoes from its own chirps return. + +## Retired Scaffold Implementation + +The first Bat observation was a fast synthetic feature extractor, not a true +chirp-return audio model. It has been retired, but the notes are kept here so +future agents understand why the env moved away from it. + +Retired layout: + +- `left_range_energy[16]` +- `left_doppler_energy[16]` +- `right_range_energy[16]` +- `right_doppler_energy[16]` +- `chirp_age_norm` +- `last_chirp_start_freq_norm` +- `last_chirp_end_freq_norm` +- `last_chirp_duration_norm` +- `forward_speed_norm` +- `turn_rate_norm` + +Total size: `70`. + +Each frame, the env recomputes current echo features from the current bat, +bug, wall, and obstacle positions. The bug is one strong moving reflector. +Walls and obstacle edges are sampled into static point reflectors. For each +reflector, the env computes approximate left-ear and right-ear path lengths, +attenuation, left/right gain, and a normalized Doppler value. It then deposits +energy into range-indexed observation slots. + +This is useful for a first baseline, but it is too informative: + +- The bat gets fresh echo-like information every frame, even if it did not + chirp. +- Chirp start frequency, end frequency, and duration do not materially affect + the acoustic observation. +- The Doppler channels are scalar range-indexed values, not FFT bins. 
+- Range is exposed as direct binned path length instead of being inferred from + echo return timing. + +## Current Target Model + +The observation should be per-tick binaural frequency energy: + +- `left_freq_bins[N]` +- `right_freq_bins[N]` +- chirp metadata +- cooldown/age metadata +- self-motion metadata + +No explicit delay/range bins are needed in the observation. Distance should be +implicit in time. The policy should infer range from when frequency energy +returns after a chirp. + +Current layout: + +- `left_freq_bins[16]` +- `right_freq_bins[16]` +- `chirp_age_norm` +- `chirp_cooldown_norm` +- `last_chirp_start_freq_norm` +- `last_chirp_end_freq_norm` +- `last_chirp_duration_norm` +- `forward_speed_norm` +- `turn_rate_norm` + +Total size: `39`. + +If 16 bins is too coarse after implementation, use 24 bins per ear for a total +size of `55`. + +## Event-Driven Echo Model + +Do not synthesize raw audio and do not run an FFT per environment step. Use an +analytic event model that directly deposits echo energy into frequency bins at +the tick when the echo reaches each ear. + +When a chirp is emitted: + +1. Break the chirp into a small number of time slices. +2. For each slice, compute the emitted frequency from chirp start frequency, + end frequency, and duration. +3. For each reflector, compute when that slice reaches the reflector. +4. Compute when the reflected sound reaches the left ear and right ear. +5. Compute returned amplitude, ear gain, and Doppler-shifted frequency. +6. Enqueue an echo event for each ear. + +Each echo event should store: + +- receive time in continuous ticks or seconds +- target ear +- returned normalized frequency +- intensity +- source chirp identifier or chirp birth tick, if useful for debugging + +On each env tick: + +1. Clear left/right frequency bins. +2. Process all echo events whose receive time falls in the current tick window. +3. 
Deposit event intensity into the relevant frequency bin, with optional + fractional spill into neighboring bins. +4. Add a small configurable noise floor. +5. Apply bounded compression, such as `log1p(k * energy) / log1p(k)`. +6. Append chirp and self-motion metadata. + +This produces the desired behavior: + +- No chirp means no new echo energy, aside from noise or any intentionally + modeled lingering sensor state. +- A low-to-high chirp creates a time-coded return pattern. +- Multiple reflectors can overlap naturally in the same tick and frequency + bin. +- Range must be inferred from echo timing, not from a direct range channel. + +## Example: Two-Frequency Chirp and Two Targets + +Assume two frequency bins: low and high. + +The bat emits a two-slice chirp: + +- slice 0: high frequency +- slice 1: low frequency + +There are two static targets, one near and one far. With zero Doppler, the +per-tick ear spectrum, written as `[low, high]`, could look like: + +```text +[0, 0] sound still traveling +[0, 0] sound still traveling +[0, 1] near target returns high slice +[1, 1] near target returns low slice, far target returns high slice +[1, 0] far target returns low slice +[0, 0] no active returns +``` + +This is the intended observation style. It is not a delay-bin representation. +The temporal sequence itself contains the delay/range information. + +## Timing and Physics Notes + +Echo timing is two-way: + +```text +emit position -> reflector -> ear +``` + +For static reflectors, the approximate return time is: + +```text +t_receive = t_emit + + distance(chirp_origin, reflector) / sound_speed + + distance(reflector, ear_at_receive) / sound_speed +``` + +For moving reflectors, such as the bug, the hit time should use predicted +reflector position at the time of impact. A linear-motion approximation is good +enough for the next implementation. 
+ +Doppler should be based on the rate of change of the acoustic path length: + +```text +doppler_shift ~= -path_length_rate / sound_speed +``` + +Static walls and obstacles can still have Doppler from bat self-motion. The +moving bug additionally contributes target radial velocity. + +Use fractional receive times internally. The env control tick can stay at +`1/60` second while echo events are scheduled at sub-tick times and deposited +into the nearest tick or split across adjacent ticks. + +## Chirp Overlap and Memory + +Without explicit delay bins, the policy needs temporal memory to infer range. +The observation at a single tick only says what frequency energy is arriving +now. It does not directly say how long ago that sound was emitted unless the +policy remembers the chirp sequence or the env provides reliable chirp-age +metadata. + +For the next rung, use one active chirp at a time: + +- `chirp_cooldown_ticks >= max_echo_return_ticks` +- include `chirp_age_norm` +- include last chirp start frequency, end frequency, and duration + +This keeps return timing interpretable before adding overlapping chirps. Later +curriculum stages can reduce cooldown and allow ambiguity from multiple active +chirps. + +## Performance Constraints + +The target is high SPS. Avoid raw waveform buffers, convolution, and per-step +FFT. + +Use: + +- a fixed upper bound on active chirps +- a fixed upper bound on echo events +- static reflector precomputation after reset +- direct frequency-bin deposition +- simple geometric attenuation and ear gain +- first-order reflections only + +The expected work per tick should stay near: + +```text +active_chirps * chirp_slices * reflectors * ears +``` + +With small constants, this remains cheap C code and should preserve the spirit +of the current native PufferLib env. + +## Implementation Direction + +The next implementation should replace current range/Doppler observation +generation with an event queue. 
+ +Suggested data structures: + +- `ChirpEvent`: emitted chirp metadata, birth time, origin, frequency sweep +- `Reflector`: position, velocity, strength, normal or type +- `EchoEvent`: receive time, ear, frequency, intensity + +Suggested tests: + +- no chirp produces no echo energy beyond noise +- single static reflector returns at expected two-way travel time +- left and right ears receive slightly different timings/intensities off-axis +- two chirp slices and two reflectors produce the expected overlapping bin + pattern +- moving bug shifts frequency in the expected Doppler direction +- cooldown prevents ambiguous overlapping chirps in the initial curriculum +- bug echo progress reward only fires when the echo-derived bug path is shorter + than the previous bug echo path +- static echoes never receive bug echo progress reward + +## Non-Goals for the Next Rung + +Do not add raw audio synthesis yet. + +Do not add an actual FFT dependency yet. + +Do not add full wave acoustics. + +Do not add multi-bounce reverberation yet. + +Do not expose direct range bins if the goal is to force temporal echolocation. diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 66a40ed953..5117775592 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -8,6 +8,10 @@ Target branch: `bat` Target env name: `bat` +Detailed sonar observation design note: + +- `BAT_SONAR_OBSERVATION_NOTES.md` + ## Intent Build a single-agent PufferLib Ocean environment inspired by bat echolocation. @@ -105,7 +109,13 @@ Acoustics: convolving high-rate audio samples. - Sound speed is configurable and artificial. The default should be much slower than real air acoustics so echo timing is learnable in a small game arena. -- Start with `sound_speed = 100.0` world units per second. +- Start with `sound_speed = 60.0` world units per second. 
At the default + `1/60` second env tick and current ear spacing, this gives broadside echoes + enough artificial time-of-arrival separation for one ear to be able to hear a + return about one tick before the other. +- `ear_separation_scale` controls the artificial distance between ears as a + multiple of `bat_radius`. Keep it bounded; the implementation clamps it to + `[0.25, 2.0]` and the default sweep range is `[0.5, 2.0]`. - Every echo contribution has: - two-way distance from mouth/source to reflector to each ear, - delay derived from speed of sound, @@ -211,53 +221,55 @@ heading. Observation layout: -1. `left_range_energy[16]` -2. `left_doppler_energy[16]` -3. `right_range_energy[16]` -4. `right_doppler_energy[16]` +1. `left_freq_bins[16]` +2. `right_freq_bins[16]` 3. `chirp_age_norm` -4. `last_chirp_start_freq_norm` -5. `last_chirp_end_freq_norm` -6. `last_chirp_duration_norm` -7. `forward_speed_norm` -8. `turn_rate_norm` +4. `chirp_cooldown_norm` +5. `last_chirp_start_freq_norm` +6. `last_chirp_end_freq_norm` +7. `last_chirp_duration_norm` +8. `forward_speed_norm` +9. `turn_rate_norm` Initial observation size: -- `OBS_SIZE = 70` +- `OBS_SIZE = 39` Echo bins: -- Each ear receives 16 range-energy bins and 16 Doppler-energy bins. -- Bins represent compact matched-filter-like range and Doppler energy, not raw - audio and not direct FFT bins. -- The frequency range is intentionally narrow, and bat/bug speeds are bounded, - so normalized Doppler can fit in a compact representation. -- Values are normalized to a bounded range before policy input. -- Nearer and stronger reflectors produce larger bin energy. -- Bug echoes can be distinguished statistically because the bug moves and - produces Doppler-shifted returns. - -Range bins: - -- Range bins accumulate echo energy by delay. -- Bin `0` represents the nearest useful echo distance. -- The last bin represents `max_echo_range`. +- Each ear receives 16 frequency-intensity bins. 
+- Bins represent the summed intensity arriving at that ear during the current + env tick. +- Values are capped to `[0.0, 1.0]` before policy input. +- No explicit delay/range bins are exposed. +- No chirp means no new echo energy, aside from any later noise model. +- Range must be inferred from when frequency energy returns after an emitted + chirp. +- Doppler shifts move return energy across nearby frequency bins instead of + appearing in a separate Doppler observation channel. + +Echo timing: + +- Chirps schedule analytic echo-arrival events. +- Each event has a receive time, ear, normalized frequency, and intensity. +- On each tick, all events arriving in that tick window are summed into the + corresponding ear frequency bins. +- Multiple reflectors can contribute to the same bin on the same tick. - Echoes beyond `max_echo_range` are ignored. -Doppler bins: - -- Doppler energy is accumulated into the same range-indexed layout as range - energy. -- Approaching reflectors add positive normalized Doppler energy. -- Receding reflectors add negative normalized Doppler energy. -- Static reflectors contribute near-zero Doppler energy. - Chirp metadata: - The agent receives the last emitted chirp start frequency, end frequency, and duration because interpreting a return depends on knowing the transmitted signal. + +Current implementation note: + +- The range/Doppler scaffold has been retired in favor of per-tick left/right + frequency spectra generated by analytic echo-arrival events. +- Range is inferred from echo timing and chirp age rather than exposed as an + observation axis. +- See `BAT_SONAR_OBSERVATION_NOTES.md` before changing acoustic observations. - `chirp_age_norm` lets the policy distinguish fresh echo windows from stale or silent intervals. @@ -285,7 +297,13 @@ Default reward model: - Small negative step cost to encourage efficient pursuit. - Dense progress reward based on reduction in true bat-to-bug distance. 
- `-1.0` for hitting walls or obstacles, terminal. -- Optional chirp cost so constant chirping is not free. +- Tiny chirp cost so constant chirping is not fully free without causing + chirp collapse. +- Sound-derived bug echo progress reward: + - when a bug echo returns with a shorter acoustic path than the previous bug + echo, add a small shaped reward, + - farther bug echoes update the previous bug echo path but do not reward, + - static wall and obstacle echoes do not receive this reward. - Optional silence bonus or energy budget should wait until the basic task trains. @@ -299,9 +317,14 @@ Progress reward: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - `reward -= chirp_cost` when a chirp is emitted + - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` + when a returning bug echo indicates the bug is closer than the previous bug + echo - Default starting values: - `progress_reward_scale = 0.05` - `step_cost = 0.001` + - `chirp_cost = 0.00005` + - `bug_echo_reward_scale = 0.02` - `chirp_cost = 0.0005` Important caveat: @@ -366,14 +389,14 @@ Config knobs: - `obstacle_min_size` - `obstacle_max_size` - `bat_radius` +- `ear_separation_scale` - `bug_radius` - `bat_max_speed` - `bat_accel` - `bat_turn_rate` - `bug_speed` - `max_steps` -- `range_bins_per_ear` -- `doppler_bins_per_ear` +- `freq_bins_per_ear` - `max_echo_range` - `sound_speed` - `reflector_spacing` @@ -418,7 +441,7 @@ Follow the Breakout-style native env shape: Testing expectations: - Unit tests for chirp parameter normalization. -- Unit tests for echo delay/range bin placement. +- Unit tests for echo delay and per-tick frequency-bin placement. - Unit tests for left/right ear asymmetry from azimuth. - Unit tests for Doppler sign on approaching vs receding bug. - Unit tests for collision and catch termination. 
@@ -436,8 +459,9 @@ Reward shaping: Acoustic representation: -- v1 uses 16 range-energy bins and 16 signed Doppler-energy bins per ear. -- A later variant can test a flattened range-Doppler grid or literal FFT bins. +- v1 uses 16 current-tick frequency-intensity bins per ear. +- A later variant can test more frequency bins, a flattened range-Doppler grid, + or literal FFT bins. Bug behavior: @@ -449,3 +473,22 @@ Obstacle reflections: - v1 samples walls and obstacle edges into point reflectors. - Later variants can compare analytic segment reflections or multiple-bounce reflections. + +## Training and Sweep Operations + +- Use `perf` as the sweep objective. It is `1.0` only when the bat catches the bug and `0.0` for collision or timeout. +- Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. +- Acoustic scale terms should be swept before increasing model size. Current bounded acoustic sweep knobs are `env.sound_speed` in `[45.0, 120.0]` and `env.ear_separation_scale` in `[0.5, 2.0]`. +- Train workers should use CUDA with `--train.gpus 1`. +- Protein/sweep control does not need CUDA. Run sweeps with `--sweep.use-gpu ""` so the optimizer stays off CUDA and avoids CUDA IPC/resource-handle failures. +- Do not override training duration with ad hoc `--train.total-timesteps`. Put duration ranges in `config/bat.ini`. +- Keep Bat sweep ranges bounded so a sweep cannot accidentally launch huge slow models. Bat config uses stock `sweep_only` as a safety filter because PufferLib's default sweep config includes unsafe inherited ranges such as `train.total_timesteps` up to `1e11`, `policy.hidden_size` up to `1024`, `policy.num_layers` up to `8`, and `train.horizon` up to `1024`. +- The default Bat sweep does not sweep policy model size; it keeps `policy.hidden_size = 128` and `policy.num_layers = 4`. 
Current cost-sensitive sweep bounds cap training duration at `50_000_000`, rollout horizon at `128`, replay ratio at `1.25`, and `vec.num_buffers` at `8`. +- Do not add broad model-size sweep ranges. If model size must be swept later, require explicit human approval and keep a hard ceiling of `policy.hidden_size <= 256` and `policy.num_layers <= 4` unless there is a measured SPS reason to widen it. +- Keep PufferLib core stock for Bat. If sweep parsing conflicts with inherited default sweep keys, solve it through Bat config or command-line args, not core edits. +- On this PufferLib branch, `sweep bat --sweep.max-runs 2` is not enough to + exercise suggested hyperparameters: the first two launched experiments use + the current config defaults, and `sweep_obj.suggest(...)` is only called for + later runs. Use at least `--sweep.max-runs 3` for one actual suggestion, or + run explicit bounded comparison trains when testing a small acoustic grid. +- Curriculum difficulty should not advance on a single lucky catch. `env.curriculum_successes_per_level` gates advancement so each env must catch the bug multiple times at the current level before increasing bug distance or obstacle count. 
diff --git a/config/bat.ini b/config/bat.ini index 702e2fcd4e..6621353b44 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -22,22 +22,33 @@ width = 64 height = 64 num_obstacles = 3 bat_radius = 2.0 +ear_separation_scale = 0.75 bug_radius = 1.5 bat_max_speed = 12.0 bat_accel = 30.0 bat_turn_rate = 3.1415926 bug_speed = 4.0 max_steps = 512 -range_bins_per_ear = 16 -doppler_bins_per_ear = 16 +curriculum_enabled = 1 +curriculum_start_obstacles = 1 +curriculum_max_obstacles = 3 +curriculum_obstacle_step = 8 +curriculum_successes_per_level = 8 +curriculum_start_bug_distance = 12.0 +curriculum_max_bug_distance = 44.0 +curriculum_bug_distance_step = 1.5 +freq_bins_per_ear = 16 max_echo_range = 80.0 -sound_speed = 100.0 +sound_speed = 60.0 reflector_spacing = 8.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 12 -chirp_cost = 0.0005 +chirp_cost = 0.0 +valid_chirp_reward = 0.0005 +early_chirp_penalty = 0.001 +bug_echo_reward_scale = 0.05 step_cost = 0.001 -progress_reward_scale = 0.05 +progress_reward_scale = 0.02 collision_penalty = 1.0 [train] @@ -60,3 +71,138 @@ vtrace_rho_clip = 1.0 vtrace_c_clip = 1.0 prio_alpha = 0.8 prio_beta0 = 0.2 + +[sweep] +method = Protein +metric = perf +metric_distribution = linear +goal = maximize +max_runs = 8 +gpus = 1 +downsample = 5 +use_gpu = True +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +match_enemy_model_path = {} +match_num_games = {} +match_enemy_hidden_size = {} +match_enemy_num_layers = {} + +[sweep.train.total_timesteps] +distribution = log_normal +min = 30_000_000 +max = 50_000_000 +scale = auto + +[sweep.train.learning_rate] +distribution = log_normal +min = 0.003 +max = 0.03 +scale = auto + 
+[sweep.train.gamma] +distribution = logit_normal +min = 0.98 +max = 0.9995 +scale = auto + +[sweep.train.gae_lambda] +distribution = logit_normal +min = 0.80 +max = 0.98 +scale = auto + +[sweep.train.ent_coef] +distribution = log_normal +min = 0.0001 +max = 0.01 +scale = auto + +[sweep.train.horizon] +distribution = uniform_pow2 +min = 32 +max = 128 +scale = auto + +[sweep.train.replay_ratio] +distribution = uniform +min = 0.75 +max = 1.25 +scale = auto + +[sweep.vec.num_buffers] +distribution = int_uniform +min = 4 +max = 8 +scale = auto + +[sweep.env.step_cost] +distribution = uniform +min = 0.0002 +max = 0.002 +scale = auto + +[sweep.env.sound_speed] +distribution = uniform +min = 45.0 +max = 120.0 +scale = auto + +[sweep.env.ear_separation_scale] +distribution = uniform +min = 0.5 +max = 2.0 +scale = auto + +[sweep.env.progress_reward_scale] +distribution = uniform +min = 0.0 +max = 0.08 +scale = auto + +[sweep.env.valid_chirp_reward] +distribution = uniform +min = 0.00005 +max = 0.002 +scale = auto + +[sweep.env.early_chirp_penalty] +distribution = uniform +min = 0.0001 +max = 0.005 +scale = auto + +[sweep.env.bug_echo_reward_scale] +distribution = uniform +min = 0.02 +max = 0.30 +scale = auto + +[sweep.env.collision_penalty] +distribution = uniform +min = 0.5 +max = 2.0 +scale = auto + +[sweep.env.curriculum_start_bug_distance] +distribution = uniform +min = 8.0 +max = 20.0 +scale = auto + +[sweep.env.curriculum_bug_distance_step] +distribution = uniform +min = 0.5 +max = 4.0 +scale = auto + +[sweep.env.curriculum_obstacle_step] +distribution = int_uniform +min = 4 +max = 24 +scale = auto + +[sweep.env.curriculum_successes_per_level] +distribution = int_uniform +min = 4 +max = 32 +scale = auto diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index b4e3e9f48e..faec64e6cd 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -10,7 +10,7 @@ #include "raylib.h" #endif -#define BAT_OBS_SIZE 70 +#define BAT_OBS_SIZE 39 #define BAT_NUM_ACTIONS 6 #define 
BAT_MOVE_ACTIONS 5 #define BAT_TURN_ACTIONS 3 @@ -18,18 +18,16 @@ #define BAT_CHIRP_DURATION_BINS 4 #define BAT_CHIRP_EMIT_ACTIONS 2 -#define BAT_RANGE_BINS 16 -#define BAT_DOPPLER_BINS 16 -#define BAT_LEFT_RANGE_OFFSET 0 -#define BAT_LEFT_DOPPLER_OFFSET 16 -#define BAT_RIGHT_RANGE_OFFSET 32 -#define BAT_RIGHT_DOPPLER_OFFSET 48 -#define BAT_CHIRP_AGE_OBS 64 -#define BAT_CHIRP_START_OBS 65 -#define BAT_CHIRP_END_OBS 66 -#define BAT_CHIRP_DURATION_OBS 67 -#define BAT_FORWARD_SPEED_OBS 68 -#define BAT_TURN_RATE_OBS 69 +#define BAT_FREQ_BINS 16 +#define BAT_LEFT_FREQ_OFFSET 0 +#define BAT_RIGHT_FREQ_OFFSET 16 +#define BAT_CHIRP_AGE_OBS 32 +#define BAT_CHIRP_COOLDOWN_OBS 33 +#define BAT_CHIRP_START_OBS 34 +#define BAT_CHIRP_END_OBS 35 +#define BAT_CHIRP_DURATION_OBS 36 +#define BAT_FORWARD_SPEED_OBS 37 +#define BAT_TURN_RATE_OBS 38 #define BAT_NOOP 0 #define BAT_THRUST_FORWARD 1 @@ -46,6 +44,11 @@ #define BAT_PI 3.14159265358979323846f #define BAT_CHIRP_HISTORY 4 #define BAT_CHIRP_RINGS 5 +#define BAT_MAX_CHIRP_SLICES 16 +#define BAT_MAX_ECHO_EVENTS 4096 + +#define BAT_ECHO_STATIC 0 +#define BAT_ECHO_BUG 1 typedef struct BatColor { unsigned char r; @@ -64,6 +67,16 @@ typedef struct ChirpEvent { int active; } ChirpEvent; +typedef struct EchoEvent { + float receive_tick; + float freq; + float intensity; + float path; + int ear; + int source; + int active; +} EchoEvent; + typedef struct Log { float perf; float score; @@ -103,6 +116,16 @@ typedef struct Bat { int tick; int max_steps; int num_obstacles; + int curriculum_enabled; + int curriculum_level; + int curriculum_start_obstacles; + int curriculum_max_obstacles; + int curriculum_obstacle_step; + int curriculum_successes_per_level; + int curriculum_successes_at_level; + float curriculum_start_bug_distance; + float curriculum_max_bug_distance; + float curriculum_bug_distance_step; float bat_x; float bat_y; @@ -111,6 +134,7 @@ typedef struct Bat { float bat_heading; float bat_turn_velocity; float bat_radius; + float 
ear_separation_scale; float bat_max_speed; float bat_accel; float bat_turn_rate; @@ -127,8 +151,7 @@ typedef struct Bat { float* obstacle_w; float* obstacle_h; - int range_bins_per_ear; - int doppler_bins_per_ear; + int freq_bins_per_ear; float max_echo_range; float sound_speed; float reflector_spacing; @@ -141,6 +164,8 @@ typedef struct Bat { float last_chirp_duration; ChirpEvent chirps[BAT_CHIRP_HISTORY]; int chirp_head; + EchoEvent echo_events[BAT_MAX_ECHO_EVENTS]; + int echo_head; int chirps_emitted_episode; float chirp_duration_sum; float chirp_bandwidth_sum; @@ -148,8 +173,14 @@ typedef struct Bat { float echo_energy_right_sum; float chirp_cost; + float valid_chirp_reward; + float early_chirp_penalty; float step_cost; float progress_reward_scale; + float bug_echo_reward_scale; + float tick_bug_echo_energy; + float tick_bug_echo_path; + float last_bug_echo_path; float collision_penalty; float prev_bug_dist; float start_bug_dist; @@ -201,6 +232,12 @@ static inline bool bat_echo_is_arriving(float echo_time, float chirp_age, return fabsf(chirp_age - echo_time) <= window; } +static inline float bat_chirp_age_norm_denominator(Bat* env) { + float travel_ticks = env->max_echo_range / fmaxf(1.0f, env->sound_speed) / BAT_TICK_RATE; + float chirp_ticks = bat_chirp_duration_seconds(1.0f) / BAT_TICK_RATE; + return fmaxf(1.0f, 1.25f * (travel_ticks + chirp_ticks)); +} + static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { float f = bat_clampf(freq_norm, 0.0f, 1.0f); float mid = 1.0f - fabsf(2.0f * f - 1.0f); @@ -290,6 +327,63 @@ static inline void bat_sample_spawns(Bat* env) { env->bug_y = env->height * qy[bug_quadrant]; } +static inline int bat_curriculum_obstacles(Bat* env) { + if (!env->curriculum_enabled) return env->num_obstacles; + int step = env->curriculum_obstacle_step <= 0 ? 
1 : env->curriculum_obstacle_step; + int count = env->curriculum_start_obstacles + env->curriculum_level / step; + if (count < 0) count = 0; + if (count > env->curriculum_max_obstacles) count = env->curriculum_max_obstacles; + if (count > BAT_MAX_OBSTACLES) count = BAT_MAX_OBSTACLES; + return count; +} + +static inline float bat_curriculum_bug_distance(Bat* env) { + float distance = env->curriculum_start_bug_distance + + env->curriculum_bug_distance_step * env->curriculum_level; + return bat_clampf(distance, env->curriculum_start_bug_distance, + env->curriculum_max_bug_distance); +} + +static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { + float margin = fmaxf(6.0f, fmaxf(env->bat_radius, env->bug_radius) + 3.0f); + target_distance = fmaxf(0.0f, target_distance); + + for (int attempt = 0; attempt < 96; attempt++) { + float angle = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + float dx = cosf(angle) * target_distance; + float dy = sinf(angle) * target_distance; + float min_bat_x = fmaxf(margin, margin - dx); + float max_bat_x = fminf(env->width - margin, env->width - margin - dx); + float min_bat_y = fmaxf(margin, margin - dy); + float max_bat_y = fminf(env->height - margin, env->height - margin - dy); + if (max_bat_x < min_bat_x || max_bat_y < min_bat_y) continue; + + env->bat_x = min_bat_x + bat_randf(env) * (max_bat_x - min_bat_x); + env->bat_y = min_bat_y + bat_randf(env) * (max_bat_y - min_bat_y); + env->bug_x = env->bat_x + dx; + env->bug_y = env->bat_y + dy; + return; + } + + bat_sample_spawns(env); +} + +static inline void bat_apply_curriculum(Bat* env) { + if (env->curriculum_enabled) { + env->num_obstacles = bat_curriculum_obstacles(env); + } +} + +static inline void bat_advance_curriculum(Bat* env) { + if (env->curriculum_enabled) { + env->curriculum_successes_at_level += 1; + if (env->curriculum_successes_at_level >= env->curriculum_successes_per_level) { + env->curriculum_level += 1; + env->curriculum_successes_at_level = 0; 
+ } + } +} + static inline bool bat_obstacle_clear(Bat* env, int idx, float x, float y, float w, float h) { if (bat_circle_rect_collision(env->bat_x, env->bat_y, env->bat_radius + 2.0f, x, y, w, h)) { @@ -346,30 +440,47 @@ void init(Bat* env) { if (env->height <= 0) env->height = 64; if (env->max_steps <= 0) env->max_steps = 512; if (env->bat_radius <= 0.0f) env->bat_radius = 2.0f; + if (env->ear_separation_scale <= 0.0f) env->ear_separation_scale = 0.75f; + env->ear_separation_scale = bat_clampf(env->ear_separation_scale, 0.25f, 2.0f); if (env->bug_radius <= 0.0f) env->bug_radius = 1.5f; if (env->bat_max_speed <= 0.0f) env->bat_max_speed = 12.0f; if (env->bat_accel <= 0.0f) env->bat_accel = 30.0f; if (env->bat_turn_rate <= 0.0f) env->bat_turn_rate = BAT_PI; if (env->bug_speed <= 0.0f) env->bug_speed = 4.0f; - if (env->range_bins_per_ear <= 0) env->range_bins_per_ear = BAT_RANGE_BINS; - if (env->doppler_bins_per_ear <= 0) env->doppler_bins_per_ear = BAT_DOPPLER_BINS; + if (env->freq_bins_per_ear <= 0) env->freq_bins_per_ear = BAT_FREQ_BINS; if (env->max_echo_range <= 0.0f) env->max_echo_range = 80.0f; - if (env->sound_speed <= 0.0f) env->sound_speed = 100.0f; + if (env->sound_speed <= 0.0f) env->sound_speed = 60.0f; if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; if (env->step_cost <= 0.0f) env->step_cost = 0.001f; if (env->progress_reward_scale <= 0.0f) env->progress_reward_scale = 0.05f; if (env->collision_penalty <= 0.0f) env->collision_penalty = 1.0f; - if (env->chirp_cost <= 0.0f) env->chirp_cost = 0.0005f; + if (env->chirp_cost < 0.0f) env->chirp_cost = 0.0f; + if (env->valid_chirp_reward <= 0.0f) env->valid_chirp_reward = 0.0005f; + if (env->early_chirp_penalty <= 0.0f) env->early_chirp_penalty = 0.001f; + if (env->bug_echo_reward_scale <= 0.0f) env->bug_echo_reward_scale = 0.0f; if (env->rng == 0) env->rng 
= 1; if (env->num_obstacles < 0) env->num_obstacles = 0; if (env->num_obstacles > BAT_MAX_OBSTACLES) env->num_obstacles = BAT_MAX_OBSTACLES; - env->obstacle_x = (float*)calloc(env->num_obstacles, sizeof(float)); - env->obstacle_y = (float*)calloc(env->num_obstacles, sizeof(float)); - env->obstacle_w = (float*)calloc(env->num_obstacles, sizeof(float)); - env->obstacle_h = (float*)calloc(env->num_obstacles, sizeof(float)); + if (env->curriculum_start_obstacles <= 0) env->curriculum_start_obstacles = 1; + if (env->curriculum_max_obstacles <= 0) env->curriculum_max_obstacles = env->num_obstacles; + if (env->curriculum_max_obstacles > BAT_MAX_OBSTACLES) env->curriculum_max_obstacles = BAT_MAX_OBSTACLES; + if (env->curriculum_start_obstacles > env->curriculum_max_obstacles) { + env->curriculum_start_obstacles = env->curriculum_max_obstacles; + } + if (env->curriculum_obstacle_step <= 0) env->curriculum_obstacle_step = 8; + if (env->curriculum_successes_per_level <= 0) env->curriculum_successes_per_level = 1; + if (env->curriculum_start_bug_distance <= 0.0f) env->curriculum_start_bug_distance = 14.0f; + if (env->curriculum_max_bug_distance <= 0.0f) { + env->curriculum_max_bug_distance = fminf(env->width, env->height) * 0.70f; + } + if (env->curriculum_bug_distance_step <= 0.0f) env->curriculum_bug_distance_step = 1.5f; + env->obstacle_x = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); + env->obstacle_y = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); + env->obstacle_w = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); + env->obstacle_h = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); } void allocate(Bat* env) { @@ -417,20 +528,56 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.n += 1.0f; } -static inline void bat_add_echo(Bat* env, float rx, float ry, float rvx, float rvy, - float strength) { +static inline void bat_add_freq_energy(Bat* env, int offset, float freq_norm, + float intensity) { + int bins = 
env->freq_bins_per_ear; + if (bins <= 0) bins = BAT_FREQ_BINS; + if (bins > BAT_FREQ_BINS) bins = BAT_FREQ_BINS; + int bin = (int)(bat_clampf(freq_norm, 0.0f, 1.0f) * bins); + if (bin < 0) bin = 0; + if (bin >= bins) bin = bins - 1; + int idx = offset + bin; + env->observations[idx] = bat_clampf(env->observations[idx] + intensity, 0.0f, 1.0f); +} + +static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, + float freq, float intensity, float path, int source) { + if (receive_tick <= env->tick) return; + if (intensity <= 0.000001f) return; + EchoEvent* event = &env->echo_events[env->echo_head]; + event->receive_tick = receive_tick; + event->freq = bat_clampf(freq, 0.0f, 1.0f); + event->intensity = intensity; + event->path = path; + event->ear = ear; + event->source = source; + event->active = 1; + env->echo_head = (env->echo_head + 1) % BAT_MAX_ECHO_EVENTS; +} + +static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, + float* right_x, float* right_y) { + float lx = -sinf(env->bat_heading); + float ly = cosf(env->bat_heading); + float ear_sep = env->bat_radius * env->ear_separation_scale; + *left_x = env->bat_x - lx * ear_sep * 0.5f; + *left_y = env->bat_y - ly * ear_sep * 0.5f; + *right_x = env->bat_x + lx * ear_sep * 0.5f; + *right_y = env->bat_y + ly * ear_sep * 0.5f; +} + +static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, + float slice_ticks, float freq, float rx, float ry, float rvx, float rvy, + float strength, int source) { float fx = cosf(env->bat_heading); float fy = sinf(env->bat_heading); float lx = -sinf(env->bat_heading); float ly = cosf(env->bat_heading); - float ear_sep = env->bat_radius * 0.75f; - float left_ear_x = env->bat_x - lx * ear_sep * 0.5f; - float left_ear_y = env->bat_y - ly * ear_sep * 0.5f; - float right_ear_x = env->bat_x + lx * ear_sep * 0.5f; - float right_ear_y = env->bat_y + ly * ear_sep * 0.5f; + float left_ear_x, left_ear_y, right_ear_x, right_ear_y; + bat_ear_positions(env, 
&left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); float ux, uy; - bat_norm_vec(rx - env->bat_x, ry - env->bat_y, &ux, &uy); + bat_norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); float forward = ux * fx + uy * fy; if (forward < -0.35f) return; @@ -441,31 +588,32 @@ static inline void bat_add_echo(Bat* env, float rx, float ry, float rvx, float r float left_gain = bat_clampf(0.75f + 0.25f * (ux * left_dir_x + uy * left_dir_y), 0.1f, 1.0f); float right_gain = bat_clampf(0.75f + 0.25f * (ux * right_dir_x + uy * right_dir_y), 0.1f, 1.0f); - float left_path = bat_dist(env->bat_x, env->bat_y, rx, ry) + bat_dist(rx, ry, left_ear_x, left_ear_y); - float right_path = bat_dist(env->bat_x, env->bat_y, rx, ry) + bat_dist(rx, ry, right_ear_x, right_ear_y); - float path = 0.5f * (left_path + right_path); - if (path > env->max_echo_range) return; + float source_path = bat_dist(chirp->x, chirp->y, rx, ry); + float left_path = source_path + bat_dist(rx, ry, left_ear_x, left_ear_y); + float right_path = source_path + bat_dist(rx, ry, right_ear_x, right_ear_y); + if (left_path > env->max_echo_range && right_path > env->max_echo_range) return; - int bin = (int)((path / env->max_echo_range) * env->range_bins_per_ear); - if (bin < 0) bin = 0; - if (bin >= env->range_bins_per_ear) bin = env->range_bins_per_ear - 1; - - float attenuation = strength / (1.0f + 0.02f * path * path); float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; float distance_rate = rel_vx * ux + rel_vy * uy; float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed + 0.0001f), -1.0f, 1.0f); + float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); - float left_energy = attenuation * left_gain; - float right_energy = attenuation * right_gain; - env->observations[BAT_LEFT_RANGE_OFFSET + bin] += left_energy; - env->observations[BAT_RIGHT_RANGE_OFFSET + bin] += right_energy; - env->observations[BAT_LEFT_DOPPLER_OFFSET + bin] += left_energy * doppler; - 
env->observations[BAT_RIGHT_DOPPLER_OFFSET + bin] += right_energy * doppler; + if (left_path <= env->max_echo_range) { + float attenuation = strength / (1.0f + 0.02f * left_path * left_path); + float receive_tick = chirp->birth_tick + slice_ticks + left_path / env->sound_speed / BAT_TICK_RATE; + bat_add_echo_event(env, 0, receive_tick, shifted_freq, attenuation * left_gain, left_path, source); + } + if (right_path <= env->max_echo_range) { + float attenuation = strength / (1.0f + 0.02f * right_path * right_path); + float receive_tick = chirp->birth_tick + slice_ticks + right_path / env->sound_speed / BAT_TICK_RATE; + bat_add_echo_event(env, 1, receive_tick, shifted_freq, attenuation * right_gain, right_path, source); + } } -static inline void bat_add_segment_reflectors(Bat* env, float x1, float y1, - float x2, float y2, float strength) { +static inline void bat_schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, + float slice_ticks, float freq, float x1, float y1, float x2, float y2, + float strength) { float len = bat_dist(x1, y1, x2, y2); int count = (int)(len / env->reflector_spacing) + 1; if (count < 1) count = 1; @@ -473,47 +621,97 @@ static inline void bat_add_segment_reflectors(Bat* env, float x1, float y1, float t = count == 0 ? 
0.0f : i / (float)count; float x = x1 + (x2 - x1) * t; float y = y1 + (y2 - y1) * t; - bat_add_echo(env, x, y, 0.0f, 0.0f, strength); + bat_schedule_echo(env, chirp, slice_ticks, freq, x, y, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); } } -static inline void bat_add_obstacle_echoes(Bat* env, int i) { +static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, + float slice_ticks, float freq, int i) { float x = env->obstacle_x[i]; float y = env->obstacle_y[i]; float w = env->obstacle_w[i]; float h = env->obstacle_h[i]; - bat_add_segment_reflectors(env, x, y, x + w, y, 0.55f); - bat_add_segment_reflectors(env, x, y + h, x + w, y + h, 0.55f); - bat_add_segment_reflectors(env, x, y, x, y + h, 0.55f); - bat_add_segment_reflectors(env, x + w, y, x + w, y + h, 0.55f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x + w, y, 0.55f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y + h, x + w, y + h, 0.55f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x, y + h, 0.55f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x + w, y, x + w, y + h, 0.55f); +} + +static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { + int slices = (int)ceilf(chirp->duration / BAT_TICK_RATE); + if (slices < 1) slices = 1; + if (slices > BAT_MAX_CHIRP_SLICES) slices = BAT_MAX_CHIRP_SLICES; + + for (int i = 0; i < slices; i++) { + float t = (i + 0.5f) / (float)slices; + float slice_seconds = t * chirp->duration; + float slice_ticks = slice_seconds / BAT_TICK_RATE; + float freq = chirp->start_freq + t * (chirp->end_freq - chirp->start_freq); + + bat_schedule_echo(env, chirp, slice_ticks, freq, + env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, BAT_ECHO_BUG); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, + 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, + 0.0f, (float)env->height, 
(float)env->width, (float)env->height, 0.12f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, + 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); + bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, + (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); + for (int j = 0; j < env->num_obstacles; j++) { + bat_schedule_obstacle_echoes(env, chirp, slice_ticks, freq, j); + } + } +} + +static inline void bat_process_echo_events(Bat* env) { + float start_tick = env->tick - 1.0f; + float end_tick = env->tick; + for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { + EchoEvent* event = &env->echo_events[i]; + if (!event->active) continue; + if (event->receive_tick > start_tick && event->receive_tick <= end_tick) { + int offset = event->ear == 0 ? BAT_LEFT_FREQ_OFFSET : BAT_RIGHT_FREQ_OFFSET; + bat_add_freq_energy(env, offset, event->freq, event->intensity); + if (event->source == BAT_ECHO_BUG) { + env->tick_bug_echo_energy += event->intensity; + if (env->tick_bug_echo_path < 0.0f || event->path < env->tick_bug_echo_path) { + env->tick_bug_echo_path = event->path; + } + } + event->active = 0; + } else if (event->receive_tick <= start_tick) { + event->active = 0; + } + } } void compute_observations(Bat* env) { memset(env->observations, 0, BAT_OBS_SIZE * sizeof(float)); + env->tick_bug_echo_energy = 0.0f; + env->tick_bug_echo_path = -1.0f; - bat_add_echo(env, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f); - bat_add_segment_reflectors(env, 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); - bat_add_segment_reflectors(env, 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.12f); - bat_add_segment_reflectors(env, 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); - bat_add_segment_reflectors(env, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); - for (int i = 0; i < env->num_obstacles; i++) { - bat_add_obstacle_echoes(env, i); - } + bat_process_echo_events(env); float left_energy = 0.0f; float 
right_energy = 0.0f; - for (int i = 0; i < BAT_RANGE_BINS; i++) { - env->observations[BAT_LEFT_RANGE_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_RANGE_OFFSET + i], 0.0f, 1.0f); - env->observations[BAT_RIGHT_RANGE_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_RANGE_OFFSET + i], 0.0f, 1.0f); - env->observations[BAT_LEFT_DOPPLER_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_DOPPLER_OFFSET + i], -1.0f, 1.0f); - env->observations[BAT_RIGHT_DOPPLER_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_DOPPLER_OFFSET + i], -1.0f, 1.0f); - left_energy += env->observations[BAT_LEFT_RANGE_OFFSET + i]; - right_energy += env->observations[BAT_RIGHT_RANGE_OFFSET + i]; + for (int i = 0; i < BAT_FREQ_BINS; i++) { + env->observations[BAT_LEFT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_FREQ_OFFSET + i], 0.0f, 1.0f); + env->observations[BAT_RIGHT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_FREQ_OFFSET + i], 0.0f, 1.0f); + left_energy += env->observations[BAT_LEFT_FREQ_OFFSET + i]; + right_energy += env->observations[BAT_RIGHT_FREQ_OFFSET + i]; } env->echo_energy_left_sum += left_energy; env->echo_energy_right_sum += right_energy; - env->observations[BAT_CHIRP_AGE_OBS] = bat_clampf(env->chirp_age_ticks / (float)env->max_chirp_age_ticks, 0.0f, 1.0f); + float chirp_age_denom = bat_chirp_age_norm_denominator(env); + int chirp_age = env->tick - env->last_chirp_tick; + if (env->last_chirp_tick < 0) chirp_age = (int)ceilf(chirp_age_denom); + env->chirp_age_ticks = chirp_age; + int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); + if (cooldown < 0) cooldown = 0; + env->observations[BAT_CHIRP_AGE_OBS] = bat_clampf(chirp_age / chirp_age_denom, 0.0f, 1.0f); + env->observations[BAT_CHIRP_COOLDOWN_OBS] = bat_clampf(cooldown / (float)env->chirp_cooldown_ticks, 0.0f, 1.0f); env->observations[BAT_CHIRP_START_OBS] = env->last_chirp_start_freq; env->observations[BAT_CHIRP_END_OBS] = env->last_chirp_end_freq; 
env->observations[BAT_CHIRP_DURATION_OBS] = env->last_chirp_duration; @@ -528,7 +726,12 @@ static inline void bat_reset_episode(Bat* env) { env->bat_vy = 0.0f; env->bat_turn_velocity = 0.0f; env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; - bat_sample_spawns(env); + bat_apply_curriculum(env); + if (env->curriculum_enabled) { + bat_sample_spawns_at_distance(env, bat_curriculum_bug_distance(env)); + } else { + bat_sample_spawns(env); + } generate_obstacles(env); float bug_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; env->bug_vx = cosf(bug_heading) * env->bug_speed; @@ -540,6 +743,11 @@ static inline void bat_reset_episode(Bat* env) { env->last_chirp_tick = -env->chirp_cooldown_ticks; memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; + memset(env->echo_events, 0, sizeof(env->echo_events)); + env->echo_head = 0; + env->tick_bug_echo_energy = 0.0f; + env->tick_bug_echo_path = -1.0f; + env->last_bug_echo_path = -1.0f; env->chirps_emitted_episode = 0; env->chirp_duration_sum = 0.0f; env->chirp_bandwidth_sum = 0.0f; @@ -663,17 +871,18 @@ static inline bool bat_try_emit_chirp(Bat* env) { chirp->birth_tick = env->tick; chirp->active = 1; env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; + bat_schedule_chirp_echoes(env, chirp); return true; } -static inline bool bat_update_chirp(Bat* env) { +static inline int bat_update_chirp(Bat* env) { int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); if (emit) { - return bat_try_emit_chirp(env); + return bat_try_emit_chirp(env) ? 
1 : -1; } else if (env->chirp_age_ticks < env->max_chirp_age_ticks) { env->chirp_age_ticks += 1; } - return false; + return 0; } static inline bool bat_caught_bug(Bat* env) { @@ -684,11 +893,12 @@ void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; - bool accepted_chirp = bat_update_chirp(env); + int chirp_status = bat_update_chirp(env); if (bat_caught_bug(env)) { env->rewards[0] = 1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; + bat_advance_curriculum(env); add_log(env, 1.0f, 0.0f, 0.0f); bat_reset_episode(env); return; @@ -709,6 +919,7 @@ void c_step(Bat* env) { env->rewards[0] = 1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; + bat_advance_curriculum(env); add_log(env, 1.0f, 0.0f, 0.0f); bat_reset_episode(env); return; @@ -720,8 +931,10 @@ void c_step(Bat* env) { float progress = env->prev_bug_dist - bug_dist; env->rewards[0] += env->progress_reward_scale * progress; env->rewards[0] -= env->step_cost; - if (accepted_chirp) { - env->rewards[0] -= env->chirp_cost; + if (chirp_status > 0) { + env->rewards[0] += env->valid_chirp_reward; + } else if (chirp_status < 0) { + env->rewards[0] -= env->early_chirp_penalty; } env->prev_bug_dist = bug_dist; @@ -733,8 +946,15 @@ void c_step(Bat* env) { return; } - env->episode_return += env->rewards[0]; compute_observations(env); + if (env->tick_bug_echo_path > 0.0f) { + if (env->last_bug_echo_path > 0.0f && env->tick_bug_echo_path < env->last_bug_echo_path) { + float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) / fmaxf(1.0f, env->max_echo_range); + env->rewards[0] += env->bug_echo_reward_scale * echo_progress; + } + env->last_bug_echo_path = env->tick_bug_echo_path; + } + env->episode_return += env->rewards[0]; } #ifndef BAT_HEADLESS diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index dfc46fd1c0..56e5d20b8c 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -1,5 +1,5 @@ #include "bat.h" -#define OBS_SIZE 70 
+#define OBS_SIZE 39 #define NUM_ATNS 6 #define ACT_SIZES {5, 3, 8, 8, 4, 2} #define OBS_TENSOR_T FloatTensor @@ -14,20 +14,31 @@ void my_init(Env* env, Dict* kwargs) { env->height = dict_get(kwargs, "height")->value; env->num_obstacles = dict_get(kwargs, "num_obstacles")->value; env->bat_radius = dict_get(kwargs, "bat_radius")->value; + env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->bug_radius = dict_get(kwargs, "bug_radius")->value; env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; env->bat_accel = dict_get(kwargs, "bat_accel")->value; env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; env->bug_speed = dict_get(kwargs, "bug_speed")->value; env->max_steps = dict_get(kwargs, "max_steps")->value; - env->range_bins_per_ear = dict_get(kwargs, "range_bins_per_ear")->value; - env->doppler_bins_per_ear = dict_get(kwargs, "doppler_bins_per_ear")->value; + env->curriculum_enabled = dict_get(kwargs, "curriculum_enabled")->value; + env->curriculum_start_obstacles = dict_get(kwargs, "curriculum_start_obstacles")->value; + env->curriculum_max_obstacles = dict_get(kwargs, "curriculum_max_obstacles")->value; + env->curriculum_obstacle_step = dict_get(kwargs, "curriculum_obstacle_step")->value; + env->curriculum_successes_per_level = dict_get(kwargs, "curriculum_successes_per_level")->value; + env->curriculum_start_bug_distance = dict_get(kwargs, "curriculum_start_bug_distance")->value; + env->curriculum_max_bug_distance = dict_get(kwargs, "curriculum_max_bug_distance")->value; + env->curriculum_bug_distance_step = dict_get(kwargs, "curriculum_bug_distance_step")->value; + env->freq_bins_per_ear = dict_get(kwargs, "freq_bins_per_ear")->value; env->max_echo_range = dict_get(kwargs, "max_echo_range")->value; env->sound_speed = dict_get(kwargs, "sound_speed")->value; env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; 
env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; + env->valid_chirp_reward = dict_get(kwargs, "valid_chirp_reward")->value; + env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; + env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; env->collision_penalty = dict_get(kwargs, "collision_penalty")->value; @@ -39,7 +50,6 @@ void my_log(Log* log, Dict* out) { dict_set(out, "score", log->score); dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); - dict_set(out, "success", log->success); dict_set(out, "collision", log->collision); dict_set(out, "timeout", log->timeout); dict_set(out, "bug_distance_start", log->bug_distance_start); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 6d714b7995..ad2aa077a3 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -35,8 +35,7 @@ static Bat make_test_env(void) { .bat_turn_rate = 3.1415926f, .bug_speed = 4.0f, .max_steps = 512, - .range_bins_per_ear = BAT_RANGE_BINS, - .doppler_bins_per_ear = BAT_DOPPLER_BINS, + .freq_bins_per_ear = BAT_FREQ_BINS, .max_echo_range = 80.0f, .sound_speed = 100.0f, .reflector_spacing = 8.0f, @@ -83,13 +82,31 @@ static int test_left_right_echo_asymmetry(void) { env.bug_y = 10.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; - compute_observations(&env); + memset(env.echo_events, 0, sizeof(env.echo_events)); + env.echo_head = 0; + env.tick = 0; + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 1.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 
8.0f, BAT_ECHO_BUG); float left_energy = 0.0f; float right_energy = 0.0f; - for (int i = 0; i < BAT_RANGE_BINS; i++) { - left_energy += env.observations[BAT_LEFT_RANGE_OFFSET + i]; - right_energy += env.observations[BAT_RIGHT_RANGE_OFFSET + i]; + for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { + if (!env.echo_events[i].active) continue; + if (env.echo_events[i].ear == 0) { + left_energy += env.echo_events[i].intensity; + } else { + right_energy += env.echo_events[i].intensity; + } } ASSERT_TRUE(left_energy > right_energy); @@ -98,6 +115,116 @@ static int test_left_right_echo_asymmetry(void) { return 0; } +static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { + Bat env = { + .num_agents = 1, + .frameskip = 1, + .width = 64, + .height = 64, + .num_obstacles = 0, + .bat_radius = 2.0f, + .bug_radius = 1.5f, + .bat_max_speed = 12.0f, + .bat_accel = 30.0f, + .bat_turn_rate = 3.1415926f, + .bug_speed = 4.0f, + .max_steps = 512, + .freq_bins_per_ear = BAT_FREQ_BINS, + .max_echo_range = 80.0f, + .reflector_spacing = 8.0f, + .rng = 1, + }; + allocate(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bat_heading = 0.0f; + env.tick = 0; + memset(env.echo_events, 0, sizeof(env.echo_events)); + env.echo_head = 0; + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.5f, + .end_freq = 0.5f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.bat_x, env.bat_y - 12.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + + float left_tick = -1.0f; + float right_tick = -1.0f; + for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { + if (!env.echo_events[i].active) continue; + if (env.echo_events[i].ear == 0) left_tick = env.echo_events[i].receive_tick; + if (env.echo_events[i].ear == 1) right_tick = env.echo_events[i].receive_tick; + } + + ASSERT_TRUE(left_tick > 0.0f); + ASSERT_TRUE(right_tick > 0.0f); + 
ASSERT_TRUE(fabsf(left_tick - right_tick) >= 1.0f); + + free_allocated(&env); + return 0; +} + +static float test_side_echo_receive_tick_gap(float ear_separation_scale) { + Bat env = make_test_env(); + c_reset(&env); + + env.ear_separation_scale = ear_separation_scale; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bat_heading = 0.0f; + env.tick = 0; + memset(env.echo_events, 0, sizeof(env.echo_events)); + env.echo_head = 0; + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.5f, + .end_freq = 0.5f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.bat_x, env.bat_y - 12.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + + float left_tick = -1.0f; + float right_tick = -1.0f; + for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { + if (!env.echo_events[i].active) continue; + if (env.echo_events[i].ear == 0) left_tick = env.echo_events[i].receive_tick; + if (env.echo_events[i].ear == 1) right_tick = env.echo_events[i].receive_tick; + } + + ASSERT_TRUE(left_tick > 0.0f); + ASSERT_TRUE(right_tick > 0.0f); + float gap = fabsf(left_tick - right_tick); + + free_allocated(&env); + return gap; +} + +static int test_ear_separation_scale_controls_arrival_gap(void) { + float narrow_gap = test_side_echo_receive_tick_gap(0.75f); + float wide_gap = test_side_echo_receive_tick_gap(1.50f); + + ASSERT_TRUE(narrow_gap > 0.0f); + ASSERT_TRUE(wide_gap > narrow_gap * 1.75f); + + return 0; +} + static int test_doppler_sign_for_approaching_bug(void) { Bat env = make_test_env(); c_reset(&env); @@ -108,17 +235,42 @@ static int test_doppler_sign_for_approaching_bug(void) { env.bat_vy = 0.0f; env.bug_x = 42.0f; env.bug_y = 20.0f; - env.bug_vx = -3.0f; + env.bug_vx = -16.0f; env.bug_vy = 0.0f; + env.bat_heading = 0.0f; + memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); + memset(env.echo_events, 0, sizeof(env.echo_events)); + env.echo_head = 0; + 
env.tick = 0; + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.5f, + .end_freq = 0.5f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 8.0f, BAT_ECHO_BUG); + + env.tick = 27; compute_observations(&env); - float doppler = 0.0f; - for (int i = 0; i < BAT_DOPPLER_BINS; i++) { - doppler += env.observations[BAT_LEFT_DOPPLER_OFFSET + i]; - doppler += env.observations[BAT_RIGHT_DOPPLER_OFFSET + i]; + float low_energy = 0.0f; + float high_energy = 0.0f; + for (int i = 0; i < BAT_FREQ_BINS; i++) { + float energy = env.observations[BAT_LEFT_FREQ_OFFSET + i] + + env.observations[BAT_RIGHT_FREQ_OFFSET + i]; + if (i < BAT_FREQ_BINS / 2) { + low_energy += energy; + } else { + high_energy += energy; + } } - ASSERT_TRUE(doppler > 0.0f); + ASSERT_TRUE(high_energy > low_energy); free_allocated(&env); return 0; @@ -237,6 +389,77 @@ static int test_chirp_cooldown_accepts_only_after_delay(void) { return 0; } +static void test_place_safe_stationary_scene(Bat* env) { + env->num_obstacles = 0; + env->bat_x = 20.0f; + env->bat_y = 20.0f; + env->bat_vx = 0.0f; + env->bat_vy = 0.0f; + env->bat_heading = 0.0f; + env->bug_x = 48.0f; + env->bug_y = 48.0f; + env->bug_vx = 0.0f; + env->bug_vy = 0.0f; + env->prev_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); +} + +static void test_set_emit_chirp_action(Bat* env) { + env->actions[0] = BAT_NOOP; + env->actions[1] = BAT_TURN_NONE; + env->actions[2] = 0.0f; + env->actions[3] = 7.0f; + env->actions[4] = 1.0f; + env->actions[5] = 1.0f; +} + +static int test_valid_chirp_gets_reward_without_legacy_cost(void) { + Bat env = make_test_env(); + c_reset(&env); + test_place_safe_stationary_scene(&env); + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.bug_echo_reward_scale = 0.0f; + env.chirp_cost = 10.0f; + env.valid_chirp_reward = 0.0005f; + 
env.early_chirp_penalty = 0.0020f; + test_set_emit_chirp_action(&env); + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 1); + + free_allocated(&env); + return 0; +} + +static int test_early_chirp_gets_penalty_and_emits_nothing(void) { + Bat env = make_test_env(); + c_reset(&env); + test_place_safe_stationary_scene(&env); + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.bug_echo_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.valid_chirp_reward = 0.0005f; + env.early_chirp_penalty = 0.0020f; + env.chirp_cooldown_ticks = 12; + test_set_emit_chirp_action(&env); + c_step(&env); + test_place_safe_stationary_scene(&env); + test_set_emit_chirp_action(&env); + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], -env.early_chirp_penalty, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 1); + + free_allocated(&env); + return 0; +} + static int test_reflection_arrives_at_two_way_travel_time(void) { float sound_speed = 100.0f; float distance = 25.0f; @@ -249,6 +472,323 @@ static int test_reflection_arrives_at_two_way_travel_time(void) { return 0; } +static float test_sum_obs(Bat* env, int offset, int count) { + float sum = 0.0f; + for (int i = 0; i < count; i++) { + sum += env->observations[offset + i]; + } + return sum; +} + +static int test_bins_only_observation_layout(void) { + ASSERT_TRUE(BAT_OBS_SIZE == 39); + ASSERT_TRUE(BAT_FREQ_BINS == 16); + ASSERT_TRUE(BAT_LEFT_FREQ_OFFSET == 0); + ASSERT_TRUE(BAT_RIGHT_FREQ_OFFSET == 16); + ASSERT_TRUE(BAT_CHIRP_AGE_OBS == 32); + ASSERT_TRUE(BAT_CHIRP_COOLDOWN_OBS == 33); + ASSERT_TRUE(BAT_CHIRP_START_OBS == 34); + ASSERT_TRUE(BAT_CHIRP_END_OBS == 35); + ASSERT_TRUE(BAT_CHIRP_DURATION_OBS == 36); + ASSERT_TRUE(BAT_FORWARD_SPEED_OBS == 37); + ASSERT_TRUE(BAT_TURN_RATE_OBS == 38); + return 0; +} + +static int 
test_no_chirp_produces_silent_frequency_bins(void) { + Bat env = make_test_env(); + c_reset(&env); + + ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_observations_stay_normalized_after_chirp(void) { + Bat env = make_test_env(); + env.max_steps = 1000; + c_reset(&env); + + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_AGE_OBS], 1.0f, 0.0001f); + for (int i = 0; i < BAT_OBS_SIZE; i++) { + ASSERT_TRUE(env.observations[i] >= -1.0f); + ASSERT_TRUE(env.observations[i] <= 1.0f); + } + + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 1.0f; + c_step(&env); + + float age_denom = bat_chirp_age_norm_denominator(&env); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_AGE_OBS], 1.0f / age_denom, 0.0001f); + for (int i = 0; i < BAT_OBS_SIZE; i++) { + ASSERT_TRUE(env.observations[i] >= -1.0f); + ASSERT_TRUE(env.observations[i] <= 1.0f); + } + + free_allocated(&env); + return 0; +} + +static int test_curriculum_starts_close_with_one_obstacle(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step = 1; + env.curriculum_start_bug_distance = 12.0f; + env.curriculum_max_bug_distance = 40.0f; + env.curriculum_bug_distance_step = 6.0f; + c_reset(&env); + + ASSERT_TRUE(env.num_obstacles == 1); + ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) <= 14.0f); + + free_allocated(&env); + return 0; +} + +static int test_curriculum_advances_after_catch(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step 
= 1; + env.curriculum_start_bug_distance = 12.0f; + env.curriculum_max_bug_distance = 40.0f; + env.curriculum_bug_distance_step = 6.0f; + c_reset(&env); + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_TRUE(env.curriculum_level == 1); + ASSERT_TRUE(env.num_obstacles == 2); + ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) <= 20.0f); + + free_allocated(&env); + return 0; +} + +static int test_curriculum_waits_for_required_catches(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step = 1; + env.curriculum_start_bug_distance = 12.0f; + env.curriculum_max_bug_distance = 40.0f; + env.curriculum_bug_distance_step = 6.0f; + env.curriculum_successes_per_level = 2; + c_reset(&env); + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_TRUE(env.curriculum_level == 0); + ASSERT_TRUE(env.curriculum_successes_at_level == 1); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_TRUE(env.curriculum_level == 1); + ASSERT_TRUE(env.curriculum_successes_at_level == 0); + + free_allocated(&env); + return 0; +} + +static int test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { + Bat env = make_test_env(); + env.num_obstacles = 0; + env.sound_speed = 60.0f; + env.max_echo_range = 128.0f; + c_reset(&env); + + env.bat_x = 32.0f; + env.bat_y = 32.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bat_heading = 0.0f; + env.bug_x = 38.0f; + env.bug_y = 32.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + compute_observations(&env); + + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; + env.actions[2] = 7; + env.actions[3] = 7; + env.actions[4] = 0; + env.actions[5] = 1; + c_step(&env); + + for (int i = 0; i < 6; i++) { + 
ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + env.actions[5] = 0; + c_step(&env); + } + + float max_energy = 0.0f; + for (int i = 0; i < 32; i++) { + float energy = test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS) + + test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS); + if (energy > max_energy) max_energy = energy; + c_step(&env); + } + + ASSERT_TRUE(max_energy > 0.01f); + + free_allocated(&env); + return 0; +} + +static int test_frequency_bin_energy_sums_and_caps(void) { + Bat env = make_test_env(); + memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); + + bat_add_freq_energy(&env, BAT_LEFT_FREQ_OFFSET, 1.0f, 0.75f); + bat_add_freq_energy(&env, BAT_LEFT_FREQ_OFFSET, 1.0f, 0.75f); + bat_add_freq_energy(&env, BAT_RIGHT_FREQ_OFFSET, 0.0f, 0.35f); + + ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + BAT_FREQ_BINS - 1], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[BAT_RIGHT_FREQ_OFFSET], 0.35f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_bug_echo_reward_is_added_when_bug_echo_is_closer(void) { + Bat env = make_test_env(); + c_reset(&env); + env.bug_echo_reward_scale = 0.05f; + env.last_bug_echo_path = 20.0f; + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 10.0f; + env.bat_y = 10.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + memset(env.echo_events, 0, sizeof(env.echo_events)); + + env.echo_events[0] = (EchoEvent){ + .receive_tick = 1.0f, + .freq = 0.5f, + .intensity = 0.6f, + .ear = 0, + .source = BAT_ECHO_BUG, + .path = 15.0f, + .active = 1, + }; + + c_step(&env); + + ASSERT_TRUE(env.rewards[0] > 0.002f); + ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static 
int test_bug_echo_reward_ignores_farther_bug_echo(void) { + Bat env = make_test_env(); + c_reset(&env); + env.bug_echo_reward_scale = 0.05f; + env.last_bug_echo_path = 20.0f; + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 10.0f; + env.bat_y = 10.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + memset(env.echo_events, 0, sizeof(env.echo_events)); + + env.echo_events[0] = (EchoEvent){ + .receive_tick = 1.0f, + .freq = 0.5f, + .intensity = 0.6f, + .ear = 0, + .source = BAT_ECHO_BUG, + .path = 25.0f, + .active = 1, + }; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.rewards[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.last_bug_echo_path, 25.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_static_echo_does_not_get_bug_echo_reward(void) { + Bat env = make_test_env(); + c_reset(&env); + env.bug_echo_reward_scale = 0.05f; + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 10.0f; + env.bat_y = 10.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + memset(env.echo_events, 0, sizeof(env.echo_events)); + + env.echo_events[0] = (EchoEvent){ + .receive_tick = 1.0f, + .freq = 0.5f, + .intensity = 0.6f, + .ear = 0, + .source = BAT_ECHO_STATIC, + .path = 15.0f, + .active = 1, + }; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.rewards[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_quadrant(float x, float y, float width, float height) { int east = x >= width * 0.5f; int south = y >= height * 0.5f; @@ -343,6 +883,8 @@ static int test_obstacles_are_small_enough_for_trainability(void) { int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; if (test_left_right_echo_asymmetry()) 
return 1; + if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; + if (test_ear_separation_scale_controls_arrival_gap()) return 1; if (test_doppler_sign_for_approaching_bug()) return 1; if (test_wall_collision_is_terminal_minus_one()) return 1; if (test_catch_bug_is_terminal_plus_one()) return 1; @@ -350,7 +892,20 @@ int main(void) { if (test_chirp_ring_physical_ordering()) return 1; if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; + if (test_valid_chirp_gets_reward_without_legacy_cost()) return 1; + if (test_early_chirp_gets_penalty_and_emits_nothing()) return 1; if (test_reflection_arrives_at_two_way_travel_time()) return 1; + if (test_bins_only_observation_layout()) return 1; + if (test_no_chirp_produces_silent_frequency_bins()) return 1; + if (test_observations_stay_normalized_after_chirp()) return 1; + if (test_curriculum_starts_close_with_one_obstacle()) return 1; + if (test_curriculum_advances_after_catch()) return 1; + if (test_curriculum_waits_for_required_catches()) return 1; + if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; + if (test_frequency_bin_energy_sums_and_caps()) return 1; + if (test_bug_echo_reward_is_added_when_bug_echo_is_closer()) return 1; + if (test_bug_echo_reward_ignores_farther_bug_echo()) return 1; + if (test_static_echo_does_not_get_bug_echo_reward()) return 1; if (test_spawns_use_different_random_quadrants()) return 1; if (test_spawns_keep_minimum_separation_and_avoid_obstacles()) return 1; if (test_obstacles_move_substantially_across_resets()) return 1; From 83af6cab34d31b4aaf23f5f73b98fe6ee88fc494 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 23:13:42 -0700 Subject: [PATCH 05/51] perf: bucket bat echoes by arrival tick --- BAT_SPEC.md | 7 ++ ocean/bat/bat.h | 106 ++++++++++++++++++------------ ocean/bat/tests/test_bat_core.c | 110 ++++++++++++++++---------------- 3 files changed, 126 
insertions(+), 97 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 5117775592..1ed75f74d1 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -256,6 +256,13 @@ Echo timing: corresponding ear frequency bins. - Multiple reflectors can contribute to the same bin on the same tick. - Echoes beyond `max_echo_range` are ignored. +- Implementation should use a fixed future-tick accumulator, not a full active + event scan every env step. The current design buckets each echo by + `ceil(receive_tick)` into `BAT_ECHO_QUEUE_TICKS = 256`, sums by + `[ear][freq_bin]`, and processes only the current tick's bucket. +- The accumulator is an implementation detail only. It must preserve the + observation semantics: current-tick per-ear frequency intensities are summed + and capped to `[0.0, 1.0]`; no range/delay axis is exposed. Chirp metadata: diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index faec64e6cd..24930ff249 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -45,7 +45,7 @@ #define BAT_CHIRP_HISTORY 4 #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 -#define BAT_MAX_ECHO_EVENTS 4096 +#define BAT_ECHO_QUEUE_TICKS 256 #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 @@ -67,15 +67,12 @@ typedef struct ChirpEvent { int active; } ChirpEvent; -typedef struct EchoEvent { - float receive_tick; - float freq; - float intensity; - float path; - int ear; - int source; - int active; -} EchoEvent; +typedef struct EchoBucket { + float energy[2][BAT_FREQ_BINS]; + float bug_energy; + float bug_path; + int tick; +} EchoBucket; typedef struct Log { float perf; @@ -164,8 +161,7 @@ typedef struct Bat { float last_chirp_duration; ChirpEvent chirps[BAT_CHIRP_HISTORY]; int chirp_head; - EchoEvent echo_events[BAT_MAX_ECHO_EVENTS]; - int echo_head; + EchoBucket echo_queue[BAT_ECHO_QUEUE_TICKS]; int chirps_emitted_episode; float chirp_duration_sum; float chirp_bandwidth_sum; @@ -528,31 +524,58 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.n 
+= 1.0f; } -static inline void bat_add_freq_energy(Bat* env, int offset, float freq_norm, - float intensity) { +static inline int bat_freq_bin_index(Bat* env, float freq_norm) { int bins = env->freq_bins_per_ear; if (bins <= 0) bins = BAT_FREQ_BINS; if (bins > BAT_FREQ_BINS) bins = BAT_FREQ_BINS; int bin = (int)(bat_clampf(freq_norm, 0.0f, 1.0f) * bins); if (bin < 0) bin = 0; if (bin >= bins) bin = bins - 1; + return bin; +} + +static inline void bat_add_freq_energy(Bat* env, int offset, float freq_norm, + float intensity) { + int bin = bat_freq_bin_index(env, freq_norm); int idx = offset + bin; env->observations[idx] = bat_clampf(env->observations[idx] + intensity, 0.0f, 1.0f); } +static inline void bat_clear_echo_bucket(EchoBucket* bucket) { + memset(bucket, 0, sizeof(*bucket)); + bucket->bug_path = -1.0f; + bucket->tick = -1; +} + +static inline void bat_clear_echo_queue(Bat* env) { + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + bat_clear_echo_bucket(&env->echo_queue[i]); + } +} + static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, float freq, float intensity, float path, int source) { if (receive_tick <= env->tick) return; if (intensity <= 0.000001f) return; - EchoEvent* event = &env->echo_events[env->echo_head]; - event->receive_tick = receive_tick; - event->freq = bat_clampf(freq, 0.0f, 1.0f); - event->intensity = intensity; - event->path = path; - event->ear = ear; - event->source = source; - event->active = 1; - env->echo_head = (env->echo_head + 1) % BAT_MAX_ECHO_EVENTS; + int arrival_tick = (int)ceilf(receive_tick); + int delay = arrival_tick - env->tick; + if (delay <= 0 || delay >= BAT_ECHO_QUEUE_TICKS) return; + int slot = arrival_tick % BAT_ECHO_QUEUE_TICKS; + EchoBucket* bucket = &env->echo_queue[slot]; + if (bucket->tick != arrival_tick) { + bat_clear_echo_bucket(bucket); + bucket->tick = arrival_tick; + } + + int ear_idx = ear == 0 ? 
0 : 1; + int bin = bat_freq_bin_index(env, freq); + bucket->energy[ear_idx][bin] += intensity; + if (source == BAT_ECHO_BUG) { + bucket->bug_energy += intensity; + if (bucket->bug_path < 0.0f || path < bucket->bug_path) { + bucket->bug_path = path; + } + } } static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, @@ -665,25 +688,25 @@ static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { } static inline void bat_process_echo_events(Bat* env) { - float start_tick = env->tick - 1.0f; - float end_tick = env->tick; - for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { - EchoEvent* event = &env->echo_events[i]; - if (!event->active) continue; - if (event->receive_tick > start_tick && event->receive_tick <= end_tick) { - int offset = event->ear == 0 ? BAT_LEFT_FREQ_OFFSET : BAT_RIGHT_FREQ_OFFSET; - bat_add_freq_energy(env, offset, event->freq, event->intensity); - if (event->source == BAT_ECHO_BUG) { - env->tick_bug_echo_energy += event->intensity; - if (env->tick_bug_echo_path < 0.0f || event->path < env->tick_bug_echo_path) { - env->tick_bug_echo_path = event->path; - } - } - event->active = 0; - } else if (event->receive_tick <= start_tick) { - event->active = 0; + int slot = env->tick % BAT_ECHO_QUEUE_TICKS; + EchoBucket* bucket = &env->echo_queue[slot]; + if (bucket->tick != env->tick) return; + + for (int i = 0; i < BAT_FREQ_BINS; i++) { + int left_idx = BAT_LEFT_FREQ_OFFSET + i; + int right_idx = BAT_RIGHT_FREQ_OFFSET + i; + env->observations[left_idx] = bat_clampf( + env->observations[left_idx] + bucket->energy[0][i], 0.0f, 1.0f); + env->observations[right_idx] = bat_clampf( + env->observations[right_idx] + bucket->energy[1][i], 0.0f, 1.0f); + } + if (bucket->bug_energy > 0.0f) { + env->tick_bug_echo_energy += bucket->bug_energy; + if (env->tick_bug_echo_path < 0.0f || bucket->bug_path < env->tick_bug_echo_path) { + env->tick_bug_echo_path = bucket->bug_path; } } + bat_clear_echo_bucket(bucket); } void 
compute_observations(Bat* env) { @@ -743,8 +766,7 @@ static inline void bat_reset_episode(Bat* env) { env->last_chirp_tick = -env->chirp_cooldown_ticks; memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; - memset(env->echo_events, 0, sizeof(env->echo_events)); - env->echo_head = 0; + bat_clear_echo_queue(env); env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index ad2aa077a3..2e07a547d6 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -82,8 +82,7 @@ static int test_left_right_echo_asymmetry(void) { env.bug_y = 10.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; - memset(env.echo_events, 0, sizeof(env.echo_events)); - env.echo_head = 0; + bat_clear_echo_queue(&env); env.tick = 0; ChirpEvent chirp = { @@ -100,12 +99,11 @@ static int test_left_right_echo_asymmetry(void) { float left_energy = 0.0f; float right_energy = 0.0f; - for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { - if (!env.echo_events[i].active) continue; - if (env.echo_events[i].ear == 0) { - left_energy += env.echo_events[i].intensity; - } else { - right_energy += env.echo_events[i].intensity; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + if (env.echo_queue[i].tick < 0) continue; + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + left_energy += env.echo_queue[i].energy[0][bin]; + right_energy += env.echo_queue[i].energy[1][bin]; } } @@ -142,8 +140,7 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { env.bat_vy = 0.0f; env.bat_heading = 0.0f; env.tick = 0; - memset(env.echo_events, 0, sizeof(env.echo_events)); - env.echo_head = 0; + bat_clear_echo_queue(&env); ChirpEvent chirp = { .x = env.bat_x, @@ -159,10 +156,16 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { float left_tick = -1.0f; float right_tick = -1.0f; - for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { - if 
(!env.echo_events[i].active) continue; - if (env.echo_events[i].ear == 0) left_tick = env.echo_events[i].receive_tick; - if (env.echo_events[i].ear == 1) right_tick = env.echo_events[i].receive_tick; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + if (env.echo_queue[i].tick < 0) continue; + float left_energy = 0.0f; + float right_energy = 0.0f; + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + left_energy += env.echo_queue[i].energy[0][bin]; + right_energy += env.echo_queue[i].energy[1][bin]; + } + if (left_energy > 0.0f) left_tick = env.echo_queue[i].tick; + if (right_energy > 0.0f) right_tick = env.echo_queue[i].tick; } ASSERT_TRUE(left_tick > 0.0f); @@ -173,6 +176,25 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { return 0; } +static int test_echo_scheduling_uses_tick_bucket_accumulator(void) { + Bat env = make_test_env(); + c_reset(&env); + + bat_clear_echo_queue(&env); + env.tick = 7; + bat_add_echo_event(&env, 0, 9.25f, 1.0f, 0.4f, 18.0f, BAT_ECHO_BUG); + bat_add_echo_event(&env, 0, 9.75f, 1.0f, 0.7f, 12.0f, BAT_ECHO_BUG); + + int slot = 10 % BAT_ECHO_QUEUE_TICKS; + ASSERT_TRUE(env.echo_queue[slot].tick == 10); + ASSERT_FLOAT_NEAR(env.echo_queue[slot].energy[0][BAT_FREQ_BINS - 1], 1.1f, 0.0001f); + ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_energy, 1.1f, 0.0001f); + ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_path, 12.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + static float test_side_echo_receive_tick_gap(float ear_separation_scale) { Bat env = make_test_env(); c_reset(&env); @@ -184,8 +206,7 @@ static float test_side_echo_receive_tick_gap(float ear_separation_scale) { env.bat_vy = 0.0f; env.bat_heading = 0.0f; env.tick = 0; - memset(env.echo_events, 0, sizeof(env.echo_events)); - env.echo_head = 0; + bat_clear_echo_queue(&env); ChirpEvent chirp = { .x = env.bat_x, @@ -201,10 +222,16 @@ static float test_side_echo_receive_tick_gap(float ear_separation_scale) { float left_tick = -1.0f; float right_tick = 
-1.0f; - for (int i = 0; i < BAT_MAX_ECHO_EVENTS; i++) { - if (!env.echo_events[i].active) continue; - if (env.echo_events[i].ear == 0) left_tick = env.echo_events[i].receive_tick; - if (env.echo_events[i].ear == 1) right_tick = env.echo_events[i].receive_tick; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + if (env.echo_queue[i].tick < 0) continue; + float left_energy = 0.0f; + float right_energy = 0.0f; + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + left_energy += env.echo_queue[i].energy[0][bin]; + right_energy += env.echo_queue[i].energy[1][bin]; + } + if (left_energy > 0.0f) left_tick = env.echo_queue[i].tick; + if (right_energy > 0.0f) right_tick = env.echo_queue[i].tick; } ASSERT_TRUE(left_tick > 0.0f); @@ -239,8 +266,7 @@ static int test_doppler_sign_for_approaching_bug(void) { env.bug_vy = 0.0f; env.bat_heading = 0.0f; memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); - memset(env.echo_events, 0, sizeof(env.echo_events)); - env.echo_head = 0; + bat_clear_echo_queue(&env); env.tick = 0; ChirpEvent chirp = { @@ -695,17 +721,8 @@ static int test_bug_echo_reward_is_added_when_bug_echo_is_closer(void) { env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - memset(env.echo_events, 0, sizeof(env.echo_events)); - - env.echo_events[0] = (EchoEvent){ - .receive_tick = 1.0f, - .freq = 0.5f, - .intensity = 0.6f, - .ear = 0, - .source = BAT_ECHO_BUG, - .path = 15.0f, - .active = 1, - }; + bat_clear_echo_queue(&env); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_BUG); c_step(&env); @@ -732,17 +749,8 @@ static int test_bug_echo_reward_ignores_farther_bug_echo(void) { env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - memset(env.echo_events, 0, sizeof(env.echo_events)); - - env.echo_events[0] = (EchoEvent){ - .receive_tick = 1.0f, - .freq = 0.5f, - .intensity = 0.6f, - .ear = 0, - .source = BAT_ECHO_BUG, - .path = 25.0f, - .active = 1, - }; + bat_clear_echo_queue(&env); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 
25.0f, BAT_ECHO_BUG); c_step(&env); @@ -768,17 +776,8 @@ static int test_static_echo_does_not_get_bug_echo_reward(void) { env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - memset(env.echo_events, 0, sizeof(env.echo_events)); - - env.echo_events[0] = (EchoEvent){ - .receive_tick = 1.0f, - .freq = 0.5f, - .intensity = 0.6f, - .ear = 0, - .source = BAT_ECHO_STATIC, - .path = 15.0f, - .active = 1, - }; + bat_clear_echo_queue(&env); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_STATIC); c_step(&env); @@ -884,6 +883,7 @@ int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; if (test_left_right_echo_asymmetry()) return 1; if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; + if (test_echo_scheduling_uses_tick_bucket_accumulator()) return 1; if (test_ear_separation_scale_controls_arrival_gap()) return 1; if (test_doppler_sign_for_approaching_bug()) return 1; if (test_wall_collision_is_terminal_minus_one()) return 1; From fb794b3b895e4eac2a75f463fd7393040ac52c12 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Mon, 8 Jun 2026 23:35:03 -0700 Subject: [PATCH 06/51] fix: constrain bat to forward dynamics --- BAT_SPEC.md | 21 ++++++--- config/bat.ini | 20 ++++++++- ocean/bat/bat.c | 7 +-- ocean/bat/bat.h | 39 +++++------------ ocean/bat/binding.c | 2 +- ocean/bat/tests/test_bat_core.c | 75 +++++++++++++++++++++++++++++++++ 6 files changed, 124 insertions(+), 40 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 1ed75f74d1..be94eef23a 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -185,12 +185,10 @@ Use a small multi-discrete action space. 
Recommended v1 action heads: -- `move`: 5 values +- `move`: 3 values - `0`: no thrust - `1`: thrust forward - - `2`: brake/reverse - - `3`: strafe left - - `4`: strafe right + - `2`: brake / reduce forward speed - `turn`: 3 values - `0`: no turn - `1`: turn left @@ -204,13 +202,19 @@ Recommended v1 action heads: Initial action sizes: -- `ACT_SIZES {5, 3, 8, 8, 4, 2}` +- `ACT_SIZES {3, 3, 8, 8, 4, 2}` - `NUM_ATNS 6` Rationale: - Multi-discrete actions let the agent combine flight and active sensing. - Discrete chirp bins keep the policy simple and cheap. +- Bat movement is scalar forward speed plus heading. The velocity vector is + recomputed as `heading * speed` every tick. +- Brake clamps speed at zero. The bat cannot fly backward. +- Strafe/lateral velocity is intentionally unavailable. This avoids sideways + spiral policies and makes the visual behavior match the game fantasy better + than a full inertial top-down spacecraft model. - Continuous actions can be a later variant after the first training baseline is understood. @@ -283,6 +287,7 @@ Current implementation note: Self-motion: - `forward_speed_norm` and `turn_rate_norm` are proprioceptive signals. +- `forward_speed_norm` is normalized scalar speed and should stay in `[0, 1]`. - These do not reveal map coordinates or target location. - They reduce unnecessary burden on recurrent policy memory. @@ -485,6 +490,9 @@ Obstacle reflections: - Use `perf` as the sweep objective. It is `1.0` only when the bat catches the bug and `0.0` for collision or timeout. - Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. +- Forward-only movement dynamics should be swept with bounded ranges: + `env.bat_max_speed` in `[8.0, 18.0]`, `env.bat_accel` in `[15.0, 60.0]`, + and `env.bat_turn_rate` in `[pi/2, 2pi]`. 
- Acoustic scale terms should be swept before increasing model size. Current bounded acoustic sweep knobs are `env.sound_speed` in `[45.0, 120.0]` and `env.ear_separation_scale` in `[0.5, 2.0]`. - Train workers should use CUDA with `--train.gpus 1`. - Protein/sweep control does not need CUDA. Run sweeps with `--sweep.use-gpu ""` so the optimizer stays off CUDA and avoids CUDA IPC/resource-handle failures. @@ -493,6 +501,9 @@ Obstacle reflections: - The default Bat sweep does not sweep policy model size; it keeps `policy.hidden_size = 128` and `policy.num_layers = 4`. Current cost-sensitive sweep bounds cap training duration at `50_000_000`, rollout horizon at `128`, replay ratio at `1.25`, and `vec.num_buffers` at `8`. - Do not add broad model-size sweep ranges. If model size must be swept later, require explicit human approval and keep a hard ceiling of `policy.hidden_size <= 256` and `policy.num_layers <= 4` unless there is a measured SPS reason to widen it. - Keep PufferLib core stock for Bat. If sweep parsing conflicts with inherited default sweep keys, solve it through Bat config or command-line args, not core edits. +- Checkpoints trained before the forward-only action model are stale. After + changing action dimensions or movement semantics, run a normal `train bat` + before `eval bat --load-model-path latest`. 
- On this PufferLib branch, `sweep bat --sweep.max-runs 2` is not enough to exercise suggested hyperparameters: the first two launched experiments use the current config defaults, and `sweep_obj.suggest(...)` is only called for diff --git a/config/bat.ini b/config/bat.ini index 6621353b44..303decb411 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -81,7 +81,7 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path = {} match_num_games = {} match_enemy_hidden_size = {} @@ -135,6 +135,24 @@ min = 4 max = 8 scale = auto +[sweep.env.bat_max_speed] +distribution = uniform +min = 8.0 +max = 18.0 +scale = auto + +[sweep.env.bat_accel] +distribution = uniform +min = 15.0 +max = 60.0 +scale = auto + +[sweep.env.bat_turn_rate] +distribution = uniform +min = 1.5707963 +max = 6.2831853 +scale = auto + [sweep.env.step_cost] distribution = uniform min = 0.0002 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 7be2b41308..3ad19213c4 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -34,10 +34,8 @@ void demo() { memset(env.actions, 0, sizeof(float) * BAT_NUM_ACTIONS); if (IsKeyDown(KEY_W)) env.actions[0] = BAT_THRUST_FORWARD; if (IsKeyDown(KEY_S)) env.actions[0] = BAT_BRAKE; - if (IsKeyDown(KEY_A)) 
env.actions[0] = BAT_STRAFE_LEFT; - if (IsKeyDown(KEY_D)) env.actions[0] = BAT_STRAFE_RIGHT; - if (IsKeyDown(KEY_LEFT)) env.actions[1] = BAT_TURN_LEFT; - if (IsKeyDown(KEY_RIGHT)) env.actions[1] = BAT_TURN_RIGHT; + if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[1] = BAT_TURN_LEFT; + if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[1] = BAT_TURN_RIGHT; env.actions[2] = 0; env.actions[3] = 7; env.actions[4] = 1; @@ -54,4 +52,3 @@ int main() { demo(); return 0; } - diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 24930ff249..90aa57cc27 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -12,7 +12,7 @@ #define BAT_OBS_SIZE 39 #define BAT_NUM_ACTIONS 6 -#define BAT_MOVE_ACTIONS 5 +#define BAT_MOVE_ACTIONS 3 #define BAT_TURN_ACTIONS 3 #define BAT_CHIRP_FREQ_BINS 8 #define BAT_CHIRP_DURATION_BINS 4 @@ -32,8 +32,6 @@ #define BAT_NOOP 0 #define BAT_THRUST_FORWARD 1 #define BAT_BRAKE 2 -#define BAT_STRAFE_LEFT 3 -#define BAT_STRAFE_RIGHT 4 #define BAT_TURN_NONE 0 #define BAT_TURN_LEFT 1 @@ -830,24 +828,8 @@ static inline void bat_update_motion(Bat* env, float dt) { int turn = bat_action_index(env->actions[1], BAT_TURN_ACTIONS); float fx = cosf(env->bat_heading); float fy = sinf(env->bat_heading); - float rx = -sinf(env->bat_heading); - float ry = cosf(env->bat_heading); - float ax = 0.0f; - float ay = 0.0f; - - if (move == BAT_THRUST_FORWARD) { - ax += fx * env->bat_accel; - ay += fy * env->bat_accel; - } else if (move == BAT_BRAKE) { - ax -= fx * env->bat_accel; - ay -= fy * env->bat_accel; - } else if (move == BAT_STRAFE_LEFT) { - ax -= rx * env->bat_accel; - ay -= ry * env->bat_accel; - } else if (move == BAT_STRAFE_RIGHT) { - ax += rx * env->bat_accel; - ay += ry * env->bat_accel; - } + float speed = env->bat_vx * fx + env->bat_vy * fy; + if (speed < 0.0f) speed = 0.0f; env->bat_turn_velocity = 0.0f; if (turn == BAT_TURN_LEFT) env->bat_turn_velocity = -env->bat_turn_rate; @@ -856,13 +838,14 @@ static inline void bat_update_motion(Bat* env, float 
dt) { if (env->bat_heading > BAT_PI) env->bat_heading -= 2.0f * BAT_PI; if (env->bat_heading < -BAT_PI) env->bat_heading += 2.0f * BAT_PI; - env->bat_vx += ax * dt; - env->bat_vy += ay * dt; - float speed = bat_len(env->bat_vx, env->bat_vy); - if (speed > env->bat_max_speed) { - env->bat_vx = env->bat_vx / speed * env->bat_max_speed; - env->bat_vy = env->bat_vy / speed * env->bat_max_speed; - } + if (move == BAT_THRUST_FORWARD) speed += env->bat_accel * dt; + if (move == BAT_BRAKE) speed -= env->bat_accel * dt; + speed = bat_clampf(speed, 0.0f, env->bat_max_speed); + + float heading_fx = cosf(env->bat_heading); + float heading_fy = sinf(env->bat_heading); + env->bat_vx = heading_fx * speed; + env->bat_vy = heading_fy * speed; env->bat_x += env->bat_vx * dt; env->bat_y += env->bat_vy * dt; } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 56e5d20b8c..5a69eaf747 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -1,7 +1,7 @@ #include "bat.h" #define OBS_SIZE 39 #define NUM_ATNS 6 -#define ACT_SIZES {5, 3, 8, 8, 4, 2} +#define ACT_SIZES {3, 3, 8, 8, 4, 2} #define OBS_TENSOR_T FloatTensor #define Env Bat diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 2e07a547d6..aa99d13730 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -371,6 +371,78 @@ static int test_progress_reward_sign(void) { return 0; } +static int test_bat_cannot_accelerate_backward_from_brake(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + env.bat_heading = 0.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.actions[0] = BAT_BRAKE; + env.actions[1] = BAT_TURN_NONE; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + c_step(&env); + + float forward = env.bat_vx * cosf(env.bat_heading) 
+ env.bat_vy * sinf(env.bat_heading); + ASSERT_TRUE(forward >= -0.0001f); + ASSERT_TRUE(env.observations[BAT_FORWARD_SPEED_OBS] >= -0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_bat_velocity_is_locked_to_heading(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + env.bat_heading = 0.0f; + env.bat_vx = -env.bat_max_speed * 0.5f; + env.bat_vy = 3.0f; + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + c_step(&env); + + float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); + float lateral = env.bat_vx * -sinf(env.bat_heading) + env.bat_vy * cosf(env.bat_heading); + ASSERT_TRUE(forward >= -0.0001f); + ASSERT_FLOAT_NEAR(lateral, 0.0f, 0.0001f); + ASSERT_TRUE(env.observations[BAT_FORWARD_SPEED_OBS] >= -0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_bat_speed_action_space_has_no_strafe(void) { + ASSERT_TRUE(BAT_MOVE_ACTIONS == 3); + ASSERT_TRUE(BAT_NOOP == 0); + ASSERT_TRUE(BAT_THRUST_FORWARD == 1); + ASSERT_TRUE(BAT_BRAKE == 2); + return 0; +} + static int test_chirp_ring_physical_ordering(void) { float duration = bat_chirp_duration_seconds(1.0f); float outer = bat_chirp_ring_radius(1.0f, 0.0f, duration, 100.0f); @@ -889,6 +961,9 @@ int main(void) { if (test_wall_collision_is_terminal_minus_one()) return 1; if (test_catch_bug_is_terminal_plus_one()) return 1; if (test_progress_reward_sign()) return 1; + if (test_bat_cannot_accelerate_backward_from_brake()) return 1; + if (test_bat_velocity_is_locked_to_heading()) return 1; + if (test_bat_speed_action_space_has_no_strafe()) return 1; if (test_chirp_ring_physical_ordering()) return 1; if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; if 
(test_chirp_cooldown_accepts_only_after_delay()) return 1; From 89613c97efa5040f51cde028a006c252eec98fa2 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 08:59:54 -0700 Subject: [PATCH 07/51] tune bat defaults from bat1 sweep --- BAT_SPEC.md | 16 +++++++-- config/bat.ini | 94 +++++++++++++++++++++++++------------------------- 2 files changed, 60 insertions(+), 50 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index be94eef23a..4ea26282b9 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -491,9 +491,19 @@ Obstacle reflections: - Use `perf` as the sweep objective. It is `1.0` only when the bat catches the bug and `0.0` for collision or timeout. - Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. - Forward-only movement dynamics should be swept with bounded ranges: - `env.bat_max_speed` in `[8.0, 18.0]`, `env.bat_accel` in `[15.0, 60.0]`, - and `env.bat_turn_rate` in `[pi/2, 2pi]`. -- Acoustic scale terms should be swept before increasing model size. Current bounded acoustic sweep knobs are `env.sound_speed` in `[45.0, 120.0]` and `env.ear_separation_scale` in `[0.5, 2.0]`. + `env.bat_max_speed` in `[8.0, 22.0]`, `env.bat_accel` in `[40.0, 90.0]`, + and `env.bat_turn_rate` in `[4.0, 3pi]`. +- Acoustic scale terms should be swept before increasing model size. Current bounded acoustic sweep knobs are `env.sound_speed` in `[80.0, 180.0]` and `env.ear_separation_scale` in `[1.0, 3.0]`. +- The June 9, 2026 `bat1` sweep strongly improved after the forward-only + dynamics change. Best observed run was `sage-cherry-92` with `perf ~= 0.953`, + `SPS ~= 2.06M`, collision `~= 0.031`, and timeout `~= 0.016`. The old default + had higher SPS but poor `perf`, so use `perf` first and SPS only as a + tie-breaker. 
+- That sweep pushed several bounds upward: `bat_accel`, `bat_turn_rate`, + `sound_speed`, `ear_separation_scale`, `progress_reward_scale`, + `replay_ratio`, and often `ent_coef`. It pushed `step_cost` and + `valid_chirp_reward` down. Defaults in `config/bat.ini` now track the best + high-perf region rather than the highest-SPS failed default. - Train workers should use CUDA with `--train.gpus 1`. - Protein/sweep control does not need CUDA. Run sweeps with `--sweep.use-gpu ""` so the optimizer stays off CUDA and avoids CUDA IPC/resource-handle failures. - Do not override training duration with ad hoc `--train.total-timesteps`. Put duration ranges in `config/bat.ini`. diff --git a/config/bat.ini b/config/bat.ini index 303decb411..435c96985d 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -22,46 +22,46 @@ width = 64 height = 64 num_obstacles = 3 bat_radius = 2.0 -ear_separation_scale = 0.75 +ear_separation_scale = 2.0 bug_radius = 1.5 -bat_max_speed = 12.0 -bat_accel = 30.0 -bat_turn_rate = 3.1415926 +bat_max_speed = 11.9 +bat_accel = 60.0 +bat_turn_rate = 6.2831853 bug_speed = 4.0 max_steps = 512 curriculum_enabled = 1 curriculum_start_obstacles = 1 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 8 -curriculum_successes_per_level = 8 -curriculum_start_bug_distance = 12.0 +curriculum_obstacle_step = 18 +curriculum_successes_per_level = 21 +curriculum_start_bug_distance = 8.0 curriculum_max_bug_distance = 44.0 -curriculum_bug_distance_step = 1.5 +curriculum_bug_distance_step = 4.0 freq_bins_per_ear = 16 max_echo_range = 80.0 -sound_speed = 60.0 +sound_speed = 120.0 reflector_spacing = 8.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 12 chirp_cost = 0.0 -valid_chirp_reward = 0.0005 -early_chirp_penalty = 0.001 -bug_echo_reward_scale = 0.05 -step_cost = 0.001 -progress_reward_scale = 0.02 +valid_chirp_reward = 0.00005 +early_chirp_penalty = 0.0043 +bug_echo_reward_scale = 0.13 +step_cost = 0.0002 +progress_reward_scale = 0.076 collision_penalty = 1.0 
[train] total_timesteps = 50_000_000 -learning_rate = 0.015 -gamma = 0.995 -gae_lambda = 0.90 -replay_ratio = 1.0 +learning_rate = 0.027 +gamma = 0.994 +gae_lambda = 0.98 +replay_ratio = 1.25 clip_coef = 0.2 vf_coef = 2.0 vf_clip_coef = 0.2 max_grad_norm = 1.5 -ent_coef = 0.001 +ent_coef = 0.002 beta1 = 0.95 beta2 = 0.999 eps = 1e-12 @@ -95,8 +95,8 @@ scale = auto [sweep.train.learning_rate] distribution = log_normal -min = 0.003 -max = 0.03 +min = 0.01 +max = 0.05 scale = auto [sweep.train.gamma] @@ -107,14 +107,14 @@ scale = auto [sweep.train.gae_lambda] distribution = logit_normal -min = 0.80 +min = 0.92 max = 0.98 scale = auto [sweep.train.ent_coef] distribution = log_normal -min = 0.0001 -max = 0.01 +min = 0.0005 +max = 0.03 scale = auto [sweep.train.horizon] @@ -138,61 +138,61 @@ scale = auto [sweep.env.bat_max_speed] distribution = uniform min = 8.0 -max = 18.0 +max = 22.0 scale = auto [sweep.env.bat_accel] distribution = uniform -min = 15.0 -max = 60.0 +min = 40.0 +max = 90.0 scale = auto [sweep.env.bat_turn_rate] distribution = uniform -min = 1.5707963 -max = 6.2831853 +min = 4.0 +max = 9.4247780 scale = auto [sweep.env.step_cost] distribution = uniform -min = 0.0002 -max = 0.002 +min = 0.0001 +max = 0.0008 scale = auto [sweep.env.sound_speed] distribution = uniform -min = 45.0 -max = 120.0 +min = 80.0 +max = 180.0 scale = auto [sweep.env.ear_separation_scale] distribution = uniform -min = 0.5 -max = 2.0 +min = 1.0 +max = 3.0 scale = auto [sweep.env.progress_reward_scale] distribution = uniform -min = 0.0 -max = 0.08 +min = 0.04 +max = 0.12 scale = auto [sweep.env.valid_chirp_reward] distribution = uniform -min = 0.00005 -max = 0.002 +min = 0.0 +max = 0.0015 scale = auto [sweep.env.early_chirp_penalty] distribution = uniform -min = 0.0001 -max = 0.005 +min = 0.001 +max = 0.006 scale = auto [sweep.env.bug_echo_reward_scale] distribution = uniform min = 0.02 -max = 0.30 +max = 0.35 scale = auto [sweep.env.collision_penalty] @@ -209,18 +209,18 @@ scale = auto 
[sweep.env.curriculum_bug_distance_step] distribution = uniform -min = 0.5 -max = 4.0 +min = 2.0 +max = 5.0 scale = auto [sweep.env.curriculum_obstacle_step] distribution = int_uniform -min = 4 -max = 24 +min = 8 +max = 28 scale = auto [sweep.env.curriculum_successes_per_level] distribution = int_uniform min = 4 -max = 32 +max = 24 scale = auto From 2b64c834b3be45d5e8e16195175dde65cb5ce0f1 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 09:07:19 -0700 Subject: [PATCH 08/51] add harder bat curriculum start level --- BAT_SPEC.md | 31 ++++++++++++++++++++ config/bat.ini | 11 +++++-- ocean/bat/bat.c | 1 + ocean/bat/bat.h | 5 ++++ ocean/bat/binding.c | 1 + ocean/bat/tests/test_bat_core.c | 52 +++++++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 2 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 4ea26282b9..a93e5c8e1a 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -419,6 +419,7 @@ Config knobs: - `progress_reward_scale` - `collision_penalty` - `curriculum_enabled` +- `curriculum_initial_level` - `curriculum_stage` ## PufferLib Integration @@ -520,3 +521,33 @@ Obstacle reflections: later runs. Use at least `--sweep.max-runs 3` for one actual suggestion, or run explicit bounded comparison trains when testing a small acoustic grid. - Curriculum difficulty should not advance on a single lucky catch. `env.curriculum_successes_per_level` gates advancement so each env must catch the bug multiple times at the current level before increasing bug distance or obstacle count. + +## Near-Term Roadmap + +Keep these changes small and reversible. Use TDD for env behavior changes, +train/eval after each rung, and commit each known-good rung separately. + +1. Harder curriculum and eval difficulty. + - Plain eval starts from a fresh env at curriculum level 0, so the bug can + look too close even when training eventually reaches harder levels. 
+ - Add a configurable initial curriculum level so eval can start at a + representative harder level without requiring manual in-session catches. + - Increase the maximum curriculum bug distance so longer runs can keep + getting harder after the current successful range. + - Preserve monotonic progress: once an env advances above the configured + initial level, resets must not drop it back down. + +2. Finite chirp budget. + - Try a default around `20` chirps per episode. + - Track remaining chirps as a normalized observation unless explicitly + testing a memory-only variant. + - When the budget is exhausted, terminate with a `-1` style failure penalty. + Prefer triggering this on an over-budget chirp attempt rather than + instantly after the last valid chirp, so the final echo can still matter. + +3. Later bug motion curriculum. + - Keep the current fixed-velocity bounce bug as the base rung. + - Add later stages for sine/cosine perturbations, circular/arc paths, and + simple maneuvers. + - Sweep bug speed and maneuver amplitude only after harder curriculum and + chirp budget are stable. 
diff --git a/config/bat.ini b/config/bat.ini index 435c96985d..306db379ea 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -30,12 +30,13 @@ bat_turn_rate = 6.2831853 bug_speed = 4.0 max_steps = 512 curriculum_enabled = 1 +curriculum_initial_level = 3 curriculum_start_obstacles = 1 curriculum_max_obstacles = 3 curriculum_obstacle_step = 18 curriculum_successes_per_level = 21 curriculum_start_bug_distance = 8.0 -curriculum_max_bug_distance = 44.0 +curriculum_max_bug_distance = 56.0 curriculum_bug_distance_step = 4.0 freq_bins_per_ear = 16 max_echo_range = 80.0 @@ -81,7 +82,7 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path = {} match_num_games = {} match_enemy_hidden_size = {} @@ -201,6 +202,12 @@ min = 0.5 max = 2.0 scale = auto +[sweep.env.curriculum_initial_level] +distribution = int_uniform +min = 0 +max = 5 +scale = auto + [sweep.env.curriculum_start_bug_distance] distribution = uniform min = 8.0 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 3ad19213c4..18f6743531 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -14,6 +14,7 @@ void demo() { .bat_turn_rate = BAT_PI, .bug_speed = 4.0f, .max_steps = 512, + 
.curriculum_initial_level = 3, .range_bins_per_ear = BAT_RANGE_BINS, .doppler_bins_per_ear = BAT_DOPPLER_BINS, .max_echo_range = 80.0f, diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 90aa57cc27..06e01289f2 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -113,6 +113,7 @@ typedef struct Bat { int num_obstacles; int curriculum_enabled; int curriculum_level; + int curriculum_initial_level; int curriculum_start_obstacles; int curriculum_max_obstacles; int curriculum_obstacle_step; @@ -464,6 +465,7 @@ void init(Bat* env) { if (env->curriculum_start_obstacles > env->curriculum_max_obstacles) { env->curriculum_start_obstacles = env->curriculum_max_obstacles; } + if (env->curriculum_initial_level < 0) env->curriculum_initial_level = 0; if (env->curriculum_obstacle_step <= 0) env->curriculum_obstacle_step = 8; if (env->curriculum_successes_per_level <= 0) env->curriculum_successes_per_level = 1; if (env->curriculum_start_bug_distance <= 0.0f) env->curriculum_start_bug_distance = 14.0f; @@ -747,6 +749,9 @@ static inline void bat_reset_episode(Bat* env) { env->bat_vy = 0.0f; env->bat_turn_velocity = 0.0f; env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + if (env->curriculum_enabled && env->curriculum_level < env->curriculum_initial_level) { + env->curriculum_level = env->curriculum_initial_level; + } bat_apply_curriculum(env); if (env->curriculum_enabled) { bat_sample_spawns_at_distance(env, bat_curriculum_bug_distance(env)); diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 5a69eaf747..a577113ffc 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -22,6 +22,7 @@ void my_init(Env* env, Dict* kwargs) { env->bug_speed = dict_get(kwargs, "bug_speed")->value; env->max_steps = dict_get(kwargs, "max_steps")->value; env->curriculum_enabled = dict_get(kwargs, "curriculum_enabled")->value; + env->curriculum_initial_level = dict_get(kwargs, "curriculum_initial_level")->value; env->curriculum_start_obstacles = dict_get(kwargs, 
"curriculum_start_obstacles")->value; env->curriculum_max_obstacles = dict_get(kwargs, "curriculum_max_obstacles")->value; env->curriculum_obstacle_step = dict_get(kwargs, "curriculum_obstacle_step")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index aa99d13730..8768c29e4c 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -715,6 +715,56 @@ static int test_curriculum_waits_for_required_catches(void) { return 0; } +static int test_curriculum_initial_level_sets_first_reset_difficulty(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_initial_level = 4; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step = 2; + env.curriculum_start_bug_distance = 8.0f; + env.curriculum_max_bug_distance = 56.0f; + env.curriculum_bug_distance_step = 4.0f; + c_reset(&env); + + ASSERT_TRUE(env.curriculum_level == 4); + ASSERT_TRUE(env.num_obstacles == 3); + float dist = bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y); + ASSERT_TRUE(dist >= 20.0f); + ASSERT_TRUE(dist <= 28.0f); + + free_allocated(&env); + return 0; +} + +static int test_curriculum_initial_level_does_not_reset_progress(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_initial_level = 2; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step = 1; + env.curriculum_successes_per_level = 1; + env.curriculum_start_bug_distance = 8.0f; + env.curriculum_max_bug_distance = 56.0f; + env.curriculum_bug_distance_step = 4.0f; + c_reset(&env); + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_TRUE(env.curriculum_level == 3); + ASSERT_TRUE(env.curriculum_successes_at_level == 0); + + free_allocated(&env); + return 0; +} + static int 
test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { Bat env = make_test_env(); env.num_obstacles = 0; @@ -976,6 +1026,8 @@ int main(void) { if (test_curriculum_starts_close_with_one_obstacle()) return 1; if (test_curriculum_advances_after_catch()) return 1; if (test_curriculum_waits_for_required_catches()) return 1; + if (test_curriculum_initial_level_sets_first_reset_difficulty()) return 1; + if (test_curriculum_initial_level_does_not_reset_progress()) return 1; if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; if (test_frequency_bin_energy_sums_and_caps()) return 1; if (test_bug_echo_reward_is_added_when_bug_echo_is_closer()) return 1; From 4dead3475d3045622f890029985e3510610fca3f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 09:12:02 -0700 Subject: [PATCH 09/51] test bat bug wall bounce --- ocean/bat/tests/test_bat_core.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 8768c29e4c..085b83e441 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -765,6 +765,32 @@ static int test_curriculum_initial_level_does_not_reset_progress(void) { return 0; } +static int test_bug_bounces_off_arena_walls(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bug_x = env.width - env.bug_radius + 0.1f; + env.bug_y = env.height * 0.5f; + env.bug_vx = 3.0f; + env.bug_vy = 1.0f; + bat_update_bug(&env, 0.0f); + ASSERT_TRUE(env.bug_x == env.width - env.bug_radius); + ASSERT_TRUE(env.bug_vx < 0.0f); + ASSERT_TRUE(env.bug_vy == 1.0f); + + env.bug_x = env.width * 0.5f; + env.bug_y = env.bug_radius - 0.1f; + env.bug_vx = 2.0f; + env.bug_vy = -4.0f; + bat_update_bug(&env, 0.0f); + ASSERT_TRUE(env.bug_y == env.bug_radius); + ASSERT_TRUE(env.bug_vx == 2.0f); + ASSERT_TRUE(env.bug_vy > 0.0f); + + free_allocated(&env); + return 0; +} + static int 
test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { Bat env = make_test_env(); env.num_obstacles = 0; @@ -1028,6 +1054,7 @@ int main(void) { if (test_curriculum_waits_for_required_catches()) return 1; if (test_curriculum_initial_level_sets_first_reset_difficulty()) return 1; if (test_curriculum_initial_level_does_not_reset_progress()) return 1; + if (test_bug_bounces_off_arena_walls()) return 1; if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; if (test_frequency_bin_energy_sums_and_caps()) return 1; if (test_bug_echo_reward_is_added_when_bug_echo_is_closer()) return 1; From 5f7ac48369040a39d71dce4875cb7776bf3b312d Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 10:32:23 -0700 Subject: [PATCH 10/51] add bat chirp budget metrics --- BAT_SPEC.md | 72 +++++++-- config/bat.ini | 30 +++- ocean/bat/bat.c | 3 + ocean/bat/bat.h | 185 ++++++++++++++++++++++- ocean/bat/binding.c | 22 ++- ocean/bat/tests/test_bat_core.c | 251 +++++++++++++++++++++++++++++++- 6 files changed, 542 insertions(+), 21 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index a93e5c8e1a..475034e031 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -232,12 +232,13 @@ Observation layout: 5. `last_chirp_start_freq_norm` 6. `last_chirp_end_freq_norm` 7. `last_chirp_duration_norm` -8. `forward_speed_norm` -9. `turn_rate_norm` +8. `chirps_used_norm = chirps_used / chirp_budget` +9. `forward_speed_norm` +10. `turn_rate_norm` Initial observation size: -- `OBS_SIZE = 39` +- `OBS_SIZE = 40` Echo bins: @@ -311,6 +312,11 @@ Default reward model: - `-1.0` for hitting walls or obstacles, terminal. - Tiny chirp cost so constant chirping is not fully free without causing chirp collapse. 
+- Solve-time chirp efficiency reward: + - `chirp_efficiency = 0.5 + 0.5 * (1.0 - chirps_used / chirp_budget)`, + - a catch after spending the full budget gets efficiency `0.5`, + - a catch with very few chirps approaches efficiency `1.0`, + - `chirp_efficiency_reward` scales this bonus and should be sweepable. - Sound-derived bug echo progress reward: - when a bug echo returns with a shorter acoustic path than the previous bug echo, add a small shaped reward, @@ -329,12 +335,14 @@ Progress reward: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - `reward -= chirp_cost` when a chirp is emitted + - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` when a returning bug echo indicates the bug is closer than the previous bug echo - Default starting values: - `progress_reward_scale = 0.05` - `step_cost = 0.001` + - `chirp_efficiency_reward = 1.0` - `chirp_cost = 0.00005` - `bug_echo_reward_scale = 0.02` - `chirp_cost = 0.0005` @@ -360,6 +368,21 @@ Reset: Logged metrics: - `perf` + - composite sweep objective: + `base_perf * curriculum_difficulty * budget_difficulty * chirp_efficiency` +- `base_perf` + - pure catch rate: `1.0` for catching the bug, `0.0` otherwise +- `curriculum_level` +- `curriculum_difficulty` + - normalized actual episode difficulty from start bug distance and obstacle + count +- `curriculum_perf` + - `base_perf * curriculum_difficulty`; useful diagnostic for level progress + without chirp-budget weighting +- `budget_difficulty` + - sweep-pressure multiplier derived from selected `max_chirps_per_episode`; + the empirical edge from the June 9, 2026 budget grid is `8` chirps, while + `20` chirps is easy - `score` - `episode_return` - `episode_length` @@ -370,6 +393,21 @@ Logged metrics: - `bug_distance_final` - `bug_distance_delta` - `chirps_emitted` +- `chirp_budget` +- `chirps_used_ratio` +- `chirps_remaining_ratio` +- 
`chirp_efficiency` + - `0.5` if the full budget was spent, approaching `1.0` when few chirps were + used +- `far_chirp_fraction` +- `near_chirp_fraction` +- `far_chirp_rate` +- `near_chirp_rate` +- `chirp_tempo_ratio` + - `near_chirp_rate / far_chirp_rate`, clamped to `[0, 10]`; values above + `1.0` indicate chirps are denser near the bug than far away +- `first_chirp_tick_norm` +- `mean_chirp_tick_norm` - `mean_chirp_duration` - `mean_chirp_bandwidth` - `mean_echo_energy_left` @@ -412,9 +450,13 @@ Config knobs: - `max_echo_range` - `sound_speed` - `reflector_spacing` +- `max_chirps_per_episode` +- `min_chirps_per_episode` +- `chirp_budget_decay_levels` - `chirp_freq_bins` - `chirp_duration_bins` - `chirp_cost` +- `chirp_efficiency_reward` - `step_cost` - `progress_reward_scale` - `collision_penalty` @@ -489,7 +531,10 @@ Obstacle reflections: ## Training and Sweep Operations -- Use `perf` as the sweep objective. It is `1.0` only when the bat catches the bug and `0.0` for collision or timeout. +- Keep `base_perf` as pure catch rate. Use composite `perf` as the sweep + objective. It rewards catching harder curriculum levels with fewer chirps and + under stricter configured chirp budgets without changing in-episode reward + shaping. - Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. - Forward-only movement dynamics should be swept with bounded ranges: `env.bat_max_speed` in `[8.0, 22.0]`, `env.bat_accel` in `[40.0, 90.0]`, @@ -538,12 +583,19 @@ train/eval after each rung, and commit each known-good rung separately. initial level, resets must not drop it back down. 2. Finite chirp budget. - - Try a default around `20` chirps per episode. - - Track remaining chirps as a normalized observation unless explicitly - testing a memory-only variant. 
- - When the budget is exhausted, terminate with a `-1` style failure penalty. - Prefer triggering this on an over-budget chirp attempt rather than - instantly after the last valid chirp, so the final echo can still matter. + - Default to `20` chirps per episode at low curriculum levels. + - Reduce the budget as curriculum level increases, with a floor so harder + levels require smarter chirp timing without creating an impossible cliff. + - Track `chirps_used / chirp_budget` as a normalized `0..1` observation. + - When the budget is exhausted, terminate with a `-1` style failure penalty + if the policy attempts another chirp. Do not terminate immediately after + the last valid chirp, so the final echo can still matter. + - Log chirp budget, used ratio, remaining ratio, and efficiency to W&B so + sweeps can distinguish successful policies that waste every chirp from + successful policies that catch the bug with useful chirp timing. + - Add a sweepable solve-time efficiency reward where spending the full + budget scores `0.5` on the efficiency component and using very few chirps + approaches `1.0`. 3. Later bug motion curriculum. - Keep the current fixed-velocity bounce bug as the base rung. 
diff --git a/config/bat.ini b/config/bat.ini index 306db379ea..2506d58ead 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -44,7 +44,11 @@ sound_speed = 120.0 reflector_spacing = 8.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 12 +max_chirps_per_episode = 10 +min_chirps_per_episode = 5 +chirp_budget_decay_levels = 4 chirp_cost = 0.0 +chirp_efficiency_reward = 1.0 valid_chirp_reward = 0.00005 early_chirp_penalty = 0.0043 bug_echo_reward_scale = 0.13 @@ -82,7 +86,7 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path = {} match_num_games = {} match_enemy_hidden_size = {} @@ -184,6 +188,12 @@ min = 0.0 max = 0.0015 scale = auto +[sweep.env.chirp_efficiency_reward] +distribution = uniform +min = 0.0 +max = 2.0 +scale = auto + [sweep.env.early_chirp_penalty] distribution = uniform min = 0.001 @@ -202,6 +212,24 @@ min = 0.5 max = 2.0 scale = auto +[sweep.env.max_chirps_per_episode] +distribution = int_uniform +min = 6 +max = 20 +scale = auto + +[sweep.env.min_chirps_per_episode] +distribution = int_uniform +min 
= 4 +max = 8 +scale = auto + +[sweep.env.chirp_budget_decay_levels] +distribution = int_uniform +min = 3 +max = 8 +scale = auto + [sweep.env.curriculum_initial_level] distribution = int_uniform min = 0 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 18f6743531..ccd0c865b3 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -20,6 +20,9 @@ void demo() { .max_echo_range = 80.0f, .sound_speed = 100.0f, .reflector_spacing = 8.0f, + .max_chirps_per_episode = 20, + .min_chirps_per_episode = 10, + .chirp_budget_decay_levels = 4, .chirp_cost = 0.0005f, .step_cost = 0.001f, .progress_reward_scale = 0.05f, diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 06e01289f2..f30c5dabbb 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -10,7 +10,7 @@ #include "raylib.h" #endif -#define BAT_OBS_SIZE 39 +#define BAT_OBS_SIZE 40 #define BAT_NUM_ACTIONS 6 #define BAT_MOVE_ACTIONS 3 #define BAT_TURN_ACTIONS 3 @@ -26,8 +26,9 @@ #define BAT_CHIRP_START_OBS 34 #define BAT_CHIRP_END_OBS 35 #define BAT_CHIRP_DURATION_OBS 36 -#define BAT_FORWARD_SPEED_OBS 37 -#define BAT_TURN_RATE_OBS 38 +#define BAT_CHIRPS_USED_OBS 37 +#define BAT_FORWARD_SPEED_OBS 38 +#define BAT_TURN_RATE_OBS 39 #define BAT_NOOP 0 #define BAT_THRUST_FORWARD 1 @@ -44,6 +45,8 @@ #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 #define BAT_ECHO_QUEUE_TICKS 256 +#define BAT_BUDGET_EASY_CHIRPS 20.0f +#define BAT_BUDGET_EDGE_CHIRPS 8.0f #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 @@ -74,16 +77,32 @@ typedef struct EchoBucket { typedef struct Log { float perf; + float base_perf; float score; float episode_return; float episode_length; float success; float collision; float timeout; + float curriculum_level; + float curriculum_difficulty; + float curriculum_perf; + float budget_difficulty; float bug_distance_start; float bug_distance_final; float bug_distance_delta; float chirps_emitted; + float chirp_budget; + float chirps_used_ratio; + float chirps_remaining_ratio; + float chirp_efficiency; + float 
far_chirp_fraction; + float near_chirp_fraction; + float far_chirp_rate; + float near_chirp_rate; + float chirp_tempo_ratio; + float first_chirp_tick_norm; + float mean_chirp_tick_norm; float mean_chirp_duration; float mean_chirp_bandwidth; float mean_echo_energy_left; @@ -153,6 +172,10 @@ typedef struct Bat { float reflector_spacing; int max_chirp_age_ticks; int chirp_cooldown_ticks; + int max_chirps_per_episode; + int min_chirps_per_episode; + int chirp_budget_decay_levels; + int chirp_budget; int chirp_age_ticks; int last_chirp_tick; float last_chirp_start_freq; @@ -164,10 +187,19 @@ typedef struct Bat { int chirps_emitted_episode; float chirp_duration_sum; float chirp_bandwidth_sum; + float chirps_far; + float chirps_mid; + float chirps_near; + float ticks_far; + float ticks_mid; + float ticks_near; + float first_chirp_tick; + float chirp_tick_sum; float echo_energy_left_sum; float echo_energy_right_sum; float chirp_cost; + float chirp_efficiency_reward; float valid_chirp_reward; float early_chirp_penalty; float step_cost; @@ -339,6 +371,80 @@ static inline float bat_curriculum_bug_distance(Bat* env) { env->curriculum_max_bug_distance); } +static inline int bat_curriculum_chirp_budget(Bat* env) { + int decay = env->chirp_budget_decay_levels <= 0 ? 1 : env->chirp_budget_decay_levels; + int level = env->curriculum_enabled ? env->curriculum_level : 0; + int budget = env->max_chirps_per_episode - level / decay; + if (budget < env->min_chirps_per_episode) budget = env->min_chirps_per_episode; + if (budget > env->max_chirps_per_episode) budget = env->max_chirps_per_episode; + if (budget < 1) budget = 1; + return budget; +} + +static inline float bat_chirps_used_ratio(Bat* env) { + int budget = env->chirp_budget > 0 ? 
env->chirp_budget : env->max_chirps_per_episode; + if (budget <= 0) budget = 1; + return bat_clampf(env->chirps_emitted_episode / (float)budget, 0.0f, 1.0f); +} + +static inline float bat_chirp_efficiency(Bat* env) { + return 0.5f + 0.5f * (1.0f - bat_chirps_used_ratio(env)); +} + +static inline float bat_norm_range(float value, float lo, float hi) { + float span = hi - lo; + if (span <= 0.000001f) return 0.0f; + return bat_clampf((value - lo) / span, 0.0f, 1.0f); +} + +static inline float bat_curriculum_difficulty(Bat* env) { + float distance = bat_norm_range(env->start_bug_dist, + env->curriculum_start_bug_distance, env->curriculum_max_bug_distance); + float obstacles = bat_norm_range((float)env->num_obstacles, + (float)env->curriculum_start_obstacles, (float)env->curriculum_max_obstacles); + return (distance + obstacles) / 2.0f; +} + +static inline float bat_budget_difficulty(Bat* env) { + float pressure = (BAT_BUDGET_EASY_CHIRPS - (float)env->max_chirps_per_episode) + / (BAT_BUDGET_EASY_CHIRPS - BAT_BUDGET_EDGE_CHIRPS); + return 0.5f + 0.5f * bat_clampf(pressure, 0.0f, 1.0f); +} + +static inline float bat_success_reward(Bat* env) { + return 1.0f + env->chirp_efficiency_reward * bat_chirp_efficiency(env); +} + +static inline float bat_current_distance_ratio(Bat* env) { + float dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + return dist / fmaxf(1.0f, env->start_bug_dist); +} + +static inline void bat_accumulate_distance_region(float ratio, float amount, + float* far, float* mid, float* near) { + if (ratio > 0.66f) { + *far += amount; + } else if (ratio < 0.33f) { + *near += amount; + } else { + *mid += amount; + } +} + +static inline void bat_record_distance_tick(Bat* env) { + bat_accumulate_distance_region(bat_current_distance_ratio(env), 1.0f, + &env->ticks_far, &env->ticks_mid, &env->ticks_near); +} + +static inline void bat_record_chirp_timing(Bat* env) { + if (env->first_chirp_tick < 0.0f) { + env->first_chirp_tick = (float)env->tick; + } 
+ env->chirp_tick_sum += (float)env->tick; + bat_accumulate_distance_region(bat_current_distance_ratio(env), 1.0f, + &env->chirps_far, &env->chirps_mid, &env->chirps_near); +} + static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { float margin = fmaxf(6.0f, fmaxf(env->bat_radius, env->bug_radius) + 3.0f); target_distance = fmaxf(0.0f, target_distance); @@ -448,10 +554,17 @@ void init(Bat* env) { if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; + if (env->max_chirps_per_episode <= 0) env->max_chirps_per_episode = 20; + if (env->min_chirps_per_episode <= 0) env->min_chirps_per_episode = 10; + if (env->min_chirps_per_episode > env->max_chirps_per_episode) { + env->min_chirps_per_episode = env->max_chirps_per_episode; + } + if (env->chirp_budget_decay_levels <= 0) env->chirp_budget_decay_levels = 4; if (env->step_cost <= 0.0f) env->step_cost = 0.001f; if (env->progress_reward_scale <= 0.0f) env->progress_reward_scale = 0.05f; if (env->collision_penalty <= 0.0f) env->collision_penalty = 1.0f; if (env->chirp_cost < 0.0f) env->chirp_cost = 0.0f; + if (env->chirp_efficiency_reward < 0.0f) env->chirp_efficiency_reward = 0.0f; if (env->valid_chirp_reward <= 0.0f) env->valid_chirp_reward = 0.0005f; if (env->early_chirp_penalty <= 0.0f) env->early_chirp_penalty = 0.001f; if (env->bug_echo_reward_scale <= 0.0f) env->bug_echo_reward_scale = 0.0f; @@ -504,17 +617,49 @@ void free_allocated(Bat* env) { static inline void add_log(Bat* env, float success, float collision, float timeout) { float final_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); - env->log.perf += success; + float curriculum_difficulty = bat_curriculum_difficulty(env); + float budget_difficulty = bat_budget_difficulty(env); + float chirp_efficiency = bat_chirp_efficiency(env); + env->log.perf += success * 
curriculum_difficulty * budget_difficulty * chirp_efficiency; + env->log.base_perf += success; env->log.score += env->episode_return; env->log.episode_return += env->episode_return; env->log.episode_length += env->tick; env->log.success += success; env->log.collision += collision; env->log.timeout += timeout; + env->log.curriculum_level += env->curriculum_level; + env->log.curriculum_difficulty += curriculum_difficulty; + env->log.curriculum_perf += success * curriculum_difficulty; + env->log.budget_difficulty += budget_difficulty; env->log.bug_distance_start += env->start_bug_dist; env->log.bug_distance_final += final_dist; env->log.bug_distance_delta += env->start_bug_dist - final_dist; env->log.chirps_emitted += env->chirps_emitted_episode; + env->log.chirp_budget += env->chirp_budget; + env->log.chirps_used_ratio += bat_chirps_used_ratio(env); + env->log.chirps_remaining_ratio += 1.0f - bat_chirps_used_ratio(env); + env->log.chirp_efficiency += chirp_efficiency; + float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); + env->log.far_chirp_fraction += env->chirps_far / chirps; + env->log.near_chirp_fraction += env->chirps_near / chirps; + float far_rate = env->chirps_far / fmaxf(1.0f, env->ticks_far); + float near_rate = env->chirps_near / fmaxf(1.0f, env->ticks_near); + env->log.far_chirp_rate += far_rate; + env->log.near_chirp_rate += near_rate; + float tempo_ratio = 0.0f; + if (far_rate > 0.000001f) { + tempo_ratio = near_rate / far_rate; + } else if (near_rate > 0.000001f) { + tempo_ratio = 10.0f; + } + env->log.chirp_tempo_ratio += bat_clampf(tempo_ratio, 0.0f, 10.0f); + env->log.first_chirp_tick_norm += env->first_chirp_tick >= 0.0f + ? bat_clampf(env->first_chirp_tick / fmaxf(1.0f, (float)env->max_steps), 0.0f, 1.0f) + : 1.0f; + env->log.mean_chirp_tick_norm += env->chirps_emitted_episode > 0 + ? 
bat_clampf((env->chirp_tick_sum / chirps) / fmaxf(1.0f, (float)env->max_steps), 0.0f, 1.0f) + : 1.0f; if (env->chirps_emitted_episode > 0) { env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; env->log.mean_chirp_bandwidth += env->chirp_bandwidth_sum / env->chirps_emitted_episode; @@ -738,6 +883,7 @@ void compute_observations(Bat* env) { env->observations[BAT_CHIRP_START_OBS] = env->last_chirp_start_freq; env->observations[BAT_CHIRP_END_OBS] = env->last_chirp_end_freq; env->observations[BAT_CHIRP_DURATION_OBS] = env->last_chirp_duration; + env->observations[BAT_CHIRPS_USED_OBS] = bat_chirps_used_ratio(env); float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, -1.0f, 1.0f); env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); @@ -770,12 +916,21 @@ static inline void bat_reset_episode(Bat* env) { memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; bat_clear_echo_queue(env); + env->chirp_budget = bat_curriculum_chirp_budget(env); env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; env->chirps_emitted_episode = 0; env->chirp_duration_sum = 0.0f; env->chirp_bandwidth_sum = 0.0f; + env->chirps_far = 0.0f; + env->chirps_mid = 0.0f; + env->chirps_near = 0.0f; + env->ticks_far = 0.0f; + env->ticks_mid = 0.0f; + env->ticks_near = 0.0f; + env->first_chirp_tick = -1.0f; + env->chirp_tick_sum = 0.0f; env->echo_energy_left_sum = 0.0f; env->echo_energy_right_sum = 0.0f; env->episode_return = 0.0f; @@ -864,11 +1019,16 @@ static inline bool bat_try_emit_chirp(Bat* env) { return false; } + if (env->chirps_emitted_episode >= env->chirp_budget) { + return false; + } + env->last_chirp_start_freq = bat_norm_bin(start_idx, BAT_CHIRP_FREQ_BINS); env->last_chirp_end_freq = bat_norm_bin(end_idx, 
BAT_CHIRP_FREQ_BINS); env->last_chirp_duration = bat_norm_bin(duration_idx, BAT_CHIRP_DURATION_BINS); env->chirp_age_ticks = 0; env->last_chirp_tick = env->tick; + bat_record_chirp_timing(env); env->chirps_emitted_episode += 1; env->chirp_duration_sum += env->last_chirp_duration; env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); @@ -888,6 +1048,10 @@ static inline bool bat_try_emit_chirp(Bat* env) { static inline int bat_update_chirp(Bat* env) { int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); if (emit) { + if (env->tick - env->last_chirp_tick >= env->chirp_cooldown_ticks && + env->chirps_emitted_episode >= env->chirp_budget) { + return -2; + } return bat_try_emit_chirp(env) ? 1 : -1; } else if (env->chirp_age_ticks < env->max_chirp_age_ticks) { env->chirp_age_ticks += 1; @@ -904,8 +1068,16 @@ void c_step(Bat* env) { env->terminals[0] = 0.0f; int chirp_status = bat_update_chirp(env); + if (chirp_status == -2) { + env->rewards[0] = -1.0f; + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + add_log(env, 0.0f, 1.0f, 0.0f); + bat_reset_episode(env); + return; + } if (bat_caught_bug(env)) { - env->rewards[0] = 1.0f; + env->rewards[0] = bat_success_reward(env); env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; bat_advance_curriculum(env); @@ -926,7 +1098,7 @@ void c_step(Bat* env) { return; } if (bat_caught_bug(env)) { - env->rewards[0] = 1.0f; + env->rewards[0] = bat_success_reward(env); env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; bat_advance_curriculum(env); @@ -937,6 +1109,7 @@ void c_step(Bat* env) { } env->tick += 1; + bat_record_distance_tick(env); float bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); float progress = env->prev_bug_dist - bug_dist; env->rewards[0] += env->progress_reward_scale * progress; diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index a577113ffc..61b5297d1c 100644 --- a/ocean/bat/binding.c +++ 
b/ocean/bat/binding.c @@ -1,5 +1,5 @@ #include "bat.h" -#define OBS_SIZE 39 +#define OBS_SIZE 40 #define NUM_ATNS 6 #define ACT_SIZES {3, 3, 8, 8, 4, 2} #define OBS_TENSOR_T FloatTensor @@ -36,7 +36,11 @@ void my_init(Env* env, Dict* kwargs) { env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; + env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; + env->min_chirps_per_episode = dict_get(kwargs, "min_chirps_per_episode")->value; + env->chirp_budget_decay_levels = dict_get(kwargs, "chirp_budget_decay_levels")->value; env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; + env->chirp_efficiency_reward = dict_get(kwargs, "chirp_efficiency_reward")->value; env->valid_chirp_reward = dict_get(kwargs, "valid_chirp_reward")->value; env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; @@ -48,15 +52,31 @@ void my_init(Env* env, Dict* kwargs) { void my_log(Log* log, Dict* out) { dict_set(out, "perf", log->perf); + dict_set(out, "base_perf", log->base_perf); dict_set(out, "score", log->score); dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); dict_set(out, "collision", log->collision); dict_set(out, "timeout", log->timeout); + dict_set(out, "curriculum_level", log->curriculum_level); + dict_set(out, "curriculum_difficulty", log->curriculum_difficulty); + dict_set(out, "curriculum_perf", log->curriculum_perf); + dict_set(out, "budget_difficulty", log->budget_difficulty); dict_set(out, "bug_distance_start", log->bug_distance_start); dict_set(out, "bug_distance_final", log->bug_distance_final); dict_set(out, "bug_distance_delta", log->bug_distance_delta); dict_set(out, "chirps_emitted", log->chirps_emitted); + dict_set(out, 
"chirp_budget", log->chirp_budget); + dict_set(out, "chirps_used_ratio", log->chirps_used_ratio); + dict_set(out, "chirps_remaining_ratio", log->chirps_remaining_ratio); + dict_set(out, "chirp_efficiency", log->chirp_efficiency); + dict_set(out, "far_chirp_fraction", log->far_chirp_fraction); + dict_set(out, "near_chirp_fraction", log->near_chirp_fraction); + dict_set(out, "far_chirp_rate", log->far_chirp_rate); + dict_set(out, "near_chirp_rate", log->near_chirp_rate); + dict_set(out, "chirp_tempo_ratio", log->chirp_tempo_ratio); + dict_set(out, "first_chirp_tick_norm", log->first_chirp_tick_norm); + dict_set(out, "mean_chirp_tick_norm", log->mean_chirp_tick_norm); dict_set(out, "mean_chirp_duration", log->mean_chirp_duration); dict_set(out, "mean_chirp_bandwidth", log->mean_chirp_bandwidth); dict_set(out, "mean_echo_energy_left", log->mean_echo_energy_left); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 085b83e441..5ae918e34a 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -71,6 +71,240 @@ static int test_chirp_metadata_and_observation_size(void) { return 0; } +static int test_chirp_budget_observation_tracks_used_chirps(void) { + Bat env = make_test_env(); + env.max_chirps_per_episode = 4; + env.min_chirps_per_episode = 2; + env.chirp_budget_decay_levels = 4; + c_reset(&env); + + ASSERT_TRUE(env.chirp_budget == 4); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 0.0f, 0.0001f); + + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 1.0f; + c_step(&env); + + ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 0.25f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_chirp_budget_decreases_with_curriculum_level(void) { + Bat env = make_test_env(); + env.curriculum_enabled = 1; + env.curriculum_initial_level = 8; + env.max_chirps_per_episode = 20; + 
env.min_chirps_per_episode = 10; + env.chirp_budget_decay_levels = 4; + c_reset(&env); + + ASSERT_TRUE(env.curriculum_level == 8); + ASSERT_TRUE(env.chirp_budget == 18); + + free_allocated(&env); + return 0; +} + +static int test_chirping_after_budget_terminates_with_penalty(void) { + Bat env = make_test_env(); + env.max_chirps_per_episode = 1; + env.min_chirps_per_episode = 1; + env.chirp_budget_decay_levels = 4; + env.chirp_cooldown_ticks = 1; + env.early_chirp_penalty = 0.0f; + c_reset(&env); + + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 1.0f; + c_step(&env); + ASSERT_TRUE(env.terminals[0] == 0.0f); + ASSERT_TRUE(env.chirps_emitted_episode == 1); + + env.tick = env.last_chirp_tick + env.chirp_cooldown_ticks; + c_step(&env); + + ASSERT_TRUE(env.terminals[0] == 1.0f); + ASSERT_FLOAT_NEAR(env.rewards[0], -1.0f, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 0); + + free_allocated(&env); + return 0; +} + +static int test_chirp_efficiency_scores_low_usage_above_full_budget(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.chirp_budget = 10; + env.chirps_emitted_episode = 1; + ASSERT_FLOAT_NEAR(bat_chirp_efficiency(&env), 0.95f, 0.0001f); + + env.chirps_emitted_episode = 10; + ASSERT_FLOAT_NEAR(bat_chirp_efficiency(&env), 0.50f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_success_reward_includes_chirp_efficiency_bonus(void) { + Bat env = make_test_env(); + env.chirp_efficiency_reward = 1.0f; + c_reset(&env); + + env.chirp_budget = 10; + env.chirps_emitted_episode = 2; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 20.5f; + env.bug_y = 20.0f; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], 1.90f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_chirp_budget_logs_ratios_for_wandb(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.chirp_budget = 10; + 
env.chirps_emitted_episode = 4; + add_log(&env, 1.0f, 0.0f, 0.0f); + + ASSERT_FLOAT_NEAR(env.log.chirp_budget, 10.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirps_used_ratio, 0.40f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirps_remaining_ratio, 0.60f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.80f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_curriculum_perf_uses_success_and_actual_difficulty(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.curriculum_start_bug_distance = 8.0f; + env.curriculum_max_bug_distance = 56.0f; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.num_obstacles = 2; + env.max_chirps_per_episode = 10; + env.min_chirps_per_episode = 5; + env.chirp_budget = 8; + env.start_bug_dist = 32.0f; + + ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.5000000f, 0.0001f); + add_log(&env, 1.0f, 0.0f, 0.0f); + ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.5000000f, 0.0001f); + + memset(&env.log, 0, sizeof(env.log)); + add_log(&env, 0.0f, 1.0f, 0.0f); + ASSERT_FLOAT_NEAR(env.log.base_perf, 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_budget_difficulty_uses_empirical_edge_at_eight_chirps(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.max_chirps_per_episode = 20; + ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.50f, 0.0001f); + + env.max_chirps_per_episode = 14; + ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.75f, 0.0001f); + + env.max_chirps_per_episode = 10; + ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.9166667f, 0.0001f); + + env.max_chirps_per_episode = 8; + ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 1.0f, 0.0001f); + + env.max_chirps_per_episode = 4; + 
ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 1.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.curriculum_start_bug_distance = 8.0f; + env.curriculum_max_bug_distance = 56.0f; + env.curriculum_start_obstacles = 1; + env.curriculum_max_obstacles = 3; + env.num_obstacles = 2; + env.max_chirps_per_episode = 14; + env.min_chirps_per_episode = 4; + env.chirp_budget = 14; + env.chirps_emitted_episode = 7; + env.start_bug_dist = 32.0f; + + add_log(&env, 1.0f, 0.0f, 0.0f); + + ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.75f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.50f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.28125f, 0.0001f); + + memset(&env.log, 0, sizeof(env.log)); + add_log(&env, 0.0f, 1.0f, 0.0f); + ASSERT_FLOAT_NEAR(env.log.base_perf, 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_chirp_tempo_logs_far_and_near_rates(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.chirps_emitted_episode = 4; + env.chirps_far = 2.0f; + env.chirps_mid = 1.0f; + env.chirps_near = 1.0f; + env.ticks_far = 40.0f; + env.ticks_mid = 20.0f; + env.ticks_near = 10.0f; + env.first_chirp_tick = 12.0f; + env.chirp_tick_sum = 120.0f; + env.max_steps = 120; + + add_log(&env, 1.0f, 0.0f, 0.0f); + + ASSERT_FLOAT_NEAR(env.log.far_chirp_fraction, 0.50f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.near_chirp_fraction, 0.25f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.far_chirp_rate, 0.05f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.near_chirp_rate, 0.10f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirp_tempo_ratio, 2.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.first_chirp_tick_norm, 0.10f, 0.0001f); + 
ASSERT_FLOAT_NEAR(env.log.mean_chirp_tick_norm, 0.25f, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_left_right_echo_asymmetry(void) { Bat env = make_test_env(); c_reset(&env); @@ -579,7 +813,7 @@ static float test_sum_obs(Bat* env, int offset, int count) { } static int test_bins_only_observation_layout(void) { - ASSERT_TRUE(BAT_OBS_SIZE == 39); + ASSERT_TRUE(BAT_OBS_SIZE == 40); ASSERT_TRUE(BAT_FREQ_BINS == 16); ASSERT_TRUE(BAT_LEFT_FREQ_OFFSET == 0); ASSERT_TRUE(BAT_RIGHT_FREQ_OFFSET == 16); @@ -588,8 +822,9 @@ static int test_bins_only_observation_layout(void) { ASSERT_TRUE(BAT_CHIRP_START_OBS == 34); ASSERT_TRUE(BAT_CHIRP_END_OBS == 35); ASSERT_TRUE(BAT_CHIRP_DURATION_OBS == 36); - ASSERT_TRUE(BAT_FORWARD_SPEED_OBS == 37); - ASSERT_TRUE(BAT_TURN_RATE_OBS == 38); + ASSERT_TRUE(BAT_CHIRPS_USED_OBS == 37); + ASSERT_TRUE(BAT_FORWARD_SPEED_OBS == 38); + ASSERT_TRUE(BAT_TURN_RATE_OBS == 39); return 0; } @@ -1029,6 +1264,16 @@ static int test_obstacles_are_small_enough_for_trainability(void) { int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; + if (test_chirp_budget_observation_tracks_used_chirps()) return 1; + if (test_chirp_budget_decreases_with_curriculum_level()) return 1; + if (test_chirping_after_budget_terminates_with_penalty()) return 1; + if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; + if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; + if (test_chirp_budget_logs_ratios_for_wandb()) return 1; + if (test_curriculum_perf_uses_success_and_actual_difficulty()) return 1; + if (test_budget_difficulty_uses_empirical_edge_at_eight_chirps()) return 1; + if (test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency()) return 1; + if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; if (test_echo_scheduling_uses_tick_bucket_accumulator()) 
return 1; From 40b88043b2a5bc6e246ca8baafdcc5745c221e33 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 10:40:09 -0700 Subject: [PATCH 11/51] retune bat chirp budget pressure --- BAT_SPEC.md | 12 ++++++++---- config/bat.ini | 8 ++++---- ocean/bat/bat.h | 4 ++-- ocean/bat/tests/test_bat_core.c | 18 +++++++++--------- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 475034e031..f59183f4a3 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -381,8 +381,9 @@ Logged metrics: without chirp-budget weighting - `budget_difficulty` - sweep-pressure multiplier derived from selected `max_chirps_per_episode`; - the empirical edge from the June 9, 2026 budget grid is `8` chirps, while - `20` chirps is easy + `15` chirps maps to the floor `0.50`, budgets below `6` chirps map to + `1.0`, and `20` chirps is intentionally outside the default sweep because + it was too easy in the June 9, 2026 budget grid - `score` - `episode_return` - `episode_length` @@ -534,7 +535,9 @@ Obstacle reflections: - Keep `base_perf` as pure catch rate. Use composite `perf` as the sweep objective. It rewards catching harder curriculum levels with fewer chirps and under stricter configured chirp budgets without changing in-episode reward - shaping. + shaping. Current budget scoring treats `15` chirps as the easy floor and + gives full pressure below `6` chirps; do not include `20` chirps in the + default budget sweep. - Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. - Forward-only movement dynamics should be swept with bounded ranges: `env.bat_max_speed` in `[8.0, 22.0]`, `env.bat_accel` in `[40.0, 90.0]`, @@ -583,7 +586,8 @@ train/eval after each rung, and commit each known-good rung separately. initial level, resets must not drop it back down. 2. Finite chirp budget. 
- - Default to `20` chirps per episode at low curriculum levels. + - Keep the low-curriculum budget below the old `20`-chirp setting; `20` + proved too easy and should not be part of the default sweep. - Reduce the budget as curriculum level increases, with a floor so harder levels require smarter chirp timing without creating an impossible cliff. - Track `chirps_used / chirp_budget` as a normalized `0..1` observation. diff --git a/config/bat.ini b/config/bat.ini index 2506d58ead..d943ddb195 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -214,14 +214,14 @@ scale = auto [sweep.env.max_chirps_per_episode] distribution = int_uniform -min = 6 -max = 20 +min = 4 +max = 15 scale = auto [sweep.env.min_chirps_per_episode] distribution = int_uniform -min = 4 -max = 8 +min = 3 +max = 6 scale = auto [sweep.env.chirp_budget_decay_levels] diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index f30c5dabbb..5f0e65a757 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -45,8 +45,8 @@ #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 #define BAT_ECHO_QUEUE_TICKS 256 -#define BAT_BUDGET_EASY_CHIRPS 20.0f -#define BAT_BUDGET_EDGE_CHIRPS 8.0f +#define BAT_BUDGET_EASY_CHIRPS 15.0f +#define BAT_BUDGET_EDGE_CHIRPS 5.0f #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 5ae918e34a..86742f57db 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -221,20 +221,20 @@ static int test_curriculum_perf_uses_success_and_actual_difficulty(void) { return 0; } -static int test_budget_difficulty_uses_empirical_edge_at_eight_chirps(void) { +static int test_budget_difficulty_uses_hard_edge_below_six_chirps(void) { Bat env = make_test_env(); c_reset(&env); - env.max_chirps_per_episode = 20; + env.max_chirps_per_episode = 15; ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.50f, 0.0001f); - env.max_chirps_per_episode = 14; + env.max_chirps_per_episode = 10; 
ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.75f, 0.0001f); - env.max_chirps_per_episode = 10; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.9166667f, 0.0001f); + env.max_chirps_per_episode = 6; + ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.95f, 0.0001f); - env.max_chirps_per_episode = 8; + env.max_chirps_per_episode = 5; ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 1.0f, 0.0001f); env.max_chirps_per_episode = 4; @@ -262,10 +262,10 @@ static int test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency(v add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.75f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.55f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.50f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.perf, 0.28125f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.20625f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); @@ -1271,7 +1271,7 @@ int main(void) { if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; if (test_chirp_budget_logs_ratios_for_wandb()) return 1; if (test_curriculum_perf_uses_success_and_actual_difficulty()) return 1; - if (test_budget_difficulty_uses_empirical_edge_at_eight_chirps()) return 1; + if (test_budget_difficulty_uses_hard_edge_below_six_chirps()) return 1; if (test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency()) return 1; if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; From 6a30d14d135d171f3a4f7b503d8b2526715d3272 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 10:46:29 -0700 Subject: [PATCH 12/51] penalize overlapping bat chirps --- BAT_SPEC.md | 11 ++++++++++ config/bat.ini | 9 +++++++- ocean/bat/bat.h | 16 ++++++++++++++ ocean/bat/binding.c | 2 ++ ocean/bat/tests/test_bat_core.c | 37 
+++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 1 deletion(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index f59183f4a3..3283c9319a 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -312,6 +312,10 @@ Default reward model: - `-1.0` for hitting walls or obstacles, terminal. - Tiny chirp cost so constant chirping is not fully free without causing chirp collapse. +- Chirping again before the prior chirp's max echo return window has cleared + gets a small physical overlap penalty. This is not a generic timing-efficiency + reward; it represents self-induced acoustic ambiguity from overlapping + returns. - Solve-time chirp efficiency reward: - `chirp_efficiency = 0.5 + 0.5 * (1.0 - chirps_used / chirp_budget)`, - a catch after spending the full budget gets efficiency `0.5`, @@ -335,6 +339,8 @@ Progress reward: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - `reward -= chirp_cost` when a chirp is emitted + - `reward -= chirp_overlap_penalty` when a valid chirp is emitted before + the previous chirp's max echo return window has cleared - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` when a returning bug echo indicates the bug is closer than the previous bug @@ -344,6 +350,7 @@ Progress reward: - `step_cost = 0.001` - `chirp_efficiency_reward = 1.0` - `chirp_cost = 0.00005` + - `chirp_overlap_penalty = 0.004` - `bug_echo_reward_scale = 0.02` - `chirp_cost = 0.0005` @@ -400,6 +407,9 @@ Logged metrics: - `chirp_efficiency` - `0.5` if the full budget was spent, approaching `1.0` when few chirps were used +- `chirp_overlap_fraction` + - fraction of emitted chirps that were sent before the previous chirp's max + return window cleared - `far_chirp_fraction` - `near_chirp_fraction` - `far_chirp_rate` @@ -458,6 +468,7 @@ Config knobs: - `chirp_duration_bins` - `chirp_cost` - `chirp_efficiency_reward` +- `chirp_overlap_penalty` - 
`step_cost` - `progress_reward_scale` - `collision_penalty` diff --git a/config/bat.ini b/config/bat.ini index d943ddb195..898d1ce9f0 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -51,6 +51,7 @@ chirp_cost = 0.0 chirp_efficiency_reward = 1.0 valid_chirp_reward = 0.00005 early_chirp_penalty = 0.0043 +chirp_overlap_penalty = 0.0040 bug_echo_reward_scale = 0.13 step_cost = 0.0002 progress_reward_scale = 0.076 @@ -86,7 +87,7 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,bug_echo_reward_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,chirp_overlap_penalty,bug_echo_reward_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path = {} match_num_games = {} match_enemy_hidden_size = {} @@ -200,6 +201,12 @@ min = 0.001 max = 0.006 scale = auto +[sweep.env.chirp_overlap_penalty] +distribution = uniform +min = 0.001 +max = 0.008 +scale = auto + [sweep.env.bug_echo_reward_scale] distribution = uniform min = 0.02 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 5f0e65a757..0156c240b9 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -96,6 +96,7 @@ 
typedef struct Log { float chirps_used_ratio; float chirps_remaining_ratio; float chirp_efficiency; + float chirp_overlap_fraction; float far_chirp_fraction; float near_chirp_fraction; float far_chirp_rate; @@ -185,6 +186,7 @@ typedef struct Bat { int chirp_head; EchoBucket echo_queue[BAT_ECHO_QUEUE_TICKS]; int chirps_emitted_episode; + int chirps_overlapped; float chirp_duration_sum; float chirp_bandwidth_sum; float chirps_far; @@ -202,6 +204,7 @@ typedef struct Bat { float chirp_efficiency_reward; float valid_chirp_reward; float early_chirp_penalty; + float chirp_overlap_penalty; float step_cost; float progress_reward_scale; float bug_echo_reward_scale; @@ -567,6 +570,7 @@ void init(Bat* env) { if (env->chirp_efficiency_reward < 0.0f) env->chirp_efficiency_reward = 0.0f; if (env->valid_chirp_reward <= 0.0f) env->valid_chirp_reward = 0.0005f; if (env->early_chirp_penalty <= 0.0f) env->early_chirp_penalty = 0.001f; + if (env->chirp_overlap_penalty < 0.0f) env->chirp_overlap_penalty = 0.0f; if (env->bug_echo_reward_scale <= 0.0f) env->bug_echo_reward_scale = 0.0f; if (env->rng == 0) env->rng = 1; @@ -641,6 +645,7 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.chirps_remaining_ratio += 1.0f - bat_chirps_used_ratio(env); env->log.chirp_efficiency += chirp_efficiency; float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); + env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; env->log.far_chirp_fraction += env->chirps_far / chirps; env->log.near_chirp_fraction += env->chirps_near / chirps; float far_rate = env->chirps_far / fmaxf(1.0f, env->ticks_far); @@ -921,6 +926,7 @@ static inline void bat_reset_episode(Bat* env) { env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; env->chirps_emitted_episode = 0; + env->chirps_overlapped = 0; env->chirp_duration_sum = 0.0f; env->chirp_bandwidth_sum = 0.0f; env->chirps_far = 0.0f; @@ -1045,6 +1051,11 @@ static inline bool bat_try_emit_chirp(Bat* 
env) { return true; } +static inline bool bat_next_chirp_overlaps_return_window(Bat* env) { + if (env->chirps_emitted_episode <= 0) return false; + return env->tick - env->last_chirp_tick < env->max_chirp_age_ticks; +} + static inline int bat_update_chirp(Bat* env) { int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); if (emit) { @@ -1067,6 +1078,7 @@ void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; + bool chirp_overlaps_return_window = bat_next_chirp_overlaps_return_window(env); int chirp_status = bat_update_chirp(env); if (chirp_status == -2) { env->rewards[0] = -1.0f; @@ -1116,6 +1128,10 @@ void c_step(Bat* env) { env->rewards[0] -= env->step_cost; if (chirp_status > 0) { env->rewards[0] += env->valid_chirp_reward; + if (chirp_overlaps_return_window) { + env->rewards[0] -= env->chirp_overlap_penalty; + env->chirps_overlapped += 1; + } } else if (chirp_status < 0) { env->rewards[0] -= env->early_chirp_penalty; } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 61b5297d1c..9fcdc3e73f 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -43,6 +43,7 @@ void my_init(Env* env, Dict* kwargs) { env->chirp_efficiency_reward = dict_get(kwargs, "chirp_efficiency_reward")->value; env->valid_chirp_reward = dict_get(kwargs, "valid_chirp_reward")->value; env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; + env->chirp_overlap_penalty = dict_get(kwargs, "chirp_overlap_penalty")->value; env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; @@ -70,6 +71,7 @@ void my_log(Log* log, Dict* out) { dict_set(out, "chirps_used_ratio", log->chirps_used_ratio); dict_set(out, "chirps_remaining_ratio", log->chirps_remaining_ratio); dict_set(out, "chirp_efficiency", log->chirp_efficiency); + dict_set(out, "chirp_overlap_fraction", 
log->chirp_overlap_fraction); dict_set(out, "far_chirp_fraction", log->far_chirp_fraction); dict_set(out, "near_chirp_fraction", log->near_chirp_fraction); dict_set(out, "far_chirp_rate", log->far_chirp_rate); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 86742f57db..b5da1d738a 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -792,6 +792,42 @@ static int test_early_chirp_gets_penalty_and_emits_nothing(void) { return 0; } +static int test_chirp_before_echo_window_clears_gets_overlap_penalty(void) { + Bat env = make_test_env(); + c_reset(&env); + test_place_safe_stationary_scene(&env); + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.bug_echo_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.valid_chirp_reward = 0.0005f; + env.early_chirp_penalty = 0.0020f; + env.chirp_overlap_penalty = 0.0040f; + env.chirp_cooldown_ticks = 1; + env.max_chirp_age_ticks = 8; + test_set_emit_chirp_action(&env); + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_overlapped == 0); + + test_place_safe_stationary_scene(&env); + test_set_emit_chirp_action(&env); + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], + env.valid_chirp_reward - env.chirp_overlap_penalty, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 2); + ASSERT_TRUE(env.chirps_overlapped == 1); + + free_allocated(&env); + return 0; +} + static int test_reflection_arrives_at_two_way_travel_time(void) { float sound_speed = 100.0f; float distance = 25.0f; @@ -1290,6 +1326,7 @@ int main(void) { if (test_chirp_cooldown_accepts_only_after_delay()) return 1; if (test_valid_chirp_gets_reward_without_legacy_cost()) return 1; if (test_early_chirp_gets_penalty_and_emits_nothing()) return 1; + if 
(test_chirp_before_echo_window_clears_gets_overlap_penalty()) return 1; if (test_reflection_arrives_at_two_way_travel_time()) return 1; if (test_bins_only_observation_layout()) return 1; if (test_no_chirp_produces_silent_frequency_bins()) return 1; From 1c1f629c6c4092c6a71a73815ecf987d2ce85837 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 13:29:02 -0700 Subject: [PATCH 13/51] stabilize bat sonar curriculum metrics --- BAT_CURRICULUM.md | 514 ++++++++++++++++++++++++++++++++ BAT_SPEC.md | 82 +++-- config/bat.ini | 75 +++-- ocean/bat/bat.h | 122 ++++++-- ocean/bat/binding.c | 13 +- ocean/bat/tests/test_bat_core.c | 209 ++++++++++++- 6 files changed, 924 insertions(+), 91 deletions(-) create mode 100644 BAT_CURRICULUM.md diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md new file mode 100644 index 0000000000..4e54f2b48c --- /dev/null +++ b/BAT_CURRICULUM.md @@ -0,0 +1,514 @@ +# Bat Curriculum Strategy + +This note explains how Bat curriculum should work, why `curriculum_perf` can +appear pinned around `0.2`, and how to extend difficulty toward maneuvering bugs +without adding artificial reward dials. + +## Current Diagnosis + +The old `curriculum_perf` behavior was mostly structural. + +The old code computed: + +```text +curriculum_difficulty = (distance_norm + obstacle_norm) / 2 +curriculum_perf = success * curriculum_difficulty +``` + +With the current defaults: + +```ini +curriculum_initial_level = 3 +curriculum_start_obstacles = 1 +curriculum_max_obstacles = 3 +curriculum_obstacle_step = 18 +curriculum_start_bug_distance = 8.0 +curriculum_max_bug_distance = 56.0 +curriculum_bug_distance_step = 4.0 +``` + +At `curriculum_level ~= 5`, the bug starts around distance `28`, giving: + +```text +distance_norm ~= (28 - 8) / (56 - 8) ~= 0.42 +obstacle_norm = 0.0 because obstacle count is still 1 +curriculum_difficulty ~= (0.42 + 0.0) / 2 ~= 0.21 +``` + +So `curriculum_perf ~= 0.2` did not necessarily mean the policy hit an +impossible wall. 
It meant the old metric gave half of its difficulty credit to +obstacles, but obstacle difficulty stayed zero until level `18`. + +The current code logs split difficulty components and computes +`curriculum_difficulty` from active weighted components: + +```text +distance_norm = normalize(start_bug_dist) +obstacle_norm = normalize(num_obstacles) +budget_norm = normalize(chirp budget reduction) +motion_norm = 0 until bug maneuvers are added + +curriculum_difficulty = + (0.40 * distance_norm + + 0.25 * obstacle_norm + + 0.20 * budget_norm) / 0.85 +``` + +Motion difficulty is logged separately as `0`, but it does not lower the metric +ceiling before maneuver curricula exist. + +## Design Principles + +- Keep `base_perf` pure: `1.0` for catching the bug, `0.0` otherwise. +- Use composite `perf` for sweep ranking, but always sanity-check it against + `base_perf`, `curriculum_level`, and failure modes. +- Curriculum should change task distribution, not secretly define behavior with + dense reward shaping. +- Chirp timing pressure should come from physical constraints: + finite budget, cooldown, and overlapping echo ambiguity. +- Movement pressure should come from task dynamics, not a hover exploit. The + bat must keep a minimum forward speed so harder levels cannot collapse into + stationary timeout policies. +- Do not add generic timing-efficiency reward. +- Difficulty metrics should be smooth enough that W&B does not show fake + plateaus caused by integer schedule thresholds. +- Every curriculum rung should be reversible and testable. 
+ +## Recommended Metrics + +Add or keep these logs: + +```text +base_perf +perf +curriculum_perf +curriculum_level +curriculum_distance_difficulty +curriculum_obstacle_difficulty +curriculum_chirp_budget_difficulty +curriculum_difficulty +bug_motion_mode +bug_motion_speed +num_obstacles +chirp_budget +chirps_used_ratio +chirp_overlap_fraction +collision +timeout +SPS +``` + +Keep the W&B export list capped at 31 explicit metrics because PufferLib appends +`n` as the 32nd metric. Diagnostics such as `budget_difficulty`, +`curriculum_motion_difficulty`, chirp far/near fractions, and redundant inverse +ratios should stay internal unless they are needed for a specific sweep. +Do not remove `score` from `binding.c`; PufferLib's train worker reads +`metrics["env/score"]` when a child process exits. If the cap is tight, drop +`episode_return` instead. + +The key change is splitting `curriculum_difficulty` into components. If +`curriculum_perf` is low, we should be able to tell whether the policy is stuck +on distance, obstacles, chirp budget, or motion. + +## Recommended Difficulty Formula + +Use an explicit weighted difficulty. Until motion curriculum exists, renormalize +over active components: + +```text +distance_norm = normalize(start_bug_dist) +obstacle_norm = normalize(num_obstacles) +budget_norm = normalize(chirp budget pressure) +motion_norm = normalize(bug maneuver difficulty) + +curriculum_difficulty = + 0.40 * distance_norm + + 0.25 * obstacle_norm + + 0.20 * budget_norm + +curriculum_difficulty /= 0.85 + +curriculum_perf = base_perf * curriculum_difficulty +``` + +The exact weights can change, but the important property is that no inactive +component silently cuts the maximum metric in half. If `motion_norm` is not +enabled yet, either log it as `0` and accept the lower ceiling, or renormalize +active components. For sweeps, renormalizing active components is easier to +interpret. 
+ +## Recommended Stage Order + +### Stage 0: Known-good baseline + +Purpose: + +- Preserve the current solved rung as a fallback. + +Task: + +- One obstacle. +- Moderate starting bug distance. +- Current forward-only bat dynamics. +- Configurable minimum forward speed; brake cannot stop the bat below this + floor. +- Current chirp budget and overlap penalty. + +Gate: + +- `base_perf >= 0.80` +- collision not exploding +- timeout not dominating + +### Stage 1: Distance curriculum + +Purpose: + +- Make the bat solve larger spatial uncertainty before adding more clutter. + +Schedule: + +```text +start_bug_distance = 8 + level * distance_step +``` + +Recommendation: + +- Keep `curriculum_bug_distance_step` in the current `2.0..5.0` sweep range. +- Log `curriculum_distance_difficulty` directly. + +Gate: + +- advance after a small success count, not one lucky catch. +- current `curriculum_successes_per_level = 21` is conservative; for 50M-step + runs, sweep lower values such as `4..16`. + +### Stage 2: Obstacle curriculum + +Purpose: + +- Avoid the current metric plateau where obstacle difficulty is invisible until + level `18`. + +Recommendation: + +- Reduce default `curriculum_obstacle_step`. +- A practical next default is `6`, giving: + +```text +level 0..5: 1 obstacle +level 6..11: 2 obstacles +level 12+: 3 obstacles +``` + +Alternative: + +- Keep the count schedule but add obstacle size or reflector strength as a + smoother sub-rung. + +Gate: + +- Do not increase obstacles and reduce chirp budget on the same level unless + the previous rung is clearly solved. + +### Stage 3: Chirp budget curriculum + +Purpose: + +- Force useful chirp timing without artificial timing reward. + +Current behavior: + +- Observation includes `chirps_used / chirp_budget`. +- Chirping after the chirp budget is exhausted causes a `-1` terminal. +- Budget reduces as curriculum level increases. +- Valid chirps before the previous max echo window clears get a physical + overlap penalty. 
+ +Recommendation: + +- Keep terminal/logging pressure as the primary signal: + +```text +chirp_perf = clamp(1.0 - chirps_emitted / 15.0, 0.05, 1.0) +perf = base_perf * curriculum_difficulty * chirp_perf +``` + +- Keep `chirp_overlap_penalty` small and sweepable. +- Treat `chirp_overlap_fraction` as a diagnostic, not the main objective. +- Keep `budget_difficulty` and `chirp_efficiency` as diagnostics, but do not + multiply them into `perf`; the fixed 15-chirp reference gives cleaner Protein + ranking pressure across 10, 8, and 6 chirp policies. + +### Stage 4: Constant-velocity moving bug + +Purpose: + +- Make Doppler and reacquisition matter while keeping motion predictable. + +Current/near-term model: + +- Fixed speed and heading. +- Wall bounce: + - vertical wall flips `vx` + - horizontal wall flips `vy` + +Recommended knobs: + +```text +bug_speed +bug_wall_bounce_enabled +``` + +Gate: + +- Require maintained `base_perf` and non-collapsing `chirps_used_ratio`. +- Motion should not start before distance and obstacle rungs are stable. + +### Stage 5: Simple bug maneuvers + +Purpose: + +- Add nontrivial pursuit without jumping directly to adversarial behavior. + +Recommended maneuver order: + +1. Sine lateral motion + - Bug keeps forward velocity but adds low-frequency lateral acceleration. + - Knobs: `bug_maneuver_amplitude`, `bug_maneuver_period`. + +2. Smooth heading drift + - Bug heading changes slowly with bounded turn rate. + - Knobs: `bug_turn_rate`, `bug_turn_period`. + +3. Circular or oval path segments + - Bug follows simple parametric curves. + - Knobs: `bug_orbit_radius`, `bug_orbit_period`. + +4. Piecewise constant heading changes + - Bug chooses a new heading every N ticks. + - Knobs: `bug_heading_change_interval`, `bug_heading_change_angle`. + +5. Mild evasive steering + - Bug slowly biases away from bat only at higher curriculum. + - This should come late because it changes the task from tracking to pursuit. 
+ +Do not expose bug mode, bug position, or true velocity in observations. The bat +should infer motion from echoes. + +## Proposed Curriculum State + +Keep the current single integer `curriculum_level`, but derive separate +sub-difficulties from it: + +```text +distance_level = level +obstacle_level = max(0, level - obstacle_start_level) +budget_level = max(0, level - budget_start_level) +motion_level = max(0, level - motion_start_level) +``` + +Recommended starting points: + +```ini +obstacle_start_level = 4 +budget_start_level = 0 +motion_start_level = 10 +maneuver_start_level = 16 +``` + +This keeps one scalar progression while preventing all difficulty knobs from +turning on at once. + +## Eval Requirements + +Eval should support fixed curriculum levels so we can inspect specific rungs. + +Useful modes: + +```text +default latest curriculum level +fixed level 0 +fixed level 6 +fixed level 12 +fixed level 18 +fixed maneuver mode +``` + +This matters because an aggregate training metric can look fine while a +specific rung has bad behavior. + +## TDD Targets + +Add focused tests before changing curriculum code: + +```text +curriculum_difficulty_logs_distance_and_obstacle_components +curriculum_obstacles_advance_before_level_18 +curriculum_budget_reduces_monotonically_with_level +curriculum_motion_stays_zero_before_motion_start_level +bug_wall_bounce_flips_x_or_y_velocity +bug_sine_maneuver_keeps_speed_bounded +eval_fixed_curriculum_level_overrides_training_level +``` + +## Near-Term Recommendation + +Before adding maneuvers, do this: + +1. Add split difficulty logs: + - `curriculum_distance_difficulty` + - `curriculum_obstacle_difficulty` + - `curriculum_chirp_budget_difficulty` + - `curriculum_motion_difficulty` + +2. Change obstacle schedule so it starts contributing around level `6`, not + level `18`. + +3. Consider reducing `curriculum_successes_per_level` default from `21` to a + faster value such as `8..12`, while keeping it sweepable. + +4. 
Add fixed-level eval override so visual checks can inspect harder rungs + directly. + +5. Only then add simple bug maneuvers, starting with constant velocity wall + bounce and then sine/heading drift. + +The goal is a ladder where each rung is visibly harder, metrics explain why, +and the bat must improve sensing behavior without reward terms that directly +script the desired chirp timing. + +## Proposed Cleanup After Current Sweep + +Do not mix chirp-budget pressure into curriculum difficulty. + +Rationale: + +- More obstacles legitimately require more chirps. A cluttered arena should not + rank worse just because the bat used more chirps than it would in open space. +- Chirp count is a sensing-efficiency metric, not a world-difficulty metric. +- A shrinking chirp budget can make later curriculum levels impossible before + we know whether the policy has learned robust obstacle disambiguation. + +Proposed next curriculum split: + +```text +level 0: + no obstacles + moving bug only + +later levels: + increase bug start distance + then introduce obstacles + then increase obstacle count/clutter + then add maneuvering bug motion +``` + +Proposed curriculum difficulty: + +```text +curriculum_difficulty = + 0.5 * distance_difficulty + + 0.5 * obstacle_difficulty +``` + +If a component has not been activated yet, renormalize over active components +instead of letting inactive components cap the score. Once obstacle curriculum +is active, the two-component `0.5 / 0.5` interpretation is easy to explain: +half distance, half clutter. + +Proposed chirp handling: + +```text +max_chirps_per_episode = 15 +chirp_budget does not decrease with curriculum level +chirp_budget_difficulty is removed from curriculum difficulty +``` + +Reward/perf pressure for chirps should focus on intelligent use, not simply +fewer chirps everywhere: + +- Keep a finite chirp budget so chirps are not unlimited. +- Keep overlap penalty because overlapping echo returns are physically + ambiguous. 
+- Consider rewarding successful catches with a chirp-use bonus based on + `chirps_emitted / 15`, but avoid dense shaping that scripts exact chirp + timing. +- Keep `chirp_perf = clamp(1.0 - chirps_emitted / 15.0, 0.05, 1.0)` as a sweep + diagnostic/objective term, but interpret it together with obstacle difficulty + and not as an absolute "fewer chirps is always better" rule. + +This cleanup should be done in a clean commit after the current sweep is either +finished or intentionally stopped, because it changes how `perf` compares to +existing Bat sweep runs. + +## Bat3 Partial Sweep Notes + +These notes are from an in-progress `bat3` W&B peek on June 9, 2026. Treat them +as directional, not final. + +Early top-`perf` runs show: + +- `bat_min_speed` tends toward the low end, usually near `2.0`. +- `bat_turn_rate` tends toward the high end, often near `3pi`. +- `sound_speed` tends toward the high end, often `175..180`. +- `progress_reward_scale` tends toward the high end, around `0.11..0.12`. +- Good policies often catch with roughly `6..8` chirps. +- Highest `base_perf` runs can exceed `0.90`, but may use more chirps and rank + lower by `perf`. +- Highest `curriculum_perf` runs reach around levels `8..9`, but often pay with + higher collision/timeout and around `10` chirps. + +Behavior read: + +- The current interesting behavior is circle-search followed by full-speed + dash/intercept after apparent target acquisition. +- Do not remove this behavior unless harder fixed-level evals prove it is an + exploit. It may be a useful active-sensing search pattern. + +Metric implication: + +- `chirp_perf` is working as a sweep ranking term, but it also confirms that + "fewer chirps" cannot be the whole story once obstacles increase. +- More clutter can legitimately require more chirps, so chirp count should stay + separate from world/curriculum difficulty. + +Post-sweep PR gate: + +1. Update `config/bat.ini` defaults from the best sane run, not merely highest + `perf`. +2. 
Run one normal training pass with those defaults. +3. Eval fixed levels `0`, `4`, `7`, and `10`. +4. Commit only if visual behavior remains sane and the policy does not regress + to hover/spin/collision farming. + +Visual eval diagnosis from `fresh-wood-149`: + +- Default/easier eval looks good: circle-search followed by dash/intercept. +- Fixed level `10` performs poorly. +- Fixed level `7` reveals the likely failure mode: + - the bat spends many chirps during early search before acquiring the bug, + - once it finally gets a useful bug signal, the remaining chirp budget is low, + - it dashes toward the last known/acquired bug direction, + - if the bug moves enough after the final echoes, the bat keeps flying blind + and misses. + +Implication: + +- The next bottleneck is acquisition/reacquisition under finite chirp budget, + not basic forward flight. +- Be careful with any metric or reward that simply minimizes chirp count. At + harder distances or with obstacles, useful policies may need more search and + reacquisition chirps. +- This supports the proposed cleanup: keep a fixed chirp budget for now and + remove chirp-budget reduction from curriculum difficulty before adding harder + motion or more clutter. + +## Reward-Shaping Guardrails + +Bug-echo progress shaping is allowed, but it must not pay for passive target +motion. Compare the current bug echo path against the previous bug echo only +after the bat has displaced by at least `bug_echo_min_displacement`. If the echo +path is shorter, reward by `bug_echo_reward_scale`; if it is longer, apply a +weaker penalty using `bug_echo_farther_penalty_scale`, currently defaulting to +`0.10`. diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 3283c9319a..9e43528eac 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -93,6 +93,8 @@ Physics: - Fixed control/physics timestep, default `1/60` second. - Bat motion is acceleration-limited and turn-rate-limited. +- The bat has a configurable minimum forward speed. 
It cannot hover; brake + only reduces speed down to this stall-speed floor. - Bug motion uses a simple deterministic or seeded random policy. - The bug reflects from walls and obstacles. - The bat collides with walls and obstacles. @@ -211,7 +213,7 @@ Rationale: - Discrete chirp bins keep the policy simple and cheap. - Bat movement is scalar forward speed plus heading. The velocity vector is recomputed as `heading * speed` every tick. -- Brake clamps speed at zero. The bat cannot fly backward. +- Brake clamps speed at `bat_min_speed`. The bat cannot fly backward or hover. - Strafe/lateral velocity is intentionally unavailable. This avoids sideways spiral policies and makes the visual behavior match the game fantasy better than a full inertial top-down spacecraft model. @@ -324,7 +326,11 @@ Default reward model: - Sound-derived bug echo progress reward: - when a bug echo returns with a shorter acoustic path than the previous bug echo, add a small shaped reward, - - farther bug echoes update the previous bug echo path but do not reward, + - this reward only applies if the bat has moved at least + `bug_echo_min_displacement` since the previous scored bug echo, so a + stationary bat cannot farm reward from the bug moving closer by itself, + - farther bug echoes update the previous bug echo path and receive a weaker + penalty scaled by `bug_echo_farther_penalty_scale`, default `0.10`, - static wall and obstacle echoes do not receive this reward. - Optional silence bonus or energy budget should wait until the basic task trains. 
@@ -344,7 +350,10 @@ Progress reward: - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` when a returning bug echo indicates the bug is closer than the previous bug - echo + echo and the bat has moved enough since that previous echo + - `reward -= bug_echo_reward_scale * bug_echo_farther_penalty_scale * + echo_path_increase / max_echo_range` when a later moved-enough bug echo is + farther away - Default starting values: - `progress_reward_scale = 0.05` - `step_cost = 0.001` @@ -372,27 +381,28 @@ Reset: - Bat and bug should not spawn overlapping obstacles or each other. - Initial bug distance should support curriculum. -Logged metrics: +W&B exported metrics: + +- Export at most 31 explicit `dict_set(out, ...)` metrics from `binding.c`. + PufferLib appends `n`, giving the 32-key cap. Keep lower-value diagnostics + internal unless they are actively needed for sweep decisions. - `perf` - composite sweep objective: - `base_perf * curriculum_difficulty * budget_difficulty * chirp_efficiency` + `base_perf * curriculum_difficulty * chirp_perf` - `base_perf` - pure catch rate: `1.0` for catching the bug, `0.0` otherwise - `curriculum_level` - `curriculum_difficulty` - - normalized actual episode difficulty from start bug distance and obstacle - count + - weighted normalized episode difficulty from split curriculum components - `curriculum_perf` - `base_perf * curriculum_difficulty`; useful diagnostic for level progress without chirp-budget weighting -- `budget_difficulty` - - sweep-pressure multiplier derived from selected `max_chirps_per_episode`; - `15` chirps maps to the floor `0.50`, budgets below `6` chirps map to - `1.0`, and `20` chirps is intentionally outside the default sweep because - it was too easy in the June 9, 2026 budget grid +- `curriculum_distance_difficulty` +- `curriculum_obstacle_difficulty` +- `curriculum_chirp_budget_difficulty` - `score` -- `episode_return` + - 
required by PufferLib train worker; do not remove from `binding.c` - `episode_length` - `success` - `collision` @@ -400,18 +410,21 @@ Logged metrics: - `bug_distance_start` - `bug_distance_final` - `bug_distance_delta` +- `num_obstacles` - `chirps_emitted` - `chirp_budget` - `chirps_used_ratio` -- `chirps_remaining_ratio` - `chirp_efficiency` - `0.5` if the full budget was spent, approaching `1.0` when few chirps were used +- `chirp_perf` + - sweep-objective chirp multiplier: + `clamp(1.0 - chirps_emitted / 15.0, 0.05, 1.0)` + - this uses a fixed 15-chirp reference instead of the current per-level + budget so 6-chirp and 8-chirp policies remain meaningfully separated - `chirp_overlap_fraction` - fraction of emitted chirps that were sent before the previous chirp's max return window cleared -- `far_chirp_fraction` -- `near_chirp_fraction` - `far_chirp_rate` - `near_chirp_rate` - `chirp_tempo_ratio` @@ -453,6 +466,7 @@ Config knobs: - `ear_separation_scale` - `bug_radius` - `bat_max_speed` +- `bat_min_speed` - `bat_accel` - `bat_turn_rate` - `bug_speed` @@ -469,6 +483,8 @@ Config knobs: - `chirp_cost` - `chirp_efficiency_reward` - `chirp_overlap_penalty` +- `bug_echo_farther_penalty_scale` +- `bug_echo_min_displacement` - `step_cost` - `progress_reward_scale` - `collision_penalty` @@ -543,16 +559,36 @@ Obstacle reflections: ## Training and Sweep Operations +- Curriculum design notes are tracked in `BAT_CURRICULUM.md`. Keep that file + updated when changing level progression, difficulty metrics, or bug motion + rungs. +- The next proposed curriculum cleanup is documented in + `BAT_CURRICULUM.md`: start level 0 with no obstacles, remove chirp-budget + pressure from curriculum difficulty, and use a simpler distance/obstacle + curriculum difficulty. Do this only after the current sweep is finished or + intentionally stopped, because it changes `perf` comparability. - Keep `base_perf` as pure catch rate. Use composite `perf` as the sweep - objective. 
It rewards catching harder curriculum levels with fewer chirps and - under stricter configured chirp budgets without changing in-episode reward - shaping. Current budget scoring treats `15` chirps as the easy floor and - gives full pressure below `6` chirps; do not include `20` chirps in the - default budget sweep. + objective. It rewards catching harder curriculum levels with fewer chirps + without changing in-episode reward shaping: + `perf = base_perf * curriculum_difficulty * chirp_perf`. +- `chirp_perf` uses a fixed 15-chirp reference: + `clamp(1.0 - chirps_emitted / 15.0, 0.05, 1.0)`. This intentionally gives + strong sweep-ranking separation between 10, 8, and 6 chirps. Do not multiply + `perf` by both `budget_difficulty` and `chirp_efficiency`; that made the + metric harder to reason about and double-counted chirp pressure. +- Keep `score` exported in `binding.c`. If the 31-metric cap is tight, drop + `episode_return` before dropping `score`; PufferLib reads `metrics["env/score"]` + when train workers finish. - Reward terms are training scaffolding and should remain sweepable. `progress_reward_scale` is true-distance shaping and should usually stay below `bug_echo_reward_scale`, which is based on closer received bug reflections. - Forward-only movement dynamics should be swept with bounded ranges: - `env.bat_max_speed` in `[8.0, 22.0]`, `env.bat_accel` in `[40.0, 90.0]`, - and `env.bat_turn_rate` in `[4.0, 3pi]`. + `env.bat_max_speed` in `[8.0, 22.0]`, `env.bat_min_speed` in `[2.0, 6.0]`, + `env.bat_accel` in `[40.0, 90.0]`, and `env.bat_turn_rate` in `[4.0, 3pi]`. +- Do not remove the minimum forward speed invariant. If the bat can hover at + zero velocity, PPO can learn a bad local optimum where it avoids collision + and timeout-shapes instead of exploring movement. +- Bug-echo progress shaping must be gated on bat displacement. 
Closer bug + echoes can reward, and farther bug echoes can weakly penalize, but neither + should pay out when the bat has not moved enough since the prior bug echo. - Acoustic scale terms should be swept before increasing model size. Current bounded acoustic sweep knobs are `env.sound_speed` in `[80.0, 180.0]` and `env.ear_separation_scale` in `[1.0, 3.0]`. - The June 9, 2026 `bat1` sweep strongly improved after the forward-only dynamics change. Best observed run was `sage-cherry-92` with `perf ~= 0.953`, diff --git a/config/bat.ini b/config/bat.ini index 898d1ce9f0..7ebfb10e64 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -3,7 +3,7 @@ env_name = bat [vec] total_agents = 4096 -num_buffers = 8 +num_buffers = 4 num_threads = 8 [policy] @@ -22,52 +22,55 @@ width = 64 height = 64 num_obstacles = 3 bat_radius = 2.0 -ear_separation_scale = 2.0 +ear_separation_scale = 2.194554 bug_radius = 1.5 -bat_max_speed = 11.9 -bat_accel = 60.0 -bat_turn_rate = 6.2831853 +bat_max_speed = 20.352376 +bat_min_speed = 2.0 +bat_accel = 40.0 +bat_turn_rate = 9.424778 bug_speed = 4.0 max_steps = 512 curriculum_enabled = 1 -curriculum_initial_level = 3 +curriculum_initial_level = 2 curriculum_start_obstacles = 1 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 18 -curriculum_successes_per_level = 21 +curriculum_obstacle_step = 9 +curriculum_successes_per_level = 16 curriculum_start_bug_distance = 8.0 curriculum_max_bug_distance = 56.0 -curriculum_bug_distance_step = 4.0 +curriculum_bug_distance_step = 5.0 freq_bins_per_ear = 16 max_echo_range = 80.0 -sound_speed = 120.0 +sound_speed = 180.0 reflector_spacing = 8.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 12 -max_chirps_per_episode = 10 -min_chirps_per_episode = 5 +max_chirps_per_episode = 15 +min_chirps_per_episode = 6 chirp_budget_decay_levels = 4 chirp_cost = 0.0 -chirp_efficiency_reward = 1.0 -valid_chirp_reward = 0.00005 -early_chirp_penalty = 0.0043 -chirp_overlap_penalty = 0.0040 -bug_echo_reward_scale = 0.13 
-step_cost = 0.0002 -progress_reward_scale = 0.076 -collision_penalty = 1.0 +chirp_efficiency_reward = 2.0 +valid_chirp_reward = 0.0000106 +early_chirp_penalty = 0.006 +chirp_overlap_penalty = 0.008 +bug_echo_reward_scale = 0.235466 +bug_echo_farther_penalty_scale = 0.167897 +bug_echo_min_displacement = 1.0 +step_cost = 0.000193626 +progress_reward_scale = 0.12 +collision_penalty = 1.27806 [train] -total_timesteps = 50_000_000 -learning_rate = 0.027 -gamma = 0.994 -gae_lambda = 0.98 -replay_ratio = 1.25 +total_timesteps = 47_352_761 +learning_rate = 0.0193521 +gamma = 0.998791 +gae_lambda = 0.969562 +replay_ratio = 1.17746 clip_coef = 0.2 vf_coef = 2.0 vf_clip_coef = 0.2 max_grad_norm = 1.5 -ent_coef = 0.002 +ent_coef = 0.0005 beta1 = 0.95 beta2 = 0.999 eps = 1e-12 @@ -87,7 +90,7 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,chirp_overlap_penalty,bug_echo_reward_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level +sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_min_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,chirp_overlap_penalty,bug_echo_reward_scale,bug_echo_farther_penalty_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path 
= {} match_num_games = {} match_enemy_hidden_size = {} @@ -147,6 +150,12 @@ min = 8.0 max = 22.0 scale = auto +[sweep.env.bat_min_speed] +distribution = uniform +min = 2.0 +max = 6.0 +scale = auto + [sweep.env.bat_accel] distribution = uniform min = 40.0 @@ -213,6 +222,12 @@ min = 0.02 max = 0.35 scale = auto +[sweep.env.bug_echo_farther_penalty_scale] +distribution = uniform +min = 0.05 +max = 0.20 +scale = auto + [sweep.env.collision_penalty] distribution = uniform min = 0.5 @@ -257,12 +272,12 @@ scale = auto [sweep.env.curriculum_obstacle_step] distribution = int_uniform -min = 8 -max = 28 +min = 4 +max = 14 scale = auto [sweep.env.curriculum_successes_per_level] distribution = int_uniform min = 4 -max = 24 +max = 16 scale = auto diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 0156c240b9..f557e8e89b 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -47,6 +47,8 @@ #define BAT_ECHO_QUEUE_TICKS 256 #define BAT_BUDGET_EASY_CHIRPS 15.0f #define BAT_BUDGET_EDGE_CHIRPS 5.0f +#define BAT_CHIRP_PERF_REFERENCE_CHIRPS 15.0f +#define BAT_CHIRP_PERF_FLOOR 0.05f #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 @@ -87,7 +89,12 @@ typedef struct Log { float curriculum_level; float curriculum_difficulty; float curriculum_perf; + float curriculum_distance_difficulty; + float curriculum_obstacle_difficulty; + float curriculum_chirp_budget_difficulty; + float curriculum_motion_difficulty; float budget_difficulty; + float num_obstacles; float bug_distance_start; float bug_distance_final; float bug_distance_delta; @@ -96,6 +103,7 @@ typedef struct Log { float chirps_used_ratio; float chirps_remaining_ratio; float chirp_efficiency; + float chirp_perf; float chirp_overlap_fraction; float far_chirp_fraction; float near_chirp_fraction; @@ -152,6 +160,7 @@ typedef struct Bat { float bat_radius; float ear_separation_scale; float bat_max_speed; + float bat_min_speed; float bat_accel; float bat_turn_rate; @@ -208,9 +217,13 @@ typedef struct Bat { float step_cost; float 
progress_reward_scale; float bug_echo_reward_scale; + float bug_echo_farther_penalty_scale; + float bug_echo_min_displacement; float tick_bug_echo_energy; float tick_bug_echo_path; float last_bug_echo_path; + float last_bug_echo_bat_x; + float last_bug_echo_bat_y; float collision_penalty; float prev_bug_dist; float start_bug_dist; @@ -394,18 +407,55 @@ static inline float bat_chirp_efficiency(Bat* env) { return 0.5f + 0.5f * (1.0f - bat_chirps_used_ratio(env)); } +static inline float bat_chirp_perf(Bat* env) { + float raw = 1.0f - env->chirps_emitted_episode / BAT_CHIRP_PERF_REFERENCE_CHIRPS; + return bat_clampf(raw, BAT_CHIRP_PERF_FLOOR, 1.0f); +} + +static inline float bat_min_forward_speed(Bat* env) { + float min_speed = env->bat_min_speed; + if (min_speed <= 0.0f) { + min_speed = 0.20f * env->bat_max_speed; + } + return bat_clampf(min_speed, 0.0f, env->bat_max_speed); +} + static inline float bat_norm_range(float value, float lo, float hi) { float span = hi - lo; if (span <= 0.000001f) return 0.0f; return bat_clampf((value - lo) / span, 0.0f, 1.0f); } -static inline float bat_curriculum_difficulty(Bat* env) { - float distance = bat_norm_range(env->start_bug_dist, +static inline float bat_curriculum_distance_difficulty(Bat* env) { + return bat_norm_range(env->start_bug_dist, env->curriculum_start_bug_distance, env->curriculum_max_bug_distance); - float obstacles = bat_norm_range((float)env->num_obstacles, +} + +static inline float bat_curriculum_obstacle_difficulty(Bat* env) { + return bat_norm_range((float)env->num_obstacles, (float)env->curriculum_start_obstacles, (float)env->curriculum_max_obstacles); - return (distance + obstacles) / 2.0f; +} + +static inline float bat_curriculum_chirp_budget_difficulty(Bat* env) { + float span = (float)(env->max_chirps_per_episode - env->min_chirps_per_episode); + if (span <= 0.000001f) return 0.0f; + float budget = env->chirp_budget > 0 ? 
(float)env->chirp_budget : (float)env->max_chirps_per_episode; + return bat_clampf(((float)env->max_chirps_per_episode - budget) / span, 0.0f, 1.0f); +} + +static inline float bat_curriculum_motion_difficulty(Bat* env) { + (void)env; + return 0.0f; +} + +static inline float bat_curriculum_difficulty(Bat* env) { + float distance = bat_curriculum_distance_difficulty(env); + float obstacles = bat_curriculum_obstacle_difficulty(env); + float budget = bat_curriculum_chirp_budget_difficulty(env); + float active_weight = 0.40f + 0.25f + 0.20f; + if (active_weight <= 0.000001f) return 0.0f; + float weighted = 0.40f * distance + 0.25f * obstacles + 0.20f * budget; + return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } static inline float bat_budget_difficulty(Bat* env) { @@ -548,6 +598,8 @@ void init(Bat* env) { env->ear_separation_scale = bat_clampf(env->ear_separation_scale, 0.25f, 2.0f); if (env->bug_radius <= 0.0f) env->bug_radius = 1.5f; if (env->bat_max_speed <= 0.0f) env->bat_max_speed = 12.0f; + if (env->bat_min_speed <= 0.0f) env->bat_min_speed = 0.20f * env->bat_max_speed; + env->bat_min_speed = bat_min_forward_speed(env); if (env->bat_accel <= 0.0f) env->bat_accel = 30.0f; if (env->bat_turn_rate <= 0.0f) env->bat_turn_rate = BAT_PI; if (env->bug_speed <= 0.0f) env->bug_speed = 4.0f; @@ -572,6 +624,9 @@ void init(Bat* env) { if (env->early_chirp_penalty <= 0.0f) env->early_chirp_penalty = 0.001f; if (env->chirp_overlap_penalty < 0.0f) env->chirp_overlap_penalty = 0.0f; if (env->bug_echo_reward_scale <= 0.0f) env->bug_echo_reward_scale = 0.0f; + if (env->bug_echo_farther_penalty_scale <= 0.0f) env->bug_echo_farther_penalty_scale = 0.10f; + env->bug_echo_farther_penalty_scale = bat_clampf(env->bug_echo_farther_penalty_scale, 0.0f, 1.0f); + if (env->bug_echo_min_displacement <= 0.0f) env->bug_echo_min_displacement = 1.0f; if (env->rng == 0) env->rng = 1; if (env->num_obstacles < 0) env->num_obstacles = 0; @@ -622,9 +677,14 @@ void free_allocated(Bat* env) { 
static inline void add_log(Bat* env, float success, float collision, float timeout) { float final_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); float curriculum_difficulty = bat_curriculum_difficulty(env); + float distance_difficulty = bat_curriculum_distance_difficulty(env); + float obstacle_difficulty = bat_curriculum_obstacle_difficulty(env); + float chirp_budget_difficulty = bat_curriculum_chirp_budget_difficulty(env); + float motion_difficulty = bat_curriculum_motion_difficulty(env); float budget_difficulty = bat_budget_difficulty(env); float chirp_efficiency = bat_chirp_efficiency(env); - env->log.perf += success * curriculum_difficulty * budget_difficulty * chirp_efficiency; + float chirp_perf = bat_chirp_perf(env); + env->log.perf += success * curriculum_difficulty * chirp_perf; env->log.base_perf += success; env->log.score += env->episode_return; env->log.episode_return += env->episode_return; @@ -635,7 +695,12 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_level += env->curriculum_level; env->log.curriculum_difficulty += curriculum_difficulty; env->log.curriculum_perf += success * curriculum_difficulty; + env->log.curriculum_distance_difficulty += distance_difficulty; + env->log.curriculum_obstacle_difficulty += obstacle_difficulty; + env->log.curriculum_chirp_budget_difficulty += chirp_budget_difficulty; + env->log.curriculum_motion_difficulty += motion_difficulty; env->log.budget_difficulty += budget_difficulty; + env->log.num_obstacles += env->num_obstacles; env->log.bug_distance_start += env->start_bug_dist; env->log.bug_distance_final += final_dist; env->log.bug_distance_delta += env->start_bug_dist - final_dist; @@ -644,6 +709,7 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.chirps_used_ratio += bat_chirps_used_ratio(env); env->log.chirps_remaining_ratio += 1.0f - bat_chirps_used_ratio(env); env->log.chirp_efficiency += 
chirp_efficiency; + env->log.chirp_perf += chirp_perf; float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; env->log.far_chirp_fraction += env->chirps_far / chirps; @@ -890,16 +956,17 @@ void compute_observations(Bat* env) { env->observations[BAT_CHIRP_DURATION_OBS] = env->last_chirp_duration; env->observations[BAT_CHIRPS_USED_OBS] = bat_chirps_used_ratio(env); float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); - env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, -1.0f, 1.0f); + env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, 0.0f, 1.0f); env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); } static inline void bat_reset_episode(Bat* env) { env->tick = 0; - env->bat_vx = 0.0f; - env->bat_vy = 0.0f; env->bat_turn_velocity = 0.0f; env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + float initial_speed = bat_min_forward_speed(env); + env->bat_vx = cosf(env->bat_heading) * initial_speed; + env->bat_vy = sinf(env->bat_heading) * initial_speed; if (env->curriculum_enabled && env->curriculum_level < env->curriculum_initial_level) { env->curriculum_level = env->curriculum_initial_level; } @@ -942,6 +1009,8 @@ static inline void bat_reset_episode(Bat* env) { env->episode_return = 0.0f; env->start_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); env->prev_bug_dist = env->start_bug_dist; + env->last_bug_echo_bat_x = env->bat_x; + env->last_bug_echo_bat_y = env->bat_y; compute_observations(env); } @@ -995,19 +1064,22 @@ static inline void bat_update_motion(Bat* env, float dt) { float fx = cosf(env->bat_heading); float fy = sinf(env->bat_heading); float speed = env->bat_vx * fx + env->bat_vy * fy; - if (speed < 0.0f) speed = 0.0f; + float min_speed = bat_min_forward_speed(env); + if (speed < 
min_speed) speed = min_speed; - env->bat_turn_velocity = 0.0f; - if (turn == BAT_TURN_LEFT) env->bat_turn_velocity = -env->bat_turn_rate; - if (turn == BAT_TURN_RIGHT) env->bat_turn_velocity = env->bat_turn_rate; + if (move == BAT_THRUST_FORWARD) speed += env->bat_accel * dt; + if (move == BAT_BRAKE) speed -= env->bat_accel * dt; + speed = bat_clampf(speed, min_speed, env->bat_max_speed); + + float turn_command = 0.0f; + if (turn == BAT_TURN_LEFT) turn_command = -1.0f; + if (turn == BAT_TURN_RIGHT) turn_command = 1.0f; + float speed_ratio = env->bat_max_speed > 0.0f ? speed / env->bat_max_speed : 0.0f; + env->bat_turn_velocity = turn_command * env->bat_turn_rate * bat_clampf(speed_ratio, 0.0f, 1.0f); env->bat_heading += env->bat_turn_velocity * dt; if (env->bat_heading > BAT_PI) env->bat_heading -= 2.0f * BAT_PI; if (env->bat_heading < -BAT_PI) env->bat_heading += 2.0f * BAT_PI; - if (move == BAT_THRUST_FORWARD) speed += env->bat_accel * dt; - if (move == BAT_BRAKE) speed -= env->bat_accel * dt; - speed = bat_clampf(speed, 0.0f, env->bat_max_speed); - float heading_fx = cosf(env->bat_heading); float heading_fy = sinf(env->bat_heading); env->bat_vx = heading_fx * speed; @@ -1147,11 +1219,23 @@ void c_step(Bat* env) { compute_observations(env); if (env->tick_bug_echo_path > 0.0f) { - if (env->last_bug_echo_path > 0.0f && env->tick_bug_echo_path < env->last_bug_echo_path) { - float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) / fmaxf(1.0f, env->max_echo_range); - env->rewards[0] += env->bug_echo_reward_scale * echo_progress; + if (env->last_bug_echo_path > 0.0f) { + float bat_echo_displacement = bat_dist(env->last_bug_echo_bat_x, env->last_bug_echo_bat_y, + env->bat_x, env->bat_y); + if (bat_echo_displacement >= env->bug_echo_min_displacement) { + float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) + / fmaxf(1.0f, env->max_echo_range); + if (echo_progress > 0.0f) { + env->rewards[0] += env->bug_echo_reward_scale * 
echo_progress; + } else if (echo_progress < 0.0f) { + env->rewards[0] += env->bug_echo_reward_scale + * env->bug_echo_farther_penalty_scale * echo_progress; + } + } } env->last_bug_echo_path = env->tick_bug_echo_path; + env->last_bug_echo_bat_x = env->bat_x; + env->last_bug_echo_bat_y = env->bat_y; } env->episode_return += env->rewards[0]; } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 9fcdc3e73f..0cb1f4d123 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -17,6 +17,7 @@ void my_init(Env* env, Dict* kwargs) { env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->bug_radius = dict_get(kwargs, "bug_radius")->value; env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; + env->bat_min_speed = dict_get(kwargs, "bat_min_speed")->value; env->bat_accel = dict_get(kwargs, "bat_accel")->value; env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; env->bug_speed = dict_get(kwargs, "bug_speed")->value; @@ -45,6 +46,8 @@ void my_init(Env* env, Dict* kwargs) { env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; env->chirp_overlap_penalty = dict_get(kwargs, "chirp_overlap_penalty")->value; env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; + env->bug_echo_farther_penalty_scale = dict_get(kwargs, "bug_echo_farther_penalty_scale")->value; + env->bug_echo_min_displacement = dict_get(kwargs, "bug_echo_min_displacement")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; env->collision_penalty = dict_get(kwargs, "collision_penalty")->value; @@ -55,25 +58,25 @@ void my_log(Log* log, Dict* out) { dict_set(out, "perf", log->perf); dict_set(out, "base_perf", log->base_perf); dict_set(out, "score", log->score); - dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); dict_set(out, "collision", log->collision); dict_set(out, 
"timeout", log->timeout); dict_set(out, "curriculum_level", log->curriculum_level); dict_set(out, "curriculum_difficulty", log->curriculum_difficulty); dict_set(out, "curriculum_perf", log->curriculum_perf); - dict_set(out, "budget_difficulty", log->budget_difficulty); + dict_set(out, "curriculum_distance_difficulty", log->curriculum_distance_difficulty); + dict_set(out, "curriculum_obstacle_difficulty", log->curriculum_obstacle_difficulty); + dict_set(out, "curriculum_chirp_budget_difficulty", log->curriculum_chirp_budget_difficulty); + dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "bug_distance_start", log->bug_distance_start); dict_set(out, "bug_distance_final", log->bug_distance_final); dict_set(out, "bug_distance_delta", log->bug_distance_delta); dict_set(out, "chirps_emitted", log->chirps_emitted); dict_set(out, "chirp_budget", log->chirp_budget); dict_set(out, "chirps_used_ratio", log->chirps_used_ratio); - dict_set(out, "chirps_remaining_ratio", log->chirps_remaining_ratio); dict_set(out, "chirp_efficiency", log->chirp_efficiency); + dict_set(out, "chirp_perf", log->chirp_perf); dict_set(out, "chirp_overlap_fraction", log->chirp_overlap_fraction); - dict_set(out, "far_chirp_fraction", log->far_chirp_fraction); - dict_set(out, "near_chirp_fraction", log->near_chirp_fraction); dict_set(out, "far_chirp_rate", log->far_chirp_rate); dict_set(out, "near_chirp_rate", log->near_chirp_rate); dict_set(out, "chirp_tempo_ratio", log->chirp_tempo_ratio); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index b5da1d738a..b13378feb7 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -153,6 +153,29 @@ static int test_chirp_efficiency_scores_low_usage_above_full_budget(void) { return 0; } +static int test_chirp_perf_uses_fixed_fifteen_chirp_reference(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.chirps_emitted_episode = 0; + ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 1.0f, 
0.0001f); + + env.chirps_emitted_episode = 6; + ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.60f, 0.0001f); + + env.chirps_emitted_episode = 8; + ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.4666667f, 0.0001f); + + env.chirps_emitted_episode = 15; + ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.05f, 0.0001f); + + env.chirps_emitted_episode = 30; + ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.05f, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_success_reward_includes_chirp_efficiency_bonus(void) { Bat env = make_test_env(); env.chirp_efficiency_reward = 1.0f; @@ -191,7 +214,7 @@ static int test_chirp_budget_logs_ratios_for_wandb(void) { return 0; } -static int test_curriculum_perf_uses_success_and_actual_difficulty(void) { +static int test_curriculum_perf_logs_split_weighted_difficulty_components(void) { Bat env = make_test_env(); c_reset(&env); @@ -200,21 +223,30 @@ static int test_curriculum_perf_uses_success_and_actual_difficulty(void) { env.curriculum_start_obstacles = 1; env.curriculum_max_obstacles = 3; env.num_obstacles = 2; - env.max_chirps_per_episode = 10; - env.min_chirps_per_episode = 5; - env.chirp_budget = 8; + env.max_chirps_per_episode = 15; + env.min_chirps_per_episode = 6; + env.chirp_budget = 12; env.start_bug_dist = 32.0f; - ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_obstacle_difficulty(&env), 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_chirp_budget_difficulty(&env), 0.3333333f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_motion_difficulty(&env), 0.0000000f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.4607843f, 0.0001f); add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.5000000f, 0.0001f); + 
ASSERT_FLOAT_NEAR(env.log.curriculum_distance_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_obstacle_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_chirp_budget_difficulty, 0.3333333f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_motion_difficulty, 0.0000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.4607843f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.4607843f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.num_obstacles, 2.0f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.4607843f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.0f, 0.0001f); free_allocated(&env); @@ -264,8 +296,9 @@ static int test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency(v ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.55f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.50f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.perf, 0.20625f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.chirp_perf, 0.5333334f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3823529f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.2039215f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); @@ -636,6 +669,49 @@ static int test_bat_cannot_accelerate_backward_from_brake(void) { return 0; } +static int test_bat_reset_starts_with_forward_stall_speed(void) { + Bat env = make_test_env(); + c_reset(&env); + + float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); + ASSERT_TRUE(forward >= 0.19f * env.bat_max_speed); + ASSERT_FLOAT_NEAR(env.observations[BAT_FORWARD_SPEED_OBS], forward / env.bat_max_speed, 0.0001f); + 
+ free_allocated(&env); + return 0; +} + +static int test_bat_brake_clamps_to_forward_stall_speed(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + env.bat_heading = 0.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.actions[0] = BAT_BRAKE; + env.actions[1] = BAT_TURN_NONE; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + c_step(&env); + + float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); + ASSERT_TRUE(forward >= 0.19f * env.bat_max_speed); + ASSERT_TRUE(env.bat_x > 20.0f); + + free_allocated(&env); + return 0; +} + static int test_bat_velocity_is_locked_to_heading(void) { Bat env = make_test_env(); c_reset(&env); @@ -669,6 +745,70 @@ static int test_bat_velocity_is_locked_to_heading(void) { return 0; } +static int test_bat_zero_speed_recovers_to_forward_arc(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + env.bat_heading = 0.25f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_LEFT; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + float start_x = env.bat_x; + float start_y = env.bat_y; + c_step(&env); + + float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); + ASSERT_TRUE(forward >= 0.19f * env.bat_max_speed); + ASSERT_TRUE(bat_dist(start_x, start_y, env.bat_x, env.bat_y) > 0.0f); + ASSERT_TRUE(fabsf(env.bat_heading - 0.25f) > 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_bat_turn_rate_scales_with_forward_speed(void) { + Bat env = make_test_env(); + 
c_reset(&env); + + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + env.bat_heading = 0.0f; + env.bat_vx = env.bat_max_speed * 0.5f; + env.bat_vy = 0.0f; + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_RIGHT; + env.actions[2] = 0.0f; + env.actions[3] = 7.0f; + env.actions[4] = 1.0f; + env.actions[5] = 0.0f; + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.bat_turn_velocity, env.bat_turn_rate * 0.5f, 0.0001f); + ASSERT_FLOAT_NEAR(env.bat_heading, env.bat_turn_rate * 0.5f * BAT_TICK_RATE, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_bat_speed_action_space_has_no_strafe(void) { ASSERT_TRUE(BAT_MOVE_ACTIONS == 3); ASSERT_TRUE(BAT_NOOP == 0); @@ -1129,6 +1269,8 @@ static int test_bug_echo_reward_is_added_when_bug_echo_is_closer(void) { c_reset(&env); env.bug_echo_reward_scale = 0.05f; env.last_bug_echo_path = 20.0f; + env.last_bug_echo_bat_x = 8.0f; + env.last_bug_echo_bat_y = 10.0f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; env.chirp_cost = 0.0f; @@ -1152,16 +1294,19 @@ static int test_bug_echo_reward_is_added_when_bug_echo_is_closer(void) { return 0; } -static int test_bug_echo_reward_ignores_farther_bug_echo(void) { +static int test_bug_echo_reward_requires_bat_displacement(void) { Bat env = make_test_env(); c_reset(&env); env.bug_echo_reward_scale = 0.05f; env.last_bug_echo_path = 20.0f; + env.last_bug_echo_bat_x = 10.0f; + env.last_bug_echo_bat_y = 10.0f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; env.chirp_cost = 0.0f; env.bat_x = 10.0f; env.bat_y = 10.0f; + env.bat_heading = 0.0f; env.bat_vx = 0.0f; env.bat_vy = 0.0f; env.bug_vx = 0.0f; @@ -1169,11 +1314,41 @@ static int test_bug_echo_reward_ignores_farther_bug_echo(void) { env.bug_x = 50.0f; env.bug_y = 50.0f; bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 25.0f, BAT_ECHO_BUG); + bat_add_echo_event(&env, 0, 
1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_BUG); c_step(&env); ASSERT_FLOAT_NEAR(env.rewards[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.last_bug_echo_path, 15.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_bug_echo_reward_penalizes_farther_bug_echo_weakly(void) { + Bat env = make_test_env(); + c_reset(&env); + env.bug_echo_reward_scale = 0.05f; + env.last_bug_echo_path = 20.0f; + env.last_bug_echo_bat_x = 8.0f; + env.last_bug_echo_bat_y = 10.0f; + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.chirp_cost = 0.0f; + env.bat_x = 10.0f; + env.bat_y = 10.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + env.bug_x = 50.0f; + env.bug_y = 50.0f; + bat_clear_echo_queue(&env); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 25.0f, BAT_ECHO_BUG); + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.rewards[0], -0.0003125f, 0.0001f); ASSERT_FLOAT_NEAR(env.last_bug_echo_path, 25.0f, 0.0001f); free_allocated(&env); @@ -1304,9 +1479,10 @@ int main(void) { if (test_chirp_budget_decreases_with_curriculum_level()) return 1; if (test_chirping_after_budget_terminates_with_penalty()) return 1; if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; + if (test_chirp_perf_uses_fixed_fifteen_chirp_reference()) return 1; if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; if (test_chirp_budget_logs_ratios_for_wandb()) return 1; - if (test_curriculum_perf_uses_success_and_actual_difficulty()) return 1; + if (test_curriculum_perf_logs_split_weighted_difficulty_components()) return 1; if (test_budget_difficulty_uses_hard_edge_below_six_chirps()) return 1; if (test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency()) return 1; if (test_chirp_tempo_logs_far_and_near_rates()) return 1; @@ -1319,7 +1495,11 @@ int main(void) { if (test_catch_bug_is_terminal_plus_one()) return 1; if (test_progress_reward_sign()) return 1; if (test_bat_cannot_accelerate_backward_from_brake()) return 1; 
+ if (test_bat_reset_starts_with_forward_stall_speed()) return 1; + if (test_bat_brake_clamps_to_forward_stall_speed()) return 1; if (test_bat_velocity_is_locked_to_heading()) return 1; + if (test_bat_zero_speed_recovers_to_forward_arc()) return 1; + if (test_bat_turn_rate_scales_with_forward_speed()) return 1; if (test_bat_speed_action_space_has_no_strafe()) return 1; if (test_chirp_ring_physical_ordering()) return 1; if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; @@ -1340,7 +1520,8 @@ int main(void) { if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; if (test_frequency_bin_energy_sums_and_caps()) return 1; if (test_bug_echo_reward_is_added_when_bug_echo_is_closer()) return 1; - if (test_bug_echo_reward_ignores_farther_bug_echo()) return 1; + if (test_bug_echo_reward_requires_bat_displacement()) return 1; + if (test_bug_echo_reward_penalizes_farther_bug_echo_weakly()) return 1; if (test_static_echo_does_not_get_bug_echo_reward()) return 1; if (test_spawns_use_different_random_quadrants()) return 1; if (test_spawns_keep_minimum_separation_and_avoid_obstacles()) return 1; From e7454e13a0b15ce56e30cf62337cb0fc8e85f6bf Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 13:51:23 -0700 Subject: [PATCH 14/51] simplify bat chirp budget curriculum --- BAT_CURRICULUM.md | 62 +++++++++++++++---------------- BAT_SPEC.md | 21 ++++++----- config/bat.ini | 23 ++---------- ocean/bat/bat.h | 34 +++++++++-------- ocean/bat/binding.c | 2 - ocean/bat/tests/test_bat_core.c | 65 +++++++++++++++++++++++---------- 6 files changed, 109 insertions(+), 98 deletions(-) diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md index 4e54f2b48c..daca317748 100644 --- a/BAT_CURRICULUM.md +++ b/BAT_CURRICULUM.md @@ -40,20 +40,19 @@ impossible wall. It means the metric gives half of its difficulty credit to obstacles, but obstacle difficulty stays zero until level `18`. 
The current code logs split difficulty components and computes -`curriculum_difficulty` from active weighted components: +`curriculum_difficulty` from active distance and obstacle components: ```text distance_norm = normalize(start_bug_dist) obstacle_norm = normalize(num_obstacles) -budget_norm = normalize(chirp budget reduction) motion_norm = 0 until bug maneuvers are added curriculum_difficulty = - (0.40 * distance_norm + - 0.25 * obstacle_norm + - 0.20 * budget_norm) / 0.85 + (0.50 * distance_norm + + 0.50 * obstacle_norm) / active_weight ``` +Chirp-budget pressure is intentionally excluded from curriculum difficulty. Motion difficulty is logged separately as `0`, but it does not lower the metric ceiling before maneuver curricula exist. @@ -85,7 +84,7 @@ curriculum_perf curriculum_level curriculum_distance_difficulty curriculum_obstacle_difficulty -curriculum_chirp_budget_difficulty +curriculum_chirp_budget_difficulty (legacy fixed zero) curriculum_difficulty bug_motion_mode bug_motion_speed @@ -108,7 +107,7 @@ Do not remove `score` from `binding.c`; PufferLib's train worker reads The key change is splitting `curriculum_difficulty` into components. If `curriculum_perf` is low, we should be able to tell whether the policy is stuck -on distance, obstacles, chirp budget, or motion. +on distance, obstacles, or later motion. ## Recommended Difficulty Formula @@ -118,15 +117,13 @@ over active components: ```text distance_norm = normalize(start_bug_dist) obstacle_norm = normalize(num_obstacles) -budget_norm = normalize(chirp budget pressure) motion_norm = normalize(bug maneuver difficulty) curriculum_difficulty = - 0.40 * distance_norm + - 0.25 * obstacle_norm + - 0.20 * budget_norm + 0.50 * distance_norm + + 0.50 * obstacle_norm -curriculum_difficulty /= 0.85 +curriculum_difficulty /= active_weight curriculum_perf = base_perf * curriculum_difficulty ``` @@ -147,7 +144,7 @@ Purpose: Task: -- One obstacle. +- No obstacles at level 0. - Moderate starting bug distance. 
- Current forward-only bat dynamics. - Configurable minimum forward speed; brake cannot stop the bat below this @@ -193,12 +190,13 @@ Purpose: Recommendation: - Reduce default `curriculum_obstacle_step`. -- A practical next default is `6`, giving: +- The current default is `4`, with only level 0 obstacle-free: ```text -level 0..5: 1 obstacle -level 6..11: 2 obstacles -level 12+: 3 obstacles +level 0: 0 obstacles +level 1..4: 1 obstacle +level 5..8: 2 obstacles +level 9+: 3 obstacles ``` Alternative: @@ -208,8 +206,8 @@ Alternative: Gate: -- Do not increase obstacles and reduce chirp budget on the same level unless - the previous rung is clearly solved. +- Do not reduce chirp budget as obstacle count rises; clutter legitimately + requires reacquisition chirps. ### Stage 3: Chirp budget curriculum @@ -221,7 +219,7 @@ Current behavior: - Observation includes `chirps_used / chirp_budget`. - Chirping after the last chirp causes `-1` terminal. -- Budget reduces as curriculum level increases. +- Budget is fixed across curriculum levels. - Valid chirps before the previous max echo window clears get a physical overlap penalty. @@ -378,9 +376,9 @@ The goal is a ladder where each rung is visibly harder, metrics explain why, and the bat must improve sensing behavior without reward terms that directly script the desired chirp timing. -## Proposed Cleanup After Current Sweep +## Current Curriculum Cleanup -Do not mix chirp-budget pressure into curriculum difficulty. +Chirp-budget pressure is no longer mixed into curriculum difficulty. Rationale: @@ -390,7 +388,7 @@ Rationale: - A shrinking chirp budget can make later curriculum levels impossible before we know whether the policy has learned robust obstacle disambiguation. 
-Proposed next curriculum split: +Current curriculum split: ```text level 0: @@ -399,12 +397,12 @@ level 0: later levels: increase bug start distance - then introduce obstacles - then increase obstacle count/clutter + introduce the first obstacle immediately at level 1 + increase obstacle count/clutter every few levels then add maneuvering bug motion ``` -Proposed curriculum difficulty: +Current curriculum difficulty: ```text curriculum_difficulty = @@ -417,7 +415,7 @@ instead of letting inactive components cap the score. Once obstacle curriculum is active, the two-component `0.5 / 0.5` interpretation is easy to explain: half distance, half clutter. -Proposed chirp handling: +Current chirp handling: ```text max_chirps_per_episode = 15 @@ -438,9 +436,9 @@ fewer chirps everywhere: diagnostic/objective term, but interpret it together with obstacle difficulty and not as an absolute "fewer chirps is always better" rule. -This cleanup should be done in a clean commit after the current sweep is either -finished or intentionally stopped, because it changes how `perf` compares to -existing Bat sweep runs. +This cleanup changes how `perf` compares to older Bat sweep runs. Compare old +and new runs through component logs (`base_perf`, `curriculum_perf`, +`chirp_perf`) when needed. ## Bat3 Partial Sweep Notes @@ -500,8 +498,8 @@ Implication: - Be careful with any metric or reward that simply minimizes chirp count. At harder distances or with obstacles, useful policies may need more search and reacquisition chirps. -- This supports the proposed cleanup: keep a fixed chirp budget for now and - remove chirp-budget reduction from curriculum difficulty before adding harder +- This supports the current cleanup: keep a fixed chirp budget for now and keep + chirp pressure separate from curriculum difficulty before adding harder motion or more clutter. 
## Reward-Shaping Guardrails diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 9e43528eac..1cfa3728e5 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -401,6 +401,8 @@ W&B exported metrics: - `curriculum_distance_difficulty` - `curriculum_obstacle_difficulty` - `curriculum_chirp_budget_difficulty` + - legacy diagnostic; fixed at `0.0` because chirp budget no longer decays + with curriculum - `score` - required by PufferLib train worker; do not remove from `binding.c` - `episode_length` @@ -476,8 +478,6 @@ Config knobs: - `sound_speed` - `reflector_spacing` - `max_chirps_per_episode` -- `min_chirps_per_episode` -- `chirp_budget_decay_levels` - `chirp_freq_bins` - `chirp_duration_bins` - `chirp_cost` @@ -562,11 +562,9 @@ Obstacle reflections: - Curriculum design notes are tracked in `BAT_CURRICULUM.md`. Keep that file updated when changing level progression, difficulty metrics, or bug motion rungs. -- The next proposed curriculum cleanup is documented in - `BAT_CURRICULUM.md`: start level 0 with no obstacles, remove chirp-budget - pressure from curriculum difficulty, and use a simpler distance/obstacle - curriculum difficulty. Do this only after the current sweep is finished or - intentionally stopped, because it changes `perf` comparability. +- Current curriculum cleanup is documented in `BAT_CURRICULUM.md`: level 0 + starts with no obstacles, chirp-budget pressure is separate from curriculum + difficulty, and curriculum difficulty uses distance/obstacles only. - Keep `base_perf` as pure catch rate. Use composite `perf` as the sweep objective. It rewards catching harder curriculum levels with fewer chirps without changing in-episode reward shaping: @@ -603,7 +601,9 @@ Obstacle reflections: - Train workers should use CUDA with `--train.gpus 1`. - Protein/sweep control does not need CUDA. Run sweeps with `--sweep.use-gpu ""` so the optimizer stays off CUDA and avoids CUDA IPC/resource-handle failures. - Do not override training duration with ad hoc `--train.total-timesteps`. 
Put duration ranges in `config/bat.ini`. -- Keep Bat sweep ranges bounded so a sweep cannot accidentally launch huge slow models. Bat config uses stock `sweep_only` as a safety filter because PufferLib's default sweep config includes unsafe inherited ranges such as `train.total_timesteps` up to `1e11`, `policy.hidden_size` up to `1024`, `policy.num_layers` up to `8`, and `train.horizon` up to `1024`. +- Keep Bat sweep ranges bounded so a sweep cannot accidentally launch huge slow models. +- Do not use `sweep_only` in Bat config. Keep the config clean and bound the + actual sweep sections/defaults instead. - The default Bat sweep does not sweep policy model size; it keeps `policy.hidden_size = 128` and `policy.num_layers = 4`. Current cost-sensitive sweep bounds cap training duration at `50_000_000`, rollout horizon at `128`, replay ratio at `1.25`, and `vec.num_buffers` at `8`. - Do not add broad model-size sweep ranges. If model size must be swept later, require explicit human approval and keep a hard ceiling of `policy.hidden_size <= 256` and `policy.num_layers <= 4` unless there is a measured SPS reason to widen it. - Keep PufferLib core stock for Bat. If sweep parsing conflicts with inherited default sweep keys, solve it through Bat config or command-line args, not core edits. @@ -635,8 +635,9 @@ train/eval after each rung, and commit each known-good rung separately. 2. Finite chirp budget. - Keep the low-curriculum budget below the old `20`-chirp setting; `20` proved too easy and should not be part of the default sweep. - - Reduce the budget as curriculum level increases, with a floor so harder - levels require smarter chirp timing without creating an impossible cliff. + - Keep the chirp budget fixed across curriculum levels. Harder levels and + clutter legitimately need reacquisition chirps, so budget decay made + later levels fail for the wrong reason. - Track `chirps_used / chirp_budget` as a normalized `0..1` observation. 
- When the budget is exhausted, terminate with a `-1` style failure penalty if the policy attempts another chirp. Do not terminate immediately after diff --git a/config/bat.ini b/config/bat.ini index 7ebfb10e64..1c37c51e30 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -32,9 +32,9 @@ bug_speed = 4.0 max_steps = 512 curriculum_enabled = 1 curriculum_initial_level = 2 -curriculum_start_obstacles = 1 +curriculum_start_obstacles = 0 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 9 +curriculum_obstacle_step = 4 curriculum_successes_per_level = 16 curriculum_start_bug_distance = 8.0 curriculum_max_bug_distance = 56.0 @@ -46,8 +46,6 @@ reflector_spacing = 8.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 12 max_chirps_per_episode = 15 -min_chirps_per_episode = 6 -chirp_budget_decay_levels = 4 chirp_cost = 0.0 chirp_efficiency_reward = 2.0 valid_chirp_reward = 0.0000106 @@ -90,7 +88,6 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -sweep_only = total_timesteps,learning_rate,gamma,gae_lambda,ent_coef,horizon,replay_ratio,num_buffers,bat_max_speed,bat_min_speed,bat_accel,bat_turn_rate,step_cost,sound_speed,ear_separation_scale,progress_reward_scale,chirp_efficiency_reward,valid_chirp_reward,early_chirp_penalty,chirp_overlap_penalty,bug_echo_reward_scale,bug_echo_farther_penalty_scale,collision_penalty,max_chirps_per_episode,min_chirps_per_episode,chirp_budget_decay_levels,curriculum_initial_level,curriculum_start_bug_distance,curriculum_bug_distance_step,curriculum_obstacle_step,curriculum_successes_per_level match_enemy_model_path = {} match_num_games = {} match_enemy_hidden_size = {} @@ -240,18 +237,6 @@ min = 4 max = 15 scale = auto -[sweep.env.min_chirps_per_episode] -distribution = int_uniform -min = 3 -max = 6 -scale = auto - -[sweep.env.chirp_budget_decay_levels] -distribution = int_uniform -min = 3 -max = 8 -scale = auto - [sweep.env.curriculum_initial_level] distribution = int_uniform min = 0 @@ -272,8 +257,8 @@ scale = auto 
[sweep.env.curriculum_obstacle_step] distribution = int_uniform -min = 4 -max = 14 +min = 3 +max = 8 scale = auto [sweep.env.curriculum_successes_per_level] diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index f557e8e89b..236dd7d109 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -373,7 +373,10 @@ static inline void bat_sample_spawns(Bat* env) { static inline int bat_curriculum_obstacles(Bat* env) { if (!env->curriculum_enabled) return env->num_obstacles; int step = env->curriculum_obstacle_step <= 0 ? 1 : env->curriculum_obstacle_step; - int count = env->curriculum_start_obstacles + env->curriculum_level / step; + int count = env->curriculum_start_obstacles; + if (env->curriculum_level > 0) { + count = env->curriculum_start_obstacles + 1 + (env->curriculum_level - 1) / step; + } if (count < 0) count = 0; if (count > env->curriculum_max_obstacles) count = env->curriculum_max_obstacles; if (count > BAT_MAX_OBSTACLES) count = BAT_MAX_OBSTACLES; @@ -388,13 +391,7 @@ static inline float bat_curriculum_bug_distance(Bat* env) { } static inline int bat_curriculum_chirp_budget(Bat* env) { - int decay = env->chirp_budget_decay_levels <= 0 ? 1 : env->chirp_budget_decay_levels; - int level = env->curriculum_enabled ? env->curriculum_level : 0; - int budget = env->max_chirps_per_episode - level / decay; - if (budget < env->min_chirps_per_episode) budget = env->min_chirps_per_episode; - if (budget > env->max_chirps_per_episode) budget = env->max_chirps_per_episode; - if (budget < 1) budget = 1; - return budget; + return env->max_chirps_per_episode > 0 ? env->max_chirps_per_episode : 1; } static inline float bat_chirps_used_ratio(Bat* env) { @@ -437,10 +434,8 @@ static inline float bat_curriculum_obstacle_difficulty(Bat* env) { } static inline float bat_curriculum_chirp_budget_difficulty(Bat* env) { - float span = (float)(env->max_chirps_per_episode - env->min_chirps_per_episode); - if (span <= 0.000001f) return 0.0f; - float budget = env->chirp_budget > 0 ? 
(float)env->chirp_budget : (float)env->max_chirps_per_episode; - return bat_clampf(((float)env->max_chirps_per_episode - budget) / span, 0.0f, 1.0f); + (void)env; + return 0.0f; } static inline float bat_curriculum_motion_difficulty(Bat* env) { @@ -451,10 +446,17 @@ static inline float bat_curriculum_motion_difficulty(Bat* env) { static inline float bat_curriculum_difficulty(Bat* env) { float distance = bat_curriculum_distance_difficulty(env); float obstacles = bat_curriculum_obstacle_difficulty(env); - float budget = bat_curriculum_chirp_budget_difficulty(env); - float active_weight = 0.40f + 0.25f + 0.20f; + float active_weight = 0.0f; + float weighted = 0.0f; + if (env->curriculum_max_bug_distance > env->curriculum_start_bug_distance) { + weighted += 0.5f * distance; + active_weight += 0.5f; + } + if (env->curriculum_max_obstacles > env->curriculum_start_obstacles) { + weighted += 0.5f * obstacles; + active_weight += 0.5f; + } if (active_weight <= 0.000001f) return 0.0f; - float weighted = 0.40f * distance + 0.25f * obstacles + 0.20f * budget; return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } @@ -631,7 +633,7 @@ void init(Bat* env) { if (env->num_obstacles < 0) env->num_obstacles = 0; if (env->num_obstacles > BAT_MAX_OBSTACLES) env->num_obstacles = BAT_MAX_OBSTACLES; - if (env->curriculum_start_obstacles <= 0) env->curriculum_start_obstacles = 1; + if (env->curriculum_start_obstacles < 0) env->curriculum_start_obstacles = 0; if (env->curriculum_max_obstacles <= 0) env->curriculum_max_obstacles = env->num_obstacles; if (env->curriculum_max_obstacles > BAT_MAX_OBSTACLES) env->curriculum_max_obstacles = BAT_MAX_OBSTACLES; if (env->curriculum_start_obstacles > env->curriculum_max_obstacles) { diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 0cb1f4d123..97277a7edd 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -38,8 +38,6 @@ void my_init(Env* env, Dict* kwargs) { env->max_chirp_age_ticks = dict_get(kwargs, 
"max_chirp_age_ticks")->value; env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; - env->min_chirps_per_episode = dict_get(kwargs, "min_chirps_per_episode")->value; - env->chirp_budget_decay_levels = dict_get(kwargs, "chirp_budget_decay_levels")->value; env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; env->chirp_efficiency_reward = dict_get(kwargs, "chirp_efficiency_reward")->value; env->valid_chirp_reward = dict_get(kwargs, "valid_chirp_reward")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index b13378feb7..a2efa8b101 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -94,7 +94,7 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { return 0; } -static int test_chirp_budget_decreases_with_curriculum_level(void) { +static int test_chirp_budget_stays_fixed_with_curriculum_level(void) { Bat env = make_test_env(); env.curriculum_enabled = 1; env.curriculum_initial_level = 8; @@ -104,7 +104,7 @@ static int test_chirp_budget_decreases_with_curriculum_level(void) { c_reset(&env); ASSERT_TRUE(env.curriculum_level == 8); - ASSERT_TRUE(env.chirp_budget == 18); + ASSERT_TRUE(env.chirp_budget == 20); free_allocated(&env); return 0; @@ -214,7 +214,7 @@ static int test_chirp_budget_logs_ratios_for_wandb(void) { return 0; } -static int test_curriculum_perf_logs_split_weighted_difficulty_components(void) { +static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components(void) { Bat env = make_test_env(); c_reset(&env); @@ -230,23 +230,23 @@ static int test_curriculum_perf_logs_split_weighted_difficulty_components(void) ASSERT_FLOAT_NEAR(bat_curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); ASSERT_FLOAT_NEAR(bat_curriculum_obstacle_difficulty(&env), 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_chirp_budget_difficulty(&env), 0.3333333f, 
0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_chirp_budget_difficulty(&env), 0.0000000f, 0.0001f); ASSERT_FLOAT_NEAR(bat_curriculum_motion_difficulty(&env), 0.0000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.4607843f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.5000000f, 0.0001f); add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_distance_difficulty, 0.5000000f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_obstacle_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_chirp_budget_difficulty, 0.3333333f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_chirp_budget_difficulty, 0.0000000f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_motion_difficulty, 0.0000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.4607843f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.4607843f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.5000000f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.num_obstacles, 2.0f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.4607843f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.0f, 0.0001f); free_allocated(&env); @@ -276,7 +276,7 @@ static int test_budget_difficulty_uses_hard_edge_below_six_chirps(void) { return 0; } -static int test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency(void) { +static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(void) { Bat env = make_test_env(); c_reset(&env); @@ -297,8 +297,8 @@ static int test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency(v ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.55f, 0.0001f); 
ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_perf, 0.5333334f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3823529f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.perf, 0.2039215f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.2666667f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); @@ -1045,11 +1045,11 @@ static int test_observations_stay_normalized_after_chirp(void) { return 0; } -static int test_curriculum_starts_close_with_one_obstacle(void) { +static int test_curriculum_level_zero_starts_close_with_no_obstacles(void) { Bat env = make_test_env(); env.num_obstacles = 3; env.curriculum_enabled = 1; - env.curriculum_start_obstacles = 1; + env.curriculum_start_obstacles = 0; env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 1; env.curriculum_start_bug_distance = 12.0f; @@ -1057,13 +1057,39 @@ static int test_curriculum_starts_close_with_one_obstacle(void) { env.curriculum_bug_distance_step = 6.0f; c_reset(&env); - ASSERT_TRUE(env.num_obstacles == 1); + ASSERT_TRUE(env.num_obstacles == 0); ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) <= 14.0f); free_allocated(&env); return 0; } +static int test_curriculum_adds_first_obstacle_after_level_zero(void) { + Bat env = make_test_env(); + env.num_obstacles = 3; + env.curriculum_enabled = 1; + env.curriculum_start_obstacles = 0; + env.curriculum_max_obstacles = 3; + env.curriculum_obstacle_step = 4; + + env.curriculum_initial_level = 1; + c_reset(&env); + ASSERT_TRUE(env.num_obstacles == 1); + + env.curriculum_initial_level = 5; + env.curriculum_level = 0; + c_reset(&env); + ASSERT_TRUE(env.num_obstacles == 2); + + env.curriculum_initial_level = 9; + env.curriculum_level = 0; + c_reset(&env); + ASSERT_TRUE(env.num_obstacles == 3); + + free_allocated(&env); + return 0; +} + static int test_curriculum_advances_after_catch(void) { 
Bat env = make_test_env(); env.num_obstacles = 3; @@ -1476,15 +1502,15 @@ static int test_obstacles_are_small_enough_for_trainability(void) { int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; if (test_chirp_budget_observation_tracks_used_chirps()) return 1; - if (test_chirp_budget_decreases_with_curriculum_level()) return 1; + if (test_chirp_budget_stays_fixed_with_curriculum_level()) return 1; if (test_chirping_after_budget_terminates_with_penalty()) return 1; if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; if (test_chirp_perf_uses_fixed_fifteen_chirp_reference()) return 1; if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; if (test_chirp_budget_logs_ratios_for_wandb()) return 1; - if (test_curriculum_perf_logs_split_weighted_difficulty_components()) return 1; + if (test_curriculum_perf_logs_distance_and_obstacle_difficulty_components()) return 1; if (test_budget_difficulty_uses_hard_edge_below_six_chirps()) return 1; - if (test_perf_composes_base_perf_difficulty_budget_and_chirp_efficiency()) return 1; + if (test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf()) return 1; if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; @@ -1511,7 +1537,8 @@ int main(void) { if (test_bins_only_observation_layout()) return 1; if (test_no_chirp_produces_silent_frequency_bins()) return 1; if (test_observations_stay_normalized_after_chirp()) return 1; - if (test_curriculum_starts_close_with_one_obstacle()) return 1; + if (test_curriculum_level_zero_starts_close_with_no_obstacles()) return 1; + if (test_curriculum_adds_first_obstacle_after_level_zero()) return 1; if (test_curriculum_advances_after_catch()) return 1; if (test_curriculum_waits_for_required_catches()) return 1; if (test_curriculum_initial_level_sets_first_reset_difficulty()) return 1; From 
7d99f84a1a07559b009f9da06de400663091f273 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 14:13:07 -0700 Subject: [PATCH 15/51] add bat raylib chirp audio --- BAT_SPEC.md | 11 +++ config/bat.ini | 1 + ocean/bat/bat.h | 135 ++++++++++++++++++++++++++++++++ ocean/bat/binding.c | 1 + ocean/bat/tests/test_bat_core.c | 45 +++++++++++ 5 files changed, 193 insertions(+) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 1cfa3728e5..36e877fa61 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -180,6 +180,15 @@ Implementation direction: - The v1 observation bins are not raw FFT bins. They are compact matched-filter-like echo features derived from chirp parameters, delay, amplitude, and normalized Doppler. +- RayLib eval rendering may play an audible debug version of emitted chirps. + This is render-only and must not run in headless training. The audible sound + maps the normalized chirp band to a human-hearable swept sine while preserving + the selected start frequency, end frequency, and duration. +- RayLib eval rendering also supports `env.render_target_fps`; default `60`, + and `0` leaves RayLib uncapped. This is for visualization/audio inspection + only and should not be used in training or sweep interpretation. Audible + debug chirp duration scales as `max(1, 60 / render_target_fps)` so low-FPS + inspection preserves chirp ordering while making each sweep easier to hear. ## Action Space @@ -524,6 +533,8 @@ Follow the Breakout-style native env shape: Testing expectations: - Unit tests for chirp parameter normalization. +- Unit tests for audible chirp waveform helper math. Rendering playback itself + stays a RayLib eval concern, not a training dependency. - Unit tests for echo delay and per-tick frequency-bin placement. - Unit tests for left/right ear asymmetry from azimuth. - Unit tests for Doppler sign on approaching vs receding bug. 
diff --git a/config/bat.ini b/config/bat.ini index 1c37c51e30..5f160b91fa 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -30,6 +30,7 @@ bat_accel = 40.0 bat_turn_rate = 9.424778 bug_speed = 4.0 max_steps = 512 +render_target_fps = 60 curriculum_enabled = 1 curriculum_initial_level = 2 curriculum_start_obstacles = 0 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 236dd7d109..346f2cf4ed 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -45,6 +45,11 @@ #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 #define BAT_ECHO_QUEUE_TICKS 256 +#define BAT_AUDIO_VOICES 8 +#define BAT_AUDIO_SAMPLE_RATE 48000 +#define BAT_AUDIO_MIN_HZ 600.0f +#define BAT_AUDIO_MAX_HZ 3600.0f +#define BAT_AUDIO_VOLUME 0.22f #define BAT_BUDGET_EASY_CHIRPS 15.0f #define BAT_BUDGET_EDGE_CHIRPS 5.0f #define BAT_CHIRP_PERF_REFERENCE_CHIRPS 15.0f @@ -122,6 +127,13 @@ typedef struct Log { typedef struct Client { int width; int height; +#ifndef BAT_HEADLESS + int audio_ready; + int last_audio_chirp_serial; + int audio_voice_cursor; + Sound chirp_sounds[BAT_AUDIO_VOICES]; + int chirp_sound_loaded[BAT_AUDIO_VOICES]; +#endif } Client; typedef struct Bat { @@ -138,6 +150,7 @@ typedef struct Bat { int height; int tick; int max_steps; + int render_target_fps; int num_obstacles; int curriculum_enabled; int curriculum_level; @@ -195,6 +208,7 @@ typedef struct Bat { int chirp_head; EchoBucket echo_queue[BAT_ECHO_QUEUE_TICKS]; int chirps_emitted_episode; + int audio_chirp_serial; int chirps_overlapped; float chirp_duration_sum; float chirp_bandwidth_sum; @@ -254,10 +268,61 @@ static inline int bat_action_index(float v, int n) { return idx; } +static inline int bat_render_target_fps(Bat* env) { + return env->render_target_fps > 0 ? 
env->render_target_fps : 0; +} + static inline float bat_chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * bat_clampf(duration_norm, 0.0f, 1.0f); } +static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { + float duration = bat_chirp_duration_seconds(duration_norm); + int fps = bat_render_target_fps(env); + if (fps <= 0) return duration; + float scale = 60.0f / (float)fps; + if (scale < 1.0f) scale = 1.0f; + return duration * scale; +} + +static inline float bat_chirp_audio_frequency_hz(float freq_norm) { + return BAT_AUDIO_MIN_HZ + bat_clampf(freq_norm, 0.0f, 1.0f) + * (BAT_AUDIO_MAX_HZ - BAT_AUDIO_MIN_HZ); +} + +static inline float bat_chirp_audio_instant_hz(float start_norm, float end_norm, + float duration_seconds, float t_seconds) { + if (duration_seconds <= 0.0f) { + return bat_chirp_audio_frequency_hz(start_norm); + } + float t = bat_clampf(t_seconds / duration_seconds, 0.0f, 1.0f); + float start_hz = bat_chirp_audio_frequency_hz(start_norm); + float end_hz = bat_chirp_audio_frequency_hz(end_norm); + return start_hz + t * (end_hz - start_hz); +} + +static inline float bat_chirp_audio_envelope(float t_norm) { + if (t_norm <= 0.0f || t_norm >= 1.0f) return 0.0f; + const float fade = 0.08f; + float attack = t_norm / fade; + float release = (1.0f - t_norm) / fade; + return bat_clampf(fminf(attack, release), 0.0f, 1.0f); +} + +static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, + float duration_seconds, int sample_index, int sample_rate) { + if (duration_seconds <= 0.0f || sample_index < 0 || sample_rate <= 0) return 0.0f; + float t = sample_index / (float)sample_rate; + if (t < 0.0f || t >= duration_seconds) return 0.0f; + + float start_hz = bat_chirp_audio_frequency_hz(start_norm); + float end_hz = bat_chirp_audio_frequency_hz(end_norm); + float chirp_rate = (end_hz - start_hz) / duration_seconds; + float phase = 2.0f * BAT_PI * (start_hz * t + 0.5f * chirp_rate * t * t); + float 
envelope = bat_chirp_audio_envelope(t / duration_seconds); + return BAT_AUDIO_VOLUME * envelope * sinf(phase); +} + static inline float bat_chirp_ring_radius(float age_seconds, float slice, float duration_seconds, float sound_speed) { float ring_age = age_seconds - slice * duration_seconds; @@ -1121,6 +1186,7 @@ static inline bool bat_try_emit_chirp(Bat* env) { chirp->birth_tick = env->tick; chirp->active = 1; env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; + env->audio_chirp_serial += 1; bat_schedule_chirp_echoes(env, chirp); return true; } @@ -1355,15 +1421,83 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { } } +static inline void bat_unload_chirp_sound(Client* client, int i) { + if (!client->chirp_sound_loaded[i]) return; + UnloadSound(client->chirp_sounds[i]); + client->chirp_sound_loaded[i] = 0; +} + +static inline void bat_cleanup_audio(Client* client) { + if (!client->audio_ready) return; + for (int i = 0; i < BAT_AUDIO_VOICES; i++) { + if (client->chirp_sound_loaded[i] && !IsSoundPlaying(client->chirp_sounds[i])) { + bat_unload_chirp_sound(client, i); + } + } +} + +static inline void bat_play_chirp_audio(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->audio_ready) return; + bat_cleanup_audio(client); + if (env->audio_chirp_serial <= 0 || + env->audio_chirp_serial == client->last_audio_chirp_serial) { + return; + } + client->last_audio_chirp_serial = env->audio_chirp_serial; + + float duration = bat_chirp_audio_duration_seconds(env, env->last_chirp_duration); + int sample_count = (int)ceilf(duration * BAT_AUDIO_SAMPLE_RATE); + if (sample_count <= 0) return; + + short* samples = (short*)malloc(sample_count * sizeof(short)); + if (samples == NULL) return; + for (int i = 0; i < sample_count; i++) { + float sample = bat_chirp_audio_sample_f32(env->last_chirp_start_freq, + env->last_chirp_end_freq, duration, i, BAT_AUDIO_SAMPLE_RATE); + samples[i] = (short)(bat_clampf(sample, -1.0f, 1.0f) * 
32767.0f); + } + + Wave wave = { + .frameCount = (unsigned int)sample_count, + .sampleRate = BAT_AUDIO_SAMPLE_RATE, + .sampleSize = 16, + .channels = 1, + .data = samples, + }; + Sound sound = LoadSoundFromWave(wave); + UnloadWave(wave); + + int voice = client->audio_voice_cursor; + client->audio_voice_cursor = (client->audio_voice_cursor + 1) % BAT_AUDIO_VOICES; + bat_unload_chirp_sound(client, voice); + client->chirp_sounds[voice] = sound; + client->chirp_sound_loaded[voice] = 1; + SetSoundVolume(client->chirp_sounds[voice], 1.0f); + PlaySound(client->chirp_sounds[voice]); +} + Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); client->width = env->width * 10; client->height = env->height * 10; InitWindow(client->width, client->height, "Bat"); + int target_fps = bat_render_target_fps(env); + if (target_fps > 0) { + SetTargetFPS(target_fps); + } + InitAudioDevice(); + client->audio_ready = IsAudioDeviceReady(); return client; } void close_client(Client* client) { + if (client->audio_ready) { + for (int i = 0; i < BAT_AUDIO_VOICES; i++) { + bat_unload_chirp_sound(client, i); + } + CloseAudioDevice(); + } CloseWindow(); free(client); } @@ -1375,6 +1509,7 @@ void c_render(Bat* env) { if (env->client == NULL) { env->client = make_client(env); } + bat_play_chirp_audio(env); float sx = env->client->width / (float)env->width; float sy = env->client->height / (float)env->height; BeginDrawing(); diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 97277a7edd..37cceca18c 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -22,6 +22,7 @@ void my_init(Env* env, Dict* kwargs) { env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; env->bug_speed = dict_get(kwargs, "bug_speed")->value; env->max_steps = dict_get(kwargs, "max_steps")->value; + env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; env->curriculum_enabled = dict_get(kwargs, "curriculum_enabled")->value; env->curriculum_initial_level = 
dict_get(kwargs, "curriculum_initial_level")->value; env->curriculum_start_obstacles = dict_get(kwargs, "curriculum_start_obstacles")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index a2efa8b101..f6bc5048d8 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -842,6 +842,48 @@ static int test_chirp_color_maps_low_to_red_high_to_blue(void) { return 0; } +static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { + ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_chirp_audio_instant_hz(0.0f, 1.0f, 0.20f, 0.10f), 2100.0f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_chirp_audio_instant_hz(1.0f, 0.0f, 0.20f, 0.10f), 2100.0f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, -1, 48000), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 9600, 48000), 0.0f, 0.0001f); + float sample = bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 2400, 48000); + ASSERT_TRUE(sample >= -0.25f); + ASSERT_TRUE(sample <= 0.25f); + return 0; +} + +static int test_render_target_fps_is_eval_only_and_can_be_uncapped(void) { + Bat env = make_test_env(); + env.render_target_fps = 60; + ASSERT_TRUE(bat_render_target_fps(&env) == 60); + env.render_target_fps = 15; + ASSERT_TRUE(bat_render_target_fps(&env) == 15); + env.render_target_fps = 0; + ASSERT_TRUE(bat_render_target_fps(&env) == 0); + env.render_target_fps = -1; + ASSERT_TRUE(bat_render_target_fps(&env) == 0); + free_allocated(&env); + return 0; +} + +static int test_chirp_audio_duration_scales_with_render_fps(void) { + Bat env = make_test_env(); + float base_duration = bat_chirp_duration_seconds(0.0f); + env.render_target_fps = 60; + ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); + env.render_target_fps = 30; + 
ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration * 2.0f, 0.0001f); + env.render_target_fps = 15; + ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration * 4.0f, 0.0001f); + env.render_target_fps = 0; + ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); + free_allocated(&env); + return 0; +} + static int test_chirp_cooldown_accepts_only_after_delay(void) { Bat env = make_test_env(); c_reset(&env); @@ -1529,6 +1571,9 @@ int main(void) { if (test_bat_speed_action_space_has_no_strafe()) return 1; if (test_chirp_ring_physical_ordering()) return 1; if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; + if (test_chirp_audio_maps_norm_freq_to_audible_sweep()) return 1; + if (test_render_target_fps_is_eval_only_and_can_be_uncapped()) return 1; + if (test_chirp_audio_duration_scales_with_render_fps()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; if (test_valid_chirp_gets_reward_without_legacy_cost()) return 1; if (test_early_chirp_gets_penalty_and_emits_nothing()) return 1; From fb1f5910ccce1e32ff1b5590e02a1674df3ba64c Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 14:22:38 -0700 Subject: [PATCH 16/51] penalize early bug echo chirps --- BAT_PRIORITIES.md | 50 +++++++++++++++++++++++++++++++++ BAT_SPEC.md | 12 ++++---- config/bat.ini | 4 +-- ocean/bat/bat.h | 46 ++++++++++++++++++++++++++---- ocean/bat/tests/test_bat_core.c | 39 +++++++++++++++++++++++-- 5 files changed, 135 insertions(+), 16 deletions(-) create mode 100644 BAT_PRIORITIES.md diff --git a/BAT_PRIORITIES.md b/BAT_PRIORITIES.md new file mode 100644 index 0000000000..458041e4c2 --- /dev/null +++ b/BAT_PRIORITIES.md @@ -0,0 +1,50 @@ +# Bat Priorities + +Current near-term priorities for the Bat PufferLib environment. + +## 0. Video capture with audio + +- RayLib can render and play audio, but it does not natively encode MP4. 
+- Preferred path: keep RayLib as the renderer/audio source, capture frames/audio + during eval, and use `ffmpeg` to mux an MP4. +- A future helper should make this feel like one command, but avoid embedding an + MP4 encoder in the env. +- Existing GIF capture remains useful for quick silent demos. +- Later render polish: play audible reflection blips in addition to emitted + chirps. Keep this eval-only. Bug reflections and static wall/obstacle + reflections should likely use distinguishable volume, timbre, panning, or + marker sounds so the debug audio stays interpretable. + +## 1. Bug-reflection chirp timing penalty + +- Replace broad "chirp before all echoes clear" pressure with bug-specific + timing pressure. +- Penalize a valid chirp if it is emitted before the previous chirp's expected + bug reflection has returned. +- Scale the penalty by remaining wait fraction, so chirping immediately after a + prior chirp is worse than chirping shortly before the bug echo arrives. +- Keep the coefficient sweepable through `chirp_overlap_penalty`. +- Do not penalize based on all static wall/obstacle reflections; clutter may + legitimately require reacquisition chirps. + +## 2. Resume performance work + +- Use level 7 and level 10 evals as visual sanity checks. +- Focus on harder-level failures where the bat spends chirps before acquiring + the bug. +- Keep reward shaping minimal and prefer terminal/curriculum/perf pressure where + possible. + +## 3. Prepare the next sweep + +- Make sure the next sweep includes any new timing penalty coefficient ranges. +- Keep sweep ranges bounded so runs cannot become extremely slow from oversized + policies or excessive env settings. +- Watch `perf`, `base_perf`, `curriculum_perf`, `chirps_emitted`, + `chirp_overlap_fraction`, `chirp_tempo_ratio`, `collision`, and SPS. 
+ +## Priority judgment + +The current ordering is sound: the video/audio capture work is useful for demos, +but the bug-reflection timing penalty is more likely to improve level 7/10 +performance before the next sweep. diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 36e877fa61..7525bf4a5f 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -323,10 +323,11 @@ Default reward model: - `-1.0` for hitting walls or obstacles, terminal. - Tiny chirp cost so constant chirping is not fully free without causing chirp collapse. -- Chirping again before the prior chirp's max echo return window has cleared +- Chirping again before the prior chirp's expected bug reflection has returned gets a small physical overlap penalty. This is not a generic timing-efficiency - reward; it represents self-induced acoustic ambiguity from overlapping - returns. + reward; it represents self-induced acoustic ambiguity from overlapping bug + returns without forcing the bat to wait for every static wall or obstacle + reflection. 
- Solve-time chirp efficiency reward: - `chirp_efficiency = 0.5 + 0.5 * (1.0 - chirps_used / chirp_budget)`, - a catch after spending the full budget gets efficiency `0.5`, @@ -354,8 +355,9 @@ Progress reward: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - `reward -= chirp_cost` when a chirp is emitted - - `reward -= chirp_overlap_penalty` when a valid chirp is emitted before - the previous chirp's max echo return window has cleared + - `reward -= chirp_overlap_penalty * bug_echo_wait_fraction` when a valid + chirp is emitted before the previous chirp's expected bug reflection has + returned - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` when a returning bug echo indicates the bug is closer than the previous bug diff --git a/config/bat.ini b/config/bat.ini index 5f160b91fa..727c8498e8 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -51,7 +51,7 @@ chirp_cost = 0.0 chirp_efficiency_reward = 2.0 valid_chirp_reward = 0.0000106 early_chirp_penalty = 0.006 -chirp_overlap_penalty = 0.008 +chirp_overlap_penalty = 0.012 bug_echo_reward_scale = 0.235466 bug_echo_farther_penalty_scale = 0.167897 bug_echo_min_displacement = 1.0 @@ -211,7 +211,7 @@ scale = auto [sweep.env.chirp_overlap_penalty] distribution = uniform min = 0.001 -max = 0.008 +max = 0.030 scale = auto [sweep.env.bug_echo_reward_scale] diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 346f2cf4ed..8698001f77 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -236,6 +236,7 @@ typedef struct Bat { float tick_bug_echo_energy; float tick_bug_echo_path; float last_bug_echo_path; + float last_bug_echo_expected_tick; float last_bug_echo_bat_x; float last_bug_echo_bat_y; float collision_penalty; @@ -872,6 +873,33 @@ static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, *right_y = env->bat_y + ly * ear_sep * 0.5f; } +static inline float 
bat_expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { + float fx = cosf(env->bat_heading); + float fy = sinf(env->bat_heading); + float ux, uy; + bat_norm_vec(env->bug_x - chirp->x, env->bug_y - chirp->y, &ux, &uy); + float forward = ux * fx + uy * fy; + if (forward < -0.35f) return -1.0f; + + float left_ear_x, left_ear_y, right_ear_x, right_ear_y; + bat_ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); + float source_path = bat_dist(chirp->x, chirp->y, env->bug_x, env->bug_y); + float left_path = source_path + bat_dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); + float right_path = source_path + bat_dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); + float best_path = -1.0f; + if (left_path <= env->max_echo_range) best_path = left_path; + if (right_path <= env->max_echo_range && (best_path < 0.0f || right_path < best_path)) { + best_path = right_path; + } + if (best_path < 0.0f) return -1.0f; + + int slices = (int)ceilf(chirp->duration / BAT_TICK_RATE); + if (slices < 1) slices = 1; + if (slices > BAT_MAX_CHIRP_SLICES) slices = BAT_MAX_CHIRP_SLICES; + float first_slice_ticks = (0.5f / (float)slices) * chirp->duration / BAT_TICK_RATE; + return chirp->birth_tick + first_slice_ticks + best_path / env->sound_speed / BAT_TICK_RATE; +} + static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, float rx, float ry, float rvx, float rvy, float strength, int source) { @@ -1059,6 +1087,7 @@ static inline void bat_reset_episode(Bat* env) { env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; + env->last_bug_echo_expected_tick = -1.0f; env->chirps_emitted_episode = 0; env->chirps_overlapped = 0; env->chirp_duration_sum = 0.0f; @@ -1187,13 +1216,18 @@ static inline bool bat_try_emit_chirp(Bat* env) { chirp->active = 1; env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; env->audio_chirp_serial += 1; + env->last_bug_echo_expected_tick = 
bat_expected_bug_echo_tick(env, chirp); bat_schedule_chirp_echoes(env, chirp); return true; } -static inline bool bat_next_chirp_overlaps_return_window(Bat* env) { - if (env->chirps_emitted_episode <= 0) return false; - return env->tick - env->last_chirp_tick < env->max_chirp_age_ticks; +static inline float bat_next_chirp_overlap_fraction(Bat* env) { + if (env->chirps_emitted_episode <= 0) return 0.0f; + if (env->last_bug_echo_expected_tick <= (float)env->tick) return 0.0f; + float wait_ticks = env->last_bug_echo_expected_tick - (float)env->last_chirp_tick; + if (wait_ticks <= 0.000001f) return 0.0f; + float remaining_ticks = env->last_bug_echo_expected_tick - (float)env->tick; + return bat_clampf(remaining_ticks / wait_ticks, 0.0f, 1.0f); } static inline int bat_update_chirp(Bat* env) { @@ -1218,7 +1252,7 @@ void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; - bool chirp_overlaps_return_window = bat_next_chirp_overlaps_return_window(env); + float chirp_overlap_fraction = bat_next_chirp_overlap_fraction(env); int chirp_status = bat_update_chirp(env); if (chirp_status == -2) { env->rewards[0] = -1.0f; @@ -1268,8 +1302,8 @@ void c_step(Bat* env) { env->rewards[0] -= env->step_cost; if (chirp_status > 0) { env->rewards[0] += env->valid_chirp_reward; - if (chirp_overlaps_return_window) { - env->rewards[0] -= env->chirp_overlap_penalty; + if (chirp_overlap_fraction > 0.0f) { + env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; env->chirps_overlapped += 1; } } else if (chirp_status < 0) { diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index f6bc5048d8..38e0fa6414 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -974,7 +974,7 @@ static int test_early_chirp_gets_penalty_and_emits_nothing(void) { return 0; } -static int test_chirp_before_echo_window_clears_gets_overlap_penalty(void) { +static int 
test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) { Bat env = make_test_env(); c_reset(&env); test_place_safe_stationary_scene(&env); @@ -996,13 +996,16 @@ static int test_chirp_before_echo_window_clears_gets_overlap_penalty(void) { ASSERT_TRUE(env.chirps_emitted_episode == 1); ASSERT_TRUE(env.chirps_overlapped == 0); + env.last_chirp_tick = 0; + env.last_bug_echo_expected_tick = 10.0f; + env.tick = 5; test_place_safe_stationary_scene(&env); test_set_emit_chirp_action(&env); c_step(&env); ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], - env.valid_chirp_reward - env.chirp_overlap_penalty, 0.0001f); + env.valid_chirp_reward - 0.5f * env.chirp_overlap_penalty, 0.0001f); ASSERT_TRUE(env.chirps_emitted_episode == 2); ASSERT_TRUE(env.chirps_overlapped == 1); @@ -1010,6 +1013,35 @@ static int test_chirp_before_echo_window_clears_gets_overlap_penalty(void) { return 0; } +static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { + Bat env = make_test_env(); + c_reset(&env); + test_place_safe_stationary_scene(&env); + env.step_cost = 0.0f; + env.progress_reward_scale = 0.0f; + env.bug_echo_reward_scale = 0.0f; + env.valid_chirp_reward = 0.0005f; + env.chirp_overlap_penalty = 0.0040f; + env.chirp_cooldown_ticks = 1; + env.max_chirp_age_ticks = 100; + env.chirp_budget = 10; + env.chirps_emitted_episode = 1; + env.last_chirp_tick = 0; + env.last_bug_echo_expected_tick = 3.0f; + env.tick = 4; + test_set_emit_chirp_action(&env); + + c_step(&env); + + ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); + ASSERT_TRUE(env.chirps_emitted_episode == 2); + ASSERT_TRUE(env.chirps_overlapped == 0); + + free_allocated(&env); + return 0; +} + static int test_reflection_arrives_at_two_way_travel_time(void) { float sound_speed = 100.0f; float distance = 25.0f; @@ -1577,7 +1609,8 @@ int main(void) { if 
(test_chirp_cooldown_accepts_only_after_delay()) return 1; if (test_valid_chirp_gets_reward_without_legacy_cost()) return 1; if (test_early_chirp_gets_penalty_and_emits_nothing()) return 1; - if (test_chirp_before_echo_window_clears_gets_overlap_penalty()) return 1; + if (test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty()) return 1; + if (test_chirp_after_bug_echo_arrives_ignores_static_echo_window()) return 1; if (test_reflection_arrives_at_two_way_travel_time()) return 1; if (test_bins_only_observation_layout()) return 1; if (test_no_chirp_produces_silent_frequency_bins()) return 1; From f923149a0474b8c5c4e58888e6e64e20e16bf755 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 14:27:07 -0700 Subject: [PATCH 17/51] prepare bat timing sweep --- BAT_PRIORITIES.md | 3 +++ BAT_SPEC.md | 1 + config/bat.ini | 12 ++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/BAT_PRIORITIES.md b/BAT_PRIORITIES.md index 458041e4c2..f702a5d6f1 100644 --- a/BAT_PRIORITIES.md +++ b/BAT_PRIORITIES.md @@ -38,6 +38,9 @@ Current near-term priorities for the Bat PufferLib environment. ## 3. Prepare the next sweep - Make sure the next sweep includes any new timing penalty coefficient ranges. +- Sweep `chirp_cooldown_ticks` in a bounded range. Current range is `6..18`. +- Keep `max_chirps_per_episode` fixed at `15` for this sweep so budget does + not confound timing penalty and cooldown effects. - Keep sweep ranges bounded so runs cannot become extremely slow from oversized policies or excessive env settings. 
- Watch `perf`, `base_perf`, `curriculum_perf`, `chirps_emitted`, diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 7525bf4a5f..a1b473c03f 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -489,6 +489,7 @@ Config knobs: - `sound_speed` - `reflector_spacing` - `max_chirps_per_episode` +- `chirp_cooldown_ticks` - `chirp_freq_bins` - `chirp_duration_bins` - `chirp_cost` diff --git a/config/bat.ini b/config/bat.ini index 727c8498e8..9b06b20b0b 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -208,6 +208,12 @@ min = 0.001 max = 0.006 scale = auto +[sweep.env.chirp_cooldown_ticks] +distribution = int_uniform +min = 6 +max = 18 +scale = auto + [sweep.env.chirp_overlap_penalty] distribution = uniform min = 0.001 @@ -232,12 +238,6 @@ min = 0.5 max = 2.0 scale = auto -[sweep.env.max_chirps_per_episode] -distribution = int_uniform -min = 4 -max = 15 -scale = auto - [sweep.env.curriculum_initial_level] distribution = int_uniform min = 0 From 2fe1b240195e4076e8bfc200411ba13eac630e4f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 18:59:27 -0700 Subject: [PATCH 18/51] Add bat corner reflectors and sweep defaults --- config/bat.ini | 81 ++++++++------ ocean/bat/bat.h | 52 ++++++++- ocean/bat/binding.c | 1 + ocean/bat/tests/test_bat_core.c | 184 ++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+), 34 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 9b06b20b0b..c1dd9e6ee3 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -22,11 +22,11 @@ width = 64 height = 64 num_obstacles = 3 bat_radius = 2.0 -ear_separation_scale = 2.194554 +ear_separation_scale = 2.0 bug_radius = 1.5 -bat_max_speed = 20.352376 +bat_max_speed = 26.0 bat_min_speed = 2.0 -bat_accel = 40.0 +bat_accel = 45.366636480970875 bat_turn_rate = 9.424778 bug_speed = 4.0 max_steps = 512 @@ -35,50 +35,51 @@ curriculum_enabled = 1 curriculum_initial_level = 2 curriculum_start_obstacles = 0 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 4 +curriculum_obstacle_step = 5 
curriculum_successes_per_level = 16 -curriculum_start_bug_distance = 8.0 +curriculum_start_bug_distance = 10.95126023748812 curriculum_max_bug_distance = 56.0 -curriculum_bug_distance_step = 5.0 +curriculum_bug_distance_step = 4.711042873726855 freq_bins_per_ear = 16 -max_echo_range = 80.0 +max_echo_range = 128.0 sound_speed = 180.0 reflector_spacing = 8.0 +corner_reflectors = 1 max_chirp_age_ticks = 30 -chirp_cooldown_ticks = 12 +chirp_cooldown_ticks = 18 max_chirps_per_episode = 15 chirp_cost = 0.0 chirp_efficiency_reward = 2.0 -valid_chirp_reward = 0.0000106 +valid_chirp_reward = 0.00009333487783059136 early_chirp_penalty = 0.006 -chirp_overlap_penalty = 0.012 -bug_echo_reward_scale = 0.235466 -bug_echo_farther_penalty_scale = 0.167897 +chirp_overlap_penalty = 0.010103772089353678 +bug_echo_reward_scale = 0.2854634145187602 +bug_echo_farther_penalty_scale = 0.14365071684654107 bug_echo_min_displacement = 1.0 step_cost = 0.000193626 -progress_reward_scale = 0.12 -collision_penalty = 1.27806 +progress_reward_scale = 0.11539891887797996 +collision_penalty = 1.0770950996247066 [train] total_timesteps = 47_352_761 -learning_rate = 0.0193521 -gamma = 0.998791 -gae_lambda = 0.969562 -replay_ratio = 1.17746 -clip_coef = 0.2 -vf_coef = 2.0 -vf_clip_coef = 0.2 -max_grad_norm = 1.5 +learning_rate = 0.021835584823882728 +gamma = 0.997 +gae_lambda = 0.948 +replay_ratio = 1.097551956279086 +clip_coef = 0.42 +vf_coef = 3.8 +vf_clip_coef = 0.45 +max_grad_norm = 2.5 ent_coef = 0.0005 -beta1 = 0.95 -beta2 = 0.999 -eps = 1e-12 +beta1 = 0.8786399699496005 +beta2 = 0.9977243436203428 +eps = 5.914725791235274e-13 minibatch_size = 8192 horizon = 64 vtrace_rho_clip = 1.0 vtrace_c_clip = 1.0 -prio_alpha = 0.8 -prio_beta0 = 0.2 +prio_alpha = 0.6482377203508398 +prio_beta0 = 0.06263998034560592 [sweep] method = Protein @@ -89,10 +90,6 @@ max_runs = 8 gpus = 1 downsample = 5 use_gpu = True -match_enemy_model_path = {} -match_num_games = {} -match_enemy_hidden_size = {} 
-match_enemy_num_layers = {} [sweep.train.total_timesteps] distribution = log_normal @@ -100,6 +97,18 @@ min = 30_000_000 max = 50_000_000 scale = auto +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 64 +max = 256 +scale = auto + +[sweep.policy.num_layers] +distribution = int_uniform +min = 2 +max = 4 +scale = auto + [sweep.train.learning_rate] distribution = log_normal min = 0.01 @@ -145,7 +154,7 @@ scale = auto [sweep.env.bat_max_speed] distribution = uniform min = 8.0 -max = 22.0 +max = 30.0 scale = auto [sweep.env.bat_min_speed] @@ -181,7 +190,13 @@ scale = auto [sweep.env.ear_separation_scale] distribution = uniform min = 1.0 -max = 3.0 +max = 2.0 +scale = auto + +[sweep.env.corner_reflectors] +distribution = int_uniform +min = 0 +max = 1 scale = auto [sweep.env.progress_reward_scale] diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 8698001f77..00cc210a2d 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -193,6 +193,7 @@ typedef struct Bat { float max_echo_range; float sound_speed; float reflector_spacing; + int corner_reflectors; int max_chirp_age_ticks; int chirp_cooldown_ticks; int max_chirps_per_episode; @@ -672,9 +673,10 @@ void init(Bat* env) { if (env->bat_turn_rate <= 0.0f) env->bat_turn_rate = BAT_PI; if (env->bug_speed <= 0.0f) env->bug_speed = 4.0f; if (env->freq_bins_per_ear <= 0) env->freq_bins_per_ear = BAT_FREQ_BINS; - if (env->max_echo_range <= 0.0f) env->max_echo_range = 80.0f; + if (env->max_echo_range <= 0.0f) env->max_echo_range = 128.0f; if (env->sound_speed <= 0.0f) env->sound_speed = 60.0f; if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; + env->corner_reflectors = env->corner_reflectors ? 
1 : 0; if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; if (env->max_chirps_per_episode <= 0) env->max_chirps_per_episode = 20; @@ -959,6 +961,22 @@ static inline void bat_schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, } } +static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, + float slice_ticks, float freq) { + if (!env->corner_reflectors) return; + float w = (float)env->width; + float h = (float)env->height; + const float strength = 2.0f; + bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.0f, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, h, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, w, h, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); +} + static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, int i) { float x = env->obstacle_x[i]; @@ -992,6 +1010,7 @@ static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); + bat_schedule_corner_reflector_echoes(env, chirp, slice_ticks, freq); for (int j = 0; j < env->num_obstacles; j++) { bat_schedule_obstacle_echoes(env, chirp, slice_ticks, freq, j); } @@ -1439,6 +1458,35 @@ static inline void bat_draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, bat_draw_segment_echoes(env, chirp, x + w, y, x + w, y + h, 0.55f, sx, sy); } +static inline void bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, + float sx, float sy) { + if (!env->corner_reflectors) return; + float w = (float)env->width; + float h = (float)env->height; + const float strength 
= 2.0f; + bat_draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, 0.0f, h, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, w, h, 0.0f, 0.0f, strength, sx, sy); +} + +static inline void bat_draw_corner_reflector_markers(Bat* env) { + if (!env->corner_reflectors) return; + const int size = 8; + const Color fill = (Color){128, 128, 132, 255}; + const Color outline = (Color){202, 202, 208, 255}; + int max_x = env->client->width - size; + int max_y = env->client->height - size; + DrawRectangle(0, 0, size, size, fill); + DrawRectangleLines(0, 0, size, size, outline); + DrawRectangle(max_x, 0, size, size, fill); + DrawRectangleLines(max_x, 0, size, size, outline); + DrawRectangle(0, max_y, size, size, fill); + DrawRectangleLines(0, max_y, size, size, outline); + DrawRectangle(max_x, max_y, size, size, fill); + DrawRectangleLines(max_x, max_y, size, size, outline); +} + static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; @@ -1449,6 +1497,7 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { bat_draw_segment_echoes(env, chirp, 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.18f, sx, sy); bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)env->height, 0.18f, sx, sy); bat_draw_segment_echoes(env, chirp, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.18f, sx, sy); + bat_draw_corner_reflector_echoes(env, chirp, sx, sy); for (int j = 0; j < env->num_obstacles; j++) { bat_draw_obstacle_echoes(env, chirp, j, sx, sy); } @@ -1559,6 +1608,7 @@ void c_render(Bat* env) { (int)(env->obstacle_h[i] * sy), (Color){92, 92, 96, 255}); } + bat_draw_corner_reflector_markers(env); DrawCircle((int)(env->bug_x * sx), (int)(env->bug_y * sy), env->bug_radius * sx, GREEN); 
DrawCircle((int)(env->bat_x * sx), (int)(env->bat_y * sy), diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 37cceca18c..ab86b69d94 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -36,6 +36,7 @@ void my_init(Env* env, Dict* kwargs) { env->max_echo_range = dict_get(kwargs, "max_echo_range")->value; env->sound_speed = dict_get(kwargs, "sound_speed")->value; env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; + env->corner_reflectors = dict_get(kwargs, "corner_reflectors")->value; env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 38e0fa6414..e5648bf528 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -1349,6 +1349,186 @@ static int test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { return 0; } +static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { + Bat env = { + .num_agents = 1, + .frameskip = 1, + .width = 64, + .height = 64, + .num_obstacles = 0, + .bat_radius = 2.0f, + .bug_radius = 1.5f, + .bat_max_speed = 22.0f, + .bat_min_speed = 2.0f, + .bat_accel = 45.0f, + .bat_turn_rate = 9.424778f, + .bug_speed = 4.0f, + .sound_speed = 180.0f, + .curriculum_max_bug_distance = 56.0f, + .rng = 1, + }; + allocate(&env); + c_reset(&env); + + env.tick = 0; + env.bat_x = 4.0f; + env.bat_y = 32.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bat_heading = 0.0f; + env.bug_x = env.bat_x + env.curriculum_max_bug_distance; + env.bug_y = env.bat_y; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + bat_clear_echo_queue(&env); + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = env.tick, + 
.active = 1, + }; + bat_schedule_chirp_echoes(&env, &chirp); + + float bug_energy = 0.0f; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + bug_energy += env.echo_queue[i].bug_energy; + } + + ASSERT_TRUE(bug_energy > 0.0f); + + free_allocated(&env); + return 0; +} + +static float test_sum_queued_echo_energy(Bat* env) { + float energy = 0.0f; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int ear = 0; ear < 2; ear++) { + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + energy += env->echo_queue[i].energy[ear][bin]; + } + } + } + return energy; +} + +static int test_corner_reflectors_disabled_schedule_no_static_events(void) { + Bat env = make_test_env(); + env.num_obstacles = 0; + env.corner_reflectors = 0; + env.max_echo_range = 128.0f; + c_reset(&env); + + env.tick = 0; + env.bat_x = 32.0f; + env.bat_y = 32.0f; + env.bat_heading = 0.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + bat_clear_echo_queue(&env); + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = env.tick, + .active = 1, + }; + + bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); + + ASSERT_FLOAT_NEAR(test_sum_queued_echo_energy(&env), 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_corner_reflectors_enabled_schedule_stable_echo_events(void) { + Bat env = make_test_env(); + env.num_obstacles = 0; + env.corner_reflectors = 1; + env.max_echo_range = 128.0f; + env.sound_speed = 180.0f; + c_reset(&env); + + env.tick = 0; + env.bat_x = 32.0f; + env.bat_y = 32.0f; + env.bat_heading = 0.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + bat_clear_echo_queue(&env); + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = env.tick, + .active = 1, + }; + + bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); + + 
ASSERT_TRUE(test_sum_queued_echo_energy(&env) > 0.0f); + + free_allocated(&env); + return 0; +} + +static int test_corner_reflector_echo_observations_stay_normalized(void) { + Bat env = make_test_env(); + env.num_obstacles = 0; + env.corner_reflectors = 1; + env.max_echo_range = 128.0f; + env.sound_speed = 180.0f; + c_reset(&env); + + env.tick = 0; + env.bat_x = 32.0f; + env.bat_y = 32.0f; + env.bat_heading = 0.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + bat_clear_echo_queue(&env); + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = env.tick, + .active = 1, + }; + bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); + + int arrival_tick = -1; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + if (env.echo_queue[i].tick > 0 && test_sum_queued_echo_energy(&env) > 0.0f) { + arrival_tick = env.echo_queue[i].tick; + break; + } + } + ASSERT_TRUE(arrival_tick > 0); + + env.tick = arrival_tick; + compute_observations(&env); + ASSERT_TRUE(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS) > 0.0f || + test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS) > 0.0f); + for (int i = 0; i < BAT_OBS_SIZE; i++) { + ASSERT_TRUE(env.observations[i] >= -1.0f); + ASSERT_TRUE(env.observations[i] <= 1.0f); + } + + free_allocated(&env); + return 0; +} + static int test_frequency_bin_energy_sums_and_caps(void) { Bat env = make_test_env(); memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); @@ -1623,6 +1803,10 @@ int main(void) { if (test_curriculum_initial_level_does_not_reset_progress()) return 1; if (test_bug_bounces_off_arena_walls()) return 1; if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; + if (test_default_echo_range_reaches_curriculum_max_bug_distance()) return 1; + if (test_corner_reflectors_disabled_schedule_no_static_events()) return 1; + if (test_corner_reflectors_enabled_schedule_stable_echo_events()) 
return 1; + if (test_corner_reflector_echo_observations_stay_normalized()) return 1; if (test_frequency_bin_energy_sums_and_caps()) return 1; if (test_bug_echo_reward_is_added_when_bug_echo_is_closer()) return 1; if (test_bug_echo_reward_requires_bat_displacement()) return 1; From 732f87bebf84edf3737561e28062c22bd4ef280c Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 19:30:07 -0700 Subject: [PATCH 19/51] Add Bat MP4 recording export --- config/bat.ini | 4 + ocean/bat/bat.h | 233 +++++++++++++++++++++++++++++++++++++++++++- ocean/bat/binding.c | 4 + 3 files changed, 239 insertions(+), 2 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index c1dd9e6ee3..269947814f 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -31,6 +31,10 @@ bat_turn_rate = 9.424778 bug_speed = 4.0 max_steps = 512 render_target_fps = 60 +record_video = 0 +record_video_fps = 30 +record_video_seconds = 30 +record_video_audio = 1 curriculum_enabled = 1 curriculum_initial_level = 2 curriculum_start_obstacles = 0 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 00cc210a2d..bfab7b2687 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -50,6 +51,7 @@ #define BAT_AUDIO_MIN_HZ 600.0f #define BAT_AUDIO_MAX_HZ 3600.0f #define BAT_AUDIO_VOLUME 0.22f +#define BAT_RECORD_MAX_VOICES 16 #define BAT_BUDGET_EASY_CHIRPS 15.0f #define BAT_BUDGET_EDGE_CHIRPS 5.0f #define BAT_CHIRP_PERF_REFERENCE_CHIRPS 15.0f @@ -82,6 +84,14 @@ typedef struct EchoBucket { int tick; } EchoBucket; +typedef struct BatRecordVoice { + int active; + int start_sample; + float start_freq; + float end_freq; + float duration; +} BatRecordVoice; + typedef struct Log { float perf; float base_perf; @@ -133,6 +143,21 @@ typedef struct Client { int audio_voice_cursor; Sound chirp_sounds[BAT_AUDIO_VOICES]; int chirp_sound_loaded[BAT_AUDIO_VOICES]; + int recording_initialized; + int recording_finalized; + int record_frame; + int 
record_max_frames; + int record_fps; + int record_audio; + int record_last_audio_chirp_serial; + int record_audio_sample_cursor; + int record_audio_data_bytes; + int record_voice_cursor; + FILE* record_wav; + char record_frame_dir[256]; + char record_wav_path[256]; + char record_mp4_path[256]; + BatRecordVoice record_voices[BAT_RECORD_MAX_VOICES]; #endif } Client; @@ -151,6 +176,10 @@ typedef struct Bat { int tick; int max_steps; int render_target_fps; + int record_video; + int record_video_fps; + int record_video_seconds; + int record_video_audio; int num_obstacles; int curriculum_enabled; int curriculum_level; @@ -274,19 +303,51 @@ static inline int bat_render_target_fps(Bat* env) { return env->render_target_fps > 0 ? env->render_target_fps : 0; } +static inline bool bat_record_video_enabled(Bat* env) { + return env->record_video != 0; +} + +static inline int bat_record_video_fps(Bat* env) { + int fps = env->record_video_fps > 0 ? env->record_video_fps : 30; + if (fps < 1) fps = 1; + if (fps > 120) fps = 120; + return fps; +} + +static inline int bat_record_video_seconds(Bat* env) { + int seconds = env->record_video_seconds > 0 ? 
env->record_video_seconds : 20; + if (seconds < 1) seconds = 1; + if (seconds > 600) seconds = 600; + return seconds; +} + +static inline int bat_record_frame_samples(int fps) { + if (fps <= 0) fps = 30; + return BAT_AUDIO_SAMPLE_RATE / fps; +} + +static inline int bat_record_max_frames(int fps, int seconds) { + if (fps <= 0) fps = 30; + if (seconds <= 0) seconds = 20; + return fps * seconds; +} + static inline float bat_chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * bat_clampf(duration_norm, 0.0f, 1.0f); } -static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { +static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps) { float duration = bat_chirp_duration_seconds(duration_norm); - int fps = bat_render_target_fps(env); if (fps <= 0) return duration; float scale = 60.0f / (float)fps; if (scale < 1.0f) scale = 1.0f; return duration * scale; } +static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { + return bat_chirp_audio_duration_at_fps(duration_norm, bat_render_target_fps(env)); +} + static inline float bat_chirp_audio_frequency_hz(float freq_norm) { return BAT_AUDIO_MIN_HZ + bat_clampf(freq_norm, 0.0f, 1.0f) * (BAT_AUDIO_MAX_HZ - BAT_AUDIO_MIN_HZ); @@ -677,6 +738,10 @@ void init(Bat* env) { if (env->sound_speed <= 0.0f) env->sound_speed = 60.0f; if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; env->corner_reflectors = env->corner_reflectors ? 1 : 0; + env->record_video = env->record_video ? 1 : 0; + env->record_video_fps = bat_record_video_fps(env); + env->record_video_seconds = bat_record_video_seconds(env); + env->record_video_audio = env->record_video_audio ? 
1 : 0; if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; if (env->max_chirps_per_episode <= 0) env->max_chirps_per_episode = 20; @@ -1560,6 +1625,167 @@ static inline void bat_play_chirp_audio(Bat* env) { PlaySound(client->chirp_sounds[voice]); } +static inline void bat_record_write_le16(FILE* f, unsigned int v) { + fputc((int)(v & 0xffu), f); + fputc((int)((v >> 8) & 0xffu), f); +} + +static inline void bat_record_write_le32(FILE* f, unsigned int v) { + fputc((int)(v & 0xffu), f); + fputc((int)((v >> 8) & 0xffu), f); + fputc((int)((v >> 16) & 0xffu), f); + fputc((int)((v >> 24) & 0xffu), f); +} + +static inline void bat_record_write_wav_header(FILE* f, int data_bytes) { + int byte_rate = BAT_AUDIO_SAMPLE_RATE * 2; + fwrite("RIFF", 1, 4, f); + bat_record_write_le32(f, 36u + (unsigned int)data_bytes); + fwrite("WAVE", 1, 4, f); + fwrite("fmt ", 1, 4, f); + bat_record_write_le32(f, 16); + bat_record_write_le16(f, 1); + bat_record_write_le16(f, 1); + bat_record_write_le32(f, BAT_AUDIO_SAMPLE_RATE); + bat_record_write_le32(f, (unsigned int)byte_rate); + bat_record_write_le16(f, 2); + bat_record_write_le16(f, 16); + fwrite("data", 1, 4, f); + bat_record_write_le32(f, (unsigned int)data_bytes); +} + +static inline void bat_record_init(Bat* env, Client* client) { + if (!bat_record_video_enabled(env) || client->recording_initialized) return; + client->recording_initialized = 1; + client->record_fps = bat_record_video_fps(env); + client->record_audio = env->record_video_audio ? 
1 : 0; + client->record_max_frames = bat_record_max_frames( + client->record_fps, bat_record_video_seconds(env)); + snprintf(client->record_frame_dir, sizeof(client->record_frame_dir), + "recordings/bat_recording_frames"); + snprintf(client->record_wav_path, sizeof(client->record_wav_path), + "recordings/bat_recording.wav"); + snprintf(client->record_mp4_path, sizeof(client->record_mp4_path), + "recordings/bat_recording.mp4"); + system("mkdir -p recordings recordings/bat_recording_frames"); + if (client->record_audio) { + client->record_wav = fopen(client->record_wav_path, "wb"); + if (client->record_wav != NULL) { + bat_record_write_wav_header(client->record_wav, 0); + } + } + printf("Bat recording enabled: %s (%d fps, %d frames)\n", + client->record_mp4_path, client->record_fps, client->record_max_frames); +} + +static inline void bat_record_enqueue_chirp(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->recording_initialized || + client->recording_finalized || !client->record_audio) { + return; + } + if (env->audio_chirp_serial <= 0 || + env->audio_chirp_serial == client->record_last_audio_chirp_serial) { + return; + } + client->record_last_audio_chirp_serial = env->audio_chirp_serial; + int voice_idx = client->record_voice_cursor; + client->record_voice_cursor = (client->record_voice_cursor + 1) % BAT_RECORD_MAX_VOICES; + BatRecordVoice* voice = &client->record_voices[voice_idx]; + voice->active = 1; + voice->start_sample = client->record_audio_sample_cursor; + voice->start_freq = env->last_chirp_start_freq; + voice->end_freq = env->last_chirp_end_freq; + voice->duration = bat_chirp_audio_duration_at_fps( + env->last_chirp_duration, client->record_fps); +} + +static inline void bat_record_append_audio_frame(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->record_audio || client->record_wav == NULL) return; + int frame_samples = bat_record_frame_samples(client->record_fps); + for (int i = 0; i < 
frame_samples; i++) { + int sample_index = client->record_audio_sample_cursor + i; + float mixed = 0.0f; + for (int v = 0; v < BAT_RECORD_MAX_VOICES; v++) { + BatRecordVoice* voice = &client->record_voices[v]; + if (!voice->active) continue; + int local_sample = sample_index - voice->start_sample; + int voice_samples = (int)ceilf(voice->duration * BAT_AUDIO_SAMPLE_RATE); + if (local_sample < 0) continue; + if (local_sample >= voice_samples) { + voice->active = 0; + continue; + } + mixed += bat_chirp_audio_sample_f32(voice->start_freq, voice->end_freq, + voice->duration, local_sample, BAT_AUDIO_SAMPLE_RATE); + } + short pcm = (short)(bat_clampf(mixed, -1.0f, 1.0f) * 32767.0f); + fwrite(&pcm, sizeof(short), 1, client->record_wav); + client->record_audio_data_bytes += (int)sizeof(short); + } + client->record_audio_sample_cursor += frame_samples; +} + +static inline void bat_record_finalize(Client* client) { + if (client == NULL || !client->recording_initialized || + client->recording_finalized) { + return; + } + client->recording_finalized = 1; + if (client->record_wav != NULL) { + fseek(client->record_wav, 0, SEEK_SET); + bat_record_write_wav_header(client->record_wav, client->record_audio_data_bytes); + fclose(client->record_wav); + client->record_wav = NULL; + } + + char cmd[1024]; + if (client->record_audio) { + snprintf(cmd, sizeof(cmd), + "ffmpeg -y -framerate %d -i %s/%%06d.png -i %s -frames:v %d " + "-c:v libx264 -pix_fmt yuv420p -c:a aac -shortest %s", + client->record_fps, client->record_frame_dir, client->record_wav_path, + client->record_frame, client->record_mp4_path); + } else { + snprintf(cmd, sizeof(cmd), + "ffmpeg -y -framerate %d -i %s/%%06d.png -frames:v %d " + "-c:v libx264 -pix_fmt yuv420p %s", + client->record_fps, client->record_frame_dir, client->record_frame, + client->record_mp4_path); + } + int status = system(cmd); + if (status == 0) { + printf("Bat recording saved: %s\n", client->record_mp4_path); + } else { + printf("Bat recording ffmpeg 
command failed with status %d\n", status); + } +} + +static inline void bat_record_capture_frame(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->recording_initialized || + client->recording_finalized) { + return; + } + if (client->record_frame >= client->record_max_frames) { + bat_record_finalize(client); + return; + } + bat_record_enqueue_chirp(env); + char path[512]; + snprintf(path, sizeof(path), "%s/%06d.png", client->record_frame_dir, + client->record_frame); + Image image = LoadImageFromScreen(); + ExportImage(image, path); + UnloadImage(image); + bat_record_append_audio_frame(env); + client->record_frame += 1; + if (client->record_frame >= client->record_max_frames) { + bat_record_finalize(client); + } +} + Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); client->width = env->width * 10; @@ -1571,10 +1797,12 @@ Client* make_client(Bat* env) { } InitAudioDevice(); client->audio_ready = IsAudioDeviceReady(); + bat_record_init(env, client); return client; } void close_client(Client* client) { + bat_record_finalize(client); if (client->audio_ready) { for (int i = 0; i < BAT_AUDIO_VOICES; i++) { bat_unload_chirp_sound(client, i); @@ -1621,6 +1849,7 @@ void c_render(Bat* env) { DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, env->chirps_emitted_episode, cooldown), 10, 10, 20, RAYWHITE); EndDrawing(); + bat_record_capture_frame(env); } #else Client* make_client(Bat* env) { diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index ab86b69d94..1d0caef6f7 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -23,6 +23,10 @@ void my_init(Env* env, Dict* kwargs) { env->bug_speed = dict_get(kwargs, "bug_speed")->value; env->max_steps = dict_get(kwargs, "max_steps")->value; env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; + env->record_video = dict_get(kwargs, "record_video")->value; + env->record_video_fps = 
dict_get(kwargs, "record_video_fps")->value; + env->record_video_seconds = dict_get(kwargs, "record_video_seconds")->value; + env->record_video_audio = dict_get(kwargs, "record_video_audio")->value; env->curriculum_enabled = dict_get(kwargs, "curriculum_enabled")->value; env->curriculum_initial_level = dict_get(kwargs, "curriculum_initial_level")->value; env->curriculum_start_obstacles = dict_get(kwargs, "curriculum_start_obstacles")->value; From b3d65bac7059653aec1124b63162e0676b1e6cb6 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 20:39:29 -0700 Subject: [PATCH 20/51] Tune Bat chirp source and distance curriculum --- config/bat.ini | 60 +++++++++--------- ocean/bat/bat.h | 157 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 156 insertions(+), 61 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 269947814f..e82177ef2c 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -24,9 +24,9 @@ num_obstacles = 3 bat_radius = 2.0 ear_separation_scale = 2.0 bug_radius = 1.5 -bat_max_speed = 26.0 +bat_max_speed = 26.339301984627312 bat_min_speed = 2.0 -bat_accel = 45.366636480970875 +bat_accel = 40.31149837035544 bat_turn_rate = 9.424778 bug_speed = 4.0 max_steps = 512 @@ -36,48 +36,48 @@ record_video_fps = 30 record_video_seconds = 30 record_video_audio = 1 curriculum_enabled = 1 -curriculum_initial_level = 2 +curriculum_initial_level = 1 curriculum_start_obstacles = 0 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 5 +curriculum_obstacle_step = 4 curriculum_successes_per_level = 16 -curriculum_start_bug_distance = 10.95126023748812 -curriculum_max_bug_distance = 56.0 -curriculum_bug_distance_step = 4.711042873726855 +curriculum_start_bug_distance = 8.0 +curriculum_max_bug_distance = 40.0 +curriculum_bug_distance_step = 5.0 freq_bins_per_ear = 16 max_echo_range = 128.0 -sound_speed = 180.0 +sound_speed = 156.31428757902597 reflector_spacing = 8.0 corner_reflectors = 1 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 18 
max_chirps_per_episode = 15 chirp_cost = 0.0 -chirp_efficiency_reward = 2.0 -valid_chirp_reward = 0.00009333487783059136 -early_chirp_penalty = 0.006 -chirp_overlap_penalty = 0.010103772089353678 -bug_echo_reward_scale = 0.2854634145187602 -bug_echo_farther_penalty_scale = 0.14365071684654107 +chirp_efficiency_reward = 1.8643105696678046 +valid_chirp_reward = 0.00010025883377819797 +early_chirp_penalty = 0.005642742950658355 +chirp_overlap_penalty = 0.013198598933272212 +bug_echo_reward_scale = 0.2790819053600338 +bug_echo_farther_penalty_scale = 0.14845872550312994 bug_echo_min_displacement = 1.0 -step_cost = 0.000193626 -progress_reward_scale = 0.11539891887797996 -collision_penalty = 1.0770950996247066 +step_cost = 0.0002156052256090029 +progress_reward_scale = 0.12 +collision_penalty = 1.2506363533934068 [train] -total_timesteps = 47_352_761 -learning_rate = 0.021835584823882728 -gamma = 0.997 -gae_lambda = 0.948 -replay_ratio = 1.097551956279086 -clip_coef = 0.42 -vf_coef = 3.8 -vf_clip_coef = 0.45 -max_grad_norm = 2.5 -ent_coef = 0.0005 -beta1 = 0.8786399699496005 -beta2 = 0.9977243436203428 -eps = 5.914725791235274e-13 +total_timesteps = 39_302_645 +learning_rate = 0.030158879761834633 +gamma = 0.9912833685506411 +gae_lambda = 0.9539098999787969 +replay_ratio = 1.25 +clip_coef = 0.3062252432359945 +vf_coef = 3.5394785870481407 +vf_clip_coef = 0.4792786619293973 +max_grad_norm = 2.644620908575875 +ent_coef = 0.0005000000000000004 +beta1 = 0.8651428827975649 +beta2 = 0.9995922336033932 +eps = 1e-14 minibatch_size = 8192 horizon = 64 vtrace_rho_clip = 1.0 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index bfab7b2687..be5294a586 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -70,10 +70,14 @@ typedef struct BatColor { typedef struct ChirpEvent { float x; float y; + float source_x[BAT_MAX_CHIRP_SLICES]; + float source_y[BAT_MAX_CHIRP_SLICES]; float start_freq; float end_freq; float duration; int birth_tick; + int slice_count; + int slices_scheduled; int 
active; } ChirpEvent; @@ -393,6 +397,47 @@ static inline float bat_chirp_ring_radius(float age_seconds, float slice, return sound_speed * ring_age; } +static inline int bat_chirp_slice_count(float duration_seconds) { + int slices = (int)ceilf(duration_seconds / BAT_TICK_RATE); + if (slices < 1) slices = 1; + if (slices > BAT_MAX_CHIRP_SLICES) slices = BAT_MAX_CHIRP_SLICES; + return slices; +} + +static inline float bat_chirp_slice_seconds(ChirpEvent* chirp, int slice_idx) { + int slices = chirp->slice_count > 0 ? chirp->slice_count : + bat_chirp_slice_count(chirp->duration); + if (slice_idx < 0) slice_idx = 0; + if (slice_idx >= slices) slice_idx = slices - 1; + return ((slice_idx + 0.5f) / (float)slices) * chirp->duration; +} + +static inline float bat_chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { + return bat_chirp_slice_seconds(chirp, slice_idx) / BAT_TICK_RATE; +} + +static inline void bat_chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, + float* source_x, float* source_y) { + int scheduled = chirp->slices_scheduled; + if (slice_idx >= 0 && slice_idx < scheduled && + slice_idx < BAT_MAX_CHIRP_SLICES) { + *source_x = chirp->source_x[slice_idx]; + *source_y = chirp->source_y[slice_idx]; + return; + } + *source_x = chirp->x; + *source_y = chirp->y; +} + +static inline void bat_chirp_source_for_fraction(ChirpEvent* chirp, float slice, + float* source_x, float* source_y) { + int slices = chirp->slice_count > 0 ? 
chirp->slice_count : + bat_chirp_slice_count(chirp->duration); + int slice_idx = (int)floorf(bat_clampf(slice, 0.0f, 1.0f) * (float)slices); + if (slice_idx >= slices) slice_idx = slices - 1; + bat_chirp_source_for_slice(chirp, slice_idx, source_x, source_y); +} + static inline float bat_echo_time_seconds(float distance, float sound_speed) { if (sound_speed <= 0.0f) return 0.0f; return 2.0f * distance / sound_speed; @@ -943,14 +988,16 @@ static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, static inline float bat_expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { float fx = cosf(env->bat_heading); float fy = sinf(env->bat_heading); + float source_x, source_y; + bat_chirp_source_for_slice(chirp, 0, &source_x, &source_y); float ux, uy; - bat_norm_vec(env->bug_x - chirp->x, env->bug_y - chirp->y, &ux, &uy); + bat_norm_vec(env->bug_x - source_x, env->bug_y - source_y, &ux, &uy); float forward = ux * fx + uy * fy; if (forward < -0.35f) return -1.0f; float left_ear_x, left_ear_y, right_ear_x, right_ear_y; bat_ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); - float source_path = bat_dist(chirp->x, chirp->y, env->bug_x, env->bug_y); + float source_path = bat_dist(source_x, source_y, env->bug_x, env->bug_y); float left_path = source_path + bat_dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); float right_path = source_path + bat_dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); float best_path = -1.0f; @@ -960,10 +1007,7 @@ static inline float bat_expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { } if (best_path < 0.0f) return -1.0f; - int slices = (int)ceilf(chirp->duration / BAT_TICK_RATE); - if (slices < 1) slices = 1; - if (slices > BAT_MAX_CHIRP_SLICES) slices = BAT_MAX_CHIRP_SLICES; - float first_slice_ticks = (0.5f / (float)slices) * chirp->duration / BAT_TICK_RATE; + float first_slice_ticks = bat_chirp_slice_ticks(chirp, 0); return chirp->birth_tick + first_slice_ticks + best_path / env->sound_speed 
/ BAT_TICK_RATE; } @@ -1054,30 +1098,73 @@ static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x + w, y, x + w, y + h, 0.55f); } +static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, + int slice_idx) { + int slices = chirp->slice_count > 0 ? chirp->slice_count : + bat_chirp_slice_count(chirp->duration); + if (slice_idx < 0 || slice_idx >= slices || slice_idx >= BAT_MAX_CHIRP_SLICES) { + return; + } + + float t = (slice_idx + 0.5f) / (float)slices; + float slice_ticks = bat_chirp_slice_ticks(chirp, slice_idx); + float freq = chirp->start_freq + t * (chirp->end_freq - chirp->start_freq); + + chirp->source_x[slice_idx] = env->bat_x; + chirp->source_y[slice_idx] = env->bat_y; + ChirpEvent slice_chirp = *chirp; + slice_chirp.x = chirp->source_x[slice_idx]; + slice_chirp.y = chirp->source_y[slice_idx]; + + bat_schedule_echo(env, &slice_chirp, slice_ticks, freq, + env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, BAT_ECHO_BUG); + bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); + bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.12f); + bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); + bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); + bat_schedule_corner_reflector_echoes(env, &slice_chirp, slice_ticks, freq); + for (int j = 0; j < env->num_obstacles; j++) { + bat_schedule_obstacle_echoes(env, &slice_chirp, slice_ticks, freq, j); + } +} + static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { - int slices = (int)ceilf(chirp->duration / BAT_TICK_RATE); - if (slices < 1) slices = 1; - if (slices > BAT_MAX_CHIRP_SLICES) 
slices = BAT_MAX_CHIRP_SLICES; + int slices = chirp->slice_count > 0 ? chirp->slice_count : + bat_chirp_slice_count(chirp->duration); + chirp->slice_count = slices; + if (chirp->slices_scheduled < 0) { + chirp->slices_scheduled = 0; + } + if (chirp->slices_scheduled > slices) { + chirp->slices_scheduled = slices; + } + while (chirp->slices_scheduled < slices) { + int slice_idx = chirp->slices_scheduled; + bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); + chirp->slices_scheduled += 1; + } +} - for (int i = 0; i < slices; i++) { - float t = (i + 0.5f) / (float)slices; - float slice_seconds = t * chirp->duration; - float slice_ticks = slice_seconds / BAT_TICK_RATE; - float freq = chirp->start_freq + t * (chirp->end_freq - chirp->start_freq); - - bat_schedule_echo(env, chirp, slice_ticks, freq, - env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, BAT_ECHO_BUG); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, - 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, - 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.12f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, - 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, - (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); - bat_schedule_corner_reflector_echoes(env, chirp, slice_ticks, freq); - for (int j = 0; j < env->num_obstacles; j++) { - bat_schedule_obstacle_echoes(env, chirp, slice_ticks, freq, j); +static inline void bat_schedule_due_chirp_slices(Bat* env) { + for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { + ChirpEvent* chirp = &env->chirps[i]; + if (!chirp->active) continue; + int slices = chirp->slice_count > 0 ? 
chirp->slice_count : + bat_chirp_slice_count(chirp->duration); + if (chirp->slices_scheduled < 0) chirp->slices_scheduled = 0; + if (chirp->slices_scheduled > slices) chirp->slices_scheduled = slices; + + float age_ticks = (float)(env->tick - chirp->birth_tick); + while (chirp->slices_scheduled < slices) { + int slice_idx = chirp->slices_scheduled; + float slice_ticks = bat_chirp_slice_ticks(chirp, slice_idx); + if (slice_ticks >= age_ticks + 1.0f) break; + bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); + chirp->slices_scheduled += 1; } } } @@ -1297,11 +1384,16 @@ static inline bool bat_try_emit_chirp(Bat* env) { chirp->end_freq = env->last_chirp_end_freq; chirp->duration = bat_chirp_duration_seconds(env->last_chirp_duration); chirp->birth_tick = env->tick; + chirp->slice_count = bat_chirp_slice_count(chirp->duration); + chirp->slices_scheduled = 0; + for (int i = 0; i < BAT_MAX_CHIRP_SLICES; i++) { + chirp->source_x[i] = chirp->x; + chirp->source_y[i] = chirp->y; + } chirp->active = 1; env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; env->audio_chirp_serial += 1; env->last_bug_echo_expected_tick = bat_expected_bug_echo_tick(env, chirp); - bat_schedule_chirp_echoes(env, chirp); return true; } @@ -1355,6 +1447,7 @@ void c_step(Bat* env) { bat_reset_episode(env); return; } + bat_schedule_due_chirp_slices(env); for (int i = 0; i < env->frameskip; i++) { bat_update_motion(env, BAT_TICK_RATE); @@ -1452,9 +1545,11 @@ static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { float fade = 1.0f - radius / env->max_echo_range; float alpha = 0.18f + 0.42f * bat_clampf(fade, 0.0f, 1.0f); + float source_x, source_y; + bat_chirp_source_for_fraction(chirp, slice, &source_x, &source_y); DrawCircleLines( - (int)(chirp->x * sx), - (int)(chirp->y * sy), + (int)(source_x * sx), + (int)(source_y * sy), radius * scale, bat_ray_color(bat_freq_color(freq, alpha))); } From ba6734a21af33c6166c97c81dafd12dcb771ba87 Mon Sep 17 00:00:00 2001 From: Kinvert 
Date: Tue, 9 Jun 2026 20:45:17 -0700 Subject: [PATCH 21/51] Add Bat inbound maneuver curriculum --- config/bat.ini | 8 ++ ocean/bat/bat.h | 175 ++++++++++++++++++++++++++++++++++++++++++-- ocean/bat/binding.c | 9 +++ 3 files changed, 184 insertions(+), 8 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index e82177ef2c..009759dd3f 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -44,6 +44,14 @@ curriculum_successes_per_level = 16 curriculum_start_bug_distance = 8.0 curriculum_max_bug_distance = 40.0 curriculum_bug_distance_step = 5.0 +curriculum_inbound_start_level = 8 +curriculum_inbound_max_bug_distance = 56.0 +curriculum_inbound_bug_distance_step = 4.0 +inbound_bug_speed_multiplier = 1.75 +inbound_heading_noise_degrees = 18.0 +bug_maneuver_start_level = 7 +bug_maneuver_strength = 0.35 +bug_maneuver_frequency = 0.35 freq_bins_per_ear = 16 max_echo_range = 128.0 sound_speed = 156.31428757902597 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index be5294a586..1222d0e5bd 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -196,6 +196,14 @@ typedef struct Bat { float curriculum_start_bug_distance; float curriculum_max_bug_distance; float curriculum_bug_distance_step; + int curriculum_inbound_start_level; + float curriculum_inbound_max_bug_distance; + float curriculum_inbound_bug_distance_step; + float inbound_bug_speed_multiplier; + float inbound_heading_noise_degrees; + int bug_maneuver_start_level; + float bug_maneuver_strength; + float bug_maneuver_frequency; float bat_x; float bat_y; @@ -216,6 +224,12 @@ typedef struct Bat { float bug_vy; float bug_radius; float bug_speed; + int bug_inbound; + int bug_maneuver_mode; + float bug_base_heading; + float bug_maneuver_phase; + float bug_maneuver_rate; + float bug_maneuver_sign; float* obstacle_x; float* obstacle_y; @@ -523,7 +537,8 @@ static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, } static inline void bat_sample_spawns(Bat* env) { - int bat_quadrant = (int)(bat_rand(env) & 
3u); + int bat_quadrant = (int)(bat_randf(env) * 4.0f); + if (bat_quadrant > 3) bat_quadrant = 3; int bug_quadrant = bat_quadrant ^ 3; float min_sep = fminf(env->width, env->height) * 0.31f; @@ -563,6 +578,41 @@ static inline float bat_curriculum_bug_distance(Bat* env) { env->curriculum_max_bug_distance); } +static inline bool bat_curriculum_inbound_enabled(Bat* env) { + if (!env->curriculum_enabled) return false; + return env->curriculum_level >= env->curriculum_inbound_start_level; +} + +static inline float bat_curriculum_inbound_bug_distance(Bat* env) { + float base = env->curriculum_max_bug_distance; + int extra_levels = env->curriculum_level - env->curriculum_inbound_start_level + 1; + if (extra_levels < 1) extra_levels = 1; + float distance = base + env->curriculum_inbound_bug_distance_step * extra_levels; + return bat_clampf(distance, base, env->curriculum_inbound_max_bug_distance); +} + +static inline float bat_curriculum_spawn_distance(Bat* env) { + if (bat_curriculum_inbound_enabled(env)) { + return bat_curriculum_inbound_bug_distance(env); + } + return bat_curriculum_bug_distance(env); +} + +static inline float bat_curriculum_bug_speed(Bat* env) { + float speed = env->bug_speed; + if (bat_curriculum_inbound_enabled(env)) { + speed *= env->inbound_bug_speed_multiplier; + } + return fmaxf(0.0f, speed); +} + +static inline float bat_curriculum_bug_maneuver_strength(Bat* env) { + if (!env->curriculum_enabled) return 0.0f; + if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; + float ramp = (env->curriculum_level - env->bug_maneuver_start_level + 1) / 4.0f; + return env->bug_maneuver_strength * bat_clampf(ramp, 0.0f, 1.0f); +} + static inline int bat_curriculum_chirp_budget(Bat* env) { return env->max_chirps_per_episode > 0 ? 
env->max_chirps_per_episode : 1; } @@ -597,8 +647,10 @@ static inline float bat_norm_range(float value, float lo, float hi) { } static inline float bat_curriculum_distance_difficulty(Bat* env) { + float max_distance = fmaxf(env->curriculum_max_bug_distance, + env->curriculum_inbound_max_bug_distance); return bat_norm_range(env->start_bug_dist, - env->curriculum_start_bug_distance, env->curriculum_max_bug_distance); + env->curriculum_start_bug_distance, max_distance); } static inline float bat_curriculum_obstacle_difficulty(Bat* env) { @@ -612,8 +664,12 @@ static inline float bat_curriculum_chirp_budget_difficulty(Bat* env) { } static inline float bat_curriculum_motion_difficulty(Bat* env) { - (void)env; - return 0.0f; + if (!env->curriculum_enabled) return 0.0f; + if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; + float span = fmaxf(1.0f, + (float)(env->curriculum_inbound_start_level + 4 - env->bug_maneuver_start_level)); + return bat_clampf((env->curriculum_level - env->bug_maneuver_start_level + 1) / span, + 0.0f, 1.0f); } static inline float bat_curriculum_difficulty(Bat* env) { @@ -629,6 +685,11 @@ static inline float bat_curriculum_difficulty(Bat* env) { weighted += 0.5f * obstacles; active_weight += 0.5f; } + float motion = bat_curriculum_motion_difficulty(env); + if (env->bug_maneuver_strength > 0.0f) { + weighted += 0.5f * motion; + active_weight += 0.5f; + } if (active_weight <= 0.000001f) return 0.0f; return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } @@ -697,6 +758,34 @@ static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance bat_sample_spawns(env); } +static inline void bat_set_bug_velocity(Bat* env, float heading, float speed) { + env->bug_base_heading = heading; + env->bug_vx = cosf(heading) * speed; + env->bug_vy = sinf(heading) * speed; +} + +static inline void bat_reset_bug_motion(Bat* env) { + env->bug_inbound = bat_curriculum_inbound_enabled(env) ? 
1 : 0; + float strength = bat_curriculum_bug_maneuver_strength(env); + env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(bat_rand(env) % 3u) : 0; + env->bug_maneuver_phase = bat_randf(env) * 2.0f * BAT_PI; + env->bug_maneuver_rate = 2.0f * BAT_PI * env->bug_maneuver_frequency * + (0.75f + 0.50f * bat_randf(env)); + env->bug_maneuver_sign = (bat_rand(env) & 1u) ? -1.0f : 1.0f; + + float speed = bat_curriculum_bug_speed(env); + if (env->bug_inbound) { + float tx, ty; + bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); + float noise = env->inbound_heading_noise_degrees * (BAT_PI / 180.0f); + float heading = atan2f(ty, tx) + (2.0f * bat_randf(env) - 1.0f) * noise; + bat_set_bug_velocity(env, heading, speed); + } else { + float heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + bat_set_bug_velocity(env, heading, speed); + } +} + static inline void bat_apply_curriculum(Bat* env) { if (env->curriculum_enabled) { env->num_obstacles = bat_curriculum_obstacles(env); @@ -825,6 +914,21 @@ void init(Bat* env) { env->curriculum_max_bug_distance = fminf(env->width, env->height) * 0.70f; } if (env->curriculum_bug_distance_step <= 0.0f) env->curriculum_bug_distance_step = 1.5f; + if (env->curriculum_inbound_start_level <= 0) env->curriculum_inbound_start_level = 8; + if (env->curriculum_inbound_max_bug_distance <= env->curriculum_max_bug_distance) { + env->curriculum_inbound_max_bug_distance = env->curriculum_max_bug_distance; + } + if (env->curriculum_inbound_bug_distance_step <= 0.0f) { + env->curriculum_inbound_bug_distance_step = env->curriculum_bug_distance_step; + } + if (env->inbound_bug_speed_multiplier <= 0.0f) env->inbound_bug_speed_multiplier = 1.5f; + env->inbound_bug_speed_multiplier = bat_clampf(env->inbound_bug_speed_multiplier, 1.0f, 4.0f); + if (env->inbound_heading_noise_degrees < 0.0f) env->inbound_heading_noise_degrees = 0.0f; + env->inbound_heading_noise_degrees = bat_clampf(env->inbound_heading_noise_degrees, 0.0f, 60.0f); + 
if (env->bug_maneuver_start_level <= 0) env->bug_maneuver_start_level = 7; + if (env->bug_maneuver_strength < 0.0f) env->bug_maneuver_strength = 0.0f; + env->bug_maneuver_strength = bat_clampf(env->bug_maneuver_strength, 0.0f, 0.75f); + if (env->bug_maneuver_frequency <= 0.0f) env->bug_maneuver_frequency = 0.35f; env->obstacle_x = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); env->obstacle_y = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); env->obstacle_w = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); @@ -1238,14 +1342,12 @@ static inline void bat_reset_episode(Bat* env) { } bat_apply_curriculum(env); if (env->curriculum_enabled) { - bat_sample_spawns_at_distance(env, bat_curriculum_bug_distance(env)); + bat_sample_spawns_at_distance(env, bat_curriculum_spawn_distance(env)); } else { bat_sample_spawns(env); } generate_obstacles(env); - float bug_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; - env->bug_vx = cosf(bug_heading) * env->bug_speed; - env->bug_vy = sinf(bug_heading) * env->bug_speed; + bat_reset_bug_motion(env); env->last_chirp_start_freq = 0.0f; env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; @@ -1305,23 +1407,80 @@ static inline bool bat_hits_wall(Bat* env) { } static inline void bat_update_bug(Bat* env, float dt) { + float speed = bat_curriculum_bug_speed(env); + float strength = bat_curriculum_bug_maneuver_strength(env); + if (env->bug_maneuver_mode > 0) { + env->bug_maneuver_phase += env->bug_maneuver_rate * dt; + if (env->bug_maneuver_phase > 2.0f * BAT_PI) { + env->bug_maneuver_phase -= 2.0f * BAT_PI; + } + } + + if (env->bug_inbound) { + float tx, ty; + bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); + float px = -ty; + float py = tx; + float lateral = 0.0f; + if (env->bug_maneuver_mode > 0) { + lateral = strength * sinf(env->bug_maneuver_phase); + if (env->bug_maneuver_mode == 2) { + lateral += 0.5f * strength * env->bug_maneuver_sign; + } else if (env->bug_maneuver_mode == 3) { + 
lateral += 0.35f * strength * cosf(0.5f * env->bug_maneuver_phase); + } + } + lateral = bat_clampf(lateral, -0.50f, 0.50f); + float forward = sqrtf(fmaxf(0.0f, 1.0f - lateral * lateral)); + env->bug_vx = (tx * forward + px * lateral) * speed; + env->bug_vy = (ty * forward + py * lateral) * speed; + } else if (env->bug_maneuver_mode > 0) { + float heading = env->bug_base_heading; + if (env->bug_maneuver_mode == 1) { + heading += strength * sinf(env->bug_maneuver_phase); + } else if (env->bug_maneuver_mode == 2) { + env->bug_base_heading += env->bug_maneuver_sign * strength * dt; + heading = env->bug_base_heading; + } else { + heading += strength * sinf(env->bug_maneuver_phase) + + 0.35f * strength * cosf(0.5f * env->bug_maneuver_phase); + } + env->bug_vx = cosf(heading) * speed; + env->bug_vy = sinf(heading) * speed; + } + env->bug_x += env->bug_vx * dt; env->bug_y += env->bug_vy * dt; + bool bounced = false; if (env->bug_x - env->bug_radius < 0.0f) { env->bug_x = env->bug_radius; env->bug_vx = fabsf(env->bug_vx); + bounced = true; } if (env->bug_x + env->bug_radius > env->width) { env->bug_x = env->width - env->bug_radius; env->bug_vx = -fabsf(env->bug_vx); + bounced = true; } if (env->bug_y - env->bug_radius < 0.0f) { env->bug_y = env->bug_radius; env->bug_vy = fabsf(env->bug_vy); + bounced = true; } if (env->bug_y + env->bug_radius > env->height) { env->bug_y = env->height - env->bug_radius; env->bug_vy = -fabsf(env->bug_vy); + bounced = true; + } + if (bounced) { + env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); + if (env->bug_inbound) { + float tx, ty; + bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); + env->bug_vx = tx * speed; + env->bug_vy = ty * speed; + env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); + } } } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 1d0caef6f7..112f3400fc 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -36,6 +36,14 @@ void my_init(Env* env, Dict* kwargs) { 
env->curriculum_start_bug_distance = dict_get(kwargs, "curriculum_start_bug_distance")->value; env->curriculum_max_bug_distance = dict_get(kwargs, "curriculum_max_bug_distance")->value; env->curriculum_bug_distance_step = dict_get(kwargs, "curriculum_bug_distance_step")->value; + env->curriculum_inbound_start_level = dict_get(kwargs, "curriculum_inbound_start_level")->value; + env->curriculum_inbound_max_bug_distance = dict_get(kwargs, "curriculum_inbound_max_bug_distance")->value; + env->curriculum_inbound_bug_distance_step = dict_get(kwargs, "curriculum_inbound_bug_distance_step")->value; + env->inbound_bug_speed_multiplier = dict_get(kwargs, "inbound_bug_speed_multiplier")->value; + env->inbound_heading_noise_degrees = dict_get(kwargs, "inbound_heading_noise_degrees")->value; + env->bug_maneuver_start_level = dict_get(kwargs, "bug_maneuver_start_level")->value; + env->bug_maneuver_strength = dict_get(kwargs, "bug_maneuver_strength")->value; + env->bug_maneuver_frequency = dict_get(kwargs, "bug_maneuver_frequency")->value; env->freq_bins_per_ear = dict_get(kwargs, "freq_bins_per_ear")->value; env->max_echo_range = dict_get(kwargs, "max_echo_range")->value; env->sound_speed = dict_get(kwargs, "sound_speed")->value; @@ -71,6 +79,7 @@ void my_log(Log* log, Dict* out) { dict_set(out, "curriculum_distance_difficulty", log->curriculum_distance_difficulty); dict_set(out, "curriculum_obstacle_difficulty", log->curriculum_obstacle_difficulty); dict_set(out, "curriculum_chirp_budget_difficulty", log->curriculum_chirp_budget_difficulty); + dict_set(out, "curriculum_motion_difficulty", log->curriculum_motion_difficulty); dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "bug_distance_start", log->bug_distance_start); dict_set(out, "bug_distance_final", log->bug_distance_final); From a22f01f8cb548e009b31d0cf8b5d4fffc7233f0f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 21:05:18 -0700 Subject: [PATCH 22/51] Prepare Bat reflector strength sweep 
--- config/bat.ini | 19 +++++++++++++------ ocean/bat/bat.h | 43 +++++++++++++++++++++++++++++++++++++++---- ocean/bat/binding.c | 1 + 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 009759dd3f..d87e452bd5 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -50,13 +50,14 @@ curriculum_inbound_bug_distance_step = 4.0 inbound_bug_speed_multiplier = 1.75 inbound_heading_noise_degrees = 18.0 bug_maneuver_start_level = 7 -bug_maneuver_strength = 0.35 -bug_maneuver_frequency = 0.35 +bug_maneuver_strength = 0.40 +bug_maneuver_frequency = 0.40 freq_bins_per_ear = 16 max_echo_range = 128.0 sound_speed = 156.31428757902597 reflector_spacing = 8.0 corner_reflectors = 1 +reflector_strength = 2.0 max_chirp_age_ticks = 30 chirp_cooldown_ticks = 18 max_chirps_per_episode = 15 @@ -112,13 +113,13 @@ scale = auto [sweep.policy.hidden_size] distribution = uniform_pow2 min = 64 -max = 256 +max = 512 scale = auto [sweep.policy.num_layers] distribution = int_uniform min = 2 -max = 4 +max = 5 scale = auto [sweep.train.learning_rate] @@ -147,8 +148,8 @@ scale = auto [sweep.train.horizon] distribution = uniform_pow2 -min = 32 -max = 128 +min = 64 +max = 256 scale = auto [sweep.train.replay_ratio] @@ -211,6 +212,12 @@ min = 0 max = 1 scale = auto +[sweep.env.reflector_strength] +distribution = uniform +min = 0.6 +max = 3.0 +scale = auto + [sweep.env.progress_reward_scale] distribution = uniform min = 0.04 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 1222d0e5bd..981c71c285 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -241,6 +241,7 @@ typedef struct Bat { float sound_speed; float reflector_spacing; int corner_reflectors; + float reflector_strength; int max_chirp_age_ticks; int chirp_cooldown_ticks; int max_chirps_per_episode; @@ -609,10 +610,21 @@ static inline float bat_curriculum_bug_speed(Bat* env) { static inline float bat_curriculum_bug_maneuver_strength(Bat* env) { if (!env->curriculum_enabled) return 0.0f; if 
(env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; - float ramp = (env->curriculum_level - env->bug_maneuver_start_level + 1) / 4.0f; + int extra_levels = env->curriculum_level - env->bug_maneuver_start_level; + float ramp = extra_levels <= 0 ? 0.25f : 0.75f + 0.25f * (extra_levels - 1); return env->bug_maneuver_strength * bat_clampf(ramp, 0.0f, 1.0f); } +static inline float bat_curriculum_bug_maneuver_frequency(Bat* env) { + if (!env->curriculum_enabled) return env->bug_maneuver_frequency; + if (env->curriculum_level < env->bug_maneuver_start_level) { + return env->bug_maneuver_frequency; + } + int extra_levels = env->curriculum_level - env->bug_maneuver_start_level; + float multiplier = 1.0f + 0.50f * extra_levels; + return env->bug_maneuver_frequency * bat_clampf(multiplier, 1.0f, 2.5f); +} + static inline int bat_curriculum_chirp_budget(Bat* env) { return env->max_chirps_per_episode > 0 ? env->max_chirps_per_episode : 1; } @@ -769,7 +781,7 @@ static inline void bat_reset_bug_motion(Bat* env) { float strength = bat_curriculum_bug_maneuver_strength(env); env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(bat_rand(env) % 3u) : 0; env->bug_maneuver_phase = bat_randf(env) * 2.0f * BAT_PI; - env->bug_maneuver_rate = 2.0f * BAT_PI * env->bug_maneuver_frequency * + env->bug_maneuver_rate = 2.0f * BAT_PI * bat_curriculum_bug_maneuver_frequency(env) * (0.75f + 0.50f * bat_randf(env)); env->bug_maneuver_sign = (bat_rand(env) & 1u) ? -1.0f : 1.0f; @@ -872,6 +884,7 @@ void init(Bat* env) { if (env->sound_speed <= 0.0f) env->sound_speed = 60.0f; if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; env->corner_reflectors = env->corner_reflectors ? 1 : 0; + if (env->reflector_strength <= 0.0f) env->reflector_strength = 2.0f; env->record_video = env->record_video ? 
1 : 0; env->record_video_fps = bat_record_video_fps(env); env->record_video_seconds = bat_record_video_seconds(env); @@ -1179,7 +1192,7 @@ static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* ch if (!env->corner_reflectors) return; float w = (float)env->width; float h = (float)env->height; - const float strength = 2.0f; + float strength = env->reflector_strength; bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.0f, @@ -1188,6 +1201,14 @@ static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* ch 0.0f, 0.0f, strength, BAT_ECHO_STATIC); bat_schedule_echo(env, chirp, slice_ticks, freq, w, h, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, 0.0f, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, h, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.5f * h, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, + 0.0f, 0.0f, strength, BAT_ECHO_STATIC); } static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, @@ -1782,11 +1803,15 @@ static inline void bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, if (!env->corner_reflectors) return; float w = (float)env->width; float h = (float)env->height; - const float strength = 2.0f; + float strength = env->reflector_strength; bat_draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, 0.0f, h, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, w, h, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, 0.5f * w, 0.0f, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 
0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); + bat_draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); } static inline void bat_draw_corner_reflector_markers(Bat* env) { @@ -1796,6 +1821,8 @@ static inline void bat_draw_corner_reflector_markers(Bat* env) { const Color outline = (Color){202, 202, 208, 255}; int max_x = env->client->width - size; int max_y = env->client->height - size; + int mid_x = env->client->width / 2 - size / 2; + int mid_y = env->client->height / 2 - size / 2; DrawRectangle(0, 0, size, size, fill); DrawRectangleLines(0, 0, size, size, outline); DrawRectangle(max_x, 0, size, size, fill); @@ -1804,6 +1831,14 @@ static inline void bat_draw_corner_reflector_markers(Bat* env) { DrawRectangleLines(0, max_y, size, size, outline); DrawRectangle(max_x, max_y, size, size, fill); DrawRectangleLines(max_x, max_y, size, size, outline); + DrawRectangle(mid_x, 0, size, size, fill); + DrawRectangleLines(mid_x, 0, size, size, outline); + DrawRectangle(mid_x, max_y, size, size, fill); + DrawRectangleLines(mid_x, max_y, size, size, outline); + DrawRectangle(0, mid_y, size, size, fill); + DrawRectangleLines(0, mid_y, size, size, outline); + DrawRectangle(max_x, mid_y, size, size, fill); + DrawRectangleLines(max_x, mid_y, size, size, outline); } static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 112f3400fc..5b71d98fe9 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -49,6 +49,7 @@ void my_init(Env* env, Dict* kwargs) { env->sound_speed = dict_get(kwargs, "sound_speed")->value; env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; env->corner_reflectors = dict_get(kwargs, "corner_reflectors")->value; + env->reflector_strength = dict_get(kwargs, "reflector_strength")->value; env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; 
env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; From 18358a50e7f66a42e6310ce4b785cddf9c4b7e67 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 21:06:50 -0700 Subject: [PATCH 23/51] Set Bat sweep budget to sixteen runs --- config/bat.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/bat.ini b/config/bat.ini index d87e452bd5..a24860ff4e 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -99,7 +99,7 @@ method = Protein metric = perf metric_distribution = linear goal = maximize -max_runs = 8 +max_runs = 16 gpus = 1 downsample = 5 use_gpu = True From 653dc447877ce9dad7c8ab01c37c4931dd380c62 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Tue, 9 Jun 2026 21:08:49 -0700 Subject: [PATCH 24/51] Keep Bat sweep run count CLI controlled --- config/bat.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/bat.ini b/config/bat.ini index a24860ff4e..d87e452bd5 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -99,7 +99,7 @@ method = Protein metric = perf metric_distribution = linear goal = maximize -max_runs = 16 +max_runs = 8 gpus = 1 downsample = 5 use_gpu = True From a055440ff585c69c4f6238df223e02024af1abb1 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 16:33:06 -0700 Subject: [PATCH 25/51] Tighten Bat observations and sweep setup --- BAT_SPEC.md | 24 +++++-- config/bat.ini | 121 +++++++++++++++++--------------- config/default.ini | 7 -- ocean/bat/bat.h | 93 +++--------------------- ocean/bat/binding.c | 7 +- ocean/bat/tests/test_bat_core.c | 86 +++++++++++++++++++++-- pufferlib/sweep.py | 4 +- 7 files changed, 181 insertions(+), 161 deletions(-) diff --git a/BAT_SPEC.md b/BAT_SPEC.md index a1b473c03f..af907d70c6 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -246,10 +246,20 @@ Observation layout: 8. `chirps_used_norm = chirps_used / chirp_budget` 9. `forward_speed_norm` 10. 
`turn_rate_norm` +11. `timer_norm = elapsed_steps / max_steps`, clamped to `[0, 1]` Initial observation size: -- `OBS_SIZE = 40` +- `OBS_SIZE = 41` + +Timer normalization: + +- The timer starts at `0.0` on reset. +- With the default `max_steps = 512`, after step `N` the observation is + `N * BAT_DEFAULT_MAX_STEPS_INV`, where + `BAT_DEFAULT_MAX_STEPS_INV = 1.0 / 512.0`. +- Non-default `max_steps` values use `N / max_steps`. +- The observed timer is clamped to `[0.0, 1.0]`. Echo bins: @@ -321,6 +331,9 @@ Default reward model: - Small negative step cost to encourage efficient pursuit. - Dense progress reward based on reduction in true bat-to-bug distance. - `-1.0` for hitting walls or obstacles, terminal. +- `-1.0` for timeout, terminal. +- `-1.0` for attempting a chirp after `chirps_used_norm` reaches `1.0`, + terminal. - Tiny chirp cost so constant chirping is not fully free without causing chirp collapse. - Chirping again before the prior chirp's expected bug reflection has returned @@ -384,6 +397,7 @@ Termination: - Success: bat catches bug. - Failure: bat collides with a wall or obstacle. +- Failure: bat attempts to chirp after exhausting the chirp budget. - Timeout: `tick >= max_steps`. Reset: @@ -527,7 +541,7 @@ Follow the Breakout-style native env shape: - `Log log` - `unsigned int rng` - In `binding.c`, start with: - - `OBS_SIZE 70` + - `OBS_SIZE 41` - `NUM_ATNS 6` - `ACT_SIZES {5, 3, 8, 8, 4, 2}` - `OBS_TENSOR_T FloatTensor` @@ -653,9 +667,9 @@ train/eval after each rung, and commit each known-good rung separately. clutter legitimately need reacquisition chirps, so budget decay made later levels fail for the wrong reason. - Track `chirps_used / chirp_budget` as a normalized `0..1` observation. - - When the budget is exhausted, terminate with a `-1` style failure penalty - if the policy attempts another chirp. Do not terminate immediately after - the last valid chirp, so the final echo can still matter. 
+ - When the budget is exhausted, terminate with a `-1.0` failure penalty if + the policy attempts another chirp. Do not terminate immediately after the + last valid chirp, so the final echo can still matter. - Log chirp budget, used ratio, remaining ratio, and efficiency to W&B so sweeps can distinguish successful policies that waste every chirp from successful policies that catch the bug with useful chirp timing. diff --git a/config/bat.ini b/config/bat.ini index d87e452bd5..a51cb1d6ee 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -3,12 +3,12 @@ env_name = bat [vec] total_agents = 4096 -num_buffers = 4 +num_buffers = 7 num_threads = 8 [policy] hidden_size = 128 -num_layers = 4 +num_layers = 5 [torch] network = MinGRU @@ -21,85 +21,90 @@ frameskip = 1 width = 64 height = 64 num_obstacles = 3 -bat_radius = 2.0 -ear_separation_scale = 2.0 +bat_radius = 2 bug_radius = 1.5 -bat_max_speed = 26.339301984627312 -bat_min_speed = 2.0 -bat_accel = 40.31149837035544 +bat_max_speed = 18.344521522276608 +bat_min_speed = 2.9687783252761477 +bat_accel = 49.19185634905935 bat_turn_rate = 9.424778 -bug_speed = 4.0 +bug_speed = 4 max_steps = 512 render_target_fps = 60 record_video = 0 record_video_fps = 30 record_video_seconds = 30 record_video_audio = 1 +bug_echo_farther_penalty_scale = 0.05 +bug_echo_reward_scale = 0.2789761793107612 +bug_echo_min_displacement = 1 +bug_maneuver_frequency = 0.4 +bug_maneuver_start_level = 7 +bug_maneuver_strength = 0.4 +curriculum_bug_distance_step = 2.191219134146933 curriculum_enabled = 1 -curriculum_initial_level = 1 -curriculum_start_obstacles = 0 -curriculum_max_obstacles = 3 -curriculum_obstacle_step = 4 -curriculum_successes_per_level = 16 -curriculum_start_bug_distance = 8.0 -curriculum_max_bug_distance = 40.0 -curriculum_bug_distance_step = 5.0 +curriculum_inbound_bug_distance_step = 4 +curriculum_inbound_max_bug_distance = 56 curriculum_inbound_start_level = 8 -curriculum_inbound_max_bug_distance = 56.0 
-curriculum_inbound_bug_distance_step = 4.0 -inbound_bug_speed_multiplier = 1.75 -inbound_heading_noise_degrees = 18.0 -bug_maneuver_start_level = 7 -bug_maneuver_strength = 0.40 -bug_maneuver_frequency = 0.40 +curriculum_initial_level = 2 +curriculum_max_bug_distance = 40 +curriculum_max_obstacles = 3 +curriculum_obstacle_step = 7 +curriculum_start_bug_distance = 8 +curriculum_start_obstacles = 0 +curriculum_successes_per_level = 4 +ear_separation_scale = 1.6650928741686002 +early_chirp_penalty = 0.004893262939164256 freq_bins_per_ear = 16 -max_echo_range = 128.0 -sound_speed = 156.31428757902597 -reflector_spacing = 8.0 -corner_reflectors = 1 -reflector_strength = 2.0 +inbound_bug_speed_multiplier = 1.75 +inbound_heading_noise_degrees = 18 max_chirp_age_ticks = 30 -chirp_cooldown_ticks = 18 max_chirps_per_episode = 15 -chirp_cost = 0.0 -chirp_efficiency_reward = 1.8643105696678046 -valid_chirp_reward = 0.00010025883377819797 -early_chirp_penalty = 0.005642742950658355 -chirp_overlap_penalty = 0.013198598933272212 -bug_echo_reward_scale = 0.2790819053600338 -bug_echo_farther_penalty_scale = 0.14845872550312994 -bug_echo_min_displacement = 1.0 -step_cost = 0.0002156052256090029 +max_echo_range = 128 progress_reward_scale = 0.12 -collision_penalty = 1.2506363533934068 +reflector_spacing = 8 +corner_reflectors = 1 +reflector_strength = 0.6 +sound_speed = 180 +step_cost = 0.0001 +valid_chirp_reward = 0.0007907239068821393 +chirp_cooldown_ticks = 13 +chirp_cost = 0 +chirp_efficiency_reward = 2 +chirp_overlap_penalty = 0.010041805305229239 +collision_penalty = 2 [train] -total_timesteps = 39_302_645 -learning_rate = 0.030158879761834633 -gamma = 0.9912833685506411 -gae_lambda = 0.9539098999787969 -replay_ratio = 1.25 -clip_coef = 0.3062252432359945 -vf_coef = 3.5394785870481407 -vf_clip_coef = 0.4792786619293973 -max_grad_norm = 2.644620908575875 -ent_coef = 0.0005000000000000004 -beta1 = 0.8651428827975649 -beta2 = 0.9995922336033932 -eps = 1e-14 -minibatch_size = 8192 
+anneal_ent_coef = 0 +anneal_lr = 1 +beta1 = 0.9276441339551883 +beta2 = 0.9996971732178918 +clip_coef = 0.40220288325366393 +ent_coef = 0.009414797813275677 +eps = 1.9967415498800064e-13 +gae_lambda = 0.92 +gamma = 0.996174294667965 +gpus = 1 horizon = 64 -vtrace_rho_clip = 1.0 -vtrace_c_clip = 1.0 -prio_alpha = 0.6482377203508398 -prio_beta0 = 0.06263998034560592 +learning_rate = 0.011740312394802619 +max_grad_norm = 3.140246920394498 +min_ent_coef_ratio = 0.1 +min_lr_ratio = 0 +minibatch_size = 8192 +prio_alpha = 0.8583393112514485 +prio_beta0 = 0.5859110774374029 +replay_ratio = 1.25 +seed = 42 +total_timesteps = 32663421 +vf_clip_coef = 0.7961030866198207 +vf_coef = 4.268711755319329 +vtrace_c_clip = 1.272270973533673 +vtrace_rho_clip = 4.399657272231955 [sweep] method = Protein metric = perf metric_distribution = linear goal = maximize -max_runs = 8 gpus = 1 downsample = 5 use_gpu = True diff --git a/config/default.ini b/config/default.ini index 29bc1808b7..5bec213815 100644 --- a/config/default.ini +++ b/config/default.ini @@ -100,13 +100,6 @@ downsample = 5 use_gpu = True prune_pareto = True early_stop_quantile = 0.3 -# When set, each sweep trial is scored by winrate in a match against a fixed -# enemy checkpoint rather than by the training-time env/score. Score key emitted -# as env/match_score; set match_enemy_model_path to '' to disable. 
-match_enemy_model_path = '' -match_num_games = 1024 -match_enemy_hidden_size = 0 -match_enemy_num_layers = 0 [sweep.train.total_timesteps] distribution = log_normal diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 981c71c285..04cabfdb14 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -11,7 +11,7 @@ #include "raylib.h" #endif -#define BAT_OBS_SIZE 40 +#define BAT_OBS_SIZE 41 #define BAT_NUM_ACTIONS 6 #define BAT_MOVE_ACTIONS 3 #define BAT_TURN_ACTIONS 3 @@ -41,6 +41,8 @@ #define BAT_MAX_OBSTACLES 16 #define BAT_TICK_RATE (1.0f/60.0f) +#define BAT_DEFAULT_MAX_STEPS 512 +#define BAT_DEFAULT_MAX_STEPS_INV (1.0f / (float)BAT_DEFAULT_MAX_STEPS) #define BAT_PI 3.14159265358979323846f #define BAT_CHIRP_HISTORY 4 #define BAT_CHIRP_RINGS 5 @@ -713,7 +715,8 @@ static inline float bat_budget_difficulty(Bat* env) { } static inline float bat_success_reward(Bat* env) { - return 1.0f + env->chirp_efficiency_reward * bat_chirp_efficiency(env); + // Previously: 1.0f + env->chirp_efficiency_reward * bat_chirp_efficiency(env). The flat 1.0f base was dropped; success reward is now the chirp-efficiency bonus only. + return env->chirp_efficiency_reward * bat_chirp_efficiency(env); } static inline float bat_current_distance_ratio(Bat* env) { @@ -864,84 +867,6 @@ static inline void generate_obstacles(Bat* env) { void init(Bat* env) { env->tick = 0; - if (env->num_agents <= 0) env->num_agents = 1; - if (env->frameskip <= 0) env->frameskip = 1; - if (env->width <= 0) env->width = 64; - if (env->height <= 0) env->height = 64; - if (env->max_steps <= 0) env->max_steps = 512; - if (env->bat_radius <= 0.0f) env->bat_radius = 2.0f; - if (env->ear_separation_scale <= 0.0f) env->ear_separation_scale = 0.75f; - env->ear_separation_scale = bat_clampf(env->ear_separation_scale, 0.25f, 2.0f); - if (env->bug_radius <= 0.0f) env->bug_radius = 1.5f; - if (env->bat_max_speed <= 0.0f) env->bat_max_speed = 12.0f; - if (env->bat_min_speed <= 0.0f) env->bat_min_speed = 0.20f * env->bat_max_speed; - env->bat_min_speed = bat_min_forward_speed(env); - if
(env->bat_accel <= 0.0f) env->bat_accel = 30.0f; - if (env->bat_turn_rate <= 0.0f) env->bat_turn_rate = BAT_PI; - if (env->bug_speed <= 0.0f) env->bug_speed = 4.0f; - if (env->freq_bins_per_ear <= 0) env->freq_bins_per_ear = BAT_FREQ_BINS; - if (env->max_echo_range <= 0.0f) env->max_echo_range = 128.0f; - if (env->sound_speed <= 0.0f) env->sound_speed = 60.0f; - if (env->reflector_spacing <= 0.0f) env->reflector_spacing = 8.0f; - env->corner_reflectors = env->corner_reflectors ? 1 : 0; - if (env->reflector_strength <= 0.0f) env->reflector_strength = 2.0f; - env->record_video = env->record_video ? 1 : 0; - env->record_video_fps = bat_record_video_fps(env); - env->record_video_seconds = bat_record_video_seconds(env); - env->record_video_audio = env->record_video_audio ? 1 : 0; - if (env->max_chirp_age_ticks <= 0) env->max_chirp_age_ticks = 30; - if (env->chirp_cooldown_ticks <= 0) env->chirp_cooldown_ticks = 12; - if (env->max_chirps_per_episode <= 0) env->max_chirps_per_episode = 20; - if (env->min_chirps_per_episode <= 0) env->min_chirps_per_episode = 10; - if (env->min_chirps_per_episode > env->max_chirps_per_episode) { - env->min_chirps_per_episode = env->max_chirps_per_episode; - } - if (env->chirp_budget_decay_levels <= 0) env->chirp_budget_decay_levels = 4; - if (env->step_cost <= 0.0f) env->step_cost = 0.001f; - if (env->progress_reward_scale <= 0.0f) env->progress_reward_scale = 0.05f; - if (env->collision_penalty <= 0.0f) env->collision_penalty = 1.0f; - if (env->chirp_cost < 0.0f) env->chirp_cost = 0.0f; - if (env->chirp_efficiency_reward < 0.0f) env->chirp_efficiency_reward = 0.0f; - if (env->valid_chirp_reward <= 0.0f) env->valid_chirp_reward = 0.0005f; - if (env->early_chirp_penalty <= 0.0f) env->early_chirp_penalty = 0.001f; - if (env->chirp_overlap_penalty < 0.0f) env->chirp_overlap_penalty = 0.0f; - if (env->bug_echo_reward_scale <= 0.0f) env->bug_echo_reward_scale = 0.0f; - if (env->bug_echo_farther_penalty_scale <= 0.0f) 
env->bug_echo_farther_penalty_scale = 0.10f; - env->bug_echo_farther_penalty_scale = bat_clampf(env->bug_echo_farther_penalty_scale, 0.0f, 1.0f); - if (env->bug_echo_min_displacement <= 0.0f) env->bug_echo_min_displacement = 1.0f; - if (env->rng == 0) env->rng = 1; - - if (env->num_obstacles < 0) env->num_obstacles = 0; - if (env->num_obstacles > BAT_MAX_OBSTACLES) env->num_obstacles = BAT_MAX_OBSTACLES; - if (env->curriculum_start_obstacles < 0) env->curriculum_start_obstacles = 0; - if (env->curriculum_max_obstacles <= 0) env->curriculum_max_obstacles = env->num_obstacles; - if (env->curriculum_max_obstacles > BAT_MAX_OBSTACLES) env->curriculum_max_obstacles = BAT_MAX_OBSTACLES; - if (env->curriculum_start_obstacles > env->curriculum_max_obstacles) { - env->curriculum_start_obstacles = env->curriculum_max_obstacles; - } - if (env->curriculum_initial_level < 0) env->curriculum_initial_level = 0; - if (env->curriculum_obstacle_step <= 0) env->curriculum_obstacle_step = 8; - if (env->curriculum_successes_per_level <= 0) env->curriculum_successes_per_level = 1; - if (env->curriculum_start_bug_distance <= 0.0f) env->curriculum_start_bug_distance = 14.0f; - if (env->curriculum_max_bug_distance <= 0.0f) { - env->curriculum_max_bug_distance = fminf(env->width, env->height) * 0.70f; - } - if (env->curriculum_bug_distance_step <= 0.0f) env->curriculum_bug_distance_step = 1.5f; - if (env->curriculum_inbound_start_level <= 0) env->curriculum_inbound_start_level = 8; - if (env->curriculum_inbound_max_bug_distance <= env->curriculum_max_bug_distance) { - env->curriculum_inbound_max_bug_distance = env->curriculum_max_bug_distance; - } - if (env->curriculum_inbound_bug_distance_step <= 0.0f) { - env->curriculum_inbound_bug_distance_step = env->curriculum_bug_distance_step; - } - if (env->inbound_bug_speed_multiplier <= 0.0f) env->inbound_bug_speed_multiplier = 1.5f; - env->inbound_bug_speed_multiplier = bat_clampf(env->inbound_bug_speed_multiplier, 1.0f, 4.0f); - if 
(env->inbound_heading_noise_degrees < 0.0f) env->inbound_heading_noise_degrees = 0.0f; - env->inbound_heading_noise_degrees = bat_clampf(env->inbound_heading_noise_degrees, 0.0f, 60.0f); - if (env->bug_maneuver_start_level <= 0) env->bug_maneuver_start_level = 7; - if (env->bug_maneuver_strength < 0.0f) env->bug_maneuver_strength = 0.0f; - env->bug_maneuver_strength = bat_clampf(env->bug_maneuver_strength, 0.0f, 0.75f); - if (env->bug_maneuver_frequency <= 0.0f) env->bug_maneuver_frequency = 0.35f; env->obstacle_x = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); env->obstacle_y = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); env->obstacle_w = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); @@ -1349,6 +1274,10 @@ void compute_observations(Bat* env) { float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, 0.0f, 1.0f); env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); + float timer_norm = env->max_steps == BAT_DEFAULT_MAX_STEPS + ? env->tick * BAT_DEFAULT_MAX_STEPS_INV + : env->tick / fmaxf(1.0f, (float)env->max_steps); + env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); } static inline void bat_reset_episode(Bat* env) { @@ -1589,8 +1518,7 @@ static inline float bat_next_chirp_overlap_fraction(Bat* env) { static inline int bat_update_chirp(Bat* env) { int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); if (emit) { - if (env->tick - env->last_chirp_tick >= env->chirp_cooldown_ticks && - env->chirps_emitted_episode >= env->chirp_budget) { + if (env->chirps_emitted_episode >= env->chirp_budget) { return -2; } return bat_try_emit_chirp(env) ? 
1 : -1; @@ -1669,6 +1597,7 @@ void c_step(Bat* env) { env->prev_bug_dist = bug_dist; if (env->tick >= env->max_steps) { + env->rewards[0] = -1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; add_log(env, 0.0f, 0.0f, 1.0f); diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 5b71d98fe9..4ee90a64df 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -1,5 +1,5 @@ #include "bat.h" -#define OBS_SIZE 40 +#define OBS_SIZE 41 #define NUM_ATNS 6 #define ACT_SIZES {3, 3, 8, 8, 4, 2} #define OBS_TENSOR_T FloatTensor @@ -69,9 +69,10 @@ void my_init(Env* env, Dict* kwargs) { void my_log(Log* log, Dict* out) { dict_set(out, "perf", log->perf); - dict_set(out, "base_perf", log->base_perf); dict_set(out, "score", log->score); + dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); + dict_set(out, "base_perf", log->base_perf); dict_set(out, "collision", log->collision); dict_set(out, "timeout", log->timeout); dict_set(out, "curriculum_level", log->curriculum_level); @@ -98,6 +99,4 @@ void my_log(Log* log, Dict* out) { dict_set(out, "mean_chirp_tick_norm", log->mean_chirp_tick_norm); dict_set(out, "mean_chirp_duration", log->mean_chirp_duration); dict_set(out, "mean_chirp_bandwidth", log->mean_chirp_bandwidth); - dict_set(out, "mean_echo_energy_left", log->mean_echo_energy_left); - dict_set(out, "mean_echo_energy_right", log->mean_echo_energy_right); } diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index e5648bf528..c94eb371c4 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -29,8 +29,10 @@ static Bat make_test_env(void) { .height = 64, .num_obstacles = 1, .bat_radius = 2.0f, + .ear_separation_scale = 0.75f, .bug_radius = 1.5f, .bat_max_speed = 12.0f, + .bat_min_speed = 2.4f, .bat_accel = 30.0f, .bat_turn_rate = 3.1415926f, .bug_speed = 4.0f, @@ -39,10 +41,33 @@ static Bat make_test_env(void) { .max_echo_range = 80.0f, 
.sound_speed = 100.0f, .reflector_spacing = 8.0f, + .reflector_strength = 2.0f, + .max_chirp_age_ticks = 30, + .chirp_cooldown_ticks = 12, + .max_chirps_per_episode = 20, + .min_chirps_per_episode = 10, + .chirp_budget_decay_levels = 4, .chirp_cost = 0.0005f, + .chirp_efficiency_reward = 1.0f, .step_cost = 0.001f, .progress_reward_scale = 0.05f, .collision_penalty = 1.0f, + .valid_chirp_reward = 0.0005f, + .early_chirp_penalty = 0.001f, + .bug_echo_farther_penalty_scale = 0.10f, + .bug_echo_min_displacement = 1.0f, + .curriculum_max_obstacles = 1, + .curriculum_obstacle_step = 8, + .curriculum_successes_per_level = 1, + .curriculum_start_bug_distance = 14.0f, + .curriculum_max_bug_distance = 44.8f, + .curriculum_bug_distance_step = 1.5f, + .curriculum_inbound_start_level = 8, + .curriculum_inbound_max_bug_distance = 44.8f, + .curriculum_inbound_bug_distance_step = 1.5f, + .inbound_bug_speed_multiplier = 1.5f, + .bug_maneuver_start_level = 7, + .bug_maneuver_frequency = 0.35f, .rng = 1, }; allocate(&env); @@ -90,6 +115,10 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { ASSERT_TRUE(env.chirps_emitted_episode == 1); ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 0.25f, 0.0001f); + env.chirps_emitted_episode = 12; + compute_observations(&env); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 1.0f, 0.0001f); + free_allocated(&env); return 0; } @@ -115,7 +144,7 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { env.max_chirps_per_episode = 1; env.min_chirps_per_episode = 1; env.chirp_budget_decay_levels = 4; - env.chirp_cooldown_ticks = 1; + env.chirp_cooldown_ticks = 5; env.early_chirp_penalty = 0.0f; c_reset(&env); @@ -126,8 +155,8 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { c_step(&env); ASSERT_TRUE(env.terminals[0] == 0.0f); ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 1.0f, 0.0001f); - env.tick = 
env.last_chirp_tick + env.chirp_cooldown_ticks; c_step(&env); ASSERT_TRUE(env.terminals[0] == 1.0f); @@ -138,6 +167,50 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { return 0; } +static int test_timer_observation_tracks_elapsed_fraction(void) { + Bat env = make_test_env(); + env.max_steps = 512; + c_reset(&env); + + ASSERT_TRUE(BAT_OBS_SIZE == 41); + ASSERT_FLOAT_NEAR(env.observations[40], 0.0f, 0.0001f); + + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; + env.actions[5] = 0.0f; + c_step(&env); + + ASSERT_FLOAT_NEAR(env.observations[40], 1.0f / 512.0f, 0.0001f); + + env.tick = 256; + compute_observations(&env); + ASSERT_FLOAT_NEAR(env.observations[40], 0.5f, 0.0001f); + + free_allocated(&env); + return 0; +} + +static int test_timeout_terminates_with_minus_one_reward(void) { + Bat env = make_test_env(); + env.num_obstacles = 0; + env.max_steps = 1; + env.progress_reward_scale = 0.0f; + env.step_cost = 0.0f; + c_reset(&env); + + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; + env.actions[5] = 0.0f; + c_step(&env); + + ASSERT_TRUE(env.terminals[0] == 1.0f); + ASSERT_FLOAT_NEAR(env.rewards[0], -1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.timeout, 1.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_chirp_efficiency_scores_low_usage_above_full_budget(void) { Bat env = make_test_env(); c_reset(&env); @@ -191,7 +264,7 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { c_step(&env); ASSERT_FLOAT_NEAR(env.terminals[0], 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.rewards[0], 1.90f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], 0.90f, 0.0001f); free_allocated(&env); return 0; @@ -388,6 +461,7 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { .height = 64, .num_obstacles = 0, .bat_radius = 2.0f, + .ear_separation_scale = 0.75f, .bug_radius = 1.5f, .bat_max_speed = 12.0f, .bat_accel = 30.0f, @@ -396,6 +470,7 @@ static int 
test_default_sound_speed_allows_one_tick_interaural_delay(void) { .max_steps = 512, .freq_bins_per_ear = BAT_FREQ_BINS, .max_echo_range = 80.0f, + .sound_speed = 60.0f, .reflector_spacing = 8.0f, .rng = 1, }; @@ -1063,7 +1138,7 @@ static float test_sum_obs(Bat* env, int offset, int count) { } static int test_bins_only_observation_layout(void) { - ASSERT_TRUE(BAT_OBS_SIZE == 40); + ASSERT_TRUE(BAT_OBS_SIZE == 41); ASSERT_TRUE(BAT_FREQ_BINS == 16); ASSERT_TRUE(BAT_LEFT_FREQ_OFFSET == 0); ASSERT_TRUE(BAT_RIGHT_FREQ_OFFSET == 16); @@ -1363,6 +1438,7 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { .bat_accel = 45.0f, .bat_turn_rate = 9.424778f, .bug_speed = 4.0f, + .max_echo_range = 128.0f, .sound_speed = 180.0f, .curriculum_max_bug_distance = 56.0f, .rng = 1, @@ -1758,6 +1834,8 @@ int main(void) { if (test_chirp_budget_observation_tracks_used_chirps()) return 1; if (test_chirp_budget_stays_fixed_with_curriculum_level()) return 1; if (test_chirping_after_budget_terminates_with_penalty()) return 1; + if (test_timer_observation_tracks_elapsed_fraction()) return 1; + if (test_timeout_terminates_with_minus_one_reward()) return 1; if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; if (test_chirp_perf_uses_fixed_fifteen_chirp_reference()) return 1; if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 36e27bf42a..219a9cb9eb 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -146,7 +146,9 @@ def _params_from_puffer_sweep(sweep_config, only_include=None): for name, param in sweep_config.items(): if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto', - 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'): + 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', + 'max_runs', 'match_enemy_model_path', 'match_num_games', + 'match_enemy_hidden_size', 
'match_enemy_num_layers'): continue assert isinstance(param, dict), f'Param {name} is not a dict' From 8d9428a85080a001248754cd18b8d8c0c2c9851b Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 17:03:59 -0700 Subject: [PATCH 26/51] Remove Bat config fallback guards --- BAT_PRIORITIES.md | 18 ++++++++++--- ocean/bat/bat.h | 66 ++++++++++++----------------------------------- 2 files changed, 32 insertions(+), 52 deletions(-) diff --git a/BAT_PRIORITIES.md b/BAT_PRIORITIES.md index f702a5d6f1..6df2accefb 100644 --- a/BAT_PRIORITIES.md +++ b/BAT_PRIORITIES.md @@ -15,7 +15,17 @@ Current near-term priorities for the Bat PufferLib environment. reflections should likely use distinguishable volume, timbre, panning, or marker sounds so the debug audio stays interpretable. -## 1. Bug-reflection chirp timing penalty +## 1. Add episode timer observation + +- Add a normalized episode timer observation so the policy knows urgency. +- For the current `max_steps = 512` Bat episode budget, expose a float in + `[0, 1]` representing elapsed time from `0` ticks to timeout. If the budget is + later changed to exactly `500`, scale the same way from `0..500`. +- The Bat8 visual evals show a likely failure mode where policies chirp too + little, settle into circling, and time out. Without a timer observation, the + policy has no direct signal that it is running out of episode time. + +## 2. Bug-reflection chirp timing penalty - Replace broad "chirp before all echoes clear" pressure with bug-specific timing pressure. @@ -27,7 +37,7 @@ Current near-term priorities for the Bat PufferLib environment. - Do not penalize based on all static wall/obstacle reflections; clutter may legitimately require reacquisition chirps. -## 2. Resume performance work +## 3. Resume performance work - Use level 7 and level 10 evals as visual sanity checks. 
- Focus on harder-level failures where the bat spends chirps before acquiring @@ -35,12 +45,14 @@ Current near-term priorities for the Bat PufferLib environment. - Keep reward shaping minimal and prefer terminal/curriculum/perf pressure where possible. -## 3. Prepare the next sweep +## 4. Prepare the next sweep - Make sure the next sweep includes any new timing penalty coefficient ranges. - Sweep `chirp_cooldown_ticks` in a bounded range. Current range is `6..18`. - Keep `max_chirps_per_episode` fixed at `15` for this sweep so budget does not confound timing penalty and cooldown effects. +- Cap policy sweep size at `hidden_size = 64..256` and `num_layers = 2..4` so + overnight sweeps do not waste runs on very slow oversized networks. - Keep sweep ranges bounded so runs cannot become extremely slow from oversized policies or excessive env settings. - Watch `perf`, `base_perf`, `curriculum_perf`, `chirps_emitted`, diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 04cabfdb14..bc5f0af73e 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -416,15 +416,12 @@ static inline float bat_chirp_ring_radius(float age_seconds, float slice, static inline int bat_chirp_slice_count(float duration_seconds) { int slices = (int)ceilf(duration_seconds / BAT_TICK_RATE); - if (slices < 1) slices = 1; - if (slices > BAT_MAX_CHIRP_SLICES) slices = BAT_MAX_CHIRP_SLICES; return slices; } static inline float bat_chirp_slice_seconds(ChirpEvent* chirp, int slice_idx) { int slices = chirp->slice_count > 0 ? 
chirp->slice_count : bat_chirp_slice_count(chirp->duration); - if (slice_idx < 0) slice_idx = 0; if (slice_idx >= slices) slice_idx = slices - 1; return ((slice_idx + 0.5f) / (float)slices) * chirp->duration; } @@ -456,7 +453,6 @@ static inline void bat_chirp_source_for_fraction(ChirpEvent* chirp, float slice, } static inline float bat_echo_time_seconds(float distance, float sound_speed) { - if (sound_speed <= 0.0f) return 0.0f; return 2.0f * distance / sound_speed; } @@ -466,9 +462,9 @@ static inline bool bat_echo_is_arriving(float echo_time, float chirp_age, } static inline float bat_chirp_age_norm_denominator(Bat* env) { - float travel_ticks = env->max_echo_range / fmaxf(1.0f, env->sound_speed) / BAT_TICK_RATE; + float travel_ticks = env->max_echo_range / env->sound_speed / BAT_TICK_RATE; float chirp_ticks = bat_chirp_duration_seconds(1.0f) / BAT_TICK_RATE; - return fmaxf(1.0f, 1.25f * (travel_ticks + chirp_ticks)); + return 1.25f * (travel_ticks + chirp_ticks); } static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { @@ -484,7 +480,6 @@ static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { } static inline float bat_norm_bin(int idx, int count) { - if (count <= 1) return 0.0f; return idx / (float)(count - 1); } @@ -533,8 +528,6 @@ static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, float max_x = (east ? (float)env->width : half_w) - margin; float min_y = (south ? half_h : 0.0f) + margin; float max_y = (south ? 
(float)env->height : half_h) - margin; - if (max_x < min_x) max_x = min_x; - if (max_y < min_y) max_y = min_y; *x = min_x + bat_randf(env) * (max_x - min_x); *y = min_y + bat_randf(env) * (max_y - min_y); } @@ -606,7 +599,7 @@ static inline float bat_curriculum_bug_speed(Bat* env) { if (bat_curriculum_inbound_enabled(env)) { speed *= env->inbound_bug_speed_multiplier; } - return fmaxf(0.0f, speed); + return speed; } static inline float bat_curriculum_bug_maneuver_strength(Bat* env) { @@ -628,13 +621,11 @@ static inline float bat_curriculum_bug_maneuver_frequency(Bat* env) { } static inline int bat_curriculum_chirp_budget(Bat* env) { - return env->max_chirps_per_episode > 0 ? env->max_chirps_per_episode : 1; + return env->max_chirps_per_episode; } static inline float bat_chirps_used_ratio(Bat* env) { - int budget = env->chirp_budget > 0 ? env->chirp_budget : env->max_chirps_per_episode; - if (budget <= 0) budget = 1; - return bat_clampf(env->chirps_emitted_episode / (float)budget, 0.0f, 1.0f); + return bat_clampf(env->chirps_emitted_episode / (float)env->chirp_budget, 0.0f, 1.0f); } static inline float bat_chirp_efficiency(Bat* env) { @@ -647,16 +638,11 @@ static inline float bat_chirp_perf(Bat* env) { } static inline float bat_min_forward_speed(Bat* env) { - float min_speed = env->bat_min_speed; - if (min_speed <= 0.0f) { - min_speed = 0.20f * env->bat_max_speed; - } - return bat_clampf(min_speed, 0.0f, env->bat_max_speed); + return env->bat_min_speed; } static inline float bat_norm_range(float value, float lo, float hi) { float span = hi - lo; - if (span <= 0.000001f) return 0.0f; return bat_clampf((value - lo) / span, 0.0f, 1.0f); } @@ -680,8 +666,7 @@ static inline float bat_curriculum_chirp_budget_difficulty(Bat* env) { static inline float bat_curriculum_motion_difficulty(Bat* env) { if (!env->curriculum_enabled) return 0.0f; if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; - float span = fmaxf(1.0f, - 
(float)(env->curriculum_inbound_start_level + 4 - env->bug_maneuver_start_level)); + float span = (float)(env->curriculum_inbound_start_level + 4 - env->bug_maneuver_start_level); return bat_clampf((env->curriculum_level - env->bug_maneuver_start_level + 1) / span, 0.0f, 1.0f); } @@ -704,7 +689,6 @@ static inline float bat_curriculum_difficulty(Bat* env) { weighted += 0.5f * motion; active_weight += 0.5f; } - if (active_weight <= 0.000001f) return 0.0f; return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } @@ -715,13 +699,12 @@ static inline float bat_budget_difficulty(Bat* env) { } static inline float bat_success_reward(Bat* env) { - //return 1.0f + env->chirp_efficiency_reward * bat_chirp_efficiency(env); // old retarded and gay code return env->chirp_efficiency_reward * bat_chirp_efficiency(env); } static inline float bat_current_distance_ratio(Bat* env) { float dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); - return dist / fmaxf(1.0f, env->start_bug_dist); + return dist / env->start_bug_dist; } static inline void bat_accumulate_distance_region(float ratio, float amount, @@ -751,8 +734,6 @@ static inline void bat_record_chirp_timing(Bat* env) { static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { float margin = fmaxf(6.0f, fmaxf(env->bat_radius, env->bug_radius) + 3.0f); - target_distance = fmaxf(0.0f, target_distance); - for (int attempt = 0; attempt < 96; attempt++) { float angle = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; float dx = cosf(angle) * target_distance; @@ -948,26 +929,23 @@ static inline void add_log(Bat* env, float success, float collision, float timeo } env->log.chirp_tempo_ratio += bat_clampf(tempo_ratio, 0.0f, 10.0f); env->log.first_chirp_tick_norm += env->first_chirp_tick >= 0.0f - ? bat_clampf(env->first_chirp_tick / fmaxf(1.0f, (float)env->max_steps), 0.0f, 1.0f) + ? 
bat_clampf(env->first_chirp_tick / (float)env->max_steps, 0.0f, 1.0f) : 1.0f; env->log.mean_chirp_tick_norm += env->chirps_emitted_episode > 0 - ? bat_clampf((env->chirp_tick_sum / chirps) / fmaxf(1.0f, (float)env->max_steps), 0.0f, 1.0f) + ? bat_clampf((env->chirp_tick_sum / chirps) / (float)env->max_steps, 0.0f, 1.0f) : 1.0f; if (env->chirps_emitted_episode > 0) { env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; env->log.mean_chirp_bandwidth += env->chirp_bandwidth_sum / env->chirps_emitted_episode; } - env->log.mean_echo_energy_left += env->echo_energy_left_sum / fmaxf(1.0f, (float)(env->tick + 1)); - env->log.mean_echo_energy_right += env->echo_energy_right_sum / fmaxf(1.0f, (float)(env->tick + 1)); + env->log.mean_echo_energy_left += env->echo_energy_left_sum / (float)(env->tick + 1); + env->log.mean_echo_energy_right += env->echo_energy_right_sum / (float)(env->tick + 1); env->log.n += 1.0f; } static inline int bat_freq_bin_index(Bat* env, float freq_norm) { int bins = env->freq_bins_per_ear; - if (bins <= 0) bins = BAT_FREQ_BINS; - if (bins > BAT_FREQ_BINS) bins = BAT_FREQ_BINS; int bin = (int)(bat_clampf(freq_norm, 0.0f, 1.0f) * bins); - if (bin < 0) bin = 0; if (bin >= bins) bin = bins - 1; return bin; } @@ -1083,7 +1061,7 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed + 0.0001f), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed), -1.0f, 1.0f); float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); if (left_path <= env->max_echo_range) { @@ -1152,7 +1130,7 @@ static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, int slice_idx) { int slices = chirp->slice_count > 0 ? 
chirp->slice_count : bat_chirp_slice_count(chirp->duration); - if (slice_idx < 0 || slice_idx >= slices || slice_idx >= BAT_MAX_CHIRP_SLICES) { + if (slice_idx >= slices || slice_idx >= BAT_MAX_CHIRP_SLICES) { return; } @@ -1186,12 +1164,6 @@ static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { int slices = chirp->slice_count > 0 ? chirp->slice_count : bat_chirp_slice_count(chirp->duration); chirp->slice_count = slices; - if (chirp->slices_scheduled < 0) { - chirp->slices_scheduled = 0; - } - if (chirp->slices_scheduled > slices) { - chirp->slices_scheduled = slices; - } while (chirp->slices_scheduled < slices) { int slice_idx = chirp->slices_scheduled; bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); @@ -1205,8 +1177,6 @@ static inline void bat_schedule_due_chirp_slices(Bat* env) { if (!chirp->active) continue; int slices = chirp->slice_count > 0 ? chirp->slice_count : bat_chirp_slice_count(chirp->duration); - if (chirp->slices_scheduled < 0) chirp->slices_scheduled = 0; - if (chirp->slices_scheduled > slices) chirp->slices_scheduled = slices; float age_ticks = (float)(env->tick - chirp->birth_tick); while (chirp->slices_scheduled < slices) { @@ -1276,7 +1246,7 @@ void compute_observations(Bat* env) { env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); float timer_norm = env->max_steps == BAT_DEFAULT_MAX_STEPS ? 
env->tick * BAT_DEFAULT_MAX_STEPS_INV - : env->tick / fmaxf(1.0f, (float)env->max_steps); + : env->tick / (float)env->max_steps; env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); } @@ -1507,10 +1477,8 @@ static inline bool bat_try_emit_chirp(Bat* env) { } static inline float bat_next_chirp_overlap_fraction(Bat* env) { - if (env->chirps_emitted_episode <= 0) return 0.0f; if (env->last_bug_echo_expected_tick <= (float)env->tick) return 0.0f; float wait_ticks = env->last_bug_echo_expected_tick - (float)env->last_chirp_tick; - if (wait_ticks <= 0.000001f) return 0.0f; float remaining_ticks = env->last_bug_echo_expected_tick - (float)env->tick; return bat_clampf(remaining_ticks / wait_ticks, 0.0f, 1.0f); } @@ -1612,7 +1580,7 @@ void c_step(Bat* env) { env->bat_x, env->bat_y); if (bat_echo_displacement >= env->bug_echo_min_displacement) { float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) - / fmaxf(1.0f, env->max_echo_range); + / env->max_echo_range; if (echo_progress > 0.0f) { env->rewards[0] += env->bug_echo_reward_scale * echo_progress; } else if (echo_progress < 0.0f) { @@ -1690,7 +1658,7 @@ static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed + 0.0001f), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed), -1.0f, 1.0f); float amp = strength / (1.0f + 0.02f * distance * distance); float alpha = bat_clampf(0.20f + amp * 2.0f, 0.20f, 0.90f); Color color = bat_doppler_ray_color(doppler, alpha); From ac8165aeb184ff98d6bd40c4402a872a0ed90c44 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 17:12:40 -0700 Subject: [PATCH 27/51] Remove more Bat fallback guards --- ocean/bat/bat.h | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 
deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index bc5f0af73e..38528264a2 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -315,8 +315,6 @@ static inline float bat_clampf(float v, float lo, float hi) { static inline int bat_action_index(float v, int n) { int idx = (int)v; - if (idx < 0) return 0; - if (idx >= n) return n - 1; return idx; } @@ -329,32 +327,23 @@ static inline bool bat_record_video_enabled(Bat* env) { } static inline int bat_record_video_fps(Bat* env) { - int fps = env->record_video_fps > 0 ? env->record_video_fps : 30; - if (fps < 1) fps = 1; - if (fps > 120) fps = 120; - return fps; + return env->record_video_fps; } static inline int bat_record_video_seconds(Bat* env) { - int seconds = env->record_video_seconds > 0 ? env->record_video_seconds : 20; - if (seconds < 1) seconds = 1; - if (seconds > 600) seconds = 600; - return seconds; + return env->record_video_seconds; } static inline int bat_record_frame_samples(int fps) { - if (fps <= 0) fps = 30; return BAT_AUDIO_SAMPLE_RATE / fps; } static inline int bat_record_max_frames(int fps, int seconds) { - if (fps <= 0) fps = 30; - if (seconds <= 0) seconds = 20; return fps * seconds; } static inline float bat_chirp_duration_seconds(float duration_norm) { - return 0.04f + 0.18f * bat_clampf(duration_norm, 0.0f, 1.0f); + return 0.04f + 0.18f * duration_norm; } static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps) { @@ -370,16 +359,13 @@ static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_no } static inline float bat_chirp_audio_frequency_hz(float freq_norm) { - return BAT_AUDIO_MIN_HZ + bat_clampf(freq_norm, 0.0f, 1.0f) + return BAT_AUDIO_MIN_HZ + freq_norm * (BAT_AUDIO_MAX_HZ - BAT_AUDIO_MIN_HZ); } static inline float bat_chirp_audio_instant_hz(float start_norm, float end_norm, float duration_seconds, float t_seconds) { - if (duration_seconds <= 0.0f) { - return bat_chirp_audio_frequency_hz(start_norm); - } - float t = 
bat_clampf(t_seconds / duration_seconds, 0.0f, 1.0f); + float t = t_seconds / duration_seconds; float start_hz = bat_chirp_audio_frequency_hz(start_norm); float end_hz = bat_chirp_audio_frequency_hz(end_norm); return start_hz + t * (end_hz - start_hz); @@ -395,9 +381,8 @@ static inline float bat_chirp_audio_envelope(float t_norm) { static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, float duration_seconds, int sample_index, int sample_rate) { - if (duration_seconds <= 0.0f || sample_index < 0 || sample_rate <= 0) return 0.0f; float t = sample_index / (float)sample_rate; - if (t < 0.0f || t >= duration_seconds) return 0.0f; + if (t >= duration_seconds) return 0.0f; float start_hz = bat_chirp_audio_frequency_hz(start_norm); float end_hz = bat_chirp_audio_frequency_hz(end_norm); @@ -447,7 +432,7 @@ static inline void bat_chirp_source_for_fraction(ChirpEvent* chirp, float slice, float* source_x, float* source_y) { int slices = chirp->slice_count > 0 ? chirp->slice_count : bat_chirp_slice_count(chirp->duration); - int slice_idx = (int)floorf(bat_clampf(slice, 0.0f, 1.0f) * (float)slices); + int slice_idx = (int)floorf(slice * (float)slices); if (slice_idx >= slices) slice_idx = slices - 1; bat_chirp_source_for_slice(chirp, slice_idx, source_x, source_y); } @@ -468,13 +453,13 @@ static inline float bat_chirp_age_norm_denominator(Bat* env) { } static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { - float f = bat_clampf(freq_norm, 0.0f, 1.0f); + float f = freq_norm; float mid = 1.0f - fabsf(2.0f * f - 1.0f); BatColor color = { .r = (unsigned char)(255.0f * (1.0f - f) + 45.0f * f), .g = (unsigned char)(45.0f + 180.0f * mid), .b = (unsigned char)(45.0f * (1.0f - f) + 255.0f * f), - .a = (unsigned char)(255.0f * bat_clampf(alpha_norm, 0.0f, 1.0f)), + .a = (unsigned char)(255.0f * alpha_norm), }; return color; } @@ -556,12 +541,11 @@ static inline void bat_sample_spawns(Bat* env) { static inline int 
bat_curriculum_obstacles(Bat* env) { if (!env->curriculum_enabled) return env->num_obstacles; - int step = env->curriculum_obstacle_step <= 0 ? 1 : env->curriculum_obstacle_step; + int step = env->curriculum_obstacle_step; int count = env->curriculum_start_obstacles; if (env->curriculum_level > 0) { count = env->curriculum_start_obstacles + 1 + (env->curriculum_level - 1) / step; } - if (count < 0) count = 0; if (count > env->curriculum_max_obstacles) count = env->curriculum_max_obstacles; if (count > BAT_MAX_OBSTACLES) count = BAT_MAX_OBSTACLES; return count; @@ -945,7 +929,7 @@ static inline void add_log(Bat* env, float success, float collision, float timeo static inline int bat_freq_bin_index(Bat* env, float freq_norm) { int bins = env->freq_bins_per_ear; - int bin = (int)(bat_clampf(freq_norm, 0.0f, 1.0f) * bins); + int bin = (int)(freq_norm * bins); if (bin >= bins) bin = bins - 1; return bin; } @@ -1234,7 +1218,6 @@ void compute_observations(Bat* env) { if (env->last_chirp_tick < 0) chirp_age = (int)ceilf(chirp_age_denom); env->chirp_age_ticks = chirp_age; int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); - if (cooldown < 0) cooldown = 0; env->observations[BAT_CHIRP_AGE_OBS] = bat_clampf(chirp_age / chirp_age_denom, 0.0f, 1.0f); env->observations[BAT_CHIRP_COOLDOWN_OBS] = bat_clampf(cooldown / (float)env->chirp_cooldown_ticks, 0.0f, 1.0f); env->observations[BAT_CHIRP_START_OBS] = env->last_chirp_start_freq; @@ -1782,7 +1765,6 @@ static inline void bat_play_chirp_audio(Bat* env) { float duration = bat_chirp_audio_duration_seconds(env, env->last_chirp_duration); int sample_count = (int)ceilf(duration * BAT_AUDIO_SAMPLE_RATE); - if (sample_count <= 0) return; short* samples = (short*)malloc(sample_count * sizeof(short)); if (samples == NULL) return; @@ -2031,7 +2013,6 @@ void c_render(Bat* env) { float hy = env->bat_y + sinf(env->bat_heading) * env->bat_radius * 2.0f; DrawLine((int)(env->bat_x * sx), (int)(env->bat_y * sy), (int)(hx 
* sx), (int)(hy * sy), WHITE); int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); - if (cooldown < 0) cooldown = 0; DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, env->chirps_emitted_episode, cooldown), 10, 10, 20, RAYWHITE); EndDrawing(); From 227dec5398d03ee17bd1bcf2fd86ca43d352a6da Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 17:28:35 -0700 Subject: [PATCH 28/51] Remove remaining Bat spawn bloat --- ocean/bat/bat.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 38528264a2..2e8c18a3e8 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -519,7 +519,6 @@ static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, static inline void bat_sample_spawns(Bat* env) { int bat_quadrant = (int)(bat_randf(env) * 4.0f); - if (bat_quadrant > 3) bat_quadrant = 3; int bug_quadrant = bat_quadrant ^ 3; float min_sep = fminf(env->width, env->height) * 0.31f; @@ -566,7 +565,6 @@ static inline bool bat_curriculum_inbound_enabled(Bat* env) { static inline float bat_curriculum_inbound_bug_distance(Bat* env) { float base = env->curriculum_max_bug_distance; int extra_levels = env->curriculum_level - env->curriculum_inbound_start_level + 1; - if (extra_levels < 1) extra_levels = 1; float distance = base + env->curriculum_inbound_bug_distance_step * extra_levels; return bat_clampf(distance, base, env->curriculum_inbound_max_bug_distance); } From 9917db41134c89d8e971a03801ef17ab2a6a3179 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 17:41:44 -0700 Subject: [PATCH 29/51] Simplify Bat helper scaffolding --- ocean/bat/bat.h | 70 ++++++++------------------------- ocean/bat/tests/test_bat_core.c | 6 +-- 2 files changed, 20 insertions(+), 56 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 2e8c18a3e8..e2a2386307 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -399,20 +399,9 @@ static inline 
float bat_chirp_ring_radius(float age_seconds, float slice, return sound_speed * ring_age; } -static inline int bat_chirp_slice_count(float duration_seconds) { - int slices = (int)ceilf(duration_seconds / BAT_TICK_RATE); - return slices; -} - -static inline float bat_chirp_slice_seconds(ChirpEvent* chirp, int slice_idx) { - int slices = chirp->slice_count > 0 ? chirp->slice_count : - bat_chirp_slice_count(chirp->duration); - if (slice_idx >= slices) slice_idx = slices - 1; - return ((slice_idx + 0.5f) / (float)slices) * chirp->duration; -} - static inline float bat_chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { - return bat_chirp_slice_seconds(chirp, slice_idx) / BAT_TICK_RATE; + return ((slice_idx + 0.5f) / (float)chirp->slice_count) * + chirp->duration / BAT_TICK_RATE; } static inline void bat_chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, @@ -430,8 +419,7 @@ static inline void bat_chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, static inline void bat_chirp_source_for_fraction(ChirpEvent* chirp, float slice, float* source_x, float* source_y) { - int slices = chirp->slice_count > 0 ? 
chirp->slice_count : - bat_chirp_slice_count(chirp->duration); + int slices = chirp->slice_count; int slice_idx = (int)floorf(slice * (float)slices); if (slice_idx >= slices) slice_idx = slices - 1; bat_chirp_source_for_slice(chirp, slice_idx, source_x, source_y); @@ -441,11 +429,6 @@ static inline float bat_echo_time_seconds(float distance, float sound_speed) { return 2.0f * distance / sound_speed; } -static inline bool bat_echo_is_arriving(float echo_time, float chirp_age, - float window) { - return fabsf(chirp_age - echo_time) <= window; -} - static inline float bat_chirp_age_norm_denominator(Bat* env) { float travel_ticks = env->max_echo_range / env->sound_speed / BAT_TICK_RATE; float chirp_ticks = bat_chirp_duration_seconds(1.0f) / BAT_TICK_RATE; @@ -468,16 +451,14 @@ static inline float bat_norm_bin(int idx, int count) { return idx / (float)(count - 1); } -static inline float bat_len(float x, float y) { - return sqrtf(x*x + y*y); -} - static inline float bat_dist(float ax, float ay, float bx, float by) { - return bat_len(bx - ax, by - ay); + float dx = bx - ax; + float dy = by - ay; + return sqrtf(dx*dx + dy*dy); } static inline void bat_norm_vec(float x, float y, float* ox, float* oy) { - float l = bat_len(x, y); + float l = sqrtf(x*x + y*y); if (l <= 0.000001f) { *ox = 1.0f; *oy = 0.0f; @@ -602,10 +583,6 @@ static inline float bat_curriculum_bug_maneuver_frequency(Bat* env) { return env->bug_maneuver_frequency * bat_clampf(multiplier, 1.0f, 2.5f); } -static inline int bat_curriculum_chirp_budget(Bat* env) { - return env->max_chirps_per_episode; -} - static inline float bat_chirps_used_ratio(Bat* env) { return bat_clampf(env->chirps_emitted_episode / (float)env->chirp_budget, 0.0f, 1.0f); } @@ -619,10 +596,6 @@ static inline float bat_chirp_perf(Bat* env) { return bat_clampf(raw, BAT_CHIRP_PERF_FLOOR, 1.0f); } -static inline float bat_min_forward_speed(Bat* env) { - return env->bat_min_speed; -} - static inline float bat_norm_range(float value, float lo, 
float hi) { float span = hi - lo; return bat_clampf((value - lo) / span, 0.0f, 1.0f); @@ -640,11 +613,6 @@ static inline float bat_curriculum_obstacle_difficulty(Bat* env) { (float)env->curriculum_start_obstacles, (float)env->curriculum_max_obstacles); } -static inline float bat_curriculum_chirp_budget_difficulty(Bat* env) { - (void)env; - return 0.0f; -} - static inline float bat_curriculum_motion_difficulty(Bat* env) { if (!env->curriculum_enabled) return 0.0f; if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; @@ -864,7 +832,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo float curriculum_difficulty = bat_curriculum_difficulty(env); float distance_difficulty = bat_curriculum_distance_difficulty(env); float obstacle_difficulty = bat_curriculum_obstacle_difficulty(env); - float chirp_budget_difficulty = bat_curriculum_chirp_budget_difficulty(env); float motion_difficulty = bat_curriculum_motion_difficulty(env); float budget_difficulty = bat_budget_difficulty(env); float chirp_efficiency = bat_chirp_efficiency(env); @@ -882,7 +849,7 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_perf += success * curriculum_difficulty; env->log.curriculum_distance_difficulty += distance_difficulty; env->log.curriculum_obstacle_difficulty += obstacle_difficulty; - env->log.curriculum_chirp_budget_difficulty += chirp_budget_difficulty; + env->log.curriculum_chirp_budget_difficulty += 0.0f; env->log.curriculum_motion_difficulty += motion_difficulty; env->log.budget_difficulty += budget_difficulty; env->log.num_obstacles += env->num_obstacles; @@ -1110,8 +1077,7 @@ static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, int slice_idx) { - int slices = chirp->slice_count > 0 ? 
chirp->slice_count : - bat_chirp_slice_count(chirp->duration); + int slices = chirp->slice_count; if (slice_idx >= slices || slice_idx >= BAT_MAX_CHIRP_SLICES) { return; } @@ -1143,9 +1109,7 @@ static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, } static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { - int slices = chirp->slice_count > 0 ? chirp->slice_count : - bat_chirp_slice_count(chirp->duration); - chirp->slice_count = slices; + int slices = chirp->slice_count; while (chirp->slices_scheduled < slices) { int slice_idx = chirp->slices_scheduled; bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); @@ -1157,8 +1121,7 @@ static inline void bat_schedule_due_chirp_slices(Bat* env) { for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; - int slices = chirp->slice_count > 0 ? chirp->slice_count : - bat_chirp_slice_count(chirp->duration); + int slices = chirp->slice_count; float age_ticks = (float)(env->tick - chirp->birth_tick); while (chirp->slices_scheduled < slices) { @@ -1235,7 +1198,7 @@ static inline void bat_reset_episode(Bat* env) { env->tick = 0; env->bat_turn_velocity = 0.0f; env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; - float initial_speed = bat_min_forward_speed(env); + float initial_speed = env->bat_min_speed; env->bat_vx = cosf(env->bat_heading) * initial_speed; env->bat_vy = sinf(env->bat_heading) * initial_speed; if (env->curriculum_enabled && env->curriculum_level < env->curriculum_initial_level) { @@ -1257,7 +1220,7 @@ static inline void bat_reset_episode(Bat* env) { memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; bat_clear_echo_queue(env); - env->chirp_budget = bat_curriculum_chirp_budget(env); + env->chirp_budget = env->max_chirps_per_episode; env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; @@ -1391,7 +1354,7 @@ static inline void bat_update_motion(Bat* 
env, float dt) { float fx = cosf(env->bat_heading); float fy = sinf(env->bat_heading); float speed = env->bat_vx * fx + env->bat_vy * fy; - float min_speed = bat_min_forward_speed(env); + float min_speed = env->bat_min_speed; if (speed < min_speed) speed = min_speed; if (move == BAT_THRUST_FORWARD) speed += env->bat_accel * dt; @@ -1444,7 +1407,7 @@ static inline bool bat_try_emit_chirp(Bat* env) { chirp->end_freq = env->last_chirp_end_freq; chirp->duration = bat_chirp_duration_seconds(env->last_chirp_duration); chirp->birth_tick = env->tick; - chirp->slice_count = bat_chirp_slice_count(chirp->duration); + chirp->slice_count = (int)ceilf(chirp->duration / BAT_TICK_RATE); chirp->slices_scheduled = 0; for (int i = 0; i < BAT_MAX_CHIRP_SLICES; i++) { chirp->source_x[i] = chirp->x; @@ -1632,7 +1595,8 @@ static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; float distance = bat_dist(chirp->x, chirp->y, rx, ry); float echo_time = bat_echo_time_seconds(distance, env->sound_speed); - if (!bat_echo_is_arriving(echo_time, age_seconds, 0.025f)) return; + bool echo_arriving_now = fabsf(age_seconds - echo_time) <= 0.025f; + if (!echo_arriving_now) return; float ux, uy; bat_norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index c94eb371c4..527ccd001b 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -303,7 +303,6 @@ static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components ASSERT_FLOAT_NEAR(bat_curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); ASSERT_FLOAT_NEAR(bat_curriculum_obstacle_difficulty(&env), 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_chirp_budget_difficulty(&env), 0.0000000f, 0.0001f); ASSERT_FLOAT_NEAR(bat_curriculum_motion_difficulty(&env), 0.0000000f, 0.0001f); ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.5000000f, 
0.0001f); add_log(&env, 1.0f, 0.0f, 0.0f); @@ -1123,8 +1122,8 @@ static int test_reflection_arrives_at_two_way_travel_time(void) { float echo_time = bat_echo_time_seconds(distance, sound_speed); ASSERT_FLOAT_NEAR(echo_time, 0.5f, 0.0001f); - ASSERT_TRUE(bat_echo_is_arriving(echo_time, echo_time + 0.005f, 0.02f)); - ASSERT_TRUE(!bat_echo_is_arriving(echo_time, echo_time + 0.050f, 0.02f)); + ASSERT_TRUE(fabsf((echo_time + 0.005f) - echo_time) <= 0.02f); + ASSERT_TRUE(fabsf((echo_time + 0.050f) - echo_time) > 0.02f); return 0; } @@ -1467,6 +1466,7 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { .birth_tick = env.tick, .active = 1, }; + chirp.slice_count = (int)ceilf(chirp.duration / BAT_TICK_RATE); bat_schedule_chirp_echoes(&env, &chirp); float bug_energy = 0.0f; From 513387a5049b3951ad4d5553d95b9ba600d48ef2 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 19:12:23 -0700 Subject: [PATCH 30/51] Slim Bat logging and recording code --- ocean/bat/bat.h | 304 +++----------------------------- ocean/bat/bat_record.h | 164 +++++++++++++++++ ocean/bat/binding.c | 9 +- ocean/bat/tests/test_bat_core.c | 72 ++------ 4 files changed, 208 insertions(+), 341 deletions(-) create mode 100644 ocean/bat/bat_record.h diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index e2a2386307..b68f44e409 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -44,6 +44,7 @@ #define BAT_DEFAULT_MAX_STEPS 512 #define BAT_DEFAULT_MAX_STEPS_INV (1.0f / (float)BAT_DEFAULT_MAX_STEPS) #define BAT_PI 3.14159265358979323846f +#define BAT_TWO_PI (2.0f * BAT_PI) #define BAT_CHIRP_HISTORY 4 #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 @@ -54,21 +55,12 @@ #define BAT_AUDIO_MAX_HZ 3600.0f #define BAT_AUDIO_VOLUME 0.22f #define BAT_RECORD_MAX_VOICES 16 -#define BAT_BUDGET_EASY_CHIRPS 15.0f -#define BAT_BUDGET_EDGE_CHIRPS 5.0f #define BAT_CHIRP_PERF_REFERENCE_CHIRPS 15.0f #define BAT_CHIRP_PERF_FLOOR 0.05f #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 
-typedef struct BatColor { - unsigned char r; - unsigned char g; - unsigned char b; - unsigned char a; -} BatColor; - typedef struct ChirpEvent { float x; float y; @@ -104,7 +96,6 @@ typedef struct Log { float score; float episode_return; float episode_length; - float success; float collision; float timeout; float curriculum_level; @@ -114,15 +105,10 @@ typedef struct Log { float curriculum_obstacle_difficulty; float curriculum_chirp_budget_difficulty; float curriculum_motion_difficulty; - float budget_difficulty; float num_obstacles; - float bug_distance_start; - float bug_distance_final; - float bug_distance_delta; float chirps_emitted; float chirp_budget; float chirps_used_ratio; - float chirps_remaining_ratio; float chirp_efficiency; float chirp_perf; float chirp_overlap_fraction; @@ -135,8 +121,6 @@ typedef struct Log { float mean_chirp_tick_norm; float mean_chirp_duration; float mean_chirp_bandwidth; - float mean_echo_energy_left; - float mean_echo_energy_right; float n; } Log; @@ -271,8 +255,6 @@ typedef struct Bat { float ticks_near; float first_chirp_tick; float chirp_tick_sum; - float echo_energy_left_sum; - float echo_energy_right_sum; float chirp_cost; float chirp_efficiency_reward; @@ -318,30 +300,6 @@ static inline int bat_action_index(float v, int n) { return idx; } -static inline int bat_render_target_fps(Bat* env) { - return env->render_target_fps > 0 ? 
env->render_target_fps : 0; -} - -static inline bool bat_record_video_enabled(Bat* env) { - return env->record_video != 0; -} - -static inline int bat_record_video_fps(Bat* env) { - return env->record_video_fps; -} - -static inline int bat_record_video_seconds(Bat* env) { - return env->record_video_seconds; -} - -static inline int bat_record_frame_samples(int fps) { - return BAT_AUDIO_SAMPLE_RATE / fps; -} - -static inline int bat_record_max_frames(int fps, int seconds) { - return fps * seconds; -} - static inline float bat_chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * duration_norm; } @@ -355,7 +313,7 @@ static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps } static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { - return bat_chirp_audio_duration_at_fps(duration_norm, bat_render_target_fps(env)); + return bat_chirp_audio_duration_at_fps(duration_norm, env->render_target_fps); } static inline float bat_chirp_audio_frequency_hz(float freq_norm) { @@ -387,7 +345,7 @@ static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, float start_hz = bat_chirp_audio_frequency_hz(start_norm); float end_hz = bat_chirp_audio_frequency_hz(end_norm); float chirp_rate = (end_hz - start_hz) / duration_seconds; - float phase = 2.0f * BAT_PI * (start_hz * t + 0.5f * chirp_rate * t * t); + float phase = BAT_TWO_PI * (start_hz * t + 0.5f * chirp_rate * t * t); float envelope = bat_chirp_audio_envelope(t / duration_seconds); return BAT_AUDIO_VOLUME * envelope * sinf(phase); } @@ -435,18 +393,6 @@ static inline float bat_chirp_age_norm_denominator(Bat* env) { return 1.25f * (travel_ticks + chirp_ticks); } -static inline BatColor bat_freq_color(float freq_norm, float alpha_norm) { - float f = freq_norm; - float mid = 1.0f - fabsf(2.0f * f - 1.0f); - BatColor color = { - .r = (unsigned char)(255.0f * (1.0f - f) + 45.0f * f), - .g = (unsigned char)(45.0f + 180.0f * mid), - .b = (unsigned 
char)(45.0f * (1.0f - f) + 255.0f * f), - .a = (unsigned char)(255.0f * alpha_norm), - }; - return color; -} - static inline float bat_norm_bin(int idx, int count) { return idx / (float)(count - 1); } @@ -642,12 +588,6 @@ static inline float bat_curriculum_difficulty(Bat* env) { return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } -static inline float bat_budget_difficulty(Bat* env) { - float pressure = (BAT_BUDGET_EASY_CHIRPS - (float)env->max_chirps_per_episode) - / (BAT_BUDGET_EASY_CHIRPS - BAT_BUDGET_EDGE_CHIRPS); - return 0.5f + 0.5f * bat_clampf(pressure, 0.0f, 1.0f); -} - static inline float bat_success_reward(Bat* env) { return env->chirp_efficiency_reward * bat_chirp_efficiency(env); } @@ -685,7 +625,7 @@ static inline void bat_record_chirp_timing(Bat* env) { static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { float margin = fmaxf(6.0f, fmaxf(env->bat_radius, env->bug_radius) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { - float angle = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + float angle = bat_randf(env) * BAT_TWO_PI - BAT_PI; float dx = cosf(angle) * target_distance; float dy = sinf(angle) * target_distance; float min_bat_x = fmaxf(margin, margin - dx); @@ -714,8 +654,8 @@ static inline void bat_reset_bug_motion(Bat* env) { env->bug_inbound = bat_curriculum_inbound_enabled(env) ? 1 : 0; float strength = bat_curriculum_bug_maneuver_strength(env); env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(bat_rand(env) % 3u) : 0; - env->bug_maneuver_phase = bat_randf(env) * 2.0f * BAT_PI; - env->bug_maneuver_rate = 2.0f * BAT_PI * bat_curriculum_bug_maneuver_frequency(env) * + env->bug_maneuver_phase = bat_randf(env) * BAT_TWO_PI; + env->bug_maneuver_rate = BAT_TWO_PI * bat_curriculum_bug_maneuver_frequency(env) * (0.75f + 0.50f * bat_randf(env)); env->bug_maneuver_sign = (bat_rand(env) & 1u) ? 
-1.0f : 1.0f; @@ -727,7 +667,7 @@ static inline void bat_reset_bug_motion(Bat* env) { float heading = atan2f(ty, tx) + (2.0f * bat_randf(env) - 1.0f) * noise; bat_set_bug_velocity(env, heading, speed); } else { - float heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + float heading = bat_randf(env) * BAT_TWO_PI - BAT_PI; bat_set_bug_velocity(env, heading, speed); } } @@ -828,12 +768,10 @@ void free_allocated(Bat* env) { } static inline void add_log(Bat* env, float success, float collision, float timeout) { - float final_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); float curriculum_difficulty = bat_curriculum_difficulty(env); float distance_difficulty = bat_curriculum_distance_difficulty(env); float obstacle_difficulty = bat_curriculum_obstacle_difficulty(env); float motion_difficulty = bat_curriculum_motion_difficulty(env); - float budget_difficulty = bat_budget_difficulty(env); float chirp_efficiency = bat_chirp_efficiency(env); float chirp_perf = bat_chirp_perf(env); env->log.perf += success * curriculum_difficulty * chirp_perf; @@ -841,7 +779,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.score += env->episode_return; env->log.episode_return += env->episode_return; env->log.episode_length += env->tick; - env->log.success += success; env->log.collision += collision; env->log.timeout += timeout; env->log.curriculum_level += env->curriculum_level; @@ -851,15 +788,10 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_obstacle_difficulty += obstacle_difficulty; env->log.curriculum_chirp_budget_difficulty += 0.0f; env->log.curriculum_motion_difficulty += motion_difficulty; - env->log.budget_difficulty += budget_difficulty; env->log.num_obstacles += env->num_obstacles; - env->log.bug_distance_start += env->start_bug_dist; - env->log.bug_distance_final += final_dist; - env->log.bug_distance_delta += env->start_bug_dist - final_dist; 
env->log.chirps_emitted += env->chirps_emitted_episode; env->log.chirp_budget += env->chirp_budget; env->log.chirps_used_ratio += bat_chirps_used_ratio(env); - env->log.chirps_remaining_ratio += 1.0f - bat_chirps_used_ratio(env); env->log.chirp_efficiency += chirp_efficiency; env->log.chirp_perf += chirp_perf; float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); @@ -887,8 +819,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; env->log.mean_chirp_bandwidth += env->chirp_bandwidth_sum / env->chirps_emitted_episode; } - env->log.mean_echo_energy_left += env->echo_energy_left_sum / (float)(env->tick + 1); - env->log.mean_echo_energy_right += env->echo_energy_right_sum / (float)(env->tick + 1); env->log.n += 1.0f; } @@ -899,13 +829,6 @@ static inline int bat_freq_bin_index(Bat* env, float freq_norm) { return bin; } -static inline void bat_add_freq_energy(Bat* env, int offset, float freq_norm, - float intensity) { - int bin = bat_freq_bin_index(env, freq_norm); - int idx = offset + bin; - env->observations[idx] = bat_clampf(env->observations[idx] + intensity, 0.0f, 1.0f); -} - static inline void bat_clear_echo_bucket(EchoBucket* bucket) { memset(bucket, 0, sizeof(*bucket)); bucket->bug_path = -1.0f; @@ -1108,15 +1031,6 @@ static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, } } -static inline void bat_schedule_chirp_echoes(Bat* env, ChirpEvent* chirp) { - int slices = chirp->slice_count; - while (chirp->slices_scheduled < slices) { - int slice_idx = chirp->slices_scheduled; - bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); - chirp->slices_scheduled += 1; - } -} - static inline void bat_schedule_due_chirp_slices(Bat* env) { for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; @@ -1163,16 +1077,10 @@ void compute_observations(Bat* env) { bat_process_echo_events(env); - float 
left_energy = 0.0f; - float right_energy = 0.0f; for (int i = 0; i < BAT_FREQ_BINS; i++) { env->observations[BAT_LEFT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_FREQ_OFFSET + i], 0.0f, 1.0f); env->observations[BAT_RIGHT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_FREQ_OFFSET + i], 0.0f, 1.0f); - left_energy += env->observations[BAT_LEFT_FREQ_OFFSET + i]; - right_energy += env->observations[BAT_RIGHT_FREQ_OFFSET + i]; } - env->echo_energy_left_sum += left_energy; - env->echo_energy_right_sum += right_energy; float chirp_age_denom = bat_chirp_age_norm_denominator(env); int chirp_age = env->tick - env->last_chirp_tick; @@ -1197,7 +1105,7 @@ void compute_observations(Bat* env) { static inline void bat_reset_episode(Bat* env) { env->tick = 0; env->bat_turn_velocity = 0.0f; - env->bat_heading = bat_randf(env) * 2.0f * BAT_PI - BAT_PI; + env->bat_heading = bat_randf(env) * BAT_TWO_PI - BAT_PI; float initial_speed = env->bat_min_speed; env->bat_vx = cosf(env->bat_heading) * initial_speed; env->bat_vy = sinf(env->bat_heading) * initial_speed; @@ -1237,8 +1145,6 @@ static inline void bat_reset_episode(Bat* env) { env->ticks_near = 0.0f; env->first_chirp_tick = -1.0f; env->chirp_tick_sum = 0.0f; - env->echo_energy_left_sum = 0.0f; - env->echo_energy_right_sum = 0.0f; env->episode_return = 0.0f; env->start_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); env->prev_bug_dist = env->start_bug_dist; @@ -1275,8 +1181,8 @@ static inline void bat_update_bug(Bat* env, float dt) { float strength = bat_curriculum_bug_maneuver_strength(env); if (env->bug_maneuver_mode > 0) { env->bug_maneuver_phase += env->bug_maneuver_rate * dt; - if (env->bug_maneuver_phase > 2.0f * BAT_PI) { - env->bug_maneuver_phase -= 2.0f * BAT_PI; + if (env->bug_maneuver_phase > BAT_TWO_PI) { + env->bug_maneuver_phase -= BAT_TWO_PI; } } @@ -1367,8 +1273,8 @@ static inline void bat_update_motion(Bat* env, float dt) { float speed_ratio = env->bat_max_speed > 0.0f ? 
speed / env->bat_max_speed : 0.0f; env->bat_turn_velocity = turn_command * env->bat_turn_rate * bat_clampf(speed_ratio, 0.0f, 1.0f); env->bat_heading += env->bat_turn_velocity * dt; - if (env->bat_heading > BAT_PI) env->bat_heading -= 2.0f * BAT_PI; - if (env->bat_heading < -BAT_PI) env->bat_heading += 2.0f * BAT_PI; + if (env->bat_heading > BAT_PI) env->bat_heading -= BAT_TWO_PI; + if (env->bat_heading < -BAT_PI) env->bat_heading += BAT_TWO_PI; float heading_fx = cosf(env->bat_heading); float heading_fy = sinf(env->bat_heading); @@ -1541,8 +1447,15 @@ void c_step(Bat* env) { } #ifndef BAT_HEADLESS -static inline Color bat_ray_color(BatColor c) { - return (Color){c.r, c.g, c.b, c.a}; +static inline Color bat_freq_color(float freq_norm, float alpha_norm) { + float f = freq_norm; + float mid = 1.0f - fabsf(2.0f * f - 1.0f); + return (Color){ + (unsigned char)(255.0f * (1.0f - f) + 45.0f * f), + (unsigned char)(45.0f + 180.0f * mid), + (unsigned char)(45.0f * (1.0f - f) + 255.0f * f), + (unsigned char)(255.0f * alpha_norm), + }; } static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { @@ -1572,21 +1485,19 @@ static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { (int)(source_x * sx), (int)(source_y * sy), radius * scale, - bat_ray_color(bat_freq_color(freq, alpha))); + bat_freq_color(freq, alpha)); } } } static inline Color bat_doppler_ray_color(float doppler, float alpha) { - BatColor c; if (doppler > 0.05f) { - c = bat_freq_color(1.0f, alpha); + return bat_freq_color(1.0f, alpha); } else if (doppler < -0.05f) { - c = bat_freq_color(0.0f, alpha); - } else { - c = (BatColor){210, 210, 220, (unsigned char)(255.0f * bat_clampf(alpha, 0.0f, 1.0f))}; + return bat_freq_color(0.0f, alpha); } - return bat_ray_color(c); + return (Color){210, 210, 220, + (unsigned char)(255.0f * bat_clampf(alpha, 0.0f, 1.0f))}; } static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, @@ -1755,173 +1666,14 @@ static inline void 
bat_play_chirp_audio(Bat* env) { PlaySound(client->chirp_sounds[voice]); } -static inline void bat_record_write_le16(FILE* f, unsigned int v) { - fputc((int)(v & 0xffu), f); - fputc((int)((v >> 8) & 0xffu), f); -} - -static inline void bat_record_write_le32(FILE* f, unsigned int v) { - fputc((int)(v & 0xffu), f); - fputc((int)((v >> 8) & 0xffu), f); - fputc((int)((v >> 16) & 0xffu), f); - fputc((int)((v >> 24) & 0xffu), f); -} - -static inline void bat_record_write_wav_header(FILE* f, int data_bytes) { - int byte_rate = BAT_AUDIO_SAMPLE_RATE * 2; - fwrite("RIFF", 1, 4, f); - bat_record_write_le32(f, 36u + (unsigned int)data_bytes); - fwrite("WAVE", 1, 4, f); - fwrite("fmt ", 1, 4, f); - bat_record_write_le32(f, 16); - bat_record_write_le16(f, 1); - bat_record_write_le16(f, 1); - bat_record_write_le32(f, BAT_AUDIO_SAMPLE_RATE); - bat_record_write_le32(f, (unsigned int)byte_rate); - bat_record_write_le16(f, 2); - bat_record_write_le16(f, 16); - fwrite("data", 1, 4, f); - bat_record_write_le32(f, (unsigned int)data_bytes); -} - -static inline void bat_record_init(Bat* env, Client* client) { - if (!bat_record_video_enabled(env) || client->recording_initialized) return; - client->recording_initialized = 1; - client->record_fps = bat_record_video_fps(env); - client->record_audio = env->record_video_audio ? 
1 : 0; - client->record_max_frames = bat_record_max_frames( - client->record_fps, bat_record_video_seconds(env)); - snprintf(client->record_frame_dir, sizeof(client->record_frame_dir), - "recordings/bat_recording_frames"); - snprintf(client->record_wav_path, sizeof(client->record_wav_path), - "recordings/bat_recording.wav"); - snprintf(client->record_mp4_path, sizeof(client->record_mp4_path), - "recordings/bat_recording.mp4"); - system("mkdir -p recordings recordings/bat_recording_frames"); - if (client->record_audio) { - client->record_wav = fopen(client->record_wav_path, "wb"); - if (client->record_wav != NULL) { - bat_record_write_wav_header(client->record_wav, 0); - } - } - printf("Bat recording enabled: %s (%d fps, %d frames)\n", - client->record_mp4_path, client->record_fps, client->record_max_frames); -} - -static inline void bat_record_enqueue_chirp(Bat* env) { - Client* client = env->client; - if (client == NULL || !client->recording_initialized || - client->recording_finalized || !client->record_audio) { - return; - } - if (env->audio_chirp_serial <= 0 || - env->audio_chirp_serial == client->record_last_audio_chirp_serial) { - return; - } - client->record_last_audio_chirp_serial = env->audio_chirp_serial; - int voice_idx = client->record_voice_cursor; - client->record_voice_cursor = (client->record_voice_cursor + 1) % BAT_RECORD_MAX_VOICES; - BatRecordVoice* voice = &client->record_voices[voice_idx]; - voice->active = 1; - voice->start_sample = client->record_audio_sample_cursor; - voice->start_freq = env->last_chirp_start_freq; - voice->end_freq = env->last_chirp_end_freq; - voice->duration = bat_chirp_audio_duration_at_fps( - env->last_chirp_duration, client->record_fps); -} - -static inline void bat_record_append_audio_frame(Bat* env) { - Client* client = env->client; - if (client == NULL || !client->record_audio || client->record_wav == NULL) return; - int frame_samples = bat_record_frame_samples(client->record_fps); - for (int i = 0; i < 
frame_samples; i++) { - int sample_index = client->record_audio_sample_cursor + i; - float mixed = 0.0f; - for (int v = 0; v < BAT_RECORD_MAX_VOICES; v++) { - BatRecordVoice* voice = &client->record_voices[v]; - if (!voice->active) continue; - int local_sample = sample_index - voice->start_sample; - int voice_samples = (int)ceilf(voice->duration * BAT_AUDIO_SAMPLE_RATE); - if (local_sample < 0) continue; - if (local_sample >= voice_samples) { - voice->active = 0; - continue; - } - mixed += bat_chirp_audio_sample_f32(voice->start_freq, voice->end_freq, - voice->duration, local_sample, BAT_AUDIO_SAMPLE_RATE); - } - short pcm = (short)(bat_clampf(mixed, -1.0f, 1.0f) * 32767.0f); - fwrite(&pcm, sizeof(short), 1, client->record_wav); - client->record_audio_data_bytes += (int)sizeof(short); - } - client->record_audio_sample_cursor += frame_samples; -} - -static inline void bat_record_finalize(Client* client) { - if (client == NULL || !client->recording_initialized || - client->recording_finalized) { - return; - } - client->recording_finalized = 1; - if (client->record_wav != NULL) { - fseek(client->record_wav, 0, SEEK_SET); - bat_record_write_wav_header(client->record_wav, client->record_audio_data_bytes); - fclose(client->record_wav); - client->record_wav = NULL; - } - - char cmd[1024]; - if (client->record_audio) { - snprintf(cmd, sizeof(cmd), - "ffmpeg -y -framerate %d -i %s/%%06d.png -i %s -frames:v %d " - "-c:v libx264 -pix_fmt yuv420p -c:a aac -shortest %s", - client->record_fps, client->record_frame_dir, client->record_wav_path, - client->record_frame, client->record_mp4_path); - } else { - snprintf(cmd, sizeof(cmd), - "ffmpeg -y -framerate %d -i %s/%%06d.png -frames:v %d " - "-c:v libx264 -pix_fmt yuv420p %s", - client->record_fps, client->record_frame_dir, client->record_frame, - client->record_mp4_path); - } - int status = system(cmd); - if (status == 0) { - printf("Bat recording saved: %s\n", client->record_mp4_path); - } else { - printf("Bat recording ffmpeg 
command failed with status %d\n", status); - } -} - -static inline void bat_record_capture_frame(Bat* env) { - Client* client = env->client; - if (client == NULL || !client->recording_initialized || - client->recording_finalized) { - return; - } - if (client->record_frame >= client->record_max_frames) { - bat_record_finalize(client); - return; - } - bat_record_enqueue_chirp(env); - char path[512]; - snprintf(path, sizeof(path), "%s/%06d.png", client->record_frame_dir, - client->record_frame); - Image image = LoadImageFromScreen(); - ExportImage(image, path); - UnloadImage(image); - bat_record_append_audio_frame(env); - client->record_frame += 1; - if (client->record_frame >= client->record_max_frames) { - bat_record_finalize(client); - } -} +#include "bat_record.h" Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); client->width = env->width * 10; client->height = env->height * 10; InitWindow(client->width, client->height, "Bat"); - int target_fps = bat_render_target_fps(env); + int target_fps = env->render_target_fps; if (target_fps > 0) { SetTargetFPS(target_fps); } diff --git a/ocean/bat/bat_record.h b/ocean/bat/bat_record.h new file mode 100644 index 0000000000..971751fa99 --- /dev/null +++ b/ocean/bat/bat_record.h @@ -0,0 +1,164 @@ +#ifndef BAT_RECORD_H +#define BAT_RECORD_H + +static inline void bat_record_write_le16(FILE* f, unsigned int v) { + fputc((int)(v & 0xffu), f); + fputc((int)((v >> 8) & 0xffu), f); +} + +static inline void bat_record_write_le32(FILE* f, unsigned int v) { + fputc((int)(v & 0xffu), f); + fputc((int)((v >> 8) & 0xffu), f); + fputc((int)((v >> 16) & 0xffu), f); + fputc((int)((v >> 24) & 0xffu), f); +} + +static inline void bat_record_write_wav_header(FILE* f, int data_bytes) { + int byte_rate = BAT_AUDIO_SAMPLE_RATE * 2; + fwrite("RIFF", 1, 4, f); + bat_record_write_le32(f, 36u + (unsigned int)data_bytes); + fwrite("WAVE", 1, 4, f); + fwrite("fmt ", 1, 4, f); + bat_record_write_le32(f, 16); + 
bat_record_write_le16(f, 1); + bat_record_write_le16(f, 1); + bat_record_write_le32(f, BAT_AUDIO_SAMPLE_RATE); + bat_record_write_le32(f, (unsigned int)byte_rate); + bat_record_write_le16(f, 2); + bat_record_write_le16(f, 16); + fwrite("data", 1, 4, f); + bat_record_write_le32(f, (unsigned int)data_bytes); +} + +static inline void bat_record_init(Bat* env, Client* client) { + if (!env->record_video || client->recording_initialized) return; + client->recording_initialized = 1; + client->record_fps = env->record_video_fps; + client->record_audio = env->record_video_audio ? 1 : 0; + client->record_max_frames = client->record_fps * env->record_video_seconds; + snprintf(client->record_frame_dir, sizeof(client->record_frame_dir), + "recordings/bat_recording_frames"); + snprintf(client->record_wav_path, sizeof(client->record_wav_path), + "recordings/bat_recording.wav"); + snprintf(client->record_mp4_path, sizeof(client->record_mp4_path), + "recordings/bat_recording.mp4"); + system("mkdir -p recordings recordings/bat_recording_frames"); + if (client->record_audio) { + client->record_wav = fopen(client->record_wav_path, "wb"); + if (client->record_wav != NULL) { + bat_record_write_wav_header(client->record_wav, 0); + } + } + printf("Bat recording enabled: %s (%d fps, %d frames)\n", + client->record_mp4_path, client->record_fps, client->record_max_frames); +} + +static inline void bat_record_enqueue_chirp(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->recording_initialized || + client->recording_finalized || !client->record_audio) { + return; + } + if (env->audio_chirp_serial <= 0 || + env->audio_chirp_serial == client->record_last_audio_chirp_serial) { + return; + } + client->record_last_audio_chirp_serial = env->audio_chirp_serial; + int voice_idx = client->record_voice_cursor; + client->record_voice_cursor = (client->record_voice_cursor + 1) % BAT_RECORD_MAX_VOICES; + BatRecordVoice* voice = &client->record_voices[voice_idx]; + voice->active 
= 1; + voice->start_sample = client->record_audio_sample_cursor; + voice->start_freq = env->last_chirp_start_freq; + voice->end_freq = env->last_chirp_end_freq; + voice->duration = bat_chirp_audio_duration_at_fps( + env->last_chirp_duration, client->record_fps); +} + +static inline void bat_record_append_audio_frame(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->record_audio || client->record_wav == NULL) return; + int frame_samples = BAT_AUDIO_SAMPLE_RATE / client->record_fps; + for (int i = 0; i < frame_samples; i++) { + int sample_index = client->record_audio_sample_cursor + i; + float mixed = 0.0f; + for (int v = 0; v < BAT_RECORD_MAX_VOICES; v++) { + BatRecordVoice* voice = &client->record_voices[v]; + if (!voice->active) continue; + int local_sample = sample_index - voice->start_sample; + int voice_samples = (int)ceilf(voice->duration * BAT_AUDIO_SAMPLE_RATE); + if (local_sample < 0) continue; + if (local_sample >= voice_samples) { + voice->active = 0; + continue; + } + mixed += bat_chirp_audio_sample_f32(voice->start_freq, voice->end_freq, + voice->duration, local_sample, BAT_AUDIO_SAMPLE_RATE); + } + short pcm = (short)(bat_clampf(mixed, -1.0f, 1.0f) * 32767.0f); + fwrite(&pcm, sizeof(short), 1, client->record_wav); + client->record_audio_data_bytes += (int)sizeof(short); + } + client->record_audio_sample_cursor += frame_samples; +} + +static inline void bat_record_finalize(Client* client) { + if (client == NULL || !client->recording_initialized || + client->recording_finalized) { + return; + } + client->recording_finalized = 1; + if (client->record_wav != NULL) { + fseek(client->record_wav, 0, SEEK_SET); + bat_record_write_wav_header(client->record_wav, client->record_audio_data_bytes); + fclose(client->record_wav); + client->record_wav = NULL; + } + + char cmd[1024]; + if (client->record_audio) { + snprintf(cmd, sizeof(cmd), + "ffmpeg -y -framerate %d -i %s/%%06d.png -i %s -frames:v %d " + "-c:v libx264 -pix_fmt yuv420p -c:a 
aac -shortest %s", + client->record_fps, client->record_frame_dir, client->record_wav_path, + client->record_frame, client->record_mp4_path); + } else { + snprintf(cmd, sizeof(cmd), + "ffmpeg -y -framerate %d -i %s/%%06d.png -frames:v %d " + "-c:v libx264 -pix_fmt yuv420p %s", + client->record_fps, client->record_frame_dir, client->record_frame, + client->record_mp4_path); + } + int status = system(cmd); + if (status == 0) { + printf("Bat recording saved: %s\n", client->record_mp4_path); + } else { + printf("Bat recording ffmpeg command failed with status %d\n", status); + } +} + +static inline void bat_record_capture_frame(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->recording_initialized || + client->recording_finalized) { + return; + } + if (client->record_frame >= client->record_max_frames) { + bat_record_finalize(client); + return; + } + bat_record_enqueue_chirp(env); + char path[512]; + snprintf(path, sizeof(path), "%s/%06d.png", client->record_frame_dir, + client->record_frame); + Image image = LoadImageFromScreen(); + ExportImage(image, path); + UnloadImage(image); + bat_record_append_audio_frame(env); + client->record_frame += 1; + if (client->record_frame >= client->record_max_frames) { + bat_record_finalize(client); + } +} + +#endif diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 4ee90a64df..ebe8ffa599 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -1,7 +1,7 @@ #include "bat.h" -#define OBS_SIZE 41 -#define NUM_ATNS 6 -#define ACT_SIZES {3, 3, 8, 8, 4, 2} +#define OBS_SIZE BAT_OBS_SIZE +#define NUM_ATNS BAT_NUM_ACTIONS +#define ACT_SIZES {BAT_MOVE_ACTIONS, BAT_TURN_ACTIONS, BAT_CHIRP_FREQ_BINS, BAT_CHIRP_FREQ_BINS, BAT_CHIRP_DURATION_BINS, BAT_CHIRP_EMIT_ACTIONS} #define OBS_TENSOR_T FloatTensor #define Env Bat @@ -83,9 +83,6 @@ void my_log(Log* log, Dict* out) { dict_set(out, "curriculum_chirp_budget_difficulty", log->curriculum_chirp_budget_difficulty); dict_set(out, 
"curriculum_motion_difficulty", log->curriculum_motion_difficulty); dict_set(out, "num_obstacles", log->num_obstacles); - dict_set(out, "bug_distance_start", log->bug_distance_start); - dict_set(out, "bug_distance_final", log->bug_distance_final); - dict_set(out, "bug_distance_delta", log->bug_distance_delta); dict_set(out, "chirps_emitted", log->chirps_emitted); dict_set(out, "chirp_budget", log->chirp_budget); dict_set(out, "chirps_used_ratio", log->chirps_used_ratio); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 527ccd001b..8dd85dd399 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -280,7 +280,6 @@ static int test_chirp_budget_logs_ratios_for_wandb(void) { ASSERT_FLOAT_NEAR(env.log.chirp_budget, 10.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirps_used_ratio, 0.40f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.chirps_remaining_ratio, 0.60f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.80f, 0.0001f); free_allocated(&env); @@ -325,29 +324,6 @@ static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components return 0; } -static int test_budget_difficulty_uses_hard_edge_below_six_chirps(void) { - Bat env = make_test_env(); - c_reset(&env); - - env.max_chirps_per_episode = 15; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.50f, 0.0001f); - - env.max_chirps_per_episode = 10; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.75f, 0.0001f); - - env.max_chirps_per_episode = 6; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 0.95f, 0.0001f); - - env.max_chirps_per_episode = 5; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 1.0f, 0.0001f); - - env.max_chirps_per_episode = 4; - ASSERT_FLOAT_NEAR(bat_budget_difficulty(&env), 1.0f, 0.0001f); - - free_allocated(&env); - return 0; -} - static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(void) { Bat env = make_test_env(); c_reset(&env); @@ -366,7 +342,6 @@ static int 
test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.budget_difficulty, 0.55f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_perf, 0.5333334f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); @@ -903,19 +878,6 @@ static int test_chirp_ring_physical_ordering(void) { return 0; } -static int test_chirp_color_maps_low_to_red_high_to_blue(void) { - BatColor low = bat_freq_color(0.0f, 1.0f); - BatColor mid = bat_freq_color(0.5f, 1.0f); - BatColor high = bat_freq_color(1.0f, 1.0f); - - ASSERT_TRUE(low.r > low.b); - ASSERT_TRUE(high.b > high.r); - ASSERT_TRUE(mid.g >= low.g); - ASSERT_TRUE(mid.g >= high.g); - - return 0; -} - static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); @@ -929,20 +891,6 @@ static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { return 0; } -static int test_render_target_fps_is_eval_only_and_can_be_uncapped(void) { - Bat env = make_test_env(); - env.render_target_fps = 60; - ASSERT_TRUE(bat_render_target_fps(&env) == 60); - env.render_target_fps = 15; - ASSERT_TRUE(bat_render_target_fps(&env) == 15); - env.render_target_fps = 0; - ASSERT_TRUE(bat_render_target_fps(&env) == 0); - env.render_target_fps = -1; - ASSERT_TRUE(bat_render_target_fps(&env) == 0); - free_allocated(&env); - return 0; -} - static int test_chirp_audio_duration_scales_with_render_fps(void) { Bat env = make_test_env(); float base_duration = bat_chirp_duration_seconds(0.0f); @@ -1467,7 +1415,11 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { .active = 1, }; chirp.slice_count = (int)ceilf(chirp.duration / BAT_TICK_RATE); - bat_schedule_chirp_echoes(&env, &chirp); 
+ while (chirp.slices_scheduled < chirp.slice_count) { + int slice_idx = chirp.slices_scheduled; + bat_schedule_chirp_slice_echoes(&env, &chirp, slice_idx); + chirp.slices_scheduled += 1; + } float bug_energy = 0.0f; for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { @@ -1609,9 +1561,14 @@ static int test_frequency_bin_energy_sums_and_caps(void) { Bat env = make_test_env(); memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); - bat_add_freq_energy(&env, BAT_LEFT_FREQ_OFFSET, 1.0f, 0.75f); - bat_add_freq_energy(&env, BAT_LEFT_FREQ_OFFSET, 1.0f, 0.75f); - bat_add_freq_energy(&env, BAT_RIGHT_FREQ_OFFSET, 0.0f, 0.35f); + int high_bin = bat_freq_bin_index(&env, 1.0f); + int low_bin = bat_freq_bin_index(&env, 0.0f); + env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] = bat_clampf( + env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); + env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] = bat_clampf( + env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); + env.observations[BAT_RIGHT_FREQ_OFFSET + low_bin] = bat_clampf( + env.observations[BAT_RIGHT_FREQ_OFFSET + low_bin] + 0.35f, 0.0f, 1.0f); ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + BAT_FREQ_BINS - 1], 1.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.observations[BAT_RIGHT_FREQ_OFFSET], 0.35f, 0.0001f); @@ -1841,7 +1798,6 @@ int main(void) { if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; if (test_chirp_budget_logs_ratios_for_wandb()) return 1; if (test_curriculum_perf_logs_distance_and_obstacle_difficulty_components()) return 1; - if (test_budget_difficulty_uses_hard_edge_below_six_chirps()) return 1; if (test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf()) return 1; if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; @@ -1860,9 +1816,7 @@ int main(void) { if (test_bat_turn_rate_scales_with_forward_speed()) return 1; if (test_bat_speed_action_space_has_no_strafe()) return 1; if 
(test_chirp_ring_physical_ordering()) return 1; - if (test_chirp_color_maps_low_to_red_high_to_blue()) return 1; if (test_chirp_audio_maps_norm_freq_to_audible_sweep()) return 1; - if (test_render_target_fps_is_eval_only_and_can_be_uncapped()) return 1; if (test_chirp_audio_duration_scales_with_render_fps()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; if (test_valid_chirp_gets_reward_without_legacy_cost()) return 1; From cb92935538ff86d9bd49afb0b41cf7f5ee73250f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 19:24:02 -0700 Subject: [PATCH 31/51] Move Bat audio helpers --- ocean/bat/bat.h | 101 +------------------------------- ocean/bat/bat_audio.h | 99 +++++++++++++++++++++++++++++++ ocean/bat/tests/test_bat_core.c | 2 - 3 files changed, 100 insertions(+), 102 deletions(-) create mode 100644 ocean/bat/bat_audio.h diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index b68f44e409..7e22160c14 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -304,51 +304,7 @@ static inline float bat_chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * duration_norm; } -static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps) { - float duration = bat_chirp_duration_seconds(duration_norm); - if (fps <= 0) return duration; - float scale = 60.0f / (float)fps; - if (scale < 1.0f) scale = 1.0f; - return duration * scale; -} - -static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { - return bat_chirp_audio_duration_at_fps(duration_norm, env->render_target_fps); -} - -static inline float bat_chirp_audio_frequency_hz(float freq_norm) { - return BAT_AUDIO_MIN_HZ + freq_norm - * (BAT_AUDIO_MAX_HZ - BAT_AUDIO_MIN_HZ); -} - -static inline float bat_chirp_audio_instant_hz(float start_norm, float end_norm, - float duration_seconds, float t_seconds) { - float t = t_seconds / duration_seconds; - float start_hz = bat_chirp_audio_frequency_hz(start_norm); - float end_hz = 
bat_chirp_audio_frequency_hz(end_norm); - return start_hz + t * (end_hz - start_hz); -} - -static inline float bat_chirp_audio_envelope(float t_norm) { - if (t_norm <= 0.0f || t_norm >= 1.0f) return 0.0f; - const float fade = 0.08f; - float attack = t_norm / fade; - float release = (1.0f - t_norm) / fade; - return bat_clampf(fminf(attack, release), 0.0f, 1.0f); -} - -static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, - float duration_seconds, int sample_index, int sample_rate) { - float t = sample_index / (float)sample_rate; - if (t >= duration_seconds) return 0.0f; - - float start_hz = bat_chirp_audio_frequency_hz(start_norm); - float end_hz = bat_chirp_audio_frequency_hz(end_norm); - float chirp_rate = (end_hz - start_hz) / duration_seconds; - float phase = BAT_TWO_PI * (start_hz * t + 0.5f * chirp_rate * t * t); - float envelope = bat_chirp_audio_envelope(t / duration_seconds); - return BAT_AUDIO_VOLUME * envelope * sinf(phase); -} +#include "bat_audio.h" static inline float bat_chirp_ring_radius(float age_seconds, float slice, float duration_seconds, float sound_speed) { @@ -1611,61 +1567,6 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { } } -static inline void bat_unload_chirp_sound(Client* client, int i) { - if (!client->chirp_sound_loaded[i]) return; - UnloadSound(client->chirp_sounds[i]); - client->chirp_sound_loaded[i] = 0; -} - -static inline void bat_cleanup_audio(Client* client) { - if (!client->audio_ready) return; - for (int i = 0; i < BAT_AUDIO_VOICES; i++) { - if (client->chirp_sound_loaded[i] && !IsSoundPlaying(client->chirp_sounds[i])) { - bat_unload_chirp_sound(client, i); - } - } -} - -static inline void bat_play_chirp_audio(Bat* env) { - Client* client = env->client; - if (client == NULL || !client->audio_ready) return; - bat_cleanup_audio(client); - if (env->audio_chirp_serial <= 0 || - env->audio_chirp_serial == client->last_audio_chirp_serial) { - return; - } - 
client->last_audio_chirp_serial = env->audio_chirp_serial; - - float duration = bat_chirp_audio_duration_seconds(env, env->last_chirp_duration); - int sample_count = (int)ceilf(duration * BAT_AUDIO_SAMPLE_RATE); - - short* samples = (short*)malloc(sample_count * sizeof(short)); - if (samples == NULL) return; - for (int i = 0; i < sample_count; i++) { - float sample = bat_chirp_audio_sample_f32(env->last_chirp_start_freq, - env->last_chirp_end_freq, duration, i, BAT_AUDIO_SAMPLE_RATE); - samples[i] = (short)(bat_clampf(sample, -1.0f, 1.0f) * 32767.0f); - } - - Wave wave = { - .frameCount = (unsigned int)sample_count, - .sampleRate = BAT_AUDIO_SAMPLE_RATE, - .sampleSize = 16, - .channels = 1, - .data = samples, - }; - Sound sound = LoadSoundFromWave(wave); - UnloadWave(wave); - - int voice = client->audio_voice_cursor; - client->audio_voice_cursor = (client->audio_voice_cursor + 1) % BAT_AUDIO_VOICES; - bat_unload_chirp_sound(client, voice); - client->chirp_sounds[voice] = sound; - client->chirp_sound_loaded[voice] = 1; - SetSoundVolume(client->chirp_sounds[voice], 1.0f); - PlaySound(client->chirp_sounds[voice]); -} - #include "bat_record.h" Client* make_client(Bat* env) { diff --git a/ocean/bat/bat_audio.h b/ocean/bat/bat_audio.h new file mode 100644 index 0000000000..2cd17ab19c --- /dev/null +++ b/ocean/bat/bat_audio.h @@ -0,0 +1,99 @@ +#ifndef BAT_AUDIO_H +#define BAT_AUDIO_H + +static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps) { + float duration = bat_chirp_duration_seconds(duration_norm); + if (fps <= 0) return duration; + float scale = 60.0f / (float)fps; + if (scale < 1.0f) scale = 1.0f; + return duration * scale; +} + +static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { + return bat_chirp_audio_duration_at_fps(duration_norm, env->render_target_fps); +} + +static inline float bat_chirp_audio_frequency_hz(float freq_norm) { + return BAT_AUDIO_MIN_HZ + freq_norm + * (BAT_AUDIO_MAX_HZ - 
BAT_AUDIO_MIN_HZ); +} + +static inline float bat_chirp_audio_envelope(float t_norm) { + if (t_norm <= 0.0f || t_norm >= 1.0f) return 0.0f; + const float fade = 0.08f; + float attack = t_norm / fade; + float release = (1.0f - t_norm) / fade; + return bat_clampf(fminf(attack, release), 0.0f, 1.0f); +} + +static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, + float duration_seconds, int sample_index, int sample_rate) { + float t = sample_index / (float)sample_rate; + if (t >= duration_seconds) return 0.0f; + + float start_hz = bat_chirp_audio_frequency_hz(start_norm); + float end_hz = bat_chirp_audio_frequency_hz(end_norm); + float chirp_rate = (end_hz - start_hz) / duration_seconds; + float phase = BAT_TWO_PI * (start_hz * t + 0.5f * chirp_rate * t * t); + float envelope = bat_chirp_audio_envelope(t / duration_seconds); + return BAT_AUDIO_VOLUME * envelope * sinf(phase); +} + +#ifndef BAT_HEADLESS +static inline void bat_unload_chirp_sound(Client* client, int i) { + if (!client->chirp_sound_loaded[i]) return; + UnloadSound(client->chirp_sounds[i]); + client->chirp_sound_loaded[i] = 0; +} + +static inline void bat_cleanup_audio(Client* client) { + if (!client->audio_ready) return; + for (int i = 0; i < BAT_AUDIO_VOICES; i++) { + if (client->chirp_sound_loaded[i] && !IsSoundPlaying(client->chirp_sounds[i])) { + bat_unload_chirp_sound(client, i); + } + } +} + +static inline void bat_play_chirp_audio(Bat* env) { + Client* client = env->client; + if (client == NULL || !client->audio_ready) return; + bat_cleanup_audio(client); + if (env->audio_chirp_serial <= 0 || + env->audio_chirp_serial == client->last_audio_chirp_serial) { + return; + } + client->last_audio_chirp_serial = env->audio_chirp_serial; + + float duration = bat_chirp_audio_duration_seconds(env, env->last_chirp_duration); + int sample_count = (int)ceilf(duration * BAT_AUDIO_SAMPLE_RATE); + + short* samples = (short*)malloc(sample_count * sizeof(short)); + if (samples == NULL) 
return; + for (int i = 0; i < sample_count; i++) { + float sample = bat_chirp_audio_sample_f32(env->last_chirp_start_freq, + env->last_chirp_end_freq, duration, i, BAT_AUDIO_SAMPLE_RATE); + samples[i] = (short)(bat_clampf(sample, -1.0f, 1.0f) * 32767.0f); + } + + Wave wave = { + .frameCount = (unsigned int)sample_count, + .sampleRate = BAT_AUDIO_SAMPLE_RATE, + .sampleSize = 16, + .channels = 1, + .data = samples, + }; + Sound sound = LoadSoundFromWave(wave); + UnloadWave(wave); + + int voice = client->audio_voice_cursor; + client->audio_voice_cursor = (client->audio_voice_cursor + 1) % BAT_AUDIO_VOICES; + bat_unload_chirp_sound(client, voice); + client->chirp_sounds[voice] = sound; + client->chirp_sound_loaded[voice] = 1; + SetSoundVolume(client->chirp_sounds[voice], 1.0f); + PlaySound(client->chirp_sounds[voice]); +} +#endif + +#endif diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 8dd85dd399..af7b64dc67 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -881,8 +881,6 @@ static int test_chirp_ring_physical_ordering(void) { static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_chirp_audio_instant_hz(0.0f, 1.0f, 0.20f, 0.10f), 2100.0f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_chirp_audio_instant_hz(1.0f, 0.0f, 0.20f, 0.10f), 2100.0f, 0.0001f); ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, -1, 48000), 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 9600, 48000), 0.0f, 0.0001f); float sample = bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 2400, 48000); From 41f7651fbe7cfb5469096b791105fe933c12295f Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 20:25:01 -0700 Subject: [PATCH 32/51] Document Bat sensing research --- ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md | 288 
+++++++++++++++++++++ ocean/bat/BAT_NEXT_SWEEP_RESEARCH_NOTES.md | 135 ++++++++++ ocean/bat/BAT_WING_ECHO_RESEARCH.md | 288 +++++++++++++++++++++ 3 files changed, 711 insertions(+) create mode 100644 ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md create mode 100644 ocean/bat/BAT_NEXT_SWEEP_RESEARCH_NOTES.md create mode 100644 ocean/bat/BAT_WING_ECHO_RESEARCH.md diff --git a/ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md b/ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md new file mode 100644 index 0000000000..ca03e876c5 --- /dev/null +++ b/ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md @@ -0,0 +1,288 @@ +# Bat ear directivity research notes + +Purpose: preserve research and implementation guidance for a possible low-cost directional hearing model in `ocean/bat/`. + +Status: research/design note only. No behavior change is implied by this document. + +## Short answer + +Yes, the model should not treat each ear as an omnidirectional scalar receiver. Bat echolocation uses directional emission, directional reception, and binaural differences. The useful terms are: + +- `HRTF`: head-related transfer function, the direction-dependent filtering from a sound source to each ear. +- `HRIR`: time-domain head-related impulse response. +- `ILD`: interaural level difference, the loudness/intensity difference between left and right ears. +- `ITD`: interaural time difference, the arrival-time difference between ears. +- `Pinna directivity`: direction-dependent gain/filtering caused by the external ear shape. +- `Beam pattern` or `polar response`: gain as a function of angle. +- `Acoustic field of view`: the spatial volume that is ensonified or heard well enough for detection. + +For Bat env purposes, the best first implementation is a cheap per-ear gain curve in `bat_schedule_echo`, based on relative angle to target/obstacle/echo source. It should use dot products and multiplications, not `atan2f`, not tables, and not per-frequency filters. 
+ +## What the literature says + +### Bats have directional sonar emission and dynamic beam width + +Jakobsen, Ratcliffe, and Surlykke found that multiple vespertilionid species converge on similar sonar fields of view. The Nature abstract reports a directivity index around `11 +/- 1 dB`, half-amplitude angle about `37 degrees`, and on-axis source level around `108 +/- 4 dB SPL re 20 uPa rms at 10 cm` under their tested condition. + +Source: + +- Jakobsen, L.; Ratcliffe, J. M.; Surlykke, A. `Convergent acoustic field of view in echolocating bats`. Nature 493, 93-96, 2013. DOI: https://doi.org/10.1038/nature11664 +- Nature page: https://www.nature.com/articles/nature11664 + +Implementation relevance: + +- The environment already has directional structure via left/right echo channels, but the hearing side can plausibly become more directional. +- A simple polar response is justified: forward is strong, rear is weak, lateral differs by ear. +- A 2D game does not need full 3D HRTF. The important behavioral signal is `left/right relative energy`, not spectral notches. + +### Directionality and intensity jointly define what the bat can detect + +Jakobsen, Brinklov, and Surlykke reviewed bat echolocation intensity and directionality. Key implementation-relevant points: + +- Bat calls are directional; more energy is focused forward than to the sides. +- An object detectable directly in front at a given range may not be detectable at the same range off-axis. +- Directionality reduces clutter because less energy is emitted to the sides/back. +- Beam shape acts as a spatial filter before echoes return. +- Bats dynamically control intensity, duration, frequency, and directionality. +- Nose emitters can have beam shape affected by nostril separation and noseleaf geometry. +- Mouth emitters can affect directionality via gape size. + +Source: + +- Jakobsen, L.; Brinklov, S.; Surlykke, A. `Intensity and directionality of bat echolocation signals`. 
Frontiers in Physiology 4:89, 2013. DOI: https://doi.org/10.3389/fphys.2013.00089 +- Open full text: https://pmc.ncbi.nlm.nih.gov/articles/PMC3635024/ + +Implementation relevance: + +- If we add hearing directivity, it should be part of the echo energy calculation, not an observation post-process. +- It should affect both bug and obstacle echoes consistently. +- We should keep it cheap enough to run per echo/event/source. + +### Bats can broaden beams in terminal pursuit + +Jakobsen and Surlykke showed that `Myotis daubentonii` and `Eptesicus serotinus` broaden their biosonar beam during prey pursuit. Crossref metadata includes the useful quantitative anchor: `M. daubentonii` increased half-amplitude angle from about `40 degrees` to about `90 degrees` horizontally and from about `45 degrees` to more than `90 degrees` vertically, mostly by dropping call frequency by about one octave from `55 kHz` to `27.5 kHz`. + +Source: + +- Jakobsen, L.; Surlykke, A. `Vespertilionid bats control the width of their biosonar sound beam dynamically during prey pursuit`. PNAS 107(31), 13930-13935, 2010. DOI: https://doi.org/10.1073/pnas.1006630107 +- PNAS page: https://www.pnas.org/doi/10.1073/pnas.1006630107 + +Implementation relevance: + +- This is more about emission than reception, but it argues against a single static omnidirectional model. +- We do not need to implement dynamic beam width yet. It would be a meaningful physics change and should be isolated in a sweep. +- If implemented later, chirp duration/frequency choices could alter beam width. That would make action consequences richer, but it is not the minimum ear-directivity change. + +### Reception-side filtering matters too + +Wotton, Jenison, and Hartley modeled/combined emission and external-ear reception in the big brown bat. Their abstract says localization cues become clearer when emission spectra and external-ear spectra are convolved; spectral peaks sharpen and peak/notch contrast increases. 
It also notes cues restricted to a cone of about `+/-30 degrees`. + +Source: + +- Wotton, J. M.; Jenison, R. L.; Hartley, D. J. `The combination of echolocation emission and ear reception enhances directional spectral cues of the big brown bat, Eptesicus fuscus`. JASA 101(3), 1723-1733, 1997. DOI: https://doi.org/10.1121/1.418271 +- AIP/JASA page: https://pubs.aip.org/asa/jasa/article/101/3/1723/559358/The-combination-of-echolocation-emission-and-ear + +Implementation relevance: + +- Full spectral filtering is overkill for current Bat. The obs are low-dimensional echo features, not raw waveforms. +- A cheap gain curve per ear captures the important part for policy learning: direction-dependent intensity. +- Avoid adding FFTs, filters, or per-frequency HRTF tables unless the environment changes to raw audio observations. + +### Noseleaf and pinnae can cooperate dynamically + +Kuc proposed a model where noseleaf and pinnae cooperate through direct and delayed acoustic paths. The abstract says the delayed pinna component can increase on-axis emission strength, narrow beam width, and sculpt frequency-dependent beam patterns. + +Source: + +- Kuc, R. `Morphology suggests noseleaf and pinnae cooperate to enhance bat echolocation`. JASA 128(5), 3190-3199, 2010. DOI: https://doi.org/10.1121/1.3488304 +- AIP/JASA page: https://pubs.aip.org/asa/jasa/article/128/5/3190/917806/Morphology-suggests-noseleaf-and-pinnae-cooperate + +Zhang et al. studied great roundleaf bats and found coordinated noseleaf and pinna movements during echolocation. + +Source: + +- Zhang, S.; et al. `Dynamic relationship between noseleaf and pinnae in echolocating hipposiderid bats`. Journal of Experimental Biology, 2019. DOI: https://doi.org/10.1242/jeb.210252 +- JEB page: https://journals.biologists.com/jeb/article/222/20/jeb210252/224403/Dynamic-relationship-between-noseleaf-and-pinnae-in + +Vanderelst et al. found that the noseleaf of `Rhinolophus formosae` focuses the FM component of calls. 
+ +Source: + +- Vanderelst, D.; Lee, Y.-F.; Geipel, I.; Kalko, E. K. V.; Kuo, Y.-M.; Peremans, H. `The noseleaf of Rhinolophus formosae focuses the Frequency Modulated (FM) component of the calls`. Frontiers in Physiology 4:191, 2013. DOI: https://doi.org/10.3389/fphys.2013.00191 +- Frontiers page: https://www.frontiersin.org/articles/10.3389/fphys.2013.00191/full + +Implementation relevance: + +- These papers support a directional receive model, but they also warn that exact geometry is species-specific and complex. +- For Bat env, do not model moving pinnae/noseleaf first. That would create extra state and new parameters without proving learning benefit. +- Keep the first model static and symmetric, then sweep it. + +## Current likely Bat code location + +The directivity should probably be applied in or near the echo scheduling/energy path, around the existing left/right echo gain logic. Earlier review found a mild directional term like this in `bat_schedule_echo`: + +```c +float left_gain = 0.75f + 0.25f * something; +float right_gain = 0.75f + 0.25f * something; +``` + +That is a weak directional receiver. A stronger, biologically motivated model would replace that with a front-and-side polar response. + +Do not add this in render/audio code. The training observation echo energy must change, not only playback. + +## Recommended cheap implementation + +Use only normalized source direction and bat forward/side vectors. No angle, no trig. + +Definitions: + +- `ux, uy`: unit vector from bat to echo source. +- `fx, fy`: bat forward unit vector. +- `lx, ly`: bat left-ear preferred lateral unit vector, usually left of forward. +- `rx, ry`: bat right-ear preferred lateral unit vector, usually right of forward. +- `front`: nonnegative forward alignment. +- `left_side`: nonnegative left-ear side alignment. +- `right_side`: nonnegative right-ear side alignment. +- `rear_floor`: minimum rear sensitivity so rear echoes are not impossible. 
+ +Sketch: + +```c +float front = bat_clampf(ux*fx + uy*fy, 0.0f, 1.0f); +float left_side = bat_clampf(ux*lx + uy*ly, 0.0f, 1.0f); +float right_side = bat_clampf(ux*rx + uy*ry, 0.0f, 1.0f); + +float front2 = front * front; +float left2 = left_side * left_side; +float right2 = right_side * right_side; + +float left_gain = rear_floor + front_gain*front2 + side_gain*left2; +float right_gain = rear_floor + front_gain*front2 + side_gain*right2; +``` + +Potential initial constants: + +```c +#define BAT_EAR_REAR_GAIN 0.15f +#define BAT_EAR_FRONT_GAIN 0.55f +#define BAT_EAR_SIDE_GAIN 0.45f +``` + +Normalize if needed: + +```c +#define BAT_EAR_GAIN_NORM (1.0f / (BAT_EAR_REAR_GAIN + BAT_EAR_FRONT_GAIN + BAT_EAR_SIDE_GAIN)) +left_gain *= BAT_EAR_GAIN_NORM; +right_gain *= BAT_EAR_GAIN_NORM; +``` + +This caps gain at `1.0` (reached only if the front and side lobes peak together; with purely lateral ear vectors the peak is about `0.61`, dead ahead), gives front-left stronger left signal, front-right stronger right signal, and keeps behind weak but nonzero. + +## Variant: ear axes angled forward + +Pure side vectors can make lateral echoes too strong compared with forward echoes. A better biological-ish 2D approximation is ears pointed outward but forward-biased. + +Given forward `f` and left normal `n`: + +```c +float ear_forward = 0.75f; +float ear_side = 0.66f; +float left_ear_x = ear_forward*fx + ear_side*nx; +float left_ear_y = ear_forward*fy + ear_side*ny; +float right_ear_x = ear_forward*fx - ear_side*nx; +float right_ear_y = ear_forward*fy - ear_side*ny; +``` + +If `ear_forward^2 + ear_side^2` is approximately `1`, no normalization needed. `0.75/0.66` is close enough for a cheap model. + +Then: + +```c +float left_lobe = bat_clampf(ux*left_ear_x + uy*left_ear_y, 0.0f, 1.0f); +float right_lobe = bat_clampf(ux*right_ear_x + uy*right_ear_y, 0.0f, 1.0f); +left_gain = rear_floor + main_gain * left_lobe * left_lobe; +right_gain = rear_floor + main_gain * right_lobe * right_lobe; +``` + +This is even simpler and likely enough. + +## Performance considerations + +Good: + +- Dot products. 
+- Multiplication for squaring. +- `bat_clampf` or inline clamp. +- Constants as `#define`. + +Avoid: + +- `atan2f` per echo. +- `cosf`/`sinf` per echo if forward/side vectors already exist. +- Per-frequency HRTF tables. +- New heap allocations. +- Raw audio convolution. + +The model should cost only a few multiplies per scheduled echo. + +## Expected behavior change + +Likely effects: + +- Better left/right spatial signal when target is off-center. +- Rear obstacles/bugs become less audible. +- Policy may learn to turn/scan because facing matters more. +- Existing trained checkpoint performance may change because observations change. + +Potential risk: + +- If rear/side gain is too low, exploration may get harder. +- If gains are not normalized, reward/observation scale may drift. +- If directivity is applied on top of an already strong directional term, left/right energy may saturate. + +## Sweep recommendation + +Do not combine this with wing micro-Doppler in the same first sweep. Use a clean ablation: + +- Baseline: current Bat after timer/log/audio cleanup. +- Variant A: static ear directivity only. +- Variant B: wing sidebands only. +- Variant C: both, only if A and B individually help or at least do not hurt. + +Suggested parameters for first sweep: + +```ini +[env] +ear_directivity_enabled = 1 +ear_rear_gain = 0.15 +ear_front_gain = 0.55 +ear_side_gain = 0.45 +``` + +If avoiding config bloat, hard-code the first constants behind defines and sweep by branch/commit instead. + +## Implementation checklist + +- Apply directivity before writing echo energy into observations. +- Apply to bug and obstacle echoes unless there is a specific reason not to. +- Keep left/right symmetry exact. +- Keep max gain normalized near current max so observation scale does not drift hard. +- Add one focused C test for left/right asymmetry if tests are desired. +- Run build/tests/train/eval before comparing performance. 
+ +## Source list + +- https://doi.org/10.1038/nature11664 +- https://www.nature.com/articles/nature11664 +- https://doi.org/10.3389/fphys.2013.00089 +- https://pmc.ncbi.nlm.nih.gov/articles/PMC3635024/ +- https://doi.org/10.1073/pnas.1006630107 +- https://www.pnas.org/doi/10.1073/pnas.1006630107 +- https://doi.org/10.1121/1.418271 +- https://pubs.aip.org/asa/jasa/article/101/3/1723/559358/The-combination-of-echolocation-emission-and-ear +- https://doi.org/10.1121/1.3488304 +- https://pubs.aip.org/asa/jasa/article/128/5/3190/917806/Morphology-suggests-noseleaf-and-pinnae-cooperate +- https://doi.org/10.1242/jeb.210252 +- https://journals.biologists.com/jeb/article/222/20/jeb210252/224403/Dynamic-relationship-between-noseleaf-and-pinnae-in +- https://doi.org/10.3389/fphys.2013.00191 +- https://www.frontiersin.org/articles/10.3389/fphys.2013.00191/full diff --git a/ocean/bat/BAT_NEXT_SWEEP_RESEARCH_NOTES.md b/ocean/bat/BAT_NEXT_SWEEP_RESEARCH_NOTES.md new file mode 100644 index 0000000000..36086018ab --- /dev/null +++ b/ocean/bat/BAT_NEXT_SWEEP_RESEARCH_NOTES.md @@ -0,0 +1,135 @@ +# Bat next sweep research notes + +Purpose: concise decision notes for future agents before changing Bat physics. + +Status: planning note only. + +## Current baseline to preserve first + +Before adding new physics, commit and sweep the current Bat state that already includes: + +- Timer observation normalized `0..1`. +- Timeout terminal value as `-1.0`. +- Chirp usage normalized `0..1` with death/termination if exceeding the allowed budget. +- Reward/log cleanup. +- Recording code moved out of `bat.h`. +- Audio helpers moved out of `bat.h`. + +Reason: ear directivity and wing micro-Doppler are real behavior changes. They should not be mixed into the baseline sweep used to judge timer/log/audio cleanup. + +## Candidate A: static ear directivity + +Add a cheap polar response for each ear. + +Expected benefit: + +- Stronger left/right spatial cue. +- Facing direction matters more. 
+- Rear echoes become weaker. + +Main risk: + +- Exploration may become harder if rear/side gain is too low. +- Observation scale may drift if gains are not normalized. + +Recommended first form: + +```c +float front = clamp(dot(source_dir, forward), 0, 1); +float left = clamp(dot(source_dir, left_ear_dir), 0, 1); +float right = clamp(dot(source_dir, right_ear_dir), 0, 1); +left_gain = rear_floor + front_gain*front*front + side_gain*left*left; +right_gain = rear_floor + front_gain*front*front + side_gain*right*right; +``` + +Suggested constants: + +```c +#define BAT_EAR_REAR_GAIN 0.15f +#define BAT_EAR_FRONT_GAIN 0.55f +#define BAT_EAR_SIDE_GAIN 0.45f +#define BAT_EAR_GAIN_NORM (1.0f / (BAT_EAR_REAR_GAIN + BAT_EAR_FRONT_GAIN + BAT_EAR_SIDE_GAIN)) +``` + +Research doc: + +- `ocean/bat/BAT_EAR_DIRECTIVITY_RESEARCH.md` + +## Candidate B: bug wing echo sideband + +Add prey-specific wing flutter echo structure. + +Expected benefit: + +- Bug echoes become distinguishable from obstacle echoes. +- Adds a moving-prey cue without raw audio simulation. + +Main risk: + +- More echo events can saturate event capacity or observation bins. +- If energy is too low it adds no learnable signal; if too high it changes task scale. + +Recommended first form: + +- Keep body echo unchanged. +- Add one extra bug-only wing echo. +- Use triangle phase, no `sinf`. + +Suggested constants: + +```c +#define BAT_BUG_WING_ECHO_GAIN 0.20f +#define BAT_BUG_WING_FREQ_OFFSET 0.06f +#define BAT_BUG_WING_PHASE_STEP 0.11f +``` + +Research doc: + +- `ocean/bat/BAT_WING_ECHO_RESEARCH.md` + +## Sweep ordering + +1. Baseline current Bat. +2. Ear directivity only. +3. Wing sideband only. +4. Combined directivity + wing sideband only if individual variants are viable. + +Do not add both new physics changes before an ablation. It will make results ambiguous. + +## Success metrics to compare + +Use the same training/eval flow as the recent Bat work: + +- Build passes. +- Bat C tests pass. 
+- Training completes on current ini without timestep override. +- Compare `perf`, `base_perf`, `SPS`, `timeout`, and qualitative eval behavior. +- Level 5 eval should still look reasonable. + +Known recent baseline from audio-helper move: + +- `perf` around `0.375`. +- `base_perf` around `0.942`. +- `SPS` around `1.5M`. +- `timeout` around `0.001`. + +Do not overinterpret one training run. Use it as a regression/sanity check, then sweep. + +## Source anchors + +Ear directivity: + +- https://doi.org/10.1038/nature11664 +- https://doi.org/10.3389/fphys.2013.00089 +- https://doi.org/10.1073/pnas.1006630107 +- https://doi.org/10.1121/1.418271 +- https://doi.org/10.1121/1.3488304 +- https://doi.org/10.1242/jeb.210252 +- https://doi.org/10.3389/fphys.2013.00191 + +Wing echo / micro-Doppler: + +- https://doi.org/10.1007/BF00612592 +- https://doi.org/10.1098/rspb.2003.2487 +- https://doi.org/10.1098/rspb.2012.2830 +- https://doi.org/10.1037/bne0000315 diff --git a/ocean/bat/BAT_WING_ECHO_RESEARCH.md b/ocean/bat/BAT_WING_ECHO_RESEARCH.md new file mode 100644 index 0000000000..4e8695b51e --- /dev/null +++ b/ocean/bat/BAT_WING_ECHO_RESEARCH.md @@ -0,0 +1,288 @@ +# Bat insect-wing echo and micro-Doppler research notes + +Purpose: preserve research and implementation guidance for possible low-cost insect wing flutter / micro-Doppler echoes in `ocean/bat/`. + +Status: research/design note only. No behavior change is implied by this document. + +## Short answer + +Yes, insect prey should plausibly produce more than a single body echo. Flying insect wings can create echo fluctuations, amplitude modulation, and Doppler/micro-Doppler-like frequency structure. The simplest useful Bat env approximation is: + +- Keep the existing normal body echo. +- For bug echoes only, add one or two weaker wing echoes near the body echo. +- Make wing echoes vary over time with a cheap phase oscillator. +- Keep obstacle echoes unchanged. 
+ +This should add a moving-prey signature without turning the environment into an expensive acoustic simulator. + +## Useful terminology + +- `Doppler shift`: frequency shift caused by relative motion between bat and target. +- `Micro-Doppler`: additional Doppler components from moving parts of a target, such as flapping wings, legs, rotors, or vibrating surfaces. +- `Flutter detection`: detecting oscillating target movements, especially insect wing motion, in echoes. +- `Amplitude modulation`: echo strength fluctuates as wing orientation and scattering cross-section change. +- `Spectral glints`: brief bright echo components from reflective target parts at favorable orientations. +- `Sidebands`: frequency components above and below a carrier/body frequency caused by modulation. + +## What the literature says + +### CF/CF-FM bats can use Doppler and flutter cues + +The classic result is Schnitzler and Flieger on greater horseshoe bats detecting oscillating target movement. Crossref metadata confirms the paper: + +- Schnitzler, H.-U.; Flieger, E. `Detection of oscillating target movements by echolocation in the Greater Horseshoe bat`. Journal of Comparative Physiology 153, 385-391, 1983. DOI: https://doi.org/10.1007/BF00612592 + +Secondary summaries and reviews describe the key idea: CF bats are especially suited to detecting target velocity and wing flutter as Doppler-shifted frequencies. Oscillating wings also create amplitude shifts that help distinguish flying prey from stationary targets. + +Implementation relevance: + +- Bat env currently uses chirps/echoes as compact observations. It does not need raw CF sonar. +- A cheap wing signature is still justified because it gives the policy a prey-specific temporal/frequency cue. +- Apply it only to `BAT_ECHO_BUG`, not walls/obstacles. 
+ +### Echolocation range and wingbeat timing are behaviorally linked + +Holderied and von Helversen studied aerial-hawking bats and found a relationship between echolocation range and wingbeat period. + +Source: + +- Holderied, M. W.; von Helversen, O. `Echolocation range and wingbeat period match in aerial-hawking bats`. Proceedings of the Royal Society B 270, 2293-2299, 2003. DOI: https://doi.org/10.1098/rspb.2003.2487 +- Royal Society page: https://royalsocietypublishing.org/doi/10.1098/rspb.2003.2487 + +Implementation relevance: + +- Wingbeat dynamics are not just visual animation; they are related to sensing and prey pursuit timing. +- If Bat already has a tick-based model, wing phase can update once per tick using a fixed increment. +- No per-chirp expensive computation is needed. + +### Bats can classify prey shape/material from echo structure + +Geipel, Jung, and Kalko showed that `Micronycteris microtis` can detect, classify, and localize silent, motionless prey in clutter using echolocation alone. Their abstract says bats used short, multi-harmonic broadband calls and appeared to perceive a detailed acoustic image based on shape, surface structure, and material. + +Source: + +- Geipel, I.; Jung, K.; Kalko, E. K. V. `Perception of silent and motionless prey on vegetation by echolocation in the gleaning bat Micronycteris microtis`. Proceedings of the Royal Society B 280:20122830, 2013. DOI: https://doi.org/10.1098/rspb.2012.2830 +- Royal Society page: https://royalsocietypublishing.org/doi/10.1098/rspb.2012.2830 + +Implementation relevance: + +- Even without active wing motion, bugs are not acoustically equivalent to points. +- If adding wing sidebands, keep them as prey-specific echo complexity, not as general noise. +- This supports making bug echoes richer than obstacle echoes. 
+ +### Micro-spectral ripple research supports compact target-specific echo features + +Shriram and Simmons studied bats perceiving natural-size targets as a unitary class using micro-spectral ripples in echoes. + +Source: + +- Shriram, U.; Simmons, J. A. `Echolocating bats perceive natural-size targets as a unitary class using micro-spectral ripples in echoes`. Behavioral Neuroscience 133(3), 297-304, 2019. DOI: https://doi.org/10.1037/bne0000315 +- APA page: https://doi.apa.org/doi/10.1037/bne0000315 + +Implementation relevance: + +- Richer echo spectra can matter, but Bat should not model detailed spectra first. +- A few deterministic sidebands are a cheap stand-in for target-specific microstructure. +- This is closer to a useful observation feature than raw acoustic realism. + +## Recommended cheap model + +### Core idea + +When scheduling a bug echo, add: + +- `body echo`: existing echo path, unchanged except for any directivity/range logic already present. +- `wing upper echo`: smaller energy, slightly higher normalized frequency. +- `wing lower echo`: smaller energy, slightly lower normalized frequency. + +The upper/lower echoes represent wing motion toward/away from the bat and modulation around the body return. + +Sketch: + +```c +float wing_phase = env->bug_wing_phase; +float wing = 0.5f + 0.5f * sinf(wing_phase); +float wing_offset = BAT_BUG_WING_FREQ_OFFSET * (0.5f + 0.5f * wing); +float wing_energy = body_energy * BAT_BUG_WING_ECHO_GAIN; + +bat_add_echo_event(env, echo_time, body_freq, body_energy, left_gain, right_gain, BAT_ECHO_BUG); +bat_add_echo_event(env, echo_time, body_freq + wing_offset, wing_energy, left_gain, right_gain, BAT_ECHO_BUG); +bat_add_echo_event(env, echo_time, body_freq - wing_offset, wing_energy, left_gain, right_gain, BAT_ECHO_BUG); +``` + +If avoiding `sinf`, use a triangle oscillator: + +```c +float phase = env->bug_wing_phase; +float tri = phase < 0.5f ? 
phase * 2.0f : (1.0f - phase) * 2.0f; +float wing_offset = BAT_BUG_WING_FREQ_OFFSET * tri; +``` + +Then update phase once per env step: + +```c +env->bug_wing_phase += BAT_BUG_WING_PHASE_STEP; +if (env->bug_wing_phase >= 1.0f) env->bug_wing_phase -= 1.0f; +``` + +Use a constant phase step instead of division per tick. If it needs to depend on tick rate, define the reciprocal as a constant. + +### Initial constants + +The actual values should be tuned by sweep, but a reasonable first pass: + +```c +#define BAT_BUG_WING_ECHO_GAIN 0.20f +#define BAT_BUG_WING_FREQ_OFFSET 0.06f +#define BAT_BUG_WING_PHASE_STEP 0.11f +``` + +Interpretation: + +- `BAT_BUG_WING_ECHO_GAIN`: each sideband gets 20% of body energy. +- `BAT_BUG_WING_FREQ_OFFSET`: normalized frequency offset, not real kHz. +- `BAT_BUG_WING_PHASE_STEP`: wing animation/sensing phase increment per env tick. + +If the two sidebands make total bug energy too high, compensate: + +```c +float body_energy = base_energy * 0.75f; +float wing_energy = base_energy * 0.125f; +``` + +This preserves total energy while adding structure. If the goal is to make bugs easier to identify, do not preserve total energy exactly; but then treat it as a real behavior change. + +## Cheaper one-sideband variant + +If three echo events per bug chirp is too much, use one extra echo whose sign flips with wing phase: + +```c +float tri = env->bug_wing_phase < 0.5f ? env->bug_wing_phase * 2.0f : (1.0f - env->bug_wing_phase) * 2.0f; +float sign = env->bug_wing_phase < 0.5f ? 1.0f : -1.0f; +float wing_freq = body_freq + sign * BAT_BUG_WING_FREQ_OFFSET * tri; +float wing_energy = body_energy * BAT_BUG_WING_ECHO_GAIN; + +bat_add_echo_event(env, echo_time, body_freq, body_energy, left_gain, right_gain, BAT_ECHO_BUG); +bat_add_echo_event(env, echo_time, wing_freq, wing_energy, left_gain, right_gain, BAT_ECHO_BUG); +``` + +This is half the extra event count. It gives time-varying high/low pings, but not simultaneous symmetric sidebands. 
+ +## Even cheaper amplitude-only variant + +If we want no extra echo events, modulate bug echo energy: + +```c +float tri = env->bug_wing_phase < 0.5f ? env->bug_wing_phase * 2.0f : (1.0f - env->bug_wing_phase) * 2.0f; +float flutter_gain = 1.0f + BAT_BUG_WING_AMP_MOD * (tri - 0.5f); +body_energy *= flutter_gain; +``` + +Potential constant: + +```c +#define BAT_BUG_WING_AMP_MOD 0.30f +``` + +This is cheapest but probably less useful because the policy may see it as noise unless it can integrate over time. + +## Recommended first implementation choice + +Use the two-sideband model only if echo event capacity is safely high and current observations can represent multiple arrivals without saturation. + +Use the one-sideband model if event pressure is a concern. + +Use amplitude-only only as a fallback. + +My recommendation for first sweep: + +- One extra wing echo per bug echo. +- Triangle oscillator, no `sinf`. +- `BAT_BUG_WING_ECHO_GAIN = 0.20f`. +- `BAT_BUG_WING_FREQ_OFFSET = 0.06f`. +- Preserve body echo unchanged for the first test so the new signal is additive and easy to ablate. + +## Where it should live in Bat + +Likely location: + +- Bug echo scheduling path, near `bat_schedule_echo` or wherever `BAT_ECHO_BUG` events are created. + +Rules: + +- Do not add this to obstacle echoes. +- Do not add this to render-only or audio-only code. +- Add/advance wing phase in the core env tick/reset state if deterministic observations depend on it. +- If state serialization exists or is added later, include wing phase. +- If randomizing initial wing phase, seed it deterministically with env RNG. + +## Performance considerations + +Good: + +- Phase update once per step. +- Triangle wave instead of `sinf`. +- Constants as `#define`. +- Add at most one extra event first. +- Clamp normalized frequency with existing clamp logic. + +Avoid: + +- Per-echo trigonometry if not needed. +- FFT or convolution. +- Large target meshes. +- Per-wing geometry. 
+- More than one or two additional events without checking event capacity and observation saturation. + +## Expected behavior change + +Likely effects: + +- Bug echoes become more identifiable than obstacle echoes. +- The policy may learn that moving/oscillating echo structure indicates prey. +- Depending on reward and observation clipping, it may improve pursuit or just add noise. +- If event buffers saturate, it can silently hurt by dropping echoes. + +Important risk: + +- PufferLib reward clipping already caused signal issues earlier. Echo observation scaling can have a similar failure mode if new wing echoes saturate observation bins. Keep energy modest and inspect normalization before committing to a sweep. + +## Interaction with ear directivity + +Ear directivity and wing sidebands should be tested separately first. + +Reason: + +- Ear directivity changes spatial gain. +- Wing sidebands change prey identity/frequency/time structure. +- Combining them at once makes it hard to know what helped or broke. + +Order recommendation: + +1. Sweep current baseline after timer/log/audio cleanup. +2. Add static ear directivity only. +3. Add bug wing sideband only. +4. Combine only if both individual variants look viable. + +## Implementation checklist + +- Add `bug_wing_phase` to env state only if needed by deterministic core observations. +- Reset/init `bug_wing_phase` deterministically. +- Advance phase with multiplication/addition, not division. +- Add wing echo only for `BAT_ECHO_BUG`. +- Clamp wing frequency after offset. +- Ensure event capacity cannot drop important echoes. +- Keep observation normalization stable. +- Build/test/train/eval before comparing to baseline. 
+ +## Source list + +- https://doi.org/10.1007/BF00612592 +- https://doi.org/10.1098/rspb.2003.2487 +- https://royalsocietypublishing.org/doi/10.1098/rspb.2003.2487 +- https://doi.org/10.1098/rspb.2012.2830 +- https://royalsocietypublishing.org/doi/10.1098/rspb.2012.2830 +- https://doi.org/10.1037/bne0000315 +- https://doi.apa.org/doi/10.1037/bne0000315 +- https://en.wikipedia.org/wiki/Animal_echolocation +- https://en.wikipedia.org/wiki/Doppler_shift_compensation From 3544257b200a63b8f66b4df044c3fdbe7c9390ec Mon Sep 17 00:00:00 2001 From: Kinvert Date: Wed, 10 Jun 2026 20:35:51 -0700 Subject: [PATCH 33/51] Add sweepable Bat ear directivity --- config/bat.ini | 21 ++++ ocean/bat/bat.h | 15 ++- ocean/bat/binding.c | 3 + ocean/bat/tests/test_bat_core.c | 166 ++++++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 2 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index a51cb1d6ee..f6358215f5 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -53,6 +53,9 @@ curriculum_start_bug_distance = 8 curriculum_start_obstacles = 0 curriculum_successes_per_level = 4 ear_separation_scale = 1.6650928741686002 +ear_rear_gain = 0.20 +ear_front_gain = 0.55 +ear_side_gain = 0.35 early_chirp_penalty = 0.004893262939164256 freq_bins_per_ear = 16 inbound_bug_speed_multiplier = 1.75 @@ -211,6 +214,24 @@ min = 1.0 max = 2.0 scale = auto +[sweep.env.ear_rear_gain] +distribution = uniform +min = 0.10 +max = 0.30 +scale = auto + +[sweep.env.ear_front_gain] +distribution = uniform +min = 0.40 +max = 0.75 +scale = auto + +[sweep.env.ear_side_gain] +distribution = uniform +min = 0.20 +max = 0.55 +scale = auto + [sweep.env.corner_reflectors] distribution = int_uniform min = 0 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 7e22160c14..e9ecc29df3 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -199,6 +199,9 @@ typedef struct Bat { float bat_turn_velocity; float bat_radius; float ear_separation_scale; + float ear_rear_gain; + float ear_front_gain; + float 
ear_side_gain; float bat_max_speed; float bat_min_speed; float bat_accel; @@ -878,8 +881,16 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float left_dir_y = -ly; float right_dir_x = lx; float right_dir_y = ly; - float left_gain = bat_clampf(0.75f + 0.25f * (ux * left_dir_x + uy * left_dir_y), 0.1f, 1.0f); - float right_gain = bat_clampf(0.75f + 0.25f * (ux * right_dir_x + uy * right_dir_y), 0.1f, 1.0f); + float front_gain = bat_clampf(forward, 0.0f, 1.0f); + float left_side_gain = bat_clampf(ux * left_dir_x + uy * left_dir_y, 0.0f, 1.0f); + float right_side_gain = bat_clampf(ux * right_dir_x + uy * right_dir_y, 0.0f, 1.0f); + front_gain *= front_gain; + left_side_gain *= left_side_gain; + right_side_gain *= right_side_gain; + float left_gain = env->ear_rear_gain + env->ear_front_gain * front_gain + + env->ear_side_gain * left_side_gain; + float right_gain = env->ear_rear_gain + env->ear_front_gain * front_gain + + env->ear_side_gain * right_side_gain; float source_path = bat_dist(chirp->x, chirp->y, rx, ry); float left_path = source_path + bat_dist(rx, ry, left_ear_x, left_ear_y); diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index ebe8ffa599..709464fdc6 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -15,6 +15,9 @@ void my_init(Env* env, Dict* kwargs) { env->num_obstacles = dict_get(kwargs, "num_obstacles")->value; env->bat_radius = dict_get(kwargs, "bat_radius")->value; env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; + env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; + env->ear_front_gain = dict_get(kwargs, "ear_front_gain")->value; + env->ear_side_gain = dict_get(kwargs, "ear_side_gain")->value; env->bug_radius = dict_get(kwargs, "bug_radius")->value; env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; env->bat_min_speed = dict_get(kwargs, "bat_min_speed")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 
af7b64dc67..55c0389c6a 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -30,6 +30,9 @@ static Bat make_test_env(void) { .num_obstacles = 1, .bat_radius = 2.0f, .ear_separation_scale = 0.75f, + .ear_rear_gain = 0.20f, + .ear_front_gain = 0.55f, + .ear_side_gain = 0.35f, .bug_radius = 1.5f, .bat_max_speed = 12.0f, .bat_min_speed = 2.4f, @@ -427,6 +430,161 @@ static int test_left_right_echo_asymmetry(void) { return 0; } +typedef struct BatEchoProbe { + float left_energy; + float right_energy; + float left_tick; + float right_tick; +} BatEchoProbe; + +static BatEchoProbe test_probe_echo_from_relative_source(float dx, float dy) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 24.0f; + env.bat_y = 24.0f; + env.bat_vx = 0.0f; + env.bat_vy = 0.0f; + env.bat_heading = 0.0f; + env.sound_speed = 40.0f; + env.ear_separation_scale = 2.0f; + env.max_echo_range = 128.0f; + env.ear_rear_gain = 0.20f; + env.ear_front_gain = 0.55f; + env.ear_side_gain = 0.35f; + env.tick = 0; + bat_clear_echo_queue(&env); + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 0.5f, + .end_freq = 0.5f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.bat_x + dx, env.bat_y + dy, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + + BatEchoProbe probe = { + .left_tick = -1.0f, + .right_tick = -1.0f, + }; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + if (env.echo_queue[i].tick < 0) continue; + float left_energy = 0.0f; + float right_energy = 0.0f; + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + left_energy += env.echo_queue[i].energy[0][bin]; + right_energy += env.echo_queue[i].energy[1][bin]; + } + if (left_energy > 0.0f) { + probe.left_energy += left_energy; + probe.left_tick = env.echo_queue[i].tick; + } + if (right_energy > 0.0f) { + probe.right_energy += right_energy; + probe.right_tick = env.echo_queue[i].tick; + } + } + + 
free_allocated(&env); + return probe; +} + +static int test_directional_echo_arrival_and_gain_by_side(void) { + const float left_sources[3][2] = { + {0.0f, -18.0f}, + {18.0f, -18.0f}, + {24.0f, -8.0f}, + }; + const float right_sources[3][2] = { + {0.0f, 18.0f}, + {18.0f, 18.0f}, + {24.0f, 8.0f}, + }; + + for (int i = 0; i < 3; i++) { + BatEchoProbe left = test_probe_echo_from_relative_source( + left_sources[i][0], left_sources[i][1]); + ASSERT_TRUE(left.left_tick > 0.0f); + ASSERT_TRUE(left.right_tick > 0.0f); + ASSERT_TRUE(left.left_tick < left.right_tick); + ASSERT_TRUE(left.left_energy > left.right_energy); + + BatEchoProbe right = test_probe_echo_from_relative_source( + right_sources[i][0], right_sources[i][1]); + ASSERT_TRUE(right.left_tick > 0.0f); + ASSERT_TRUE(right.right_tick > 0.0f); + ASSERT_TRUE(right.right_tick < right.left_tick); + ASSERT_TRUE(right.right_energy > right.left_energy); + } + + BatEchoProbe front = test_probe_echo_from_relative_source(18.0f, 0.0f); + ASSERT_TRUE(front.left_tick > 0.0f); + ASSERT_TRUE(front.right_tick > 0.0f); + ASSERT_FLOAT_NEAR(front.left_tick, front.right_tick, 0.0001f); + ASSERT_FLOAT_NEAR(front.left_energy, front.right_energy, 0.0001f); + + return 0; +} + +static int test_ear_directivity_gains_control_echo_energy(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.bat_x = 20.0f; + env.bat_y = 20.0f; + env.bat_heading = 0.0f; + env.bug_vx = 0.0f; + env.bug_vy = 0.0f; + env.ear_rear_gain = 0.0f; + env.ear_front_gain = 1.0f; + env.ear_side_gain = 0.0f; + env.tick = 0; + + ChirpEvent chirp = { + .x = env.bat_x, + .y = env.bat_y, + .start_freq = 1.0f, + .end_freq = 1.0f, + .duration = bat_chirp_duration_seconds(0.0f), + .birth_tick = 0, + .active = 1, + }; + + bat_clear_echo_queue(&env); + bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.bat_x + 16.0f, env.bat_y, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + float front_energy = 0.0f; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int ear = 0; ear < 2; ear++) { 
+ for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + front_energy += env.echo_queue[i].energy[ear][bin]; + } + } + } + + bat_clear_echo_queue(&env); + bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.bat_x, env.bat_y - 16.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + float side_energy = 0.0f; + for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int ear = 0; ear < 2; ear++) { + for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + side_energy += env.echo_queue[i].energy[ear][bin]; + } + } + } + + ASSERT_TRUE(front_energy > 0.0f); + ASSERT_FLOAT_NEAR(side_energy, 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { Bat env = { .num_agents = 1, @@ -436,6 +594,9 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { .num_obstacles = 0, .bat_radius = 2.0f, .ear_separation_scale = 0.75f, + .ear_rear_gain = 0.20f, + .ear_front_gain = 0.55f, + .ear_side_gain = 0.35f, .bug_radius = 1.5f, .bat_max_speed = 12.0f, .bat_accel = 30.0f, @@ -1384,6 +1545,9 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { .bat_turn_rate = 9.424778f, .bug_speed = 4.0f, .max_echo_range = 128.0f, + .ear_rear_gain = 0.20f, + .ear_front_gain = 0.55f, + .ear_side_gain = 0.35f, .sound_speed = 180.0f, .curriculum_max_bug_distance = 56.0f, .rng = 1, @@ -1799,6 +1963,8 @@ int main(void) { if (test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf()) return 1; if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; + if (test_directional_echo_arrival_and_gain_by_side()) return 1; + if (test_ear_directivity_gains_control_echo_energy()) return 1; if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; if (test_echo_scheduling_uses_tick_bucket_accumulator()) return 1; if (test_ear_separation_scale_controls_arrival_gap()) return 1; From ac61d3bfebb5c24c6a0703c3998940904df0a140 Mon Sep 17 00:00:00 2001 
From: Kinvert Date: Wed, 10 Jun 2026 20:49:46 -0700 Subject: [PATCH 34/51] Add Bat bug wing sidebands --- config/bat.ini | 7 +++++++ ocean/bat/bat.h | 7 +++++++ ocean/bat/binding.c | 1 + ocean/bat/tests/test_bat_core.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+) diff --git a/config/bat.ini b/config/bat.ini index f6358215f5..8d4c02c553 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -37,6 +37,7 @@ record_video_audio = 1 bug_echo_farther_penalty_scale = 0.05 bug_echo_reward_scale = 0.2789761793107612 bug_echo_min_displacement = 1 +bug_wing_sideband_gain = 0.10 bug_maneuver_frequency = 0.4 bug_maneuver_start_level = 7 bug_maneuver_strength = 0.4 @@ -292,6 +293,12 @@ min = 0.05 max = 0.20 scale = auto +[sweep.env.bug_wing_sideband_gain] +distribution = uniform +min = 0.05 +max = 0.25 +scale = auto + [sweep.env.collision_penalty] distribution = uniform min = 0.5 diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index e9ecc29df3..a2c28fc9af 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -269,6 +269,7 @@ typedef struct Bat { float bug_echo_reward_scale; float bug_echo_farther_penalty_scale; float bug_echo_min_displacement; + float bug_wing_sideband_gain; float tick_bug_echo_energy; float tick_bug_echo_path; float last_bug_echo_path; @@ -818,6 +819,12 @@ static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, int bin = bat_freq_bin_index(env, freq); bucket->energy[ear_idx][bin] += intensity; if (source == BAT_ECHO_BUG) { + float sideband = intensity * env->bug_wing_sideband_gain; + int bins = env->freq_bins_per_ear; + if (sideband > 0.000001f) { + if (bin > 0) bucket->energy[ear_idx][bin - 1] += sideband; + if (bin + 1 < bins) bucket->energy[ear_idx][bin + 1] += sideband; + } bucket->bug_energy += intensity; if (bucket->bug_path < 0.0f || path < bucket->bug_path) { bucket->bug_path = path; diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 709464fdc6..2948d99988 100644 --- a/ocean/bat/binding.c +++ 
b/ocean/bat/binding.c @@ -64,6 +64,7 @@ void my_init(Env* env, Dict* kwargs) { env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; env->bug_echo_farther_penalty_scale = dict_get(kwargs, "bug_echo_farther_penalty_scale")->value; env->bug_echo_min_displacement = dict_get(kwargs, "bug_echo_min_displacement")->value; + env->bug_wing_sideband_gain = dict_get(kwargs, "bug_wing_sideband_gain")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; env->collision_penalty = dict_get(kwargs, "collision_penalty")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 55c0389c6a..c7132e1c58 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -59,6 +59,7 @@ static Bat make_test_env(void) { .early_chirp_penalty = 0.001f, .bug_echo_farther_penalty_scale = 0.10f, .bug_echo_min_displacement = 1.0f, + .bug_wing_sideband_gain = 0.10f, .curriculum_max_obstacles = 1, .curriculum_obstacle_step = 8, .curriculum_successes_per_level = 1, @@ -672,6 +673,34 @@ static int test_echo_scheduling_uses_tick_bucket_accumulator(void) { return 0; } +static int test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation(void) { + Bat env = make_test_env(); + c_reset(&env); + + env.tick = 0; + env.bug_wing_sideband_gain = 0.25f; + bat_clear_echo_queue(&env); + + int bin = bat_freq_bin_index(&env, 0.5f); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, BAT_ECHO_BUG); + EchoBucket* bug_bucket = &env.echo_queue[1 % BAT_ECHO_QUEUE_TICKS]; + ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin], 0.4f, 0.0001f); + ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin - 1], 0.1f, 0.0001f); + ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin + 1], 0.1f, 0.0001f); + ASSERT_FLOAT_NEAR(bug_bucket->bug_energy, 0.4f, 0.0001f); + + bat_clear_echo_queue(&env); + bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, BAT_ECHO_STATIC); + 
EchoBucket* static_bucket = &env.echo_queue[1 % BAT_ECHO_QUEUE_TICKS]; + ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin], 0.4f, 0.0001f); + ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin - 1], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin + 1], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(static_bucket->bug_energy, 0.0f, 0.0001f); + + free_allocated(&env); + return 0; +} + static float test_side_echo_receive_tick_gap(float ear_separation_scale) { Bat env = make_test_env(); c_reset(&env); @@ -1967,6 +1996,7 @@ int main(void) { if (test_ear_directivity_gains_control_echo_energy()) return 1; if (test_default_sound_speed_allows_one_tick_interaural_delay()) return 1; if (test_echo_scheduling_uses_tick_bucket_accumulator()) return 1; + if (test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation()) return 1; if (test_ear_separation_scale_controls_arrival_gap()) return 1; if (test_doppler_sign_for_approaching_bug()) return 1; if (test_wall_collision_is_terminal_minus_one()) return 1; From ff094bd3ea40a83dde08eeac585fe67c594c9008 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 13:22:03 -0700 Subject: [PATCH 35/51] Set Bat defaults to ewgh6l5l --- BAT9_SWEEP_ANALYSIS.md | 450 +++++++++++++++++++++++++++++++++++++++++ config/bat.ini | 85 ++++---- 2 files changed, 493 insertions(+), 42 deletions(-) create mode 100644 BAT9_SWEEP_ANALYSIS.md diff --git a/BAT9_SWEEP_ANALYSIS.md b/BAT9_SWEEP_ANALYSIS.md new file mode 100644 index 0000000000..54d848c97d --- /dev/null +++ b/BAT9_SWEEP_ANALYSIS.md @@ -0,0 +1,450 @@ +# Bat9 Sweep Analysis + +Date: 2026-06-11 + +This note summarizes the local W&B `bat9` sweep after the timer observation, +sweepable ear directivity, and bug wing sideband changes. It is modeled after +`BAT8_SWEEP_ANALYSIS.md` and should be used before copying a Bat9 candidate into +`config/bat.ini`. + +## Future Agent Workflow + +Use this workflow when trying a Bat9 candidate. + +1. Pick by behavior objective, not only by `env/perf`. 
+ - Start with `ifbn2epd` for the first balanced retrain/video candidate. + - Keep `ewgh6l5l` as the best composite scalar anchor. + - Use `qkwrqhzy` when SPS matters, `sfmk59n1` or `w938us46` for high + curriculum checks, and `cpx4gj2f` for a 128x5 balanced comparison. + - Treat `1a2s8uvf` as a low-chirp experiment only; its timeout rate is high. + +2. Pull exact hyperparameters from the local W&B config. + - Use `wandb/run-*-<run_id>/files/config.yaml` or `logs/bat/<run_id>.json`. + - Copy only concrete values from `vec`, `policy`, `env`, and `train`. + - Do not copy sweep search-space sections. + - Keep the run's configured `train.total_timesteps`. + - Local JSON/YAML files do not always store W&B display names. When the name + matters, query W&B with the run hash, for example: + `wandb.Api().run("kinvert-k/bat9/<run_id>").name`. + +3. Before each candidate train/eval cycle, run: + + ```bash + source .venv/bin/activate && ./build.sh bat && bash ocean/bat/tests/run_all.sh + ``` + +4. Train with the selected config and no timestep override: + + ```bash + source .venv/bin/activate && python -m pufferlib.pufferl train bat --train.gpus 1 + ``` + + If CUDA is hidden inside Codex, rerun the same command outside the sandbox or + escalated. Do not switch Bat to CPU. + +5. Run fixed-level visual evals before adopting defaults: + + ```bash + timeout 45s bash -lc 'source .venv/bin/activate && DISPLAY=:0 python -m pufferlib.pufferl eval bat --load-model-path latest --env.curriculum-initial-level 5 --env.curriculum-successes-per-level 1000000' + timeout 45s bash -lc 'source .venv/bin/activate && DISPLAY=:0 python -m pufferlib.pufferl eval bat --load-model-path latest --env.curriculum-initial-level 10 --env.curriculum-successes-per-level 1000000' + ``` + +6.
Record the first postable MP4 only after a retrained checkpoint looks clean: + + ```bash + timeout 45s bash -lc 'source .venv/bin/activate && DISPLAY=:0 python -m pufferlib.pufferl eval bat --load-model-path latest --env.curriculum-initial-level 5 --env.curriculum-successes-per-level 1000000 --env.record-video 1 --env.record-video-fps 30 --env.record-video-seconds 30 --env.record-video-audio 1' + ``` + + Expected output is `recordings/bat_recording.mp4`. Do not commit recordings, + gifs, local W&B folders, logs, or checkpoint artifacts unless asked. + +## Source And Filter + +Source data is the local `wandb/` tree in `/home/claude/pathfinder`, filtered to +runs where the W&B metadata/config has `--wandb-project bat9` or +`wandb_project = bat9`. + +- Sweep invocation in run metadata: + `python -m pufferlib.pufferl ... --sweep.gpus 1 --train.gpus 1 --sweep.use-gpu "" --sweep.max-runs 1000 --wandb --wandb-project bat9` +- Git commit in run metadata: `ac61d3bfebb5c24c6a0703c3998940904df0a140` +- Hardware in run metadata: `G240`, `NVIDIA GeForce RTX 5060` +- Bat9 rows with `env/perf` and usable W&B config: `789` +- Rows with `env/perf >= 0.25`: `507` +- Pareto front rows over the selected objectives: `123` + +The previous handoff mentioned `773` complete runs. The local tree had grown by +the time this snapshot was frozen; the top-six ordering remained unchanged. + +`env/perf` is the composite sweep objective: + +```text +perf = base_perf * curriculum_difficulty * chirp_perf +``` + +This is still not the same as "best visible behavior." High scalar scores can +come from low chirp counts, curriculum progress, or catch rate in different +proportions. + +## Bat9 Code Changes + +Bat9 differs from Bat8 in three behavior-relevant ways: + +- Timer observation: `BAT_OBS_SIZE` is now `41`, and observation slot `40` + receives normalized elapsed episode time. This should give the policy urgency + information that Bat8 lacked. 
+- Ear directivity: `ear_rear_gain`, `ear_front_gain`, and `ear_side_gain` are + sweepable, and the echo scheduler mixes rear baseline, forward response, and + left/right side response into per-ear intensity. +- Bug wing sidebands: bug echoes add adjacent frequency-bin sideband energy + scaled by `bug_wing_sideband_gain`. + +Bat9 also logs `env/mean_chirp_bandwidth`, which helps distinguish detection +benefits from policies that merely chirp broadly or noisily. + +## Overall Distribution + +Across all 789 complete Bat9 rows: + +| `env/perf` quantile | value | +| ---: | ---: | +| min | `0.0000` | +| 25% | `0.2023` | +| 50% | `0.2996` | +| 75% | `0.3628` | +| 90% | `0.4014` | +| 95% | `0.4274` | +| 99% | `0.4861` | +| max | `0.5565` | + +Filtered high-perf rows (`env/perf >= 0.25`) look like this: + +| metric | mean | median | q25 | q75 | min | max | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| `env/perf` | `0.3477` | `0.3459` | `0.3058` | `0.3843` | `0.2505` | `0.5565` | +| `env/base_perf` | `0.9173` | `0.9237` | `0.9028` | `0.9427` | `0.5768` | `0.9741` | +| `env/curriculum_perf` | `0.8004` | `0.8161` | `0.7841` | `0.8350` | `0.4398` | `0.9327` | +| `env/chirp_perf` | `0.4452` | `0.4417` | `0.3936` | `0.4877` | `0.3011` | `0.6888` | +| `env/curriculum_level` | `11.94` | `11.99` | `11.12` | `12.83` | `8.05` | `15.62` | +| `env/chirps_emitted` | `8.33` | `8.38` | `7.69` | `9.11` | `4.67` | `10.50` | +| `env/chirp_overlap_fraction` | `0.1249` | `0.1069` | `0.0510` | `0.1742` | `0.0001` | `0.5243` | +| `env/mean_chirp_bandwidth` | `0.4071` | `0.3750` | `0.3356` | `0.4556` | `0.0010` | `0.9753` | +| `env/timeout` | `0.0023` | `0.0011` | `0.0004` | `0.0025` | `0.0000` | `0.0521` | +| `env/collision` | `0.0804` | `0.0737` | `0.0549` | `0.0962` | `0.0182` | `0.4232` | +| `bad_terminal` | `0.0827` | `0.0763` | `0.0573` | `0.0972` | `0.0259` | `0.4232` | +| `env/episode_length` | `251.18` | `249.34` | `224.93` | `272.49` | `153.72` | `420.04` | +| `SPS` | `1.09M` | 
`0.99M` | `0.44M` | `1.63M` | `0.40M` | `2.45M` | + +## Bat8 To Bat9 Read + +This is a qualitative before/after, not a matched statistical test. The Bat8 +numbers are from `BAT8_SWEEP_ANALYSIS.md` plus the same local W&B scan for +episode length. + +| high-perf metric | Bat8 mean | Bat9 mean | read | +| --- | ---: | ---: | --- | +| `env/perf` | `0.3250` | `0.3477` | Bat9 shifted the upper half upward, though Bat8's single best scalar was slightly higher (`0.5695` vs `0.5565`). | +| `env/base_perf` | `0.8592` | `0.9173` | Clear catch-rate improvement. | +| `env/curriculum_perf` | `0.7583` | `0.8004` | Bat9 reaches harder behavior more consistently. | +| `env/chirp_perf` | `0.4591` | `0.4452` | Slightly worse chirp efficiency; Bat9 spends a bit more chirp budget. | +| `env/curriculum_level` | `11.10` | `11.94` | Curriculum level improved by about `0.85`. | +| `env/chirps_emitted` | `8.12` | `8.33` | Small increase in chirp usage. | +| `env/chirp_overlap_fraction` | `0.1580` | `0.1249` | Overlap improved. | +| `env/timeout` | `0.0020` | `0.0023` | No aggregate timeout win from the timer observation. | +| `env/collision` | `0.1388` | `0.0804` | Large collision reduction. | +| `env/episode_length` | `214.93` | `251.18` | Episodes got longer, so the timer did not simply make policies rush. | +| `SPS` | `1.63M` | `1.09M` | Bat9 is slower, mostly because many good runs are wider models. | + +Timer read: the timer observation did not remove timeout/circling risk by +itself. In the top six, `1a2s8uvf` still times out at `0.0512`, `qkwrqhzy` at +`0.0216`, and `ewgh6l5l`/`gli5dke9` have long episodes. Fixed-level visual eval +is still required before declaring a default. 
+ +## Top Composite Runs + +| run id | role | perf | base | curriculum | chirp perf | level | chirps | timeout | collision | episode len | SPS | model | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| `rdjj5r21` | silent inbound exploit | `0.8438` | `0.9638` | `0.8450` | `0.9985` | `10.97` | `0.02` | `0.0282` | `0.0079` | `468.48` | `2.62M` | 64x5 | +| `ewgh6l5l` | best composite | `0.5565` | `0.9497` | `0.8392` | `0.6544` | `11.30` | `5.19` | `0.0088` | `0.0414` | `378.95` | `1.13M` | 256x5 | +| `ifbn2epd` | first retrain/video pick | `0.5398` | `0.9639` | `0.8524` | `0.6372` | `11.11` | `5.44` | `0.0051` | `0.0310` | `303.99` | `2.15M` | 64x5 | +| `gli5dke9` | high scalar, slow wide model | `0.5371` | `0.9459` | `0.8363` | `0.6448` | `11.54` | `5.33` | `0.0139` | `0.0402` | `420.04` | `0.46M` | 512x5 | +| `sfmk59n1` | high curriculum top-six | `0.5314` | `0.9285` | `0.9213` | `0.5840` | `13.51` | `6.24` | `0.0014` | `0.0701` | `263.42` | `0.98M` | 256x5 | +| `1a2s8uvf` | lowest chirp top-six, timeout risk | `0.5243` | `0.9048` | `0.7648` | `0.6888` | `10.47` | `4.67` | `0.0512` | `0.0441` | `362.04` | `1.07M` | 256x5 | +| `qkwrqhzy` | fastest top-six | `0.5145` | `0.9227` | `0.8075` | `0.6389` | `11.05` | `5.42` | `0.0216` | `0.0557` | `330.63` | `2.45M` | 64x4 | +| `63dl6lpc` | low-chirp comparison | `0.4975` | `0.9138` | `0.7219` | `0.6864` | `10.03` | `4.70` | `0.0447` | `0.0415` | `379.87` | `1.32M` | 256x4 | +| `cpx4gj2f` | balanced 128x5 candidate | `0.4968` | `0.9522` | `0.8269` | `0.6070` | `10.96` | `5.89` | `0.0119` | `0.0360` | `309.02` | `1.72M` | 128x5 | + +## Pursuit-Biased Short-Episode Candidates + +This pass looks for high `env/perf` with lower `env/episode_length`, under the +working hypothesis that shorter successful episodes are more likely to be active +pursuit than waiting/intercept behavior. 
This is only a proxy: very short +episodes can also mean fast collisions, so the best candidates below keep +`base_perf` high and avoid large timeout/collision rates. + +Across the current local Bat9 logs, `env/perf >= 0.40` has median episode length +`248.23` and q25 `223.10`. The rows below are the most promising pursuit-biased +visual candidates, with W&B names resolved from `kinvert-k/bat9` on 2026-06-11. + +| hash | W&B name | why inspect | perf | episode len | base | level | chirps | timeout | collision | SPS | model | +| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| `o7yrj371` | `vivid-breeze-268` | Best first pursuit candidate: high perf, short episode, low timeout/collision, and fast 64x5 model. | `0.4749` | `221.67` | `0.9323` | `13.21` | `6.29` | `0.0008` | `0.0670` | `1.99M` | 64x5 | +| `wxyb10fq` | `happy-shadow-619` | Best `perf / episode_length` of the two near-0.485 perf rows, at under ~230 steps; highest level in this table and good SPS. | `0.4846` | `225.07` | `0.9237` | `13.99` | `6.12` | `0.0003` | `0.0760` | `1.70M` | 128x5 | +| `rm3a29ie` | `generous-violet-224` | Highest perf among the rows under ~230 steps, essentially tied with `wxyb10fq`; moderate collision risk. | `0.4847` | `226.97` | `0.9144` | `12.91` | `5.90` | `0.0014` | `0.0842` | `1.00M` | 256x5 | +| `x1ayhg3j` | `clean-pyramid-454` | Strong pursuit-ratio candidate: ~199-step episodes with good base and low timeout. | `0.4458` | `199.11` | `0.9234` | `12.90` | `6.72` | `0.0005` | `0.0761` | `0.99M` | 256x5 | +| `zfxopb9j` | `vocal-snowflake-675` | Similar to `x1ayhg3j`, slightly higher perf and slightly longer episode; collision is higher but not extreme. | `0.4535` | `203.92` | `0.9091` | `13.39` | `6.48` | `0.0013` | `0.0896` | `0.92M` | 256x5 | +| `e4ut00v8` | `gentle-wind-710` | Cleaner terminal profile: zero timeout, low collision, high base, short-ish episode. 
| `0.4365` | `218.73` | `0.9472` | `13.78` | `7.14` | `0.0000` | `0.0528` | `1.66M` | 128x5 | +| `op9q6evk` | `sparkling-plasma-859` | Low collision and high base; slower 512x5 model, but a useful clean-pursuit comparison. | `0.4301` | `215.24` | `0.9408` | `13.60` | `7.21` | `0.0005` | `0.0586` | `0.44M` | 512x5 | +| `vt8s8kok` | `golden-moon-129` | Good 128x5 speed/behavior balance with sub-200 episode length and low timeout. | `0.4274` | `198.50` | `0.9297` | `13.58` | `7.14` | `0.0002` | `0.0701` | `1.79M` | 128x5 | +| `tuz0bo8d` | `youthful-lake-438` | Short, low-timeout 128x5 backup; lower perf than the rows above. | `0.4211` | `198.97` | `0.9330` | `13.59` | `7.33` | `0.0001` | `0.0669` | `1.46M` | 128x5 | + +Risky short-episode rows to treat with caution: + +| hash | W&B name | caution | perf | episode len | base | level | chirps | timeout | collision | model | +| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| `d46xhryw` | `hopeful-universe-874` | Very short and high level, but lower base and high collision; may be fast contact/failure rather than clean pursuit. | `0.4608` | `178.85` | `0.8634` | `15.30` | `6.66` | `0.0000` | `0.1366` | 512x5 | +| `l2sg0cpf` | `scarlet-butterfly-228` | Shortest high-perf episode length, but collision is too high for first visual pass. | `0.4573` | `153.72` | `0.8273` | `15.08` | `6.14` | `0.0001` | `0.1726` | 256x5 | + +## Distance-Tempo Chirp Candidates + +Bat logs `env/far_chirp_rate`, `env/near_chirp_rate`, and +`env/chirp_tempo_ratio`. These are distance-region metrics, not strict +episode-time buckets: "far" means the bat-bug distance is greater than `0.66` +of the start distance, and "near" means less than `0.33`. The tempo ratio is +`near_chirp_rate / far_chirp_rate`, capped at `10`. + +This shortlist looks for runs that may chirp sparsely while far from the bug +and chirp faster once close. 
The strict filter was `perf >= 0.40`, +`base_perf >= 0.90`, `timeout <= 0.01`, and `collision <= 0.10`; rows were then +ranked by high tempo ratio, low far rate, enough near rate, later mean chirp +time, lower chirp count, and scalar perf. This pass used `logs/bat/*.json`, +which currently has `907` Bat9 rows with tempo metrics. Display names were +resolved from W&B (`kinvert-k/bat9`) on 2026-06-11. + +| hash | W&B name | why inspect | perf | episode len | chirps | far rate | near rate | tempo ratio | mean chirp tick | timeout | collision | SPS | model | +| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| `x1ayhg3j` | `clean-pyramid-454` | Best first practical eval: short episodes, good perf/base, high near rate, and strong `1.74x` close/far tempo. Level 10 visual review showed a one/few-chirp blind-intercept tactic, not simply continuous close-range chirping. | `0.4458` | `199.11` | `6.72` | `0.0291` | `0.0480` | `1.74` | `0.210` | `0.0005` | `0.0761` | `0.99M` | 256x5 | +| `agdoug04` | `unique-dawn-764` | Best composite sparse-far/fast-near row; far chirp rate is very low, but episodes are long. | `0.4565` | `352.20` | `6.00` | `0.0213` | `0.0298` | `1.80` | `0.362` | `0.0035` | `0.0813` | `2.16M` | 128x5 | +| `ks16xv58` | `ancient-pyramid-609` | Extreme tempo-ratio study: almost no far chirping and very late mean chirp time; slower 512x5 and lower perf. | `0.4036` | `354.60` | `7.29` | `0.0088` | `0.0387` | `4.92` | `0.497` | `0.0060` | `0.0663` | `0.44M` | 512x5 | +| `899idvcg` | `giddy-eon-856` | Balanced candidate with short-ish episodes, low overlap, and clean terminal stats. | `0.4258` | `248.10` | `7.08` | `0.0262` | `0.0405` | `1.57` | `0.260` | `0.0042` | `0.0708` | `1.46M` | 64x5 | +| `m6vsxc7b` | `balmy-glitter-608` | Clean terminal profile and high base; useful if the shorter `x1ayhg3j` behavior is too noisy. 
| `0.4161` | `251.40` | `7.51` | `0.0266` | `0.0416` | `1.60` | `0.262` | `0.0001` | `0.0520` | `0.91M` | 256x5 | +| `z6mh0t3b` | `zesty-oath-412` | Highest scalar among the cleaner tempo candidates, but the tempo ratio is milder. | `0.4757` | `247.50` | `6.46` | `0.0254` | `0.0313` | `1.28` | `0.250` | `0.0015` | `0.0443` | `1.06M` | 256x5 | +| `63dl6lpc` | `jolly-night-696` | Risky extreme: `9.31x` tempo ratio and only 4.70 chirps, but timeout is high and episodes are long. | `0.4975` | `379.87` | `4.70` | `0.0008` | `0.0375` | `9.31` | `0.629` | `0.0447` | `0.0415` | `1.32M` | 256x4 | +| `gli5dke9` | `efficient-totem-502` | High perf with high tempo ratio, but very long/slow 512x5 and timeout is above the clean filter. | `0.5371` | `420.04` | `5.33` | `0.0126` | `0.0276` | `3.04` | `0.492` | `0.0139` | `0.0402` | `0.46M` | 512x5 | + +## Silent Outlier + +`rdjj5r21` / `atomic-dragon-816` stands far outside the rest of Bat9. In the +current local logs, it ranks 1st of 930 Bat9 rows by `env/perf` (`0.8438`) while +averaging only `0.022` chirps per episode. The next-lowest-chirp high-perf rows +are around `4.7` chirps. It also has the 3rd-lowest collision rate (`0.0079`) +and top-quartile base success (`0.9638`), but it is extremely slow: episode +length `468.48`, 4th-longest in the local Bat9 set, with timeout `0.0282`. + +Metric interpretation: this is not a close-range tempo-chirp policy. Its +`first_chirp_tick_norm` and `mean_chirp_tick_norm` are both about `0.996`, which +mostly means "no chirp happened" under the current logging convention. The high +score comes from combining high success and high curriculum difficulty with +near-perfect `chirp_perf`. + +Visual eval after retraining from the exact `rdjj5r21` hyperparameters confirmed +the exploit. The run loaded `checkpoints/bat/1781207491807/0000000034340864.bin` +via `--load-model-path latest`. 
Per human review, the bat mostly circles, almost +never chirps, and appears to wait for the inbound bug to hit it accidentally. +This explains the very high scalar score and very long episodes: it is not a +usable pursuit policy. + +Physics implication: the policy is exploiting the level 8+ inbound bug +curriculum and timer/motion priors rather than echolocation. At inbound levels, +the bug is re-aimed toward the bat every tick with noise and optional lateral +maneuver. A near-silent policy can therefore learn a wait/patrol strategy that +avoids collisions and catches the bug late. The observation does not include +direct bug position: it contains echo bins, chirp state/cooldown, speed, turn +rate, and timer. This run should remain documented as a useful failure case, not +as a default or video candidate. + +## Physics Knob Analysis + +Spearman correlations across all 789 complete rows: + +| pair | rho | +| --- | ---: | +| `perf` vs `chirps_emitted` | `-0.777` | +| `perf` vs `chirp_perf` | `0.776` | +| `perf` vs `far_chirp_rate` | `-0.678` | +| `perf` vs `chirp_overlap_fraction` | `-0.589` | +| `perf` vs `curriculum_perf` | `0.543` | +| `perf` vs `curriculum_level` | `0.508` | +| `perf` vs `bad_terminal` | `-0.443` | +| `perf` vs `base_perf` | `0.443` | +| `perf` vs `collision` | `-0.377` | +| `perf` vs `timeout` | `-0.300` | + +Wing sideband read: + +- `bug_wing_sideband_gain` has a weak positive relationship with `perf` + (`rho = 0.158`), stronger with `curriculum_perf` (`rho = 0.220`) and + `curriculum_level` (`rho = 0.237`). +- Top-quartile sideband gain hit `env/perf >= 0.25` in `69.2%` of rows versus + `51.0%` in the bottom quartile. +- It does not look like sidebands merely encourage broad noisy chirps: + correlation with `mean_chirp_bandwidth` is slightly negative (`rho = -0.078`), + and correlation with `chirps_emitted` is also slightly negative (`rho = -0.064`). + +Ear directivity read: + +- Raw ear gain effects are weak. 
`ear_rear_gain` is mildly positive for `perf` + (`rho = 0.071`), while `ear_front_gain` and `ear_side_gain` are near zero. +- Lower `front_to_rear` ratios look better: bottom-quartile hit rate is `70.7%` + versus `60.1%` for the top quartile. +- High `ear_side_gain` correlates with worse collision (`rho = 0.175`) and lower + `base_perf` (`rho = -0.170`), but the hit-rate split is flat. Treat this as a + weak caution, not a rule. +- `ear_separation_scale` has weak positive `perf` signal and high-perf IQR + around `1.73..1.99`. + +Other sweep reads: + +- `reflector_strength` is now beneficial at the higher end. Top quartile hit + rate is `81.3%` versus `53.6%` in the low quartile, unlike the Bat8 low-strength + preference. +- `horizon = 64` remains the only reliable setting. The few `128`/`256` rows are + mostly failures. +- `num_layers = 5` is still the default region; `num_layers = 4` can work for + speed (`qkwrqhzy`), while shallower models are under-sampled or weak. +- `hidden_size = 128` and `256` have the best hit rates, but the top Pareto run + is a 64x5 model. Use model size as a speed/behavior tradeoff, not a hard rule. + +ExtraTrees feature-importance sanity check ranked these as the top predictors of +`env/perf`: `train.beta1`, `train.clip_coef`, `train.ent_coef`, +`train.vf_clip_coef`, `env.curriculum_successes_per_level`, `env.bat_max_speed`, +`train.prio_beta0`, `env.chirp_cooldown_ticks`, `env.reflector_strength`, and +`policy.hidden_size`. Treat this as nonlinear importance, not causal proof. + +## Candidate Shortlist + +Display names below were resolved from W&B (`kinvert-k/bat9`) on 2026-06-11. + +| hash | W&B name | use when | human / visual notes | analysis notes | +| --- | --- | --- | --- | --- | +| `rdjj5r21` | `atomic-dragon-816` | failure-mode study for silent inbound exploit | Per human visual review after retrain: it mostly circles and almost never chirps, apparently waiting for the inbound bug to accidentally hit the bat. 
Interesting as a scalar exploit, but not a usable pursuit policy. | Rank 1 local Bat9 scalar outlier: `0.8438` perf, `0.9638` base, `0.8450` curriculum perf, `0.9985` chirp perf, `0.022` chirps, `468.48` episode length, `0.0282` timeout, and `0.0079` collision. Fresh checkpoint `1781207491807` reproduced the scalar profile and loaded `checkpoints/bat/1781207491807/0000000034340864.bin` via `--load-model-path latest`. This points to the inbound bug retargeting policy as a curriculum exploit source. | +| `ifbn2epd` | `super-wind-258` | first retrain and video attempt; behavior-strategy study | Per human visual review: performed poorly overall, but learned a very interesting speed-gated chirp strategy. It flies around most of the time presumably at minimum speed, accelerates presumably to max speed just before chirping, then slows right back down after the chirp. | Chosen first because it had the best Pareto score, `0.5398` perf, `0.9639` base, low collision, low overlap, and 2.15M SPS. The speed-before-chirp pattern is worth preserving as a discovered tactic even if this run is not the final default. | +| `ewgh6l5l` | `distinctive-surf-293` | current default candidate | Per human visual review: normal/low-level eval looked erratic, often spun in place, and every watched run appeared to time out. Retesting fixed level 10 showed the desired harder-level tactic: chirp to infer where the bug is going, move to that future path, then circle/wait there until the bug reaches the bat. A fresh 2026-06-11 retrain and level 10 eval confirmed this is the behavior we want as the current default. | Top non-silent Bat9 scalar profile: `0.5565` perf, `0.9497` base, `0.8392` curriculum perf, `0.6544` chirp perf, level `11.30`, `5.19` chirps, episode length `378.95`, timeout `0.0088`, and collision `0.0414`. Fresh checkpoint `1781208977022` reproduced the scalar profile and loaded `checkpoints/bat/1781208977022/0000000033554432.bin` via `--load-model-path latest`. 
| +| `qkwrqhzy` | `earnest-galaxy-621` | behavior-strategy study; not default yet | Per human visual review: weak overall, about 25% wins in watched eval, and poor at levels 0-1. Surprisingly more interesting on harder eval: it often chirps enough to infer where the bug is going, moves ahead of the bug, then circles until the bug reaches the bat. | 64x4 speed candidate with `0.5145` sweep perf and strong SPS. Fresh checkpoint `1781200339036` reproduced the scalar profile, but visual robustness was too low. Preserve the intercept-and-wait tactic as a discovered behavior, but do not use as default/video pick without fixing low-level competence. | +| `sfmk59n1` | `cool-snowflake-484` | sparse-chirp memory/navigation behavior study | Per human visual review at fixed level 10: very interesting deliberate sparse-chirp strategy. It can fly for long periods without chirping, apparently remembering what it saw from an earlier chirp, navigating around, looking, then chirping again later. It does not look like the previous aimless circling/intercept pattern. Visual perf did not look obviously high, but the behavior is important. | Chosen next because it is the strongest not-yet-watched high-curriculum candidate: `0.5314` perf, `0.9213` curriculum perf, level `13.51`, only `0.0014` timeout, `6.24` chirps, and very low overlap (`0.0070`). It is not another low-chirp/intercept candidate; it trades a moderate collision rate (`0.0701`) and 256x5 speed cost (`0.98M` SPS) for cleaner curriculum progress. Physics read: max sideband gain (`0.25`), high directivity gains (`rear 0.30`, `front 0.678`, `side 0.520`; front/rear `2.26`), slow max speed (`12.91`) with high turn rate (`9.10`). | +| `o7yrj371` | `vivid-breeze-268` | pursuit-biased/default candidate | Per human visual review at fixed level 10: performs well and actively pursues the bugs. 
This is the first watched high-perf, short-episode candidate that visually supports the pursuit hypothesis rather than the previously observed intercept-and-wait behavior. | Chosen from the high-perf, short-episode screen: `0.4749` perf, `221.67` episode length, `0.9323` base, level `13.21`, near-zero timeout (`0.0008`), moderate collision (`0.0670`), and fast 64x5 throughput (`1.99M` SPS). Fresh checkpoint `1781204865675` was trained from the exact `o7yrj371` hyperparameters and eval loaded `checkpoints/bat/1781204865675/0000000032768000.bin` via `--load-model-path latest`. | +| `x1ayhg3j` | `clean-pyramid-454` | blind-map behavior evidence | Per human visual review after fresh retrain: it does initial chirps, builds an apparent mental map of where the bug is going, then deliberately flies where it believes is right, often continuing blindly after the map is made. Interesting behavior, but not the selected default because the desired current default is the stronger wait-at-predicted-path tactic from `ewgh6l5l`. | Fresh checkpoint `1781208510323` reproduced the original scalar profile: `0.446` perf, `0.923` base, `0.821` curriculum perf, `199.11` episode length, `6.72` chirps, `1.735` tempo ratio, near-zero timeout, and collision `0.076`. Preserve as behavior evidence and comparison point. | +| `cpx4gj2f` | `sleek-smoke-681` | balanced 128x5 comparison | Needs visual review; useful if 64-wide `super-wind-258` looks brittle. | `0.9522` base, low bad terminal `0.0478`, 1.72M SPS; not Pareto-front by the selected objective mix. | +| `1a2s8uvf` | `good-valley-684` | low-chirp experiment | Needs visual review specifically for timeout behavior. | Best top-six chirp perf (`0.6888`) and 4.67 chirps, but timeout is high at `0.0512`; not a default without strong visual evidence. | +| `w938us46` | `fanciful-shape-202` | high-level stress check | Needs visual review; use for stress behavior rather than first video. 
| Highest level (`15.62`) while still `0.4501` perf; collision `0.1285`. | + +## Eval Notes And Video Pick + +Human visual note for `ifbn2epd` / `super-wind-258`: it performed poorly overall, +but learned a notable speed-gated chirp tactic. It appears to cruise at minimum +speed, accelerate sharply just before a chirp, then slow back down after chirping. +This is important behavior evidence and should be preserved even if the run is +not adopted as a default. + +Human visual note for `ewgh6l5l` / `distinctive-surf-293`: after retraining from +the sweep config into checkpoint `1781199673401` and running normal/low-level +eval, the policy looked erratic, often spun in place, and every watched run +appeared to time out. A later fixed level 10 retest using `--load-model-path +latest` showed the desired harder-level behavior: it chirps to infer where the +bug is going, moves onto that future path, then circles/waits there until the bug +reaches the bat. This is the same broad intercept-and-wait tactic later observed +in `qkwrqhzy`, but `ewgh6l5l` has the stronger scalar profile. + +Fresh default retrain on 2026-06-11 set `config/bat.ini` to the exact concrete +`ewgh6l5l` hyperparameters and produced checkpoint +`checkpoints/bat/1781208977022/0000000033554432.bin`. The scalar profile again +matched the sweep: `0.556` perf, `0.950` base, `0.839` curriculum perf, level +`11.298`, `5.191` chirps, `378.948` episode length, timeout `0.009`, collision +`0.041`, and chirp tempo ratio `0.134`. Human level-10 visual review confirmed +this is the intended current default: it chooses a place on the predicted bug +path and waits/intercepts there. + +Human visual note for `o7yrj371` / `vivid-breeze-268`: after training from the +exact sweep hyperparameters into checkpoint `1781204865675`, fixed level 10 eval +loaded with `--load-model-path latest` showed good performance and active bug +pursuit. 
This is the cleanest visual support so far for the short-episode screen: +it looked like it chased the bugs rather than mainly waiting on a predicted +intercept point. + +Human visual note for `x1ayhg3j` / `clean-pyramid-454`: after training from the +exact sweep hyperparameters into checkpoint `1781206232504`, normal eval and +fixed level 10 eval loaded `checkpoints/bat/1781206232504/0000000035651584.bin` +with `--load-model-path latest`. Scalar profile reproduced the sweep pattern: +about `0.446` perf, `199` episode length, `6.72` chirps, far chirp rate `0.029`, +near chirp rate `0.048`, and tempo ratio about `1.74`. Level 10 visual review +showed that this should not be interpreted as simply chirping more continuously +when close. It sometimes chirps, builds an apparent internal estimate of the bug +trajectory/map, then flies an intercept course with no more chirps. It works +roughly half the time in watched attempts and looks like a confident blind +intercept strategy. + +Human visual note for `qkwrqhzy` / `earnest-galaxy-621`: after retraining from +the sweep config into checkpoint `1781200339036`, normal eval and fixed level 10 +eval showed weak overall win rate, roughly 25% in the watched sample, and poor +behavior at levels 0-1. The run nevertheless learned a notable harder-level +strategy: chirp enough to infer the bug trajectory, position itself ahead of the +bug, then circle/intercept until the bug reaches the bat. This is valuable +behavior evidence but not a default-quality policy. + +Human visual note for `sfmk59n1` / `cool-snowflake-484`: after setting +`config/bat.ini` to the exact concrete `sfmk59n1` hyperparameters, the policy was +trained into checkpoint `checkpoints/bat/1781203704964/0000000036175872.bin`. 
+The fresh run reproduced the sweep-scale scalar profile: final `env/perf` about +`0.531`, `base_perf` about `0.929`, `curriculum_perf` about `0.921`, curriculum +level `13.507`, `6.24` chirps, timeout about `0.001`, and collision about +`0.070`. Fixed level 10 visual review showed an important sparse-chirp +memory/navigation tactic: it flies deliberately for long periods without +chirping, apparently using information remembered from an earlier chirp, +navigates around while looking, then chirps again later. It did not look like the +previous aimless circling/intercept behavior. Visual performance did not look +obviously high, but this behavior should be preserved as evidence of memory-like +navigation under limited chirping. + +Recorded MP4 artifact: `recordings/bat_recording.mp4`. This is a 30.0 second, +640x640, 30 fps H.264 MP4 with AAC audio, recorded from `ewgh6l5l` / +`distinctive-surf-293` at fixed level 10 using `--load-model-path latest`. To +make `latest` resolve to this candidate after later runs had newer checkpoints, +the `ewgh6l5l` checkpoint was copied non-destructively to +`checkpoints/bat/ewgh6l5l-latest-eval/0000000033554432.bin`. The video is a +behavior-evidence artifact for the trajectory-prediction/intercept tactic, not a +claim that this run is default-quality. + +Current default candidate is `ewgh6l5l` / `distinctive-surf-293`. It is not the +most robust low-level visual policy, but it is the best current match for the +desired behavior: chirp enough to infer the future bug path, move to that path, +then wait/intercept there. `qkwrqhzy` preserves a similar tactic but looked weaker +overall. `x1ayhg3j` remains useful behavior evidence for blind-map navigation, +and `sfmk59n1` remains important behavior evidence for sparse-chirp +memory/navigation. + +`config/bat.ini` is now intentionally set to `ewgh6l5l` for the current default +candidate. This decision is based on fresh retrain plus level 10 visual eval, not +scalar rank alone. 
+ +## Recommended Next Defaults + +Current default source: + +| parameter | recommendation | +| --- | --- | +| candidate source | `ewgh6l5l` / `distinctive-surf-293` | +| `policy.hidden_size` | `256` | +| `policy.num_layers` | `5` | +| `train.horizon` | `64` | +| `vec.num_buffers` | `4` | +| `env.bug_wing_sideband_gain` | `0.19056934455600955` | +| `env.ear_rear_gain` | `0.22038613968607276` | +| `env.ear_front_gain` | `0.6419214149115183` | +| `env.ear_side_gain` | `0.28043867572747055` | +| `env.ear_separation_scale` | `2.0` | +| `env.reflector_strength` | `0.6` | +| `env.chirp_cooldown_ticks` | `11` | +| `env.curriculum_successes_per_level` | `4` | +| `env.curriculum_bug_distance_step` | `2.0` | + +Keep `--sweep.use-gpu ""` for future Bat9 sweep continuation so Protein stays +off GPU while training uses `--train.gpus 1`. diff --git a/config/bat.ini b/config/bat.ini index 8d4c02c553..82afcc0095 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -3,12 +3,13 @@ env_name = bat [vec] total_agents = 4096 -num_buffers = 7 +num_buffers = 4 num_threads = 8 [policy] -hidden_size = 128 +hidden_size = 256 num_layers = 5 +expansion_factor = 1 [torch] network = MinGRU @@ -23,10 +24,10 @@ height = 64 num_obstacles = 3 bat_radius = 2 bug_radius = 1.5 -bat_max_speed = 18.344521522276608 -bat_min_speed = 2.9687783252761477 -bat_accel = 49.19185634905935 -bat_turn_rate = 9.424778 +bat_max_speed = 15.498233877318418 +bat_min_speed = 2.6389946132676654 +bat_accel = 53.02330161128345 +bat_turn_rate = 8.371655963408276 bug_speed = 4 max_steps = 512 render_target_fps = 60 @@ -34,30 +35,30 @@ record_video = 0 record_video_fps = 30 record_video_seconds = 30 record_video_audio = 1 -bug_echo_farther_penalty_scale = 0.05 -bug_echo_reward_scale = 0.2789761793107612 +bug_echo_farther_penalty_scale = 0.19351291407677712 +bug_echo_reward_scale = 0.35 bug_echo_min_displacement = 1 -bug_wing_sideband_gain = 0.10 +bug_wing_sideband_gain = 0.19056934455600955 bug_maneuver_frequency = 0.4 
bug_maneuver_start_level = 7 bug_maneuver_strength = 0.4 -curriculum_bug_distance_step = 2.191219134146933 +curriculum_bug_distance_step = 2.0 curriculum_enabled = 1 curriculum_inbound_bug_distance_step = 4 curriculum_inbound_max_bug_distance = 56 curriculum_inbound_start_level = 8 -curriculum_initial_level = 2 +curriculum_initial_level = 1 curriculum_max_bug_distance = 40 curriculum_max_obstacles = 3 -curriculum_obstacle_step = 7 -curriculum_start_bug_distance = 8 +curriculum_obstacle_step = 8 +curriculum_start_bug_distance = 8.438008720355143 curriculum_start_obstacles = 0 curriculum_successes_per_level = 4 -ear_separation_scale = 1.6650928741686002 -ear_rear_gain = 0.20 -ear_front_gain = 0.55 -ear_side_gain = 0.35 -early_chirp_penalty = 0.004893262939164256 +ear_separation_scale = 2.0 +ear_rear_gain = 0.22038613968607276 +ear_front_gain = 0.6419214149115183 +ear_side_gain = 0.28043867572747055 +early_chirp_penalty = 0.006 freq_bins_per_ear = 16 inbound_bug_speed_multiplier = 1.75 inbound_heading_noise_degrees = 18 @@ -68,41 +69,41 @@ progress_reward_scale = 0.12 reflector_spacing = 8 corner_reflectors = 1 reflector_strength = 0.6 -sound_speed = 180 -step_cost = 0.0001 -valid_chirp_reward = 0.0007907239068821393 -chirp_cooldown_ticks = 13 +sound_speed = 180.0 +step_cost = 0.00010781401476030468 +valid_chirp_reward = 0.00015478540834814922 +chirp_cooldown_ticks = 11 chirp_cost = 0 -chirp_efficiency_reward = 2 -chirp_overlap_penalty = 0.010041805305229239 -collision_penalty = 2 +chirp_efficiency_reward = 2.0 +chirp_overlap_penalty = 0.004278154705335052 +collision_penalty = 1.950717141233687 [train] anneal_ent_coef = 0 anneal_lr = 1 -beta1 = 0.9276441339551883 -beta2 = 0.9996971732178918 -clip_coef = 0.40220288325366393 -ent_coef = 0.009414797813275677 -eps = 1.9967415498800064e-13 -gae_lambda = 0.92 -gamma = 0.996174294667965 +beta1 = 0.6151083880184249 +beta2 = 0.9994430814361022 +clip_coef = 0.6358533174485217 +ent_coef = 0.0012852601662540154 +eps = 
6.005678002222838e-10 +gae_lambda = 0.9208599830048286 +gamma = 0.99842121229845 gpus = 1 horizon = 64 -learning_rate = 0.011740312394802619 -max_grad_norm = 3.140246920394498 +learning_rate = 0.011919361446426807 +max_grad_norm = 1.5952002930880629 min_ent_coef_ratio = 0.1 min_lr_ratio = 0 minibatch_size = 8192 -prio_alpha = 0.8583393112514485 -prio_beta0 = 0.5859110774374029 -replay_ratio = 1.25 +prio_alpha = 0.9144113738603952 +prio_beta0 = 1.0 +replay_ratio = 1.1554225446340287 seed = 42 -total_timesteps = 32663421 -vf_clip_coef = 0.7961030866198207 -vf_coef = 4.268711755319329 -vtrace_c_clip = 1.272270973533673 -vtrace_rho_clip = 4.399657272231955 +total_timesteps = 33699113.0 +vf_clip_coef = 0.01 +vf_coef = 5.0 +vtrace_c_clip = 2.7120354439967884 +vtrace_rho_clip = 5.0 [sweep] method = Protein From 3a438a0519e77d02e0da6d9cf3ac9a7716a00565 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 15:19:42 -0700 Subject: [PATCH 36/51] Hardcode stable bat constants --- BAT_CURRICULUM.md | 19 +-- BAT_SPEC.md | 24 +--- config/bat.ini | 35 ------ ocean/bat/bat.c | 150 +++++++++++++++++++---- ocean/bat/bat.h | 283 +++++++++++++++++++++----------------------- ocean/bat/binding.c | 23 ---- 6 files changed, 280 insertions(+), 254 deletions(-) diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md index daca317748..8459ee6037 100644 --- a/BAT_CURRICULUM.md +++ b/BAT_CURRICULUM.md @@ -18,13 +18,15 @@ curriculum_perf = success * curriculum_difficulty With the current defaults: ```ini -curriculum_initial_level = 3 -curriculum_start_obstacles = 1 -curriculum_max_obstacles = 3 -curriculum_obstacle_step = 18 -curriculum_start_bug_distance = 8.0 -curriculum_max_bug_distance = 56.0 -curriculum_bug_distance_step = 4.0 +curriculum_initial_level = 1 +BAT_CURRICULUM_START_OBSTACLES = 0 +BAT_CURRICULUM_MAX_OBSTACLES = 3 +curriculum_obstacle_step = 8 +curriculum_start_bug_distance = 8.438 +BAT_CURRICULUM_MAX_BUG_DISTANCE = 40.0 +BAT_CURRICULUM_BUG_DISTANCE_STEP = 2.0 
+BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE = 56.0 +BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP = 4.0 ``` At `curriculum_level ~= 5`, the bug starts around distance `28`, giving: @@ -171,7 +173,8 @@ start_bug_distance = 8 + level * distance_step Recommendation: -- Keep `curriculum_bug_distance_step` in the current `2.0..5.0` sweep range. +- Keep `BAT_CURRICULUM_BUG_DISTANCE_STEP` hardcoded at `2.0`; Bat9's best runs + clustered there, and the inbound curriculum already expands later distances. - Log `curriculum_distance_difficulty` directly. Gate: diff --git a/BAT_SPEC.md b/BAT_SPEC.md index af907d70c6..a165f946e9 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -116,7 +116,7 @@ Acoustics: enough artificial time-of-arrival separation for one ear to be able to hear a return about one tick before the other. - `ear_separation_scale` controls the artificial distance between ears as a - multiple of `bat_radius`. Keep it bounded; the implementation clamps it to + multiple of `BAT_RADIUS`. Keep it bounded; the implementation clamps it to `[0.25, 2.0]` and the default sweep range is `[0.5, 2.0]`. - Every echo contribution has: - two-way distance from mouth/source to reflector to each ear, @@ -129,8 +129,8 @@ Point-reflector renderer: - v1 should represent walls and obstacle surfaces as stationary point reflectors. -- Sample each wall and obstacle edge at a fixed spacing, default - `reflector_spacing = 1.0` world unit. +- Sample each wall and obstacle edge at fixed spacing, + `BAT_REFLECTOR_SPACING = 8.0` world units. - The bug contributes one moving circular/point reflector at its center. - This avoids wavefront bookkeeping while preserving range, angle, and Doppler learning signals. 
@@ -350,7 +350,7 @@ Default reward model: - when a bug echo returns with a shorter acoustic path than the previous bug echo, add a small shaped reward, - this reward only applies if the bat has moved at least - `bug_echo_min_displacement` since the previous scored bug echo, so a + `BAT_BUG_ECHO_MIN_DISPLACEMENT` since the previous scored bug echo, so a stationary bat cannot farm reward from the bug moving closer by itself, - farther bug echoes update the previous bug echo path and receive a weaker penalty scaled by `bug_echo_farther_penalty_scale`, default `0.10`, @@ -367,7 +367,8 @@ Progress reward: - Default formula: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - - `reward -= chirp_cost` when a chirp is emitted + - `reward -= BAT_CHIRP_COST` when a chirp is emitted; this is hardcoded to + zero for the current Bat defaults - `reward -= chirp_overlap_penalty * bug_echo_wait_fraction` when a valid chirp is emitted before the previous chirp's expected bug reflection has returned @@ -382,10 +383,8 @@ Progress reward: - `progress_reward_scale = 0.05` - `step_cost = 0.001` - `chirp_efficiency_reward = 1.0` - - `chirp_cost = 0.00005` - `chirp_overlap_penalty = 0.004` - `bug_echo_reward_scale = 0.02` - - `chirp_cost = 0.0005` Important caveat: @@ -484,37 +483,26 @@ Recommended stages: Config knobs: -- `arena_width` -- `arena_height` -- `num_obstacles` - `obstacle_min_size` - `obstacle_max_size` -- `bat_radius` - `ear_separation_scale` -- `bug_radius` - `bat_max_speed` - `bat_min_speed` - `bat_accel` - `bat_turn_rate` -- `bug_speed` - `max_steps` -- `freq_bins_per_ear` - `max_echo_range` - `sound_speed` -- `reflector_spacing` - `max_chirps_per_episode` - `chirp_cooldown_ticks` - `chirp_freq_bins` - `chirp_duration_bins` -- `chirp_cost` - `chirp_efficiency_reward` - `chirp_overlap_penalty` - `bug_echo_farther_penalty_scale` -- `bug_echo_min_displacement` - `step_cost` - `progress_reward_scale` - `collision_penalty` -- 
`curriculum_enabled` - `curriculum_initial_level` - `curriculum_stage` diff --git a/config/bat.ini b/config/bat.ini index 82afcc0095..3b0f31b555 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -17,18 +17,11 @@ encoder = DefaultEncoder decoder = DefaultDecoder [env] -num_agents = 1 frameskip = 1 -width = 64 -height = 64 -num_obstacles = 3 -bat_radius = 2 -bug_radius = 1.5 bat_max_speed = 15.498233877318418 bat_min_speed = 2.6389946132676654 bat_accel = 53.02330161128345 bat_turn_rate = 8.371655963408276 -bug_speed = 4 max_steps = 512 render_target_fps = 60 record_video = 0 @@ -37,43 +30,27 @@ record_video_seconds = 30 record_video_audio = 1 bug_echo_farther_penalty_scale = 0.19351291407677712 bug_echo_reward_scale = 0.35 -bug_echo_min_displacement = 1 bug_wing_sideband_gain = 0.19056934455600955 -bug_maneuver_frequency = 0.4 -bug_maneuver_start_level = 7 -bug_maneuver_strength = 0.4 -curriculum_bug_distance_step = 2.0 -curriculum_enabled = 1 -curriculum_inbound_bug_distance_step = 4 -curriculum_inbound_max_bug_distance = 56 -curriculum_inbound_start_level = 8 curriculum_initial_level = 1 -curriculum_max_bug_distance = 40 -curriculum_max_obstacles = 3 curriculum_obstacle_step = 8 curriculum_start_bug_distance = 8.438008720355143 -curriculum_start_obstacles = 0 curriculum_successes_per_level = 4 ear_separation_scale = 2.0 ear_rear_gain = 0.22038613968607276 ear_front_gain = 0.6419214149115183 ear_side_gain = 0.28043867572747055 early_chirp_penalty = 0.006 -freq_bins_per_ear = 16 inbound_bug_speed_multiplier = 1.75 inbound_heading_noise_degrees = 18 max_chirp_age_ticks = 30 max_chirps_per_episode = 15 max_echo_range = 128 progress_reward_scale = 0.12 -reflector_spacing = 8 -corner_reflectors = 1 reflector_strength = 0.6 sound_speed = 180.0 step_cost = 0.00010781401476030468 valid_chirp_reward = 0.00015478540834814922 chirp_cooldown_ticks = 11 -chirp_cost = 0 chirp_efficiency_reward = 2.0 chirp_overlap_penalty = 0.004278154705335052 collision_penalty = 
1.950717141233687 @@ -234,12 +211,6 @@ min = 0.20 max = 0.55 scale = auto -[sweep.env.corner_reflectors] -distribution = int_uniform -min = 0 -max = 1 -scale = auto - [sweep.env.reflector_strength] distribution = uniform min = 0.6 @@ -318,12 +289,6 @@ min = 8.0 max = 20.0 scale = auto -[sweep.env.curriculum_bug_distance_step] -distribution = uniform -min = 2.0 -max = 5.0 -scale = auto - [sweep.env.curriculum_obstacle_step] distribution = int_uniform min = 3 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index ccd0c865b3..8c65353527 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -1,34 +1,132 @@ #include +#include +#include #include "bat.h" -void demo() { - Bat env = { +#define BAT_DEMO_CONFIG_PATH "config/bat.ini" + +static char* trim(char* s) { + while (isspace((unsigned char)*s)) s++; + char* end = s + strlen(s); + while (end > s && isspace((unsigned char)end[-1])) end--; + *end = '\0'; + return s; +} + +static void set_demo_defaults(Bat* env) { + *env = (Bat){ + .num_agents = 1, .frameskip = 1, - .width = 64, - .height = 64, - .num_obstacles = 3, - .bat_radius = 2.0f, - .bug_radius = 1.5f, - .bat_max_speed = 12.0f, - .bat_accel = 30.0f, - .bat_turn_rate = BAT_PI, - .bug_speed = 4.0f, + .bat_max_speed = 15.498233877318418f, + .bat_min_speed = 2.6389946132676654f, + .bat_accel = 53.02330161128345f, + .bat_turn_rate = 8.371655963408276f, .max_steps = 512, - .curriculum_initial_level = 3, - .range_bins_per_ear = BAT_RANGE_BINS, - .doppler_bins_per_ear = BAT_DOPPLER_BINS, - .max_echo_range = 80.0f, - .sound_speed = 100.0f, - .reflector_spacing = 8.0f, - .max_chirps_per_episode = 20, - .min_chirps_per_episode = 10, - .chirp_budget_decay_levels = 4, - .chirp_cost = 0.0005f, - .step_cost = 0.001f, - .progress_reward_scale = 0.05f, - .collision_penalty = 1.0f, - .rng = (unsigned int)time(NULL), + .render_target_fps = 60, + .record_video = 0, + .record_video_fps = 30, + .record_video_seconds = 30, + .record_video_audio = 1, + .bug_echo_farther_penalty_scale = 
0.19351291407677712f, + .bug_echo_reward_scale = 0.35f, + .bug_wing_sideband_gain = 0.19056934455600955f, + .curriculum_initial_level = 1, + .curriculum_obstacle_step = 8, + .curriculum_start_bug_distance = 8.438008720355143f, + .curriculum_successes_per_level = 4, + .ear_separation_scale = 2.0f, + .ear_rear_gain = 0.22038613968607276f, + .ear_front_gain = 0.6419214149115183f, + .ear_side_gain = 0.28043867572747055f, + .early_chirp_penalty = 0.006f, + .inbound_bug_speed_multiplier = 1.75f, + .inbound_heading_noise_degrees = 18.0f, + .max_chirp_age_ticks = 30, + .max_chirps_per_episode = 15, + .max_echo_range = 128.0f, + .progress_reward_scale = 0.12f, + .reflector_strength = 0.6f, + .sound_speed = 180.0f, + .step_cost = 0.00010781401476030468f, + .valid_chirp_reward = 0.00015478540834814922f, + .chirp_cooldown_ticks = 11, + .chirp_efficiency_reward = 2.0f, + .chirp_overlap_penalty = 0.004278154705335052f, + .collision_penalty = 1.950717141233687f, }; +} + +static void apply_env_config_value(Bat* env, const char* key, float value) { + if (strcmp(key, "frameskip") == 0) env->frameskip = (int)value; + else if (strcmp(key, "bat_max_speed") == 0) env->bat_max_speed = value; + else if (strcmp(key, "bat_min_speed") == 0) env->bat_min_speed = value; + else if (strcmp(key, "bat_accel") == 0) env->bat_accel = value; + else if (strcmp(key, "bat_turn_rate") == 0) env->bat_turn_rate = value; + else if (strcmp(key, "max_steps") == 0) env->max_steps = (int)value; + else if (strcmp(key, "render_target_fps") == 0) env->render_target_fps = (int)value; + else if (strcmp(key, "record_video") == 0) env->record_video = (int)value; + else if (strcmp(key, "record_video_fps") == 0) env->record_video_fps = (int)value; + else if (strcmp(key, "record_video_seconds") == 0) env->record_video_seconds = (int)value; + else if (strcmp(key, "record_video_audio") == 0) env->record_video_audio = (int)value; + else if (strcmp(key, "bug_echo_farther_penalty_scale") == 0) 
env->bug_echo_farther_penalty_scale = value; + else if (strcmp(key, "bug_echo_reward_scale") == 0) env->bug_echo_reward_scale = value; + else if (strcmp(key, "bug_wing_sideband_gain") == 0) env->bug_wing_sideband_gain = value; + else if (strcmp(key, "curriculum_initial_level") == 0) env->curriculum_initial_level = (int)value; + else if (strcmp(key, "curriculum_obstacle_step") == 0) env->curriculum_obstacle_step = (int)value; + else if (strcmp(key, "curriculum_start_bug_distance") == 0) env->curriculum_start_bug_distance = value; + else if (strcmp(key, "curriculum_successes_per_level") == 0) env->curriculum_successes_per_level = (int)value; + else if (strcmp(key, "ear_separation_scale") == 0) env->ear_separation_scale = value; + else if (strcmp(key, "ear_rear_gain") == 0) env->ear_rear_gain = value; + else if (strcmp(key, "ear_front_gain") == 0) env->ear_front_gain = value; + else if (strcmp(key, "ear_side_gain") == 0) env->ear_side_gain = value; + else if (strcmp(key, "early_chirp_penalty") == 0) env->early_chirp_penalty = value; + else if (strcmp(key, "inbound_bug_speed_multiplier") == 0) env->inbound_bug_speed_multiplier = value; + else if (strcmp(key, "inbound_heading_noise_degrees") == 0) env->inbound_heading_noise_degrees = value; + else if (strcmp(key, "max_chirp_age_ticks") == 0) env->max_chirp_age_ticks = (int)value; + else if (strcmp(key, "max_chirps_per_episode") == 0) env->max_chirps_per_episode = (int)value; + else if (strcmp(key, "max_echo_range") == 0) env->max_echo_range = value; + else if (strcmp(key, "progress_reward_scale") == 0) env->progress_reward_scale = value; + else if (strcmp(key, "reflector_strength") == 0) env->reflector_strength = value; + else if (strcmp(key, "sound_speed") == 0) env->sound_speed = value; + else if (strcmp(key, "step_cost") == 0) env->step_cost = value; + else if (strcmp(key, "valid_chirp_reward") == 0) env->valid_chirp_reward = value; + else if (strcmp(key, "chirp_cooldown_ticks") == 0) env->chirp_cooldown_ticks = 
(int)value; + else if (strcmp(key, "chirp_efficiency_reward") == 0) env->chirp_efficiency_reward = value; + else if (strcmp(key, "chirp_overlap_penalty") == 0) env->chirp_overlap_penalty = value; + else if (strcmp(key, "collision_penalty") == 0) env->collision_penalty = value; +} + +static void load_env_config(Bat* env, const char* path) { + FILE* file = fopen(path, "r"); + if (file == NULL) return; + + bool in_env = false; + char line[256]; + while (fgets(line, sizeof(line), file) != NULL) { + char* s = trim(line); + if (*s == '\0' || *s == '#' || *s == ';') continue; + if (*s == '[') { + in_env = strcmp(s, "[env]") == 0; + continue; + } + if (!in_env) continue; + + char* eq = strchr(s, '='); + if (eq == NULL) continue; + *eq = '\0'; + char* key = trim(s); + char* raw_value = trim(eq + 1); + apply_env_config_value(env, key, strtof(raw_value, NULL)); + } + + fclose(file); +} + +void demo() { + Bat env; + set_demo_defaults(&env); + load_env_config(&env, BAT_DEMO_CONFIG_PATH); + env.rng = (unsigned int)time(NULL); allocate(&env); env.client = make_client(&env); c_reset(&env); @@ -36,6 +134,8 @@ void demo() { SetTargetFPS(60); while (!WindowShouldClose()) { memset(env.actions, 0, sizeof(float) * BAT_NUM_ACTIONS); + env.actions[0] = BAT_NOOP; + env.actions[1] = BAT_TURN_NONE; if (IsKeyDown(KEY_W)) env.actions[0] = BAT_THRUST_FORWARD; if (IsKeyDown(KEY_S)) env.actions[0] = BAT_BRAKE; if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[1] = BAT_TURN_LEFT; diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index a2c28fc9af..510b68071a 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -41,22 +41,38 @@ #define BAT_MAX_OBSTACLES 16 #define BAT_TICK_RATE (1.0f/60.0f) -#define BAT_DEFAULT_MAX_STEPS 512 -#define BAT_DEFAULT_MAX_STEPS_INV (1.0f / (float)BAT_DEFAULT_MAX_STEPS) +#define BAT_WIDTH 64 +#define BAT_HEIGHT 64 +#define BAT_RADIUS 2.0f +#define BAT_BUG_RADIUS 1.5f +#define BAT_BUG_SPEED 4.0f +#define BAT_BUG_MANEUVER_START_LEVEL 7 +#define BAT_BUG_MANEUVER_STRENGTH 
0.4f +#define BAT_BUG_MANEUVER_FREQUENCY 0.4f +#define BAT_REFLECTOR_SPACING 8.0f +#define BAT_BUG_ECHO_MIN_DISPLACEMENT 1.0f +#define BAT_CURRICULUM_START_OBSTACLES 0 +#define BAT_CURRICULUM_MAX_OBSTACLES 3 +#define BAT_CURRICULUM_BUG_DISTANCE_STEP 2.0f +#define BAT_CURRICULUM_MAX_BUG_DISTANCE 40.0f +#define BAT_CURRICULUM_INBOUND_START_LEVEL 8 +#define BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE 56.0f +#define BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP 4.0f #define BAT_PI 3.14159265358979323846f #define BAT_TWO_PI (2.0f * BAT_PI) #define BAT_CHIRP_HISTORY 4 #define BAT_CHIRP_RINGS 5 #define BAT_MAX_CHIRP_SLICES 16 #define BAT_ECHO_QUEUE_TICKS 256 +#define BAT_CORNER_REFLECTORS 1 #define BAT_AUDIO_VOICES 8 #define BAT_AUDIO_SAMPLE_RATE 48000 #define BAT_AUDIO_MIN_HZ 600.0f #define BAT_AUDIO_MAX_HZ 3600.0f #define BAT_AUDIO_VOLUME 0.22f #define BAT_RECORD_MAX_VOICES 16 -#define BAT_CHIRP_PERF_REFERENCE_CHIRPS 15.0f #define BAT_CHIRP_PERF_FLOOR 0.05f +#define BAT_CHIRP_COST 0.0f #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 @@ -103,7 +119,6 @@ typedef struct Log { float curriculum_perf; float curriculum_distance_difficulty; float curriculum_obstacle_difficulty; - float curriculum_chirp_budget_difficulty; float curriculum_motion_difficulty; float num_obstacles; float chirps_emitted; @@ -161,8 +176,6 @@ typedef struct Bat { int num_agents; int frameskip; - int width; - int height; int tick; int max_steps; int render_target_fps; @@ -171,25 +184,14 @@ typedef struct Bat { int record_video_seconds; int record_video_audio; int num_obstacles; - int curriculum_enabled; int curriculum_level; int curriculum_initial_level; - int curriculum_start_obstacles; - int curriculum_max_obstacles; int curriculum_obstacle_step; int curriculum_successes_per_level; int curriculum_successes_at_level; float curriculum_start_bug_distance; - float curriculum_max_bug_distance; - float curriculum_bug_distance_step; - int curriculum_inbound_start_level; - float curriculum_inbound_max_bug_distance; - 
float curriculum_inbound_bug_distance_step; float inbound_bug_speed_multiplier; float inbound_heading_noise_degrees; - int bug_maneuver_start_level; - float bug_maneuver_strength; - float bug_maneuver_frequency; float bat_x; float bat_y; @@ -197,7 +199,6 @@ typedef struct Bat { float bat_vy; float bat_heading; float bat_turn_velocity; - float bat_radius; float ear_separation_scale; float ear_rear_gain; float ear_front_gain; @@ -211,8 +212,6 @@ typedef struct Bat { float bug_y; float bug_vx; float bug_vy; - float bug_radius; - float bug_speed; int bug_inbound; int bug_maneuver_mode; float bug_base_heading; @@ -225,17 +224,12 @@ typedef struct Bat { float* obstacle_w; float* obstacle_h; - int freq_bins_per_ear; float max_echo_range; float sound_speed; - float reflector_spacing; - int corner_reflectors; float reflector_strength; int max_chirp_age_ticks; int chirp_cooldown_ticks; int max_chirps_per_episode; - int min_chirps_per_episode; - int chirp_budget_decay_levels; int chirp_budget; int chirp_age_ticks; int last_chirp_tick; @@ -259,7 +253,6 @@ typedef struct Bat { float first_chirp_tick; float chirp_tick_sum; - float chirp_cost; float chirp_efficiency_reward; float valid_chirp_reward; float early_chirp_penalty; @@ -268,7 +261,6 @@ typedef struct Bat { float progress_reward_scale; float bug_echo_reward_scale; float bug_echo_farther_penalty_scale; - float bug_echo_min_displacement; float bug_wing_sideband_gain; float tick_bug_echo_energy; float tick_bug_echo_path; @@ -394,12 +386,12 @@ static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, int east = quadrant & 1; int south = (quadrant >> 1) & 1; float margin = fmaxf(6.0f, radius + 3.0f); - float half_w = env->width * 0.5f; - float half_h = env->height * 0.5f; + float half_w = BAT_WIDTH * 0.5f; + float half_h = BAT_HEIGHT * 0.5f; float min_x = (east ? half_w : 0.0f) + margin; - float max_x = (east ? (float)env->width : half_w) - margin; + float max_x = (east ? 
(float)BAT_WIDTH : half_w) - margin; float min_y = (south ? half_h : 0.0f) + margin; - float max_y = (south ? (float)env->height : half_h) - margin; + float max_y = (south ? (float)BAT_HEIGHT : half_h) - margin; *x = min_x + bat_randf(env) * (max_x - min_x); *y = min_y + bat_randf(env) * (max_y - min_y); } @@ -407,11 +399,11 @@ static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, static inline void bat_sample_spawns(Bat* env) { int bat_quadrant = (int)(bat_randf(env) * 4.0f); int bug_quadrant = bat_quadrant ^ 3; - float min_sep = fminf(env->width, env->height) * 0.31f; + float min_sep = fminf(BAT_WIDTH, BAT_HEIGHT) * 0.31f; for (int attempt = 0; attempt < 64; attempt++) { - bat_sample_in_quadrant(env, bat_quadrant, env->bat_radius, &env->bat_x, &env->bat_y); - bat_sample_in_quadrant(env, bug_quadrant, env->bug_radius, &env->bug_x, &env->bug_y); + bat_sample_in_quadrant(env, bat_quadrant, BAT_RADIUS, &env->bat_x, &env->bat_y); + bat_sample_in_quadrant(env, bug_quadrant, BAT_BUG_RADIUS, &env->bug_x, &env->bug_y); if (bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) >= min_sep) { return; } @@ -419,41 +411,39 @@ static inline void bat_sample_spawns(Bat* env) { float qx[4] = {0.25f, 0.75f, 0.25f, 0.75f}; float qy[4] = {0.25f, 0.25f, 0.75f, 0.75f}; - env->bat_x = env->width * qx[bat_quadrant]; - env->bat_y = env->height * qy[bat_quadrant]; - env->bug_x = env->width * qx[bug_quadrant]; - env->bug_y = env->height * qy[bug_quadrant]; + env->bat_x = BAT_WIDTH * qx[bat_quadrant]; + env->bat_y = BAT_HEIGHT * qy[bat_quadrant]; + env->bug_x = BAT_WIDTH * qx[bug_quadrant]; + env->bug_y = BAT_HEIGHT * qy[bug_quadrant]; } static inline int bat_curriculum_obstacles(Bat* env) { - if (!env->curriculum_enabled) return env->num_obstacles; int step = env->curriculum_obstacle_step; - int count = env->curriculum_start_obstacles; + int count = BAT_CURRICULUM_START_OBSTACLES; if (env->curriculum_level > 0) { - count = env->curriculum_start_obstacles + 1 + 
(env->curriculum_level - 1) / step; + count = BAT_CURRICULUM_START_OBSTACLES + 1 + (env->curriculum_level - 1) / step; } - if (count > env->curriculum_max_obstacles) count = env->curriculum_max_obstacles; + if (count > BAT_CURRICULUM_MAX_OBSTACLES) count = BAT_CURRICULUM_MAX_OBSTACLES; if (count > BAT_MAX_OBSTACLES) count = BAT_MAX_OBSTACLES; return count; } static inline float bat_curriculum_bug_distance(Bat* env) { float distance = env->curriculum_start_bug_distance - + env->curriculum_bug_distance_step * env->curriculum_level; + + BAT_CURRICULUM_BUG_DISTANCE_STEP * env->curriculum_level; return bat_clampf(distance, env->curriculum_start_bug_distance, - env->curriculum_max_bug_distance); + BAT_CURRICULUM_MAX_BUG_DISTANCE); } static inline bool bat_curriculum_inbound_enabled(Bat* env) { - if (!env->curriculum_enabled) return false; - return env->curriculum_level >= env->curriculum_inbound_start_level; + return env->curriculum_level >= BAT_CURRICULUM_INBOUND_START_LEVEL; } static inline float bat_curriculum_inbound_bug_distance(Bat* env) { - float base = env->curriculum_max_bug_distance; - int extra_levels = env->curriculum_level - env->curriculum_inbound_start_level + 1; - float distance = base + env->curriculum_inbound_bug_distance_step * extra_levels; - return bat_clampf(distance, base, env->curriculum_inbound_max_bug_distance); + float base = BAT_CURRICULUM_MAX_BUG_DISTANCE; + int extra_levels = env->curriculum_level - BAT_CURRICULUM_INBOUND_START_LEVEL + 1; + float distance = base + BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP * extra_levels; + return bat_clampf(distance, base, BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE); } static inline float bat_curriculum_spawn_distance(Bat* env) { @@ -464,7 +454,7 @@ static inline float bat_curriculum_spawn_distance(Bat* env) { } static inline float bat_curriculum_bug_speed(Bat* env) { - float speed = env->bug_speed; + float speed = BAT_BUG_SPEED; if (bat_curriculum_inbound_enabled(env)) { speed *= 
env->inbound_bug_speed_multiplier; } @@ -472,21 +462,19 @@ static inline float bat_curriculum_bug_speed(Bat* env) { } static inline float bat_curriculum_bug_maneuver_strength(Bat* env) { - if (!env->curriculum_enabled) return 0.0f; - if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; - int extra_levels = env->curriculum_level - env->bug_maneuver_start_level; + if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) return 0.0f; + int extra_levels = env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL; float ramp = extra_levels <= 0 ? 0.25f : 0.75f + 0.25f * (extra_levels - 1); - return env->bug_maneuver_strength * bat_clampf(ramp, 0.0f, 1.0f); + return BAT_BUG_MANEUVER_STRENGTH * bat_clampf(ramp, 0.0f, 1.0f); } static inline float bat_curriculum_bug_maneuver_frequency(Bat* env) { - if (!env->curriculum_enabled) return env->bug_maneuver_frequency; - if (env->curriculum_level < env->bug_maneuver_start_level) { - return env->bug_maneuver_frequency; + if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) { + return BAT_BUG_MANEUVER_FREQUENCY; } - int extra_levels = env->curriculum_level - env->bug_maneuver_start_level; + int extra_levels = env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL; float multiplier = 1.0f + 0.50f * extra_levels; - return env->bug_maneuver_frequency * bat_clampf(multiplier, 1.0f, 2.5f); + return BAT_BUG_MANEUVER_FREQUENCY * bat_clampf(multiplier, 1.0f, 2.5f); } static inline float bat_chirps_used_ratio(Bat* env) { @@ -498,7 +486,8 @@ static inline float bat_chirp_efficiency(Bat* env) { } static inline float bat_chirp_perf(Bat* env) { - float raw = 1.0f - env->chirps_emitted_episode / BAT_CHIRP_PERF_REFERENCE_CHIRPS; + float reference_chirps = fmaxf(1.0f, (float)env->max_chirps_per_episode); + float raw = 1.0f - env->chirps_emitted_episode / reference_chirps; return bat_clampf(raw, BAT_CHIRP_PERF_FLOOR, 1.0f); } @@ -508,22 +497,21 @@ static inline float bat_norm_range(float value, float lo, float hi) { } static 
inline float bat_curriculum_distance_difficulty(Bat* env) { - float max_distance = fmaxf(env->curriculum_max_bug_distance, - env->curriculum_inbound_max_bug_distance); + float max_distance = fmaxf(BAT_CURRICULUM_MAX_BUG_DISTANCE, + BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE); return bat_norm_range(env->start_bug_dist, env->curriculum_start_bug_distance, max_distance); } static inline float bat_curriculum_obstacle_difficulty(Bat* env) { return bat_norm_range((float)env->num_obstacles, - (float)env->curriculum_start_obstacles, (float)env->curriculum_max_obstacles); + (float)BAT_CURRICULUM_START_OBSTACLES, (float)BAT_CURRICULUM_MAX_OBSTACLES); } static inline float bat_curriculum_motion_difficulty(Bat* env) { - if (!env->curriculum_enabled) return 0.0f; - if (env->curriculum_level < env->bug_maneuver_start_level) return 0.0f; - float span = (float)(env->curriculum_inbound_start_level + 4 - env->bug_maneuver_start_level); - return bat_clampf((env->curriculum_level - env->bug_maneuver_start_level + 1) / span, + if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) return 0.0f; + float span = (float)(BAT_CURRICULUM_INBOUND_START_LEVEL + 4 - BAT_BUG_MANEUVER_START_LEVEL); + return bat_clampf((env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL + 1) / span, 0.0f, 1.0f); } @@ -532,16 +520,16 @@ static inline float bat_curriculum_difficulty(Bat* env) { float obstacles = bat_curriculum_obstacle_difficulty(env); float active_weight = 0.0f; float weighted = 0.0f; - if (env->curriculum_max_bug_distance > env->curriculum_start_bug_distance) { + if (BAT_CURRICULUM_MAX_BUG_DISTANCE > env->curriculum_start_bug_distance) { weighted += 0.5f * distance; active_weight += 0.5f; } - if (env->curriculum_max_obstacles > env->curriculum_start_obstacles) { + if (BAT_CURRICULUM_MAX_OBSTACLES > BAT_CURRICULUM_START_OBSTACLES) { weighted += 0.5f * obstacles; active_weight += 0.5f; } float motion = bat_curriculum_motion_difficulty(env); - if (env->bug_maneuver_strength > 0.0f) { + if 
(BAT_BUG_MANEUVER_STRENGTH > 0.0f) { weighted += 0.5f * motion; active_weight += 0.5f; } @@ -583,15 +571,15 @@ static inline void bat_record_chirp_timing(Bat* env) { } static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { - float margin = fmaxf(6.0f, fmaxf(env->bat_radius, env->bug_radius) + 3.0f); + float margin = fmaxf(6.0f, fmaxf(BAT_RADIUS, BAT_BUG_RADIUS) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { float angle = bat_randf(env) * BAT_TWO_PI - BAT_PI; float dx = cosf(angle) * target_distance; float dy = sinf(angle) * target_distance; float min_bat_x = fmaxf(margin, margin - dx); - float max_bat_x = fminf(env->width - margin, env->width - margin - dx); + float max_bat_x = fminf(BAT_WIDTH - margin, BAT_WIDTH - margin - dx); float min_bat_y = fmaxf(margin, margin - dy); - float max_bat_y = fminf(env->height - margin, env->height - margin - dy); + float max_bat_y = fminf(BAT_HEIGHT - margin, BAT_HEIGHT - margin - dy); if (max_bat_x < min_bat_x || max_bat_y < min_bat_y) continue; env->bat_x = min_bat_x + bat_randf(env) * (max_bat_x - min_bat_x); @@ -633,27 +621,23 @@ static inline void bat_reset_bug_motion(Bat* env) { } static inline void bat_apply_curriculum(Bat* env) { - if (env->curriculum_enabled) { - env->num_obstacles = bat_curriculum_obstacles(env); - } + env->num_obstacles = bat_curriculum_obstacles(env); } static inline void bat_advance_curriculum(Bat* env) { - if (env->curriculum_enabled) { - env->curriculum_successes_at_level += 1; - if (env->curriculum_successes_at_level >= env->curriculum_successes_per_level) { - env->curriculum_level += 1; - env->curriculum_successes_at_level = 0; - } + env->curriculum_successes_at_level += 1; + if (env->curriculum_successes_at_level >= env->curriculum_successes_per_level) { + env->curriculum_level += 1; + env->curriculum_successes_at_level = 0; } } static inline bool bat_obstacle_clear(Bat* env, int idx, float x, float y, float w, float h) { - if 
(bat_circle_rect_collision(env->bat_x, env->bat_y, env->bat_radius + 2.0f, x, y, w, h)) { + if (bat_circle_rect_collision(env->bat_x, env->bat_y, BAT_RADIUS + 2.0f, x, y, w, h)) { return false; } - if (bat_circle_rect_collision(env->bug_x, env->bug_y, env->bug_radius + 2.0f, x, y, w, h)) { + if (bat_circle_rect_collision(env->bug_x, env->bug_y, BAT_BUG_RADIUS + 2.0f, x, y, w, h)) { return false; } for (int j = 0; j < idx; j++) { @@ -672,8 +656,8 @@ static inline void generate_obstacles(Bat* env) { float w = 3.0f + 5.0f * bat_randf(env); float h = 3.0f + 5.0f * bat_randf(env); float margin = 4.0f; - float x = margin + bat_randf(env) * (env->width - w - 2.0f * margin); - float y = margin + bat_randf(env) * (env->height - h - 2.0f * margin); + float x = margin + bat_randf(env) * (BAT_WIDTH - w - 2.0f * margin); + float y = margin + bat_randf(env) * (BAT_HEIGHT - h - 2.0f * margin); if (bat_obstacle_clear(env, i, x, y, w, h)) { env->obstacle_x[i] = x; env->obstacle_y[i] = y; @@ -686,8 +670,8 @@ static inline void generate_obstacles(Bat* env) { if (!placed) { float w = 6.0f; float h = 6.0f; - float x = env->width * (0.30f + 0.20f * (i % 2)) - w * 0.5f; - float y = env->height * (0.30f + 0.20f * ((i + 1) % 2)) - h * 0.5f; + float x = BAT_WIDTH * (0.30f + 0.20f * (i % 2)) - w * 0.5f; + float y = BAT_HEIGHT * (0.30f + 0.20f * ((i + 1) % 2)) - h * 0.5f; env->obstacle_x[i] = x; env->obstacle_y[i] = y; env->obstacle_w[i] = w; @@ -746,7 +730,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_perf += success * curriculum_difficulty; env->log.curriculum_distance_difficulty += distance_difficulty; env->log.curriculum_obstacle_difficulty += obstacle_difficulty; - env->log.curriculum_chirp_budget_difficulty += 0.0f; env->log.curriculum_motion_difficulty += motion_difficulty; env->log.num_obstacles += env->num_obstacles; env->log.chirps_emitted += env->chirps_emitted_episode; @@ -783,7 +766,8 @@ static inline void add_log(Bat* 
env, float success, float collision, float timeo } static inline int bat_freq_bin_index(Bat* env, float freq_norm) { - int bins = env->freq_bins_per_ear; + (void)env; + int bins = BAT_FREQ_BINS; int bin = (int)(freq_norm * bins); if (bin >= bins) bin = bins - 1; return bin; @@ -820,10 +804,9 @@ static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, bucket->energy[ear_idx][bin] += intensity; if (source == BAT_ECHO_BUG) { float sideband = intensity * env->bug_wing_sideband_gain; - int bins = env->freq_bins_per_ear; if (sideband > 0.000001f) { if (bin > 0) bucket->energy[ear_idx][bin - 1] += sideband; - if (bin + 1 < bins) bucket->energy[ear_idx][bin + 1] += sideband; + if (bin + 1 < BAT_FREQ_BINS) bucket->energy[ear_idx][bin + 1] += sideband; } bucket->bug_energy += intensity; if (bucket->bug_path < 0.0f || path < bucket->bug_path) { @@ -836,7 +819,7 @@ static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, float* right_x, float* right_y) { float lx = -sinf(env->bat_heading); float ly = cosf(env->bat_heading); - float ear_sep = env->bat_radius * env->ear_separation_scale; + float ear_sep = BAT_RADIUS * env->ear_separation_scale; *left_x = env->bat_x - lx * ear_sep * 0.5f; *left_y = env->bat_y - ly * ear_sep * 0.5f; *right_x = env->bat_x + lx * ear_sep * 0.5f; @@ -907,7 +890,7 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + BAT_BUG_SPEED), -1.0f, 1.0f); float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); if (left_path <= env->max_echo_range) { @@ -926,7 +909,7 @@ static inline void bat_schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, float x1, float y1, float x2, float 
y2, float strength) { float len = bat_dist(x1, y1, x2, y2); - int count = (int)(len / env->reflector_spacing) + 1; + int count = (int)(len / BAT_REFLECTOR_SPACING) + 1; if (count < 1) count = 1; for (int i = 0; i <= count; i++) { float t = count == 0 ? 0.0f : i / (float)count; @@ -938,9 +921,9 @@ static inline void bat_schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq) { - if (!env->corner_reflectors) return; - float w = (float)env->width; - float h = (float)env->height; +#if BAT_CORNER_REFLECTORS + float w = (float)BAT_WIDTH; + float h = (float)BAT_HEIGHT; float strength = env->reflector_strength; bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); @@ -958,6 +941,12 @@ static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* ch 0.0f, 0.0f, strength, BAT_ECHO_STATIC); bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); +#else + (void)env; + (void)chirp; + (void)slice_ticks; + (void)freq; +#endif } static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, @@ -992,13 +981,13 @@ static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, bat_schedule_echo(env, &slice_chirp, slice_ticks, freq, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, BAT_ECHO_BUG); bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, 0.0f, (float)env->width, 0.0f, 0.12f); + 0.0f, 0.0f, (float)BAT_WIDTH, 0.0f, 0.12f); bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.12f); + 0.0f, (float)BAT_HEIGHT, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.12f); bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, 0.0f, 0.0f, (float)env->height, 0.12f); + 0.0f, 0.0f, 0.0f, (float)BAT_HEIGHT, 
0.12f); bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.12f); + (float)BAT_WIDTH, 0.0f, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.12f); bat_schedule_corner_reflector_echoes(env, &slice_chirp, slice_ticks, freq); for (int j = 0; j < env->num_obstacles; j++) { bat_schedule_obstacle_echoes(env, &slice_chirp, slice_ticks, freq, j); @@ -1070,9 +1059,7 @@ void compute_observations(Bat* env) { float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, 0.0f, 1.0f); env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); - float timer_norm = env->max_steps == BAT_DEFAULT_MAX_STEPS - ? env->tick * BAT_DEFAULT_MAX_STEPS_INV - : env->tick / (float)env->max_steps; + float timer_norm = env->tick / (float)env->max_steps; env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); } @@ -1083,15 +1070,11 @@ static inline void bat_reset_episode(Bat* env) { float initial_speed = env->bat_min_speed; env->bat_vx = cosf(env->bat_heading) * initial_speed; env->bat_vy = sinf(env->bat_heading) * initial_speed; - if (env->curriculum_enabled && env->curriculum_level < env->curriculum_initial_level) { + if (env->curriculum_level < env->curriculum_initial_level) { env->curriculum_level = env->curriculum_initial_level; } bat_apply_curriculum(env); - if (env->curriculum_enabled) { - bat_sample_spawns_at_distance(env, bat_curriculum_spawn_distance(env)); - } else { - bat_sample_spawns(env); - } + bat_sample_spawns_at_distance(env, bat_curriculum_spawn_distance(env)); generate_obstacles(env); bat_reset_bug_motion(env); env->last_chirp_start_freq = 0.0f; @@ -1135,7 +1118,7 @@ void c_reset(Bat* env) { static inline bool bat_hits_obstacle(Bat* env) { for (int i = 0; i < env->num_obstacles; i++) { - if (bat_circle_rect_collision(env->bat_x, 
env->bat_y, env->bat_radius, + if (bat_circle_rect_collision(env->bat_x, env->bat_y, BAT_RADIUS, env->obstacle_x[i], env->obstacle_y[i], env->obstacle_w[i], env->obstacle_h[i])) { return true; } @@ -1144,10 +1127,10 @@ static inline bool bat_hits_obstacle(Bat* env) { } static inline bool bat_hits_wall(Bat* env) { - return env->bat_x - env->bat_radius < 0.0f || - env->bat_x + env->bat_radius > env->width || - env->bat_y - env->bat_radius < 0.0f || - env->bat_y + env->bat_radius > env->height; + return env->bat_x - BAT_RADIUS < 0.0f || + env->bat_x + BAT_RADIUS > BAT_WIDTH || + env->bat_y - BAT_RADIUS < 0.0f || + env->bat_y + BAT_RADIUS > BAT_HEIGHT; } static inline void bat_update_bug(Bat* env, float dt) { @@ -1196,23 +1179,23 @@ static inline void bat_update_bug(Bat* env, float dt) { env->bug_x += env->bug_vx * dt; env->bug_y += env->bug_vy * dt; bool bounced = false; - if (env->bug_x - env->bug_radius < 0.0f) { - env->bug_x = env->bug_radius; + if (env->bug_x - BAT_BUG_RADIUS < 0.0f) { + env->bug_x = BAT_BUG_RADIUS; env->bug_vx = fabsf(env->bug_vx); bounced = true; } - if (env->bug_x + env->bug_radius > env->width) { - env->bug_x = env->width - env->bug_radius; + if (env->bug_x + BAT_BUG_RADIUS > BAT_WIDTH) { + env->bug_x = BAT_WIDTH - BAT_BUG_RADIUS; env->bug_vx = -fabsf(env->bug_vx); bounced = true; } - if (env->bug_y - env->bug_radius < 0.0f) { - env->bug_y = env->bug_radius; + if (env->bug_y - BAT_BUG_RADIUS < 0.0f) { + env->bug_y = BAT_BUG_RADIUS; env->bug_vy = fabsf(env->bug_vy); bounced = true; } - if (env->bug_y + env->bug_radius > env->height) { - env->bug_y = env->height - env->bug_radius; + if (env->bug_y + BAT_BUG_RADIUS > BAT_HEIGHT) { + env->bug_y = BAT_HEIGHT - BAT_BUG_RADIUS; env->bug_vy = -fabsf(env->bug_vy); bounced = true; } @@ -1321,7 +1304,7 @@ static inline int bat_update_chirp(Bat* env) { } static inline bool bat_caught_bug(Bat* env) { - return bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) <= env->bat_radius + env->bug_radius; + 
return bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) <= BAT_RADIUS + BAT_BUG_RADIUS; } void c_step(Bat* env) { @@ -1379,6 +1362,7 @@ void c_step(Bat* env) { env->rewards[0] -= env->step_cost; if (chirp_status > 0) { env->rewards[0] += env->valid_chirp_reward; + env->rewards[0] -= BAT_CHIRP_COST; if (chirp_overlap_fraction > 0.0f) { env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; env->chirps_overlapped += 1; @@ -1402,7 +1386,7 @@ void c_step(Bat* env) { if (env->last_bug_echo_path > 0.0f) { float bat_echo_displacement = bat_dist(env->last_bug_echo_bat_x, env->last_bug_echo_bat_y, env->bat_x, env->bat_y); - if (bat_echo_displacement >= env->bug_echo_min_displacement) { + if (bat_echo_displacement >= BAT_BUG_ECHO_MIN_DISPLACEMENT) { float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) / env->max_echo_range; if (echo_progress > 0.0f) { @@ -1488,7 +1472,7 @@ static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + env->bug_speed), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + BAT_BUG_SPEED), -1.0f, 1.0f); float amp = strength / (1.0f + 0.02f * distance * distance); float alpha = bat_clampf(0.20f + amp * 2.0f, 0.20f, 0.90f); Color color = bat_doppler_ray_color(doppler, alpha); @@ -1503,7 +1487,7 @@ static inline void bat_draw_segment_echoes(Bat* env, ChirpEvent* chirp, float x1, float y1, float x2, float y2, float strength, float sx, float sy) { float len = bat_dist(x1, y1, x2, y2); - int count = (int)(len / env->reflector_spacing) + 1; + int count = (int)(len / BAT_REFLECTOR_SPACING) + 1; if (count < 1) count = 1; for (int i = 0; i <= count; i++) { float t = i / (float)count; @@ -1527,9 +1511,9 @@ static inline void bat_draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, static inline void 
bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float sx, float sy) { - if (!env->corner_reflectors) return; - float w = (float)env->width; - float h = (float)env->height; +#if BAT_CORNER_REFLECTORS + float w = (float)BAT_WIDTH; + float h = (float)BAT_HEIGHT; float strength = env->reflector_strength; bat_draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); @@ -1539,10 +1523,16 @@ static inline void bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, bat_draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); bat_draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); +#else + (void)env; + (void)chirp; + (void)sx; + (void)sy; +#endif } static inline void bat_draw_corner_reflector_markers(Bat* env) { - if (!env->corner_reflectors) return; +#if BAT_CORNER_REFLECTORS const int size = 8; const Color fill = (Color){128, 128, 132, 255}; const Color outline = (Color){202, 202, 208, 255}; @@ -1566,6 +1556,9 @@ static inline void bat_draw_corner_reflector_markers(Bat* env) { DrawRectangleLines(0, mid_y, size, size, outline); DrawRectangle(max_x, mid_y, size, size, fill); DrawRectangleLines(max_x, mid_y, size, size, outline); +#else + (void)env; +#endif } static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { @@ -1574,10 +1567,10 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { if (!chirp->active) continue; bat_draw_echo_flash(env, chirp, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 4.0f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, (float)env->width, 0.0f, 0.18f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, (float)env->height, (float)env->width, (float)env->height, 0.18f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)env->height, 0.18f, sx, sy); - 
bat_draw_segment_echoes(env, chirp, (float)env->width, 0.0f, (float)env->width, (float)env->height, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, (float)BAT_WIDTH, 0.0f, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, 0.0f, (float)BAT_HEIGHT, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)BAT_HEIGHT, 0.18f, sx, sy); + bat_draw_segment_echoes(env, chirp, (float)BAT_WIDTH, 0.0f, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.18f, sx, sy); bat_draw_corner_reflector_echoes(env, chirp, sx, sy); for (int j = 0; j < env->num_obstacles; j++) { bat_draw_obstacle_echoes(env, chirp, j, sx, sy); @@ -1589,8 +1582,8 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); - client->width = env->width * 10; - client->height = env->height * 10; + client->width = BAT_WIDTH * 10; + client->height = BAT_HEIGHT * 10; InitWindow(client->width, client->height, "Bat"); int target_fps = env->render_target_fps; if (target_fps > 0) { @@ -1622,8 +1615,8 @@ void c_render(Bat* env) { env->client = make_client(env); } bat_play_chirp_audio(env); - float sx = env->client->width / (float)env->width; - float sy = env->client->height / (float)env->height; + float sx = env->client->width / (float)BAT_WIDTH; + float sy = env->client->height / (float)BAT_HEIGHT; BeginDrawing(); ClearBackground((Color){18, 20, 24, 255}); bat_draw_chirp_rings(env, sx, sy); @@ -1639,11 +1632,11 @@ void c_render(Bat* env) { } bat_draw_corner_reflector_markers(env); DrawCircle((int)(env->bug_x * sx), (int)(env->bug_y * sy), - env->bug_radius * sx, GREEN); + BAT_BUG_RADIUS * sx, GREEN); DrawCircle((int)(env->bat_x * sx), (int)(env->bat_y * sy), - env->bat_radius * sx, BLUE); - float hx = env->bat_x + cosf(env->bat_heading) * env->bat_radius * 2.0f; - float hy = env->bat_y + sinf(env->bat_heading) * env->bat_radius * 2.0f; + 
BAT_RADIUS * sx, BLUE); + float hx = env->bat_x + cosf(env->bat_heading) * BAT_RADIUS * 2.0f; + float hy = env->bat_y + sinf(env->bat_heading) * BAT_RADIUS * 2.0f; DrawLine((int)(env->bat_x * sx), (int)(env->bat_y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 2948d99988..dafb7b9d89 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -10,60 +10,38 @@ void my_init(Env* env, Dict* kwargs) { env->num_agents = 1; env->frameskip = dict_get(kwargs, "frameskip")->value; - env->width = dict_get(kwargs, "width")->value; - env->height = dict_get(kwargs, "height")->value; - env->num_obstacles = dict_get(kwargs, "num_obstacles")->value; - env->bat_radius = dict_get(kwargs, "bat_radius")->value; env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; env->ear_front_gain = dict_get(kwargs, "ear_front_gain")->value; env->ear_side_gain = dict_get(kwargs, "ear_side_gain")->value; - env->bug_radius = dict_get(kwargs, "bug_radius")->value; env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; env->bat_min_speed = dict_get(kwargs, "bat_min_speed")->value; env->bat_accel = dict_get(kwargs, "bat_accel")->value; env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; - env->bug_speed = dict_get(kwargs, "bug_speed")->value; env->max_steps = dict_get(kwargs, "max_steps")->value; env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; env->record_video = dict_get(kwargs, "record_video")->value; env->record_video_fps = dict_get(kwargs, "record_video_fps")->value; env->record_video_seconds = dict_get(kwargs, "record_video_seconds")->value; env->record_video_audio = dict_get(kwargs, "record_video_audio")->value; - 
env->curriculum_enabled = dict_get(kwargs, "curriculum_enabled")->value; env->curriculum_initial_level = dict_get(kwargs, "curriculum_initial_level")->value; - env->curriculum_start_obstacles = dict_get(kwargs, "curriculum_start_obstacles")->value; - env->curriculum_max_obstacles = dict_get(kwargs, "curriculum_max_obstacles")->value; env->curriculum_obstacle_step = dict_get(kwargs, "curriculum_obstacle_step")->value; env->curriculum_successes_per_level = dict_get(kwargs, "curriculum_successes_per_level")->value; env->curriculum_start_bug_distance = dict_get(kwargs, "curriculum_start_bug_distance")->value; - env->curriculum_max_bug_distance = dict_get(kwargs, "curriculum_max_bug_distance")->value; - env->curriculum_bug_distance_step = dict_get(kwargs, "curriculum_bug_distance_step")->value; - env->curriculum_inbound_start_level = dict_get(kwargs, "curriculum_inbound_start_level")->value; - env->curriculum_inbound_max_bug_distance = dict_get(kwargs, "curriculum_inbound_max_bug_distance")->value; - env->curriculum_inbound_bug_distance_step = dict_get(kwargs, "curriculum_inbound_bug_distance_step")->value; env->inbound_bug_speed_multiplier = dict_get(kwargs, "inbound_bug_speed_multiplier")->value; env->inbound_heading_noise_degrees = dict_get(kwargs, "inbound_heading_noise_degrees")->value; - env->bug_maneuver_start_level = dict_get(kwargs, "bug_maneuver_start_level")->value; - env->bug_maneuver_strength = dict_get(kwargs, "bug_maneuver_strength")->value; - env->bug_maneuver_frequency = dict_get(kwargs, "bug_maneuver_frequency")->value; - env->freq_bins_per_ear = dict_get(kwargs, "freq_bins_per_ear")->value; env->max_echo_range = dict_get(kwargs, "max_echo_range")->value; env->sound_speed = dict_get(kwargs, "sound_speed")->value; - env->reflector_spacing = dict_get(kwargs, "reflector_spacing")->value; - env->corner_reflectors = dict_get(kwargs, "corner_reflectors")->value; env->reflector_strength = dict_get(kwargs, "reflector_strength")->value; env->max_chirp_age_ticks 
= dict_get(kwargs, "max_chirp_age_ticks")->value; env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; - env->chirp_cost = dict_get(kwargs, "chirp_cost")->value; env->chirp_efficiency_reward = dict_get(kwargs, "chirp_efficiency_reward")->value; env->valid_chirp_reward = dict_get(kwargs, "valid_chirp_reward")->value; env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; env->chirp_overlap_penalty = dict_get(kwargs, "chirp_overlap_penalty")->value; env->bug_echo_reward_scale = dict_get(kwargs, "bug_echo_reward_scale")->value; env->bug_echo_farther_penalty_scale = dict_get(kwargs, "bug_echo_farther_penalty_scale")->value; - env->bug_echo_min_displacement = dict_get(kwargs, "bug_echo_min_displacement")->value; env->bug_wing_sideband_gain = dict_get(kwargs, "bug_wing_sideband_gain")->value; env->step_cost = dict_get(kwargs, "step_cost")->value; env->progress_reward_scale = dict_get(kwargs, "progress_reward_scale")->value; @@ -84,7 +62,6 @@ void my_log(Log* log, Dict* out) { dict_set(out, "curriculum_perf", log->curriculum_perf); dict_set(out, "curriculum_distance_difficulty", log->curriculum_distance_difficulty); dict_set(out, "curriculum_obstacle_difficulty", log->curriculum_obstacle_difficulty); - dict_set(out, "curriculum_chirp_budget_difficulty", log->curriculum_chirp_budget_difficulty); dict_set(out, "curriculum_motion_difficulty", log->curriculum_motion_difficulty); dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "chirps_emitted", log->chirps_emitted); From 5dd5b191a069bb7d5eeb38d1f31934610751af46 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 15:44:16 -0700 Subject: [PATCH 37/51] Hardcode bat episode constants --- BAT_CURRICULUM.md | 2 +- BAT_SPEC.md | 19 +++++++---------- config/bat.ini | 6 ------ ocean/bat/bat.c | 14 +------------ ocean/bat/bat.h | 51 +++++++++++++++++++++++---------------------- 
ocean/bat/binding.c | 8 +------ 6 files changed, 36 insertions(+), 64 deletions(-) diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md index 8459ee6037..b41331b9bb 100644 --- a/BAT_CURRICULUM.md +++ b/BAT_CURRICULUM.md @@ -421,7 +421,7 @@ half distance, half clutter. Current chirp handling: ```text -max_chirps_per_episode = 15 +BAT_MAX_CHIRPS_PER_EPISODE = 15 chirp_budget does not decrease with curriculum level chirp_budget_difficulty is removed from curriculum difficulty ``` diff --git a/BAT_SPEC.md b/BAT_SPEC.md index a165f946e9..1d1218aef9 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -246,7 +246,7 @@ Observation layout: 8. `chirps_used_norm = chirps_used / chirp_budget` 9. `forward_speed_norm` 10. `turn_rate_norm` -11. `timer_norm = elapsed_steps / max_steps`, clamped to `[0, 1]` +11. `timer_norm = elapsed_steps / BAT_MAX_STEPS`, clamped to `[0, 1]` Initial observation size: @@ -255,10 +255,8 @@ Initial observation size: Timer normalization: - The timer starts at `0.0` on reset. -- With the default `max_steps = 512`, after step `N` the observation is - `N * BAT_DEFAULT_MAX_STEPS_INV`, where - `BAT_DEFAULT_MAX_STEPS_INV = 1.0 / 512.0`. -- Non-default `max_steps` values use `N / max_steps`. +- With `BAT_MAX_STEPS = 512`, after step `N` the observation is + `N / 512.0`. - The observed timer is clamped to `[0.0, 1.0]`. Echo bins: @@ -281,7 +279,7 @@ Echo timing: - On each tick, all events arriving in that tick window are summed into the corresponding ear frequency bins. - Multiple reflectors can contribute to the same bin on the same tick. -- Echoes beyond `max_echo_range` are ignored. +- Echoes beyond `BAT_MAX_ECHO_RANGE` are ignored. - Implementation should use a fixed future-tick accumulator, not a full active event scan every env step. 
The current design buckets each echo by `ceil(receive_tick)` into `BAT_ECHO_QUEUE_TICKS = 256`, sums by @@ -373,11 +371,11 @@ Progress reward: chirp is emitted before the previous chirp's expected bug reflection has returned - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - - `reward += bug_echo_reward_scale * echo_path_reduction / max_echo_range` + - `reward += bug_echo_reward_scale * echo_path_reduction / BAT_MAX_ECHO_RANGE` when a returning bug echo indicates the bug is closer than the previous bug echo and the bat has moved enough since that previous echo - `reward -= bug_echo_reward_scale * bug_echo_farther_penalty_scale * - echo_path_increase / max_echo_range` when a later moved-enough bug echo is + echo_path_increase / BAT_MAX_ECHO_RANGE` when a later moved-enough bug echo is farther away - Default starting values: - `progress_reward_scale = 0.05` @@ -397,7 +395,7 @@ Termination: - Success: bat catches bug. - Failure: bat collides with a wall or obstacle. - Failure: bat attempts to chirp after exhausting the chirp budget. -- Timeout: `tick >= max_steps`. +- Timeout: `tick >= BAT_MAX_STEPS`. 
Reset: @@ -490,10 +488,7 @@ Config knobs: - `bat_min_speed` - `bat_accel` - `bat_turn_rate` -- `max_steps` -- `max_echo_range` - `sound_speed` -- `max_chirps_per_episode` - `chirp_cooldown_ticks` - `chirp_freq_bins` - `chirp_duration_bins` diff --git a/config/bat.ini b/config/bat.ini index 3b0f31b555..9909a46a81 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -22,7 +22,6 @@ bat_max_speed = 15.498233877318418 bat_min_speed = 2.6389946132676654 bat_accel = 53.02330161128345 bat_turn_rate = 8.371655963408276 -max_steps = 512 render_target_fps = 60 record_video = 0 record_video_fps = 30 @@ -40,11 +39,6 @@ ear_rear_gain = 0.22038613968607276 ear_front_gain = 0.6419214149115183 ear_side_gain = 0.28043867572747055 early_chirp_penalty = 0.006 -inbound_bug_speed_multiplier = 1.75 -inbound_heading_noise_degrees = 18 -max_chirp_age_ticks = 30 -max_chirps_per_episode = 15 -max_echo_range = 128 progress_reward_scale = 0.12 reflector_strength = 0.6 sound_speed = 180.0 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 8c65353527..1dc20a3b44 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -15,13 +15,12 @@ static char* trim(char* s) { static void set_demo_defaults(Bat* env) { *env = (Bat){ - .num_agents = 1, + .num_agents = BAT_NUM_AGENTS, .frameskip = 1, .bat_max_speed = 15.498233877318418f, .bat_min_speed = 2.6389946132676654f, .bat_accel = 53.02330161128345f, .bat_turn_rate = 8.371655963408276f, - .max_steps = 512, .render_target_fps = 60, .record_video = 0, .record_video_fps = 30, @@ -39,11 +38,6 @@ static void set_demo_defaults(Bat* env) { .ear_front_gain = 0.6419214149115183f, .ear_side_gain = 0.28043867572747055f, .early_chirp_penalty = 0.006f, - .inbound_bug_speed_multiplier = 1.75f, - .inbound_heading_noise_degrees = 18.0f, - .max_chirp_age_ticks = 30, - .max_chirps_per_episode = 15, - .max_echo_range = 128.0f, .progress_reward_scale = 0.12f, .reflector_strength = 0.6f, .sound_speed = 180.0f, @@ -62,7 +56,6 @@ static void apply_env_config_value(Bat* env, const 
char* key, float value) { else if (strcmp(key, "bat_min_speed") == 0) env->bat_min_speed = value; else if (strcmp(key, "bat_accel") == 0) env->bat_accel = value; else if (strcmp(key, "bat_turn_rate") == 0) env->bat_turn_rate = value; - else if (strcmp(key, "max_steps") == 0) env->max_steps = (int)value; else if (strcmp(key, "render_target_fps") == 0) env->render_target_fps = (int)value; else if (strcmp(key, "record_video") == 0) env->record_video = (int)value; else if (strcmp(key, "record_video_fps") == 0) env->record_video_fps = (int)value; @@ -80,11 +73,6 @@ static void apply_env_config_value(Bat* env, const char* key, float value) { else if (strcmp(key, "ear_front_gain") == 0) env->ear_front_gain = value; else if (strcmp(key, "ear_side_gain") == 0) env->ear_side_gain = value; else if (strcmp(key, "early_chirp_penalty") == 0) env->early_chirp_penalty = value; - else if (strcmp(key, "inbound_bug_speed_multiplier") == 0) env->inbound_bug_speed_multiplier = value; - else if (strcmp(key, "inbound_heading_noise_degrees") == 0) env->inbound_heading_noise_degrees = value; - else if (strcmp(key, "max_chirp_age_ticks") == 0) env->max_chirp_age_ticks = (int)value; - else if (strcmp(key, "max_chirps_per_episode") == 0) env->max_chirps_per_episode = (int)value; - else if (strcmp(key, "max_echo_range") == 0) env->max_echo_range = value; else if (strcmp(key, "progress_reward_scale") == 0) env->progress_reward_scale = value; else if (strcmp(key, "reflector_strength") == 0) env->reflector_strength = value; else if (strcmp(key, "sound_speed") == 0) env->sound_speed = value; diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 510b68071a..1aac0ff0e2 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -12,6 +12,7 @@ #endif #define BAT_OBS_SIZE 41 +#define BAT_NUM_AGENTS 1 #define BAT_NUM_ACTIONS 6 #define BAT_MOVE_ACTIONS 3 #define BAT_TURN_ACTIONS 3 @@ -40,6 +41,7 @@ #define BAT_TURN_RIGHT 2 #define BAT_MAX_OBSTACLES 16 +#define BAT_MAX_STEPS 512 #define BAT_TICK_RATE 
(1.0f/60.0f) #define BAT_WIDTH 64 #define BAT_HEIGHT 64 @@ -49,7 +51,10 @@ #define BAT_BUG_MANEUVER_START_LEVEL 7 #define BAT_BUG_MANEUVER_STRENGTH 0.4f #define BAT_BUG_MANEUVER_FREQUENCY 0.4f +#define BAT_INBOUND_BUG_SPEED_MULTIPLIER 1.75f +#define BAT_INBOUND_HEADING_NOISE_DEGREES 18.0f #define BAT_REFLECTOR_SPACING 8.0f +#define BAT_MAX_ECHO_RANGE 128.0f #define BAT_BUG_ECHO_MIN_DISPLACEMENT 1.0f #define BAT_CURRICULUM_START_OBSTACLES 0 #define BAT_CURRICULUM_MAX_OBSTACLES 3 @@ -73,6 +78,8 @@ #define BAT_RECORD_MAX_VOICES 16 #define BAT_CHIRP_PERF_FLOOR 0.05f #define BAT_CHIRP_COST 0.0f +#define BAT_MAX_CHIRP_AGE_TICKS 30 +#define BAT_MAX_CHIRPS_PER_EPISODE 15 #define BAT_ECHO_STATIC 0 #define BAT_ECHO_BUG 1 @@ -177,7 +184,6 @@ typedef struct Bat { int frameskip; int tick; - int max_steps; int render_target_fps; int record_video; int record_video_fps; @@ -190,8 +196,6 @@ typedef struct Bat { int curriculum_successes_per_level; int curriculum_successes_at_level; float curriculum_start_bug_distance; - float inbound_bug_speed_multiplier; - float inbound_heading_noise_degrees; float bat_x; float bat_y; @@ -224,12 +228,9 @@ typedef struct Bat { float* obstacle_w; float* obstacle_h; - float max_echo_range; float sound_speed; float reflector_strength; - int max_chirp_age_ticks; int chirp_cooldown_ticks; - int max_chirps_per_episode; int chirp_budget; int chirp_age_ticks; int last_chirp_tick; @@ -340,7 +341,7 @@ static inline float bat_echo_time_seconds(float distance, float sound_speed) { } static inline float bat_chirp_age_norm_denominator(Bat* env) { - float travel_ticks = env->max_echo_range / env->sound_speed / BAT_TICK_RATE; + float travel_ticks = BAT_MAX_ECHO_RANGE / env->sound_speed / BAT_TICK_RATE; float chirp_ticks = bat_chirp_duration_seconds(1.0f) / BAT_TICK_RATE; return 1.25f * (travel_ticks + chirp_ticks); } @@ -456,7 +457,7 @@ static inline float bat_curriculum_spawn_distance(Bat* env) { static inline float bat_curriculum_bug_speed(Bat* env) { float speed 
= BAT_BUG_SPEED; if (bat_curriculum_inbound_enabled(env)) { - speed *= env->inbound_bug_speed_multiplier; + speed *= BAT_INBOUND_BUG_SPEED_MULTIPLIER; } return speed; } @@ -486,7 +487,7 @@ static inline float bat_chirp_efficiency(Bat* env) { } static inline float bat_chirp_perf(Bat* env) { - float reference_chirps = fmaxf(1.0f, (float)env->max_chirps_per_episode); + float reference_chirps = fmaxf(1.0f, (float)BAT_MAX_CHIRPS_PER_EPISODE); float raw = 1.0f - env->chirps_emitted_episode / reference_chirps; return bat_clampf(raw, BAT_CHIRP_PERF_FLOOR, 1.0f); } @@ -611,7 +612,7 @@ static inline void bat_reset_bug_motion(Bat* env) { if (env->bug_inbound) { float tx, ty; bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); - float noise = env->inbound_heading_noise_degrees * (BAT_PI / 180.0f); + float noise = BAT_INBOUND_HEADING_NOISE_DEGREES * (BAT_PI / 180.0f); float heading = atan2f(ty, tx) + (2.0f * bat_randf(env) - 1.0f) * noise; bat_set_bug_velocity(env, heading, speed); } else { @@ -753,10 +754,10 @@ static inline void add_log(Bat* env, float success, float collision, float timeo } env->log.chirp_tempo_ratio += bat_clampf(tempo_ratio, 0.0f, 10.0f); env->log.first_chirp_tick_norm += env->first_chirp_tick >= 0.0f - ? bat_clampf(env->first_chirp_tick / (float)env->max_steps, 0.0f, 1.0f) + ? bat_clampf(env->first_chirp_tick / (float)BAT_MAX_STEPS, 0.0f, 1.0f) : 1.0f; env->log.mean_chirp_tick_norm += env->chirps_emitted_episode > 0 - ? bat_clampf((env->chirp_tick_sum / chirps) / (float)env->max_steps, 0.0f, 1.0f) + ? 
bat_clampf((env->chirp_tick_sum / chirps) / (float)BAT_MAX_STEPS, 0.0f, 1.0f) : 1.0f; if (env->chirps_emitted_episode > 0) { env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; @@ -842,8 +843,8 @@ static inline float bat_expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { float left_path = source_path + bat_dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); float right_path = source_path + bat_dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); float best_path = -1.0f; - if (left_path <= env->max_echo_range) best_path = left_path; - if (right_path <= env->max_echo_range && (best_path < 0.0f || right_path < best_path)) { + if (left_path <= BAT_MAX_ECHO_RANGE) best_path = left_path; + if (right_path <= BAT_MAX_ECHO_RANGE && (best_path < 0.0f || right_path < best_path)) { best_path = right_path; } if (best_path < 0.0f) return -1.0f; @@ -885,7 +886,7 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float source_path = bat_dist(chirp->x, chirp->y, rx, ry); float left_path = source_path + bat_dist(rx, ry, left_ear_x, left_ear_y); float right_path = source_path + bat_dist(rx, ry, right_ear_x, right_ear_y); - if (left_path > env->max_echo_range && right_path > env->max_echo_range) return; + if (left_path > BAT_MAX_ECHO_RANGE && right_path > BAT_MAX_ECHO_RANGE) return; float rel_vx = rvx - env->bat_vx; float rel_vy = rvy - env->bat_vy; @@ -893,12 +894,12 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + BAT_BUG_SPEED), -1.0f, 1.0f); float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); - if (left_path <= env->max_echo_range) { + if (left_path <= BAT_MAX_ECHO_RANGE) { float attenuation = strength / (1.0f + 0.02f * left_path * left_path); float receive_tick = chirp->birth_tick + slice_ticks + left_path / env->sound_speed / BAT_TICK_RATE; bat_add_echo_event(env, 0, receive_tick, shifted_freq, attenuation * 
left_gain, left_path, source); } - if (right_path <= env->max_echo_range) { + if (right_path <= BAT_MAX_ECHO_RANGE) { float attenuation = strength / (1.0f + 0.02f * right_path * right_path); float receive_tick = chirp->birth_tick + slice_ticks + right_path / env->sound_speed / BAT_TICK_RATE; bat_add_echo_event(env, 1, receive_tick, shifted_freq, attenuation * right_gain, right_path, source); @@ -1059,7 +1060,7 @@ void compute_observations(Bat* env) { float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, 0.0f, 1.0f); env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); - float timer_norm = env->tick / (float)env->max_steps; + float timer_norm = env->tick / (float)BAT_MAX_STEPS; env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); } @@ -1085,7 +1086,7 @@ static inline void bat_reset_episode(Bat* env) { memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; bat_clear_echo_queue(env); - env->chirp_budget = env->max_chirps_per_episode; + env->chirp_budget = BAT_MAX_CHIRPS_PER_EPISODE; env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; @@ -1297,7 +1298,7 @@ static inline int bat_update_chirp(Bat* env) { return -2; } return bat_try_emit_chirp(env) ? 
1 : -1; - } else if (env->chirp_age_ticks < env->max_chirp_age_ticks) { + } else if (env->chirp_age_ticks < BAT_MAX_CHIRP_AGE_TICKS) { env->chirp_age_ticks += 1; } return 0; @@ -1372,7 +1373,7 @@ void c_step(Bat* env) { } env->prev_bug_dist = bug_dist; - if (env->tick >= env->max_steps) { + if (env->tick >= BAT_MAX_STEPS) { env->rewards[0] = -1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; @@ -1388,7 +1389,7 @@ void c_step(Bat* env) { env->bat_x, env->bat_y); if (bat_echo_displacement >= BAT_BUG_ECHO_MIN_DISPLACEMENT) { float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) - / env->max_echo_range; + / BAT_MAX_ECHO_RANGE; if (echo_progress > 0.0f) { env->rewards[0] += env->bug_echo_reward_scale * echo_progress; } else if (echo_progress < 0.0f) { @@ -1423,7 +1424,7 @@ static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { if (!chirp->active) continue; float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; - float max_age = env->max_echo_range / env->sound_speed + chirp->duration; + float max_age = BAT_MAX_ECHO_RANGE / env->sound_speed + chirp->duration; if (age_seconds < 0.0f || age_seconds > max_age) { chirp->active = 0; continue; @@ -1433,9 +1434,9 @@ static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { float slice = ring / (float)(BAT_CHIRP_RINGS - 1); float freq = chirp->start_freq + slice * (chirp->end_freq - chirp->start_freq); float radius = bat_chirp_ring_radius(age_seconds, slice, chirp->duration, env->sound_speed); - if (radius <= 0.0f || radius > env->max_echo_range) continue; + if (radius <= 0.0f || radius > BAT_MAX_ECHO_RANGE) continue; - float fade = 1.0f - radius / env->max_echo_range; + float fade = 1.0f - radius / BAT_MAX_ECHO_RANGE; float alpha = 0.18f + 0.42f * bat_clampf(fade, 0.0f, 1.0f); float source_x, source_y; bat_chirp_source_for_fraction(chirp, slice, &source_x, &source_y); diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 
dafb7b9d89..c9ad24abf9 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -8,7 +8,7 @@ #include "vecenv.h" void my_init(Env* env, Dict* kwargs) { - env->num_agents = 1; + env->num_agents = BAT_NUM_AGENTS; env->frameskip = dict_get(kwargs, "frameskip")->value; env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; @@ -18,7 +18,6 @@ void my_init(Env* env, Dict* kwargs) { env->bat_min_speed = dict_get(kwargs, "bat_min_speed")->value; env->bat_accel = dict_get(kwargs, "bat_accel")->value; env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; - env->max_steps = dict_get(kwargs, "max_steps")->value; env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; env->record_video = dict_get(kwargs, "record_video")->value; env->record_video_fps = dict_get(kwargs, "record_video_fps")->value; @@ -28,14 +27,9 @@ void my_init(Env* env, Dict* kwargs) { env->curriculum_obstacle_step = dict_get(kwargs, "curriculum_obstacle_step")->value; env->curriculum_successes_per_level = dict_get(kwargs, "curriculum_successes_per_level")->value; env->curriculum_start_bug_distance = dict_get(kwargs, "curriculum_start_bug_distance")->value; - env->inbound_bug_speed_multiplier = dict_get(kwargs, "inbound_bug_speed_multiplier")->value; - env->inbound_heading_noise_degrees = dict_get(kwargs, "inbound_heading_noise_degrees")->value; - env->max_echo_range = dict_get(kwargs, "max_echo_range")->value; env->sound_speed = dict_get(kwargs, "sound_speed")->value; env->reflector_strength = dict_get(kwargs, "reflector_strength")->value; - env->max_chirp_age_ticks = dict_get(kwargs, "max_chirp_age_ticks")->value; env->chirp_cooldown_ticks = dict_get(kwargs, "chirp_cooldown_ticks")->value; - env->max_chirps_per_episode = dict_get(kwargs, "max_chirps_per_episode")->value; env->chirp_efficiency_reward = dict_get(kwargs, "chirp_efficiency_reward")->value; env->valid_chirp_reward = dict_get(kwargs, 
"valid_chirp_reward")->value; env->early_chirp_penalty = dict_get(kwargs, "early_chirp_penalty")->value; From 21eb1013eba1dc872b93f4ac5e6339d170db888c Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 16:39:59 -0700 Subject: [PATCH 38/51] Clean up bat symbol names --- BAT_CURRICULUM.md | 16 +- BAT_SPEC.md | 22 +- ocean/bat/bat.c | 36 +- ocean/bat/bat.h | 1148 ++++++++++++++++++++-------------------- ocean/bat/bat_audio.h | 54 +- ocean/bat/bat_record.h | 62 +-- ocean/bat/binding.c | 15 +- 7 files changed, 676 insertions(+), 677 deletions(-) diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md index b41331b9bb..b0595266dd 100644 --- a/BAT_CURRICULUM.md +++ b/BAT_CURRICULUM.md @@ -19,14 +19,14 @@ With the current defaults: ```ini curriculum_initial_level = 1 -BAT_CURRICULUM_START_OBSTACLES = 0 -BAT_CURRICULUM_MAX_OBSTACLES = 3 +CURRICULUM_START_OBSTACLES = 0 +CURRICULUM_MAX_OBSTACLES = 3 curriculum_obstacle_step = 8 curriculum_start_bug_distance = 8.438 -BAT_CURRICULUM_MAX_BUG_DISTANCE = 40.0 -BAT_CURRICULUM_BUG_DISTANCE_STEP = 2.0 -BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE = 56.0 -BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP = 4.0 +CURRICULUM_MAX_BUG_DISTANCE = 40.0 +CURRICULUM_BUG_DISTANCE_STEP = 2.0 +CURRICULUM_INBOUND_MAX_BUG_DISTANCE = 56.0 +CURRICULUM_INBOUND_BUG_DISTANCE_STEP = 4.0 ``` At `curriculum_level ~= 5`, the bug starts around distance `28`, giving: @@ -173,7 +173,7 @@ start_bug_distance = 8 + level * distance_step Recommendation: -- Keep `BAT_CURRICULUM_BUG_DISTANCE_STEP` hardcoded at `2.0`; Bat9's best runs +- Keep `CURRICULUM_BUG_DISTANCE_STEP` hardcoded at `2.0`; Bat9's best runs clustered there, and the inbound curriculum already expands later distances. - Log `curriculum_distance_difficulty` directly. @@ -421,7 +421,7 @@ half distance, half clutter. 
Current chirp handling: ```text -BAT_MAX_CHIRPS_PER_EPISODE = 15 +MAX_CHIRPS_PER_EPISODE = 15 chirp_budget does not decrease with curriculum level chirp_budget_difficulty is removed from curriculum difficulty ``` diff --git a/BAT_SPEC.md b/BAT_SPEC.md index 1d1218aef9..e33bb127fb 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -116,7 +116,7 @@ Acoustics: enough artificial time-of-arrival separation for one ear to be able to hear a return about one tick before the other. - `ear_separation_scale` controls the artificial distance between ears as a - multiple of `BAT_RADIUS`. Keep it bounded; the implementation clamps it to + multiple of `AGENT_RADIUS`. Keep it bounded; the implementation clamps it to `[0.25, 2.0]` and the default sweep range is `[0.5, 2.0]`. - Every echo contribution has: - two-way distance from mouth/source to reflector to each ear, @@ -130,7 +130,7 @@ Point-reflector renderer: - v1 should represent walls and obstacle surfaces as stationary point reflectors. - Sample each wall and obstacle edge at fixed spacing, - `BAT_REFLECTOR_SPACING = 8.0` world units. + `REFLECTOR_SPACING = 8.0` world units. - The bug contributes one moving circular/point reflector at its center. - This avoids wavefront bookkeeping while preserving range, angle, and Doppler learning signals. @@ -246,7 +246,7 @@ Observation layout: 8. `chirps_used_norm = chirps_used / chirp_budget` 9. `forward_speed_norm` 10. `turn_rate_norm` -11. `timer_norm = elapsed_steps / BAT_MAX_STEPS`, clamped to `[0, 1]` +11. `timer_norm = elapsed_steps / MAX_STEPS`, clamped to `[0, 1]` Initial observation size: @@ -255,7 +255,7 @@ Initial observation size: Timer normalization: - The timer starts at `0.0` on reset. -- With `BAT_MAX_STEPS = 512`, after step `N` the observation is +- With `MAX_STEPS = 512`, after step `N` the observation is `N / 512.0`. - The observed timer is clamped to `[0.0, 1.0]`. 
@@ -279,10 +279,10 @@ Echo timing: - On each tick, all events arriving in that tick window are summed into the corresponding ear frequency bins. - Multiple reflectors can contribute to the same bin on the same tick. -- Echoes beyond `BAT_MAX_ECHO_RANGE` are ignored. +- Echoes beyond `MAX_ECHO_RANGE` are ignored. - Implementation should use a fixed future-tick accumulator, not a full active event scan every env step. The current design buckets each echo by - `ceil(receive_tick)` into `BAT_ECHO_QUEUE_TICKS = 256`, sums by + `ceil(receive_tick)` into `ECHO_QUEUE_TICKS = 256`, sums by `[ear][freq_bin]`, and processes only the current tick's bucket. - The accumulator is an implementation detail only. It must preserve the observation semantics: current-tick per-ear frequency intensities are summed @@ -348,7 +348,7 @@ Default reward model: - when a bug echo returns with a shorter acoustic path than the previous bug echo, add a small shaped reward, - this reward only applies if the bat has moved at least - `BAT_BUG_ECHO_MIN_DISPLACEMENT` since the previous scored bug echo, so a + `BUG_ECHO_MIN_DISPLACEMENT` since the previous scored bug echo, so a stationary bat cannot farm reward from the bug moving closer by itself, - farther bug echoes update the previous bug echo path and receive a weaker penalty scaled by `bug_echo_farther_penalty_scale`, default `0.10`, @@ -365,17 +365,17 @@ Progress reward: - Default formula: - `reward += progress_reward_scale * (prev_bug_dist - bug_dist)` - `reward -= step_cost` - - `reward -= BAT_CHIRP_COST` when a chirp is emitted; this is hardcoded to + - `reward -= CHIRP_COST` when a chirp is emitted; this is hardcoded to zero for the current Bat defaults - `reward -= chirp_overlap_penalty * bug_echo_wait_fraction` when a valid chirp is emitted before the previous chirp's expected bug reflection has returned - `reward += chirp_efficiency_reward * chirp_efficiency` on catch - - `reward += bug_echo_reward_scale * echo_path_reduction / 
BAT_MAX_ECHO_RANGE` + - `reward += bug_echo_reward_scale * echo_path_reduction / MAX_ECHO_RANGE` when a returning bug echo indicates the bug is closer than the previous bug echo and the bat has moved enough since that previous echo - `reward -= bug_echo_reward_scale * bug_echo_farther_penalty_scale * - echo_path_increase / BAT_MAX_ECHO_RANGE` when a later moved-enough bug echo is + echo_path_increase / MAX_ECHO_RANGE` when a later moved-enough bug echo is farther away - Default starting values: - `progress_reward_scale = 0.05` @@ -395,7 +395,7 @@ Termination: - Success: bat catches bug. - Failure: bat collides with a wall or obstacle. - Failure: bat attempts to chirp after exhausting the chirp budget. -- Timeout: `tick >= BAT_MAX_STEPS`. +- Timeout: `tick >= MAX_STEPS`. Reset: diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 1dc20a3b44..db17016814 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -3,7 +3,7 @@ #include #include "bat.h" -#define BAT_DEMO_CONFIG_PATH "config/bat.ini" +#define DEMO_CONFIG_PATH "config/bat.ini" static char* trim(char* s) { while (isspace((unsigned char)*s)) s++; @@ -15,12 +15,12 @@ static char* trim(char* s) { static void set_demo_defaults(Bat* env) { *env = (Bat){ - .num_agents = BAT_NUM_AGENTS, + .num_agents = NUM_AGENTS, .frameskip = 1, - .bat_max_speed = 15.498233877318418f, - .bat_min_speed = 2.6389946132676654f, - .bat_accel = 53.02330161128345f, - .bat_turn_rate = 8.371655963408276f, + .max_speed = 15.498233877318418f, + .min_speed = 2.6389946132676654f, + .accel = 53.02330161128345f, + .turn_rate = 8.371655963408276f, .render_target_fps = 60, .record_video = 0, .record_video_fps = 30, @@ -52,10 +52,10 @@ static void set_demo_defaults(Bat* env) { static void apply_env_config_value(Bat* env, const char* key, float value) { if (strcmp(key, "frameskip") == 0) env->frameskip = (int)value; - else if (strcmp(key, "bat_max_speed") == 0) env->bat_max_speed = value; - else if (strcmp(key, "bat_min_speed") == 0) env->bat_min_speed 
= value; - else if (strcmp(key, "bat_accel") == 0) env->bat_accel = value; - else if (strcmp(key, "bat_turn_rate") == 0) env->bat_turn_rate = value; + else if (strcmp(key, "bat_max_speed") == 0) env->max_speed = value; + else if (strcmp(key, "bat_min_speed") == 0) env->min_speed = value; + else if (strcmp(key, "bat_accel") == 0) env->accel = value; + else if (strcmp(key, "bat_turn_rate") == 0) env->turn_rate = value; else if (strcmp(key, "render_target_fps") == 0) env->render_target_fps = (int)value; else if (strcmp(key, "record_video") == 0) env->record_video = (int)value; else if (strcmp(key, "record_video_fps") == 0) env->record_video_fps = (int)value; @@ -113,7 +113,7 @@ static void load_env_config(Bat* env, const char* path) { void demo() { Bat env; set_demo_defaults(&env); - load_env_config(&env, BAT_DEMO_CONFIG_PATH); + load_env_config(&env, DEMO_CONFIG_PATH); env.rng = (unsigned int)time(NULL); allocate(&env); env.client = make_client(&env); @@ -121,13 +121,13 @@ void demo() { SetTargetFPS(60); while (!WindowShouldClose()) { - memset(env.actions, 0, sizeof(float) * BAT_NUM_ACTIONS); - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_NONE; - if (IsKeyDown(KEY_W)) env.actions[0] = BAT_THRUST_FORWARD; - if (IsKeyDown(KEY_S)) env.actions[0] = BAT_BRAKE; - if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[1] = BAT_TURN_LEFT; - if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[1] = BAT_TURN_RIGHT; + memset(env.actions, 0, sizeof(float) * NUM_ACTIONS); + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; + if (IsKeyDown(KEY_W)) env.actions[0] = THRUST_FORWARD; + if (IsKeyDown(KEY_S)) env.actions[0] = BRAKE; + if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[1] = TURN_LEFT; + if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[1] = TURN_RIGHT; env.actions[2] = 0; env.actions[3] = 7; env.actions[4] = 1; diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 1aac0ff0e2..2dfb145f66 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h 
@@ -11,84 +11,84 @@ #include "raylib.h" #endif -#define BAT_OBS_SIZE 41 -#define BAT_NUM_AGENTS 1 -#define BAT_NUM_ACTIONS 6 -#define BAT_MOVE_ACTIONS 3 -#define BAT_TURN_ACTIONS 3 -#define BAT_CHIRP_FREQ_BINS 8 -#define BAT_CHIRP_DURATION_BINS 4 -#define BAT_CHIRP_EMIT_ACTIONS 2 - -#define BAT_FREQ_BINS 16 -#define BAT_LEFT_FREQ_OFFSET 0 -#define BAT_RIGHT_FREQ_OFFSET 16 -#define BAT_CHIRP_AGE_OBS 32 -#define BAT_CHIRP_COOLDOWN_OBS 33 -#define BAT_CHIRP_START_OBS 34 -#define BAT_CHIRP_END_OBS 35 -#define BAT_CHIRP_DURATION_OBS 36 -#define BAT_CHIRPS_USED_OBS 37 -#define BAT_FORWARD_SPEED_OBS 38 -#define BAT_TURN_RATE_OBS 39 - -#define BAT_NOOP 0 -#define BAT_THRUST_FORWARD 1 -#define BAT_BRAKE 2 - -#define BAT_TURN_NONE 0 -#define BAT_TURN_LEFT 1 -#define BAT_TURN_RIGHT 2 - -#define BAT_MAX_OBSTACLES 16 -#define BAT_MAX_STEPS 512 -#define BAT_TICK_RATE (1.0f/60.0f) -#define BAT_WIDTH 64 -#define BAT_HEIGHT 64 -#define BAT_RADIUS 2.0f -#define BAT_BUG_RADIUS 1.5f -#define BAT_BUG_SPEED 4.0f -#define BAT_BUG_MANEUVER_START_LEVEL 7 -#define BAT_BUG_MANEUVER_STRENGTH 0.4f -#define BAT_BUG_MANEUVER_FREQUENCY 0.4f -#define BAT_INBOUND_BUG_SPEED_MULTIPLIER 1.75f -#define BAT_INBOUND_HEADING_NOISE_DEGREES 18.0f -#define BAT_REFLECTOR_SPACING 8.0f -#define BAT_MAX_ECHO_RANGE 128.0f -#define BAT_BUG_ECHO_MIN_DISPLACEMENT 1.0f -#define BAT_CURRICULUM_START_OBSTACLES 0 -#define BAT_CURRICULUM_MAX_OBSTACLES 3 -#define BAT_CURRICULUM_BUG_DISTANCE_STEP 2.0f -#define BAT_CURRICULUM_MAX_BUG_DISTANCE 40.0f -#define BAT_CURRICULUM_INBOUND_START_LEVEL 8 -#define BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE 56.0f -#define BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP 4.0f -#define BAT_PI 3.14159265358979323846f -#define BAT_TWO_PI (2.0f * BAT_PI) -#define BAT_CHIRP_HISTORY 4 -#define BAT_CHIRP_RINGS 5 -#define BAT_MAX_CHIRP_SLICES 16 -#define BAT_ECHO_QUEUE_TICKS 256 -#define BAT_CORNER_REFLECTORS 1 -#define BAT_AUDIO_VOICES 8 -#define BAT_AUDIO_SAMPLE_RATE 48000 -#define BAT_AUDIO_MIN_HZ 
600.0f -#define BAT_AUDIO_MAX_HZ 3600.0f -#define BAT_AUDIO_VOLUME 0.22f -#define BAT_RECORD_MAX_VOICES 16 -#define BAT_CHIRP_PERF_FLOOR 0.05f -#define BAT_CHIRP_COST 0.0f -#define BAT_MAX_CHIRP_AGE_TICKS 30 -#define BAT_MAX_CHIRPS_PER_EPISODE 15 - -#define BAT_ECHO_STATIC 0 -#define BAT_ECHO_BUG 1 +#define OBS_SIZE 41 +#define NUM_AGENTS 1 +#define NUM_ACTIONS 6 +#define MOVE_ACTIONS 3 +#define TURN_ACTIONS 3 +#define CHIRP_FREQ_BINS 8 +#define CHIRP_DURATION_BINS 4 +#define CHIRP_EMIT_ACTIONS 2 + +#define FREQ_BINS 16 +#define LEFT_FREQ_OFFSET 0 +#define RIGHT_FREQ_OFFSET 16 +#define CHIRP_AGE_OBS 32 +#define CHIRP_COOLDOWN_OBS 33 +#define CHIRP_START_OBS 34 +#define CHIRP_END_OBS 35 +#define CHIRP_DURATION_OBS 36 +#define CHIRPS_USED_OBS 37 +#define FORWARD_SPEED_OBS 38 +#define TURN_RATE_OBS 39 + +#define NOOP 0 +#define THRUST_FORWARD 1 +#define BRAKE 2 + +#define TURN_NONE 0 +#define TURN_LEFT 1 +#define TURN_RIGHT 2 + +#define MAX_OBSTACLES 16 +#define MAX_STEPS 512 +#define TICK_RATE (1.0f/60.0f) +#define ARENA_WIDTH 64 +#define ARENA_HEIGHT 64 +#define AGENT_RADIUS 2.0f +#define BUG_RADIUS 1.5f +#define BUG_SPEED 4.0f +#define BUG_MANEUVER_START_LEVEL 7 +#define BUG_MANEUVER_STRENGTH 0.4f +#define BUG_MANEUVER_FREQUENCY 0.4f +#define INBOUND_BUG_SPEED_MULTIPLIER 1.75f +#define INBOUND_HEADING_NOISE_DEGREES 18.0f +#define REFLECTOR_SPACING 8.0f +#define MAX_ECHO_RANGE 128.0f +#define BUG_ECHO_MIN_DISPLACEMENT 1.0f +#define CURRICULUM_START_OBSTACLES 0 +#define CURRICULUM_MAX_OBSTACLES 3 +#define CURRICULUM_BUG_DISTANCE_STEP 2.0f +#define CURRICULUM_MAX_BUG_DISTANCE 40.0f +#define CURRICULUM_INBOUND_START_LEVEL 8 +#define CURRICULUM_INBOUND_MAX_BUG_DISTANCE 56.0f +#define CURRICULUM_INBOUND_BUG_DISTANCE_STEP 4.0f +#define PI_F 3.14159265358979323846f +#define TWO_PI (2.0f * PI_F) +#define CHIRP_HISTORY 4 +#define CHIRP_RINGS 5 +#define MAX_CHIRP_SLICES 16 +#define ECHO_QUEUE_TICKS 256 +#define CORNER_REFLECTORS 1 +#define AUDIO_VOICES 8 +#define 
AUDIO_SAMPLE_RATE 48000 +#define AUDIO_MIN_HZ 600.0f +#define AUDIO_MAX_HZ 3600.0f +#define AUDIO_VOLUME 0.22f +#define RECORD_MAX_VOICES 16 +#define CHIRP_PERF_FLOOR 0.05f +#define CHIRP_COST 0.0f +#define MAX_CHIRP_AGE_TICKS 30 +#define MAX_CHIRPS_PER_EPISODE 15 + +#define ECHO_STATIC 0 +#define ECHO_BUG 1 typedef struct ChirpEvent { float x; float y; - float source_x[BAT_MAX_CHIRP_SLICES]; - float source_y[BAT_MAX_CHIRP_SLICES]; + float source_x[MAX_CHIRP_SLICES]; + float source_y[MAX_CHIRP_SLICES]; float start_freq; float end_freq; float duration; @@ -99,7 +99,7 @@ typedef struct ChirpEvent { } ChirpEvent; typedef struct EchoBucket { - float energy[2][BAT_FREQ_BINS]; + float energy[2][FREQ_BINS]; float bug_energy; float bug_path; int tick; @@ -153,8 +153,8 @@ typedef struct Client { int audio_ready; int last_audio_chirp_serial; int audio_voice_cursor; - Sound chirp_sounds[BAT_AUDIO_VOICES]; - int chirp_sound_loaded[BAT_AUDIO_VOICES]; + Sound chirp_sounds[AUDIO_VOICES]; + int chirp_sound_loaded[AUDIO_VOICES]; int recording_initialized; int recording_finalized; int record_frame; @@ -169,7 +169,7 @@ typedef struct Client { char record_frame_dir[256]; char record_wav_path[256]; char record_mp4_path[256]; - BatRecordVoice record_voices[BAT_RECORD_MAX_VOICES]; + BatRecordVoice record_voices[RECORD_MAX_VOICES]; #endif } Client; @@ -197,20 +197,20 @@ typedef struct Bat { int curriculum_successes_at_level; float curriculum_start_bug_distance; - float bat_x; - float bat_y; - float bat_vx; - float bat_vy; - float bat_heading; - float bat_turn_velocity; + float x; + float y; + float vx; + float vy; + float heading; + float turn_velocity; float ear_separation_scale; float ear_rear_gain; float ear_front_gain; float ear_side_gain; - float bat_max_speed; - float bat_min_speed; - float bat_accel; - float bat_turn_rate; + float max_speed; + float min_speed; + float accel; + float turn_rate; float bug_x; float bug_y; @@ -237,9 +237,9 @@ typedef struct Bat { float 
last_chirp_start_freq; float last_chirp_end_freq; float last_chirp_duration; - ChirpEvent chirps[BAT_CHIRP_HISTORY]; + ChirpEvent chirps[CHIRP_HISTORY]; int chirp_head; - EchoBucket echo_queue[BAT_ECHO_QUEUE_TICKS]; + EchoBucket echo_queue[ECHO_QUEUE_TICKS]; int chirps_emitted_episode; int audio_chirp_serial; int chirps_overlapped; @@ -267,8 +267,8 @@ typedef struct Bat { float tick_bug_echo_path; float last_bug_echo_path; float last_bug_echo_expected_tick; - float last_bug_echo_bat_x; - float last_bug_echo_bat_y; + float last_bug_echo_x; + float last_bug_echo_y; float collision_penalty; float prev_bug_dist; float start_bug_dist; @@ -277,13 +277,13 @@ typedef struct Bat { unsigned int rng; } Bat; -static inline unsigned int bat_rand(Bat* env) { +static inline unsigned int rng_next(Bat* env) { env->rng = env->rng * 1664525u + 1013904223u; return env->rng; } -static inline float bat_randf(Bat* env) { - return (bat_rand(env) >> 8) * (1.0f / 16777216.0f); +static inline float randf(Bat* env) { + return (rng_next(env) >> 8) * (1.0f / 16777216.0f); } static inline float bat_clampf(float v, float lo, float hi) { @@ -292,34 +292,34 @@ static inline float bat_clampf(float v, float lo, float hi) { return v; } -static inline int bat_action_index(float v, int n) { +static inline int action_index(float v, int n) { int idx = (int)v; return idx; } -static inline float bat_chirp_duration_seconds(float duration_norm) { +static inline float chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * duration_norm; } #include "bat_audio.h" -static inline float bat_chirp_ring_radius(float age_seconds, float slice, +static inline float chirp_ring_radius(float age_seconds, float slice, float duration_seconds, float sound_speed) { float ring_age = age_seconds - slice * duration_seconds; if (ring_age < 0.0f) return 0.0f; return sound_speed * ring_age; } -static inline float bat_chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { +static inline float 
chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { return ((slice_idx + 0.5f) / (float)chirp->slice_count) * - chirp->duration / BAT_TICK_RATE; + chirp->duration / TICK_RATE; } -static inline void bat_chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, +static inline void chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, float* source_x, float* source_y) { int scheduled = chirp->slices_scheduled; if (slice_idx >= 0 && slice_idx < scheduled && - slice_idx < BAT_MAX_CHIRP_SLICES) { + slice_idx < MAX_CHIRP_SLICES) { *source_x = chirp->source_x[slice_idx]; *source_y = chirp->source_y[slice_idx]; return; @@ -328,35 +328,35 @@ static inline void bat_chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, *source_y = chirp->y; } -static inline void bat_chirp_source_for_fraction(ChirpEvent* chirp, float slice, +static inline void chirp_source_for_fraction(ChirpEvent* chirp, float slice, float* source_x, float* source_y) { int slices = chirp->slice_count; int slice_idx = (int)floorf(slice * (float)slices); if (slice_idx >= slices) slice_idx = slices - 1; - bat_chirp_source_for_slice(chirp, slice_idx, source_x, source_y); + chirp_source_for_slice(chirp, slice_idx, source_x, source_y); } -static inline float bat_echo_time_seconds(float distance, float sound_speed) { +static inline float echo_time_seconds(float distance, float sound_speed) { return 2.0f * distance / sound_speed; } -static inline float bat_chirp_age_norm_denominator(Bat* env) { - float travel_ticks = BAT_MAX_ECHO_RANGE / env->sound_speed / BAT_TICK_RATE; - float chirp_ticks = bat_chirp_duration_seconds(1.0f) / BAT_TICK_RATE; +static inline float chirp_age_norm_denominator(Bat* env) { + float travel_ticks = MAX_ECHO_RANGE / env->sound_speed / TICK_RATE; + float chirp_ticks = chirp_duration_seconds(1.0f) / TICK_RATE; return 1.25f * (travel_ticks + chirp_ticks); } -static inline float bat_norm_bin(int idx, int count) { +static inline float norm_bin(int idx, int count) { return idx / (float)(count - 
1); } -static inline float bat_dist(float ax, float ay, float bx, float by) { +static inline float dist(float ax, float ay, float bx, float by) { float dx = bx - ax; float dy = by - ay; return sqrtf(dx*dx + dy*dy); } -static inline void bat_norm_vec(float x, float y, float* ox, float* oy) { +static inline void norm_vec(float x, float y, float* ox, float* oy) { float l = sqrtf(x*x + y*y); if (l <= 0.000001f) { *ox = 1.0f; @@ -367,14 +367,14 @@ static inline void bat_norm_vec(float x, float y, float* ox, float* oy) { *oy = y / l; } -static inline bool bat_circle_rect_collision(float cx, float cy, float r, +static inline bool circle_rect_collision(float cx, float cy, float r, float rx, float ry, float rw, float rh) { float px = bat_clampf(cx, rx, rx + rw); float py = bat_clampf(cy, ry, ry + rh); - return bat_dist(cx, cy, px, py) <= r; + return dist(cx, cy, px, py) <= r; } -static inline bool bat_rects_overlap(float ax, float ay, float aw, float ah, +static inline bool rects_overlap(float ax, float ay, float aw, float ah, float bx, float by, float bw, float bh, float margin) { return ax - margin < bx + bw && ax + aw + margin > bx && @@ -382,171 +382,171 @@ static inline bool bat_rects_overlap(float ax, float ay, float aw, float ah, ay + ah + margin > by; } -static inline void bat_sample_in_quadrant(Bat* env, int quadrant, float radius, +static inline void sample_in_quadrant(Bat* env, int quadrant, float radius, float* x, float* y) { int east = quadrant & 1; int south = (quadrant >> 1) & 1; float margin = fmaxf(6.0f, radius + 3.0f); - float half_w = BAT_WIDTH * 0.5f; - float half_h = BAT_HEIGHT * 0.5f; + float half_w = ARENA_WIDTH * 0.5f; + float half_h = ARENA_HEIGHT * 0.5f; float min_x = (east ? half_w : 0.0f) + margin; - float max_x = (east ? (float)BAT_WIDTH : half_w) - margin; + float max_x = (east ? (float)ARENA_WIDTH : half_w) - margin; float min_y = (south ? half_h : 0.0f) + margin; - float max_y = (south ? 
(float)BAT_HEIGHT : half_h) - margin; - *x = min_x + bat_randf(env) * (max_x - min_x); - *y = min_y + bat_randf(env) * (max_y - min_y); + float max_y = (south ? (float)ARENA_HEIGHT : half_h) - margin; + *x = min_x + randf(env) * (max_x - min_x); + *y = min_y + randf(env) * (max_y - min_y); } -static inline void bat_sample_spawns(Bat* env) { - int bat_quadrant = (int)(bat_randf(env) * 4.0f); - int bug_quadrant = bat_quadrant ^ 3; - float min_sep = fminf(BAT_WIDTH, BAT_HEIGHT) * 0.31f; +static inline void sample_spawns(Bat* env) { + int agent_quadrant = (int)(randf(env) * 4.0f); + int bug_quadrant = agent_quadrant ^ 3; + float min_sep = fminf(ARENA_WIDTH, ARENA_HEIGHT) * 0.31f; for (int attempt = 0; attempt < 64; attempt++) { - bat_sample_in_quadrant(env, bat_quadrant, BAT_RADIUS, &env->bat_x, &env->bat_y); - bat_sample_in_quadrant(env, bug_quadrant, BAT_BUG_RADIUS, &env->bug_x, &env->bug_y); - if (bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) >= min_sep) { + sample_in_quadrant(env, agent_quadrant, AGENT_RADIUS, &env->x, &env->y); + sample_in_quadrant(env, bug_quadrant, BUG_RADIUS, &env->bug_x, &env->bug_y); + if (dist(env->x, env->y, env->bug_x, env->bug_y) >= min_sep) { return; } } float qx[4] = {0.25f, 0.75f, 0.25f, 0.75f}; float qy[4] = {0.25f, 0.25f, 0.75f, 0.75f}; - env->bat_x = BAT_WIDTH * qx[bat_quadrant]; - env->bat_y = BAT_HEIGHT * qy[bat_quadrant]; - env->bug_x = BAT_WIDTH * qx[bug_quadrant]; - env->bug_y = BAT_HEIGHT * qy[bug_quadrant]; + env->x = ARENA_WIDTH * qx[agent_quadrant]; + env->y = ARENA_HEIGHT * qy[agent_quadrant]; + env->bug_x = ARENA_WIDTH * qx[bug_quadrant]; + env->bug_y = ARENA_HEIGHT * qy[bug_quadrant]; } -static inline int bat_curriculum_obstacles(Bat* env) { +static inline int curriculum_obstacles(Bat* env) { int step = env->curriculum_obstacle_step; - int count = BAT_CURRICULUM_START_OBSTACLES; + int count = CURRICULUM_START_OBSTACLES; if (env->curriculum_level > 0) { - count = BAT_CURRICULUM_START_OBSTACLES + 1 + 
(env->curriculum_level - 1) / step; + count = CURRICULUM_START_OBSTACLES + 1 + (env->curriculum_level - 1) / step; } - if (count > BAT_CURRICULUM_MAX_OBSTACLES) count = BAT_CURRICULUM_MAX_OBSTACLES; - if (count > BAT_MAX_OBSTACLES) count = BAT_MAX_OBSTACLES; + if (count > CURRICULUM_MAX_OBSTACLES) count = CURRICULUM_MAX_OBSTACLES; + if (count > MAX_OBSTACLES) count = MAX_OBSTACLES; return count; } -static inline float bat_curriculum_bug_distance(Bat* env) { +static inline float curriculum_bug_distance(Bat* env) { float distance = env->curriculum_start_bug_distance - + BAT_CURRICULUM_BUG_DISTANCE_STEP * env->curriculum_level; + + CURRICULUM_BUG_DISTANCE_STEP * env->curriculum_level; return bat_clampf(distance, env->curriculum_start_bug_distance, - BAT_CURRICULUM_MAX_BUG_DISTANCE); + CURRICULUM_MAX_BUG_DISTANCE); } -static inline bool bat_curriculum_inbound_enabled(Bat* env) { - return env->curriculum_level >= BAT_CURRICULUM_INBOUND_START_LEVEL; +static inline bool curriculum_inbound_enabled(Bat* env) { + return env->curriculum_level >= CURRICULUM_INBOUND_START_LEVEL; } -static inline float bat_curriculum_inbound_bug_distance(Bat* env) { - float base = BAT_CURRICULUM_MAX_BUG_DISTANCE; - int extra_levels = env->curriculum_level - BAT_CURRICULUM_INBOUND_START_LEVEL + 1; - float distance = base + BAT_CURRICULUM_INBOUND_BUG_DISTANCE_STEP * extra_levels; - return bat_clampf(distance, base, BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE); +static inline float curriculum_inbound_bug_distance(Bat* env) { + float base = CURRICULUM_MAX_BUG_DISTANCE; + int extra_levels = env->curriculum_level - CURRICULUM_INBOUND_START_LEVEL + 1; + float distance = base + CURRICULUM_INBOUND_BUG_DISTANCE_STEP * extra_levels; + return bat_clampf(distance, base, CURRICULUM_INBOUND_MAX_BUG_DISTANCE); } -static inline float bat_curriculum_spawn_distance(Bat* env) { - if (bat_curriculum_inbound_enabled(env)) { - return bat_curriculum_inbound_bug_distance(env); +static inline float 
curriculum_spawn_distance(Bat* env) { + if (curriculum_inbound_enabled(env)) { + return curriculum_inbound_bug_distance(env); } - return bat_curriculum_bug_distance(env); + return curriculum_bug_distance(env); } -static inline float bat_curriculum_bug_speed(Bat* env) { - float speed = BAT_BUG_SPEED; - if (bat_curriculum_inbound_enabled(env)) { - speed *= BAT_INBOUND_BUG_SPEED_MULTIPLIER; +static inline float curriculum_bug_speed(Bat* env) { + float speed = BUG_SPEED; + if (curriculum_inbound_enabled(env)) { + speed *= INBOUND_BUG_SPEED_MULTIPLIER; } return speed; } -static inline float bat_curriculum_bug_maneuver_strength(Bat* env) { - if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) return 0.0f; - int extra_levels = env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL; +static inline float curriculum_bug_maneuver_strength(Bat* env) { + if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) return 0.0f; + int extra_levels = env->curriculum_level - BUG_MANEUVER_START_LEVEL; float ramp = extra_levels <= 0 ? 
0.25f : 0.75f + 0.25f * (extra_levels - 1); - return BAT_BUG_MANEUVER_STRENGTH * bat_clampf(ramp, 0.0f, 1.0f); + return BUG_MANEUVER_STRENGTH * bat_clampf(ramp, 0.0f, 1.0f); } -static inline float bat_curriculum_bug_maneuver_frequency(Bat* env) { - if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) { - return BAT_BUG_MANEUVER_FREQUENCY; +static inline float curriculum_bug_maneuver_frequency(Bat* env) { + if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) { + return BUG_MANEUVER_FREQUENCY; } - int extra_levels = env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL; + int extra_levels = env->curriculum_level - BUG_MANEUVER_START_LEVEL; float multiplier = 1.0f + 0.50f * extra_levels; - return BAT_BUG_MANEUVER_FREQUENCY * bat_clampf(multiplier, 1.0f, 2.5f); + return BUG_MANEUVER_FREQUENCY * bat_clampf(multiplier, 1.0f, 2.5f); } -static inline float bat_chirps_used_ratio(Bat* env) { +static inline float chirps_used_ratio(Bat* env) { return bat_clampf(env->chirps_emitted_episode / (float)env->chirp_budget, 0.0f, 1.0f); } -static inline float bat_chirp_efficiency(Bat* env) { - return 0.5f + 0.5f * (1.0f - bat_chirps_used_ratio(env)); +static inline float chirp_efficiency(Bat* env) { + return 0.5f + 0.5f * (1.0f - chirps_used_ratio(env)); } -static inline float bat_chirp_perf(Bat* env) { - float reference_chirps = fmaxf(1.0f, (float)BAT_MAX_CHIRPS_PER_EPISODE); +static inline float chirp_perf(Bat* env) { + float reference_chirps = fmaxf(1.0f, (float)MAX_CHIRPS_PER_EPISODE); float raw = 1.0f - env->chirps_emitted_episode / reference_chirps; - return bat_clampf(raw, BAT_CHIRP_PERF_FLOOR, 1.0f); + return bat_clampf(raw, CHIRP_PERF_FLOOR, 1.0f); } -static inline float bat_norm_range(float value, float lo, float hi) { +static inline float norm_range(float value, float lo, float hi) { float span = hi - lo; return bat_clampf((value - lo) / span, 0.0f, 1.0f); } -static inline float bat_curriculum_distance_difficulty(Bat* env) { - float max_distance = 
fmaxf(BAT_CURRICULUM_MAX_BUG_DISTANCE, - BAT_CURRICULUM_INBOUND_MAX_BUG_DISTANCE); - return bat_norm_range(env->start_bug_dist, +static inline float curriculum_distance_difficulty(Bat* env) { + float max_distance = fmaxf(CURRICULUM_MAX_BUG_DISTANCE, + CURRICULUM_INBOUND_MAX_BUG_DISTANCE); + return norm_range(env->start_bug_dist, env->curriculum_start_bug_distance, max_distance); } -static inline float bat_curriculum_obstacle_difficulty(Bat* env) { - return bat_norm_range((float)env->num_obstacles, - (float)BAT_CURRICULUM_START_OBSTACLES, (float)BAT_CURRICULUM_MAX_OBSTACLES); +static inline float curriculum_obstacle_difficulty(Bat* env) { + return norm_range((float)env->num_obstacles, + (float)CURRICULUM_START_OBSTACLES, (float)CURRICULUM_MAX_OBSTACLES); } -static inline float bat_curriculum_motion_difficulty(Bat* env) { - if (env->curriculum_level < BAT_BUG_MANEUVER_START_LEVEL) return 0.0f; - float span = (float)(BAT_CURRICULUM_INBOUND_START_LEVEL + 4 - BAT_BUG_MANEUVER_START_LEVEL); - return bat_clampf((env->curriculum_level - BAT_BUG_MANEUVER_START_LEVEL + 1) / span, +static inline float curriculum_motion_difficulty(Bat* env) { + if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) return 0.0f; + float span = (float)(CURRICULUM_INBOUND_START_LEVEL + 4 - BUG_MANEUVER_START_LEVEL); + return bat_clampf((env->curriculum_level - BUG_MANEUVER_START_LEVEL + 1) / span, 0.0f, 1.0f); } -static inline float bat_curriculum_difficulty(Bat* env) { - float distance = bat_curriculum_distance_difficulty(env); - float obstacles = bat_curriculum_obstacle_difficulty(env); +static inline float curriculum_difficulty(Bat* env) { + float distance = curriculum_distance_difficulty(env); + float obstacles = curriculum_obstacle_difficulty(env); float active_weight = 0.0f; float weighted = 0.0f; - if (BAT_CURRICULUM_MAX_BUG_DISTANCE > env->curriculum_start_bug_distance) { + if (CURRICULUM_MAX_BUG_DISTANCE > env->curriculum_start_bug_distance) { weighted += 0.5f * distance; active_weight += 
0.5f; } - if (BAT_CURRICULUM_MAX_OBSTACLES > BAT_CURRICULUM_START_OBSTACLES) { + if (CURRICULUM_MAX_OBSTACLES > CURRICULUM_START_OBSTACLES) { weighted += 0.5f * obstacles; active_weight += 0.5f; } - float motion = bat_curriculum_motion_difficulty(env); - if (BAT_BUG_MANEUVER_STRENGTH > 0.0f) { + float motion = curriculum_motion_difficulty(env); + if (BUG_MANEUVER_STRENGTH > 0.0f) { weighted += 0.5f * motion; active_weight += 0.5f; } return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } -static inline float bat_success_reward(Bat* env) { - return env->chirp_efficiency_reward * bat_chirp_efficiency(env); +static inline float success_reward(Bat* env) { + return env->chirp_efficiency_reward * chirp_efficiency(env); } -static inline float bat_current_distance_ratio(Bat* env) { - float dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); - return dist / env->start_bug_dist; +static inline float current_distance_ratio(Bat* env) { + float distance = dist(env->x, env->y, env->bug_x, env->bug_y); + return distance / env->start_bug_dist; } -static inline void bat_accumulate_distance_region(float ratio, float amount, +static inline void accumulate_distance_region(float ratio, float amount, float* far, float* mid, float* near) { if (ratio > 0.66f) { *far += amount; @@ -557,75 +557,75 @@ static inline void bat_accumulate_distance_region(float ratio, float amount, } } -static inline void bat_record_distance_tick(Bat* env) { - bat_accumulate_distance_region(bat_current_distance_ratio(env), 1.0f, +static inline void record_distance_tick(Bat* env) { + accumulate_distance_region(current_distance_ratio(env), 1.0f, &env->ticks_far, &env->ticks_mid, &env->ticks_near); } -static inline void bat_record_chirp_timing(Bat* env) { +static inline void record_chirp_timing(Bat* env) { if (env->first_chirp_tick < 0.0f) { env->first_chirp_tick = (float)env->tick; } env->chirp_tick_sum += (float)env->tick; - bat_accumulate_distance_region(bat_current_distance_ratio(env), 1.0f, + 
accumulate_distance_region(current_distance_ratio(env), 1.0f, &env->chirps_far, &env->chirps_mid, &env->chirps_near); } -static inline void bat_sample_spawns_at_distance(Bat* env, float target_distance) { - float margin = fmaxf(6.0f, fmaxf(BAT_RADIUS, BAT_BUG_RADIUS) + 3.0f); +static inline void sample_spawns_at_distance(Bat* env, float target_distance) { + float margin = fmaxf(6.0f, fmaxf(AGENT_RADIUS, BUG_RADIUS) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { - float angle = bat_randf(env) * BAT_TWO_PI - BAT_PI; + float angle = randf(env) * TWO_PI - PI_F; float dx = cosf(angle) * target_distance; float dy = sinf(angle) * target_distance; float min_bat_x = fmaxf(margin, margin - dx); - float max_bat_x = fminf(BAT_WIDTH - margin, BAT_WIDTH - margin - dx); + float max_bat_x = fminf(ARENA_WIDTH - margin, ARENA_WIDTH - margin - dx); float min_bat_y = fmaxf(margin, margin - dy); - float max_bat_y = fminf(BAT_HEIGHT - margin, BAT_HEIGHT - margin - dy); + float max_bat_y = fminf(ARENA_HEIGHT - margin, ARENA_HEIGHT - margin - dy); if (max_bat_x < min_bat_x || max_bat_y < min_bat_y) continue; - env->bat_x = min_bat_x + bat_randf(env) * (max_bat_x - min_bat_x); - env->bat_y = min_bat_y + bat_randf(env) * (max_bat_y - min_bat_y); - env->bug_x = env->bat_x + dx; - env->bug_y = env->bat_y + dy; + env->x = min_bat_x + randf(env) * (max_bat_x - min_bat_x); + env->y = min_bat_y + randf(env) * (max_bat_y - min_bat_y); + env->bug_x = env->x + dx; + env->bug_y = env->y + dy; return; } - bat_sample_spawns(env); + sample_spawns(env); } -static inline void bat_set_bug_velocity(Bat* env, float heading, float speed) { +static inline void set_bug_velocity(Bat* env, float heading, float speed) { env->bug_base_heading = heading; env->bug_vx = cosf(heading) * speed; env->bug_vy = sinf(heading) * speed; } -static inline void bat_reset_bug_motion(Bat* env) { - env->bug_inbound = bat_curriculum_inbound_enabled(env) ? 
1 : 0; - float strength = bat_curriculum_bug_maneuver_strength(env); - env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(bat_rand(env) % 3u) : 0; - env->bug_maneuver_phase = bat_randf(env) * BAT_TWO_PI; - env->bug_maneuver_rate = BAT_TWO_PI * bat_curriculum_bug_maneuver_frequency(env) * - (0.75f + 0.50f * bat_randf(env)); - env->bug_maneuver_sign = (bat_rand(env) & 1u) ? -1.0f : 1.0f; +static inline void reset_bug_motion(Bat* env) { + env->bug_inbound = curriculum_inbound_enabled(env) ? 1 : 0; + float strength = curriculum_bug_maneuver_strength(env); + env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(rng_next(env) % 3u) : 0; + env->bug_maneuver_phase = randf(env) * TWO_PI; + env->bug_maneuver_rate = TWO_PI * curriculum_bug_maneuver_frequency(env) * + (0.75f + 0.50f * randf(env)); + env->bug_maneuver_sign = (rng_next(env) & 1u) ? -1.0f : 1.0f; - float speed = bat_curriculum_bug_speed(env); + float speed = curriculum_bug_speed(env); if (env->bug_inbound) { float tx, ty; - bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); - float noise = BAT_INBOUND_HEADING_NOISE_DEGREES * (BAT_PI / 180.0f); - float heading = atan2f(ty, tx) + (2.0f * bat_randf(env) - 1.0f) * noise; - bat_set_bug_velocity(env, heading, speed); + norm_vec(env->x - env->bug_x, env->y - env->bug_y, &tx, &ty); + float noise = INBOUND_HEADING_NOISE_DEGREES * (PI_F / 180.0f); + float heading = atan2f(ty, tx) + (2.0f * randf(env) - 1.0f) * noise; + set_bug_velocity(env, heading, speed); } else { - float heading = bat_randf(env) * BAT_TWO_PI - BAT_PI; - bat_set_bug_velocity(env, heading, speed); + float heading = randf(env) * TWO_PI - PI_F; + set_bug_velocity(env, heading, speed); } } -static inline void bat_apply_curriculum(Bat* env) { - env->num_obstacles = bat_curriculum_obstacles(env); +static inline void apply_curriculum(Bat* env) { + env->num_obstacles = curriculum_obstacles(env); } -static inline void bat_advance_curriculum(Bat* env) { +static inline void 
advance_curriculum(Bat* env) { env->curriculum_successes_at_level += 1; if (env->curriculum_successes_at_level >= env->curriculum_successes_per_level) { env->curriculum_level += 1; @@ -633,16 +633,16 @@ static inline void bat_advance_curriculum(Bat* env) { } } -static inline bool bat_obstacle_clear(Bat* env, int idx, float x, float y, +static inline bool obstacle_clear(Bat* env, int idx, float x, float y, float w, float h) { - if (bat_circle_rect_collision(env->bat_x, env->bat_y, BAT_RADIUS + 2.0f, x, y, w, h)) { + if (circle_rect_collision(env->x, env->y, AGENT_RADIUS + 2.0f, x, y, w, h)) { return false; } - if (bat_circle_rect_collision(env->bug_x, env->bug_y, BAT_BUG_RADIUS + 2.0f, x, y, w, h)) { + if (circle_rect_collision(env->bug_x, env->bug_y, BUG_RADIUS + 2.0f, x, y, w, h)) { return false; } for (int j = 0; j < idx; j++) { - if (bat_rects_overlap(x, y, w, h, + if (rects_overlap(x, y, w, h, env->obstacle_x[j], env->obstacle_y[j], env->obstacle_w[j], env->obstacle_h[j], 3.0f)) { return false; } @@ -654,12 +654,12 @@ static inline void generate_obstacles(Bat* env) { for (int i = 0; i < env->num_obstacles; i++) { bool placed = false; for (int attempt = 0; attempt < 96; attempt++) { - float w = 3.0f + 5.0f * bat_randf(env); - float h = 3.0f + 5.0f * bat_randf(env); + float w = 3.0f + 5.0f * randf(env); + float h = 3.0f + 5.0f * randf(env); float margin = 4.0f; - float x = margin + bat_randf(env) * (BAT_WIDTH - w - 2.0f * margin); - float y = margin + bat_randf(env) * (BAT_HEIGHT - h - 2.0f * margin); - if (bat_obstacle_clear(env, i, x, y, w, h)) { + float x = margin + randf(env) * (ARENA_WIDTH - w - 2.0f * margin); + float y = margin + randf(env) * (ARENA_HEIGHT - h - 2.0f * margin); + if (obstacle_clear(env, i, x, y, w, h)) { env->obstacle_x[i] = x; env->obstacle_y[i] = y; env->obstacle_w[i] = w; @@ -671,8 +671,8 @@ static inline void generate_obstacles(Bat* env) { if (!placed) { float w = 6.0f; float h = 6.0f; - float x = BAT_WIDTH * (0.30f + 0.20f * (i % 2)) 
- w * 0.5f; - float y = BAT_HEIGHT * (0.30f + 0.20f * ((i + 1) % 2)) - h * 0.5f; + float x = ARENA_WIDTH * (0.30f + 0.20f * (i % 2)) - w * 0.5f; + float y = ARENA_HEIGHT * (0.30f + 0.20f * ((i + 1) % 2)) - h * 0.5f; env->obstacle_x[i] = x; env->obstacle_y[i] = y; env->obstacle_w[i] = w; @@ -683,16 +683,16 @@ static inline void generate_obstacles(Bat* env) { void init(Bat* env) { env->tick = 0; - env->obstacle_x = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); - env->obstacle_y = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); - env->obstacle_w = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); - env->obstacle_h = (float*)calloc(BAT_MAX_OBSTACLES, sizeof(float)); + env->obstacle_x = (float*)calloc(MAX_OBSTACLES, sizeof(float)); + env->obstacle_y = (float*)calloc(MAX_OBSTACLES, sizeof(float)); + env->obstacle_w = (float*)calloc(MAX_OBSTACLES, sizeof(float)); + env->obstacle_h = (float*)calloc(MAX_OBSTACLES, sizeof(float)); } void allocate(Bat* env) { init(env); - env->observations = (float*)calloc(BAT_OBS_SIZE, sizeof(float)); - env->actions = (float*)calloc(BAT_NUM_ACTIONS, sizeof(float)); + env->observations = (float*)calloc(OBS_SIZE, sizeof(float)); + env->actions = (float*)calloc(NUM_ACTIONS, sizeof(float)); env->rewards = (float*)calloc(1, sizeof(float)); env->terminals = (float*)calloc(1, sizeof(float)); } @@ -713,13 +713,13 @@ void free_allocated(Bat* env) { } static inline void add_log(Bat* env, float success, float collision, float timeout) { - float curriculum_difficulty = bat_curriculum_difficulty(env); - float distance_difficulty = bat_curriculum_distance_difficulty(env); - float obstacle_difficulty = bat_curriculum_obstacle_difficulty(env); - float motion_difficulty = bat_curriculum_motion_difficulty(env); - float chirp_efficiency = bat_chirp_efficiency(env); - float chirp_perf = bat_chirp_perf(env); - env->log.perf += success * curriculum_difficulty * chirp_perf; + float curriculum_difficulty_value = curriculum_difficulty(env); + float 
distance_difficulty = curriculum_distance_difficulty(env); + float obstacle_difficulty = curriculum_obstacle_difficulty(env); + float motion_difficulty = curriculum_motion_difficulty(env); + float chirp_efficiency_value = chirp_efficiency(env); + float chirp_perf_value = chirp_perf(env); + env->log.perf += success * curriculum_difficulty_value * chirp_perf_value; env->log.base_perf += success; env->log.score += env->episode_return; env->log.episode_return += env->episode_return; @@ -727,17 +727,17 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.collision += collision; env->log.timeout += timeout; env->log.curriculum_level += env->curriculum_level; - env->log.curriculum_difficulty += curriculum_difficulty; - env->log.curriculum_perf += success * curriculum_difficulty; + env->log.curriculum_difficulty += curriculum_difficulty_value; + env->log.curriculum_perf += success * curriculum_difficulty_value; env->log.curriculum_distance_difficulty += distance_difficulty; env->log.curriculum_obstacle_difficulty += obstacle_difficulty; env->log.curriculum_motion_difficulty += motion_difficulty; env->log.num_obstacles += env->num_obstacles; env->log.chirps_emitted += env->chirps_emitted_episode; env->log.chirp_budget += env->chirp_budget; - env->log.chirps_used_ratio += bat_chirps_used_ratio(env); - env->log.chirp_efficiency += chirp_efficiency; - env->log.chirp_perf += chirp_perf; + env->log.chirps_used_ratio += chirps_used_ratio(env); + env->log.chirp_efficiency += chirp_efficiency_value; + env->log.chirp_perf += chirp_perf_value; float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; env->log.far_chirp_fraction += env->chirps_far / chirps; @@ -754,10 +754,10 @@ static inline void add_log(Bat* env, float success, float collision, float timeo } env->log.chirp_tempo_ratio += bat_clampf(tempo_ratio, 0.0f, 10.0f); env->log.first_chirp_tick_norm += 
env->first_chirp_tick >= 0.0f - ? bat_clampf(env->first_chirp_tick / (float)BAT_MAX_STEPS, 0.0f, 1.0f) + ? bat_clampf(env->first_chirp_tick / (float)MAX_STEPS, 0.0f, 1.0f) : 1.0f; env->log.mean_chirp_tick_norm += env->chirps_emitted_episode > 0 - ? bat_clampf((env->chirp_tick_sum / chirps) / (float)BAT_MAX_STEPS, 0.0f, 1.0f) + ? bat_clampf((env->chirp_tick_sum / chirps) / (float)MAX_STEPS, 0.0f, 1.0f) : 1.0f; if (env->chirps_emitted_episode > 0) { env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; @@ -766,48 +766,48 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.n += 1.0f; } -static inline int bat_freq_bin_index(Bat* env, float freq_norm) { +static inline int freq_bin_index(Bat* env, float freq_norm) { (void)env; - int bins = BAT_FREQ_BINS; + int bins = FREQ_BINS; int bin = (int)(freq_norm * bins); if (bin >= bins) bin = bins - 1; return bin; } -static inline void bat_clear_echo_bucket(EchoBucket* bucket) { +static inline void clear_echo_bucket(EchoBucket* bucket) { memset(bucket, 0, sizeof(*bucket)); bucket->bug_path = -1.0f; bucket->tick = -1; } -static inline void bat_clear_echo_queue(Bat* env) { - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { - bat_clear_echo_bucket(&env->echo_queue[i]); +static inline void clear_echo_queue(Bat* env) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { + clear_echo_bucket(&env->echo_queue[i]); } } -static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, +static inline void add_echo_event(Bat* env, int ear, float receive_tick, float freq, float intensity, float path, int source) { if (receive_tick <= env->tick) return; if (intensity <= 0.000001f) return; int arrival_tick = (int)ceilf(receive_tick); int delay = arrival_tick - env->tick; - if (delay <= 0 || delay >= BAT_ECHO_QUEUE_TICKS) return; - int slot = arrival_tick % BAT_ECHO_QUEUE_TICKS; + if (delay <= 0 || delay >= ECHO_QUEUE_TICKS) return; + int slot = arrival_tick % 
ECHO_QUEUE_TICKS; EchoBucket* bucket = &env->echo_queue[slot]; if (bucket->tick != arrival_tick) { - bat_clear_echo_bucket(bucket); + clear_echo_bucket(bucket); bucket->tick = arrival_tick; } int ear_idx = ear == 0 ? 0 : 1; - int bin = bat_freq_bin_index(env, freq); + int bin = freq_bin_index(env, freq); bucket->energy[ear_idx][bin] += intensity; - if (source == BAT_ECHO_BUG) { + if (source == ECHO_BUG) { float sideband = intensity * env->bug_wing_sideband_gain; if (sideband > 0.000001f) { if (bin > 0) bucket->energy[ear_idx][bin - 1] += sideband; - if (bin + 1 < BAT_FREQ_BINS) bucket->energy[ear_idx][bin + 1] += sideband; + if (bin + 1 < FREQ_BINS) bucket->energy[ear_idx][bin + 1] += sideband; } bucket->bug_energy += intensity; if (bucket->bug_path < 0.0f || path < bucket->bug_path) { @@ -816,55 +816,55 @@ static inline void bat_add_echo_event(Bat* env, int ear, float receive_tick, } } -static inline void bat_ear_positions(Bat* env, float* left_x, float* left_y, +static inline void ear_positions(Bat* env, float* left_x, float* left_y, float* right_x, float* right_y) { - float lx = -sinf(env->bat_heading); - float ly = cosf(env->bat_heading); - float ear_sep = BAT_RADIUS * env->ear_separation_scale; - *left_x = env->bat_x - lx * ear_sep * 0.5f; - *left_y = env->bat_y - ly * ear_sep * 0.5f; - *right_x = env->bat_x + lx * ear_sep * 0.5f; - *right_y = env->bat_y + ly * ear_sep * 0.5f; -} - -static inline float bat_expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { - float fx = cosf(env->bat_heading); - float fy = sinf(env->bat_heading); + float lx = -sinf(env->heading); + float ly = cosf(env->heading); + float ear_sep = AGENT_RADIUS * env->ear_separation_scale; + *left_x = env->x - lx * ear_sep * 0.5f; + *left_y = env->y - ly * ear_sep * 0.5f; + *right_x = env->x + lx * ear_sep * 0.5f; + *right_y = env->y + ly * ear_sep * 0.5f; +} + +static inline float expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { + float fx = cosf(env->heading); + float fy = 
sinf(env->heading); float source_x, source_y; - bat_chirp_source_for_slice(chirp, 0, &source_x, &source_y); + chirp_source_for_slice(chirp, 0, &source_x, &source_y); float ux, uy; - bat_norm_vec(env->bug_x - source_x, env->bug_y - source_y, &ux, &uy); + norm_vec(env->bug_x - source_x, env->bug_y - source_y, &ux, &uy); float forward = ux * fx + uy * fy; if (forward < -0.35f) return -1.0f; float left_ear_x, left_ear_y, right_ear_x, right_ear_y; - bat_ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); - float source_path = bat_dist(source_x, source_y, env->bug_x, env->bug_y); - float left_path = source_path + bat_dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); - float right_path = source_path + bat_dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); + ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); + float source_path = dist(source_x, source_y, env->bug_x, env->bug_y); + float left_path = source_path + dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); + float right_path = source_path + dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); float best_path = -1.0f; - if (left_path <= BAT_MAX_ECHO_RANGE) best_path = left_path; - if (right_path <= BAT_MAX_ECHO_RANGE && (best_path < 0.0f || right_path < best_path)) { + if (left_path <= MAX_ECHO_RANGE) best_path = left_path; + if (right_path <= MAX_ECHO_RANGE && (best_path < 0.0f || right_path < best_path)) { best_path = right_path; } if (best_path < 0.0f) return -1.0f; - float first_slice_ticks = bat_chirp_slice_ticks(chirp, 0); - return chirp->birth_tick + first_slice_ticks + best_path / env->sound_speed / BAT_TICK_RATE; + float first_slice_ticks = chirp_slice_ticks(chirp, 0); + return chirp->birth_tick + first_slice_ticks + best_path / env->sound_speed / TICK_RATE; } -static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, +static inline void schedule_echo(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, float rx, float ry, float rvx, 
float rvy, float strength, int source) { - float fx = cosf(env->bat_heading); - float fy = sinf(env->bat_heading); - float lx = -sinf(env->bat_heading); - float ly = cosf(env->bat_heading); + float fx = cosf(env->heading); + float fy = sinf(env->heading); + float lx = -sinf(env->heading); + float ly = cosf(env->heading); float left_ear_x, left_ear_y, right_ear_x, right_ear_y; - bat_ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); + ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); float ux, uy; - bat_norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); + norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); float forward = ux * fx + uy * fy; if (forward < -0.35f) return; @@ -883,65 +883,65 @@ static inline void bat_schedule_echo(Bat* env, ChirpEvent* chirp, float right_gain = env->ear_rear_gain + env->ear_front_gain * front_gain + env->ear_side_gain * right_side_gain; - float source_path = bat_dist(chirp->x, chirp->y, rx, ry); - float left_path = source_path + bat_dist(rx, ry, left_ear_x, left_ear_y); - float right_path = source_path + bat_dist(rx, ry, right_ear_x, right_ear_y); - if (left_path > BAT_MAX_ECHO_RANGE && right_path > BAT_MAX_ECHO_RANGE) return; + float source_path = dist(chirp->x, chirp->y, rx, ry); + float left_path = source_path + dist(rx, ry, left_ear_x, left_ear_y); + float right_path = source_path + dist(rx, ry, right_ear_x, right_ear_y); + if (left_path > MAX_ECHO_RANGE && right_path > MAX_ECHO_RANGE) return; - float rel_vx = rvx - env->bat_vx; - float rel_vy = rvy - env->bat_vy; + float rel_vx = rvx - env->vx; + float rel_vy = rvy - env->vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + BAT_BUG_SPEED), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->max_speed + BUG_SPEED), -1.0f, 1.0f); float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); - if (left_path <= BAT_MAX_ECHO_RANGE) { + if (left_path <= 
MAX_ECHO_RANGE) { float attenuation = strength / (1.0f + 0.02f * left_path * left_path); - float receive_tick = chirp->birth_tick + slice_ticks + left_path / env->sound_speed / BAT_TICK_RATE; - bat_add_echo_event(env, 0, receive_tick, shifted_freq, attenuation * left_gain, left_path, source); + float receive_tick = chirp->birth_tick + slice_ticks + left_path / env->sound_speed / TICK_RATE; + add_echo_event(env, 0, receive_tick, shifted_freq, attenuation * left_gain, left_path, source); } - if (right_path <= BAT_MAX_ECHO_RANGE) { + if (right_path <= MAX_ECHO_RANGE) { float attenuation = strength / (1.0f + 0.02f * right_path * right_path); - float receive_tick = chirp->birth_tick + slice_ticks + right_path / env->sound_speed / BAT_TICK_RATE; - bat_add_echo_event(env, 1, receive_tick, shifted_freq, attenuation * right_gain, right_path, source); + float receive_tick = chirp->birth_tick + slice_ticks + right_path / env->sound_speed / TICK_RATE; + add_echo_event(env, 1, receive_tick, shifted_freq, attenuation * right_gain, right_path, source); } } -static inline void bat_schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, +static inline void schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, float x1, float y1, float x2, float y2, float strength) { - float len = bat_dist(x1, y1, x2, y2); - int count = (int)(len / BAT_REFLECTOR_SPACING) + 1; + float len = dist(x1, y1, x2, y2); + int count = (int)(len / REFLECTOR_SPACING) + 1; if (count < 1) count = 1; for (int i = 0; i <= count; i++) { float t = count == 0 ? 
0.0f : i / (float)count; float x = x1 + (x2 - x1) * t; float y = y1 + (y2 - y1) * t; - bat_schedule_echo(env, chirp, slice_ticks, freq, x, y, 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, x, y, 0.0f, 0.0f, strength, ECHO_STATIC); } } -static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, +static inline void schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq) { -#if BAT_CORNER_REFLECTORS - float w = (float)BAT_WIDTH; - float h = (float)BAT_HEIGHT; +#if CORNER_REFLECTORS + float w = (float)ARENA_WIDTH; + float h = (float)ARENA_HEIGHT; float strength = env->reflector_strength; - bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.0f, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, h, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, w, h, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, 0.0f, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, h, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.5f * h, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); - bat_schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, - 0.0f, 0.0f, strength, BAT_ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, w, 0.0f, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, 0.0f, h, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, w, h, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, 0.0f, + 0.0f, 0.0f, strength, 
ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, h, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.5f * h, + 0.0f, 0.0f, strength, ECHO_STATIC); + schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, + 0.0f, 0.0f, strength, ECHO_STATIC); #else (void)env; (void)chirp; @@ -950,53 +950,53 @@ static inline void bat_schedule_corner_reflector_echoes(Bat* env, ChirpEvent* ch #endif } -static inline void bat_schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, +static inline void schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq, int i) { float x = env->obstacle_x[i]; float y = env->obstacle_y[i]; float w = env->obstacle_w[i]; float h = env->obstacle_h[i]; - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x + w, y, 0.55f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y + h, x + w, y + h, 0.55f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x, y + h, 0.55f); - bat_schedule_segment_reflectors(env, chirp, slice_ticks, freq, x + w, y, x + w, y + h, 0.55f); + schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x + w, y, 0.55f); + schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y + h, x + w, y + h, 0.55f); + schedule_segment_reflectors(env, chirp, slice_ticks, freq, x, y, x, y + h, 0.55f); + schedule_segment_reflectors(env, chirp, slice_ticks, freq, x + w, y, x + w, y + h, 0.55f); } -static inline void bat_schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, +static inline void schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, int slice_idx) { int slices = chirp->slice_count; - if (slice_idx >= slices || slice_idx >= BAT_MAX_CHIRP_SLICES) { + if (slice_idx >= slices || slice_idx >= MAX_CHIRP_SLICES) { return; } float t = (slice_idx + 0.5f) / (float)slices; - float slice_ticks = bat_chirp_slice_ticks(chirp, slice_idx); + float slice_ticks = chirp_slice_ticks(chirp, 
slice_idx); float freq = chirp->start_freq + t * (chirp->end_freq - chirp->start_freq); - chirp->source_x[slice_idx] = env->bat_x; - chirp->source_y[slice_idx] = env->bat_y; + chirp->source_x[slice_idx] = env->x; + chirp->source_y[slice_idx] = env->y; ChirpEvent slice_chirp = *chirp; slice_chirp.x = chirp->source_x[slice_idx]; slice_chirp.y = chirp->source_y[slice_idx]; - bat_schedule_echo(env, &slice_chirp, slice_ticks, freq, - env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, BAT_ECHO_BUG); - bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, 0.0f, (float)BAT_WIDTH, 0.0f, 0.12f); - bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, (float)BAT_HEIGHT, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.12f); - bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - 0.0f, 0.0f, 0.0f, (float)BAT_HEIGHT, 0.12f); - bat_schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, - (float)BAT_WIDTH, 0.0f, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.12f); - bat_schedule_corner_reflector_echoes(env, &slice_chirp, slice_ticks, freq); + schedule_echo(env, &slice_chirp, slice_ticks, freq, + env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, ECHO_BUG); + schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, 0.0f, (float)ARENA_WIDTH, 0.0f, 0.12f); + schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, (float)ARENA_HEIGHT, (float)ARENA_WIDTH, (float)ARENA_HEIGHT, 0.12f); + schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + 0.0f, 0.0f, 0.0f, (float)ARENA_HEIGHT, 0.12f); + schedule_segment_reflectors(env, &slice_chirp, slice_ticks, freq, + (float)ARENA_WIDTH, 0.0f, (float)ARENA_WIDTH, (float)ARENA_HEIGHT, 0.12f); + schedule_corner_reflector_echoes(env, &slice_chirp, slice_ticks, freq); for (int j = 0; j < env->num_obstacles; j++) { - bat_schedule_obstacle_echoes(env, &slice_chirp, slice_ticks, freq, j); + schedule_obstacle_echoes(env, &slice_chirp, 
slice_ticks, freq, j); } } -static inline void bat_schedule_due_chirp_slices(Bat* env) { - for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { +static inline void schedule_due_chirp_slices(Bat* env) { + for (int i = 0; i < CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; int slices = chirp->slice_count; @@ -1004,22 +1004,22 @@ static inline void bat_schedule_due_chirp_slices(Bat* env) { float age_ticks = (float)(env->tick - chirp->birth_tick); while (chirp->slices_scheduled < slices) { int slice_idx = chirp->slices_scheduled; - float slice_ticks = bat_chirp_slice_ticks(chirp, slice_idx); + float slice_ticks = chirp_slice_ticks(chirp, slice_idx); if (slice_ticks >= age_ticks + 1.0f) break; - bat_schedule_chirp_slice_echoes(env, chirp, slice_idx); + schedule_chirp_slice_echoes(env, chirp, slice_idx); chirp->slices_scheduled += 1; } } } -static inline void bat_process_echo_events(Bat* env) { - int slot = env->tick % BAT_ECHO_QUEUE_TICKS; +static inline void process_echo_events(Bat* env) { + int slot = env->tick % ECHO_QUEUE_TICKS; EchoBucket* bucket = &env->echo_queue[slot]; if (bucket->tick != env->tick) return; - for (int i = 0; i < BAT_FREQ_BINS; i++) { - int left_idx = BAT_LEFT_FREQ_OFFSET + i; - int right_idx = BAT_RIGHT_FREQ_OFFSET + i; + for (int i = 0; i < FREQ_BINS; i++) { + int left_idx = LEFT_FREQ_OFFSET + i; + int right_idx = RIGHT_FREQ_OFFSET + i; env->observations[left_idx] = bat_clampf( env->observations[left_idx] + bucket->energy[0][i], 0.0f, 1.0f); env->observations[right_idx] = bat_clampf( @@ -1031,53 +1031,53 @@ static inline void bat_process_echo_events(Bat* env) { env->tick_bug_echo_path = bucket->bug_path; } } - bat_clear_echo_bucket(bucket); + clear_echo_bucket(bucket); } void compute_observations(Bat* env) { - memset(env->observations, 0, BAT_OBS_SIZE * sizeof(float)); + memset(env->observations, 0, OBS_SIZE * sizeof(float)); env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; - 
bat_process_echo_events(env); + process_echo_events(env); - for (int i = 0; i < BAT_FREQ_BINS; i++) { - env->observations[BAT_LEFT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_LEFT_FREQ_OFFSET + i], 0.0f, 1.0f); - env->observations[BAT_RIGHT_FREQ_OFFSET + i] = bat_clampf(env->observations[BAT_RIGHT_FREQ_OFFSET + i], 0.0f, 1.0f); + for (int i = 0; i < FREQ_BINS; i++) { + env->observations[LEFT_FREQ_OFFSET + i] = bat_clampf(env->observations[LEFT_FREQ_OFFSET + i], 0.0f, 1.0f); + env->observations[RIGHT_FREQ_OFFSET + i] = bat_clampf(env->observations[RIGHT_FREQ_OFFSET + i], 0.0f, 1.0f); } - float chirp_age_denom = bat_chirp_age_norm_denominator(env); + float chirp_age_denom = chirp_age_norm_denominator(env); int chirp_age = env->tick - env->last_chirp_tick; if (env->last_chirp_tick < 0) chirp_age = (int)ceilf(chirp_age_denom); env->chirp_age_ticks = chirp_age; int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); - env->observations[BAT_CHIRP_AGE_OBS] = bat_clampf(chirp_age / chirp_age_denom, 0.0f, 1.0f); - env->observations[BAT_CHIRP_COOLDOWN_OBS] = bat_clampf(cooldown / (float)env->chirp_cooldown_ticks, 0.0f, 1.0f); - env->observations[BAT_CHIRP_START_OBS] = env->last_chirp_start_freq; - env->observations[BAT_CHIRP_END_OBS] = env->last_chirp_end_freq; - env->observations[BAT_CHIRP_DURATION_OBS] = env->last_chirp_duration; - env->observations[BAT_CHIRPS_USED_OBS] = bat_chirps_used_ratio(env); - float fwd_speed = env->bat_vx * cosf(env->bat_heading) + env->bat_vy * sinf(env->bat_heading); - env->observations[BAT_FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->bat_max_speed, 0.0f, 1.0f); - env->observations[BAT_TURN_RATE_OBS] = bat_clampf(env->bat_turn_velocity / env->bat_turn_rate, -1.0f, 1.0f); - float timer_norm = env->tick / (float)BAT_MAX_STEPS; + env->observations[CHIRP_AGE_OBS] = bat_clampf(chirp_age / chirp_age_denom, 0.0f, 1.0f); + env->observations[CHIRP_COOLDOWN_OBS] = bat_clampf(cooldown / (float)env->chirp_cooldown_ticks, 
0.0f, 1.0f); + env->observations[CHIRP_START_OBS] = env->last_chirp_start_freq; + env->observations[CHIRP_END_OBS] = env->last_chirp_end_freq; + env->observations[CHIRP_DURATION_OBS] = env->last_chirp_duration; + env->observations[CHIRPS_USED_OBS] = chirps_used_ratio(env); + float fwd_speed = env->vx * cosf(env->heading) + env->vy * sinf(env->heading); + env->observations[FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->max_speed, 0.0f, 1.0f); + env->observations[TURN_RATE_OBS] = bat_clampf(env->turn_velocity / env->turn_rate, -1.0f, 1.0f); + float timer_norm = env->tick / (float)MAX_STEPS; env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); } -static inline void bat_reset_episode(Bat* env) { +static inline void reset_episode(Bat* env) { env->tick = 0; - env->bat_turn_velocity = 0.0f; - env->bat_heading = bat_randf(env) * BAT_TWO_PI - BAT_PI; - float initial_speed = env->bat_min_speed; - env->bat_vx = cosf(env->bat_heading) * initial_speed; - env->bat_vy = sinf(env->bat_heading) * initial_speed; + env->turn_velocity = 0.0f; + env->heading = randf(env) * TWO_PI - PI_F; + float initial_speed = env->min_speed; + env->vx = cosf(env->heading) * initial_speed; + env->vy = sinf(env->heading) * initial_speed; if (env->curriculum_level < env->curriculum_initial_level) { env->curriculum_level = env->curriculum_initial_level; } - bat_apply_curriculum(env); - bat_sample_spawns_at_distance(env, bat_curriculum_spawn_distance(env)); + apply_curriculum(env); + sample_spawns_at_distance(env, curriculum_spawn_distance(env)); generate_obstacles(env); - bat_reset_bug_motion(env); + reset_bug_motion(env); env->last_chirp_start_freq = 0.0f; env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; @@ -1085,8 +1085,8 @@ static inline void bat_reset_episode(Bat* env) { env->last_chirp_tick = -env->chirp_cooldown_ticks; memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; - bat_clear_echo_queue(env); - env->chirp_budget = BAT_MAX_CHIRPS_PER_EPISODE; + 
clear_echo_queue(env); + env->chirp_budget = MAX_CHIRPS_PER_EPISODE; env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; @@ -1104,22 +1104,22 @@ static inline void bat_reset_episode(Bat* env) { env->first_chirp_tick = -1.0f; env->chirp_tick_sum = 0.0f; env->episode_return = 0.0f; - env->start_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + env->start_bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); env->prev_bug_dist = env->start_bug_dist; - env->last_bug_echo_bat_x = env->bat_x; - env->last_bug_echo_bat_y = env->bat_y; + env->last_bug_echo_x = env->x; + env->last_bug_echo_y = env->y; compute_observations(env); } void c_reset(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; - bat_reset_episode(env); + reset_episode(env); } -static inline bool bat_hits_obstacle(Bat* env) { +static inline bool hits_obstacle(Bat* env) { for (int i = 0; i < env->num_obstacles; i++) { - if (bat_circle_rect_collision(env->bat_x, env->bat_y, BAT_RADIUS, + if (circle_rect_collision(env->x, env->y, AGENT_RADIUS, env->obstacle_x[i], env->obstacle_y[i], env->obstacle_w[i], env->obstacle_h[i])) { return true; } @@ -1127,26 +1127,26 @@ static inline bool bat_hits_obstacle(Bat* env) { return false; } -static inline bool bat_hits_wall(Bat* env) { - return env->bat_x - BAT_RADIUS < 0.0f || - env->bat_x + BAT_RADIUS > BAT_WIDTH || - env->bat_y - BAT_RADIUS < 0.0f || - env->bat_y + BAT_RADIUS > BAT_HEIGHT; +static inline bool hits_wall(Bat* env) { + return env->x - AGENT_RADIUS < 0.0f || + env->x + AGENT_RADIUS > ARENA_WIDTH || + env->y - AGENT_RADIUS < 0.0f || + env->y + AGENT_RADIUS > ARENA_HEIGHT; } -static inline void bat_update_bug(Bat* env, float dt) { - float speed = bat_curriculum_bug_speed(env); - float strength = bat_curriculum_bug_maneuver_strength(env); +static inline void update_bug(Bat* env, float dt) { + float speed = curriculum_bug_speed(env); + float strength = 
curriculum_bug_maneuver_strength(env); if (env->bug_maneuver_mode > 0) { env->bug_maneuver_phase += env->bug_maneuver_rate * dt; - if (env->bug_maneuver_phase > BAT_TWO_PI) { - env->bug_maneuver_phase -= BAT_TWO_PI; + if (env->bug_maneuver_phase > TWO_PI) { + env->bug_maneuver_phase -= TWO_PI; } } if (env->bug_inbound) { float tx, ty; - bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); + norm_vec(env->x - env->bug_x, env->y - env->bug_y, &tx, &ty); float px = -ty; float py = tx; float lateral = 0.0f; @@ -1180,23 +1180,23 @@ static inline void bat_update_bug(Bat* env, float dt) { env->bug_x += env->bug_vx * dt; env->bug_y += env->bug_vy * dt; bool bounced = false; - if (env->bug_x - BAT_BUG_RADIUS < 0.0f) { - env->bug_x = BAT_BUG_RADIUS; + if (env->bug_x - BUG_RADIUS < 0.0f) { + env->bug_x = BUG_RADIUS; env->bug_vx = fabsf(env->bug_vx); bounced = true; } - if (env->bug_x + BAT_BUG_RADIUS > BAT_WIDTH) { - env->bug_x = BAT_WIDTH - BAT_BUG_RADIUS; + if (env->bug_x + BUG_RADIUS > ARENA_WIDTH) { + env->bug_x = ARENA_WIDTH - BUG_RADIUS; env->bug_vx = -fabsf(env->bug_vx); bounced = true; } - if (env->bug_y - BAT_BUG_RADIUS < 0.0f) { - env->bug_y = BAT_BUG_RADIUS; + if (env->bug_y - BUG_RADIUS < 0.0f) { + env->bug_y = BUG_RADIUS; env->bug_vy = fabsf(env->bug_vy); bounced = true; } - if (env->bug_y + BAT_BUG_RADIUS > BAT_HEIGHT) { - env->bug_y = BAT_HEIGHT - BAT_BUG_RADIUS; + if (env->bug_y + BUG_RADIUS > ARENA_HEIGHT) { + env->bug_y = ARENA_HEIGHT - BUG_RADIUS; env->bug_vy = -fabsf(env->bug_vy); bounced = true; } @@ -1204,7 +1204,7 @@ static inline void bat_update_bug(Bat* env, float dt) { env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); if (env->bug_inbound) { float tx, ty; - bat_norm_vec(env->bat_x - env->bug_x, env->bat_y - env->bug_y, &tx, &ty); + norm_vec(env->x - env->bug_x, env->y - env->bug_y, &tx, &ty); env->bug_vx = tx * speed; env->bug_vy = ty * speed; env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); @@ -1212,40 +1212,40 
@@ static inline void bat_update_bug(Bat* env, float dt) { } } -static inline void bat_update_motion(Bat* env, float dt) { - int move = bat_action_index(env->actions[0], BAT_MOVE_ACTIONS); - int turn = bat_action_index(env->actions[1], BAT_TURN_ACTIONS); - float fx = cosf(env->bat_heading); - float fy = sinf(env->bat_heading); - float speed = env->bat_vx * fx + env->bat_vy * fy; - float min_speed = env->bat_min_speed; +static inline void update_motion(Bat* env, float dt) { + int move = action_index(env->actions[0], MOVE_ACTIONS); + int turn = action_index(env->actions[1], TURN_ACTIONS); + float fx = cosf(env->heading); + float fy = sinf(env->heading); + float speed = env->vx * fx + env->vy * fy; + float min_speed = env->min_speed; if (speed < min_speed) speed = min_speed; - if (move == BAT_THRUST_FORWARD) speed += env->bat_accel * dt; - if (move == BAT_BRAKE) speed -= env->bat_accel * dt; - speed = bat_clampf(speed, min_speed, env->bat_max_speed); + if (move == THRUST_FORWARD) speed += env->accel * dt; + if (move == BRAKE) speed -= env->accel * dt; + speed = bat_clampf(speed, min_speed, env->max_speed); float turn_command = 0.0f; - if (turn == BAT_TURN_LEFT) turn_command = -1.0f; - if (turn == BAT_TURN_RIGHT) turn_command = 1.0f; - float speed_ratio = env->bat_max_speed > 0.0f ? 
speed / env->bat_max_speed : 0.0f; - env->bat_turn_velocity = turn_command * env->bat_turn_rate * bat_clampf(speed_ratio, 0.0f, 1.0f); - env->bat_heading += env->bat_turn_velocity * dt; - if (env->bat_heading > BAT_PI) env->bat_heading -= BAT_TWO_PI; - if (env->bat_heading < -BAT_PI) env->bat_heading += BAT_TWO_PI; - - float heading_fx = cosf(env->bat_heading); - float heading_fy = sinf(env->bat_heading); - env->bat_vx = heading_fx * speed; - env->bat_vy = heading_fy * speed; - env->bat_x += env->bat_vx * dt; - env->bat_y += env->bat_vy * dt; -} - -static inline bool bat_try_emit_chirp(Bat* env) { - int start_idx = bat_action_index(env->actions[2], BAT_CHIRP_FREQ_BINS); - int end_idx = bat_action_index(env->actions[3], BAT_CHIRP_FREQ_BINS); - int duration_idx = bat_action_index(env->actions[4], BAT_CHIRP_DURATION_BINS); + if (turn == TURN_LEFT) turn_command = -1.0f; + if (turn == TURN_RIGHT) turn_command = 1.0f; + float speed_ratio = env->max_speed > 0.0f ? speed / env->max_speed : 0.0f; + env->turn_velocity = turn_command * env->turn_rate * bat_clampf(speed_ratio, 0.0f, 1.0f); + env->heading += env->turn_velocity * dt; + if (env->heading > PI_F) env->heading -= TWO_PI; + if (env->heading < -PI_F) env->heading += TWO_PI; + + float heading_fx = cosf(env->heading); + float heading_fy = sinf(env->heading); + env->vx = heading_fx * speed; + env->vy = heading_fy * speed; + env->x += env->vx * dt; + env->y += env->vy * dt; +} + +static inline bool try_emit_chirp(Bat* env) { + int start_idx = action_index(env->actions[2], CHIRP_FREQ_BINS); + int end_idx = action_index(env->actions[3], CHIRP_FREQ_BINS); + int duration_idx = action_index(env->actions[4], CHIRP_DURATION_BINS); if (env->tick - env->last_chirp_tick < env->chirp_cooldown_ticks) { return false; @@ -1255,115 +1255,115 @@ static inline bool bat_try_emit_chirp(Bat* env) { return false; } - env->last_chirp_start_freq = bat_norm_bin(start_idx, BAT_CHIRP_FREQ_BINS); - env->last_chirp_end_freq = bat_norm_bin(end_idx, 
BAT_CHIRP_FREQ_BINS); - env->last_chirp_duration = bat_norm_bin(duration_idx, BAT_CHIRP_DURATION_BINS); + env->last_chirp_start_freq = norm_bin(start_idx, CHIRP_FREQ_BINS); + env->last_chirp_end_freq = norm_bin(end_idx, CHIRP_FREQ_BINS); + env->last_chirp_duration = norm_bin(duration_idx, CHIRP_DURATION_BINS); env->chirp_age_ticks = 0; env->last_chirp_tick = env->tick; - bat_record_chirp_timing(env); + record_chirp_timing(env); env->chirps_emitted_episode += 1; env->chirp_duration_sum += env->last_chirp_duration; env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); ChirpEvent* chirp = &env->chirps[env->chirp_head]; - chirp->x = env->bat_x; - chirp->y = env->bat_y; + chirp->x = env->x; + chirp->y = env->y; chirp->start_freq = env->last_chirp_start_freq; chirp->end_freq = env->last_chirp_end_freq; - chirp->duration = bat_chirp_duration_seconds(env->last_chirp_duration); + chirp->duration = chirp_duration_seconds(env->last_chirp_duration); chirp->birth_tick = env->tick; - chirp->slice_count = (int)ceilf(chirp->duration / BAT_TICK_RATE); + chirp->slice_count = (int)ceilf(chirp->duration / TICK_RATE); chirp->slices_scheduled = 0; - for (int i = 0; i < BAT_MAX_CHIRP_SLICES; i++) { + for (int i = 0; i < MAX_CHIRP_SLICES; i++) { chirp->source_x[i] = chirp->x; chirp->source_y[i] = chirp->y; } chirp->active = 1; - env->chirp_head = (env->chirp_head + 1) % BAT_CHIRP_HISTORY; + env->chirp_head = (env->chirp_head + 1) % CHIRP_HISTORY; env->audio_chirp_serial += 1; - env->last_bug_echo_expected_tick = bat_expected_bug_echo_tick(env, chirp); + env->last_bug_echo_expected_tick = expected_bug_echo_tick(env, chirp); return true; } -static inline float bat_next_chirp_overlap_fraction(Bat* env) { +static inline float next_chirp_overlap_fraction(Bat* env) { if (env->last_bug_echo_expected_tick <= (float)env->tick) return 0.0f; float wait_ticks = env->last_bug_echo_expected_tick - (float)env->last_chirp_tick; float remaining_ticks = 
env->last_bug_echo_expected_tick - (float)env->tick; return bat_clampf(remaining_ticks / wait_ticks, 0.0f, 1.0f); } -static inline int bat_update_chirp(Bat* env) { - int emit = bat_action_index(env->actions[5], BAT_CHIRP_EMIT_ACTIONS); +static inline int update_chirp(Bat* env) { + int emit = action_index(env->actions[5], CHIRP_EMIT_ACTIONS); if (emit) { if (env->chirps_emitted_episode >= env->chirp_budget) { return -2; } - return bat_try_emit_chirp(env) ? 1 : -1; - } else if (env->chirp_age_ticks < BAT_MAX_CHIRP_AGE_TICKS) { + return try_emit_chirp(env) ? 1 : -1; + } else if (env->chirp_age_ticks < MAX_CHIRP_AGE_TICKS) { env->chirp_age_ticks += 1; } return 0; } -static inline bool bat_caught_bug(Bat* env) { - return bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y) <= BAT_RADIUS + BAT_BUG_RADIUS; +static inline bool caught_bug(Bat* env) { + return dist(env->x, env->y, env->bug_x, env->bug_y) <= AGENT_RADIUS + BUG_RADIUS; } void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; - float chirp_overlap_fraction = bat_next_chirp_overlap_fraction(env); - int chirp_status = bat_update_chirp(env); + float chirp_overlap_fraction = next_chirp_overlap_fraction(env); + int chirp_status = update_chirp(env); if (chirp_status == -2) { env->rewards[0] = -1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; add_log(env, 0.0f, 1.0f, 0.0f); - bat_reset_episode(env); + reset_episode(env); return; } - if (bat_caught_bug(env)) { - env->rewards[0] = bat_success_reward(env); + if (caught_bug(env)) { + env->rewards[0] = success_reward(env); env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; - bat_advance_curriculum(env); + advance_curriculum(env); add_log(env, 1.0f, 0.0f, 0.0f); - bat_reset_episode(env); + reset_episode(env); return; } - bat_schedule_due_chirp_slices(env); + schedule_due_chirp_slices(env); for (int i = 0; i < env->frameskip; i++) { - bat_update_motion(env, BAT_TICK_RATE); - bat_update_bug(env, BAT_TICK_RATE); - if 
(bat_hits_wall(env) || bat_hits_obstacle(env)) { + update_motion(env, TICK_RATE); + update_bug(env, TICK_RATE); + if (hits_wall(env) || hits_obstacle(env)) { env->rewards[0] = -env->collision_penalty; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; add_log(env, 0.0f, 1.0f, 0.0f); - bat_reset_episode(env); + reset_episode(env); return; } - if (bat_caught_bug(env)) { - env->rewards[0] = bat_success_reward(env); + if (caught_bug(env)) { + env->rewards[0] = success_reward(env); env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; - bat_advance_curriculum(env); + advance_curriculum(env); add_log(env, 1.0f, 0.0f, 0.0f); - bat_reset_episode(env); + reset_episode(env); return; } } env->tick += 1; - bat_record_distance_tick(env); - float bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + record_distance_tick(env); + float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); float progress = env->prev_bug_dist - bug_dist; env->rewards[0] += env->progress_reward_scale * progress; env->rewards[0] -= env->step_cost; if (chirp_status > 0) { env->rewards[0] += env->valid_chirp_reward; - env->rewards[0] -= BAT_CHIRP_COST; + env->rewards[0] -= CHIRP_COST; if (chirp_overlap_fraction > 0.0f) { env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; env->chirps_overlapped += 1; @@ -1373,23 +1373,23 @@ void c_step(Bat* env) { } env->prev_bug_dist = bug_dist; - if (env->tick >= BAT_MAX_STEPS) { + if (env->tick >= MAX_STEPS) { env->rewards[0] = -1.0f; env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; add_log(env, 0.0f, 0.0f, 1.0f); - bat_reset_episode(env); + reset_episode(env); return; } compute_observations(env); if (env->tick_bug_echo_path > 0.0f) { if (env->last_bug_echo_path > 0.0f) { - float bat_echo_displacement = bat_dist(env->last_bug_echo_bat_x, env->last_bug_echo_bat_y, - env->bat_x, env->bat_y); - if (bat_echo_displacement >= BAT_BUG_ECHO_MIN_DISPLACEMENT) { + float echo_displacement = 
dist(env->last_bug_echo_x, env->last_bug_echo_y, + env->x, env->y); + if (echo_displacement >= BUG_ECHO_MIN_DISPLACEMENT) { float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) - / BAT_MAX_ECHO_RANGE; + / MAX_ECHO_RANGE; if (echo_progress > 0.0f) { env->rewards[0] += env->bug_echo_reward_scale * echo_progress; } else if (echo_progress < 0.0f) { @@ -1399,14 +1399,14 @@ void c_step(Bat* env) { } } env->last_bug_echo_path = env->tick_bug_echo_path; - env->last_bug_echo_bat_x = env->bat_x; - env->last_bug_echo_bat_y = env->bat_y; + env->last_bug_echo_x = env->x; + env->last_bug_echo_y = env->y; } env->episode_return += env->rewards[0]; } #ifndef BAT_HEADLESS -static inline Color bat_freq_color(float freq_norm, float alpha_norm) { +static inline Color freq_color(float freq_norm, float alpha_norm) { float f = freq_norm; float mid = 1.0f - fabsf(2.0f * f - 1.0f); return (Color){ @@ -1417,66 +1417,66 @@ static inline Color bat_freq_color(float freq_norm, float alpha_norm) { }; } -static inline void bat_draw_chirp_rings(Bat* env, float sx, float sy) { +static inline void draw_chirp_rings(Bat* env, float sx, float sy) { float scale = fminf(sx, sy); - for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { + for (int i = 0; i < CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; - float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; - float max_age = BAT_MAX_ECHO_RANGE / env->sound_speed + chirp->duration; + float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; + float max_age = MAX_ECHO_RANGE / env->sound_speed + chirp->duration; if (age_seconds < 0.0f || age_seconds > max_age) { chirp->active = 0; continue; } - for (int ring = 0; ring < BAT_CHIRP_RINGS; ring++) { - float slice = ring / (float)(BAT_CHIRP_RINGS - 1); + for (int ring = 0; ring < CHIRP_RINGS; ring++) { + float slice = ring / (float)(CHIRP_RINGS - 1); float freq = chirp->start_freq + slice * (chirp->end_freq - chirp->start_freq); - float 
radius = bat_chirp_ring_radius(age_seconds, slice, chirp->duration, env->sound_speed); - if (radius <= 0.0f || radius > BAT_MAX_ECHO_RANGE) continue; + float radius = chirp_ring_radius(age_seconds, slice, chirp->duration, env->sound_speed); + if (radius <= 0.0f || radius > MAX_ECHO_RANGE) continue; - float fade = 1.0f - radius / BAT_MAX_ECHO_RANGE; + float fade = 1.0f - radius / MAX_ECHO_RANGE; float alpha = 0.18f + 0.42f * bat_clampf(fade, 0.0f, 1.0f); float source_x, source_y; - bat_chirp_source_for_fraction(chirp, slice, &source_x, &source_y); + chirp_source_for_fraction(chirp, slice, &source_x, &source_y); DrawCircleLines( (int)(source_x * sx), (int)(source_y * sy), radius * scale, - bat_freq_color(freq, alpha)); + freq_color(freq, alpha)); } } } -static inline Color bat_doppler_ray_color(float doppler, float alpha) { +static inline Color doppler_ray_color(float doppler, float alpha) { if (doppler > 0.05f) { - return bat_freq_color(1.0f, alpha); + return freq_color(1.0f, alpha); } else if (doppler < -0.05f) { - return bat_freq_color(0.0f, alpha); + return freq_color(0.0f, alpha); } return (Color){210, 210, 220, (unsigned char)(255.0f * bat_clampf(alpha, 0.0f, 1.0f))}; } -static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, +static inline void draw_echo_flash(Bat* env, ChirpEvent* chirp, float rx, float ry, float rvx, float rvy, float strength, float sx, float sy) { - float age_seconds = (env->tick - chirp->birth_tick) * BAT_TICK_RATE; - float distance = bat_dist(chirp->x, chirp->y, rx, ry); - float echo_time = bat_echo_time_seconds(distance, env->sound_speed); + float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; + float distance = dist(chirp->x, chirp->y, rx, ry); + float echo_time = echo_time_seconds(distance, env->sound_speed); bool echo_arriving_now = fabsf(age_seconds - echo_time) <= 0.025f; if (!echo_arriving_now) return; float ux, uy; - bat_norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); - float rel_vx = rvx - env->bat_vx; - 
float rel_vy = rvy - env->bat_vy; + norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); + float rel_vx = rvx - env->vx; + float rel_vy = rvy - env->vy; float distance_rate = rel_vx * ux + rel_vy * uy; - float doppler = bat_clampf(-distance_rate / (env->bat_max_speed + BAT_BUG_SPEED), -1.0f, 1.0f); + float doppler = bat_clampf(-distance_rate / (env->max_speed + BUG_SPEED), -1.0f, 1.0f); float amp = strength / (1.0f + 0.02f * distance * distance); float alpha = bat_clampf(0.20f + amp * 2.0f, 0.20f, 0.90f); - Color color = bat_doppler_ray_color(doppler, alpha); + Color color = doppler_ray_color(doppler, alpha); DrawLine((int)(chirp->x * sx), (int)(chirp->y * sy), (int)(rx * sx), (int)(ry * sy), color); @@ -1484,46 +1484,46 @@ static inline void bat_draw_echo_flash(Bat* env, ChirpEvent* chirp, fmaxf(3.0f, 8.0f * alpha), color); } -static inline void bat_draw_segment_echoes(Bat* env, ChirpEvent* chirp, +static inline void draw_segment_echoes(Bat* env, ChirpEvent* chirp, float x1, float y1, float x2, float y2, float strength, float sx, float sy) { - float len = bat_dist(x1, y1, x2, y2); - int count = (int)(len / BAT_REFLECTOR_SPACING) + 1; + float len = dist(x1, y1, x2, y2); + int count = (int)(len / REFLECTOR_SPACING) + 1; if (count < 1) count = 1; for (int i = 0; i <= count; i++) { float t = i / (float)count; float x = x1 + (x2 - x1) * t; float y = y1 + (y2 - y1) * t; - bat_draw_echo_flash(env, chirp, x, y, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, x, y, 0.0f, 0.0f, strength, sx, sy); } } -static inline void bat_draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, +static inline void draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, int i, float sx, float sy) { float x = env->obstacle_x[i]; float y = env->obstacle_y[i]; float w = env->obstacle_w[i]; float h = env->obstacle_h[i]; - bat_draw_segment_echoes(env, chirp, x, y, x + w, y, 0.55f, sx, sy); - bat_draw_segment_echoes(env, chirp, x, y + h, x + w, y + h, 0.55f, sx, sy); - bat_draw_segment_echoes(env, 
chirp, x, y, x, y + h, 0.55f, sx, sy); - bat_draw_segment_echoes(env, chirp, x + w, y, x + w, y + h, 0.55f, sx, sy); + draw_segment_echoes(env, chirp, x, y, x + w, y, 0.55f, sx, sy); + draw_segment_echoes(env, chirp, x, y + h, x + w, y + h, 0.55f, sx, sy); + draw_segment_echoes(env, chirp, x, y, x, y + h, 0.55f, sx, sy); + draw_segment_echoes(env, chirp, x + w, y, x + w, y + h, 0.55f, sx, sy); } -static inline void bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, +static inline void draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float sx, float sy) { -#if BAT_CORNER_REFLECTORS - float w = (float)BAT_WIDTH; - float h = (float)BAT_HEIGHT; +#if CORNER_REFLECTORS + float w = (float)ARENA_WIDTH; + float h = (float)ARENA_HEIGHT; float strength = env->reflector_strength; - bat_draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, 0.0f, h, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, w, h, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, 0.5f * w, 0.0f, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); - bat_draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, 0.0f, h, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, w, h, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, 0.5f * w, 0.0f, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); + draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); 
#else (void)env; (void)chirp; @@ -1532,8 +1532,8 @@ static inline void bat_draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, #endif } -static inline void bat_draw_corner_reflector_markers(Bat* env) { -#if BAT_CORNER_REFLECTORS +static inline void draw_corner_reflector_markers(Bat* env) { +#if CORNER_REFLECTORS const int size = 8; const Color fill = (Color){128, 128, 132, 255}; const Color outline = (Color){202, 202, 208, 255}; @@ -1562,19 +1562,19 @@ static inline void bat_draw_corner_reflector_markers(Bat* env) { #endif } -static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { - for (int i = 0; i < BAT_CHIRP_HISTORY; i++) { +static inline void draw_echo_reflections(Bat* env, float sx, float sy) { + for (int i = 0; i < CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; - bat_draw_echo_flash(env, chirp, env->bug_x, env->bug_y, + draw_echo_flash(env, chirp, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 4.0f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, (float)BAT_WIDTH, 0.0f, 0.18f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, (float)BAT_HEIGHT, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.18f, sx, sy); - bat_draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)BAT_HEIGHT, 0.18f, sx, sy); - bat_draw_segment_echoes(env, chirp, (float)BAT_WIDTH, 0.0f, (float)BAT_WIDTH, (float)BAT_HEIGHT, 0.18f, sx, sy); - bat_draw_corner_reflector_echoes(env, chirp, sx, sy); + draw_segment_echoes(env, chirp, 0.0f, 0.0f, (float)ARENA_WIDTH, 0.0f, 0.18f, sx, sy); + draw_segment_echoes(env, chirp, 0.0f, (float)ARENA_HEIGHT, (float)ARENA_WIDTH, (float)ARENA_HEIGHT, 0.18f, sx, sy); + draw_segment_echoes(env, chirp, 0.0f, 0.0f, 0.0f, (float)ARENA_HEIGHT, 0.18f, sx, sy); + draw_segment_echoes(env, chirp, (float)ARENA_WIDTH, 0.0f, (float)ARENA_WIDTH, (float)ARENA_HEIGHT, 0.18f, sx, sy); + draw_corner_reflector_echoes(env, chirp, sx, sy); for (int j = 0; j < env->num_obstacles; j++) { - 
bat_draw_obstacle_echoes(env, chirp, j, sx, sy); + draw_obstacle_echoes(env, chirp, j, sx, sy); } } } @@ -1583,8 +1583,8 @@ static inline void bat_draw_echo_reflections(Bat* env, float sx, float sy) { Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); - client->width = BAT_WIDTH * 10; - client->height = BAT_HEIGHT * 10; + client->width = ARENA_WIDTH * 10; + client->height = ARENA_HEIGHT * 10; InitWindow(client->width, client->height, "Bat"); int target_fps = env->render_target_fps; if (target_fps > 0) { @@ -1592,15 +1592,15 @@ Client* make_client(Bat* env) { } InitAudioDevice(); client->audio_ready = IsAudioDeviceReady(); - bat_record_init(env, client); + record_init(env, client); return client; } void close_client(Client* client) { - bat_record_finalize(client); + record_finalize(client); if (client->audio_ready) { - for (int i = 0; i < BAT_AUDIO_VOICES; i++) { - bat_unload_chirp_sound(client, i); + for (int i = 0; i < AUDIO_VOICES; i++) { + unload_chirp_sound(client, i); } CloseAudioDevice(); } @@ -1615,13 +1615,13 @@ void c_render(Bat* env) { if (env->client == NULL) { env->client = make_client(env); } - bat_play_chirp_audio(env); - float sx = env->client->width / (float)BAT_WIDTH; - float sy = env->client->height / (float)BAT_HEIGHT; + play_chirp_audio(env); + float sx = env->client->width / (float)ARENA_WIDTH; + float sy = env->client->height / (float)ARENA_HEIGHT; BeginDrawing(); ClearBackground((Color){18, 20, 24, 255}); - bat_draw_chirp_rings(env, sx, sy); - bat_draw_echo_reflections(env, sx, sy); + draw_chirp_rings(env, sx, sy); + draw_echo_reflections(env, sx, sy); DrawRectangleLines(0, 0, env->client->width, env->client->height, GRAY); for (int i = 0; i < env->num_obstacles; i++) { DrawRectangle( @@ -1631,19 +1631,19 @@ void c_render(Bat* env) { (int)(env->obstacle_h[i] * sy), (Color){92, 92, 96, 255}); } - bat_draw_corner_reflector_markers(env); + draw_corner_reflector_markers(env); DrawCircle((int)(env->bug_x * sx), 
(int)(env->bug_y * sy), - BAT_BUG_RADIUS * sx, GREEN); - DrawCircle((int)(env->bat_x * sx), (int)(env->bat_y * sy), - BAT_RADIUS * sx, BLUE); - float hx = env->bat_x + cosf(env->bat_heading) * BAT_RADIUS * 2.0f; - float hy = env->bat_y + sinf(env->bat_heading) * BAT_RADIUS * 2.0f; - DrawLine((int)(env->bat_x * sx), (int)(env->bat_y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); + BUG_RADIUS * sx, GREEN); + DrawCircle((int)(env->x * sx), (int)(env->y * sy), + AGENT_RADIUS * sx, BLUE); + float hx = env->x + cosf(env->heading) * AGENT_RADIUS * 2.0f; + float hy = env->y + sinf(env->heading) * AGENT_RADIUS * 2.0f; + DrawLine((int)(env->x * sx), (int)(env->y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, env->chirps_emitted_episode, cooldown), 10, 10, 20, RAYWHITE); EndDrawing(); - bat_record_capture_frame(env); + record_capture_frame(env); } #else Client* make_client(Bat* env) { diff --git a/ocean/bat/bat_audio.h b/ocean/bat/bat_audio.h index 2cd17ab19c..a67bf2b084 100644 --- a/ocean/bat/bat_audio.h +++ b/ocean/bat/bat_audio.h @@ -1,24 +1,24 @@ #ifndef BAT_AUDIO_H #define BAT_AUDIO_H -static inline float bat_chirp_audio_duration_at_fps(float duration_norm, int fps) { - float duration = bat_chirp_duration_seconds(duration_norm); +static inline float chirp_audio_duration_at_fps(float duration_norm, int fps) { + float duration = chirp_duration_seconds(duration_norm); if (fps <= 0) return duration; float scale = 60.0f / (float)fps; if (scale < 1.0f) scale = 1.0f; return duration * scale; } -static inline float bat_chirp_audio_duration_seconds(Bat* env, float duration_norm) { - return bat_chirp_audio_duration_at_fps(duration_norm, env->render_target_fps); +static inline float chirp_audio_duration_seconds(Bat* env, float duration_norm) { + return chirp_audio_duration_at_fps(duration_norm, 
env->render_target_fps); } -static inline float bat_chirp_audio_frequency_hz(float freq_norm) { - return BAT_AUDIO_MIN_HZ + freq_norm - * (BAT_AUDIO_MAX_HZ - BAT_AUDIO_MIN_HZ); +static inline float chirp_audio_frequency_hz(float freq_norm) { + return AUDIO_MIN_HZ + freq_norm + * (AUDIO_MAX_HZ - AUDIO_MIN_HZ); } -static inline float bat_chirp_audio_envelope(float t_norm) { +static inline float chirp_audio_envelope(float t_norm) { if (t_norm <= 0.0f || t_norm >= 1.0f) return 0.0f; const float fade = 0.08f; float attack = t_norm / fade; @@ -26,59 +26,59 @@ static inline float bat_chirp_audio_envelope(float t_norm) { return bat_clampf(fminf(attack, release), 0.0f, 1.0f); } -static inline float bat_chirp_audio_sample_f32(float start_norm, float end_norm, +static inline float chirp_audio_sample_f32(float start_norm, float end_norm, float duration_seconds, int sample_index, int sample_rate) { float t = sample_index / (float)sample_rate; if (t >= duration_seconds) return 0.0f; - float start_hz = bat_chirp_audio_frequency_hz(start_norm); - float end_hz = bat_chirp_audio_frequency_hz(end_norm); + float start_hz = chirp_audio_frequency_hz(start_norm); + float end_hz = chirp_audio_frequency_hz(end_norm); float chirp_rate = (end_hz - start_hz) / duration_seconds; - float phase = BAT_TWO_PI * (start_hz * t + 0.5f * chirp_rate * t * t); - float envelope = bat_chirp_audio_envelope(t / duration_seconds); - return BAT_AUDIO_VOLUME * envelope * sinf(phase); + float phase = TWO_PI * (start_hz * t + 0.5f * chirp_rate * t * t); + float envelope = chirp_audio_envelope(t / duration_seconds); + return AUDIO_VOLUME * envelope * sinf(phase); } #ifndef BAT_HEADLESS -static inline void bat_unload_chirp_sound(Client* client, int i) { +static inline void unload_chirp_sound(Client* client, int i) { if (!client->chirp_sound_loaded[i]) return; UnloadSound(client->chirp_sounds[i]); client->chirp_sound_loaded[i] = 0; } -static inline void bat_cleanup_audio(Client* client) { +static inline void 
cleanup_audio(Client* client) { if (!client->audio_ready) return; - for (int i = 0; i < BAT_AUDIO_VOICES; i++) { + for (int i = 0; i < AUDIO_VOICES; i++) { if (client->chirp_sound_loaded[i] && !IsSoundPlaying(client->chirp_sounds[i])) { - bat_unload_chirp_sound(client, i); + unload_chirp_sound(client, i); } } } -static inline void bat_play_chirp_audio(Bat* env) { +static inline void play_chirp_audio(Bat* env) { Client* client = env->client; if (client == NULL || !client->audio_ready) return; - bat_cleanup_audio(client); + cleanup_audio(client); if (env->audio_chirp_serial <= 0 || env->audio_chirp_serial == client->last_audio_chirp_serial) { return; } client->last_audio_chirp_serial = env->audio_chirp_serial; - float duration = bat_chirp_audio_duration_seconds(env, env->last_chirp_duration); - int sample_count = (int)ceilf(duration * BAT_AUDIO_SAMPLE_RATE); + float duration = chirp_audio_duration_seconds(env, env->last_chirp_duration); + int sample_count = (int)ceilf(duration * AUDIO_SAMPLE_RATE); short* samples = (short*)malloc(sample_count * sizeof(short)); if (samples == NULL) return; for (int i = 0; i < sample_count; i++) { - float sample = bat_chirp_audio_sample_f32(env->last_chirp_start_freq, - env->last_chirp_end_freq, duration, i, BAT_AUDIO_SAMPLE_RATE); + float sample = chirp_audio_sample_f32(env->last_chirp_start_freq, + env->last_chirp_end_freq, duration, i, AUDIO_SAMPLE_RATE); samples[i] = (short)(bat_clampf(sample, -1.0f, 1.0f) * 32767.0f); } Wave wave = { .frameCount = (unsigned int)sample_count, - .sampleRate = BAT_AUDIO_SAMPLE_RATE, + .sampleRate = AUDIO_SAMPLE_RATE, .sampleSize = 16, .channels = 1, .data = samples, @@ -87,8 +87,8 @@ static inline void bat_play_chirp_audio(Bat* env) { UnloadWave(wave); int voice = client->audio_voice_cursor; - client->audio_voice_cursor = (client->audio_voice_cursor + 1) % BAT_AUDIO_VOICES; - bat_unload_chirp_sound(client, voice); + client->audio_voice_cursor = (client->audio_voice_cursor + 1) % AUDIO_VOICES; + 
unload_chirp_sound(client, voice); client->chirp_sounds[voice] = sound; client->chirp_sound_loaded[voice] = 1; SetSoundVolume(client->chirp_sounds[voice], 1.0f); diff --git a/ocean/bat/bat_record.h b/ocean/bat/bat_record.h index 971751fa99..0ba923a18f 100644 --- a/ocean/bat/bat_record.h +++ b/ocean/bat/bat_record.h @@ -1,36 +1,36 @@ #ifndef BAT_RECORD_H #define BAT_RECORD_H -static inline void bat_record_write_le16(FILE* f, unsigned int v) { +static inline void record_write_le16(FILE* f, unsigned int v) { fputc((int)(v & 0xffu), f); fputc((int)((v >> 8) & 0xffu), f); } -static inline void bat_record_write_le32(FILE* f, unsigned int v) { +static inline void record_write_le32(FILE* f, unsigned int v) { fputc((int)(v & 0xffu), f); fputc((int)((v >> 8) & 0xffu), f); fputc((int)((v >> 16) & 0xffu), f); fputc((int)((v >> 24) & 0xffu), f); } -static inline void bat_record_write_wav_header(FILE* f, int data_bytes) { - int byte_rate = BAT_AUDIO_SAMPLE_RATE * 2; +static inline void record_write_wav_header(FILE* f, int data_bytes) { + int byte_rate = AUDIO_SAMPLE_RATE * 2; fwrite("RIFF", 1, 4, f); - bat_record_write_le32(f, 36u + (unsigned int)data_bytes); + record_write_le32(f, 36u + (unsigned int)data_bytes); fwrite("WAVE", 1, 4, f); fwrite("fmt ", 1, 4, f); - bat_record_write_le32(f, 16); - bat_record_write_le16(f, 1); - bat_record_write_le16(f, 1); - bat_record_write_le32(f, BAT_AUDIO_SAMPLE_RATE); - bat_record_write_le32(f, (unsigned int)byte_rate); - bat_record_write_le16(f, 2); - bat_record_write_le16(f, 16); + record_write_le32(f, 16); + record_write_le16(f, 1); + record_write_le16(f, 1); + record_write_le32(f, AUDIO_SAMPLE_RATE); + record_write_le32(f, (unsigned int)byte_rate); + record_write_le16(f, 2); + record_write_le16(f, 16); fwrite("data", 1, 4, f); - bat_record_write_le32(f, (unsigned int)data_bytes); + record_write_le32(f, (unsigned int)data_bytes); } -static inline void bat_record_init(Bat* env, Client* client) { +static inline void record_init(Bat* env, 
Client* client) { if (!env->record_video || client->recording_initialized) return; client->recording_initialized = 1; client->record_fps = env->record_video_fps; @@ -46,14 +46,14 @@ static inline void bat_record_init(Bat* env, Client* client) { if (client->record_audio) { client->record_wav = fopen(client->record_wav_path, "wb"); if (client->record_wav != NULL) { - bat_record_write_wav_header(client->record_wav, 0); + record_write_wav_header(client->record_wav, 0); } } printf("Bat recording enabled: %s (%d fps, %d frames)\n", client->record_mp4_path, client->record_fps, client->record_max_frames); } -static inline void bat_record_enqueue_chirp(Bat* env) { +static inline void record_enqueue_chirp(Bat* env) { Client* client = env->client; if (client == NULL || !client->recording_initialized || client->recording_finalized || !client->record_audio) { @@ -65,35 +65,35 @@ static inline void bat_record_enqueue_chirp(Bat* env) { } client->record_last_audio_chirp_serial = env->audio_chirp_serial; int voice_idx = client->record_voice_cursor; - client->record_voice_cursor = (client->record_voice_cursor + 1) % BAT_RECORD_MAX_VOICES; + client->record_voice_cursor = (client->record_voice_cursor + 1) % RECORD_MAX_VOICES; BatRecordVoice* voice = &client->record_voices[voice_idx]; voice->active = 1; voice->start_sample = client->record_audio_sample_cursor; voice->start_freq = env->last_chirp_start_freq; voice->end_freq = env->last_chirp_end_freq; - voice->duration = bat_chirp_audio_duration_at_fps( + voice->duration = chirp_audio_duration_at_fps( env->last_chirp_duration, client->record_fps); } -static inline void bat_record_append_audio_frame(Bat* env) { +static inline void record_append_audio_frame(Bat* env) { Client* client = env->client; if (client == NULL || !client->record_audio || client->record_wav == NULL) return; - int frame_samples = BAT_AUDIO_SAMPLE_RATE / client->record_fps; + int frame_samples = AUDIO_SAMPLE_RATE / client->record_fps; for (int i = 0; i < 
frame_samples; i++) { int sample_index = client->record_audio_sample_cursor + i; float mixed = 0.0f; - for (int v = 0; v < BAT_RECORD_MAX_VOICES; v++) { + for (int v = 0; v < RECORD_MAX_VOICES; v++) { BatRecordVoice* voice = &client->record_voices[v]; if (!voice->active) continue; int local_sample = sample_index - voice->start_sample; - int voice_samples = (int)ceilf(voice->duration * BAT_AUDIO_SAMPLE_RATE); + int voice_samples = (int)ceilf(voice->duration * AUDIO_SAMPLE_RATE); if (local_sample < 0) continue; if (local_sample >= voice_samples) { voice->active = 0; continue; } - mixed += bat_chirp_audio_sample_f32(voice->start_freq, voice->end_freq, - voice->duration, local_sample, BAT_AUDIO_SAMPLE_RATE); + mixed += chirp_audio_sample_f32(voice->start_freq, voice->end_freq, + voice->duration, local_sample, AUDIO_SAMPLE_RATE); } short pcm = (short)(bat_clampf(mixed, -1.0f, 1.0f) * 32767.0f); fwrite(&pcm, sizeof(short), 1, client->record_wav); @@ -102,7 +102,7 @@ static inline void bat_record_append_audio_frame(Bat* env) { client->record_audio_sample_cursor += frame_samples; } -static inline void bat_record_finalize(Client* client) { +static inline void record_finalize(Client* client) { if (client == NULL || !client->recording_initialized || client->recording_finalized) { return; @@ -110,7 +110,7 @@ static inline void bat_record_finalize(Client* client) { client->recording_finalized = 1; if (client->record_wav != NULL) { fseek(client->record_wav, 0, SEEK_SET); - bat_record_write_wav_header(client->record_wav, client->record_audio_data_bytes); + record_write_wav_header(client->record_wav, client->record_audio_data_bytes); fclose(client->record_wav); client->record_wav = NULL; } @@ -137,27 +137,27 @@ static inline void bat_record_finalize(Client* client) { } } -static inline void bat_record_capture_frame(Bat* env) { +static inline void record_capture_frame(Bat* env) { Client* client = env->client; if (client == NULL || !client->recording_initialized || 
client->recording_finalized) { return; } if (client->record_frame >= client->record_max_frames) { - bat_record_finalize(client); + record_finalize(client); return; } - bat_record_enqueue_chirp(env); + record_enqueue_chirp(env); char path[512]; snprintf(path, sizeof(path), "%s/%06d.png", client->record_frame_dir, client->record_frame); Image image = LoadImageFromScreen(); ExportImage(image, path); UnloadImage(image); - bat_record_append_audio_frame(env); + record_append_audio_frame(env); client->record_frame += 1; if (client->record_frame >= client->record_max_frames) { - bat_record_finalize(client); + record_finalize(client); } } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index c9ad24abf9..db22cf53c9 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -1,23 +1,22 @@ #include "bat.h" -#define OBS_SIZE BAT_OBS_SIZE -#define NUM_ATNS BAT_NUM_ACTIONS -#define ACT_SIZES {BAT_MOVE_ACTIONS, BAT_TURN_ACTIONS, BAT_CHIRP_FREQ_BINS, BAT_CHIRP_FREQ_BINS, BAT_CHIRP_DURATION_BINS, BAT_CHIRP_EMIT_ACTIONS} +#define NUM_ATNS NUM_ACTIONS +#define ACT_SIZES {MOVE_ACTIONS, TURN_ACTIONS, CHIRP_FREQ_BINS, CHIRP_FREQ_BINS, CHIRP_DURATION_BINS, CHIRP_EMIT_ACTIONS} #define OBS_TENSOR_T FloatTensor #define Env Bat #include "vecenv.h" void my_init(Env* env, Dict* kwargs) { - env->num_agents = BAT_NUM_AGENTS; + env->num_agents = NUM_AGENTS; env->frameskip = dict_get(kwargs, "frameskip")->value; env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; env->ear_front_gain = dict_get(kwargs, "ear_front_gain")->value; env->ear_side_gain = dict_get(kwargs, "ear_side_gain")->value; - env->bat_max_speed = dict_get(kwargs, "bat_max_speed")->value; - env->bat_min_speed = dict_get(kwargs, "bat_min_speed")->value; - env->bat_accel = dict_get(kwargs, "bat_accel")->value; - env->bat_turn_rate = dict_get(kwargs, "bat_turn_rate")->value; + env->max_speed = dict_get(kwargs, "bat_max_speed")->value; + 
env->min_speed = dict_get(kwargs, "bat_min_speed")->value; + env->accel = dict_get(kwargs, "bat_accel")->value; + env->turn_rate = dict_get(kwargs, "bat_turn_rate")->value; env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; env->record_video = dict_get(kwargs, "record_video")->value; env->record_video_fps = dict_get(kwargs, "record_video_fps")->value; From b59371cdcc28a20f03bf67b727886c94afc65b46 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 17:33:58 -0700 Subject: [PATCH 39/51] Clean up stale bat metrics --- BAT_CURRICULUM.md | 13 +- BAT_SPEC.md | 36 +- ocean/bat/bat.h | 93 --- ocean/bat/binding.c | 10 - ocean/bat/tests/test_bat_core.c | 1017 +++++++++++++------------------ 5 files changed, 427 insertions(+), 742 deletions(-) diff --git a/BAT_CURRICULUM.md b/BAT_CURRICULUM.md index b0595266dd..45a4a0f4cc 100644 --- a/BAT_CURRICULUM.md +++ b/BAT_CURRICULUM.md @@ -86,13 +86,10 @@ curriculum_perf curriculum_level curriculum_distance_difficulty curriculum_obstacle_difficulty -curriculum_chirp_budget_difficulty (legacy fixed zero) curriculum_difficulty bug_motion_mode bug_motion_speed num_obstacles -chirp_budget -chirps_used_ratio chirp_overlap_fraction collision timeout @@ -237,9 +234,9 @@ perf = base_perf * curriculum_difficulty * chirp_perf - Keep `chirp_overlap_penalty` small and sweepable. - Treat `chirp_overlap_fraction` as a diagnostic, not the main objective. -- Keep `budget_difficulty` and `chirp_efficiency` as diagnostics, but do not - multiply them into `perf`; the fixed 15-chirp reference gives cleaner Protein - ranking pressure across 10, 8, and 6 chirp policies. +- Keep chirp-budget ratio as an observation instead of an exported diagnostic; + the fixed 15-chirp reference gives cleaner Protein ranking pressure across + 10, 8, and 6 chirp policies. ### Stage 4: Constant-velocity moving bug @@ -263,7 +260,8 @@ bug_wall_bounce_enabled Gate: -- Require maintained `base_perf` and non-collapsing `chirps_used_ratio`. 
+- Require maintained `base_perf` and non-collapsing chirp behavior, checked + through `chirps_emitted` and `chirp_perf`. - Motion should not start before distance and obstacle rungs are stable. ### Stage 5: Simple bug maneuvers @@ -360,7 +358,6 @@ Before adding maneuvers, do this: 1. Add split difficulty logs: - `curriculum_distance_difficulty` - `curriculum_obstacle_difficulty` - - `curriculum_chirp_budget_difficulty` - `curriculum_motion_difficulty` 2. Change obstacle schedule so it starts contributing around level `6`, not diff --git a/BAT_SPEC.md b/BAT_SPEC.md index e33bb127fb..6c58f8f901 100644 --- a/BAT_SPEC.md +++ b/BAT_SPEC.md @@ -405,9 +405,9 @@ Reset: W&B exported metrics: -- Export at most 31 explicit `dict_set(out, ...)` metrics from `binding.c`. - PufferLib appends `n`, giving the 32-key cap. Keep lower-value diagnostics - internal unless they are actively needed for sweep decisions. +- Keep the explicit `dict_set(out, ...)` list in `binding.c` small. PufferLib + appends `n`, and lower-value diagnostics should stay internal unless they + are actively needed for sweep decisions. 
- `perf` - composite sweep objective: @@ -422,25 +422,14 @@ W&B exported metrics: without chirp-budget weighting - `curriculum_distance_difficulty` - `curriculum_obstacle_difficulty` -- `curriculum_chirp_budget_difficulty` - - legacy diagnostic; fixed at `0.0` because chirp budget no longer decays - with curriculum - `score` - required by PufferLib train worker; do not remove from `binding.c` +- `episode_return` - `episode_length` -- `success` - `collision` - `timeout` -- `bug_distance_start` -- `bug_distance_final` -- `bug_distance_delta` - `num_obstacles` - `chirps_emitted` -- `chirp_budget` -- `chirps_used_ratio` -- `chirp_efficiency` - - `0.5` if the full budget was spent, approaching `1.0` when few chirps were - used - `chirp_perf` - sweep-objective chirp multiplier: `clamp(1.0 - chirps_emitted / 15.0, 0.05, 1.0)` @@ -449,17 +438,6 @@ W&B exported metrics: - `chirp_overlap_fraction` - fraction of emitted chirps that were sent before the previous chirp's max return window cleared -- `far_chirp_rate` -- `near_chirp_rate` -- `chirp_tempo_ratio` - - `near_chirp_rate / far_chirp_rate`, clamped to `[0, 10]`; values above - `1.0` indicate chirps are denser near the bug than far away -- `first_chirp_tick_norm` -- `mean_chirp_tick_norm` -- `mean_chirp_duration` -- `mean_chirp_bandwidth` -- `mean_echo_energy_left` -- `mean_echo_energy_right` - `n` ## Curriculum @@ -653,9 +631,9 @@ train/eval after each rung, and commit each known-good rung separately. - When the budget is exhausted, terminate with a `-1.0` failure penalty if the policy attempts another chirp. Do not terminate immediately after the last valid chirp, so the final echo can still matter. - - Log chirp budget, used ratio, remaining ratio, and efficiency to W&B so - sweeps can distinguish successful policies that waste every chirp from - successful policies that catch the bug with useful chirp timing. 
+ - Keep chirp-use pressure visible through `chirps_emitted`, `chirp_perf`, + and the `chirps_used / chirp_budget` observation. Do not export duplicate + budget-ratio logs unless a future sweep needs them. - Add a sweepable solve-time efficiency reward where spending the full budget scores `0.5` on the efficiency component and using very few chirps approaches `1.0`. diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 2dfb145f66..4468093fdd 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -129,20 +129,8 @@ typedef struct Log { float curriculum_motion_difficulty; float num_obstacles; float chirps_emitted; - float chirp_budget; - float chirps_used_ratio; - float chirp_efficiency; float chirp_perf; float chirp_overlap_fraction; - float far_chirp_fraction; - float near_chirp_fraction; - float far_chirp_rate; - float near_chirp_rate; - float chirp_tempo_ratio; - float first_chirp_tick_norm; - float mean_chirp_tick_norm; - float mean_chirp_duration; - float mean_chirp_bandwidth; float n; } Log; @@ -243,16 +231,6 @@ typedef struct Bat { int chirps_emitted_episode; int audio_chirp_serial; int chirps_overlapped; - float chirp_duration_sum; - float chirp_bandwidth_sum; - float chirps_far; - float chirps_mid; - float chirps_near; - float ticks_far; - float ticks_mid; - float ticks_near; - float first_chirp_tick; - float chirp_tick_sum; float chirp_efficiency_reward; float valid_chirp_reward; @@ -541,36 +519,6 @@ static inline float success_reward(Bat* env) { return env->chirp_efficiency_reward * chirp_efficiency(env); } -static inline float current_distance_ratio(Bat* env) { - float distance = dist(env->x, env->y, env->bug_x, env->bug_y); - return distance / env->start_bug_dist; -} - -static inline void accumulate_distance_region(float ratio, float amount, - float* far, float* mid, float* near) { - if (ratio > 0.66f) { - *far += amount; - } else if (ratio < 0.33f) { - *near += amount; - } else { - *mid += amount; - } -} - -static inline void record_distance_tick(Bat* 
env) { - accumulate_distance_region(current_distance_ratio(env), 1.0f, - &env->ticks_far, &env->ticks_mid, &env->ticks_near); -} - -static inline void record_chirp_timing(Bat* env) { - if (env->first_chirp_tick < 0.0f) { - env->first_chirp_tick = (float)env->tick; - } - env->chirp_tick_sum += (float)env->tick; - accumulate_distance_region(current_distance_ratio(env), 1.0f, - &env->chirps_far, &env->chirps_mid, &env->chirps_near); -} - static inline void sample_spawns_at_distance(Bat* env, float target_distance) { float margin = fmaxf(6.0f, fmaxf(AGENT_RADIUS, BUG_RADIUS) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { @@ -717,7 +665,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo float distance_difficulty = curriculum_distance_difficulty(env); float obstacle_difficulty = curriculum_obstacle_difficulty(env); float motion_difficulty = curriculum_motion_difficulty(env); - float chirp_efficiency_value = chirp_efficiency(env); float chirp_perf_value = chirp_perf(env); env->log.perf += success * curriculum_difficulty_value * chirp_perf_value; env->log.base_perf += success; @@ -734,35 +681,9 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_motion_difficulty += motion_difficulty; env->log.num_obstacles += env->num_obstacles; env->log.chirps_emitted += env->chirps_emitted_episode; - env->log.chirp_budget += env->chirp_budget; - env->log.chirps_used_ratio += chirps_used_ratio(env); - env->log.chirp_efficiency += chirp_efficiency_value; env->log.chirp_perf += chirp_perf_value; float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; - env->log.far_chirp_fraction += env->chirps_far / chirps; - env->log.near_chirp_fraction += env->chirps_near / chirps; - float far_rate = env->chirps_far / fmaxf(1.0f, env->ticks_far); - float near_rate = env->chirps_near / fmaxf(1.0f, env->ticks_near); - 
env->log.far_chirp_rate += far_rate; - env->log.near_chirp_rate += near_rate; - float tempo_ratio = 0.0f; - if (far_rate > 0.000001f) { - tempo_ratio = near_rate / far_rate; - } else if (near_rate > 0.000001f) { - tempo_ratio = 10.0f; - } - env->log.chirp_tempo_ratio += bat_clampf(tempo_ratio, 0.0f, 10.0f); - env->log.first_chirp_tick_norm += env->first_chirp_tick >= 0.0f - ? bat_clampf(env->first_chirp_tick / (float)MAX_STEPS, 0.0f, 1.0f) - : 1.0f; - env->log.mean_chirp_tick_norm += env->chirps_emitted_episode > 0 - ? bat_clampf((env->chirp_tick_sum / chirps) / (float)MAX_STEPS, 0.0f, 1.0f) - : 1.0f; - if (env->chirps_emitted_episode > 0) { - env->log.mean_chirp_duration += env->chirp_duration_sum / env->chirps_emitted_episode; - env->log.mean_chirp_bandwidth += env->chirp_bandwidth_sum / env->chirps_emitted_episode; - } env->log.n += 1.0f; } @@ -1093,16 +1014,6 @@ static inline void reset_episode(Bat* env) { env->last_bug_echo_expected_tick = -1.0f; env->chirps_emitted_episode = 0; env->chirps_overlapped = 0; - env->chirp_duration_sum = 0.0f; - env->chirp_bandwidth_sum = 0.0f; - env->chirps_far = 0.0f; - env->chirps_mid = 0.0f; - env->chirps_near = 0.0f; - env->ticks_far = 0.0f; - env->ticks_mid = 0.0f; - env->ticks_near = 0.0f; - env->first_chirp_tick = -1.0f; - env->chirp_tick_sum = 0.0f; env->episode_return = 0.0f; env->start_bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); env->prev_bug_dist = env->start_bug_dist; @@ -1260,10 +1171,7 @@ static inline bool try_emit_chirp(Bat* env) { env->last_chirp_duration = norm_bin(duration_idx, CHIRP_DURATION_BINS); env->chirp_age_ticks = 0; env->last_chirp_tick = env->tick; - record_chirp_timing(env); env->chirps_emitted_episode += 1; - env->chirp_duration_sum += env->last_chirp_duration; - env->chirp_bandwidth_sum += fabsf(env->last_chirp_end_freq - env->last_chirp_start_freq); ChirpEvent* chirp = &env->chirps[env->chirp_head]; chirp->x = env->x; chirp->y = env->y; @@ -1356,7 +1264,6 @@ void c_step(Bat* env) { } 
env->tick += 1; - record_distance_tick(env); float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); float progress = env->prev_bug_dist - bug_dist; env->rewards[0] += env->progress_reward_scale * progress; diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index db22cf53c9..115631fbfd 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -58,16 +58,6 @@ void my_log(Log* log, Dict* out) { dict_set(out, "curriculum_motion_difficulty", log->curriculum_motion_difficulty); dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "chirps_emitted", log->chirps_emitted); - dict_set(out, "chirp_budget", log->chirp_budget); - dict_set(out, "chirps_used_ratio", log->chirps_used_ratio); - dict_set(out, "chirp_efficiency", log->chirp_efficiency); dict_set(out, "chirp_perf", log->chirp_perf); dict_set(out, "chirp_overlap_fraction", log->chirp_overlap_fraction); - dict_set(out, "far_chirp_rate", log->far_chirp_rate); - dict_set(out, "near_chirp_rate", log->near_chirp_rate); - dict_set(out, "chirp_tempo_ratio", log->chirp_tempo_ratio); - dict_set(out, "first_chirp_tick_norm", log->first_chirp_tick_norm); - dict_set(out, "mean_chirp_tick_norm", log->mean_chirp_tick_norm); - dict_set(out, "mean_chirp_duration", log->mean_chirp_duration); - dict_set(out, "mean_chirp_bandwidth", log->mean_chirp_bandwidth); } diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index c7132e1c58..fa182f99c8 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -25,32 +25,18 @@ static Bat make_test_env(void) { Bat env = { .num_agents = 1, .frameskip = 1, - .width = 64, - .height = 64, .num_obstacles = 1, - .bat_radius = 2.0f, .ear_separation_scale = 0.75f, .ear_rear_gain = 0.20f, .ear_front_gain = 0.55f, .ear_side_gain = 0.35f, - .bug_radius = 1.5f, - .bat_max_speed = 12.0f, - .bat_min_speed = 2.4f, - .bat_accel = 30.0f, - .bat_turn_rate = 3.1415926f, - .bug_speed = 4.0f, - .max_steps = 512, - .freq_bins_per_ear = 
BAT_FREQ_BINS, - .max_echo_range = 80.0f, + .max_speed = 12.0f, + .min_speed = 2.4f, + .accel = 30.0f, + .turn_rate = 3.1415926f, .sound_speed = 100.0f, - .reflector_spacing = 8.0f, .reflector_strength = 2.0f, - .max_chirp_age_ticks = 30, .chirp_cooldown_ticks = 12, - .max_chirps_per_episode = 20, - .min_chirps_per_episode = 10, - .chirp_budget_decay_levels = 4, - .chirp_cost = 0.0005f, .chirp_efficiency_reward = 1.0f, .step_cost = 0.001f, .progress_reward_scale = 0.05f, @@ -58,20 +44,10 @@ static Bat make_test_env(void) { .valid_chirp_reward = 0.0005f, .early_chirp_penalty = 0.001f, .bug_echo_farther_penalty_scale = 0.10f, - .bug_echo_min_displacement = 1.0f, .bug_wing_sideband_gain = 0.10f, - .curriculum_max_obstacles = 1, .curriculum_obstacle_step = 8, .curriculum_successes_per_level = 1, .curriculum_start_bug_distance = 14.0f, - .curriculum_max_bug_distance = 44.8f, - .curriculum_bug_distance_step = 1.5f, - .curriculum_inbound_start_level = 8, - .curriculum_inbound_max_bug_distance = 44.8f, - .curriculum_inbound_bug_distance_step = 1.5f, - .inbound_bug_speed_multiplier = 1.5f, - .bug_maneuver_start_level = 7, - .bug_maneuver_frequency = 0.35f, .rng = 1, }; allocate(&env); @@ -90,11 +66,11 @@ static int test_chirp_metadata_and_observation_size(void) { env.actions[5] = 1.0f; c_step(&env); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_START_OBS], 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_END_OBS], 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_DURATION_OBS], 1.0f, 0.0001f); - ASSERT_TRUE(env.observations[BAT_CHIRP_AGE_OBS] <= 1.0f); - ASSERT_TRUE(env.observations[BAT_CHIRP_AGE_OBS] >= 0.0f); + ASSERT_FLOAT_NEAR(env.observations[CHIRP_START_OBS], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[CHIRP_END_OBS], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[CHIRP_DURATION_OBS], 1.0f, 0.0001f); + ASSERT_TRUE(env.observations[CHIRP_AGE_OBS] <= 1.0f); + ASSERT_TRUE(env.observations[CHIRP_AGE_OBS] >= 0.0f); 
free_allocated(&env); return 0; @@ -102,13 +78,10 @@ static int test_chirp_metadata_and_observation_size(void) { static int test_chirp_budget_observation_tracks_used_chirps(void) { Bat env = make_test_env(); - env.max_chirps_per_episode = 4; - env.min_chirps_per_episode = 2; - env.chirp_budget_decay_levels = 4; c_reset(&env); - ASSERT_TRUE(env.chirp_budget == 4); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 0.0f, 0.0001f); + ASSERT_TRUE(env.chirp_budget == MAX_CHIRPS_PER_EPISODE); + ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 0.0f, 0.0001f); env.actions[2] = 0.0f; env.actions[3] = 7.0f; @@ -117,11 +90,12 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { c_step(&env); ASSERT_TRUE(env.chirps_emitted_episode == 1); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 0.25f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], + 1.0f / (float)MAX_CHIRPS_PER_EPISODE, 0.0001f); - env.chirps_emitted_episode = 12; + env.chirps_emitted_episode = MAX_CHIRPS_PER_EPISODE + 1; compute_observations(&env); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f, 0.0001f); free_allocated(&env); return 0; @@ -129,15 +103,11 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { static int test_chirp_budget_stays_fixed_with_curriculum_level(void) { Bat env = make_test_env(); - env.curriculum_enabled = 1; env.curriculum_initial_level = 8; - env.max_chirps_per_episode = 20; - env.min_chirps_per_episode = 10; - env.chirp_budget_decay_levels = 4; c_reset(&env); ASSERT_TRUE(env.curriculum_level == 8); - ASSERT_TRUE(env.chirp_budget == 20); + ASSERT_TRUE(env.chirp_budget == MAX_CHIRPS_PER_EPISODE); free_allocated(&env); return 0; @@ -145,12 +115,11 @@ static int test_chirp_budget_stays_fixed_with_curriculum_level(void) { static int test_chirping_after_budget_terminates_with_penalty(void) { Bat env = make_test_env(); - 
env.max_chirps_per_episode = 1; - env.min_chirps_per_episode = 1; - env.chirp_budget_decay_levels = 4; env.chirp_cooldown_ticks = 5; env.early_chirp_penalty = 0.0f; c_reset(&env); + env.chirp_budget = 1; + compute_observations(&env); env.actions[2] = 0.0f; env.actions[3] = 7.0f; @@ -159,7 +128,7 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { c_step(&env); ASSERT_TRUE(env.terminals[0] == 0.0f); ASSERT_TRUE(env.chirps_emitted_episode == 1); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRPS_USED_OBS], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f, 0.0001f); c_step(&env); @@ -173,20 +142,19 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { static int test_timer_observation_tracks_elapsed_fraction(void) { Bat env = make_test_env(); - env.max_steps = 512; c_reset(&env); - ASSERT_TRUE(BAT_OBS_SIZE == 41); + ASSERT_TRUE(OBS_SIZE == 41); ASSERT_FLOAT_NEAR(env.observations[40], 0.0f, 0.0001f); - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_NONE; + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; env.actions[5] = 0.0f; c_step(&env); - ASSERT_FLOAT_NEAR(env.observations[40], 1.0f / 512.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[40], 1.0f / (float)MAX_STEPS, 0.0001f); - env.tick = 256; + env.tick = MAX_STEPS / 2; compute_observations(&env); ASSERT_FLOAT_NEAR(env.observations[40], 0.5f, 0.0001f); @@ -197,13 +165,13 @@ static int test_timer_observation_tracks_elapsed_fraction(void) { static int test_timeout_terminates_with_minus_one_reward(void) { Bat env = make_test_env(); env.num_obstacles = 0; - env.max_steps = 1; env.progress_reward_scale = 0.0f; env.step_cost = 0.0f; c_reset(&env); + env.tick = MAX_STEPS - 1; - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_NONE; + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; env.actions[5] = 0.0f; c_step(&env); @@ -221,10 +189,10 @@ static int test_chirp_efficiency_scores_low_usage_above_full_budget(void) { env.chirp_budget = 
10; env.chirps_emitted_episode = 1; - ASSERT_FLOAT_NEAR(bat_chirp_efficiency(&env), 0.95f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.95f, 0.0001f); env.chirps_emitted_episode = 10; - ASSERT_FLOAT_NEAR(bat_chirp_efficiency(&env), 0.50f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.50f, 0.0001f); free_allocated(&env); return 0; @@ -235,19 +203,19 @@ static int test_chirp_perf_uses_fixed_fifteen_chirp_reference(void) { c_reset(&env); env.chirps_emitted_episode = 0; - ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_perf(&env), 1.0f, 0.0001f); env.chirps_emitted_episode = 6; - ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.60f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.60f, 0.0001f); env.chirps_emitted_episode = 8; - ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.4666667f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.4666667f, 0.0001f); env.chirps_emitted_episode = 15; - ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.05f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.05f, 0.0001f); env.chirps_emitted_episode = 30; - ASSERT_FLOAT_NEAR(bat_chirp_perf(&env), 0.05f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.05f, 0.0001f); free_allocated(&env); return 0; @@ -260,8 +228,8 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { env.chirp_budget = 10; env.chirps_emitted_episode = 2; - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; @@ -274,54 +242,32 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { return 0; } -static int test_chirp_budget_logs_ratios_for_wandb(void) { - Bat env = make_test_env(); - c_reset(&env); - - env.chirp_budget = 10; - env.chirps_emitted_episode = 4; - add_log(&env, 1.0f, 0.0f, 0.0f); - - ASSERT_FLOAT_NEAR(env.log.chirp_budget, 10.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.chirps_used_ratio, 0.40f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.80f, 0.0001f); - - 
free_allocated(&env); - return 0; -} - static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components(void) { Bat env = make_test_env(); c_reset(&env); env.curriculum_start_bug_distance = 8.0f; - env.curriculum_max_bug_distance = 56.0f; - env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.num_obstacles = 2; - env.max_chirps_per_episode = 15; - env.min_chirps_per_episode = 6; env.chirp_budget = 12; env.start_bug_dist = 32.0f; - ASSERT_FLOAT_NEAR(bat_curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_obstacle_difficulty(&env), 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_motion_difficulty(&env), 0.0000000f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_curriculum_difficulty(&env), 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(curriculum_obstacle_difficulty(&env), 0.6666667f, 0.0001f); + ASSERT_FLOAT_NEAR(curriculum_motion_difficulty(&env), 0.0000000f, 0.0001f); + ASSERT_FLOAT_NEAR(curriculum_difficulty(&env), 0.3888889f, 0.0001f); add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_distance_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_obstacle_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_chirp_budget_difficulty, 0.0000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_obstacle_difficulty, 0.6666667f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_motion_difficulty, 0.0000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3888889f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.3888889f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.num_obstacles, 2.0f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 
1.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3888889f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.0f, 0.0001f); free_allocated(&env); @@ -333,12 +279,7 @@ static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi c_reset(&env); env.curriculum_start_bug_distance = 8.0f; - env.curriculum_max_bug_distance = 56.0f; - env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.num_obstacles = 2; - env.max_chirps_per_episode = 14; - env.min_chirps_per_episode = 4; env.chirp_budget = 14; env.chirps_emitted_episode = 7; env.start_bug_dist = 32.0f; @@ -346,10 +287,9 @@ static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.chirp_efficiency, 0.75f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.chirp_perf, 0.5333334f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.perf, 0.2666667f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3888889f, 0.0001f); + ASSERT_FLOAT_NEAR(env.log.perf, 0.2074074f, 0.0001f); memset(&env.log, 0, sizeof(env.log)); add_log(&env, 0.0f, 1.0f, 0.0f); @@ -360,66 +300,37 @@ static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi return 0; } -static int test_chirp_tempo_logs_far_and_near_rates(void) { - Bat env = make_test_env(); - c_reset(&env); - - env.chirps_emitted_episode = 4; - env.chirps_far = 2.0f; - env.chirps_mid = 1.0f; - env.chirps_near = 1.0f; - env.ticks_far = 40.0f; - env.ticks_mid = 20.0f; - env.ticks_near = 10.0f; - env.first_chirp_tick = 12.0f; - env.chirp_tick_sum = 120.0f; - env.max_steps = 120; - - add_log(&env, 1.0f, 0.0f, 0.0f); - - ASSERT_FLOAT_NEAR(env.log.far_chirp_fraction, 0.50f, 0.0001f); - 
ASSERT_FLOAT_NEAR(env.log.near_chirp_fraction, 0.25f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.far_chirp_rate, 0.05f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.near_chirp_rate, 0.10f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.chirp_tempo_ratio, 2.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.first_chirp_tick_norm, 0.10f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.mean_chirp_tick_norm, 0.25f, 0.0001f); - - free_allocated(&env); - return 0; -} - static int test_left_right_echo_asymmetry(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; - env.bat_heading = 0.0f; + env.x = 20.0f; + env.y = 20.0f; + env.heading = 0.0f; env.bug_x = 35.0f; env.bug_y = 10.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); env.tick = 0; ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 1.0f, .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, - env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 8.0f, BAT_ECHO_BUG); + schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 8.0f, ECHO_BUG); float left_energy = 0.0f; float right_energy = 0.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { if (env.echo_queue[i].tick < 0) continue; - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { left_energy += env.echo_queue[i].energy[0][bin]; right_energy += env.echo_queue[i].energy[1][bin]; } @@ -442,41 +353,40 @@ static BatEchoProbe test_probe_echo_from_relative_source(float dx, float dy) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 24.0f; - env.bat_y = 24.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.bat_heading = 0.0f; + env.x = 24.0f; + env.y = 24.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.heading = 0.0f; 
env.sound_speed = 40.0f; env.ear_separation_scale = 2.0f; - env.max_echo_range = 128.0f; env.ear_rear_gain = 0.20f; env.ear_front_gain = 0.55f; env.ear_side_gain = 0.35f; env.tick = 0; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.5f, .end_freq = 0.5f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, - env.bat_x + dx, env.bat_y + dy, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.x + dx, env.y + dy, 0.0f, 0.0f, 8.0f, ECHO_BUG); BatEchoProbe probe = { .left_tick = -1.0f, .right_tick = -1.0f, }; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { if (env.echo_queue[i].tick < 0) continue; float left_energy = 0.0f; float right_energy = 0.0f; - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { left_energy += env.echo_queue[i].energy[0][bin]; right_energy += env.echo_queue[i].energy[1][bin]; } @@ -535,9 +445,9 @@ static int test_ear_directivity_gains_control_echo_energy(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; - env.bat_heading = 0.0f; + env.x = 20.0f; + env.y = 20.0f; + env.heading = 0.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; env.ear_rear_gain = 0.0f; @@ -546,34 +456,34 @@ static int test_ear_directivity_gains_control_echo_energy(void) { env.tick = 0; ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 1.0f, .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_clear_echo_queue(&env); - bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, - env.bat_x + 16.0f, env.bat_y, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + clear_echo_queue(&env); + 
schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.x + 16.0f, env.y, 0.0f, 0.0f, 8.0f, ECHO_BUG); float front_energy = 0.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { for (int ear = 0; ear < 2; ear++) { - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { front_energy += env.echo_queue[i].energy[ear][bin]; } } } - bat_clear_echo_queue(&env); - bat_schedule_echo(&env, &chirp, 0.0f, 1.0f, - env.bat_x, env.bat_y - 16.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + clear_echo_queue(&env); + schedule_echo(&env, &chirp, 0.0f, 1.0f, + env.x, env.y - 16.0f, 0.0f, 0.0f, 8.0f, ECHO_BUG); float side_energy = 0.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { for (int ear = 0; ear < 2; ear++) { - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { side_energy += env.echo_queue[i].energy[ear][bin]; } } @@ -590,55 +500,46 @@ static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { Bat env = { .num_agents = 1, .frameskip = 1, - .width = 64, - .height = 64, .num_obstacles = 0, - .bat_radius = 2.0f, .ear_separation_scale = 0.75f, .ear_rear_gain = 0.20f, .ear_front_gain = 0.55f, .ear_side_gain = 0.35f, - .bug_radius = 1.5f, - .bat_max_speed = 12.0f, - .bat_accel = 30.0f, - .bat_turn_rate = 3.1415926f, - .bug_speed = 4.0f, - .max_steps = 512, - .freq_bins_per_ear = BAT_FREQ_BINS, - .max_echo_range = 80.0f, + .max_speed = 12.0f, + .accel = 30.0f, + .turn_rate = 3.1415926f, .sound_speed = 60.0f, - .reflector_spacing = 8.0f, .rng = 1, }; allocate(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.bat_heading = 0.0f; + env.x = 20.0f; + env.y = 20.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.heading = 0.0f; env.tick = 0; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + 
.y = env.y, .start_freq = 0.5f, .end_freq = 0.5f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, - env.bat_x, env.bat_y - 12.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.x, env.y - 12.0f, 0.0f, 0.0f, 8.0f, ECHO_BUG); float left_tick = -1.0f; float right_tick = -1.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { if (env.echo_queue[i].tick < 0) continue; float left_energy = 0.0f; float right_energy = 0.0f; - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { left_energy += env.echo_queue[i].energy[0][bin]; right_energy += env.echo_queue[i].energy[1][bin]; } @@ -658,14 +559,14 @@ static int test_echo_scheduling_uses_tick_bucket_accumulator(void) { Bat env = make_test_env(); c_reset(&env); - bat_clear_echo_queue(&env); + clear_echo_queue(&env); env.tick = 7; - bat_add_echo_event(&env, 0, 9.25f, 1.0f, 0.4f, 18.0f, BAT_ECHO_BUG); - bat_add_echo_event(&env, 0, 9.75f, 1.0f, 0.7f, 12.0f, BAT_ECHO_BUG); + add_echo_event(&env, 0, 9.25f, 1.0f, 0.4f, 18.0f, ECHO_BUG); + add_echo_event(&env, 0, 9.75f, 1.0f, 0.7f, 12.0f, ECHO_BUG); - int slot = 10 % BAT_ECHO_QUEUE_TICKS; + int slot = 10 % ECHO_QUEUE_TICKS; ASSERT_TRUE(env.echo_queue[slot].tick == 10); - ASSERT_FLOAT_NEAR(env.echo_queue[slot].energy[0][BAT_FREQ_BINS - 1], 1.1f, 0.0001f); + ASSERT_FLOAT_NEAR(env.echo_queue[slot].energy[0][FREQ_BINS - 1], 1.1f, 0.0001f); ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_energy, 1.1f, 0.0001f); ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_path, 12.0f, 0.0001f); @@ -679,19 +580,19 @@ static int test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation( env.tick = 0; env.bug_wing_sideband_gain = 0.25f; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); - int bin = bat_freq_bin_index(&env, 0.5f); - bat_add_echo_event(&env, 
0, 1.0f, 0.5f, 0.4f, 12.0f, BAT_ECHO_BUG); - EchoBucket* bug_bucket = &env.echo_queue[1 % BAT_ECHO_QUEUE_TICKS]; + int bin = freq_bin_index(&env, 0.5f); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, ECHO_BUG); + EchoBucket* bug_bucket = &env.echo_queue[1 % ECHO_QUEUE_TICKS]; ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin], 0.4f, 0.0001f); ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin - 1], 0.1f, 0.0001f); ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin + 1], 0.1f, 0.0001f); ASSERT_FLOAT_NEAR(bug_bucket->bug_energy, 0.4f, 0.0001f); - bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, BAT_ECHO_STATIC); - EchoBucket* static_bucket = &env.echo_queue[1 % BAT_ECHO_QUEUE_TICKS]; + clear_echo_queue(&env); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, ECHO_STATIC); + EchoBucket* static_bucket = &env.echo_queue[1 % ECHO_QUEUE_TICKS]; ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin], 0.4f, 0.0001f); ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin - 1], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin + 1], 0.0f, 0.0001f); @@ -706,33 +607,33 @@ static float test_side_echo_receive_tick_gap(float ear_separation_scale) { c_reset(&env); env.ear_separation_scale = ear_separation_scale; - env.bat_x = 20.0f; - env.bat_y = 20.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.bat_heading = 0.0f; + env.x = 20.0f; + env.y = 20.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.heading = 0.0f; env.tick = 0; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.5f, .end_freq = 0.5f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, - env.bat_x, env.bat_y - 12.0f, 0.0f, 0.0f, 8.0f, BAT_ECHO_BUG); + schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.x, env.y - 12.0f, 0.0f, 0.0f, 8.0f, ECHO_BUG); float left_tick = -1.0f; float right_tick 
= -1.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { if (env.echo_queue[i].tick < 0) continue; float left_energy = 0.0f; float right_energy = 0.0f; - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { left_energy += env.echo_queue[i].energy[0][bin]; right_energy += env.echo_queue[i].energy[1][bin]; } @@ -762,40 +663,40 @@ static int test_doppler_sign_for_approaching_bug(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.x = 20.0f; + env.y = 20.0f; + env.vx = 0.0f; + env.vy = 0.0f; env.bug_x = 42.0f; env.bug_y = 20.0f; env.bug_vx = -16.0f; env.bug_vy = 0.0f; - env.bat_heading = 0.0f; - memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); - bat_clear_echo_queue(&env); + env.heading = 0.0f; + memset(env.observations, 0, OBS_SIZE * sizeof(float)); + clear_echo_queue(&env); env.tick = 0; ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.5f, .end_freq = 0.5f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = 0, .active = 1, }; - bat_schedule_echo(&env, &chirp, 0.0f, 0.5f, - env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 8.0f, BAT_ECHO_BUG); + schedule_echo(&env, &chirp, 0.0f, 0.5f, + env.bug_x, env.bug_y, env.bug_vx, env.bug_vy, 8.0f, ECHO_BUG); env.tick = 27; compute_observations(&env); float low_energy = 0.0f; float high_energy = 0.0f; - for (int i = 0; i < BAT_FREQ_BINS; i++) { - float energy = env.observations[BAT_LEFT_FREQ_OFFSET + i] - + env.observations[BAT_RIGHT_FREQ_OFFSET + i]; - if (i < BAT_FREQ_BINS / 2) { + for (int i = 0; i < FREQ_BINS; i++) { + float energy = env.observations[LEFT_FREQ_OFFSET + i] + + env.observations[RIGHT_FREQ_OFFSET + i]; + if (i < FREQ_BINS / 2) { low_energy += energy; } else { high_energy += energy; @@ -812,11 +713,11 @@ static int 
test_wall_collision_is_terminal_minus_one(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = env.width - env.bat_radius - 0.1f; - env.bat_y = env.height * 0.5f; - env.bat_heading = 0.0f; - env.bat_vx = env.bat_max_speed; - env.bat_vy = 0.0f; + env.x = ARENA_WIDTH - AGENT_RADIUS - 0.1f; + env.y = ARENA_HEIGHT * 0.5f; + env.heading = 0.0f; + env.vx = env.max_speed; + env.vy = 0.0f; env.actions[0] = 1.0f; env.actions[1] = 0.0f; env.actions[2] = 0.0f; @@ -837,8 +738,8 @@ static int test_catch_bug_is_terminal_plus_one(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; @@ -855,13 +756,13 @@ static int test_progress_reward_sign(void) { Bat env = make_test_env(); c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 40.0f; env.bug_y = 20.0f; env.prev_bug_dist = 25.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; env.actions[0] = 1.0f; env.actions[1] = 0.0f; @@ -883,16 +784,15 @@ static int test_bat_cannot_accelerate_backward_from_brake(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.actions[0] = BAT_BRAKE; - env.actions[1] = BAT_TURN_NONE; + env.heading = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.actions[0] = BRAKE; + env.actions[1] = TURN_NONE; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; @@ -900,9 +800,9 @@ static int test_bat_cannot_accelerate_backward_from_brake(void) { c_step(&env); - float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); + float forward = env.vx * cosf(env.heading) + env.vy * sinf(env.heading); ASSERT_TRUE(forward >= -0.0001f); - 
ASSERT_TRUE(env.observations[BAT_FORWARD_SPEED_OBS] >= -0.0001f); + ASSERT_TRUE(env.observations[FORWARD_SPEED_OBS] >= -0.0001f); free_allocated(&env); return 0; @@ -912,9 +812,9 @@ static int test_bat_reset_starts_with_forward_stall_speed(void) { Bat env = make_test_env(); c_reset(&env); - float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); - ASSERT_TRUE(forward >= 0.19f * env.bat_max_speed); - ASSERT_FLOAT_NEAR(env.observations[BAT_FORWARD_SPEED_OBS], forward / env.bat_max_speed, 0.0001f); + float forward = env.vx * cosf(env.heading) + env.vy * sinf(env.heading); + ASSERT_TRUE(forward >= 0.19f * env.max_speed); + ASSERT_FLOAT_NEAR(env.observations[FORWARD_SPEED_OBS], forward / env.max_speed, 0.0001f); free_allocated(&env); return 0; @@ -926,16 +826,15 @@ static int test_bat_brake_clamps_to_forward_stall_speed(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.actions[0] = BAT_BRAKE; - env.actions[1] = BAT_TURN_NONE; + env.heading = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.actions[0] = BRAKE; + env.actions[1] = TURN_NONE; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; @@ -943,9 +842,9 @@ static int test_bat_brake_clamps_to_forward_stall_speed(void) { c_step(&env); - float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); - ASSERT_TRUE(forward >= 0.19f * env.bat_max_speed); - ASSERT_TRUE(env.bat_x > 20.0f); + float forward = env.vx * cosf(env.heading) + env.vy * sinf(env.heading); + ASSERT_TRUE(forward >= 0.19f * env.max_speed); + ASSERT_TRUE(env.x > 20.0f); free_allocated(&env); return 0; @@ -957,16 +856,15 @@ static int test_bat_velocity_is_locked_to_heading(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; 
- env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - env.bat_heading = 0.0f; - env.bat_vx = -env.bat_max_speed * 0.5f; - env.bat_vy = 3.0f; - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_NONE; + env.heading = 0.0f; + env.vx = -env.max_speed * 0.5f; + env.vy = 3.0f; + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; @@ -974,11 +872,11 @@ static int test_bat_velocity_is_locked_to_heading(void) { c_step(&env); - float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); - float lateral = env.bat_vx * -sinf(env.bat_heading) + env.bat_vy * cosf(env.bat_heading); + float forward = env.vx * cosf(env.heading) + env.vy * sinf(env.heading); + float lateral = env.vx * -sinf(env.heading) + env.vy * cosf(env.heading); ASSERT_TRUE(forward >= -0.0001f); ASSERT_FLOAT_NEAR(lateral, 0.0f, 0.0001f); - ASSERT_TRUE(env.observations[BAT_FORWARD_SPEED_OBS] >= -0.0001f); + ASSERT_TRUE(env.observations[FORWARD_SPEED_OBS] >= -0.0001f); free_allocated(&env); return 0; @@ -990,29 +888,28 @@ static int test_bat_zero_speed_recovers_to_forward_arc(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - env.bat_heading = 0.25f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_LEFT; + env.heading = 0.25f; + env.vx = 0.0f; + env.vy = 0.0f; + env.actions[0] = NOOP; + env.actions[1] = TURN_LEFT; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; env.actions[5] = 0.0f; - float start_x = env.bat_x; - float start_y = env.bat_y; + float start_x = env.x; + float start_y = env.y; c_step(&env); - float forward = env.bat_vx * cosf(env.bat_heading) + env.bat_vy * sinf(env.bat_heading); - ASSERT_TRUE(forward >= 0.19f * 
env.bat_max_speed); - ASSERT_TRUE(bat_dist(start_x, start_y, env.bat_x, env.bat_y) > 0.0f); - ASSERT_TRUE(fabsf(env.bat_heading - 0.25f) > 0.0001f); + float forward = env.vx * cosf(env.heading) + env.vy * sinf(env.heading); + ASSERT_TRUE(forward >= 0.19f * env.max_speed); + ASSERT_TRUE(dist(start_x, start_y, env.x, env.y) > 0.0f); + ASSERT_TRUE(fabsf(env.heading - 0.25f) > 0.0001f); free_allocated(&env); return 0; @@ -1024,16 +921,15 @@ static int test_bat_turn_rate_scales_with_forward_speed(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - env.bat_heading = 0.0f; - env.bat_vx = env.bat_max_speed * 0.5f; - env.bat_vy = 0.0f; - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_RIGHT; + env.heading = 0.0f; + env.vx = env.max_speed * 0.5f; + env.vy = 0.0f; + env.actions[0] = NOOP; + env.actions[1] = TURN_RIGHT; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; @@ -1041,25 +937,25 @@ static int test_bat_turn_rate_scales_with_forward_speed(void) { c_step(&env); - ASSERT_FLOAT_NEAR(env.bat_turn_velocity, env.bat_turn_rate * 0.5f, 0.0001f); - ASSERT_FLOAT_NEAR(env.bat_heading, env.bat_turn_rate * 0.5f * BAT_TICK_RATE, 0.0001f); + ASSERT_FLOAT_NEAR(env.turn_velocity, env.turn_rate * 0.5f, 0.0001f); + ASSERT_FLOAT_NEAR(env.heading, env.turn_rate * 0.5f * TICK_RATE, 0.0001f); free_allocated(&env); return 0; } static int test_bat_speed_action_space_has_no_strafe(void) { - ASSERT_TRUE(BAT_MOVE_ACTIONS == 3); - ASSERT_TRUE(BAT_NOOP == 0); - ASSERT_TRUE(BAT_THRUST_FORWARD == 1); - ASSERT_TRUE(BAT_BRAKE == 2); + ASSERT_TRUE(MOVE_ACTIONS == 3); + ASSERT_TRUE(NOOP == 0); + ASSERT_TRUE(THRUST_FORWARD == 1); + ASSERT_TRUE(BRAKE == 2); return 0; } static int test_chirp_ring_physical_ordering(void) { - float duration = bat_chirp_duration_seconds(1.0f); - float outer = bat_chirp_ring_radius(1.0f, 0.0f, 
duration, 100.0f); - float inner = bat_chirp_ring_radius(1.0f, 1.0f, duration, 100.0f); + float duration = chirp_duration_seconds(1.0f); + float outer = chirp_ring_radius(1.0f, 0.0f, duration, 100.0f); + float inner = chirp_ring_radius(1.0f, 1.0f, duration, 100.0f); ASSERT_TRUE(outer > inner); ASSERT_FLOAT_NEAR(outer, 100.0f, 0.0001f); @@ -1069,11 +965,11 @@ static int test_chirp_ring_physical_ordering(void) { } static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { - ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, -1, 48000), 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 9600, 48000), 0.0f, 0.0001f); - float sample = bat_chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 2400, 48000); + ASSERT_FLOAT_NEAR(chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, -1, 48000), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 9600, 48000), 0.0f, 0.0001f); + float sample = chirp_audio_sample_f32(0.0f, 1.0f, 0.20f, 2400, 48000); ASSERT_TRUE(sample >= -0.25f); ASSERT_TRUE(sample <= 0.25f); return 0; @@ -1081,15 +977,15 @@ static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { static int test_chirp_audio_duration_scales_with_render_fps(void) { Bat env = make_test_env(); - float base_duration = bat_chirp_duration_seconds(0.0f); + float base_duration = chirp_duration_seconds(0.0f); env.render_target_fps = 60; - ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); env.render_target_fps = 30; - ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration * 2.0f, 0.0001f); + 
ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration * 2.0f, 0.0001f); env.render_target_fps = 15; - ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration * 4.0f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration * 4.0f, 0.0001f); env.render_target_fps = 0; - ASSERT_FLOAT_NEAR(bat_chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); free_allocated(&env); return 0; } @@ -1103,11 +999,11 @@ static int test_chirp_cooldown_accepts_only_after_delay(void) { env.actions[3] = 7.0f; env.actions[4] = 1.0f; env.actions[5] = 1.0f; - ASSERT_TRUE(bat_try_emit_chirp(&env)); - ASSERT_TRUE(!bat_try_emit_chirp(&env)); + ASSERT_TRUE(try_emit_chirp(&env)); + ASSERT_TRUE(!try_emit_chirp(&env)); env.tick += 12; - ASSERT_TRUE(bat_try_emit_chirp(&env)); + ASSERT_TRUE(try_emit_chirp(&env)); free_allocated(&env); return 0; @@ -1115,35 +1011,34 @@ static int test_chirp_cooldown_accepts_only_after_delay(void) { static void test_place_safe_stationary_scene(Bat* env) { env->num_obstacles = 0; - env->bat_x = 20.0f; - env->bat_y = 20.0f; - env->bat_vx = 0.0f; - env->bat_vy = 0.0f; - env->bat_heading = 0.0f; + env->x = 20.0f; + env->y = 20.0f; + env->vx = 0.0f; + env->vy = 0.0f; + env->heading = 0.0f; env->bug_x = 48.0f; env->bug_y = 48.0f; env->bug_vx = 0.0f; env->bug_vy = 0.0f; - env->prev_bug_dist = bat_dist(env->bat_x, env->bat_y, env->bug_x, env->bug_y); + env->prev_bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); } static void test_set_emit_chirp_action(Bat* env) { - env->actions[0] = BAT_NOOP; - env->actions[1] = BAT_TURN_NONE; + env->actions[0] = NOOP; + env->actions[1] = TURN_NONE; env->actions[2] = 0.0f; env->actions[3] = 7.0f; env->actions[4] = 1.0f; env->actions[5] = 1.0f; } -static int test_valid_chirp_gets_reward_without_legacy_cost(void) { +static int test_valid_chirp_gets_reward(void) { Bat env 
= make_test_env(); c_reset(&env); test_place_safe_stationary_scene(&env); env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; env.bug_echo_reward_scale = 0.0f; - env.chirp_cost = 10.0f; env.valid_chirp_reward = 0.0005f; env.early_chirp_penalty = 0.0020f; test_set_emit_chirp_action(&env); @@ -1165,7 +1060,6 @@ static int test_early_chirp_gets_penalty_and_emits_nothing(void) { env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; env.bug_echo_reward_scale = 0.0f; - env.chirp_cost = 0.0f; env.valid_chirp_reward = 0.0005f; env.early_chirp_penalty = 0.0020f; env.chirp_cooldown_ticks = 12; @@ -1191,12 +1085,10 @@ static int test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; env.bug_echo_reward_scale = 0.0f; - env.chirp_cost = 0.0f; env.valid_chirp_reward = 0.0005f; env.early_chirp_penalty = 0.0020f; env.chirp_overlap_penalty = 0.0040f; env.chirp_cooldown_ticks = 1; - env.max_chirp_age_ticks = 8; test_set_emit_chirp_action(&env); c_step(&env); @@ -1233,7 +1125,6 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { env.valid_chirp_reward = 0.0005f; env.chirp_overlap_penalty = 0.0040f; env.chirp_cooldown_ticks = 1; - env.max_chirp_age_ticks = 100; env.chirp_budget = 10; env.chirps_emitted_episode = 1; env.last_chirp_tick = 0; @@ -1255,7 +1146,7 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { static int test_reflection_arrives_at_two_way_travel_time(void) { float sound_speed = 100.0f; float distance = 25.0f; - float echo_time = bat_echo_time_seconds(distance, sound_speed); + float echo_time = echo_time_seconds(distance, sound_speed); ASSERT_FLOAT_NEAR(echo_time, 0.5f, 0.0001f); ASSERT_TRUE(fabsf((echo_time + 0.005f) - echo_time) <= 0.02f); @@ -1273,18 +1164,18 @@ static float test_sum_obs(Bat* env, int offset, int count) { } static int test_bins_only_observation_layout(void) { - ASSERT_TRUE(BAT_OBS_SIZE == 41); - 
ASSERT_TRUE(BAT_FREQ_BINS == 16); - ASSERT_TRUE(BAT_LEFT_FREQ_OFFSET == 0); - ASSERT_TRUE(BAT_RIGHT_FREQ_OFFSET == 16); - ASSERT_TRUE(BAT_CHIRP_AGE_OBS == 32); - ASSERT_TRUE(BAT_CHIRP_COOLDOWN_OBS == 33); - ASSERT_TRUE(BAT_CHIRP_START_OBS == 34); - ASSERT_TRUE(BAT_CHIRP_END_OBS == 35); - ASSERT_TRUE(BAT_CHIRP_DURATION_OBS == 36); - ASSERT_TRUE(BAT_CHIRPS_USED_OBS == 37); - ASSERT_TRUE(BAT_FORWARD_SPEED_OBS == 38); - ASSERT_TRUE(BAT_TURN_RATE_OBS == 39); + ASSERT_TRUE(OBS_SIZE == 41); + ASSERT_TRUE(FREQ_BINS == 16); + ASSERT_TRUE(LEFT_FREQ_OFFSET == 0); + ASSERT_TRUE(RIGHT_FREQ_OFFSET == 16); + ASSERT_TRUE(CHIRP_AGE_OBS == 32); + ASSERT_TRUE(CHIRP_COOLDOWN_OBS == 33); + ASSERT_TRUE(CHIRP_START_OBS == 34); + ASSERT_TRUE(CHIRP_END_OBS == 35); + ASSERT_TRUE(CHIRP_DURATION_OBS == 36); + ASSERT_TRUE(CHIRPS_USED_OBS == 37); + ASSERT_TRUE(FORWARD_SPEED_OBS == 38); + ASSERT_TRUE(TURN_RATE_OBS == 39); return 0; } @@ -1292,8 +1183,8 @@ static int test_no_chirp_produces_silent_frequency_bins(void) { Bat env = make_test_env(); c_reset(&env); - ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, LEFT_FREQ_OFFSET, FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, RIGHT_FREQ_OFFSET, FREQ_BINS), 0.0f, 0.0001f); free_allocated(&env); return 0; @@ -1301,26 +1192,25 @@ static int test_no_chirp_produces_silent_frequency_bins(void) { static int test_observations_stay_normalized_after_chirp(void) { Bat env = make_test_env(); - env.max_steps = 1000; c_reset(&env); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_AGE_OBS], 1.0f, 0.0001f); - for (int i = 0; i < BAT_OBS_SIZE; i++) { + ASSERT_FLOAT_NEAR(env.observations[CHIRP_AGE_OBS], 1.0f, 0.0001f); + for (int i = 0; i < OBS_SIZE; i++) { ASSERT_TRUE(env.observations[i] >= -1.0f); ASSERT_TRUE(env.observations[i] <= 1.0f); } - env.actions[0] = BAT_NOOP; - 
env.actions[1] = BAT_TURN_NONE; + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; env.actions[2] = 0.0f; env.actions[3] = 7.0f; env.actions[4] = 1.0f; env.actions[5] = 1.0f; c_step(&env); - float age_denom = bat_chirp_age_norm_denominator(&env); - ASSERT_FLOAT_NEAR(env.observations[BAT_CHIRP_AGE_OBS], 1.0f / age_denom, 0.0001f); - for (int i = 0; i < BAT_OBS_SIZE; i++) { + float age_denom = chirp_age_norm_denominator(&env); + ASSERT_FLOAT_NEAR(env.observations[CHIRP_AGE_OBS], 1.0f / age_denom, 0.0001f); + for (int i = 0; i < OBS_SIZE; i++) { ASSERT_TRUE(env.observations[i] >= -1.0f); ASSERT_TRUE(env.observations[i] <= 1.0f); } @@ -1332,17 +1222,12 @@ static int test_observations_stay_normalized_after_chirp(void) { static int test_curriculum_level_zero_starts_close_with_no_obstacles(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; - env.curriculum_start_obstacles = 0; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 1; env.curriculum_start_bug_distance = 12.0f; - env.curriculum_max_bug_distance = 40.0f; - env.curriculum_bug_distance_step = 6.0f; c_reset(&env); ASSERT_TRUE(env.num_obstacles == 0); - ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) <= 14.0f); + ASSERT_TRUE(dist(env.x, env.y, env.bug_x, env.bug_y) <= 14.0f); free_allocated(&env); return 0; @@ -1351,9 +1236,6 @@ static int test_curriculum_level_zero_starts_close_with_no_obstacles(void) { static int test_curriculum_adds_first_obstacle_after_level_zero(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; - env.curriculum_start_obstacles = 0; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 4; env.curriculum_initial_level = 1; @@ -1377,24 +1259,19 @@ static int test_curriculum_adds_first_obstacle_after_level_zero(void) { static int test_curriculum_advances_after_catch(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; - 
env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 1; env.curriculum_start_bug_distance = 12.0f; - env.curriculum_max_bug_distance = 40.0f; - env.curriculum_bug_distance_step = 6.0f; c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; c_step(&env); ASSERT_TRUE(env.curriculum_level == 1); - ASSERT_TRUE(env.num_obstacles == 2); - ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) <= 20.0f); + ASSERT_TRUE(env.num_obstacles == 1); + ASSERT_TRUE(dist(env.x, env.y, env.bug_x, env.bug_y) <= 16.0f); free_allocated(&env); return 0; @@ -1403,17 +1280,12 @@ static int test_curriculum_advances_after_catch(void) { static int test_curriculum_waits_for_required_catches(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; - env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 1; env.curriculum_start_bug_distance = 12.0f; - env.curriculum_max_bug_distance = 40.0f; - env.curriculum_bug_distance_step = 6.0f; env.curriculum_successes_per_level = 2; c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; @@ -1422,8 +1294,8 @@ static int test_curriculum_waits_for_required_catches(void) { ASSERT_TRUE(env.curriculum_level == 0); ASSERT_TRUE(env.curriculum_successes_at_level == 1); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; @@ -1439,21 +1311,16 @@ static int test_curriculum_waits_for_required_catches(void) { static int test_curriculum_initial_level_sets_first_reset_difficulty(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; env.curriculum_initial_level = 4; - env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 2; 
env.curriculum_start_bug_distance = 8.0f; - env.curriculum_max_bug_distance = 56.0f; - env.curriculum_bug_distance_step = 4.0f; c_reset(&env); ASSERT_TRUE(env.curriculum_level == 4); - ASSERT_TRUE(env.num_obstacles == 3); - float dist = bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y); - ASSERT_TRUE(dist >= 20.0f); - ASSERT_TRUE(dist <= 28.0f); + ASSERT_TRUE(env.num_obstacles == 2); + float distance = dist(env.x, env.y, env.bug_x, env.bug_y); + ASSERT_TRUE(distance >= 15.0f); + ASSERT_TRUE(distance <= 17.0f); free_allocated(&env); return 0; @@ -1462,18 +1329,13 @@ static int test_curriculum_initial_level_sets_first_reset_difficulty(void) { static int test_curriculum_initial_level_does_not_reset_progress(void) { Bat env = make_test_env(); env.num_obstacles = 3; - env.curriculum_enabled = 1; env.curriculum_initial_level = 2; - env.curriculum_start_obstacles = 1; - env.curriculum_max_obstacles = 3; env.curriculum_obstacle_step = 1; env.curriculum_successes_per_level = 1; env.curriculum_start_bug_distance = 8.0f; - env.curriculum_max_bug_distance = 56.0f; - env.curriculum_bug_distance_step = 4.0f; c_reset(&env); - env.bat_x = 20.0f; - env.bat_y = 20.0f; + env.x = 20.0f; + env.y = 20.0f; env.bug_x = 20.5f; env.bug_y = 20.0f; @@ -1490,21 +1352,21 @@ static int test_bug_bounces_off_arena_walls(void) { Bat env = make_test_env(); c_reset(&env); - env.bug_x = env.width - env.bug_radius + 0.1f; - env.bug_y = env.height * 0.5f; + env.bug_x = ARENA_WIDTH - BUG_RADIUS + 0.1f; + env.bug_y = ARENA_HEIGHT * 0.5f; env.bug_vx = 3.0f; env.bug_vy = 1.0f; - bat_update_bug(&env, 0.0f); - ASSERT_TRUE(env.bug_x == env.width - env.bug_radius); + update_bug(&env, 0.0f); + ASSERT_TRUE(env.bug_x == ARENA_WIDTH - BUG_RADIUS); ASSERT_TRUE(env.bug_vx < 0.0f); ASSERT_TRUE(env.bug_vy == 1.0f); - env.bug_x = env.width * 0.5f; - env.bug_y = env.bug_radius - 0.1f; + env.bug_x = ARENA_WIDTH * 0.5f; + env.bug_y = BUG_RADIUS - 0.1f; env.bug_vx = 2.0f; env.bug_vy = -4.0f; - bat_update_bug(&env, 0.0f); 
- ASSERT_TRUE(env.bug_y == env.bug_radius); + update_bug(&env, 0.0f); + ASSERT_TRUE(env.bug_y == BUG_RADIUS); ASSERT_TRUE(env.bug_vx == 2.0f); ASSERT_TRUE(env.bug_vy > 0.0f); @@ -1516,22 +1378,21 @@ static int test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { Bat env = make_test_env(); env.num_obstacles = 0; env.sound_speed = 60.0f; - env.max_echo_range = 128.0f; c_reset(&env); - env.bat_x = 32.0f; - env.bat_y = 32.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.bat_heading = 0.0f; + env.x = 32.0f; + env.y = 32.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.heading = 0.0f; env.bug_x = 38.0f; env.bug_y = 32.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; compute_observations(&env); - env.actions[0] = BAT_NOOP; - env.actions[1] = BAT_TURN_NONE; + env.actions[0] = NOOP; + env.actions[1] = TURN_NONE; env.actions[2] = 7; env.actions[3] = 7; env.actions[4] = 0; @@ -1539,16 +1400,16 @@ static int test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { c_step(&env); for (int i = 0; i < 6; i++) { - ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, LEFT_FREQ_OFFSET, FREQ_BINS), 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(test_sum_obs(&env, RIGHT_FREQ_OFFSET, FREQ_BINS), 0.0f, 0.0001f); env.actions[5] = 0; c_step(&env); } float max_energy = 0.0f; for (int i = 0; i < 32; i++) { - float energy = test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS) - + test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS); + float energy = test_sum_obs(&env, LEFT_FREQ_OFFSET, FREQ_BINS) + + test_sum_obs(&env, RIGHT_FREQ_OFFSET, FREQ_BINS); if (energy > max_energy) max_energy = energy; c_step(&env); } @@ -1563,57 +1424,50 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { Bat env = { .num_agents = 1, .frameskip = 1, - .width = 64, - .height = 64, .num_obstacles = 0, - 
.bat_radius = 2.0f, - .bug_radius = 1.5f, - .bat_max_speed = 22.0f, - .bat_min_speed = 2.0f, - .bat_accel = 45.0f, - .bat_turn_rate = 9.424778f, - .bug_speed = 4.0f, - .max_echo_range = 128.0f, + .max_speed = 22.0f, + .min_speed = 2.0f, + .accel = 45.0f, + .turn_rate = 9.424778f, .ear_rear_gain = 0.20f, .ear_front_gain = 0.55f, .ear_side_gain = 0.35f, .sound_speed = 180.0f, - .curriculum_max_bug_distance = 56.0f, .rng = 1, }; allocate(&env); c_reset(&env); env.tick = 0; - env.bat_x = 4.0f; - env.bat_y = 32.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - env.bat_heading = 0.0f; - env.bug_x = env.bat_x + env.curriculum_max_bug_distance; - env.bug_y = env.bat_y; + env.x = 4.0f; + env.y = 32.0f; + env.vx = 0.0f; + env.vy = 0.0f; + env.heading = 0.0f; + env.bug_x = env.x + CURRICULUM_INBOUND_MAX_BUG_DISTANCE; + env.bug_y = env.y; env.bug_vx = 0.0f; env.bug_vy = 0.0f; - bat_clear_echo_queue(&env); + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.0f, .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = env.tick, .active = 1, }; - chirp.slice_count = (int)ceilf(chirp.duration / BAT_TICK_RATE); + chirp.slice_count = (int)ceilf(chirp.duration / TICK_RATE); while (chirp.slices_scheduled < chirp.slice_count) { int slice_idx = chirp.slices_scheduled; - bat_schedule_chirp_slice_echoes(&env, &chirp, slice_idx); + schedule_chirp_slice_echoes(&env, &chirp, slice_idx); chirp.slices_scheduled += 1; } float bug_energy = 0.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { bug_energy += env.echo_queue[i].bug_energy; } @@ -1625,9 +1479,9 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { static float test_sum_queued_echo_energy(Bat* env) { float energy = 0.0f; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { for 
(int ear = 0; ear < 2; ear++) { - for (int bin = 0; bin < BAT_FREQ_BINS; bin++) { + for (int bin = 0; bin < FREQ_BINS; bin++) { energy += env->echo_queue[i].energy[ear][bin]; } } @@ -1635,64 +1489,30 @@ static float test_sum_queued_echo_energy(Bat* env) { return energy; } -static int test_corner_reflectors_disabled_schedule_no_static_events(void) { - Bat env = make_test_env(); - env.num_obstacles = 0; - env.corner_reflectors = 0; - env.max_echo_range = 128.0f; - c_reset(&env); - - env.tick = 0; - env.bat_x = 32.0f; - env.bat_y = 32.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - bat_clear_echo_queue(&env); - ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, - .start_freq = 0.0f, - .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), - .birth_tick = env.tick, - .active = 1, - }; - - bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); - - ASSERT_FLOAT_NEAR(test_sum_queued_echo_energy(&env), 0.0f, 0.0001f); - - free_allocated(&env); - return 0; -} - static int test_corner_reflectors_enabled_schedule_stable_echo_events(void) { Bat env = make_test_env(); env.num_obstacles = 0; - env.corner_reflectors = 1; - env.max_echo_range = 128.0f; env.sound_speed = 180.0f; c_reset(&env); env.tick = 0; - env.bat_x = 32.0f; - env.bat_y = 32.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - bat_clear_echo_queue(&env); + env.x = 32.0f; + env.y = 32.0f; + env.heading = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.0f, .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = env.tick, .active = 1, }; - bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); + schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); ASSERT_TRUE(test_sum_queued_echo_energy(&env) > 0.0f); @@ -1703,31 +1523,29 @@ static int 
test_corner_reflectors_enabled_schedule_stable_echo_events(void) { static int test_corner_reflector_echo_observations_stay_normalized(void) { Bat env = make_test_env(); env.num_obstacles = 0; - env.corner_reflectors = 1; - env.max_echo_range = 128.0f; env.sound_speed = 180.0f; c_reset(&env); env.tick = 0; - env.bat_x = 32.0f; - env.bat_y = 32.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; - bat_clear_echo_queue(&env); + env.x = 32.0f; + env.y = 32.0f; + env.heading = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; + clear_echo_queue(&env); ChirpEvent chirp = { - .x = env.bat_x, - .y = env.bat_y, + .x = env.x, + .y = env.y, .start_freq = 0.0f, .end_freq = 1.0f, - .duration = bat_chirp_duration_seconds(0.0f), + .duration = chirp_duration_seconds(0.0f), .birth_tick = env.tick, .active = 1, }; - bat_schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); + schedule_corner_reflector_echoes(&env, &chirp, 0.0f, 0.5f); int arrival_tick = -1; - for (int i = 0; i < BAT_ECHO_QUEUE_TICKS; i++) { + for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { if (env.echo_queue[i].tick > 0 && test_sum_queued_echo_energy(&env) > 0.0f) { arrival_tick = env.echo_queue[i].tick; break; @@ -1737,9 +1555,9 @@ static int test_corner_reflector_echo_observations_stay_normalized(void) { env.tick = arrival_tick; compute_observations(&env); - ASSERT_TRUE(test_sum_obs(&env, BAT_LEFT_FREQ_OFFSET, BAT_FREQ_BINS) > 0.0f || - test_sum_obs(&env, BAT_RIGHT_FREQ_OFFSET, BAT_FREQ_BINS) > 0.0f); - for (int i = 0; i < BAT_OBS_SIZE; i++) { + ASSERT_TRUE(test_sum_obs(&env, LEFT_FREQ_OFFSET, FREQ_BINS) > 0.0f || + test_sum_obs(&env, RIGHT_FREQ_OFFSET, FREQ_BINS) > 0.0f); + for (int i = 0; i < OBS_SIZE; i++) { ASSERT_TRUE(env.observations[i] >= -1.0f); ASSERT_TRUE(env.observations[i] <= 1.0f); } @@ -1750,19 +1568,19 @@ static int test_corner_reflector_echo_observations_stay_normalized(void) { static int test_frequency_bin_energy_sums_and_caps(void) { Bat env = make_test_env(); - 
memset(env.observations, 0, BAT_OBS_SIZE * sizeof(float)); + memset(env.observations, 0, OBS_SIZE * sizeof(float)); - int high_bin = bat_freq_bin_index(&env, 1.0f); - int low_bin = bat_freq_bin_index(&env, 0.0f); - env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] = bat_clampf( - env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); - env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] = bat_clampf( - env.observations[BAT_LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); - env.observations[BAT_RIGHT_FREQ_OFFSET + low_bin] = bat_clampf( - env.observations[BAT_RIGHT_FREQ_OFFSET + low_bin] + 0.35f, 0.0f, 1.0f); + int high_bin = freq_bin_index(&env, 1.0f); + int low_bin = freq_bin_index(&env, 0.0f); + env.observations[LEFT_FREQ_OFFSET + high_bin] = bat_clampf( + env.observations[LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); + env.observations[LEFT_FREQ_OFFSET + high_bin] = bat_clampf( + env.observations[LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); + env.observations[RIGHT_FREQ_OFFSET + low_bin] = bat_clampf( + env.observations[RIGHT_FREQ_OFFSET + low_bin] + 0.35f, 0.0f, 1.0f); - ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + BAT_FREQ_BINS - 1], 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.observations[BAT_RIGHT_FREQ_OFFSET], 0.35f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[LEFT_FREQ_OFFSET + FREQ_BINS - 1], 1.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[RIGHT_FREQ_OFFSET], 0.35f, 0.0001f); free_allocated(&env); return 0; @@ -1773,26 +1591,25 @@ static int test_bug_echo_reward_is_added_when_bug_echo_is_closer(void) { c_reset(&env); env.bug_echo_reward_scale = 0.05f; env.last_bug_echo_path = 20.0f; - env.last_bug_echo_bat_x = 8.0f; - env.last_bug_echo_bat_y = 10.0f; + env.last_bug_echo_x = 8.0f; + env.last_bug_echo_y = 10.0f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 10.0f; - env.bat_y = 10.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.x = 10.0f; + env.y = 10.0f; + 
env.vx = 0.0f; + env.vy = 0.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_BUG); + clear_echo_queue(&env); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, ECHO_BUG); c_step(&env); - ASSERT_TRUE(env.rewards[0] > 0.002f); - ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); + ASSERT_TRUE(env.rewards[0] > 0.0015f); + ASSERT_FLOAT_NEAR(env.observations[LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); free_allocated(&env); return 0; @@ -1803,22 +1620,21 @@ static int test_bug_echo_reward_requires_bat_displacement(void) { c_reset(&env); env.bug_echo_reward_scale = 0.05f; env.last_bug_echo_path = 20.0f; - env.last_bug_echo_bat_x = 10.0f; - env.last_bug_echo_bat_y = 10.0f; + env.last_bug_echo_x = 10.0f; + env.last_bug_echo_y = 10.0f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 10.0f; - env.bat_y = 10.0f; - env.bat_heading = 0.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.x = 10.0f; + env.y = 10.0f; + env.heading = 0.0f; + env.vx = 0.0f; + env.vy = 0.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_BUG); + clear_echo_queue(&env); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, ECHO_BUG); c_step(&env); @@ -1834,25 +1650,24 @@ static int test_bug_echo_reward_penalizes_farther_bug_echo_weakly(void) { c_reset(&env); env.bug_echo_reward_scale = 0.05f; env.last_bug_echo_path = 20.0f; - env.last_bug_echo_bat_x = 8.0f; - env.last_bug_echo_bat_y = 10.0f; + env.last_bug_echo_x = 8.0f; + env.last_bug_echo_y = 10.0f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 10.0f; - env.bat_y = 10.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.x = 10.0f; + env.y = 10.0f; + env.vx = 0.0f; + env.vy = 0.0f; env.bug_vx = 
0.0f; env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 25.0f, BAT_ECHO_BUG); + clear_echo_queue(&env); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 25.0f, ECHO_BUG); c_step(&env); - ASSERT_FLOAT_NEAR(env.rewards[0], -0.0003125f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], -0.0001953f, 0.0001f); ASSERT_FLOAT_NEAR(env.last_bug_echo_path, 25.0f, 0.0001f); free_allocated(&env); @@ -1865,57 +1680,54 @@ static int test_static_echo_does_not_get_bug_echo_reward(void) { env.bug_echo_reward_scale = 0.05f; env.step_cost = 0.0f; env.progress_reward_scale = 0.0f; - env.chirp_cost = 0.0f; - env.bat_x = 10.0f; - env.bat_y = 10.0f; - env.bat_vx = 0.0f; - env.bat_vy = 0.0f; + env.x = 10.0f; + env.y = 10.0f; + env.vx = 0.0f; + env.vy = 0.0f; env.bug_vx = 0.0f; env.bug_vy = 0.0f; env.bug_x = 50.0f; env.bug_y = 50.0f; - bat_clear_echo_queue(&env); - bat_add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, BAT_ECHO_STATIC); + clear_echo_queue(&env); + add_echo_event(&env, 0, 1.0f, 0.5f, 0.6f, 15.0f, ECHO_STATIC); c_step(&env); ASSERT_FLOAT_NEAR(env.rewards[0], 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.observations[BAT_LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[LEFT_FREQ_OFFSET + 8], 0.6f, 0.0001f); free_allocated(&env); return 0; } -static int test_quadrant(float x, float y, float width, float height) { - int east = x >= width * 0.5f; - int south = y >= height * 0.5f; - return south * 2 + east; -} - -static int test_spawns_use_different_random_quadrants(void) { +static int test_spawns_use_curriculum_distance_with_random_positions(void) { Bat env = make_test_env(); - int seen_bat[4] = {0}; - int seen_bug[4] = {0}; - int bat_quadrants = 0; - int bug_quadrants = 0; + float first_x = 0.0f; + float first_y = 0.0f; + float first_bug_x = 0.0f; + float first_bug_y = 0.0f; + float max_bat_delta = 0.0f; + float max_bug_delta = 0.0f; for (int i = 0; i < 48; i++) { c_reset(&env); - int 
bq = test_quadrant(env.bat_x, env.bat_y, env.width, env.height); - int gq = test_quadrant(env.bug_x, env.bug_y, env.width, env.height); - ASSERT_TRUE(bq != gq); - if (!seen_bat[bq]) { - seen_bat[bq] = 1; - bat_quadrants += 1; - } - if (!seen_bug[gq]) { - seen_bug[gq] = 1; - bug_quadrants += 1; + ASSERT_FLOAT_NEAR(dist(env.x, env.y, env.bug_x, env.bug_y), + env.curriculum_start_bug_distance, 0.001f); + if (i == 0) { + first_x = env.x; + first_y = env.y; + first_bug_x = env.bug_x; + first_bug_y = env.bug_y; + } else { + float bat_delta = dist(first_x, first_y, env.x, env.y); + float bug_delta = dist(first_bug_x, first_bug_y, env.bug_x, env.bug_y); + if (bat_delta > max_bat_delta) max_bat_delta = bat_delta; + if (bug_delta > max_bug_delta) max_bug_delta = bug_delta; } } - ASSERT_TRUE(bat_quadrants >= 3); - ASSERT_TRUE(bug_quadrants >= 3); + ASSERT_TRUE(max_bat_delta > 8.0f); + ASSERT_TRUE(max_bug_delta > 8.0f); free_allocated(&env); return 0; @@ -1923,15 +1735,17 @@ static int test_spawns_use_different_random_quadrants(void) { static int test_spawns_keep_minimum_separation_and_avoid_obstacles(void) { Bat env = make_test_env(); - float min_sep = 20.0f; + env.curriculum_initial_level = 1; + float expected_distance = env.curriculum_start_bug_distance + CURRICULUM_BUG_DISTANCE_STEP; for (int reset = 0; reset < 32; reset++) { c_reset(&env); - ASSERT_TRUE(bat_dist(env.bat_x, env.bat_y, env.bug_x, env.bug_y) >= min_sep); + ASSERT_FLOAT_NEAR(dist(env.x, env.y, env.bug_x, env.bug_y), + expected_distance, 0.001f); for (int i = 0; i < env.num_obstacles; i++) { - ASSERT_TRUE(!bat_circle_rect_collision(env.bat_x, env.bat_y, env.bat_radius + 1.0f, + ASSERT_TRUE(!circle_rect_collision(env.x, env.y, AGENT_RADIUS + 1.0f, env.obstacle_x[i], env.obstacle_y[i], env.obstacle_w[i], env.obstacle_h[i])); - ASSERT_TRUE(!bat_circle_rect_collision(env.bug_x, env.bug_y, env.bug_radius + 1.0f, + ASSERT_TRUE(!circle_rect_collision(env.bug_x, env.bug_y, BUG_RADIUS + 1.0f, env.obstacle_x[i], 
env.obstacle_y[i], env.obstacle_w[i], env.obstacle_h[i])); } } @@ -1942,6 +1756,7 @@ static int test_spawns_keep_minimum_separation_and_avoid_obstacles(void) { static int test_obstacles_move_substantially_across_resets(void) { Bat env = make_test_env(); + env.curriculum_initial_level = 1; c_reset(&env); float first_x = env.obstacle_x[0]; float first_y = env.obstacle_y[0]; @@ -1949,7 +1764,7 @@ static int test_obstacles_move_substantially_across_resets(void) { for (int i = 0; i < 32; i++) { c_reset(&env); - float delta = bat_dist(first_x, first_y, env.obstacle_x[0], env.obstacle_y[0]); + float delta = dist(first_x, first_y, env.obstacle_x[0], env.obstacle_y[0]); if (delta > max_delta) max_delta = delta; } @@ -1961,6 +1776,7 @@ static int test_obstacles_move_substantially_across_resets(void) { static int test_obstacles_are_small_enough_for_trainability(void) { Bat env = make_test_env(); + env.curriculum_initial_level = 1; for (int reset = 0; reset < 64; reset++) { c_reset(&env); @@ -1987,10 +1803,8 @@ int main(void) { if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; if (test_chirp_perf_uses_fixed_fifteen_chirp_reference()) return 1; if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; - if (test_chirp_budget_logs_ratios_for_wandb()) return 1; if (test_curriculum_perf_logs_distance_and_obstacle_difficulty_components()) return 1; if (test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf()) return 1; - if (test_chirp_tempo_logs_far_and_near_rates()) return 1; if (test_left_right_echo_asymmetry()) return 1; if (test_directional_echo_arrival_and_gain_by_side()) return 1; if (test_ear_directivity_gains_control_echo_energy()) return 1; @@ -2013,7 +1827,7 @@ int main(void) { if (test_chirp_audio_maps_norm_freq_to_audible_sweep()) return 1; if (test_chirp_audio_duration_scales_with_render_fps()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; - if (test_valid_chirp_gets_reward_without_legacy_cost()) 
return 1; + if (test_valid_chirp_gets_reward()) return 1; if (test_early_chirp_gets_penalty_and_emits_nothing()) return 1; if (test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty()) return 1; if (test_chirp_after_bug_echo_arrives_ignores_static_echo_window()) return 1; @@ -2030,7 +1844,6 @@ int main(void) { if (test_bug_bounces_off_arena_walls()) return 1; if (test_chirp_echo_arrives_after_two_way_travel_not_immediately()) return 1; if (test_default_echo_range_reaches_curriculum_max_bug_distance()) return 1; - if (test_corner_reflectors_disabled_schedule_no_static_events()) return 1; if (test_corner_reflectors_enabled_schedule_stable_echo_events()) return 1; if (test_corner_reflector_echo_observations_stay_normalized()) return 1; if (test_frequency_bin_energy_sums_and_caps()) return 1; @@ -2038,7 +1851,7 @@ int main(void) { if (test_bug_echo_reward_requires_bat_displacement()) return 1; if (test_bug_echo_reward_penalizes_farther_bug_echo_weakly()) return 1; if (test_static_echo_does_not_get_bug_echo_reward()) return 1; - if (test_spawns_use_different_random_quadrants()) return 1; + if (test_spawns_use_curriculum_distance_with_random_positions()) return 1; if (test_spawns_keep_minimum_separation_and_avoid_obstacles()) return 1; if (test_obstacles_move_substantially_across_resets()) return 1; if (test_obstacles_are_small_enough_for_trainability()) return 1; From 8b4cdfd1f87f350180ce0f7f9735f6ea8aa842a1 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Thu, 11 Jun 2026 23:03:31 -0700 Subject: [PATCH 40/51] Clean up bat step logic --- config/bat.ini | 1 - ocean/bat/bat.c | 4 +- ocean/bat/bat.h | 133 +++++++++++++++----------------- ocean/bat/binding.c | 1 - ocean/bat/tests/test_bat_core.c | 41 +++++----- 5 files changed, 81 insertions(+), 99 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 9909a46a81..0d2ded03cf 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -17,7 +17,6 @@ encoder = DefaultEncoder decoder = DefaultDecoder [env] -frameskip = 1 
bat_max_speed = 15.498233877318418 bat_min_speed = 2.6389946132676654 bat_accel = 53.02330161128345 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index db17016814..1891fbbcf6 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -16,7 +16,6 @@ static char* trim(char* s) { static void set_demo_defaults(Bat* env) { *env = (Bat){ .num_agents = NUM_AGENTS, - .frameskip = 1, .max_speed = 15.498233877318418f, .min_speed = 2.6389946132676654f, .accel = 53.02330161128345f, @@ -51,8 +50,7 @@ static void set_demo_defaults(Bat* env) { } static void apply_env_config_value(Bat* env, const char* key, float value) { - if (strcmp(key, "frameskip") == 0) env->frameskip = (int)value; - else if (strcmp(key, "bat_max_speed") == 0) env->max_speed = value; + if (strcmp(key, "bat_max_speed") == 0) env->max_speed = value; else if (strcmp(key, "bat_min_speed") == 0) env->min_speed = value; else if (strcmp(key, "bat_accel") == 0) env->accel = value; else if (strcmp(key, "bat_turn_rate") == 0) env->turn_rate = value; diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 4468093fdd..649ffd57ab 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -161,6 +161,13 @@ typedef struct Client { #endif } Client; +typedef enum ChirpStatus { + CHIRP_STATUS_OVER_BUDGET = -2, + CHIRP_STATUS_COOLDOWN = -1, + CHIRP_STATUS_NONE = 0, + CHIRP_STATUS_EMITTED = 1, +} ChirpStatus; + typedef struct Bat { Client* client; Log log; @@ -170,7 +177,6 @@ typedef struct Bat { float* terminals; int num_agents; - int frameskip; int tick; int render_target_fps; int record_video; @@ -228,7 +234,7 @@ typedef struct Bat { ChirpEvent chirps[CHIRP_HISTORY]; int chirp_head; EchoBucket echo_queue[ECHO_QUEUE_TICKS]; - int chirps_emitted_episode; + int chirps_emitted; int audio_chirp_serial; int chirps_overlapped; @@ -457,7 +463,7 @@ static inline float curriculum_bug_maneuver_frequency(Bat* env) { } static inline float chirps_used_ratio(Bat* env) { - return bat_clampf(env->chirps_emitted_episode / (float)env->chirp_budget, 0.0f, 
1.0f); + return bat_clampf(env->chirps_emitted / (float)env->chirp_budget, 0.0f, 1.0f); } static inline float chirp_efficiency(Bat* env) { @@ -466,7 +472,7 @@ static inline float chirp_efficiency(Bat* env) { static inline float chirp_perf(Bat* env) { float reference_chirps = fmaxf(1.0f, (float)MAX_CHIRPS_PER_EPISODE); - float raw = 1.0f - env->chirps_emitted_episode / reference_chirps; + float raw = 1.0f - env->chirps_emitted / reference_chirps; return bat_clampf(raw, CHIRP_PERF_FLOOR, 1.0f); } @@ -680,9 +686,9 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_obstacle_difficulty += obstacle_difficulty; env->log.curriculum_motion_difficulty += motion_difficulty; env->log.num_obstacles += env->num_obstacles; - env->log.chirps_emitted += env->chirps_emitted_episode; + env->log.chirps_emitted += env->chirps_emitted; env->log.chirp_perf += chirp_perf_value; - float chirps = fmaxf(1.0f, (float)env->chirps_emitted_episode); + float chirps = fmaxf(1.0f, (float)env->chirps_emitted); env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; env->log.n += 1.0f; } @@ -1012,7 +1018,7 @@ static inline void reset_episode(Bat* env) { env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; env->last_bug_echo_expected_tick = -1.0f; - env->chirps_emitted_episode = 0; + env->chirps_emitted = 0; env->chirps_overlapped = 0; env->episode_return = 0.0f; env->start_bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); @@ -1154,24 +1160,20 @@ static inline void update_motion(Bat* env, float dt) { } static inline bool try_emit_chirp(Bat* env) { - int start_idx = action_index(env->actions[2], CHIRP_FREQ_BINS); - int end_idx = action_index(env->actions[3], CHIRP_FREQ_BINS); - int duration_idx = action_index(env->actions[4], CHIRP_DURATION_BINS); - if (env->tick - env->last_chirp_tick < env->chirp_cooldown_ticks) { return false; } - if (env->chirps_emitted_episode >= env->chirp_budget) { - return false; - } + int 
start_idx = action_index(env->actions[2], CHIRP_FREQ_BINS); + int end_idx = action_index(env->actions[3], CHIRP_FREQ_BINS); + int duration_idx = action_index(env->actions[4], CHIRP_DURATION_BINS); env->last_chirp_start_freq = norm_bin(start_idx, CHIRP_FREQ_BINS); env->last_chirp_end_freq = norm_bin(end_idx, CHIRP_FREQ_BINS); env->last_chirp_duration = norm_bin(duration_idx, CHIRP_DURATION_BINS); env->chirp_age_ticks = 0; env->last_chirp_tick = env->tick; - env->chirps_emitted_episode += 1; + env->chirps_emitted += 1; ChirpEvent* chirp = &env->chirps[env->chirp_head]; chirp->x = env->x; chirp->y = env->y; @@ -1199,17 +1201,17 @@ static inline float next_chirp_overlap_fraction(Bat* env) { return bat_clampf(remaining_ticks / wait_ticks, 0.0f, 1.0f); } -static inline int update_chirp(Bat* env) { +static inline ChirpStatus update_chirp(Bat* env) { int emit = action_index(env->actions[5], CHIRP_EMIT_ACTIONS); if (emit) { - if (env->chirps_emitted_episode >= env->chirp_budget) { - return -2; + if (env->chirps_emitted >= env->chirp_budget) { + return CHIRP_STATUS_OVER_BUDGET; } - return try_emit_chirp(env) ? 1 : -1; + return try_emit_chirp(env) ? 
CHIRP_STATUS_EMITTED : CHIRP_STATUS_COOLDOWN; } else if (env->chirp_age_ticks < MAX_CHIRP_AGE_TICKS) { env->chirp_age_ticks += 1; } - return 0; + return CHIRP_STATUS_NONE; } static inline bool caught_bug(Bat* env) { @@ -1219,72 +1221,59 @@ static inline bool caught_bug(Bat* env) { void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; + float success = 0.0f; + float collision = 0.0f; + float timeout = 0.0f; float chirp_overlap_fraction = next_chirp_overlap_fraction(env); - int chirp_status = update_chirp(env); - if (chirp_status == -2) { + ChirpStatus chirp_status = update_chirp(env); + if (chirp_status == CHIRP_STATUS_OVER_BUDGET) { + env->tick += 1; env->rewards[0] = -1.0f; - env->terminals[0] = 1.0f; - env->episode_return += env->rewards[0]; - add_log(env, 0.0f, 1.0f, 0.0f); - reset_episode(env); - return; - } - if (caught_bug(env)) { - env->rewards[0] = success_reward(env); - env->terminals[0] = 1.0f; - env->episode_return += env->rewards[0]; - advance_curriculum(env); - add_log(env, 1.0f, 0.0f, 0.0f); - reset_episode(env); - return; - } - schedule_due_chirp_slices(env); + collision = 1.0f; + } else { + schedule_due_chirp_slices(env); - for (int i = 0; i < env->frameskip; i++) { update_motion(env, TICK_RATE); update_bug(env, TICK_RATE); + env->tick += 1; if (hits_wall(env) || hits_obstacle(env)) { env->rewards[0] = -env->collision_penalty; - env->terminals[0] = 1.0f; - env->episode_return += env->rewards[0]; - add_log(env, 0.0f, 1.0f, 0.0f); - reset_episode(env); - return; - } - if (caught_bug(env)) { + collision = 1.0f; + } else if (caught_bug(env)) { env->rewards[0] = success_reward(env); - env->terminals[0] = 1.0f; - env->episode_return += env->rewards[0]; - advance_curriculum(env); - add_log(env, 1.0f, 0.0f, 0.0f); - reset_episode(env); - return; - } - } + success = 1.0f; + } else { + float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); + float progress = env->prev_bug_dist - bug_dist; + env->rewards[0] += 
env->progress_reward_scale * progress; + env->rewards[0] -= env->step_cost; // TODO: Fold this only when we are ready to break training determinism. + if (chirp_status == CHIRP_STATUS_EMITTED) { + env->rewards[0] += env->valid_chirp_reward; // TODO: Remove this; chirps should only pay when bug echoes improve. + env->rewards[0] -= CHIRP_COST; + if (chirp_overlap_fraction > 0.0f) { + env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; + env->chirps_overlapped += 1; + } + } else if (chirp_status == CHIRP_STATUS_COOLDOWN) { + env->rewards[0] -= env->early_chirp_penalty; + } + env->prev_bug_dist = bug_dist; - env->tick += 1; - float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); - float progress = env->prev_bug_dist - bug_dist; - env->rewards[0] += env->progress_reward_scale * progress; - env->rewards[0] -= env->step_cost; - if (chirp_status > 0) { - env->rewards[0] += env->valid_chirp_reward; - env->rewards[0] -= CHIRP_COST; - if (chirp_overlap_fraction > 0.0f) { - env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; - env->chirps_overlapped += 1; + if (env->tick >= MAX_STEPS) { + env->rewards[0] = -1.0f; + timeout = 1.0f; + } } - } else if (chirp_status < 0) { - env->rewards[0] -= env->early_chirp_penalty; } - env->prev_bug_dist = bug_dist; - if (env->tick >= MAX_STEPS) { - env->rewards[0] = -1.0f; + if (success || collision || timeout) { env->terminals[0] = 1.0f; env->episode_return += env->rewards[0]; - add_log(env, 0.0f, 0.0f, 1.0f); + if (success) { + advance_curriculum(env); + } + add_log(env, success, collision, timeout); reset_episode(env); return; } @@ -1548,7 +1537,7 @@ void c_render(Bat* env) { DrawLine((int)(env->x * sx), (int)(env->y * sy), (int)(hx * sx), (int)(hy * sy), WHITE); int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, - env->chirps_emitted_episode, cooldown), 10, 10, 
20, RAYWHITE); + env->chirps_emitted, cooldown), 10, 10, 20, RAYWHITE); EndDrawing(); record_capture_frame(env); } diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 115631fbfd..4ebd90c18f 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -8,7 +8,6 @@ void my_init(Env* env, Dict* kwargs) { env->num_agents = NUM_AGENTS; - env->frameskip = dict_get(kwargs, "frameskip")->value; env->ear_separation_scale = dict_get(kwargs, "ear_separation_scale")->value; env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; env->ear_front_gain = dict_get(kwargs, "ear_front_gain")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index fa182f99c8..28af0b1ae5 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -24,7 +24,6 @@ static Bat make_test_env(void) { Bat env = { .num_agents = 1, - .frameskip = 1, .num_obstacles = 1, .ear_separation_scale = 0.75f, .ear_rear_gain = 0.20f, @@ -89,11 +88,11 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { env.actions[5] = 1.0f; c_step(&env); - ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_emitted == 1); ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f / (float)MAX_CHIRPS_PER_EPISODE, 0.0001f); - env.chirps_emitted_episode = MAX_CHIRPS_PER_EPISODE + 1; + env.chirps_emitted = MAX_CHIRPS_PER_EPISODE + 1; compute_observations(&env); ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f, 0.0001f); @@ -127,14 +126,14 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { env.actions[5] = 1.0f; c_step(&env); ASSERT_TRUE(env.terminals[0] == 0.0f); - ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_emitted == 1); ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f, 0.0001f); c_step(&env); ASSERT_TRUE(env.terminals[0] == 1.0f); ASSERT_FLOAT_NEAR(env.rewards[0], -1.0f, 0.0001f); - ASSERT_TRUE(env.chirps_emitted_episode == 0); + ASSERT_TRUE(env.chirps_emitted 
== 0); free_allocated(&env); return 0; @@ -188,10 +187,10 @@ static int test_chirp_efficiency_scores_low_usage_above_full_budget(void) { c_reset(&env); env.chirp_budget = 10; - env.chirps_emitted_episode = 1; + env.chirps_emitted = 1; ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.95f, 0.0001f); - env.chirps_emitted_episode = 10; + env.chirps_emitted = 10; ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.50f, 0.0001f); free_allocated(&env); @@ -202,19 +201,19 @@ static int test_chirp_perf_uses_fixed_fifteen_chirp_reference(void) { Bat env = make_test_env(); c_reset(&env); - env.chirps_emitted_episode = 0; + env.chirps_emitted = 0; ASSERT_FLOAT_NEAR(chirp_perf(&env), 1.0f, 0.0001f); - env.chirps_emitted_episode = 6; + env.chirps_emitted = 6; ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.60f, 0.0001f); - env.chirps_emitted_episode = 8; + env.chirps_emitted = 8; ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.4666667f, 0.0001f); - env.chirps_emitted_episode = 15; + env.chirps_emitted = 15; ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.05f, 0.0001f); - env.chirps_emitted_episode = 30; + env.chirps_emitted = 30; ASSERT_FLOAT_NEAR(chirp_perf(&env), 0.05f, 0.0001f); free_allocated(&env); @@ -227,7 +226,7 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { c_reset(&env); env.chirp_budget = 10; - env.chirps_emitted_episode = 2; + env.chirps_emitted = 2; env.x = 20.0f; env.y = 20.0f; env.bug_x = 20.5f; @@ -281,7 +280,7 @@ static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi env.curriculum_start_bug_distance = 8.0f; env.num_obstacles = 2; env.chirp_budget = 14; - env.chirps_emitted_episode = 7; + env.chirps_emitted = 7; env.start_bug_dist = 32.0f; add_log(&env, 1.0f, 0.0f, 0.0f); @@ -499,7 +498,6 @@ static int test_ear_directivity_gains_control_echo_energy(void) { static int test_default_sound_speed_allows_one_tick_interaural_delay(void) { Bat env = { .num_agents = 1, - .frameskip = 1, .num_obstacles = 0, .ear_separation_scale = 0.75f, .ear_rear_gain = 0.20f, 
@@ -1047,7 +1045,7 @@ static int test_valid_chirp_gets_reward(void) { ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); - ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_emitted == 1); free_allocated(&env); return 0; @@ -1072,7 +1070,7 @@ static int test_early_chirp_gets_penalty_and_emits_nothing(void) { ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], -env.early_chirp_penalty, 0.0001f); - ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_emitted == 1); free_allocated(&env); return 0; @@ -1095,7 +1093,7 @@ static int test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); - ASSERT_TRUE(env.chirps_emitted_episode == 1); + ASSERT_TRUE(env.chirps_emitted == 1); ASSERT_TRUE(env.chirps_overlapped == 0); env.last_chirp_tick = 0; @@ -1108,7 +1106,7 @@ static int test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward - 0.5f * env.chirp_overlap_penalty, 0.0001f); - ASSERT_TRUE(env.chirps_emitted_episode == 2); + ASSERT_TRUE(env.chirps_emitted == 2); ASSERT_TRUE(env.chirps_overlapped == 1); free_allocated(&env); @@ -1126,7 +1124,7 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { env.chirp_overlap_penalty = 0.0040f; env.chirp_cooldown_ticks = 1; env.chirp_budget = 10; - env.chirps_emitted_episode = 1; + env.chirps_emitted = 1; env.last_chirp_tick = 0; env.last_bug_echo_expected_tick = 3.0f; env.tick = 4; @@ -1136,7 +1134,7 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); - 
ASSERT_TRUE(env.chirps_emitted_episode == 2); + ASSERT_TRUE(env.chirps_emitted == 2); ASSERT_TRUE(env.chirps_overlapped == 0); free_allocated(&env); @@ -1423,7 +1421,6 @@ static int test_chirp_echo_arrives_after_two_way_travel_not_immediately(void) { static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { Bat env = { .num_agents = 1, - .frameskip = 1, .num_obstacles = 0, .max_speed = 22.0f, .min_speed = 2.0f, From 11a092ed22e799312cc36799b4740e9085ee959b Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 00:23:17 -0700 Subject: [PATCH 41/51] Clean up bat action and reflector code --- ocean/bat/bat.c | 20 ++++----- ocean/bat/bat.h | 77 ++++++++++----------------------- ocean/bat/tests/test_bat_core.c | 6 +-- 3 files changed, 35 insertions(+), 68 deletions(-) diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index 1891fbbcf6..f6f7219dae 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -120,16 +120,16 @@ void demo() { SetTargetFPS(60); while (!WindowShouldClose()) { memset(env.actions, 0, sizeof(float) * NUM_ACTIONS); - env.actions[0] = NOOP; - env.actions[1] = TURN_NONE; - if (IsKeyDown(KEY_W)) env.actions[0] = THRUST_FORWARD; - if (IsKeyDown(KEY_S)) env.actions[0] = BRAKE; - if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[1] = TURN_LEFT; - if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[1] = TURN_RIGHT; - env.actions[2] = 0; - env.actions[3] = 7; - env.actions[4] = 1; - env.actions[5] = IsKeyDown(KEY_SPACE) ? 
1.0f : 0.0f; + env.actions[ACTION_MOVE] = NOOP; + env.actions[ACTION_TURN] = TURN_NONE; + if (IsKeyDown(KEY_W)) env.actions[ACTION_MOVE] = THRUST_FORWARD; + if (IsKeyDown(KEY_S)) env.actions[ACTION_MOVE] = BRAKE; + if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[ACTION_TURN] = TURN_LEFT; + if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[ACTION_TURN] = TURN_RIGHT; + env.actions[ACTION_CHIRP_FREQ_START] = 0; + env.actions[ACTION_CHIRP_FREQ_END] = 7; + env.actions[ACTION_CHIRP_DURATION] = 1; + env.actions[ACTION_CHIRP_EMIT] = IsKeyDown(KEY_SPACE) ? 1.0f : 0.0f; c_step(&env); c_render(&env); } diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 649ffd57ab..a20acc79c4 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -14,6 +13,12 @@ #define OBS_SIZE 41 #define NUM_AGENTS 1 #define NUM_ACTIONS 6 +#define ACTION_MOVE 0 +#define ACTION_TURN 1 +#define ACTION_CHIRP_FREQ_START 2 +#define ACTION_CHIRP_FREQ_END 3 +#define ACTION_CHIRP_DURATION 4 +#define ACTION_CHIRP_EMIT 5 #define MOVE_ACTIONS 3 #define TURN_ACTIONS 3 #define CHIRP_FREQ_BINS 8 @@ -69,7 +74,6 @@ #define CHIRP_RINGS 5 #define MAX_CHIRP_SLICES 16 #define ECHO_QUEUE_TICKS 256 -#define CORNER_REFLECTORS 1 #define AUDIO_VOICES 8 #define AUDIO_SAMPLE_RATE 48000 #define AUDIO_MIN_HZ 600.0f @@ -276,11 +280,6 @@ static inline float bat_clampf(float v, float lo, float hi) { return v; } -static inline int action_index(float v, int n) { - int idx = (int)v; - return idx; -} - static inline float chirp_duration_seconds(float duration_norm) { return 0.04f + 0.18f * duration_norm; } @@ -521,10 +520,6 @@ static inline float curriculum_difficulty(Bat* env) { return bat_clampf(weighted / active_weight, 0.0f, 1.0f); } -static inline float success_reward(Bat* env) { - return env->chirp_efficiency_reward * chirp_efficiency(env); -} - static inline void sample_spawns_at_distance(Bat* env, float target_distance) { float margin = fmaxf(6.0f, 
fmaxf(AGENT_RADIUS, BUG_RADIUS) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { @@ -693,11 +688,9 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.n += 1.0f; } -static inline int freq_bin_index(Bat* env, float freq_norm) { - (void)env; - int bins = FREQ_BINS; - int bin = (int)(freq_norm * bins); - if (bin >= bins) bin = bins - 1; +static inline int freq_bin_index(float freq_norm) { + int bin = (int)(freq_norm * FREQ_BINS); + if (bin >= FREQ_BINS) bin = FREQ_BINS - 1; return bin; } @@ -728,7 +721,7 @@ static inline void add_echo_event(Bat* env, int ear, float receive_tick, } int ear_idx = ear == 0 ? 0 : 1; - int bin = freq_bin_index(env, freq); + int bin = freq_bin_index(freq); bucket->energy[ear_idx][bin] += intensity; if (source == ECHO_BUG) { float sideband = intensity * env->bug_wing_sideband_gain; @@ -838,9 +831,8 @@ static inline void schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, float strength) { float len = dist(x1, y1, x2, y2); int count = (int)(len / REFLECTOR_SPACING) + 1; - if (count < 1) count = 1; for (int i = 0; i <= count; i++) { - float t = count == 0 ? 
0.0f : i / (float)count; + float t = i / (float)count; float x = x1 + (x2 - x1) * t; float y = y1 + (y2 - y1) * t; schedule_echo(env, chirp, slice_ticks, freq, x, y, 0.0f, 0.0f, strength, ECHO_STATIC); @@ -849,7 +841,6 @@ static inline void schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, static inline void schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq) { -#if CORNER_REFLECTORS float w = (float)ARENA_WIDTH; float h = (float)ARENA_HEIGHT; float strength = env->reflector_strength; @@ -869,12 +860,6 @@ static inline void schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, 0.0f, 0.0f, strength, ECHO_STATIC); schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, 0.0f, 0.0f, strength, ECHO_STATIC); -#else - (void)env; - (void)chirp; - (void)slice_ticks; - (void)freq; -#endif } static inline void schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, @@ -968,11 +953,6 @@ void compute_observations(Bat* env) { process_echo_events(env); - for (int i = 0; i < FREQ_BINS; i++) { - env->observations[LEFT_FREQ_OFFSET + i] = bat_clampf(env->observations[LEFT_FREQ_OFFSET + i], 0.0f, 1.0f); - env->observations[RIGHT_FREQ_OFFSET + i] = bat_clampf(env->observations[RIGHT_FREQ_OFFSET + i], 0.0f, 1.0f); - } - float chirp_age_denom = chirp_age_norm_denominator(env); int chirp_age = env->tick - env->last_chirp_tick; if (env->last_chirp_tick < 0) chirp_age = (int)ceilf(chirp_age_denom); @@ -1130,8 +1110,8 @@ static inline void update_bug(Bat* env, float dt) { } static inline void update_motion(Bat* env, float dt) { - int move = action_index(env->actions[0], MOVE_ACTIONS); - int turn = action_index(env->actions[1], TURN_ACTIONS); + int move = (int)env->actions[ACTION_MOVE]; + int turn = (int)env->actions[ACTION_TURN]; float fx = cosf(env->heading); float fy = sinf(env->heading); float speed = env->vx * fx + env->vy * fy; @@ -1164,9 +1144,9 @@ static inline bool try_emit_chirp(Bat* env) { return false; } - int start_idx = 
action_index(env->actions[2], CHIRP_FREQ_BINS); - int end_idx = action_index(env->actions[3], CHIRP_FREQ_BINS); - int duration_idx = action_index(env->actions[4], CHIRP_DURATION_BINS); + int start_idx = (int)env->actions[ACTION_CHIRP_FREQ_START]; + int end_idx = (int)env->actions[ACTION_CHIRP_FREQ_END]; + int duration_idx = (int)env->actions[ACTION_CHIRP_DURATION]; env->last_chirp_start_freq = norm_bin(start_idx, CHIRP_FREQ_BINS); env->last_chirp_end_freq = norm_bin(end_idx, CHIRP_FREQ_BINS); @@ -1202,22 +1182,20 @@ static inline float next_chirp_overlap_fraction(Bat* env) { } static inline ChirpStatus update_chirp(Bat* env) { - int emit = action_index(env->actions[5], CHIRP_EMIT_ACTIONS); + int emit = (int)env->actions[ACTION_CHIRP_EMIT]; if (emit) { if (env->chirps_emitted >= env->chirp_budget) { return CHIRP_STATUS_OVER_BUDGET; } return try_emit_chirp(env) ? CHIRP_STATUS_EMITTED : CHIRP_STATUS_COOLDOWN; - } else if (env->chirp_age_ticks < MAX_CHIRP_AGE_TICKS) { + } + + if (env->chirp_age_ticks < MAX_CHIRP_AGE_TICKS) { env->chirp_age_ticks += 1; } return CHIRP_STATUS_NONE; } -static inline bool caught_bug(Bat* env) { - return dist(env->x, env->y, env->bug_x, env->bug_y) <= AGENT_RADIUS + BUG_RADIUS; -} - void c_step(Bat* env) { env->rewards[0] = 0.0f; env->terminals[0] = 0.0f; @@ -1240,8 +1218,8 @@ void c_step(Bat* env) { if (hits_wall(env) || hits_obstacle(env)) { env->rewards[0] = -env->collision_penalty; collision = 1.0f; - } else if (caught_bug(env)) { - env->rewards[0] = success_reward(env); + } else if (dist(env->x, env->y, env->bug_x, env->bug_y) <= AGENT_RADIUS + BUG_RADIUS) { + env->rewards[0] = env->chirp_efficiency_reward * chirp_efficiency(env); success = 1.0f; } else { float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); @@ -1408,7 +1386,6 @@ static inline void draw_obstacle_echoes(Bat* env, ChirpEvent* chirp, static inline void draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float sx, float sy) { -#if CORNER_REFLECTORS float w = 
(float)ARENA_WIDTH; float h = (float)ARENA_HEIGHT; float strength = env->reflector_strength; @@ -1420,16 +1397,9 @@ static inline void draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 0.0f, strength, sx, sy); draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); -#else - (void)env; - (void)chirp; - (void)sx; - (void)sy; -#endif } static inline void draw_corner_reflector_markers(Bat* env) { -#if CORNER_REFLECTORS const int size = 8; const Color fill = (Color){128, 128, 132, 255}; const Color outline = (Color){202, 202, 208, 255}; @@ -1453,9 +1423,6 @@ static inline void draw_corner_reflector_markers(Bat* env) { DrawRectangleLines(0, mid_y, size, size, outline); DrawRectangle(max_x, mid_y, size, size, fill); DrawRectangleLines(max_x, mid_y, size, size, outline); -#else - (void)env; -#endif } static inline void draw_echo_reflections(Bat* env, float sx, float sy) { diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 28af0b1ae5..5429b88db5 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -580,7 +580,7 @@ static int test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation( env.bug_wing_sideband_gain = 0.25f; clear_echo_queue(&env); - int bin = freq_bin_index(&env, 0.5f); + int bin = freq_bin_index(0.5f); add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, ECHO_BUG); EchoBucket* bug_bucket = &env.echo_queue[1 % ECHO_QUEUE_TICKS]; ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin], 0.4f, 0.0001f); @@ -1567,8 +1567,8 @@ static int test_frequency_bin_energy_sums_and_caps(void) { Bat env = make_test_env(); memset(env.observations, 0, OBS_SIZE * sizeof(float)); - int high_bin = freq_bin_index(&env, 1.0f); - int low_bin = freq_bin_index(&env, 0.0f); + int high_bin = freq_bin_index(1.0f); + int low_bin = freq_bin_index(0.0f); env.observations[LEFT_FREQ_OFFSET + 
high_bin] = bat_clampf( env.observations[LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); env.observations[LEFT_FREQ_OFFSET + high_bin] = bat_clampf( From 57ec5f10401f957526926f30f3b0b69f05b376cd Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:23:00 -0700 Subject: [PATCH 42/51] Simplify bat curriculum and echo helpers --- config/bat.ini | 16 ++--- ocean/bat/bat.c | 8 +-- ocean/bat/bat.h | 100 ++++++++++++-------------------- ocean/bat/binding.c | 8 +-- ocean/bat/tests/test_bat_core.c | 11 ++-- 5 files changed, 60 insertions(+), 83 deletions(-) diff --git a/config/bat.ini b/config/bat.ini index 0d2ded03cf..b66260d9cd 100644 --- a/config/bat.ini +++ b/config/bat.ini @@ -17,10 +17,10 @@ encoder = DefaultEncoder decoder = DefaultDecoder [env] -bat_max_speed = 15.498233877318418 -bat_min_speed = 2.6389946132676654 -bat_accel = 53.02330161128345 -bat_turn_rate = 8.371655963408276 +max_speed = 15.498233877318418 +min_speed = 2.6389946132676654 +accel = 53.02330161128345 +turn_rate = 8.371655963408276 render_target_fps = 60 record_video = 0 record_video_fps = 30 @@ -144,25 +144,25 @@ min = 4 max = 8 scale = auto -[sweep.env.bat_max_speed] +[sweep.env.max_speed] distribution = uniform min = 8.0 max = 30.0 scale = auto -[sweep.env.bat_min_speed] +[sweep.env.min_speed] distribution = uniform min = 2.0 max = 6.0 scale = auto -[sweep.env.bat_accel] +[sweep.env.accel] distribution = uniform min = 40.0 max = 90.0 scale = auto -[sweep.env.bat_turn_rate] +[sweep.env.turn_rate] distribution = uniform min = 4.0 max = 9.4247780 diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index f6f7219dae..cfff6a9064 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -50,10 +50,10 @@ static void set_demo_defaults(Bat* env) { } static void apply_env_config_value(Bat* env, const char* key, float value) { - if (strcmp(key, "bat_max_speed") == 0) env->max_speed = value; - else if (strcmp(key, "bat_min_speed") == 0) env->min_speed = value; - else if (strcmp(key, "bat_accel") == 0) 
env->accel = value; - else if (strcmp(key, "bat_turn_rate") == 0) env->turn_rate = value; + if (strcmp(key, "max_speed") == 0) env->max_speed = value; + else if (strcmp(key, "min_speed") == 0) env->min_speed = value; + else if (strcmp(key, "accel") == 0) env->accel = value; + else if (strcmp(key, "turn_rate") == 0) env->turn_rate = value; else if (strcmp(key, "render_target_fps") == 0) env->render_target_fps = (int)value; else if (strcmp(key, "record_video") == 0) env->record_video = (int)value; else if (strcmp(key, "record_video_fps") == 0) env->record_video_fps = (int)value; diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index a20acc79c4..b7873a6455 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -36,6 +36,7 @@ #define CHIRPS_USED_OBS 37 #define FORWARD_SPEED_OBS 38 #define TURN_RATE_OBS 39 +#define TIMER_OBS 40 #define NOOP 0 #define THRUST_FORWARD 1 @@ -45,7 +46,6 @@ #define TURN_LEFT 1 #define TURN_RIGHT 2 -#define MAX_OBSTACLES 16 #define MAX_STEPS 512 #define TICK_RATE (1.0f/60.0f) #define ARENA_WIDTH 64 @@ -81,7 +81,8 @@ #define AUDIO_VOLUME 0.22f #define RECORD_MAX_VOICES 16 #define CHIRP_PERF_FLOOR 0.05f -#define CHIRP_COST 0.0f +#define CHIRP_MIN_DURATION_SECONDS 0.04f +#define CHIRP_DURATION_RANGE_SECONDS 0.18f #define MAX_CHIRP_AGE_TICKS 30 #define MAX_CHIRPS_PER_EPISODE 15 @@ -105,7 +106,7 @@ typedef struct ChirpEvent { typedef struct EchoBucket { float energy[2][FREQ_BINS]; float bug_energy; - float bug_path; + float closest_bug_echo_path; int tick; } EchoBucket; @@ -281,7 +282,7 @@ static inline float bat_clampf(float v, float lo, float hi) { } static inline float chirp_duration_seconds(float duration_norm) { - return 0.04f + 0.18f * duration_norm; + return CHIRP_MIN_DURATION_SECONDS + CHIRP_DURATION_RANGE_SECONDS * duration_norm; } #include "bat_audio.h" @@ -319,10 +320,6 @@ static inline void chirp_source_for_fraction(ChirpEvent* chirp, float slice, chirp_source_for_slice(chirp, slice_idx, source_x, source_y); } -static inline float 
echo_time_seconds(float distance, float sound_speed) { - return 2.0f * distance / sound_speed; -} - static inline float chirp_age_norm_denominator(Bat* env) { float travel_ticks = MAX_ECHO_RANGE / env->sound_speed / TICK_RATE; float chirp_ticks = chirp_duration_seconds(1.0f) / TICK_RATE; @@ -408,7 +405,6 @@ static inline int curriculum_obstacles(Bat* env) { count = CURRICULUM_START_OBSTACLES + 1 + (env->curriculum_level - 1) / step; } if (count > CURRICULUM_MAX_OBSTACLES) count = CURRICULUM_MAX_OBSTACLES; - if (count > MAX_OBSTACLES) count = MAX_OBSTACLES; return count; } @@ -419,10 +415,6 @@ static inline float curriculum_bug_distance(Bat* env) { CURRICULUM_MAX_BUG_DISTANCE); } -static inline bool curriculum_inbound_enabled(Bat* env) { - return env->curriculum_level >= CURRICULUM_INBOUND_START_LEVEL; -} - static inline float curriculum_inbound_bug_distance(Bat* env) { float base = CURRICULUM_MAX_BUG_DISTANCE; int extra_levels = env->curriculum_level - CURRICULUM_INBOUND_START_LEVEL + 1; @@ -430,21 +422,6 @@ static inline float curriculum_inbound_bug_distance(Bat* env) { return bat_clampf(distance, base, CURRICULUM_INBOUND_MAX_BUG_DISTANCE); } -static inline float curriculum_spawn_distance(Bat* env) { - if (curriculum_inbound_enabled(env)) { - return curriculum_inbound_bug_distance(env); - } - return curriculum_bug_distance(env); -} - -static inline float curriculum_bug_speed(Bat* env) { - float speed = BUG_SPEED; - if (curriculum_inbound_enabled(env)) { - speed *= INBOUND_BUG_SPEED_MULTIPLIER; - } - return speed; -} - static inline float curriculum_bug_maneuver_strength(Bat* env) { if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) return 0.0f; int extra_levels = env->curriculum_level - BUG_MANEUVER_START_LEVEL; @@ -480,6 +457,8 @@ static inline float norm_range(float value, float lo, float hi) { return bat_clampf((value - lo) / span, 0.0f, 1.0f); } +// TODO: Revisit whether these curriculum difficulty diagnostics are worth logging; +// they add a lot of code 
and may be removable before merge. static inline float curriculum_distance_difficulty(Bat* env) { float max_distance = fmaxf(CURRICULUM_MAX_BUG_DISTANCE, CURRICULUM_INBOUND_MAX_BUG_DISTANCE); @@ -542,14 +521,7 @@ static inline void sample_spawns_at_distance(Bat* env, float target_distance) { sample_spawns(env); } -static inline void set_bug_velocity(Bat* env, float heading, float speed) { - env->bug_base_heading = heading; - env->bug_vx = cosf(heading) * speed; - env->bug_vy = sinf(heading) * speed; -} - static inline void reset_bug_motion(Bat* env) { - env->bug_inbound = curriculum_inbound_enabled(env) ? 1 : 0; float strength = curriculum_bug_maneuver_strength(env); env->bug_maneuver_mode = strength > 0.000001f ? 1 + (int)(rng_next(env) % 3u) : 0; env->bug_maneuver_phase = randf(env) * TWO_PI; @@ -557,21 +529,19 @@ static inline void reset_bug_motion(Bat* env) { (0.75f + 0.50f * randf(env)); env->bug_maneuver_sign = (rng_next(env) & 1u) ? -1.0f : 1.0f; - float speed = curriculum_bug_speed(env); + float speed = env->bug_inbound ? 
BUG_SPEED * INBOUND_BUG_SPEED_MULTIPLIER : BUG_SPEED; + float heading; if (env->bug_inbound) { float tx, ty; norm_vec(env->x - env->bug_x, env->y - env->bug_y, &tx, &ty); float noise = INBOUND_HEADING_NOISE_DEGREES * (PI_F / 180.0f); - float heading = atan2f(ty, tx) + (2.0f * randf(env) - 1.0f) * noise; - set_bug_velocity(env, heading, speed); + heading = atan2f(ty, tx) + (2.0f * randf(env) - 1.0f) * noise; } else { - float heading = randf(env) * TWO_PI - PI_F; - set_bug_velocity(env, heading, speed); + heading = randf(env) * TWO_PI - PI_F; } -} - -static inline void apply_curriculum(Bat* env) { - env->num_obstacles = curriculum_obstacles(env); + env->bug_base_heading = heading; + env->bug_vx = cosf(heading) * speed; + env->bug_vy = sinf(heading) * speed; } static inline void advance_curriculum(Bat* env) { @@ -582,6 +552,9 @@ static inline void advance_curriculum(Bat* env) { } } +// TODO: Revisit this when we are ready to break reset determinism. If overlapping +// random obstacles are acceptable, remove rects_overlap(), obstacle_clear(), and +// the attempt loop/fallback placement in generate_obstacles(). 
static inline bool obstacle_clear(Bat* env, int idx, float x, float y, float w, float h) { if (circle_rect_collision(env->x, env->y, AGENT_RADIUS + 2.0f, x, y, w, h)) { @@ -632,10 +605,10 @@ static inline void generate_obstacles(Bat* env) { void init(Bat* env) { env->tick = 0; - env->obstacle_x = (float*)calloc(MAX_OBSTACLES, sizeof(float)); - env->obstacle_y = (float*)calloc(MAX_OBSTACLES, sizeof(float)); - env->obstacle_w = (float*)calloc(MAX_OBSTACLES, sizeof(float)); - env->obstacle_h = (float*)calloc(MAX_OBSTACLES, sizeof(float)); + env->obstacle_x = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); + env->obstacle_y = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); + env->obstacle_w = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); + env->obstacle_h = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); } void allocate(Bat* env) { @@ -696,7 +669,7 @@ static inline int freq_bin_index(float freq_norm) { static inline void clear_echo_bucket(EchoBucket* bucket) { memset(bucket, 0, sizeof(*bucket)); - bucket->bug_path = -1.0f; + bucket->closest_bug_echo_path = -1.0f; bucket->tick = -1; } @@ -730,8 +703,8 @@ static inline void add_echo_event(Bat* env, int ear, float receive_tick, if (bin + 1 < FREQ_BINS) bucket->energy[ear_idx][bin + 1] += sideband; } bucket->bug_energy += intensity; - if (bucket->bug_path < 0.0f || path < bucket->bug_path) { - bucket->bug_path = path; + if (bucket->closest_bug_echo_path < 0.0f || path < bucket->closest_bug_echo_path) { + bucket->closest_bug_echo_path = path; } } } @@ -939,8 +912,9 @@ static inline void process_echo_events(Bat* env) { } if (bucket->bug_energy > 0.0f) { env->tick_bug_echo_energy += bucket->bug_energy; - if (env->tick_bug_echo_path < 0.0f || bucket->bug_path < env->tick_bug_echo_path) { - env->tick_bug_echo_path = bucket->bug_path; + if (env->tick_bug_echo_path < 0.0f + || bucket->closest_bug_echo_path < env->tick_bug_echo_path) { + env->tick_bug_echo_path = bucket->closest_bug_echo_path; 
} } clear_echo_bucket(bucket); @@ -968,21 +942,24 @@ void compute_observations(Bat* env) { env->observations[FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->max_speed, 0.0f, 1.0f); env->observations[TURN_RATE_OBS] = bat_clampf(env->turn_velocity / env->turn_rate, -1.0f, 1.0f); float timer_norm = env->tick / (float)MAX_STEPS; - env->observations[40] = bat_clampf(timer_norm, 0.0f, 1.0f); + env->observations[TIMER_OBS] = bat_clampf(timer_norm, 0.0f, 1.0f); } static inline void reset_episode(Bat* env) { env->tick = 0; env->turn_velocity = 0.0f; env->heading = randf(env) * TWO_PI - PI_F; - float initial_speed = env->min_speed; - env->vx = cosf(env->heading) * initial_speed; - env->vy = sinf(env->heading) * initial_speed; + env->vx = cosf(env->heading) * env->min_speed; + env->vy = sinf(env->heading) * env->min_speed; if (env->curriculum_level < env->curriculum_initial_level) { env->curriculum_level = env->curriculum_initial_level; } - apply_curriculum(env); - sample_spawns_at_distance(env, curriculum_spawn_distance(env)); + env->num_obstacles = curriculum_obstacles(env); + env->bug_inbound = env->curriculum_level >= CURRICULUM_INBOUND_START_LEVEL; + float bug_distance = env->bug_inbound + ? curriculum_inbound_bug_distance(env) + : curriculum_bug_distance(env); + sample_spawns_at_distance(env, bug_distance); generate_obstacles(env); reset_bug_motion(env); env->last_chirp_start_freq = 0.0f; @@ -1032,7 +1009,7 @@ static inline bool hits_wall(Bat* env) { } static inline void update_bug(Bat* env, float dt) { - float speed = curriculum_bug_speed(env); + float speed = env->bug_inbound ? BUG_SPEED * INBOUND_BUG_SPEED_MULTIPLIER : BUG_SPEED; float strength = curriculum_bug_maneuver_strength(env); if (env->bug_maneuver_mode > 0) { env->bug_maneuver_phase += env->bug_maneuver_rate * dt; @@ -1228,7 +1205,6 @@ void c_step(Bat* env) { env->rewards[0] -= env->step_cost; // TODO: Fold this only when we are ready to break training determinism. 
if (chirp_status == CHIRP_STATUS_EMITTED) { env->rewards[0] += env->valid_chirp_reward; // TODO: Remove this; chirps should only pay when bug echoes improve. - env->rewards[0] -= CHIRP_COST; if (chirp_overlap_fraction > 0.0f) { env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; env->chirps_overlapped += 1; @@ -1338,7 +1314,7 @@ static inline void draw_echo_flash(Bat* env, ChirpEvent* chirp, float sx, float sy) { float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; float distance = dist(chirp->x, chirp->y, rx, ry); - float echo_time = echo_time_seconds(distance, env->sound_speed); + float echo_time = 2.0f * distance / env->sound_speed; bool echo_arriving_now = fabsf(age_seconds - echo_time) <= 0.025f; if (!echo_arriving_now) return; diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index 4ebd90c18f..e5e3d15909 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -12,10 +12,10 @@ void my_init(Env* env, Dict* kwargs) { env->ear_rear_gain = dict_get(kwargs, "ear_rear_gain")->value; env->ear_front_gain = dict_get(kwargs, "ear_front_gain")->value; env->ear_side_gain = dict_get(kwargs, "ear_side_gain")->value; - env->max_speed = dict_get(kwargs, "bat_max_speed")->value; - env->min_speed = dict_get(kwargs, "bat_min_speed")->value; - env->accel = dict_get(kwargs, "bat_accel")->value; - env->turn_rate = dict_get(kwargs, "bat_turn_rate")->value; + env->max_speed = dict_get(kwargs, "max_speed")->value; + env->min_speed = dict_get(kwargs, "min_speed")->value; + env->accel = dict_get(kwargs, "accel")->value; + env->turn_rate = dict_get(kwargs, "turn_rate")->value; env->render_target_fps = dict_get(kwargs, "render_target_fps")->value; env->record_video = dict_get(kwargs, "record_video")->value; env->record_video_fps = dict_get(kwargs, "record_video_fps")->value; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 5429b88db5..982f799e8a 100644 --- a/ocean/bat/tests/test_bat_core.c +++ 
b/ocean/bat/tests/test_bat_core.c @@ -144,18 +144,18 @@ static int test_timer_observation_tracks_elapsed_fraction(void) { c_reset(&env); ASSERT_TRUE(OBS_SIZE == 41); - ASSERT_FLOAT_NEAR(env.observations[40], 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[TIMER_OBS], 0.0f, 0.0001f); env.actions[0] = NOOP; env.actions[1] = TURN_NONE; env.actions[5] = 0.0f; c_step(&env); - ASSERT_FLOAT_NEAR(env.observations[40], 1.0f / (float)MAX_STEPS, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[TIMER_OBS], 1.0f / (float)MAX_STEPS, 0.0001f); env.tick = MAX_STEPS / 2; compute_observations(&env); - ASSERT_FLOAT_NEAR(env.observations[40], 0.5f, 0.0001f); + ASSERT_FLOAT_NEAR(env.observations[TIMER_OBS], 0.5f, 0.0001f); free_allocated(&env); return 0; @@ -566,7 +566,7 @@ static int test_echo_scheduling_uses_tick_bucket_accumulator(void) { ASSERT_TRUE(env.echo_queue[slot].tick == 10); ASSERT_FLOAT_NEAR(env.echo_queue[slot].energy[0][FREQ_BINS - 1], 1.1f, 0.0001f); ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_energy, 1.1f, 0.0001f); - ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_path, 12.0f, 0.0001f); + ASSERT_FLOAT_NEAR(env.echo_queue[slot].closest_bug_echo_path, 12.0f, 0.0001f); free_allocated(&env); return 0; @@ -1144,7 +1144,7 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { static int test_reflection_arrives_at_two_way_travel_time(void) { float sound_speed = 100.0f; float distance = 25.0f; - float echo_time = echo_time_seconds(distance, sound_speed); + float echo_time = 2.0f * distance / sound_speed; ASSERT_FLOAT_NEAR(echo_time, 0.5f, 0.0001f); ASSERT_TRUE(fabsf((echo_time + 0.005f) - echo_time) <= 0.02f); @@ -1174,6 +1174,7 @@ static int test_bins_only_observation_layout(void) { ASSERT_TRUE(CHIRPS_USED_OBS == 37); ASSERT_TRUE(FORWARD_SPEED_OBS == 38); ASSERT_TRUE(TURN_RATE_OBS == 39); + ASSERT_TRUE(TIMER_OBS == 40); return 0; } From ffea3183ebf694095f7ab5761a8cd17d940e06ba Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:30:58 
-0700 Subject: [PATCH 43/51] Remove stale bat chirp budget field --- ocean/bat/bat.h | 8 ++++---- ocean/bat/tests/test_bat_core.c | 29 +++++++++++++---------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index b7873a6455..08c18d3a51 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -230,7 +230,6 @@ typedef struct Bat { float sound_speed; float reflector_strength; int chirp_cooldown_ticks; - int chirp_budget; int chirp_age_ticks; int last_chirp_tick; float last_chirp_start_freq; @@ -439,9 +438,11 @@ static inline float curriculum_bug_maneuver_frequency(Bat* env) { } static inline float chirps_used_ratio(Bat* env) { - return bat_clampf(env->chirps_emitted / (float)env->chirp_budget, 0.0f, 1.0f); + return bat_clampf(env->chirps_emitted / (float)MAX_CHIRPS_PER_EPISODE, 0.0f, 1.0f); } +// TODO: Revisit this when we are ready to break reward determinism. The ratio is +// still an observation, but this reward bonus may be removable before merge. static inline float chirp_efficiency(Bat* env) { return 0.5f + 0.5f * (1.0f - chirps_used_ratio(env)); } @@ -970,7 +971,6 @@ static inline void reset_episode(Bat* env) { memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; clear_echo_queue(env); - env->chirp_budget = MAX_CHIRPS_PER_EPISODE; env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; @@ -1161,7 +1161,7 @@ static inline float next_chirp_overlap_fraction(Bat* env) { static inline ChirpStatus update_chirp(Bat* env) { int emit = (int)env->actions[ACTION_CHIRP_EMIT]; if (emit) { - if (env->chirps_emitted >= env->chirp_budget) { + if (env->chirps_emitted >= MAX_CHIRPS_PER_EPISODE) { return CHIRP_STATUS_OVER_BUDGET; } return try_emit_chirp(env) ? 
CHIRP_STATUS_EMITTED : CHIRP_STATUS_COOLDOWN; diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 982f799e8a..72d6815f9d 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -75,11 +75,10 @@ static int test_chirp_metadata_and_observation_size(void) { return 0; } -static int test_chirp_budget_observation_tracks_used_chirps(void) { +static int test_chirps_used_observation_tracks_emitted_chirps(void) { Bat env = make_test_env(); c_reset(&env); - ASSERT_TRUE(env.chirp_budget == MAX_CHIRPS_PER_EPISODE); ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 0.0f, 0.0001f); env.actions[2] = 0.0f; @@ -100,13 +99,16 @@ static int test_chirp_budget_observation_tracks_used_chirps(void) { return 0; } -static int test_chirp_budget_stays_fixed_with_curriculum_level(void) { +static int test_max_chirps_stays_fixed_with_curriculum_level(void) { Bat env = make_test_env(); env.curriculum_initial_level = 8; c_reset(&env); ASSERT_TRUE(env.curriculum_level == 8); - ASSERT_TRUE(env.chirp_budget == MAX_CHIRPS_PER_EPISODE); + env.chirps_emitted = 1; + compute_observations(&env); + ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], + 1.0f / (float)MAX_CHIRPS_PER_EPISODE, 0.0001f); free_allocated(&env); return 0; @@ -117,7 +119,7 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { env.chirp_cooldown_ticks = 5; env.early_chirp_penalty = 0.0f; c_reset(&env); - env.chirp_budget = 1; + env.chirps_emitted = MAX_CHIRPS_PER_EPISODE - 1; compute_observations(&env); env.actions[2] = 0.0f; @@ -126,7 +128,7 @@ static int test_chirping_after_budget_terminates_with_penalty(void) { env.actions[5] = 1.0f; c_step(&env); ASSERT_TRUE(env.terminals[0] == 0.0f); - ASSERT_TRUE(env.chirps_emitted == 1); + ASSERT_TRUE(env.chirps_emitted == MAX_CHIRPS_PER_EPISODE); ASSERT_FLOAT_NEAR(env.observations[CHIRPS_USED_OBS], 1.0f, 0.0001f); c_step(&env); @@ -186,11 +188,10 @@ static int 
test_chirp_efficiency_scores_low_usage_above_full_budget(void) { Bat env = make_test_env(); c_reset(&env); - env.chirp_budget = 10; env.chirps_emitted = 1; - ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.95f, 0.0001f); + ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.9666667f, 0.0001f); - env.chirps_emitted = 10; + env.chirps_emitted = MAX_CHIRPS_PER_EPISODE; ASSERT_FLOAT_NEAR(chirp_efficiency(&env), 0.50f, 0.0001f); free_allocated(&env); @@ -225,7 +226,6 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { env.chirp_efficiency_reward = 1.0f; c_reset(&env); - env.chirp_budget = 10; env.chirps_emitted = 2; env.x = 20.0f; env.y = 20.0f; @@ -235,7 +235,7 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { c_step(&env); ASSERT_FLOAT_NEAR(env.terminals[0], 1.0f, 0.0001f); - ASSERT_FLOAT_NEAR(env.rewards[0], 0.90f, 0.0001f); + ASSERT_FLOAT_NEAR(env.rewards[0], 0.9333333f, 0.0001f); free_allocated(&env); return 0; @@ -247,7 +247,6 @@ static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components env.curriculum_start_bug_distance = 8.0f; env.num_obstacles = 2; - env.chirp_budget = 12; env.start_bug_dist = 32.0f; ASSERT_FLOAT_NEAR(curriculum_distance_difficulty(&env), 0.5000000f, 0.0001f); @@ -279,7 +278,6 @@ static int test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf(voi env.curriculum_start_bug_distance = 8.0f; env.num_obstacles = 2; - env.chirp_budget = 14; env.chirps_emitted = 7; env.start_bug_dist = 32.0f; @@ -1123,7 +1121,6 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { env.valid_chirp_reward = 0.0005f; env.chirp_overlap_penalty = 0.0040f; env.chirp_cooldown_ticks = 1; - env.chirp_budget = 10; env.chirps_emitted = 1; env.last_chirp_tick = 0; env.last_bug_echo_expected_tick = 3.0f; @@ -1793,8 +1790,8 @@ static int test_obstacles_are_small_enough_for_trainability(void) { int main(void) { if (test_chirp_metadata_and_observation_size()) return 1; - if 
(test_chirp_budget_observation_tracks_used_chirps()) return 1; - if (test_chirp_budget_stays_fixed_with_curriculum_level()) return 1; + if (test_chirps_used_observation_tracks_emitted_chirps()) return 1; + if (test_max_chirps_stays_fixed_with_curriculum_level()) return 1; if (test_chirping_after_budget_terminates_with_penalty()) return 1; if (test_timer_observation_tracks_elapsed_fraction()) return 1; if (test_timeout_terminates_with_minus_one_reward()) return 1; From 7967d4428e0a998eaf344ca60063e9f3f39ac0ec Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:35:52 -0700 Subject: [PATCH 44/51] Trim bat curriculum component logs --- ocean/bat/bat.h | 9 --------- ocean/bat/binding.c | 3 --- ocean/bat/tests/test_bat_core.c | 7 ++----- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 08c18d3a51..19610a66c7 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -129,9 +129,6 @@ typedef struct Log { float curriculum_level; float curriculum_difficulty; float curriculum_perf; - float curriculum_distance_difficulty; - float curriculum_obstacle_difficulty; - float curriculum_motion_difficulty; float num_obstacles; float chirps_emitted; float chirp_perf; @@ -637,9 +634,6 @@ void free_allocated(Bat* env) { static inline void add_log(Bat* env, float success, float collision, float timeout) { float curriculum_difficulty_value = curriculum_difficulty(env); - float distance_difficulty = curriculum_distance_difficulty(env); - float obstacle_difficulty = curriculum_obstacle_difficulty(env); - float motion_difficulty = curriculum_motion_difficulty(env); float chirp_perf_value = chirp_perf(env); env->log.perf += success * curriculum_difficulty_value * chirp_perf_value; env->log.base_perf += success; @@ -651,9 +645,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.curriculum_level += env->curriculum_level; env->log.curriculum_difficulty += curriculum_difficulty_value; 
env->log.curriculum_perf += success * curriculum_difficulty_value; - env->log.curriculum_distance_difficulty += distance_difficulty; - env->log.curriculum_obstacle_difficulty += obstacle_difficulty; - env->log.curriculum_motion_difficulty += motion_difficulty; env->log.num_obstacles += env->num_obstacles; env->log.chirps_emitted += env->chirps_emitted; env->log.chirp_perf += chirp_perf_value; diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index e5e3d15909..ceb9769e28 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -52,9 +52,6 @@ void my_log(Log* log, Dict* out) { dict_set(out, "curriculum_level", log->curriculum_level); dict_set(out, "curriculum_difficulty", log->curriculum_difficulty); dict_set(out, "curriculum_perf", log->curriculum_perf); - dict_set(out, "curriculum_distance_difficulty", log->curriculum_distance_difficulty); - dict_set(out, "curriculum_obstacle_difficulty", log->curriculum_obstacle_difficulty); - dict_set(out, "curriculum_motion_difficulty", log->curriculum_motion_difficulty); dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "chirps_emitted", log->chirps_emitted); dict_set(out, "chirp_perf", log->chirp_perf); diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 72d6815f9d..bad8d41c62 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -241,7 +241,7 @@ static int test_success_reward_includes_chirp_efficiency_bonus(void) { return 0; } -static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components(void) { +static int test_curriculum_perf_uses_distance_and_obstacle_difficulty(void) { Bat env = make_test_env(); c_reset(&env); @@ -255,9 +255,6 @@ static int test_curriculum_perf_logs_distance_and_obstacle_difficulty_components ASSERT_FLOAT_NEAR(curriculum_difficulty(&env), 0.3888889f, 0.0001f); add_log(&env, 1.0f, 0.0f, 0.0f); ASSERT_FLOAT_NEAR(env.log.base_perf, 1.0f, 0.0001f); - 
ASSERT_FLOAT_NEAR(env.log.curriculum_distance_difficulty, 0.5000000f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_obstacle_difficulty, 0.6666667f, 0.0001f); - ASSERT_FLOAT_NEAR(env.log.curriculum_motion_difficulty, 0.0000000f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_difficulty, 0.3888889f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.curriculum_perf, 0.3888889f, 0.0001f); ASSERT_FLOAT_NEAR(env.log.num_obstacles, 2.0f, 0.0001f); @@ -1798,7 +1795,7 @@ int main(void) { if (test_chirp_efficiency_scores_low_usage_above_full_budget()) return 1; if (test_chirp_perf_uses_fixed_fifteen_chirp_reference()) return 1; if (test_success_reward_includes_chirp_efficiency_bonus()) return 1; - if (test_curriculum_perf_logs_distance_and_obstacle_difficulty_components()) return 1; + if (test_curriculum_perf_uses_distance_and_obstacle_difficulty()) return 1; if (test_perf_composes_base_perf_curriculum_difficulty_and_chirp_perf()) return 1; if (test_left_right_echo_asymmetry()) return 1; if (test_directional_echo_arrival_and_gain_by_side()) return 1; From fa5915915a3841af0e5575896afac9103cd0de8a Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:39:44 -0700 Subject: [PATCH 45/51] Simplify fixed chirp perf reference --- ocean/bat/bat.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 19610a66c7..c803c0fd46 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -445,8 +445,7 @@ static inline float chirp_efficiency(Bat* env) { } static inline float chirp_perf(Bat* env) { - float reference_chirps = fmaxf(1.0f, (float)MAX_CHIRPS_PER_EPISODE); - float raw = 1.0f - env->chirps_emitted / reference_chirps; + float raw = 1.0f - env->chirps_emitted / (float)MAX_CHIRPS_PER_EPISODE; return bat_clampf(raw, CHIRP_PERF_FLOOR, 1.0f); } From eb24a21df08967e7783a413354394330e5c81717 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:51:07 -0700 Subject: [PATCH 46/51] Remove impossible bat max speed guard --- 
ocean/bat/bat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index c803c0fd46..4290437769 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -1092,7 +1092,7 @@ static inline void update_motion(Bat* env, float dt) { float turn_command = 0.0f; if (turn == TURN_LEFT) turn_command = -1.0f; if (turn == TURN_RIGHT) turn_command = 1.0f; - float speed_ratio = env->max_speed > 0.0f ? speed / env->max_speed : 0.0f; + float speed_ratio = speed / env->max_speed; env->turn_velocity = turn_command * env->turn_rate * bat_clampf(speed_ratio, 0.0f, 1.0f); env->heading += env->turn_velocity * dt; if (env->heading > PI_F) env->heading -= TWO_PI; From 3fd1f0fe0e37f742dafa61da60d617f779fcc136 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 09:54:24 -0700 Subject: [PATCH 47/51] Remove dead bat echo energy accumulator --- ocean/bat/bat.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 4290437769..c5bc2d6004 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -248,7 +248,6 @@ typedef struct Bat { float bug_echo_reward_scale; float bug_echo_farther_penalty_scale; float bug_wing_sideband_gain; - float tick_bug_echo_energy; float tick_bug_echo_path; float last_bug_echo_path; float last_bug_echo_expected_tick; @@ -902,7 +901,6 @@ static inline void process_echo_events(Bat* env) { env->observations[right_idx] + bucket->energy[1][i], 0.0f, 1.0f); } if (bucket->bug_energy > 0.0f) { - env->tick_bug_echo_energy += bucket->bug_energy; if (env->tick_bug_echo_path < 0.0f || bucket->closest_bug_echo_path < env->tick_bug_echo_path) { env->tick_bug_echo_path = bucket->closest_bug_echo_path; @@ -913,7 +911,6 @@ static inline void process_echo_events(Bat* env) { void compute_observations(Bat* env) { memset(env->observations, 0, OBS_SIZE * sizeof(float)); - env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; process_echo_events(env); @@ -961,7 +958,6 @@ static inline void 
reset_episode(Bat* env) { memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; clear_echo_queue(env); - env->tick_bug_echo_energy = 0.0f; env->tick_bug_echo_path = -1.0f; env->last_bug_echo_path = -1.0f; env->last_bug_echo_expected_tick = -1.0f; From 426be02f9437bf598809d70db70f764f678e8788 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 12:00:07 -0700 Subject: [PATCH 48/51] Clean up bat environment internals --- ocean/bat/bat.h | 190 ++++++++++++-------------------- ocean/bat/bat_audio.h | 7 +- ocean/bat/binding.c | 1 - ocean/bat/tests/test_bat_core.c | 18 --- 4 files changed, 72 insertions(+), 144 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index c5bc2d6004..92922d119d 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -52,6 +52,7 @@ #define ARENA_HEIGHT 64 #define AGENT_RADIUS 2.0f #define BUG_RADIUS 1.5f +#define SPAWN_MARGIN 6.0f #define BUG_SPEED 4.0f #define BUG_MANEUVER_START_LEVEL 7 #define BUG_MANEUVER_STRENGTH 0.4f @@ -60,6 +61,7 @@ #define INBOUND_HEADING_NOISE_DEGREES 18.0f #define REFLECTOR_SPACING 8.0f #define MAX_ECHO_RANGE 128.0f +#define ECHO_MIN_FORWARD -0.35f #define BUG_ECHO_MIN_DISPLACEMENT 1.0f #define CURRICULUM_START_OBSTACLES 0 #define CURRICULUM_MAX_OBSTACLES 3 @@ -79,6 +81,7 @@ #define AUDIO_MIN_HZ 600.0f #define AUDIO_MAX_HZ 3600.0f #define AUDIO_VOLUME 0.22f +#define AUDIO_ENVELOPE_FADE 0.08f #define RECORD_MAX_VOICES 16 #define CHIRP_PERF_FLOOR 0.05f #define CHIRP_MIN_DURATION_SECONDS 0.04f @@ -132,7 +135,6 @@ typedef struct Log { float num_obstacles; float chirps_emitted; float chirp_perf; - float chirp_overlap_fraction; float n; } Log; @@ -237,7 +239,6 @@ typedef struct Bat { EchoBucket echo_queue[ECHO_QUEUE_TICKS]; int chirps_emitted; int audio_chirp_serial; - int chirps_overlapped; float chirp_efficiency_reward; float valid_chirp_reward; @@ -282,13 +283,6 @@ static inline float chirp_duration_seconds(float duration_norm) { #include "bat_audio.h" -static inline float 
chirp_ring_radius(float age_seconds, float slice, - float duration_seconds, float sound_speed) { - float ring_age = age_seconds - slice * duration_seconds; - if (ring_age < 0.0f) return 0.0f; - return sound_speed * ring_age; -} - static inline float chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { return ((slice_idx + 0.5f) / (float)chirp->slice_count) * chirp->duration / TICK_RATE; @@ -357,17 +351,15 @@ static inline bool rects_overlap(float ax, float ay, float aw, float ah, ay + ah + margin > by; } -static inline void sample_in_quadrant(Bat* env, int quadrant, float radius, - float* x, float* y) { +static inline void sample_in_quadrant(Bat* env, int quadrant, float* x, float* y) { int east = quadrant & 1; int south = (quadrant >> 1) & 1; - float margin = fmaxf(6.0f, radius + 3.0f); float half_w = ARENA_WIDTH * 0.5f; float half_h = ARENA_HEIGHT * 0.5f; - float min_x = (east ? half_w : 0.0f) + margin; - float max_x = (east ? (float)ARENA_WIDTH : half_w) - margin; - float min_y = (south ? half_h : 0.0f) + margin; - float max_y = (south ? (float)ARENA_HEIGHT : half_h) - margin; + float min_x = (east ? half_w : 0.0f) + SPAWN_MARGIN; + float max_x = (east ? (float)ARENA_WIDTH : half_w) - SPAWN_MARGIN; + float min_y = (south ? half_h : 0.0f) + SPAWN_MARGIN; + float max_y = (south ? 
(float)ARENA_HEIGHT : half_h) - SPAWN_MARGIN; *x = min_x + randf(env) * (max_x - min_x); *y = min_y + randf(env) * (max_y - min_y); } @@ -378,8 +370,8 @@ static inline void sample_spawns(Bat* env) { float min_sep = fminf(ARENA_WIDTH, ARENA_HEIGHT) * 0.31f; for (int attempt = 0; attempt < 64; attempt++) { - sample_in_quadrant(env, agent_quadrant, AGENT_RADIUS, &env->x, &env->y); - sample_in_quadrant(env, bug_quadrant, BUG_RADIUS, &env->bug_x, &env->bug_y); + sample_in_quadrant(env, agent_quadrant, &env->x, &env->y); + sample_in_quadrant(env, bug_quadrant, &env->bug_x, &env->bug_y); if (dist(env->x, env->y, env->bug_x, env->bug_y) >= min_sep) { return; } @@ -394,27 +386,23 @@ static inline void sample_spawns(Bat* env) { } static inline int curriculum_obstacles(Bat* env) { - int step = env->curriculum_obstacle_step; - int count = CURRICULUM_START_OBSTACLES; - if (env->curriculum_level > 0) { - count = CURRICULUM_START_OBSTACLES + 1 + (env->curriculum_level - 1) / step; - } - if (count > CURRICULUM_MAX_OBSTACLES) count = CURRICULUM_MAX_OBSTACLES; - return count; + int count = CURRICULUM_START_OBSTACLES + (env->curriculum_level > 0 + ? 1 + (env->curriculum_level - 1) / env->curriculum_obstacle_step : 0); + return count > CURRICULUM_MAX_OBSTACLES ? 
CURRICULUM_MAX_OBSTACLES : count; } static inline float curriculum_bug_distance(Bat* env) { - float distance = env->curriculum_start_bug_distance - + CURRICULUM_BUG_DISTANCE_STEP * env->curriculum_level; - return bat_clampf(distance, env->curriculum_start_bug_distance, + return bat_clampf(env->curriculum_start_bug_distance + + CURRICULUM_BUG_DISTANCE_STEP * env->curriculum_level, + env->curriculum_start_bug_distance, CURRICULUM_MAX_BUG_DISTANCE); } static inline float curriculum_inbound_bug_distance(Bat* env) { - float base = CURRICULUM_MAX_BUG_DISTANCE; - int extra_levels = env->curriculum_level - CURRICULUM_INBOUND_START_LEVEL + 1; - float distance = base + CURRICULUM_INBOUND_BUG_DISTANCE_STEP * extra_levels; - return bat_clampf(distance, base, CURRICULUM_INBOUND_MAX_BUG_DISTANCE); + return bat_clampf(CURRICULUM_MAX_BUG_DISTANCE + + CURRICULUM_INBOUND_BUG_DISTANCE_STEP + * (env->curriculum_level - CURRICULUM_INBOUND_START_LEVEL + 1), + CURRICULUM_MAX_BUG_DISTANCE, CURRICULUM_INBOUND_MAX_BUG_DISTANCE); } static inline float curriculum_bug_maneuver_strength(Bat* env) { @@ -428,9 +416,9 @@ static inline float curriculum_bug_maneuver_frequency(Bat* env) { if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) { return BUG_MANEUVER_FREQUENCY; } - int extra_levels = env->curriculum_level - BUG_MANEUVER_START_LEVEL; - float multiplier = 1.0f + 0.50f * extra_levels; - return BUG_MANEUVER_FREQUENCY * bat_clampf(multiplier, 1.0f, 2.5f); + return BUG_MANEUVER_FREQUENCY * bat_clampf( + 1.0f + 0.50f * (env->curriculum_level - BUG_MANEUVER_START_LEVEL), + 1.0f, 2.5f); } static inline float chirps_used_ratio(Bat* env) { @@ -444,67 +432,46 @@ static inline float chirp_efficiency(Bat* env) { } static inline float chirp_perf(Bat* env) { - float raw = 1.0f - env->chirps_emitted / (float)MAX_CHIRPS_PER_EPISODE; - return bat_clampf(raw, CHIRP_PERF_FLOOR, 1.0f); -} - -static inline float norm_range(float value, float lo, float hi) { - float span = hi - lo; - return bat_clampf((value - 
lo) / span, 0.0f, 1.0f); + return bat_clampf(1.0f - env->chirps_emitted / (float)MAX_CHIRPS_PER_EPISODE, + CHIRP_PERF_FLOOR, 1.0f); } // TODO: Revisit whether these curriculum difficulty diagnostics are worth logging; // they add a lot of code and may be removable before merge. static inline float curriculum_distance_difficulty(Bat* env) { - float max_distance = fmaxf(CURRICULUM_MAX_BUG_DISTANCE, - CURRICULUM_INBOUND_MAX_BUG_DISTANCE); - return norm_range(env->start_bug_dist, - env->curriculum_start_bug_distance, max_distance); + return bat_clampf((env->start_bug_dist - env->curriculum_start_bug_distance) + / (CURRICULUM_INBOUND_MAX_BUG_DISTANCE - env->curriculum_start_bug_distance), + 0.0f, 1.0f); } static inline float curriculum_obstacle_difficulty(Bat* env) { - return norm_range((float)env->num_obstacles, - (float)CURRICULUM_START_OBSTACLES, (float)CURRICULUM_MAX_OBSTACLES); + return bat_clampf((env->num_obstacles - CURRICULUM_START_OBSTACLES) + / (float)(CURRICULUM_MAX_OBSTACLES - CURRICULUM_START_OBSTACLES), + 0.0f, 1.0f); } static inline float curriculum_motion_difficulty(Bat* env) { if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) return 0.0f; - float span = (float)(CURRICULUM_INBOUND_START_LEVEL + 4 - BUG_MANEUVER_START_LEVEL); - return bat_clampf((env->curriculum_level - BUG_MANEUVER_START_LEVEL + 1) / span, + return bat_clampf((env->curriculum_level - BUG_MANEUVER_START_LEVEL + 1) + / (float)(CURRICULUM_INBOUND_START_LEVEL + 4 - BUG_MANEUVER_START_LEVEL), 0.0f, 1.0f); } static inline float curriculum_difficulty(Bat* env) { - float distance = curriculum_distance_difficulty(env); - float obstacles = curriculum_obstacle_difficulty(env); - float active_weight = 0.0f; - float weighted = 0.0f; - if (CURRICULUM_MAX_BUG_DISTANCE > env->curriculum_start_bug_distance) { - weighted += 0.5f * distance; - active_weight += 0.5f; - } - if (CURRICULUM_MAX_OBSTACLES > CURRICULUM_START_OBSTACLES) { - weighted += 0.5f * obstacles; - active_weight += 0.5f; - } - float 
motion = curriculum_motion_difficulty(env); - if (BUG_MANEUVER_STRENGTH > 0.0f) { - weighted += 0.5f * motion; - active_weight += 0.5f; - } - return bat_clampf(weighted / active_weight, 0.0f, 1.0f); + return bat_clampf((curriculum_distance_difficulty(env) + + curriculum_obstacle_difficulty(env) + + curriculum_motion_difficulty(env)) / 3.0f, 0.0f, 1.0f); } static inline void sample_spawns_at_distance(Bat* env, float target_distance) { - float margin = fmaxf(6.0f, fmaxf(AGENT_RADIUS, BUG_RADIUS) + 3.0f); for (int attempt = 0; attempt < 96; attempt++) { float angle = randf(env) * TWO_PI - PI_F; float dx = cosf(angle) * target_distance; float dy = sinf(angle) * target_distance; - float min_bat_x = fmaxf(margin, margin - dx); - float max_bat_x = fminf(ARENA_WIDTH - margin, ARENA_WIDTH - margin - dx); - float min_bat_y = fmaxf(margin, margin - dy); - float max_bat_y = fminf(ARENA_HEIGHT - margin, ARENA_HEIGHT - margin - dy); + float min_bat_x = fmaxf(SPAWN_MARGIN, SPAWN_MARGIN - dx); + float max_bat_x = fminf(ARENA_WIDTH - SPAWN_MARGIN, ARENA_WIDTH - SPAWN_MARGIN - dx); + float min_bat_y = fmaxf(SPAWN_MARGIN, SPAWN_MARGIN - dy); + float max_bat_y = fminf(ARENA_HEIGHT - SPAWN_MARGIN, ARENA_HEIGHT - SPAWN_MARGIN - dy); if (max_bat_x < min_bat_x || max_bat_y < min_bat_y) continue; env->x = min_bat_x + randf(env) * (max_bat_x - min_bat_x); @@ -646,15 +613,12 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.num_obstacles += env->num_obstacles; env->log.chirps_emitted += env->chirps_emitted; env->log.chirp_perf += chirp_perf_value; - float chirps = fmaxf(1.0f, (float)env->chirps_emitted); - env->log.chirp_overlap_fraction += env->chirps_overlapped / chirps; env->log.n += 1.0f; } static inline int freq_bin_index(float freq_norm) { int bin = (int)(freq_norm * FREQ_BINS); - if (bin >= FREQ_BINS) bin = FREQ_BINS - 1; - return bin; + return bin >= FREQ_BINS ? 
FREQ_BINS - 1 : bin; } static inline void clear_echo_bucket(EchoBucket* bucket) { @@ -718,7 +682,7 @@ static inline float expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { float ux, uy; norm_vec(env->bug_x - source_x, env->bug_y - source_y, &ux, &uy); float forward = ux * fx + uy * fy; - if (forward < -0.35f) return -1.0f; + if (forward < ECHO_MIN_FORWARD) return -1.0f; float left_ear_x, left_ear_y, right_ear_x, right_ear_y; ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); @@ -732,8 +696,8 @@ static inline float expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { } if (best_path < 0.0f) return -1.0f; - float first_slice_ticks = chirp_slice_ticks(chirp, 0); - return chirp->birth_tick + first_slice_ticks + best_path / env->sound_speed / TICK_RATE; + return chirp->birth_tick + chirp_slice_ticks(chirp, 0) + + best_path / env->sound_speed / TICK_RATE; } static inline void schedule_echo(Bat* env, ChirpEvent* chirp, @@ -741,23 +705,19 @@ static inline void schedule_echo(Bat* env, ChirpEvent* chirp, float strength, int source) { float fx = cosf(env->heading); float fy = sinf(env->heading); - float lx = -sinf(env->heading); - float ly = cosf(env->heading); + float lateral_x = -sinf(env->heading); + float lateral_y = cosf(env->heading); float left_ear_x, left_ear_y, right_ear_x, right_ear_y; ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); float ux, uy; norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); float forward = ux * fx + uy * fy; - if (forward < -0.35f) return; + if (forward < ECHO_MIN_FORWARD) return; - float left_dir_x = -lx; - float left_dir_y = -ly; - float right_dir_x = lx; - float right_dir_y = ly; float front_gain = bat_clampf(forward, 0.0f, 1.0f); - float left_side_gain = bat_clampf(ux * left_dir_x + uy * left_dir_y, 0.0f, 1.0f); - float right_side_gain = bat_clampf(ux * right_dir_x + uy * right_dir_y, 0.0f, 1.0f); + float left_side_gain = bat_clampf(ux * -lateral_x + uy * -lateral_y, 0.0f, 1.0f); + float 
right_side_gain = bat_clampf(ux * lateral_x + uy * lateral_y, 0.0f, 1.0f); front_gain *= front_gain; left_side_gain *= left_side_gain; right_side_gain *= right_side_gain; @@ -929,8 +889,7 @@ void compute_observations(Bat* env) { float fwd_speed = env->vx * cosf(env->heading) + env->vy * sinf(env->heading); env->observations[FORWARD_SPEED_OBS] = bat_clampf(fwd_speed / env->max_speed, 0.0f, 1.0f); env->observations[TURN_RATE_OBS] = bat_clampf(env->turn_velocity / env->turn_rate, -1.0f, 1.0f); - float timer_norm = env->tick / (float)MAX_STEPS; - env->observations[TIMER_OBS] = bat_clampf(timer_norm, 0.0f, 1.0f); + env->observations[TIMER_OBS] = bat_clampf(env->tick / (float)MAX_STEPS, 0.0f, 1.0f); } static inline void reset_episode(Bat* env) { @@ -950,6 +909,7 @@ static inline void reset_episode(Bat* env) { sample_spawns_at_distance(env, bug_distance); generate_obstacles(env); reset_bug_motion(env); + // TODO: Revisit these first-observation defaults when we are ready to break determinism. env->last_chirp_start_freq = 0.0f; env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; @@ -962,7 +922,6 @@ static inline void reset_episode(Bat* env) { env->last_bug_echo_path = -1.0f; env->last_bug_echo_expected_tick = -1.0f; env->chirps_emitted = 0; - env->chirps_overlapped = 0; env->episode_return = 0.0f; env->start_bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); env->prev_bug_dist = env->start_bug_dist; @@ -1078,12 +1037,11 @@ static inline void update_motion(Bat* env, float dt) { float fx = cosf(env->heading); float fy = sinf(env->heading); float speed = env->vx * fx + env->vy * fy; - float min_speed = env->min_speed; - if (speed < min_speed) speed = min_speed; + if (speed < env->min_speed) speed = env->min_speed; if (move == THRUST_FORWARD) speed += env->accel * dt; if (move == BRAKE) speed -= env->accel * dt; - speed = bat_clampf(speed, min_speed, env->max_speed); + speed = bat_clampf(speed, env->min_speed, env->max_speed); float turn_command = 
0.0f; if (turn == TURN_LEFT) turn_command = -1.0f; @@ -1094,10 +1052,8 @@ static inline void update_motion(Bat* env, float dt) { if (env->heading > PI_F) env->heading -= TWO_PI; if (env->heading < -PI_F) env->heading += TWO_PI; - float heading_fx = cosf(env->heading); - float heading_fy = sinf(env->heading); - env->vx = heading_fx * speed; - env->vy = heading_fy * speed; + env->vx = cosf(env->heading) * speed; + env->vy = sinf(env->heading) * speed; env->x += env->vx * dt; env->y += env->vy * dt; } @@ -1186,14 +1142,12 @@ void c_step(Bat* env) { success = 1.0f; } else { float bug_dist = dist(env->x, env->y, env->bug_x, env->bug_y); - float progress = env->prev_bug_dist - bug_dist; - env->rewards[0] += env->progress_reward_scale * progress; + env->rewards[0] += env->progress_reward_scale * (env->prev_bug_dist - bug_dist); env->rewards[0] -= env->step_cost; // TODO: Fold this only when we are ready to break training determinism. if (chirp_status == CHIRP_STATUS_EMITTED) { env->rewards[0] += env->valid_chirp_reward; // TODO: Remove this; chirps should only pay when bug echoes improve. 
if (chirp_overlap_fraction > 0.0f) { env->rewards[0] -= env->chirp_overlap_penalty * chirp_overlap_fraction; - env->chirps_overlapped += 1; } } else if (chirp_status == CHIRP_STATUS_COOLDOWN) { env->rewards[0] -= env->early_chirp_penalty; @@ -1243,25 +1197,23 @@ void c_step(Bat* env) { #ifndef BAT_HEADLESS static inline Color freq_color(float freq_norm, float alpha_norm) { - float f = freq_norm; - float mid = 1.0f - fabsf(2.0f * f - 1.0f); + float mid = 1.0f - fabsf(2.0f * freq_norm - 1.0f); return (Color){ - (unsigned char)(255.0f * (1.0f - f) + 45.0f * f), + (unsigned char)(255.0f * (1.0f - freq_norm) + 45.0f * freq_norm), (unsigned char)(45.0f + 180.0f * mid), - (unsigned char)(45.0f * (1.0f - f) + 255.0f * f), + (unsigned char)(45.0f * (1.0f - freq_norm) + 255.0f * freq_norm), (unsigned char)(255.0f * alpha_norm), }; } static inline void draw_chirp_rings(Bat* env, float sx, float sy) { - float scale = fminf(sx, sy); for (int i = 0; i < CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; - float max_age = MAX_ECHO_RANGE / env->sound_speed + chirp->duration; - if (age_seconds < 0.0f || age_seconds > max_age) { + if (age_seconds < 0.0f || + age_seconds > MAX_ECHO_RANGE / env->sound_speed + chirp->duration) { chirp->active = 0; continue; } @@ -1269,17 +1221,19 @@ static inline void draw_chirp_rings(Bat* env, float sx, float sy) { for (int ring = 0; ring < CHIRP_RINGS; ring++) { float slice = ring / (float)(CHIRP_RINGS - 1); float freq = chirp->start_freq + slice * (chirp->end_freq - chirp->start_freq); - float radius = chirp_ring_radius(age_seconds, slice, chirp->duration, env->sound_speed); - if (radius <= 0.0f || radius > MAX_ECHO_RANGE) continue; + float ring_age = age_seconds - slice * chirp->duration; + if (ring_age <= 0.0f) continue; + float radius = env->sound_speed * ring_age; + if (radius > MAX_ECHO_RANGE) continue; - float fade = 1.0f - radius / 
MAX_ECHO_RANGE; - float alpha = 0.18f + 0.42f * bat_clampf(fade, 0.0f, 1.0f); + float alpha = 0.18f + 0.42f * bat_clampf( + 1.0f - radius / MAX_ECHO_RANGE, 0.0f, 1.0f); float source_x, source_y; chirp_source_for_fraction(chirp, slice, &source_x, &source_y); DrawCircleLines( (int)(source_x * sx), (int)(source_y * sy), - radius * scale, + radius * fminf(sx, sy), freq_color(freq, alpha)); } } @@ -1301,8 +1255,7 @@ static inline void draw_echo_flash(Bat* env, ChirpEvent* chirp, float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; float distance = dist(chirp->x, chirp->y, rx, ry); float echo_time = 2.0f * distance / env->sound_speed; - bool echo_arriving_now = fabsf(age_seconds - echo_time) <= 0.025f; - if (!echo_arriving_now) return; + if (fabsf(age_seconds - echo_time) > 0.025f) return; float ux, uy; norm_vec(rx - chirp->x, ry - chirp->y, &ux, &uy); @@ -1411,10 +1364,7 @@ Client* make_client(Bat* env) { client->width = ARENA_WIDTH * 10; client->height = ARENA_HEIGHT * 10; InitWindow(client->width, client->height, "Bat"); - int target_fps = env->render_target_fps; - if (target_fps > 0) { - SetTargetFPS(target_fps); - } + SetTargetFPS(env->render_target_fps); InitAudioDevice(); client->audio_ready = IsAudioDeviceReady(); record_init(env, client); diff --git a/ocean/bat/bat_audio.h b/ocean/bat/bat_audio.h index a67bf2b084..5010775fc3 100644 --- a/ocean/bat/bat_audio.h +++ b/ocean/bat/bat_audio.h @@ -3,7 +3,6 @@ static inline float chirp_audio_duration_at_fps(float duration_norm, int fps) { float duration = chirp_duration_seconds(duration_norm); - if (fps <= 0) return duration; float scale = 60.0f / (float)fps; if (scale < 1.0f) scale = 1.0f; return duration * scale; @@ -20,10 +19,8 @@ static inline float chirp_audio_frequency_hz(float freq_norm) { static inline float chirp_audio_envelope(float t_norm) { if (t_norm <= 0.0f || t_norm >= 1.0f) return 0.0f; - const float fade = 0.08f; - float attack = t_norm / fade; - float release = (1.0f - t_norm) / fade; - 
return bat_clampf(fminf(attack, release), 0.0f, 1.0f); + return bat_clampf(fminf(t_norm / AUDIO_ENVELOPE_FADE, + (1.0f - t_norm) / AUDIO_ENVELOPE_FADE), 0.0f, 1.0f); } static inline float chirp_audio_sample_f32(float start_norm, float end_norm, diff --git a/ocean/bat/binding.c b/ocean/bat/binding.c index ceb9769e28..02ecf722a7 100644 --- a/ocean/bat/binding.c +++ b/ocean/bat/binding.c @@ -55,5 +55,4 @@ void my_log(Log* log, Dict* out) { dict_set(out, "num_obstacles", log->num_obstacles); dict_set(out, "chirps_emitted", log->chirps_emitted); dict_set(out, "chirp_perf", log->chirp_perf); - dict_set(out, "chirp_overlap_fraction", log->chirp_overlap_fraction); } diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index bad8d41c62..29aef6cd05 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -945,18 +945,6 @@ static int test_bat_speed_action_space_has_no_strafe(void) { return 0; } -static int test_chirp_ring_physical_ordering(void) { - float duration = chirp_duration_seconds(1.0f); - float outer = chirp_ring_radius(1.0f, 0.0f, duration, 100.0f); - float inner = chirp_ring_radius(1.0f, 1.0f, duration, 100.0f); - - ASSERT_TRUE(outer > inner); - ASSERT_FLOAT_NEAR(outer, 100.0f, 0.0001f); - ASSERT_FLOAT_NEAR(inner, 100.0f * (1.0f - duration), 0.0001f); - - return 0; -} - static int test_chirp_audio_maps_norm_freq_to_audible_sweep(void) { ASSERT_FLOAT_NEAR(chirp_audio_frequency_hz(0.0f), 600.0f, 0.0001f); ASSERT_FLOAT_NEAR(chirp_audio_frequency_hz(1.0f), 3600.0f, 0.0001f); @@ -977,8 +965,6 @@ static int test_chirp_audio_duration_scales_with_render_fps(void) { ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration * 2.0f, 0.0001f); env.render_target_fps = 15; ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration * 4.0f, 0.0001f); - env.render_target_fps = 0; - ASSERT_FLOAT_NEAR(chirp_audio_duration_seconds(&env, 0.0f), base_duration, 0.0001f); free_allocated(&env); return 0; 
} @@ -1089,7 +1075,6 @@ static int test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); ASSERT_TRUE(env.chirps_emitted == 1); - ASSERT_TRUE(env.chirps_overlapped == 0); env.last_chirp_tick = 0; env.last_bug_echo_expected_tick = 10.0f; @@ -1102,7 +1087,6 @@ static int test_chirp_before_bug_echo_arrives_gets_scaled_overlap_penalty(void) ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward - 0.5f * env.chirp_overlap_penalty, 0.0001f); ASSERT_TRUE(env.chirps_emitted == 2); - ASSERT_TRUE(env.chirps_overlapped == 1); free_allocated(&env); return 0; @@ -1129,7 +1113,6 @@ static int test_chirp_after_bug_echo_arrives_ignores_static_echo_window(void) { ASSERT_FLOAT_NEAR(env.terminals[0], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(env.rewards[0], env.valid_chirp_reward, 0.0001f); ASSERT_TRUE(env.chirps_emitted == 2); - ASSERT_TRUE(env.chirps_overlapped == 0); free_allocated(&env); return 0; @@ -1815,7 +1798,6 @@ int main(void) { if (test_bat_zero_speed_recovers_to_forward_arc()) return 1; if (test_bat_turn_rate_scales_with_forward_speed()) return 1; if (test_bat_speed_action_space_has_no_strafe()) return 1; - if (test_chirp_ring_physical_ordering()) return 1; if (test_chirp_audio_maps_norm_freq_to_audible_sweep()) return 1; if (test_chirp_audio_duration_scales_with_render_fps()) return 1; if (test_chirp_cooldown_accepts_only_after_delay()) return 1; From c9da7aa2cc14220a9660175159c1fd91ccfe6a80 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 12:49:47 -0700 Subject: [PATCH 49/51] Render bat echo frequency history --- ocean/bat/bat.h | 107 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 9 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 92922d119d..5217ee4810 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -83,6 +83,9 @@ #define AUDIO_VOLUME 0.22f #define AUDIO_ENVELOPE_FADE 
0.08f #define RECORD_MAX_VOICES 16 +#define FREQ_HISTORY_TICKS 96 +#define FREQ_PANEL_WIDTH 192 +#define FREQ_PANEL_MARGIN 8 #define CHIRP_PERF_FLOOR 0.05f #define CHIRP_MIN_DURATION_SECONDS 0.04f #define CHIRP_DURATION_RANGE_SECONDS 0.18f @@ -162,6 +165,9 @@ typedef struct Client { char record_wav_path[256]; char record_mp4_path[256]; BatRecordVoice record_voices[RECORD_MAX_VOICES]; + float freq_history[FREQ_HISTORY_TICKS][2][FREQ_BINS]; + int freq_history_head; + int freq_history_last_tick; #endif } Client; @@ -1249,6 +1255,85 @@ static inline Color doppler_ray_color(float doppler, float alpha) { (unsigned char)(255.0f * bat_clampf(alpha, 0.0f, 1.0f))}; } +static inline void clear_freq_history(Client* client) { + memset(client->freq_history, 0, sizeof(client->freq_history)); + client->freq_history_head = 0; + client->freq_history_last_tick = -1; +} + +static inline void capture_freq_history(Bat* env) { + Client* client = env->client; + if (env->tick < client->freq_history_last_tick) { + clear_freq_history(client); + } + if (env->tick == client->freq_history_last_tick) return; + + float (*sample)[FREQ_BINS] = client->freq_history[client->freq_history_head]; + for (int i = 0; i < FREQ_BINS; i++) { + sample[0][i] = env->observations[LEFT_FREQ_OFFSET + i]; + sample[1][i] = env->observations[RIGHT_FREQ_OFFSET + i]; + } + + client->freq_history_head = (client->freq_history_head + 1) % FREQ_HISTORY_TICKS; + client->freq_history_last_tick = env->tick; +} + +static inline Color freq_history_color(int bin, float energy) { + float e = sqrtf(bat_clampf(energy, 0.0f, 1.0f)); + if (e <= 0.001f) return (Color){42, 46, 56, 255}; + + Color base = freq_color(bin / (float)(FREQ_BINS - 1), 1.0f); + float brightness = 0.25f + 0.75f * e; + return (Color){ + (unsigned char)(36.0f + 219.0f * (base.r / 255.0f) * brightness), + (unsigned char)(36.0f + 219.0f * (base.g / 255.0f) * brightness), + (unsigned char)(36.0f + 219.0f * (base.b / 255.0f) * brightness), + 255, + }; +} + +static 
inline void draw_freq_history_band(Client* client, + int ear, int x, int y, int width, int height) { + float col_width = width / (float)FREQ_HISTORY_TICKS; + float row_height = height / (float)FREQ_BINS; + for (int t = 0; t < FREQ_HISTORY_TICKS; t++) { + int history_idx = (client->freq_history_head + t) % FREQ_HISTORY_TICKS; + int x0 = x + (int)(t * col_width); + int x1 = x + (int)((t + 1) * col_width); + if (x1 <= x0) x1 = x0 + 1; + + for (int row = 0; row < FREQ_BINS; row++) { + int bin = FREQ_BINS - 1 - row; + int y0 = y + (int)(row * row_height); + int y1 = y + (int)((row + 1) * row_height); + if (y1 <= y0) y1 = y0 + 1; + DrawRectangle(x0, y0, x1 - x0, y1 - y0, + freq_history_color(bin, client->freq_history[history_idx][ear][bin])); + } + } +} + +static inline void draw_freq_history_panel(Bat* env, int x, int y, int width, int height) { + capture_freq_history(env); + + DrawRectangle(x, y, width, height, (Color){32, 36, 46, 255}); + int band_width = width - 2 * FREQ_PANEL_MARGIN; + int band_height = (height - 3 * FREQ_PANEL_MARGIN) / 2; + int left_y = y + FREQ_PANEL_MARGIN; + int right_y = left_y + band_height + FREQ_PANEL_MARGIN; + + draw_freq_history_band(env->client, 0, x + FREQ_PANEL_MARGIN, left_y, + band_width, band_height); + draw_freq_history_band(env->client, 1, x + FREQ_PANEL_MARGIN, right_y, + band_width, band_height); + + DrawRectangleLines(x, y, width, height, (Color){124, 132, 148, 255}); + DrawRectangleLines(x + FREQ_PANEL_MARGIN, left_y, band_width, band_height, + (Color){102, 110, 126, 255}); + DrawRectangleLines(x + FREQ_PANEL_MARGIN, right_y, band_width, band_height, + (Color){102, 110, 126, 255}); +} + static inline void draw_echo_flash(Bat* env, ChirpEvent* chirp, float rx, float ry, float rvx, float rvy, float strength, float sx, float sy) { @@ -1314,14 +1399,14 @@ static inline void draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); } -static inline void 
draw_corner_reflector_markers(Bat* env) { +static inline void draw_corner_reflector_markers(int width, int height) { const int size = 8; const Color fill = (Color){128, 128, 132, 255}; const Color outline = (Color){202, 202, 208, 255}; - int max_x = env->client->width - size; - int max_y = env->client->height - size; - int mid_x = env->client->width / 2 - size / 2; - int mid_y = env->client->height / 2 - size / 2; + int max_x = width - size; + int max_y = height - size; + int mid_x = width / 2 - size / 2; + int mid_y = height / 2 - size / 2; DrawRectangle(0, 0, size, size, fill); DrawRectangleLines(0, 0, size, size, outline); DrawRectangle(max_x, 0, size, size, fill); @@ -1361,8 +1446,9 @@ static inline void draw_echo_reflections(Bat* env, float sx, float sy) { Client* make_client(Bat* env) { Client* client = (Client*)calloc(1, sizeof(Client)); - client->width = ARENA_WIDTH * 10; + client->width = ARENA_WIDTH * 10 + FREQ_PANEL_WIDTH; client->height = ARENA_HEIGHT * 10; + clear_freq_history(client); InitWindow(client->width, client->height, "Bat"); SetTargetFPS(env->render_target_fps); InitAudioDevice(); @@ -1391,13 +1477,15 @@ void c_render(Bat* env) { env->client = make_client(env); } play_chirp_audio(env); - float sx = env->client->width / (float)ARENA_WIDTH; + int arena_width = env->client->width - FREQ_PANEL_WIDTH; + int arena_height = env->client->height; + float sx = arena_width / (float)ARENA_WIDTH; float sy = env->client->height / (float)ARENA_HEIGHT; BeginDrawing(); ClearBackground((Color){18, 20, 24, 255}); draw_chirp_rings(env, sx, sy); draw_echo_reflections(env, sx, sy); - DrawRectangleLines(0, 0, env->client->width, env->client->height, GRAY); + DrawRectangleLines(0, 0, arena_width, arena_height, GRAY); for (int i = 0; i < env->num_obstacles; i++) { DrawRectangle( (int)(env->obstacle_x[i] * sx), @@ -1406,7 +1494,7 @@ void c_render(Bat* env) { (int)(env->obstacle_h[i] * sy), (Color){92, 92, 96, 255}); } - draw_corner_reflector_markers(env); + 
draw_corner_reflector_markers(arena_width, arena_height); DrawCircle((int)(env->bug_x * sx), (int)(env->bug_y * sy), BUG_RADIUS * sx, GREEN); DrawCircle((int)(env->x * sx), (int)(env->y * sy), @@ -1417,6 +1505,7 @@ void c_render(Bat* env) { int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); DrawText(TextFormat("reward %.3f tick %d chirps %d cooldown %d ESC exits", env->rewards[0], env->tick, env->chirps_emitted, cooldown), 10, 10, 20, RAYWHITE); + draw_freq_history_panel(env, arena_width, 0, FREQ_PANEL_WIDTH, arena_height); EndDrawing(); record_capture_frame(env); } From aa3b383c6e319014c3d8d0f69571565f7df57ba0 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Fri, 12 Jun 2026 18:31:58 -0700 Subject: [PATCH 50/51] Add bat observation render gauges --- ocean/bat/bat.h | 121 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 5217ee4810..70593bfcf0 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -84,7 +84,8 @@ #define AUDIO_ENVELOPE_FADE 0.08f #define RECORD_MAX_VOICES 16 #define FREQ_HISTORY_TICKS 96 -#define FREQ_PANEL_WIDTH 192 +#define FREQ_PANEL_WIDTH 384 +#define FREQ_WATERFALL_WIDTH 192 #define FREQ_PANEL_MARGIN 8 #define CHIRP_PERF_FLOOR 0.05f #define CHIRP_MIN_DURATION_SECONDS 0.04f @@ -1297,7 +1298,8 @@ static inline void draw_freq_history_band(Client* client, float col_width = width / (float)FREQ_HISTORY_TICKS; float row_height = height / (float)FREQ_BINS; for (int t = 0; t < FREQ_HISTORY_TICKS; t++) { - int history_idx = (client->freq_history_head + t) % FREQ_HISTORY_TICKS; + int history_idx = (client->freq_history_head + FREQ_HISTORY_TICKS - 1 - t) + % FREQ_HISTORY_TICKS; int x0 = x + (int)(t * col_width); int x1 = x + (int)((t + 1) * col_width); if (x1 <= x0) x1 = x0 + 1; @@ -1313,25 +1315,138 @@ static inline void draw_freq_history_band(Client* client, } } +typedef struct ObsBar { + const char* label; + int obs_idx; + Color 
color; + bool signed_value; +} ObsBar; + +static inline void draw_obs_bar(int x, int y, int width, + const ObsBar* bar, const float* observations) { + const int label_width = 68; + const int bar_height = 12; + int bar_x = x + label_width; + int bar_width = width - label_width; + if (bar_width <= 0) return; + + DrawText(bar->label, x, y - 1, 10, (Color){226, 230, 238, 255}); + DrawRectangle(bar_x, y, bar_width, bar_height, (Color){48, 52, 62, 255}); + + if (bar->signed_value) { + int center = bar_x + bar_width / 2; + float value = bat_clampf(observations[bar->obs_idx], -1.0f, 1.0f); + int fill = (int)(fabsf(value) * bar_width * 0.5f); + if (value >= 0.0f) { + DrawRectangle(center, y, fill, bar_height, bar->color); + } else { + DrawRectangle(center - fill, y, fill, bar_height, bar->color); + } + DrawLine(center, y, center, y + bar_height, (Color){196, 200, 210, 255}); + } else { + float value = bat_clampf(observations[bar->obs_idx], 0.0f, 1.0f); + DrawRectangle(bar_x, y, (int)(value * bar_width), bar_height, bar->color); + } + + DrawRectangleLines(bar_x, y, bar_width, bar_height, (Color){118, 126, 142, 255}); +} + +static inline void draw_arrow_line(int x0, int y0, int x1, int y1, Color color) { + DrawLine(x0, y0, x1, y1, color); + float angle = atan2f((float)(y1 - y0), (float)(x1 - x0)); + const float head = 7.0f; + DrawLine(x1, y1, + (int)(x1 - cosf(angle - 0.45f) * head), + (int)(y1 - sinf(angle - 0.45f) * head), color); + DrawLine(x1, y1, + (int)(x1 - cosf(angle + 0.45f) * head), + (int)(y1 - sinf(angle + 0.45f) * head), color); +} + +static inline int draw_observation_bars(Bat* env, int x, int y, int width, int height) { + static const ObsBar chirp_bars[] = { + {"age", CHIRP_AGE_OBS, {112, 196, 255, 255}, false}, + {"cooldown", CHIRP_COOLDOWN_OBS, {255, 206, 96, 255}, false}, + {"start", CHIRP_START_OBS, {255, 112, 160, 255}, false}, + {"end", CHIRP_END_OBS, {126, 224, 255, 255}, false}, + {"duration", CHIRP_DURATION_OBS, {190, 154, 255, 255}, false}, + {"used", 
CHIRPS_USED_OBS, {255, 150, 96, 255}, false}, + }; + static const ObsBar action_bars[] = { + {"speed", FORWARD_SPEED_OBS, {120, 226, 142, 255}, false}, + {"turn", TURN_RATE_OBS, {255, 112, 112, 255}, true}, + }; + static const ObsBar episode_bars[] = { + {"timer", TIMER_OBS, {88, 164, 255, 255}, false}, + }; + + const int row_step = 18; + const Color header = (Color){246, 248, 255, 255}; + (void)height; + + DrawText("Chirp", x, y, 12, header); + y += 18; + for (int i = 0; i < (int)(sizeof(chirp_bars) / sizeof(chirp_bars[0])); i++) { + draw_obs_bar(x, y + i * row_step, width, &chirp_bars[i], env->observations); + } + y += (int)(sizeof(chirp_bars) / sizeof(chirp_bars[0])) * row_step + 14; + + DrawText("Actions", x, y, 12, header); + y += 18; + for (int i = 0; i < (int)(sizeof(action_bars) / sizeof(action_bars[0])); i++) { + draw_obs_bar(x, y + i * row_step, width, &action_bars[i], env->observations); + } + y += (int)(sizeof(action_bars) / sizeof(action_bars[0])) * row_step + 14; + + DrawText("Episode", x, y, 12, header); + y += 18; + draw_obs_bar(x, y, width, &episode_bars[0], env->observations); + return y + row_step + 16; +} + +static inline void draw_reflections_hint(int x, int y, int width, + int target_x, int left_target_y, int right_target_y) { + Color color = (Color){255, 96, 96, 255}; + int text_x = x + 40; + int text_y = (left_target_y + right_target_y) / 2 - 6; + int source_x = text_x - 8; + int source_y = text_y + 8; + (void)y; + (void)width; + DrawText("Reflections L/R", text_x, text_y, 12, color); + draw_arrow_line(source_x, source_y, target_x, left_target_y, color); + draw_arrow_line(source_x, source_y + 10, target_x, right_target_y, color); +} + static inline void draw_freq_history_panel(Bat* env, int x, int y, int width, int height) { capture_freq_history(env); DrawRectangle(x, y, width, height, (Color){32, 36, 46, 255}); - int band_width = width - 2 * FREQ_PANEL_MARGIN; + int band_width = FREQ_WATERFALL_WIDTH - 2 * FREQ_PANEL_MARGIN; int band_height 
= (height - 3 * FREQ_PANEL_MARGIN) / 2; int left_y = y + FREQ_PANEL_MARGIN; int right_y = left_y + band_height + FREQ_PANEL_MARGIN; + int obs_x = x + FREQ_WATERFALL_WIDTH + FREQ_PANEL_MARGIN; + int obs_width = width - FREQ_WATERFALL_WIDTH - 2 * FREQ_PANEL_MARGIN; draw_freq_history_band(env->client, 0, x + FREQ_PANEL_MARGIN, left_y, band_width, band_height); draw_freq_history_band(env->client, 1, x + FREQ_PANEL_MARGIN, right_y, band_width, band_height); + int hint_y = draw_observation_bars(env, obs_x, y + FREQ_PANEL_MARGIN, + obs_width, height - 2 * FREQ_PANEL_MARGIN); + draw_reflections_hint(obs_x, hint_y, obs_width, + x + FREQ_PANEL_MARGIN + band_width - 4, + left_y + band_height / 2, + right_y + band_height / 2); DrawRectangleLines(x, y, width, height, (Color){124, 132, 148, 255}); DrawRectangleLines(x + FREQ_PANEL_MARGIN, left_y, band_width, band_height, (Color){102, 110, 126, 255}); DrawRectangleLines(x + FREQ_PANEL_MARGIN, right_y, band_width, band_height, (Color){102, 110, 126, 255}); + DrawLine(x + FREQ_WATERFALL_WIDTH, y, x + FREQ_WATERFALL_WIDTH, y + height, + (Color){86, 94, 110, 255}); } static inline void draw_echo_flash(Bat* env, ChirpEvent* chirp, From 822681ae5d29983e4f40e30f723681971f8d4980 Mon Sep 17 00:00:00 2001 From: Kinvert Date: Sat, 13 Jun 2026 11:57:16 -0700 Subject: [PATCH 51/51] Clean up bat env internals --- PR_AND_MERGE.md | 121 ++++++++++ ocean/bat/bat.c | 51 +---- ocean/bat/bat.h | 382 ++++++++++++-------------------- ocean/bat/tests/test_bat_core.c | 19 +- 4 files changed, 281 insertions(+), 292 deletions(-) create mode 100644 PR_AND_MERGE.md diff --git a/PR_AND_MERGE.md b/PR_AND_MERGE.md new file mode 100644 index 0000000000..acbf1051d9 --- /dev/null +++ b/PR_AND_MERGE.md @@ -0,0 +1,121 @@ +# Bat PR And Merge Notes + +## Determinism Terms + +- **Old-baseline trajectory equivalence**: same code/config/seed reproduces the + current exact training trajectory, scalar signature, checkpoint behavior, and + level-10 eval behavior. 
+- **Deterministic reproducibility**: same code/config/seed reproduces the same + result after we intentionally change behavior. + +For cleanup before the first merge, preserve old-baseline trajectory equivalence +unless we explicitly decide a change belongs in the new deterministic baseline. +Later behavior-breaking cleanups are allowed, but each one needs a fresh +reproducible training/eval signature. + +## Most Embarrassing Review Targets + +| Area | Why Joseph might call it out | Cleanup class | +| --- | --- | --- | +| `c_step()` terminal/reward flow | Over-budget chirp, collision, success, and timeout are detected in different branches, then partially consolidated later. It is correct enough, but less direct than Breakout/Boxoban/G2048. | Try to preserve old baseline first; larger reshaping may break it. | +| `compute_observations()` side effects | The name says observations, but it also consumes echo buckets and sets `tick_bug_echo_path`, which later affects reward. This is now direct in the function instead of hidden behind a one-use helper. | Preserve old baseline unless the echo reward order is deliberately changed. | +| `schedule_echo()` size | It mixes heading math, ear directivity, path/range checks, Doppler, attenuation, and queue writes in one function. | Preserve old baseline by extracting repeated left/right queueing only. | +| `reset_bug_motion()` and `update_bug()` | Three maneuver modes, inbound special cases, sign state, bounce repair, and multiple curriculum helpers are too much. A single sine wave with curriculum-ramped amplitude would be cleaner. | New deterministic baseline. This will likely break old behavior. | +| Spawn helpers | Exact-distance spawn and fallback quadrant spawn now live in one function, but the fallback loops remain defensive. | New deterministic baseline if RNG order changes. | +| Obstacle generation | `rects_overlap()`, `obstacle_clear()`, 96 attempts, and fallback placements are probably more safety than we need. 
| New deterministic baseline. Remove if overlapping random obstacles are acceptable. | +| Curriculum difficulty logs | `curriculum_distance_difficulty()`, `curriculum_obstacle_difficulty()`, `curriculum_motion_difficulty()`, and `curriculum_difficulty()` are a lot of code for diagnostics/objective shaping. | Likely behavior/metric breaking; do later. | +| Chirp efficiency / chirps-used logs | `chirps_used_ratio()` is still an observation, but reward/log helpers around sparse chirping are low conviction. | Keep observation if needed; remove reward/log parts in new baseline. | +| Demo defaults in `bat.c` | `set_demo_defaults()` duplicates `config/bat.ini`, which can drift. | Cleanup after deciding how the human demo path should load defaults. | +| Magic constants | Echo strengths, attenuation constants, spawn attempts, obstacle margins, first chirp defaults, and render colors are mostly unnamed. | Rename constants where it clarifies intent; avoid sweeping constant churn. | + +## Preserve Old Baseline First + +These changes should be attempted one at a time with the full gate: + +1. Reduce duplicated left/right queueing in `schedule_echo()`. +2. Keep `compute_observations()` order stable. Echo bucket observation copying is + now direct in the function. +3. Simplify local renderer helpers and repeated static reflector drawing. +4. Remove dead fields, dead constants, and obviously unreachable guards. +5. Keep `c_step()` reward order stable unless we deliberately decide to break + old-baseline trajectory equivalence. 
+ +Gate after each code change: + +```bash +source .venv/bin/activate && ./build.sh bat +source .venv/bin/activate && bash ocean/bat/tests/run_all.sh +.venv/bin/python -m pufferlib.pufferl train bat --train.gpus 1 +timeout 45s env DISPLAY=:0 .venv/bin/python -m pufferlib.pufferl eval bat --load-model-path latest --env.curriculum-initial-level 10 --env.curriculum-successes-per-level 1000000 +``` + +Known old-baseline training signature: + +- `perf 0.556` +- `base_perf 0.950` +- `timeout 0.009` +- `chirps_emitted 5.191` + +## Current Safe-Cleanup Notes + +- Keep the normal `init()`, `allocate()`, `c_close()`, and `free_allocated()` + shape. Breakout and G2048 use this pattern too, even when Bat currently has + less heap-owned state after obstacle arrays became fixed-size. +- Breakout resets `terminals[0]` and `rewards[0]` at the top of `c_step()`; + Boxoban increments `tick`, clears terminal/reward, then handles success and + timeout as separate early-return branches. Bat's current terminal block is + somewhat more abstract, but changing the reward/terminal order belongs in the + new-baseline phase unless we intentionally stop matching the old run. +- Avoid changing reward arithmetic order in `c_step()` while preserving the old + baseline. The one-line reward fold already proved it can break trajectory + equivalence. +- `schedule_ear_echo()` is worth keeping for now. It is a small helper that + removed duplicated left/right attenuation and receive-time logic. +- `norm_bin()` is small, but it names the action-bin normalization used by three + chirp fields. Inlining it would save little and may make `try_emit_chirp()` + less readable. +- `compute_observations()` now copies due echo buckets directly into + observations and updates `tick_bug_echo_path` in place. Build, tests, + training signature, and level-10 eval all preserved the old baseline. 
+- The fallback spawn cleanup removed the one-use `sample_spawns()` helper and + kept the same RNG order inside `sample_spawns_at_distance()`. Build, tests, + training signature, and level-10 eval all preserved the old baseline. +- Chirp slice scheduling no longer pre-fills every future source slot at emit + time, and constructs the per-slice echo source explicitly instead of copying + the whole `ChirpEvent`. Build, tests, training signature, and level-10 eval + all preserved the old baseline. +- Expected bug echo timing now reads the just-emitted chirp source directly + instead of asking for slice `0` before any slices are scheduled. Build, tests, + training signature, and level-10 eval all preserved the old baseline. + +## Next Old-Baseline Candidates + +These are candidates only after the latest visual gate is confirmed: + +1. Revisit any remaining one-use render helpers, but only if removal reduces + lines without making `draw_freq_history_panel()` harder to scan. +2. Look for dead test-only exposure caused by removed helpers. The tests should + assert behavior, not preserve helpers just because they were previously + callable. +3. Review tiny math helpers one at a time. Keep helpers that name a real domain + concept (`chirp_slice_ticks`, `chirp_age_norm_denominator`); consider + inlining helpers that merely restate one field expression. +4. Leave `c_step()` structural reshaping for later. It is one of the highest + review-value areas, but it is also one of the easiest ways to break old + trajectory equivalence. + +## New Deterministic Baseline Later + +These are probably the real pre-merge quality wins, but they should be grouped +after we are ready to stop matching the current trajectory exactly: + +1. Replace bug maneuver modes with one always-active sine-wave path and + curriculum-ramped amplitude. +2. Simplify spawn and obstacle generation, including removing overlap checks if + overlapping obstacles are acceptable. +3. 
Remove low-conviction curriculum difficulty and chirp efficiency logs/reward + shaping. +4. Rework `c_step()` into a direct step, reward, done, log/reset shape matching + the simpler reference envs. +5. Reconsider the initial chirp observation defaults instead of pretending a + chirp happened before the episode starts. diff --git a/ocean/bat/bat.c b/ocean/bat/bat.c index cfff6a9064..f0c62c51f8 100644 --- a/ocean/bat/bat.c +++ b/ocean/bat/bat.c @@ -13,42 +13,6 @@ static char* trim(char* s) { return s; } -static void set_demo_defaults(Bat* env) { - *env = (Bat){ - .num_agents = NUM_AGENTS, - .max_speed = 15.498233877318418f, - .min_speed = 2.6389946132676654f, - .accel = 53.02330161128345f, - .turn_rate = 8.371655963408276f, - .render_target_fps = 60, - .record_video = 0, - .record_video_fps = 30, - .record_video_seconds = 30, - .record_video_audio = 1, - .bug_echo_farther_penalty_scale = 0.19351291407677712f, - .bug_echo_reward_scale = 0.35f, - .bug_wing_sideband_gain = 0.19056934455600955f, - .curriculum_initial_level = 1, - .curriculum_obstacle_step = 8, - .curriculum_start_bug_distance = 8.438008720355143f, - .curriculum_successes_per_level = 4, - .ear_separation_scale = 2.0f, - .ear_rear_gain = 0.22038613968607276f, - .ear_front_gain = 0.6419214149115183f, - .ear_side_gain = 0.28043867572747055f, - .early_chirp_penalty = 0.006f, - .progress_reward_scale = 0.12f, - .reflector_strength = 0.6f, - .sound_speed = 180.0f, - .step_cost = 0.00010781401476030468f, - .valid_chirp_reward = 0.00015478540834814922f, - .chirp_cooldown_ticks = 11, - .chirp_efficiency_reward = 2.0f, - .chirp_overlap_penalty = 0.004278154705335052f, - .collision_penalty = 1.950717141233687f, - }; -} - static void apply_env_config_value(Bat* env, const char* key, float value) { if (strcmp(key, "max_speed") == 0) env->max_speed = value; else if (strcmp(key, "min_speed") == 0) env->min_speed = value; @@ -109,25 +73,26 @@ static void load_env_config(Bat* env, const char* path) { } void demo() { - 
Bat env; - set_demo_defaults(&env); + Bat env = { + .num_agents = NUM_AGENTS, + .render_target_fps = 60, + .record_video_fps = 30, + .record_video_seconds = 30, + .record_video_audio = 1, + }; load_env_config(&env, DEMO_CONFIG_PATH); env.rng = (unsigned int)time(NULL); allocate(&env); env.client = make_client(&env); c_reset(&env); - SetTargetFPS(60); while (!WindowShouldClose()) { memset(env.actions, 0, sizeof(float) * NUM_ACTIONS); - env.actions[ACTION_MOVE] = NOOP; - env.actions[ACTION_TURN] = TURN_NONE; if (IsKeyDown(KEY_W)) env.actions[ACTION_MOVE] = THRUST_FORWARD; if (IsKeyDown(KEY_S)) env.actions[ACTION_MOVE] = BRAKE; if (IsKeyDown(KEY_A) || IsKeyDown(KEY_LEFT)) env.actions[ACTION_TURN] = TURN_LEFT; if (IsKeyDown(KEY_D) || IsKeyDown(KEY_RIGHT)) env.actions[ACTION_TURN] = TURN_RIGHT; - env.actions[ACTION_CHIRP_FREQ_START] = 0; - env.actions[ACTION_CHIRP_FREQ_END] = 7; + env.actions[ACTION_CHIRP_FREQ_END] = CHIRP_FREQ_BINS - 1; env.actions[ACTION_CHIRP_DURATION] = 1; env.actions[ACTION_CHIRP_EMIT] = IsKeyDown(KEY_SPACE) ? 
1.0f : 0.0f; c_step(&env); diff --git a/ocean/bat/bat.h b/ocean/bat/bat.h index 70593bfcf0..f32b4c9312 100644 --- a/ocean/bat/bat.h +++ b/ocean/bat/bat.h @@ -10,7 +10,6 @@ #include "raylib.h" #endif -#define OBS_SIZE 41 #define NUM_AGENTS 1 #define NUM_ACTIONS 6 #define ACTION_MOVE 0 @@ -27,16 +26,17 @@ #define FREQ_BINS 16 #define LEFT_FREQ_OFFSET 0 -#define RIGHT_FREQ_OFFSET 16 -#define CHIRP_AGE_OBS 32 -#define CHIRP_COOLDOWN_OBS 33 -#define CHIRP_START_OBS 34 -#define CHIRP_END_OBS 35 -#define CHIRP_DURATION_OBS 36 -#define CHIRPS_USED_OBS 37 -#define FORWARD_SPEED_OBS 38 -#define TURN_RATE_OBS 39 -#define TIMER_OBS 40 +#define RIGHT_FREQ_OFFSET FREQ_BINS +#define CHIRP_AGE_OBS (RIGHT_FREQ_OFFSET + FREQ_BINS) +#define CHIRP_COOLDOWN_OBS (CHIRP_AGE_OBS + 1) +#define CHIRP_START_OBS (CHIRP_COOLDOWN_OBS + 1) +#define CHIRP_END_OBS (CHIRP_START_OBS + 1) +#define CHIRP_DURATION_OBS (CHIRP_END_OBS + 1) +#define CHIRPS_USED_OBS (CHIRP_DURATION_OBS + 1) +#define FORWARD_SPEED_OBS (CHIRPS_USED_OBS + 1) +#define TURN_RATE_OBS (FORWARD_SPEED_OBS + 1) +#define TIMER_OBS (TURN_RATE_OBS + 1) +#define OBS_SIZE (TIMER_OBS + 1) #define NOOP 0 #define THRUST_FORWARD 1 @@ -63,7 +63,6 @@ #define MAX_ECHO_RANGE 128.0f #define ECHO_MIN_FORWARD -0.35f #define BUG_ECHO_MIN_DISPLACEMENT 1.0f -#define CURRICULUM_START_OBSTACLES 0 #define CURRICULUM_MAX_OBSTACLES 3 #define CURRICULUM_BUG_DISTANCE_STEP 2.0f #define CURRICULUM_MAX_BUG_DISTANCE 40.0f @@ -90,11 +89,14 @@ #define CHIRP_PERF_FLOOR 0.05f #define CHIRP_MIN_DURATION_SECONDS 0.04f #define CHIRP_DURATION_RANGE_SECONDS 0.18f -#define MAX_CHIRP_AGE_TICKS 30 #define MAX_CHIRPS_PER_EPISODE 15 #define ECHO_STATIC 0 #define ECHO_BUG 1 +#define ARENA_REFLECTORS 8 + +static const float ARENA_REFLECTOR_X[ARENA_REFLECTORS] = {0.0f, 1.0f, 0.0f, 1.0f, 0.5f, 0.5f, 0.0f, 1.0f}; +static const float ARENA_REFLECTOR_Y[ARENA_REFLECTORS] = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, 0.5f, 0.5f}; typedef struct ChirpEvent { float x; @@ -112,7 +114,6 @@ 
typedef struct ChirpEvent { typedef struct EchoBucket { float energy[2][FREQ_BINS]; - float bug_energy; float closest_bug_echo_path; int tick; } EchoBucket; @@ -228,15 +229,14 @@ typedef struct Bat { float bug_maneuver_rate; float bug_maneuver_sign; - float* obstacle_x; - float* obstacle_y; - float* obstacle_w; - float* obstacle_h; + float obstacle_x[CURRICULUM_MAX_OBSTACLES]; + float obstacle_y[CURRICULUM_MAX_OBSTACLES]; + float obstacle_w[CURRICULUM_MAX_OBSTACLES]; + float obstacle_h[CURRICULUM_MAX_OBSTACLES]; float sound_speed; float reflector_strength; int chirp_cooldown_ticks; - int chirp_age_ticks; int last_chirp_tick; float last_chirp_start_freq; float last_chirp_end_freq; @@ -297,9 +297,7 @@ static inline float chirp_slice_ticks(ChirpEvent* chirp, int slice_idx) { static inline void chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, float* source_x, float* source_y) { - int scheduled = chirp->slices_scheduled; - if (slice_idx >= 0 && slice_idx < scheduled && - slice_idx < MAX_CHIRP_SLICES) { + if (slice_idx < chirp->slices_scheduled) { *source_x = chirp->source_x[slice_idx]; *source_y = chirp->source_y[slice_idx]; return; @@ -308,14 +306,6 @@ static inline void chirp_source_for_slice(ChirpEvent* chirp, int slice_idx, *source_y = chirp->y; } -static inline void chirp_source_for_fraction(ChirpEvent* chirp, float slice, - float* source_x, float* source_y) { - int slices = chirp->slice_count; - int slice_idx = (int)floorf(slice * (float)slices); - if (slice_idx >= slices) slice_idx = slices - 1; - chirp_source_for_slice(chirp, slice_idx, source_x, source_y); -} - static inline float chirp_age_norm_denominator(Bat* env) { float travel_ticks = MAX_ECHO_RANGE / env->sound_speed / TICK_RATE; float chirp_ticks = chirp_duration_seconds(1.0f) / TICK_RATE; @@ -371,30 +361,9 @@ static inline void sample_in_quadrant(Bat* env, int quadrant, float* x, float* y *y = min_y + randf(env) * (max_y - min_y); } -static inline void sample_spawns(Bat* env) { - int 
agent_quadrant = (int)(randf(env) * 4.0f); - int bug_quadrant = agent_quadrant ^ 3; - float min_sep = fminf(ARENA_WIDTH, ARENA_HEIGHT) * 0.31f; - - for (int attempt = 0; attempt < 64; attempt++) { - sample_in_quadrant(env, agent_quadrant, &env->x, &env->y); - sample_in_quadrant(env, bug_quadrant, &env->bug_x, &env->bug_y); - if (dist(env->x, env->y, env->bug_x, env->bug_y) >= min_sep) { - return; - } - } - - float qx[4] = {0.25f, 0.75f, 0.25f, 0.75f}; - float qy[4] = {0.25f, 0.25f, 0.75f, 0.75f}; - env->x = ARENA_WIDTH * qx[agent_quadrant]; - env->y = ARENA_HEIGHT * qy[agent_quadrant]; - env->bug_x = ARENA_WIDTH * qx[bug_quadrant]; - env->bug_y = ARENA_HEIGHT * qy[bug_quadrant]; -} - static inline int curriculum_obstacles(Bat* env) { - int count = CURRICULUM_START_OBSTACLES + (env->curriculum_level > 0 - ? 1 + (env->curriculum_level - 1) / env->curriculum_obstacle_step : 0); + int count = env->curriculum_level > 0 + ? 1 + (env->curriculum_level - 1) / env->curriculum_obstacle_step : 0; return count > CURRICULUM_MAX_OBSTACLES ? CURRICULUM_MAX_OBSTACLES : count; } @@ -419,6 +388,9 @@ static inline float curriculum_bug_maneuver_strength(Bat* env) { return BUG_MANEUVER_STRENGTH * bat_clampf(ramp, 0.0f, 1.0f); } +// TODO: When we are ready to break determinism, simplify bug maneuvering to one +// always-active sine wave with curriculum-ramped amplitude, then remove the mode +// and sign branches below. 
static inline float curriculum_bug_maneuver_frequency(Bat* env) { if (env->curriculum_level < BUG_MANEUVER_START_LEVEL) { return BUG_MANEUVER_FREQUENCY; @@ -452,9 +424,7 @@ static inline float curriculum_distance_difficulty(Bat* env) { } static inline float curriculum_obstacle_difficulty(Bat* env) { - return bat_clampf((env->num_obstacles - CURRICULUM_START_OBSTACLES) - / (float)(CURRICULUM_MAX_OBSTACLES - CURRICULUM_START_OBSTACLES), - 0.0f, 1.0f); + return bat_clampf(env->num_obstacles / (float)CURRICULUM_MAX_OBSTACLES, 0.0f, 1.0f); } static inline float curriculum_motion_difficulty(Bat* env) { @@ -488,7 +458,21 @@ static inline void sample_spawns_at_distance(Bat* env, float target_distance) { return; } - sample_spawns(env); + int agent_quadrant = (int)(randf(env) * 4.0f); + int bug_quadrant = agent_quadrant ^ 3; + float min_sep = fminf(ARENA_WIDTH, ARENA_HEIGHT) * 0.31f; + for (int attempt = 0; attempt < 64; attempt++) { + sample_in_quadrant(env, agent_quadrant, &env->x, &env->y); + sample_in_quadrant(env, bug_quadrant, &env->bug_x, &env->bug_y); + if (dist(env->x, env->y, env->bug_x, env->bug_y) >= min_sep) { + return; + } + } + + env->x = ARENA_WIDTH * ((agent_quadrant & 1) ? 0.75f : 0.25f); + env->y = ARENA_HEIGHT * ((agent_quadrant & 2) ? 0.75f : 0.25f); + env->bug_x = ARENA_WIDTH * ((bug_quadrant & 1) ? 0.75f : 0.25f); + env->bug_y = ARENA_HEIGHT * ((bug_quadrant & 2) ? 
0.75f : 0.25f); } static inline void reset_bug_motion(Bat* env) { @@ -574,11 +558,8 @@ static inline void generate_obstacles(Bat* env) { } void init(Bat* env) { - env->tick = 0; - env->obstacle_x = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); - env->obstacle_y = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); - env->obstacle_w = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); - env->obstacle_h = (float*)calloc(CURRICULUM_MAX_OBSTACLES, sizeof(float)); + env->curriculum_level = env->curriculum_initial_level; + env->curriculum_successes_at_level = 0; } void allocate(Bat* env) { @@ -590,10 +571,7 @@ void allocate(Bat* env) { } void c_close(Bat* env) { - free(env->obstacle_x); - free(env->obstacle_y); - free(env->obstacle_w); - free(env->obstacle_h); + (void)env; } void free_allocated(Bat* env) { @@ -623,11 +601,6 @@ static inline void add_log(Bat* env, float success, float collision, float timeo env->log.n += 1.0f; } -static inline int freq_bin_index(float freq_norm) { - int bin = (int)(freq_norm * FREQ_BINS); - return bin >= FREQ_BINS ? FREQ_BINS - 1 : bin; -} - static inline void clear_echo_bucket(EchoBucket* bucket) { memset(bucket, 0, sizeof(*bucket)); bucket->closest_bug_echo_path = -1.0f; @@ -645,8 +618,7 @@ static inline void add_echo_event(Bat* env, int ear, float receive_tick, if (receive_tick <= env->tick) return; if (intensity <= 0.000001f) return; int arrival_tick = (int)ceilf(receive_tick); - int delay = arrival_tick - env->tick; - if (delay <= 0 || delay >= ECHO_QUEUE_TICKS) return; + if (arrival_tick - env->tick >= ECHO_QUEUE_TICKS) return; int slot = arrival_tick % ECHO_QUEUE_TICKS; EchoBucket* bucket = &env->echo_queue[slot]; if (bucket->tick != arrival_tick) { @@ -654,16 +626,15 @@ static inline void add_echo_event(Bat* env, int ear, float receive_tick, bucket->tick = arrival_tick; } - int ear_idx = ear == 0 ? 
0 : 1; - int bin = freq_bin_index(freq); - bucket->energy[ear_idx][bin] += intensity; + int bin = (int)(freq * FREQ_BINS); + if (bin >= FREQ_BINS) bin = FREQ_BINS - 1; + bucket->energy[ear][bin] += intensity; if (source == ECHO_BUG) { float sideband = intensity * env->bug_wing_sideband_gain; if (sideband > 0.000001f) { - if (bin > 0) bucket->energy[ear_idx][bin - 1] += sideband; - if (bin + 1 < FREQ_BINS) bucket->energy[ear_idx][bin + 1] += sideband; + if (bin > 0) bucket->energy[ear][bin - 1] += sideband; + if (bin + 1 < FREQ_BINS) bucket->energy[ear][bin + 1] += sideband; } - bucket->bug_energy += intensity; if (bucket->closest_bug_echo_path < 0.0f || path < bucket->closest_bug_echo_path) { bucket->closest_bug_echo_path = path; } @@ -681,27 +652,30 @@ static inline void ear_positions(Bat* env, float* left_x, float* left_y, *right_y = env->y + ly * ear_sep * 0.5f; } +static inline void schedule_ear_echo(Bat* env, int birth_tick, int ear, + float slice_ticks, float freq, float strength, float path, + float gain, int source) { + if (path > MAX_ECHO_RANGE) return; + float attenuation = strength / (1.0f + 0.02f * path * path); + float receive_tick = birth_tick + slice_ticks + path / env->sound_speed / TICK_RATE; + add_echo_event(env, ear, receive_tick, freq, attenuation * gain, path, source); +} + static inline float expected_bug_echo_tick(Bat* env, ChirpEvent* chirp) { float fx = cosf(env->heading); float fy = sinf(env->heading); - float source_x, source_y; - chirp_source_for_slice(chirp, 0, &source_x, &source_y); float ux, uy; - norm_vec(env->bug_x - source_x, env->bug_y - source_y, &ux, &uy); + norm_vec(env->bug_x - chirp->x, env->bug_y - chirp->y, &ux, &uy); float forward = ux * fx + uy * fy; if (forward < ECHO_MIN_FORWARD) return -1.0f; float left_ear_x, left_ear_y, right_ear_x, right_ear_y; ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); - float source_path = dist(source_x, source_y, env->bug_x, env->bug_y); + float source_path = 
dist(chirp->x, chirp->y, env->bug_x, env->bug_y); float left_path = source_path + dist(env->bug_x, env->bug_y, left_ear_x, left_ear_y); float right_path = source_path + dist(env->bug_x, env->bug_y, right_ear_x, right_ear_y); - float best_path = -1.0f; - if (left_path <= MAX_ECHO_RANGE) best_path = left_path; - if (right_path <= MAX_ECHO_RANGE && (best_path < 0.0f || right_path < best_path)) { - best_path = right_path; - } - if (best_path < 0.0f) return -1.0f; + float best_path = fminf(left_path, right_path); + if (best_path > MAX_ECHO_RANGE) return -1.0f; return chirp->birth_tick + chirp_slice_ticks(chirp, 0) + best_path / env->sound_speed / TICK_RATE; @@ -712,8 +686,8 @@ static inline void schedule_echo(Bat* env, ChirpEvent* chirp, float strength, int source) { float fx = cosf(env->heading); float fy = sinf(env->heading); - float lateral_x = -sinf(env->heading); - float lateral_y = cosf(env->heading); + float lateral_x = -fy; + float lateral_y = fx; float left_ear_x, left_ear_y, right_ear_x, right_ear_y; ear_positions(env, &left_ear_x, &left_ear_y, &right_ear_x, &right_ear_y); @@ -744,16 +718,10 @@ static inline void schedule_echo(Bat* env, ChirpEvent* chirp, float doppler = bat_clampf(-distance_rate / (env->max_speed + BUG_SPEED), -1.0f, 1.0f); float shifted_freq = bat_clampf(freq + 0.20f * doppler, 0.0f, 1.0f); - if (left_path <= MAX_ECHO_RANGE) { - float attenuation = strength / (1.0f + 0.02f * left_path * left_path); - float receive_tick = chirp->birth_tick + slice_ticks + left_path / env->sound_speed / TICK_RATE; - add_echo_event(env, 0, receive_tick, shifted_freq, attenuation * left_gain, left_path, source); - } - if (right_path <= MAX_ECHO_RANGE) { - float attenuation = strength / (1.0f + 0.02f * right_path * right_path); - float receive_tick = chirp->birth_tick + slice_ticks + right_path / env->sound_speed / TICK_RATE; - add_echo_event(env, 1, receive_tick, shifted_freq, attenuation * right_gain, right_path, source); - } + schedule_ear_echo(env, 
chirp->birth_tick, 0, + slice_ticks, shifted_freq, strength, left_path, left_gain, source); + schedule_ear_echo(env, chirp->birth_tick, 1, + slice_ticks, shifted_freq, strength, right_path, right_gain, source); } static inline void schedule_segment_reflectors(Bat* env, ChirpEvent* chirp, @@ -773,23 +741,11 @@ static inline void schedule_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float slice_ticks, float freq) { float w = (float)ARENA_WIDTH; float h = (float)ARENA_HEIGHT; - float strength = env->reflector_strength; - schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.0f, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, w, 0.0f, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, 0.0f, h, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, w, h, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, 0.0f, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, 0.5f * w, h, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, 0.0f, 0.5f * h, - 0.0f, 0.0f, strength, ECHO_STATIC); - schedule_echo(env, chirp, slice_ticks, freq, w, 0.5f * h, - 0.0f, 0.0f, strength, ECHO_STATIC); + for (int i = 0; i < ARENA_REFLECTORS; i++) { + schedule_echo(env, chirp, slice_ticks, freq, + ARENA_REFLECTOR_X[i] * w, ARENA_REFLECTOR_Y[i] * h, + 0.0f, 0.0f, env->reflector_strength, ECHO_STATIC); + } } static inline void schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, @@ -806,20 +762,21 @@ static inline void schedule_obstacle_echoes(Bat* env, ChirpEvent* chirp, static inline void schedule_chirp_slice_echoes(Bat* env, ChirpEvent* chirp, int slice_idx) { - int slices = chirp->slice_count; - if (slice_idx >= slices || slice_idx >= MAX_CHIRP_SLICES) { + if (slice_idx >= chirp->slice_count) { return; } - float t = (slice_idx + 0.5f) / (float)slices; + float t = (slice_idx + 0.5f) 
/ (float)chirp->slice_count; float slice_ticks = chirp_slice_ticks(chirp, slice_idx); float freq = chirp->start_freq + t * (chirp->end_freq - chirp->start_freq); - chirp->source_x[slice_idx] = env->x; - chirp->source_y[slice_idx] = env->y; - ChirpEvent slice_chirp = *chirp; - slice_chirp.x = chirp->source_x[slice_idx]; - slice_chirp.y = chirp->source_y[slice_idx]; + ChirpEvent slice_chirp = { + .x = env->x, + .y = env->y, + .birth_tick = chirp->birth_tick, + }; + chirp->source_x[slice_idx] = slice_chirp.x; + chirp->source_y[slice_idx] = slice_chirp.y; schedule_echo(env, &slice_chirp, slice_ticks, freq, env->bug_x, env->bug_y, env->bug_vx, env->bug_vy, 8.0f, ECHO_BUG); @@ -841,10 +798,9 @@ static inline void schedule_due_chirp_slices(Bat* env) { for (int i = 0; i < CHIRP_HISTORY; i++) { ChirpEvent* chirp = &env->chirps[i]; if (!chirp->active) continue; - int slices = chirp->slice_count; float age_ticks = (float)(env->tick - chirp->birth_tick); - while (chirp->slices_scheduled < slices) { + while (chirp->slices_scheduled < chirp->slice_count) { int slice_idx = chirp->slices_scheduled; float slice_ticks = chirp_slice_ticks(chirp, slice_idx); if (slice_ticks >= age_ticks + 1.0f) break; @@ -854,38 +810,26 @@ static inline void schedule_due_chirp_slices(Bat* env) { } } -static inline void process_echo_events(Bat* env) { +void compute_observations(Bat* env) { + memset(env->observations, 0, OBS_SIZE * sizeof(float)); + env->tick_bug_echo_path = -1.0f; + int slot = env->tick % ECHO_QUEUE_TICKS; EchoBucket* bucket = &env->echo_queue[slot]; - if (bucket->tick != env->tick) return; - - for (int i = 0; i < FREQ_BINS; i++) { - int left_idx = LEFT_FREQ_OFFSET + i; - int right_idx = RIGHT_FREQ_OFFSET + i; - env->observations[left_idx] = bat_clampf( - env->observations[left_idx] + bucket->energy[0][i], 0.0f, 1.0f); - env->observations[right_idx] = bat_clampf( - env->observations[right_idx] + bucket->energy[1][i], 0.0f, 1.0f); - } - if (bucket->bug_energy > 0.0f) { - if 
(env->tick_bug_echo_path < 0.0f - || bucket->closest_bug_echo_path < env->tick_bug_echo_path) { + if (bucket->tick == env->tick) { + for (int i = 0; i < FREQ_BINS; i++) { + env->observations[LEFT_FREQ_OFFSET + i] = bat_clampf(bucket->energy[0][i], 0.0f, 1.0f); + env->observations[RIGHT_FREQ_OFFSET + i] = bat_clampf(bucket->energy[1][i], 0.0f, 1.0f); + } + if (bucket->closest_bug_echo_path >= 0.0f) { env->tick_bug_echo_path = bucket->closest_bug_echo_path; } + clear_echo_bucket(bucket); } - clear_echo_bucket(bucket); -} - -void compute_observations(Bat* env) { - memset(env->observations, 0, OBS_SIZE * sizeof(float)); - env->tick_bug_echo_path = -1.0f; - - process_echo_events(env); float chirp_age_denom = chirp_age_norm_denominator(env); int chirp_age = env->tick - env->last_chirp_tick; if (env->last_chirp_tick < 0) chirp_age = (int)ceilf(chirp_age_denom); - env->chirp_age_ticks = chirp_age; int cooldown = env->chirp_cooldown_ticks - (env->tick - env->last_chirp_tick); env->observations[CHIRP_AGE_OBS] = bat_clampf(chirp_age / chirp_age_denom, 0.0f, 1.0f); env->observations[CHIRP_COOLDOWN_OBS] = bat_clampf(cooldown / (float)env->chirp_cooldown_ticks, 0.0f, 1.0f); @@ -910,17 +854,15 @@ static inline void reset_episode(Bat* env) { } env->num_obstacles = curriculum_obstacles(env); env->bug_inbound = env->curriculum_level >= CURRICULUM_INBOUND_START_LEVEL; - float bug_distance = env->bug_inbound + sample_spawns_at_distance(env, env->bug_inbound ? curriculum_inbound_bug_distance(env) - : curriculum_bug_distance(env); - sample_spawns_at_distance(env, bug_distance); + : curriculum_bug_distance(env)); generate_obstacles(env); reset_bug_motion(env); // TODO: Revisit these first-observation defaults when we are ready to break determinism. 
env->last_chirp_start_freq = 0.0f; env->last_chirp_end_freq = 1.0f; env->last_chirp_duration = 0.33333334f; - env->chirp_age_ticks = 0; env->last_chirp_tick = -env->chirp_cooldown_ticks; memset(env->chirps, 0, sizeof(env->chirps)); env->chirp_head = 0; @@ -1027,14 +969,13 @@ static inline void update_bug(Bat* env, float dt) { bounced = true; } if (bounced) { - env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); if (env->bug_inbound) { float tx, ty; norm_vec(env->x - env->bug_x, env->y - env->bug_y, &tx, &ty); env->bug_vx = tx * speed; env->bug_vy = ty * speed; - env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); } + env->bug_base_heading = atan2f(env->bug_vy, env->bug_vx); } } @@ -1077,7 +1018,6 @@ static inline bool try_emit_chirp(Bat* env) { env->last_chirp_start_freq = norm_bin(start_idx, CHIRP_FREQ_BINS); env->last_chirp_end_freq = norm_bin(end_idx, CHIRP_FREQ_BINS); env->last_chirp_duration = norm_bin(duration_idx, CHIRP_DURATION_BINS); - env->chirp_age_ticks = 0; env->last_chirp_tick = env->tick; env->chirps_emitted += 1; ChirpEvent* chirp = &env->chirps[env->chirp_head]; @@ -1089,10 +1029,6 @@ static inline bool try_emit_chirp(Bat* env) { chirp->birth_tick = env->tick; chirp->slice_count = (int)ceilf(chirp->duration / TICK_RATE); chirp->slices_scheduled = 0; - for (int i = 0; i < MAX_CHIRP_SLICES; i++) { - chirp->source_x[i] = chirp->x; - chirp->source_y[i] = chirp->y; - } chirp->active = 1; env->chirp_head = (env->chirp_head + 1) % CHIRP_HISTORY; env->audio_chirp_serial += 1; @@ -1116,9 +1052,6 @@ static inline ChirpStatus update_chirp(Bat* env) { return try_emit_chirp(env) ? 
CHIRP_STATUS_EMITTED : CHIRP_STATUS_COOLDOWN; } - if (env->chirp_age_ticks < MAX_CHIRP_AGE_TICKS) { - env->chirp_age_ticks += 1; - } return CHIRP_STATUS_NONE; } @@ -1181,18 +1114,15 @@ void c_step(Bat* env) { compute_observations(env); if (env->tick_bug_echo_path > 0.0f) { - if (env->last_bug_echo_path > 0.0f) { - float echo_displacement = dist(env->last_bug_echo_x, env->last_bug_echo_y, - env->x, env->y); - if (echo_displacement >= BUG_ECHO_MIN_DISPLACEMENT) { - float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) - / MAX_ECHO_RANGE; - if (echo_progress > 0.0f) { - env->rewards[0] += env->bug_echo_reward_scale * echo_progress; - } else if (echo_progress < 0.0f) { - env->rewards[0] += env->bug_echo_reward_scale - * env->bug_echo_farther_penalty_scale * echo_progress; - } + if (env->last_bug_echo_path > 0.0f && dist(env->last_bug_echo_x, env->last_bug_echo_y, + env->x, env->y) >= BUG_ECHO_MIN_DISPLACEMENT) { + float echo_progress = (env->last_bug_echo_path - env->tick_bug_echo_path) + / MAX_ECHO_RANGE; + if (echo_progress > 0.0f) { + env->rewards[0] += env->bug_echo_reward_scale * echo_progress; + } else if (echo_progress < 0.0f) { + env->rewards[0] += env->bug_echo_reward_scale + * env->bug_echo_farther_penalty_scale * echo_progress; } } env->last_bug_echo_path = env->tick_bug_echo_path; @@ -1219,8 +1149,7 @@ static inline void draw_chirp_rings(Bat* env, float sx, float sy) { if (!chirp->active) continue; float age_seconds = (env->tick - chirp->birth_tick) * TICK_RATE; - if (age_seconds < 0.0f || - age_seconds > MAX_ECHO_RANGE / env->sound_speed + chirp->duration) { + if (age_seconds > MAX_ECHO_RANGE / env->sound_speed + chirp->duration) { chirp->active = 0; continue; } @@ -1236,7 +1165,9 @@ static inline void draw_chirp_rings(Bat* env, float sx, float sy) { float alpha = 0.18f + 0.42f * bat_clampf( 1.0f - radius / MAX_ECHO_RANGE, 0.0f, 1.0f); float source_x, source_y; - chirp_source_for_fraction(chirp, slice, &source_x, &source_y); + int 
slice_idx = (int)floorf(slice * (float)chirp->slice_count); + if (slice_idx >= chirp->slice_count) slice_idx = chirp->slice_count - 1; + chirp_source_for_slice(chirp, slice_idx, &source_x, &source_y); DrawCircleLines( (int)(source_x * sx), (int)(source_y * sy), @@ -1328,7 +1259,6 @@ static inline void draw_obs_bar(int x, int y, int width, const int bar_height = 12; int bar_x = x + label_width; int bar_width = width - label_width; - if (bar_width <= 0) return; DrawText(bar->label, x, y - 1, 10, (Color){226, 230, 238, 255}); DrawRectangle(bar_x, y, bar_width, bar_height, (Color){48, 52, 62, 255}); @@ -1363,7 +1293,7 @@ static inline void draw_arrow_line(int x0, int y0, int x1, int y1, Color color) (int)(y1 - sinf(angle + 0.45f) * head), color); } -static inline int draw_observation_bars(Bat* env, int x, int y, int width, int height) { +static inline void draw_observation_bars(Bat* env, int x, int y, int width) { static const ObsBar chirp_bars[] = { {"age", CHIRP_AGE_OBS, {112, 196, 255, 255}, false}, {"cooldown", CHIRP_COOLDOWN_OBS, {255, 206, 96, 255}, false}, @@ -1376,46 +1306,30 @@ static inline int draw_observation_bars(Bat* env, int x, int y, int width, int h {"speed", FORWARD_SPEED_OBS, {120, 226, 142, 255}, false}, {"turn", TURN_RATE_OBS, {255, 112, 112, 255}, true}, }; - static const ObsBar episode_bars[] = { - {"timer", TIMER_OBS, {88, 164, 255, 255}, false}, - }; + static const ObsBar timer_bar = {"timer", TIMER_OBS, {88, 164, 255, 255}, false}; const int row_step = 18; const Color header = (Color){246, 248, 255, 255}; - (void)height; + int chirp_count = (int)(sizeof(chirp_bars) / sizeof(chirp_bars[0])); + int action_count = (int)(sizeof(action_bars) / sizeof(action_bars[0])); DrawText("Chirp", x, y, 12, header); y += 18; - for (int i = 0; i < (int)(sizeof(chirp_bars) / sizeof(chirp_bars[0])); i++) { + for (int i = 0; i < chirp_count; i++) { draw_obs_bar(x, y + i * row_step, width, &chirp_bars[i], env->observations); } - y += (int)(sizeof(chirp_bars) / 
sizeof(chirp_bars[0])) * row_step + 14; + y += chirp_count * row_step + 14; DrawText("Actions", x, y, 12, header); y += 18; - for (int i = 0; i < (int)(sizeof(action_bars) / sizeof(action_bars[0])); i++) { + for (int i = 0; i < action_count; i++) { draw_obs_bar(x, y + i * row_step, width, &action_bars[i], env->observations); } - y += (int)(sizeof(action_bars) / sizeof(action_bars[0])) * row_step + 14; + y += action_count * row_step + 14; DrawText("Episode", x, y, 12, header); y += 18; - draw_obs_bar(x, y, width, &episode_bars[0], env->observations); - return y + row_step + 16; -} - -static inline void draw_reflections_hint(int x, int y, int width, - int target_x, int left_target_y, int right_target_y) { - Color color = (Color){255, 96, 96, 255}; - int text_x = x + 40; - int text_y = (left_target_y + right_target_y) / 2 - 6; - int source_x = text_x - 8; - int source_y = text_y + 8; - (void)y; - (void)width; - DrawText("Reflections L/R", text_x, text_y, 12, color); - draw_arrow_line(source_x, source_y, target_x, left_target_y, color); - draw_arrow_line(source_x, source_y + 10, target_x, right_target_y, color); + draw_obs_bar(x, y, width, &timer_bar, env->observations); } static inline void draw_freq_history_panel(Bat* env, int x, int y, int width, int height) { @@ -1433,12 +1347,18 @@ static inline void draw_freq_history_panel(Bat* env, int x, int y, int width, in band_width, band_height); draw_freq_history_band(env->client, 1, x + FREQ_PANEL_MARGIN, right_y, band_width, band_height); - int hint_y = draw_observation_bars(env, obs_x, y + FREQ_PANEL_MARGIN, - obs_width, height - 2 * FREQ_PANEL_MARGIN); - draw_reflections_hint(obs_x, hint_y, obs_width, - x + FREQ_PANEL_MARGIN + band_width - 4, - left_y + band_height / 2, - right_y + band_height / 2); + draw_observation_bars(env, obs_x, y + FREQ_PANEL_MARGIN, obs_width); + Color reflection_color = (Color){255, 96, 96, 255}; + int reflection_text_x = obs_x + 40; + int reflection_text_y = (left_y + right_y + band_height) / 
2 - 6; + int reflection_source_x = reflection_text_x - 8; + int reflection_source_y = reflection_text_y + 8; + int reflection_target_x = x + FREQ_PANEL_MARGIN + band_width - 4; + DrawText("Reflections L/R", reflection_text_x, reflection_text_y, 12, reflection_color); + draw_arrow_line(reflection_source_x, reflection_source_y, + reflection_target_x, left_y + band_height / 2, reflection_color); + draw_arrow_line(reflection_source_x, reflection_source_y + 10, + reflection_target_x, right_y + band_height / 2, reflection_color); DrawRectangleLines(x, y, width, height, (Color){124, 132, 148, 255}); DrawRectangleLines(x + FREQ_PANEL_MARGIN, left_y, band_width, band_height, @@ -1478,7 +1398,6 @@ static inline void draw_segment_echoes(Bat* env, ChirpEvent* chirp, float sx, float sy) { float len = dist(x1, y1, x2, y2); int count = (int)(len / REFLECTOR_SPACING) + 1; - if (count < 1) count = 1; for (int i = 0; i <= count; i++) { float t = i / (float)count; float x = x1 + (x2 - x1) * t; @@ -1503,15 +1422,10 @@ static inline void draw_corner_reflector_echoes(Bat* env, ChirpEvent* chirp, float sx, float sy) { float w = (float)ARENA_WIDTH; float h = (float)ARENA_HEIGHT; - float strength = env->reflector_strength; - draw_echo_flash(env, chirp, 0.0f, 0.0f, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, w, 0.0f, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, 0.0f, h, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, w, h, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, 0.5f * w, 0.0f, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, 0.5f * w, h, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, 0.0f, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); - draw_echo_flash(env, chirp, w, 0.5f * h, 0.0f, 0.0f, strength, sx, sy); + for (int i = 0; i < ARENA_REFLECTORS; i++) { + draw_echo_flash(env, chirp, ARENA_REFLECTOR_X[i] * w, + ARENA_REFLECTOR_Y[i] * h, 0.0f, 0.0f, env->reflector_strength, sx, sy); + } } static inline 
void draw_corner_reflector_markers(int width, int height) { @@ -1520,24 +1434,12 @@ static inline void draw_corner_reflector_markers(int width, int height) { const Color outline = (Color){202, 202, 208, 255}; int max_x = width - size; int max_y = height - size; - int mid_x = width / 2 - size / 2; - int mid_y = height / 2 - size / 2; - DrawRectangle(0, 0, size, size, fill); - DrawRectangleLines(0, 0, size, size, outline); - DrawRectangle(max_x, 0, size, size, fill); - DrawRectangleLines(max_x, 0, size, size, outline); - DrawRectangle(0, max_y, size, size, fill); - DrawRectangleLines(0, max_y, size, size, outline); - DrawRectangle(max_x, max_y, size, size, fill); - DrawRectangleLines(max_x, max_y, size, size, outline); - DrawRectangle(mid_x, 0, size, size, fill); - DrawRectangleLines(mid_x, 0, size, size, outline); - DrawRectangle(mid_x, max_y, size, size, fill); - DrawRectangleLines(mid_x, max_y, size, size, outline); - DrawRectangle(0, mid_y, size, size, fill); - DrawRectangleLines(0, mid_y, size, size, outline); - DrawRectangle(max_x, mid_y, size, size, fill); - DrawRectangleLines(max_x, mid_y, size, size, outline); + for (int i = 0; i < ARENA_REFLECTORS; i++) { + int x = (int)(ARENA_REFLECTOR_X[i] * max_x); + int y = (int)(ARENA_REFLECTOR_Y[i] * max_y); + DrawRectangle(x, y, size, size, fill); + DrawRectangleLines(x, y, size, size, outline); + } } static inline void draw_echo_reflections(Bat* env, float sx, float sy) { diff --git a/ocean/bat/tests/test_bat_core.c b/ocean/bat/tests/test_bat_core.c index 29aef6cd05..40c04b5ca3 100644 --- a/ocean/bat/tests/test_bat_core.c +++ b/ocean/bat/tests/test_bat_core.c @@ -560,7 +560,6 @@ static int test_echo_scheduling_uses_tick_bucket_accumulator(void) { int slot = 10 % ECHO_QUEUE_TICKS; ASSERT_TRUE(env.echo_queue[slot].tick == 10); ASSERT_FLOAT_NEAR(env.echo_queue[slot].energy[0][FREQ_BINS - 1], 1.1f, 0.0001f); - ASSERT_FLOAT_NEAR(env.echo_queue[slot].bug_energy, 1.1f, 0.0001f); 
ASSERT_FLOAT_NEAR(env.echo_queue[slot].closest_bug_echo_path, 12.0f, 0.0001f); free_allocated(&env); @@ -575,13 +574,13 @@ static int test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation( env.bug_wing_sideband_gain = 0.25f; clear_echo_queue(&env); - int bin = freq_bin_index(0.5f); + int bin = (int)(0.5f * FREQ_BINS); add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, ECHO_BUG); EchoBucket* bug_bucket = &env.echo_queue[1 % ECHO_QUEUE_TICKS]; ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin], 0.4f, 0.0001f); ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin - 1], 0.1f, 0.0001f); ASSERT_FLOAT_NEAR(bug_bucket->energy[0][bin + 1], 0.1f, 0.0001f); - ASSERT_FLOAT_NEAR(bug_bucket->bug_energy, 0.4f, 0.0001f); + ASSERT_FLOAT_NEAR(bug_bucket->closest_bug_echo_path, 12.0f, 0.0001f); clear_echo_queue(&env); add_echo_event(&env, 0, 1.0f, 0.5f, 0.4f, 12.0f, ECHO_STATIC); @@ -589,7 +588,7 @@ static int test_bug_wing_sidebands_spill_adjacent_bins_without_reward_inflation( ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin], 0.4f, 0.0001f); ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin - 1], 0.0f, 0.0001f); ASSERT_FLOAT_NEAR(static_bucket->energy[0][bin + 1], 0.0f, 0.0001f); - ASSERT_FLOAT_NEAR(static_bucket->bug_energy, 0.0f, 0.0001f); + ASSERT_FLOAT_NEAR(static_bucket->closest_bug_echo_path, -1.0f, 0.0001f); free_allocated(&env); return 0; @@ -1441,12 +1440,14 @@ static int test_default_echo_range_reaches_curriculum_max_bug_distance(void) { chirp.slices_scheduled += 1; } - float bug_energy = 0.0f; + int bug_echo_buckets = 0; for (int i = 0; i < ECHO_QUEUE_TICKS; i++) { - bug_energy += env.echo_queue[i].bug_energy; + if (env.echo_queue[i].closest_bug_echo_path >= 0.0f) { + bug_echo_buckets += 1; + } } - ASSERT_TRUE(bug_energy > 0.0f); + ASSERT_TRUE(bug_echo_buckets > 0); free_allocated(&env); return 0; @@ -1545,8 +1546,8 @@ static int test_frequency_bin_energy_sums_and_caps(void) { Bat env = make_test_env(); memset(env.observations, 0, OBS_SIZE * sizeof(float)); - int high_bin = 
freq_bin_index(1.0f); - int low_bin = freq_bin_index(0.0f); + int high_bin = FREQ_BINS - 1; + int low_bin = 0; env.observations[LEFT_FREQ_OFFSET + high_bin] = bat_clampf( env.observations[LEFT_FREQ_OFFSET + high_bin] + 0.75f, 0.0f, 1.0f); env.observations[LEFT_FREQ_OFFSET + high_bin] = bat_clampf(