diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 813d1970eb..2ed8997004 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -76,6 +76,11 @@ compute_eval_metrics = False ; --- Goal / Target --- ; Target representation - options: "static", "dynamic" target_type = "static" +; True: place goals along the agent's route (existing behavior, on-lane and +; in front of the agent). False: place each goal at a random drivable point +; whose distance from the agent (first goal) or the previous goal lies in +; [min_waypoint_spacing, max_waypoint_spacing]; it need not be on the route. +goal_on_lane = True ; Meters around goal to be considered "reached" goal_radius = 2.0 ; Maximum speed at final waypoint to count goal reward diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 13f37465b7..ec7f34d2c1 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1953,6 +1953,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->num_target_waypoints = MAX_TARGET_WAYPOINTS; } env->target_type = (int) unpack(kwargs, "target_type"); + env->goal_on_lane = (int) unpack(kwargs, "goal_on_lane"); env->obs_slots_boundary_n = (int) unpack(kwargs, "obs_slots_boundary_n"); env->obs_slots_lane_n = (int) unpack(kwargs, "obs_slots_lane_n"); env->obs_slots_partners_n = (int) unpack(kwargs, "obs_slots_partners_n"); diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 1dc87e6fd3..dca0b738f6 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -112,7 +112,7 @@ // => For each entity type in gridmap, diagonal poly-lines -> sqrt(2), include diagonal ends -> 2 #define MAX_ENTITIES_PER_CELL 30 -// TARGET_TYPE modes (controls what target info is in observations) +// TARGET_TYPE modes (controls what target info is in observations). 
#define TARGET_STATIC 0 #define TARGET_DYNAMIC 1 @@ -396,6 +396,7 @@ struct Drive { int num_target_waypoints; int logs_capacity; int target_type; + int goal_on_lane; char *ini_file; int collision_behavior; // 0 = none, 1=stop, 2 = remove int offroad_behavior; // 0 = none, 1=stop, 2 = remove @@ -1927,8 +1928,168 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) { return 1; // Success }
+// Clamp a float grid index to [lo, hi] before converting it to int.
+// Converting an out-of-range or NaN float to int is undefined behavior in C;
+// NaN maps to lo here.
+static int clamp_grid_index(float idx, int lo, int hi) {
+    if (!(idx > (float) lo)) {
+        return lo;
+    }
+    if (idx > (float) hi) {
+        return hi;
+    }
+    return (int) idx;
+}
+
+// Pick a random point on a drivable lane segment whose Euclidean (x, y)
+// distance from (ref_x, ref_y) lies in [min_dist, max_dist].
+//
+// Scans every grid cell whose center is within max_dist plus half a cell
+// diagonal of the reference. For each drivable segment stored in such a cell,
+// one point is sampled uniformly along the segment; points inside the distance
+// band are candidates, and each new candidate replaces the current pick with
+// probability 1 / n_cand, which keeps the pick (near-)uniform over candidates.
+// A segment stored in several scanned cells is sampled once per cell.
+//
+// A negative min_dist is treated as 0. Returns 1 and writes the point to
+// out_x / out_y / out_z on success. Returns 0 and leaves the outputs untouched
+// when the band is empty (max_dist < min_dist, or a NaN bound) or when no
+// candidate was found.
+static int pick_random_drivable_position(
+    Drive *env,
+    float ref_x,
+    float ref_y,
+    float min_dist,
+    float max_dist,
+    float *out_x,
+    float *out_y,
+    float *out_z) {
+    // Bounds are compared squared below, which would discard the sign of a
+    // negative bound, so sanitize the band first.
+    if (min_dist < 0.0f) {
+        min_dist = 0.0f;
+    }
+    if (!(max_dist >= min_dist)) {
+        return 0;
+    }
+
+    GridMap *gm = env->grid_map;
+    float cell = GRID_CELL_SIZE;
+    float half_diag = 0.5f * cell * (float) M_SQRT2;
+    float min_d2 = min_dist * min_dist;
+    float max_d2 = max_dist * max_dist;
+    float cell_filter = max_dist + half_diag;
+    float cell_filter2 = cell_filter * cell_filter;
+
+    // Scan window around the reference cell. It is computed in float and
+    // clamped to the grid before the int conversion, so a huge or infinite
+    // max_dist (or a far-away reference) cannot overflow the cast.
+    float ref_cx = truncf((ref_x - gm->top_left_x) / cell);
+    float ref_cy = truncf((ref_y - gm->bottom_right_y) / cell);
+    float half_extent = ceilf(cell_filter / cell);
+    int x_lo = clamp_grid_index(ref_cx - half_extent, 0, gm->grid_cols);
+    int y_lo = clamp_grid_index(ref_cy - half_extent, 0, gm->grid_rows);
+    int x_hi = clamp_grid_index(ref_cx + half_extent, -1, gm->grid_cols - 1);
+    int y_hi = clamp_grid_index(ref_cy + half_extent, -1, gm->grid_rows - 1);
+
+    int n_cand = 0;
+    float pick_x = 0.0f, pick_y = 0.0f, pick_z = 0.0f;
+    for (int gy = y_lo; gy <= y_hi; gy++) {
+        float cy = gm->bottom_right_y + (gy + 0.5f) * cell;
+        for (int gx = x_lo; gx <= x_hi; gx++) {
+            float cx = gm->top_left_x + (gx + 0.5f) * cell;
+            float dcx = cx - ref_x;
+            float dcy = cy - ref_y;
+            // Skip cells whose center is farther than max_dist + half a diagonal.
+            if (dcx * dcx + dcy * dcy > cell_filter2) {
+                continue;
+            }
+            int gi = gy * gm->grid_cols + gx;
+            for (int i = 0; i < gm->cell_entities_count[gi]; i++) {
+                GridMapEntity e = gm->cells[gi][i];
+                RoadMapElement *lane = &env->road_elements[e.entity_idx];
+                if (!is_drivable_road_lane(lane->type)) {
+                    continue;
+                }
+                // The grid stores polyline SEGMENTS (start vertex = geometry_idx).
+                // Sample a uniform point along the segment so candidate positions
+                // are continuous along the road rather than quantized to vertices.
+                int k = e.geometry_idx;
+                if (k + 1 >= lane->segment_length) {
+                    continue;
+                }
+                float t = (float) rand() / (float) RAND_MAX;
+                float ex = lane->x[k] + t * (lane->x[k + 1] - lane->x[k]);
+                float ey = lane->y[k] + t * (lane->y[k + 1] - lane->y[k]);
+                float ez = lane->z[k] + t * (lane->z[k + 1] - lane->z[k]);
+                float edx = ex - ref_x;
+                float edy = ey - ref_y;
+                float ed2 = edx * edx + edy * edy;
+                if (ed2 < min_d2 || ed2 > max_d2) {
+                    continue;
+                }
+                // Keep one candidate with equal probability among all seen so far.
+                n_cand++;
+                if (rand() % n_cand == 0) {
+                    pick_x = ex;
+                    pick_y = ey;
+                    pick_z = ez;
+                }
+            }
+        }
+    }
+
+    if (n_cand == 0) {
+        return 0;
+    }
+    *out_x = pick_x;
+    *out_y = pick_y;
+    *out_z = pick_z;
+    return 1;
+}
+
 static void compute_goals(Drive *env, int agent_idx) { Agent *agent = &env->agents[agent_idx]; + + // goal_on_lane=False: place each goal at a random drivable point whose + // Euclidean distance from the previous anchor (agent for goal 0, previous + // goal for subsequent ones) lies in [min_waypoint_spacing, + // max_waypoint_spacing]. 
+ if (!env->goal_on_lane) { + int num_target_waypoints = env->num_target_waypoints; + if (num_target_waypoints <= 0 || num_target_waypoints > MAX_TARGET_WAYPOINTS) { + num_target_waypoints = MAX_TARGET_WAYPOINTS; + } + float ref_x = agent->sim_x; + float ref_y = agent->sim_y; + for (int i = 0; i < num_target_waypoints; i++) { + float gx, gy, gz; + if (!pick_random_drivable_position( + env, + ref_x, + ref_y, + env->min_waypoint_spacing, + env->max_waypoint_spacing, + &gx, + &gy, + &gz)) { + printf("[GIGAFLOW WARNING] -> pick_random_drivable_position failed for agent %d\n", agent_idx); + agent->removed = 1; + return; + } + agent->goal_positions_x[i] = gx; + agent->goal_positions_y[i] = gy; + agent->goal_positions_z[i] = gz; + ref_x = gx; + ref_y = gy; + } + agent->current_goal_idx = 0; + agent->goal_position_x = agent->goal_positions_x[0]; + agent->goal_position_y = agent->goal_positions_y[0]; + agent->goal_position_z = agent->goal_positions_z[0]; + return; + } + struct Path *path = agent->path; // Validate path exists diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 577d42252e..c26d2bd8eb 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -78,6 +78,7 @@ def __init__( control_mode="control_vehicles", map_dir=None, target_type="static", + goal_on_lane=True, reward_conditioning=False, reward_randomization=False, compute_eval_metrics=True, @@ -144,6 +145,7 @@ def __init__( self.target_type = binding.TARGET_DYNAMIC else: raise ValueError(f"target_type must be 'static' or 'dynamic'. 
Got: {target_type}") + self.goal_on_lane = int(bool(goal_on_lane)) self.collision_behavior = collision_behavior self.offroad_behavior = offroad_behavior self.traffic_light_behavior = traffic_light_behavior @@ -393,6 +395,7 @@ def _env_init_kwargs(self, map_file, max_agents): "max_waypoint_spacing": self.max_waypoint_spacing, "num_target_waypoints": self.num_target_waypoints, "target_type": self.target_type, + "goal_on_lane": self.goal_on_lane, "obs_slots_lane_n": self.obs_slots_lane_n, "obs_slots_boundary_n": self.obs_slots_boundary_n, "obs_slots_partners_n": self.obs_slots_partners_n, diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 809225b7dc..14a270defd 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1265,6 +1265,7 @@ def __init__(self, args, load_id=None, resume="allow"): wandb.init( id=load_id or wandb.util.generate_id(), + name=args.get("run_name") or None, project=args["wandb_project"], group=args["wandb_group"], allow_val_change=True, @@ -2117,6 +2118,12 @@ def load_config(env_name, config_dir=None): parser.add_argument("--wandb", action="store_true", help="Use wandb for logging") parser.add_argument("--wandb-project", type=str, default="pufferlib") parser.add_argument("--wandb-group", type=str, default="debug") + parser.add_argument( + "--run-name", + type=str, + default=None, + help="Wandb run display name. 
Unset → wandb auto-generates one.", + ) parser.add_argument("--neptune", action="store_true", help="Use neptune for logging") parser.add_argument("--neptune-name", type=str, default="pufferai") parser.add_argument("--neptune-project", type=str, default="ablations") diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index 300a8281e2..c4b0a8372b 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -24,9 +24,14 @@ env.max_agents_per_env: 1 env.num_agents: 1024 env.use_map_cache: 1 -# Single goal waypoint ahead of the agent (route mode uses the default -# min/max waypoint spacing of 20m/60m to place it). +# Single goal placed at a random drivable point on the map whose Euclidean +# distance from the agent lies in [min_waypoint_spacing, max_waypoint_spacing]. +# 6m floor avoids spawning the goal on top of the agent; 500m saturates at the +# Town10HD map diameter (~260m) so goals can land anywhere on the network. env.num_target_waypoints: 1 +env.goal_on_lane: False +env.min_waypoint_spacing: 6.0 +env.max_waypoint_spacing: 500.0 # Traffic lights fully off: not observed, not scored, no reward penalty. env.traffic_light_behavior: 0 @@ -63,5 +68,5 @@ eval.behaviors_unprotected_right.enabled: 0 # W&B. Group has no space: submit_cluster.py joins the inner command into a # bash -c string without quoting arg values, so a space would split the arg. 
wandb: True -wandb_project: puffer_drive +wandb_project: single_agent_nightly_test wandb_group: Nightly_Test diff --git a/scripts/launch_single_agent.sh b/scripts/launch_single_agent.sh index 8963721b69..e0e1135ab2 100755 --- a/scripts/launch_single_agent.sh +++ b/scripts/launch_single_agent.sh @@ -26,13 +26,20 @@ PARTITION="${PARTITION:-h200_tandon}" TIME="${TIME:-720}" SEEDS="${SEEDS:-0:1:2}" PREFIX="${PREFIX:-$(date +%Y-%m-%d)_single_agent}" +DATE_STAMP="$(date +%Y-%m-%d)" source "/scratch/$USER/venvs/pufferdrive/bin/activate" -python scripts/submit_cluster.py \ - --save_dir "/scratch/$USER/runs" \ - --prefix "$PREFIX" \ - --compute_config "$COMPUTE_CONFIG" \ - --program_config "$PROGRAM_CONFIG" \ - --container --heartbeat \ - --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \ - --args "train.seed=$SEEDS" + +# One submission per seed so we can pass a per-seed run_name (wandb display +# name like 2026-05-31_seed0) +IFS=':' read -ra SEED_LIST <<< "$SEEDS" +for SEED in "${SEED_LIST[@]}"; do + python scripts/submit_cluster.py \ + --save_dir "/scratch/$USER/runs" \ + --prefix "$PREFIX" \ + --compute_config "$COMPUTE_CONFIG" \ + --program_config "$PROGRAM_CONFIG" \ + --container --heartbeat \ + --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \ + --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" +done