Skip to content

[BUG] #1198

@w-ahmad1a10

Description

@w-ahmad1a10

I was evaluating LFM2-350M on GSM8K. I ran experiments from 0-shot to 5-shot, so six experiments in total. My code is below, with max_length=None and max_new_tokens=1024; I know that lighteval sets the GSM8K generation size to 256.

code

import torch, gc

# Free as much GPU memory as possible before loading the model.
#
# BUG FIX: the original walked gc.get_objects() and `del obj`-ed every CUDA
# tensor it found.  That is a no-op — `del` removes only the local loop
# binding, not the references that actually keep a tensor alive — and the
# bare `except: pass` silently swallowed every error (including KeyboardInterrupt).
# Collecting cyclic garbage and then asking the CUDA caching allocator to
# release unused cached blocks is the correct (and sufficient) cleanup.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
from lighteval.pipeline import Pipeline, PipelineParameters, ParallelismManager
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig
from transformers import AutoModelForCausalLM
from lighteval.models.model_input import GenerationParameters
import os
import wandb

-------------------------------------------------------------------------------------------------------------------

# Group all runs under one W&B project name.
os.environ["WANDB_PROJECT"] = "my_lighteval_project"
# SECURITY FIX: the original hard-coded a live wandb API key in source
# (anyone reading this file could use it — that key must be rotated).
# Read the key from the WANDB_API_KEY environment variable instead;
# wandb.login() falls back to env/netrc automatically when key is None.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Tracker configuration: persist results and per-sample details locally and
# mirror metrics to Weights & Biases; nothing is pushed to the HF Hub or
# TensorBoard in this run.
_tracker_settings = {
    "output_dir": "./eval_results",        # where results are written locally
    "results_path_template": None,         # default folder layout
    "save_details": True,                  # keep per-sample predictions
    "push_to_hub": False,
    "push_to_tensorboard": False,
    "hub_results_org": "",                 # only relevant when push_to_hub=True
    "tensorboard_metric_prefix": "eval",
    "public": False,
    "nanotron_run_info": None,             # Nanotron-only, unused here
    "use_wandb": True,                     # log metrics to W&B
}
tracker = EvaluationTracker(**_tracker_settings)

── PipelineParameters ────────────────────────────────────────────────────────

# Pipeline-level knobs: single-process run over the full test split.
pipeline_params = PipelineParameters(
    launcher_type=ParallelismManager.NONE,   # no model/data parallelism (single GPU)
    job_id=0,                                # cluster job id; 0 for notebook runs
    dataset_loading_processes=1,
    nanotron_checkpoint_path=None,           # Nanotron-only, unused here
    custom_tasks_directory=None,             # using a registered task
    num_fewshot_seeds=1,                     # one few-shot sampling seed
    max_samples=None,                        # None = evaluate every sample
    cot_prompt=None,
    remove_reasoning_tags=True,              # strip reasoning spans from outputs
    # BUG FIX: the pasted value was [("", "")] because the issue's HTML
    # renderer stripped the literal tags.  lighteval's reasoning-tag pair is
    # <think>...</think>; empty-string tags would make the stripping a no-op.
    reasoning_tags=[("<think>", "</think>")],
    load_responses_from_details_date_id=None,  # don't reuse a previous run's responses
    bootstrap_iters=1000,                    # CI bootstrap iterations
    load_tasks_multilingual=False,
)

── GenerationParameters (nested inside model config) ─────────────────────────

# Decoding parameters (nested inside the model config).
gen_params = GenerationParameters(
    temperature=0.0,            # 0.0 = greedy / deterministic decoding
    top_p=None,
    top_k=None,
    min_p=None,
    # BUG FIX: this line was `max_new_tokens = ,` — a syntax error.  The
    # run's result file reports "max_new_tokens": 1024, so that value is
    # restored.  NOTE(review): the task's generation_size for gsm8k is 256,
    # which lighteval may use as the effective cap — 1024 is an upper bound,
    # not a guarantee of 1024 generated tokens.
    max_new_tokens=1024,
    min_new_tokens=None,
    stop_tokens=None,           # or a list of strings, e.g. ["Question:"]
    seed=42,                    # generation seed for reproducibility
    repetition_penalty=None,    # 1.0 / None = no penalty
    frequency_penalty=None,
    length_penalty=None,        # beam-search length penalty
    presence_penalty=None,
    early_stopping=None,        # beam-search early stop
    truncate_prompt=None,       # truncate over-long prompts instead of erroring
    num_blocks=None,            # block-sparse attention, unused
    block_size=None,            # block-sparse attention, unused
    cache_implementation=None,  # default KV-cache
    response_format=None,       # no structured-output constraint
)

── TransformersModelConfig ───────────────────────────────────────────────────

# Model configuration: LiquidAI/LFM2-350M in fp16 on a single CUDA device.
model_cfg = TransformersModelConfig(
    # --- what to load ---
    model_name="LiquidAI/LFM2-350M",       # HF model id
    tokenizer=None,                        # use the model's own tokenizer
    subfolder=None,
    revision="main",
    cache_dir="/content/hf_cache",         # on-disk weight cache
    trust_remote_code=False,
    model_loading_kwargs={},               # extra from_pretrained kwargs
    # --- how to run it ---
    device="cuda",
    dtype="float16",
    batch_size=50,                         # tune to fit the GPU
    max_length=None,                       # None: fall back to model/tokenizer limit
    model_parallel=None,                   # auto-decide multi-GPU parallelism
    compile=False,                         # no torch.compile
    continuous_batching=False,
    # --- tokenization / prompting ---
    add_special_tokens=True,               # add BOS etc. when tokenizing
    skip_special_tokens=True,              # drop special tokens when decoding
    multichoice_continuations_start_space=None,
    pairwise_tokenization=False,
    override_chat_template=True,           # force the chat template on
    system_prompt=None,
    # --- decoding ---
    generation_parameters=gen_params,      # GenerationParameters defined above
)

── Pipeline ──────────────────────────────────────────────────────────────────

# Assemble the pipeline: 5-shot gsm8k using the config objects built above.
pipeline = Pipeline(
    tasks="|gsm8k|5",                  # "<suite>|<task>|<num_fewshot>" spec
    pipeline_parameters=pipeline_params,
    evaluation_tracker=tracker,
    model_config=model_cfg,
    model=None,                        # lighteval instantiates from model_config
    metric_options=None,               # registered task metrics, no overrides
)

── Run ───────────────────────────────────────────────────────────────────────

# Run the evaluation end-to-end, write results/details to output_dir (and to
# W&B, since use_wandb=True above), then print the aggregated metrics table.
pipeline.evaluate()
pipeline.save_and_push_results()
pipeline.show_results()

query :

When I opened the details file produced by lighteval, I saw `"padded_tokens_count": 178, "reasonings": [], ... "truncated_tokens_count": 2047, "unconditioned_logprobs": null`. I do not understand why the truncated count was 2047 or what it refers to — this happened even in the 0-shot run, and I could not find it documented. My result file for the 5-shot run is below.

result {

"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"max_samples": null,
"job_id": "0",
"start_time": 3275.624964992,
"end_time": 3699.36364977,
"total_evaluation_time_secondes": "423.7386847779999",
"model_config": {
"model_name": "LiquidAI/LFM2-350M",
"generation_parameters": {
"num_blocks": null,
"block_size": null,
"early_stopping": null,
"repetition_penalty": null,
"frequency_penalty": null,
"length_penalty": null,
"presence_penalty": null,
"max_new_tokens": 1024,
"min_new_tokens": null,
"seed": 42,
"stop_tokens": null,
"temperature": 0.0,
"top_k": null,
"min_p": null,
"top_p": null,
"truncate_prompt": null,
"cache_implementation": null,
"response_format": null
},
"system_prompt": null,
"cache_dir": "/content/hf_cache",
"tokenizer": null,
"subfolder": null,
"revision": "main",
"batch_size": 50,
"max_length": null,
"model_loading_kwargs": {},
"add_special_tokens": true,
"skip_special_tokens": true,
"model_parallel": false,
"dtype": "float16",
"device": "cuda",
"trust_remote_code": false,
"compile": false,
"multichoice_continuations_start_space": null,
"pairwise_tokenization": false,
"continuous_batching": false,
"override_chat_template": true
},
"model_name": "LiquidAI/LFM2-350M"
},
"results": {
"gsm8k|5": {
"extractive_match": 0.3502653525398029,
"extractive_match_stderr": 0.013140409455571263
},
"all": {
"extractive_match": 0.3502653525398029,
"extractive_match_stderr": 0.013140409455571263
}
},
"versions": {},
"config_tasks": {
"gsm8k|5": {
"name": "gsm8k",
"prompt_function": "gsm8k_prompt",
"hf_repo": "openai/gsm8k",
"hf_subset": "main",
"metrics": [
{
"metric_name": "extractive_match",
"higher_is_better": true,
"category": "GENERATIVE",
"sample_level_fn": "MultilingualExtractiveMatchMetric(language=Language.ENGLISH, gold_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True),), pred_extraction_target=(ExprExtractionConfig(try_extract_without_anchor=True), LatexExtractionConfig(try_extract_without_anchor=True, boxed_match_priority=0, normalization_config=NormalizationConfig(basic_latex=True, units=True, malformed_operators=True, nits=True, boxed='all', equations=True))), aggregation_function=max, fallback_mode=first_match, extraction_mode=any_match, precision=5, timeout_seconds=5)",
"corpus_level_fn": "mean",
"batched_compute": false
}
],
"solver": [
"solve",
"solve"
],
"scorer": "score",
"sample_fields": "record_to_sample",
"sample_to_fewshot": "sample_to_fewshot",
"filter": null,
"hf_revision": null,
"hf_filter": null,
"hf_avail_splits": [
"train",
"test"
],
"evaluation_splits": [
"test"
],
"few_shots_split": null,
"few_shots_select": "random_sampling_from_train",
"generation_size": 256,
"generation_grammar": null,
"stop_sequence": [
"Question:"
],
"num_samples": null,
"original_num_docs": -1,
"effective_num_docs": -1,
"must_remove_duplicate_docs": false,
"num_fewshots": 5,
"version": 0
}
},
"summary_tasks": {
"gsm8k|5": {
"hashes": {
"hash_examples": "0ed016e24e7512fd",
"hash_full_prompts": "ef46db3751d8e999",
"hash_input_tokens": "d01025ef6535eaa0",
"hash_cont_tokens": "84ac51e597f7a1bb"
},
"truncated": 0,
"non_truncated": 0,
"padded": 0,
"non_padded": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "bc71463e88551d0e",
"hash_full_prompts": "c166e5d20ad58f4e",
"hash_input_tokens": "bdee8939673f2335",
"hash_cont_tokens": "56b056577811391f"
},
"truncated": 0,
"non_truncated": 0,
"padded": 0,
"non_padded": 0
}
}. Please explain what this output means.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions