API Reference

This section provides a reference for our customized SkyRL API, which is used for the RL training of SWE-Agent.

SkyRL.skyrl-train.examples.swe_agent.main_swe.MiniSWEPPOExp

Bases: BasePPOExp

Customized PPO experiment for SWE-Agent.

Source code in SkyRL/skyrl-train/examples/swe_agent/main_swe.py

class MiniSWEPPOExp(BasePPOExp):
    """PPO experiment wiring for SWE-Agent: generator, rollout worker and trainer."""

    def get_generator(self, cfg, tokenizer, inference_engine_client):
        """Build the ``SweAgentGenerator`` used for rollouts.

        The gym config is created with ``max_env_workers`` set to 0 and the
        model name is read from ``self.cfg.trainer.policy.model.path``.
        """
        return SweAgentGenerator(
            generator_cfg=cfg.generator,
            skyrl_gym_cfg=OmegaConf.create({"max_env_workers": 0}),
            inference_engine_client=inference_engine_client,
            tokenizer=tokenizer,
            model_name=self.cfg.trainer.policy.model.path,
        )

    def get_rollout_worker(self, cfg, tokenizer, sweagent_config):
        """Create a rollout worker through ``SWERolloutWorker.remote``."""
        return SWERolloutWorker.remote(
            generator_cfg=cfg.generator,
            tokenizer=tokenizer,
            sweagent_config=sweagent_config,
        )

    def get_trainer(
        self,
        cfg,
        tracker,
        tokenizer,
        train_dataset,
        eval_dataset,
        inference_engine_client,
        generator: GeneratorInterface,
        colocate_pg,
    ):
        """Build the trainer selected by ``cfg.trainer.asynch``.

        Returns:
            ``RayPPOAsynchTrainer`` (given an extra rollout worker built from
            ``generator.sweagent_config``) when ``cfg.trainer.asynch`` is
            truthy, otherwise ``RayPPOTrainer``.
        """
        # Keyword arguments that both trainer classes receive.
        shared_kwargs = dict(
            cfg=cfg,
            tracker=tracker,
            tokenizer=tokenizer,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            inference_engine_client=inference_engine_client,
            generator=generator,
            colocate_pg=colocate_pg,
        )

        if not cfg.trainer.asynch:
            return RayPPOTrainer(**shared_kwargs)

        rollout_worker = self.get_rollout_worker(cfg, tokenizer, generator.sweagent_config)
        return RayPPOAsynchTrainer(sweworker=rollout_worker, **shared_kwargs)

get_generator

get_generator(cfg, tokenizer, inference_engine_client)

Initializes the generator.

Source code in SkyRL/skyrl-train/examples/swe_agent/main_swe.py

def get_generator(self, cfg, tokenizer, inference_engine_client):
    """Build the ``SweAgentGenerator`` used for rollouts.

    The gym config is created with ``max_env_workers`` set to 0 and the
    model name is read from ``self.cfg.trainer.policy.model.path``.
    """
    return SweAgentGenerator(
        generator_cfg=cfg.generator,
        skyrl_gym_cfg=OmegaConf.create({"max_env_workers": 0}),
        inference_engine_client=inference_engine_client,
        tokenizer=tokenizer,
        model_name=self.cfg.trainer.policy.model.path,
    )

get_trainer

get_trainer(cfg, tracker, tokenizer, train_dataset, eval_dataset, inference_engine_client, generator, colocate_pg)

Initializes the trainer. By default, we use RayPPOTrainer. When `cfg.trainer.asynch` is set, RayPPOAsynchTrainer is used instead; it overlaps the environment setup and model training for better resource utilization.

Returns:	`RayPPOTrainer` – The trainer.

Source code in SkyRL/skyrl-train/examples/swe_agent/main_swe.py

def get_trainer(
    self,
    cfg,
    tracker,
    tokenizer,
    train_dataset,
    eval_dataset,
    inference_engine_client,
    generator: GeneratorInterface,
    colocate_pg,
):
    """Build the trainer selected by ``cfg.trainer.asynch``.

    Returns:
        ``RayPPOAsynchTrainer`` (given an extra rollout worker built from
        ``generator.sweagent_config``) when ``cfg.trainer.asynch`` is truthy,
        otherwise ``RayPPOTrainer``.
    """
    # Keyword arguments that both trainer classes receive.
    shared_kwargs = dict(
        cfg=cfg,
        tracker=tracker,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        inference_engine_client=inference_engine_client,
        generator=generator,
        colocate_pg=colocate_pg,
    )

    if not cfg.trainer.asynch:
        return RayPPOTrainer(**shared_kwargs)

    rollout_worker = self.get_rollout_worker(cfg, tokenizer, generator.sweagent_config)
    return RayPPOAsynchTrainer(sweworker=rollout_worker, **shared_kwargs)

SkyRL.skyrl-train.examples.swe_agent.swe_generator.SweAgentGenerator

Bases: SkyRLGymGenerator

Customized SkyRLGymGenerator for SWE-Agent.

Source code in SkyRL/skyrl-train/examples/swe_agent/swe_generator.py

class SweAgentGenerator(SkyRLGymGenerator):
    """Customized SkyRLGymGenerator for SWE-Agent.

    Rollouts are delegated to the remote ``init_and_run_*`` tasks. The chat
    history they return is tokenized here into prompt ids, response ids and
    a loss mask.
    """

    def __init__(
        self,
        generator_cfg: DictConfig,
        skyrl_gym_cfg: DictConfig,
        inference_engine_client: InferenceEngineClient,
        tokenizer,
        model_name: str,
    ):
        """Initialize the generator and build the SWE-agent batch configuration.

        Args:
            generator_cfg: Generator configuration. Its ``sweagent`` section is
                flattened into ``--key value`` CLI arguments and parsed into
                ``self.sweagent_config``.
            skyrl_gym_cfg: Passed through to the parent constructor.
            inference_engine_client: Passed through to the parent constructor.
            tokenizer: Tokenizer used to encode the chat history.
            model_name: Model name; also used (prefixed with ``openai/``) as
                ``self.litellm_model_name``.

        Raises:
            NotImplementedError: If ``generator_cfg.chat_template.name_or_path``
                is not None.
        """
        # Call parent constructor first
        super().__init__(generator_cfg, skyrl_gym_cfg, inference_engine_client, tokenizer, model_name)

        self.http_server_inference_engine_client_host = generator_cfg.get(
            "http_server_inference_engine_client_host", "127.0.0.1"
        )
        self.http_server_inference_engine_client_port = generator_cfg.get(
            "http_server_inference_engine_client_port", 8000
        )
        self.base_url = (
            f"http://{self.http_server_inference_engine_client_host}:{self.http_server_inference_engine_client_port}"
        )
        self.generator_cfg = generator_cfg

        # Turn the `sweagent` config section into CLI-style arguments.
        cli_cfg = self.generator_cfg.sweagent
        # NOTE(review): OmegaConf documents `OmegaConf.to_container(cfg)` as a
        # static helper rather than a method on the config object, so this
        # branch may never run - confirm `flatten_dict` accepts the raw config.
        if hasattr(cli_cfg, "to_container"):
            cli_cfg = cli_cfg.to_container(resolve=True)
        flat_cfg = flatten_dict(cli_cfg)
        args = []
        for key, value in flat_cfg.items():
            args.extend((f"--{key}", str(value)))
        self.sweagent_config = BasicCLI(RunBatchConfig, help_text='s').get_config(args)

        self.tokenizer = tokenizer
        self.model_name = model_name
        self.litellm_model_name = "openai/" + self.model_name

        if self.generator_cfg.chat_template.name_or_path is not None:
            raise NotImplementedError("SweAgentGenerator doesn't support custom chat template")

    async def minisweagent_agent_loop(
        self,
        sweagent_config: DictConfig,
        instance: BatchInstance,
        prompt: ConversationType,
        env_extras: Dict[str, Any],
        max_tokens: int,
        max_input_length: int,
        sampling_params: Dict[str, Any],
        trajectory_id: TrajectoryID,
        batch_metadata: BatchMetadata,
    ) -> Tuple[List[int], float, str, List[int], List[int], Optional[List[int]]]:
        """Run one rollout remotely and tokenize the resulting conversation.

        Dispatches to ``init_and_run_sb_remote`` or
        ``init_and_run_container_remote`` depending on
        ``sweagent_config.env_type``.

        Args:
            sweagent_config: The sweagent configuration. RunBatchConfig object.
            instance: The BatchInstance to run.
            prompt: The input prompt. Not used; the instance carries its own.
            env_extras: Extra environment information; ``data_source`` is read.
            max_tokens: The maximum number of tokens to generate.
            max_input_length: The maximum input length.
            sampling_params: The sampling parameters to use for the model.
            trajectory_id: The trajectory ID, forwarded to the remote task.
            batch_metadata: The batch metadata; ``global_step`` and
                ``training_phase`` are forwarded to the remote task.

        Returns:
            ``(response_ids, reward, stop_reason, loss_mask, prompt_ids, None)``,
            or a tuple of six ``None`` values when the remote task returned no
            messages.

        Raises:
            ValueError: If ``env_type`` is neither ``'sandbox'`` nor
                ``'docker'``, or if the history holds no non-user message
                after the two initial messages.
        """
        # NOTE (sumanthrh): Input `prompt` is not used here because mini-swe-agent uses a similar entry from the `instance` obj
        if sweagent_config.env_type == 'sandbox':
            remote_fn = init_and_run_sb_remote
        elif sweagent_config.env_type == 'docker':
            remote_fn = init_and_run_container_remote
        else:
            raise ValueError(
                f"Unsupported env_type {sweagent_config.env_type!r}; expected 'sandbox' or 'docker'"
            )
        ref = remote_fn.remote(
            instance,
            self.litellm_model_name,
            sweagent_config,
            self.generator_cfg,
            env_extras["data_source"],
            sampling_params,
            trajectory_id,
            batch_metadata.global_step,
            batch_metadata.training_phase,
        )
        # ray.get blocks, so run it in a worker thread to keep the event loop free.
        messages, reward, _error = await asyncio.to_thread(ray.get, ref)
        if not messages:
            return None, None, None, None, None, None

        # TODO (sumanthrh): This is currently hardcoded for SWEBench with 2 initial messages (system and user).
        response_messages = messages[2:]

        for message in messages[:2]:
            assert message["role"] in (
                "system",
                "user",
            ), "Expected the first two messages to be system and user messages"

        initial_input_ids = self.tokenizer.apply_chat_template(messages[:2], add_generation_prompt=False, tokenize=True)
        initial_prompt_length = len(initial_input_ids)

        response_ids: List[int] = []
        loss_mask: List[int] = []

        # We remove trailing `user` messages - this is added by Mini-SWE-Agent to capture the final git diff for the trajectory.
        # The `last_idx >= 0` bound matters: without it a negative index wraps
        # around and an all-user (or empty) history ends in IndexError instead
        # of the ValueError below.
        last_idx = len(response_messages) - 1
        while last_idx >= 0 and response_messages[last_idx]["role"] == "user":
            last_idx -= 1
        if last_idx < 0:
            raise ValueError(
                "Found no assistant messages. Please ensure that your environment is configured correctly and the `OPENAI_BASE_URL` points to the HTTP server from the inference engine client"
            )
        response_messages = response_messages[: last_idx + 1]

        for message in response_messages:
            # Apply chat template and tokenize each message
            msg_encoding = encode_messages_subset([message], self.tokenizer)
            response_ids.extend(msg_encoding)

            # Loss mask: 0s for user tokens, 1s for everything else (assistant)
            mask_value = 0 if message["role"] == "user" else 1
            loss_mask.extend([mask_value] * len(msg_encoding))

        prompt_ids = initial_input_ids

        # Total token budget minus what the initial prompt already consumed
        max_response_tokens = max_tokens + max_input_length - initial_prompt_length

        # "complete" unless the response has to be truncated
        stop_reason = "length" if len(response_ids) > max_response_tokens else "complete"

        # Truncate to maximum allowed length
        response_ids = response_ids[:max_response_tokens]
        loss_mask = loss_mask[:max_response_tokens]

        return (response_ids, reward, stop_reason, loss_mask, prompt_ids, None)

    async def generate(self, input_batch: GeneratorInput) -> GeneratorOutput:
        """Generate trajectories for the input batch.

        Calls ``minisweagent_agent_loop`` concurrently for every instance in
        the batch. Trajectories whose generation failed are dropped; the
        remaining outputs keep the order of the input batch.

        Args:
            input_batch: GeneratorInput with ``prompts``, ``env_extras``,
                ``trajectory_ids`` and ``batch_metadata``.

        Returns:
            GeneratorOutput

        Raises:
            ValueError: If generation failed for every trajectory.
        """
        prompts = input_batch["prompts"]
        env_extras = input_batch["env_extras"]
        trajectory_ids = input_batch["trajectory_ids"]
        batch_metadata = input_batch["batch_metadata"]
        max_tokens = self.generator_cfg.sampling_params.max_generate_length
        max_input_length = self.generator_cfg.max_input_length
        sampling_params = get_sampling_params_for_backend(
            self.generator_cfg.backend, self.generator_cfg.sampling_params
        )

        # Copy each dataset record so the caller's env_extras are not mutated.
        datasets = []
        for i, extras in enumerate(env_extras):
            data_instance = copy.deepcopy(extras["instance"])
            data_instance['traj_id'] = data_instance['instance_id'] + '@' + str(trajectory_ids[i].repetition_id)
            data_instance['global_step'] = extras['global_step']
            datasets.append(data_instance)

        instances = self.sweagent_config.instances.get_instance_configs_ds(datasets)

        tasks = [
            self.minisweagent_agent_loop(
                self.sweagent_config,
                instances[i],
                prompt,
                env_extras[i],
                max_tokens=max_tokens,
                max_input_length=max_input_length,
                sampling_params=sampling_params,
                trajectory_id=trajectory_ids[i],
                batch_metadata=batch_metadata,
            )
            for i, prompt in enumerate(prompts)
        ]

        all_outputs = await asyncio.gather(*tasks)

        # Filter out the `None` entries, which means that trajectory generation failed
        valid_outputs = [output for output in all_outputs if output[0] is not None]
        responses = [output[0] for output in valid_outputs]
        rewards = [output[1] for output in valid_outputs]
        stop_reasons = [output[2] for output in valid_outputs]
        loss_masks = [output[3] for output in valid_outputs]
        prompt_token_ids = [output[4] for output in valid_outputs]
        if not responses:
            raise ValueError(
                "Found no valid responses for this step. This means that generation failed for all trajectories, likely due to errors in environment setup."
            )
        rollout_metrics = get_rollout_metrics(responses, rewards)

        generator_output: GeneratorOutput = {
            "prompt_token_ids": prompt_token_ids,
            "response_ids": responses,
            "rewards": rewards,
            "loss_masks": loss_masks,
            "stop_reasons": stop_reasons,
            "rollout_metrics": rollout_metrics,
            "rollout_logprobs": None,
        }

        return generator_output

generate `async`

generate(input_batch)

Generate trajectories for the input batch. It calls minisweagent_agent_loop for each instance in the batch concurrently. Returns outputs in the same order as the input batch; trajectories whose generation failed are dropped.

Attributes:	`input_batch` – GeneratorInput

Returns:	`GeneratorOutput` – GeneratorOutput

Source code in SkyRL/skyrl-train/examples/swe_agent/swe_generator.py

async def generate(self, input_batch: GeneratorInput) -> GeneratorOutput:
    """
    Run one agent loop per batch entry concurrently and collect the results.

    Entries whose agent loop returned ``None`` are discarded; the remaining
    outputs keep the order of the input batch.

    Attributes:
        input_batch: GeneratorInput

    Returns:
        GeneratorOutput
    """
    cfg = self.generator_cfg
    prompts = input_batch["prompts"]
    env_extras = input_batch["env_extras"]
    trajectory_ids = input_batch["trajectory_ids"]
    batch_metadata = input_batch["batch_metadata"]
    max_tokens = cfg.sampling_params.max_generate_length
    max_input_length = cfg.max_input_length
    sampling_params = get_sampling_params_for_backend(cfg.backend, cfg.sampling_params)

    # Build one dataset record per entry, tagged with trajectory id and step.
    datasets = []
    for idx, extras in enumerate(env_extras):
        record = copy.deepcopy(extras["instance"])
        record['traj_id'] = record['instance_id'] + '@' + str(trajectory_ids[idx].repetition_id)
        record['global_step'] = extras['global_step']
        datasets.append(record)

    instances = self.sweagent_config.instances.get_instance_configs_ds(datasets)

    pending = [
        self.minisweagent_agent_loop(
            self.sweagent_config,
            instances[idx],
            prompt,
            env_extras[idx],
            max_tokens=max_tokens,
            max_input_length=max_input_length,
            sampling_params=sampling_params,
            trajectory_id=trajectory_ids[idx],
            batch_metadata=batch_metadata,
        )
        for idx, prompt in enumerate(prompts)
    ]
    all_outputs = await asyncio.gather(*pending)

    # A leading `None` marks a trajectory whose generation failed.
    succeeded = [out for out in all_outputs if out[0] is not None]
    responses = [out[0] for out in succeeded]
    rewards = [out[1] for out in succeeded]
    stop_reasons = [out[2] for out in succeeded]
    loss_masks = [out[3] for out in succeeded]
    prompt_token_ids = [out[4] for out in succeeded]

    if not responses:
        raise ValueError(
            "Found no valid responses for this step. This means that generation failed for all trajectories, likely due to errors in environment setup."
        )

    return {
        "prompt_token_ids": prompt_token_ids,
        "response_ids": responses,
        "rewards": rewards,
        "loss_masks": loss_masks,
        "stop_reasons": stop_reasons,
        "rollout_metrics": get_rollout_metrics(responses, rewards),
        "rollout_logprobs": None,
    }

minisweagent_agent_loop `async`

minisweagent_agent_loop(sweagent_config, instance, prompt, env_extras, max_tokens, max_input_length, sampling_params, trajectory_id, batch_metadata)

The rollout inner loop for mini-swe-agent. It calls init_and_run_container_remote or init_and_run_sb_remote based on the env_type in sweagent_config.

Attributes:

sweagent_config –

The sweagent configuration. RunBatchConfig object.
instance –

The BatchInstance to run.
prompt –

The input prompt. Deprecated. Not used.
env_extras –

Extra environment information.
max_tokens –

The maximum number of tokens to generate.
max_input_length –

The maximum input length.
sampling_params –

The sampling parameters to use for the model.
trajectory_id –

The trajectory ID. Deprecated. Not used.
batch_metadata –

The batch metadata.

Source code in SkyRL/skyrl-train/examples/swe_agent/swe_generator.py

async def minisweagent_agent_loop(
    self,
    sweagent_config: DictConfig,
    instance: BatchInstance,
    prompt: ConversationType,
    env_extras: Dict[str, Any],
    max_tokens: int,
    max_input_length: int,
    sampling_params: Dict[str, Any],
    trajectory_id: TrajectoryID,
    batch_metadata: BatchMetadata,
) -> Tuple[List[int], float, str, List[int], List[int], Optional[List[int]]]:
    """Run one rollout remotely and tokenize the resulting conversation.

    Dispatches to ``init_and_run_sb_remote`` or
    ``init_and_run_container_remote`` depending on
    ``sweagent_config.env_type``.

    Args:
        sweagent_config: The sweagent configuration. RunBatchConfig object.
        instance: The BatchInstance to run.
        prompt: The input prompt. Not used; the instance carries its own.
        env_extras: Extra environment information; ``data_source`` is read.
        max_tokens: The maximum number of tokens to generate.
        max_input_length: The maximum input length.
        sampling_params: The sampling parameters to use for the model.
        trajectory_id: The trajectory ID, forwarded to the remote task.
        batch_metadata: The batch metadata; ``global_step`` and
            ``training_phase`` are forwarded to the remote task.

    Returns:
        ``(response_ids, reward, stop_reason, loss_mask, prompt_ids, None)``,
        or a tuple of six ``None`` values when the remote task returned no
        messages.

    Raises:
        ValueError: If ``env_type`` is neither ``'sandbox'`` nor ``'docker'``,
            or if the history holds no non-user message after the two
            initial messages.
    """
    # NOTE (sumanthrh): Input `prompt` is not used here because mini-swe-agent uses a similar entry from the `instance` obj
    if sweagent_config.env_type == 'sandbox':
        remote_fn = init_and_run_sb_remote
    elif sweagent_config.env_type == 'docker':
        remote_fn = init_and_run_container_remote
    else:
        raise ValueError(
            f"Unsupported env_type {sweagent_config.env_type!r}; expected 'sandbox' or 'docker'"
        )
    ref = remote_fn.remote(
        instance,
        self.litellm_model_name,
        sweagent_config,
        self.generator_cfg,
        env_extras["data_source"],
        sampling_params,
        trajectory_id,
        batch_metadata.global_step,
        batch_metadata.training_phase,
    )
    # ray.get blocks, so run it in a worker thread to keep the event loop free.
    messages, reward, _error = await asyncio.to_thread(ray.get, ref)
    if not messages:
        return None, None, None, None, None, None

    # TODO (sumanthrh): This is currently hardcoded for SWEBench with 2 initial messages (system and user).
    response_messages = messages[2:]

    for message in messages[:2]:
        assert message["role"] in (
            "system",
            "user",
        ), "Expected the first two messages to be system and user messages"

    initial_input_ids = self.tokenizer.apply_chat_template(messages[:2], add_generation_prompt=False, tokenize=True)
    initial_prompt_length = len(initial_input_ids)

    response_ids: List[int] = []
    loss_mask: List[int] = []

    # We remove trailing `user` messages - this is added by Mini-SWE-Agent to capture the final git diff for the trajectory.
    # The `last_idx >= 0` bound matters: without it a negative index wraps
    # around and an all-user (or empty) history ends in IndexError instead of
    # the ValueError below.
    last_idx = len(response_messages) - 1
    while last_idx >= 0 and response_messages[last_idx]["role"] == "user":
        last_idx -= 1
    if last_idx < 0:
        raise ValueError(
            "Found no assistant messages. Please ensure that your environment is configured correctly and the `OPENAI_BASE_URL` points to the HTTP server from the inference engine client"
        )
    response_messages = response_messages[: last_idx + 1]

    for message in response_messages:
        # Apply chat template and tokenize each message
        msg_encoding = encode_messages_subset([message], self.tokenizer)
        response_ids.extend(msg_encoding)

        # Loss mask: 0s for user tokens, 1s for everything else (assistant)
        mask_value = 0 if message["role"] == "user" else 1
        loss_mask.extend([mask_value] * len(msg_encoding))

    prompt_ids = initial_input_ids

    # Total token budget minus what the initial prompt already consumed
    max_response_tokens = max_tokens + max_input_length - initial_prompt_length

    # "complete" unless the response has to be truncated
    stop_reason = "length" if len(response_ids) > max_response_tokens else "complete"

    # Truncate to maximum allowed length
    response_ids = response_ids[:max_response_tokens]
    loss_mask = loss_mask[:max_response_tokens]

    return (response_ids, reward, stop_reason, loss_mask, prompt_ids, None)

SkyRL.skyrl-train.examples.swe_agent.swe_generator.init_and_run_container

init_and_run_container(instance, litellm_model_name, sweagent_config, generator_cfg, data_source, sampling_params, trajectory_id, global_step, training_phase)

Initialize and run the container agent loop for the given instance.

Attributes:

instance –

The BatchInstance to run.
litellm_model_name –

Deprecated. Not used.
sweagent_config –

One instance of RunBatchConfig of SWE-agent project
generator_cfg –

Deprecated. Not used.
data_source –

Deprecated. Not used.
sampling_params –

The sampling parameters to use for the model.
trajectory_id –

The trajectory ID. Deprecated. Not used.
global_step –

The global step. Used for output directory structure.
training_phase –

The training phase. Used for output directory structure.

Source code in SkyRL/skyrl-train/examples/swe_agent/swe_generator.py

def init_and_run_container(
    instance: BatchInstance,
    litellm_model_name: str,
    sweagent_config: dict,
    generator_cfg: DictConfig,
    data_source: str,
    sampling_params: dict,
    trajectory_id: TrajectoryID,
    global_step: int,
    training_phase: TrainingPhase,
):
    """Initialize and run the container agent loop for the given instance.

    The environment start-up and the agent run are retried up to 20 times.
    Config snapshots, per-attempt exception logs and timing records are
    written under
    ``<output_dir>/step_<global_step>/<training_phase>/<instance id>``.

    Args:
        instance: The BatchInstance to run.
        litellm_model_name: Deprecated. Not used.
        sweagent_config: One instance of RunBatchConfig of SWE-agent project.
        generator_cfg: Deprecated. Not used.
        data_source: Deprecated. Not used.
        sampling_params: The sampling parameters to use for the model.
        trajectory_id: The trajectory ID. Deprecated. Not used.
        global_step: The global step. Used for output directory structure.
        training_phase: The training phase. Used for output directory structure.

    Returns:
        A ``(messages, reward, error)`` tuple. ``messages`` is
        ``agent.history`` (an empty list if no agent was ever created),
        ``reward`` is ``result.info["reward"]`` when a result exists and 0
        otherwise, and ``error`` is ``None`` on success or the message of the
        last exception when every attempt failed.
    """
    from loguru import logger

    agent_config = sweagent_config.agent

    # Use new sampling parameters
    # Can also have custom sampling parameters per trajectory (ex: custom max tokens)
    agent_config.model = agent_config.model.model_copy(update=sampling_params)
    agent_config.model.completion_kwargs = sampling_params
    agent = None
    env = None
    result = None
    reward = 0
    error = None

    single_run_replay_config = RunSingleConfig(
        agent=agent_config,
        problem_statement=instance.problem_statement,
        env=instance.env,
    )

    instance.env.name = f"{instance.problem_statement.id}"

    # The output location does not change between attempts. Binding it before
    # the loop keeps it defined for the exception log and the final
    # time-record dump even when an attempt fails very early.
    output_dir = (
        Path(sweagent_config.output_dir) / f"step_{global_step}" / training_phase / instance.problem_statement.id
    )

    def write_config_snapshot(filename):
        """Dump the current replay config to ``output_dir / filename``."""
        (output_dir / filename).write_text(yaml.dump(single_run_replay_config.model_dump_json(), indent=2))

    # Retry loop
    successful = False
    max_retries = 20
    num_retries = 0

    time_records = {}
    start_time = time.time()
    while num_retries < max_retries:
        num_retries += 1
        # Reset per-attempt state so the handlers below never see a stale env
        # or an unbound start time from a previous (or never-started) attempt.
        env = None
        env_startup_time = time.time()
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            write_config_snapshot(f"{instance.problem_statement.id}.config.yaml")

            env = ray.get(start_container_remote.remote(instance))
            env_startup_time = time.time()
            Time_data = env.start()
            env_ready_time = time.time()

            write_config_snapshot("after_env_init.config.yaml")

            time_records[num_retries] = {
                'env_start_time': env_startup_time,
                'env_ready_time': env_ready_time,
                'time_taken': env_ready_time - env_startup_time,
            }
            time_records[num_retries]['detailed_time_data'] = Time_data

            # The rollout-signal actor is optional; ray.get_actor raises
            # ValueError when no actor with that name exists.
            try:
                rollout_signal = ray.get_actor("rollout_signal_actor")  # or take it from self.rollout_signal
            except ValueError:
                rollout_signal = None

            if rollout_signal is not None:
                # Call the remote wait_for_step(step_id) method and block this
                # task until it returns.
                ray.get(rollout_signal.wait_for_step.remote(global_step))

            write_config_snapshot("after_rollout_signal.config.yaml")

            agent = get_agent_from_config(agent_config)
            agent.replay_config = single_run_replay_config  # type: ignore[attr-defined]
            agent_start_time = time.time()

            write_config_snapshot("startagent.config.yaml")
            result = agent.run(
                problem_statement=instance.problem_statement,
                env=env,
                output_dir=output_dir,
            )
            agent_end_time = time.time()
            time_records[num_retries].update(
                {
                    'agent_start_time': agent_start_time,
                    'agent_end_time': agent_end_time,
                    'agent_time_taken': agent_end_time - agent_start_time,
                }
            )
            if len(agent.history) <= 2:
                raise Exception("Agent history too short, likely failed run.")
            successful = True
            error = None  # clear any error left over from an earlier failed attempt
            break  # Exit the retry loop if successful
        except Exception as e:
            failure_time = time.time()
            time_records[num_retries] = {
                'env_start_time': env_startup_time,
                'env_ready_time': failure_time,
                'time_taken': failure_time - env_startup_time,
            }
            # Write the exception to a file
            (output_dir / f"exception_{num_retries}.log").write_text(traceback.format_exc())
            # loguru attaches the traceback through opt(exception=...); an
            # `exc_info` keyword is not interpreted as a traceback flag.
            logger.opt(exception=True).error("Error processing instance {}: {}", instance.problem_statement.id, e)
            # Sleep for a while before retrying
            time.sleep(1)

            result = None
            error = str(e)
        finally:
            # Best-effort cleanup of the environment created in this attempt.
            if env is not None:
                try:
                    env.close()
                except Exception:
                    print("fail to close env")
    end_time = time.time()
    time_records['total_time'] = {'start_time': start_time, 'end_time': end_time, 'time_taken': end_time - start_time}
    if successful:
        save_predictions(output_dir, instance.problem_statement.id, result)
    # Also write time records
    (output_dir / "time_records.yaml").write_text(yaml.dump(time_records, indent=2))

    info = result.info if result is not None else {}
    reward = info.get("reward", reward)
    return (agent.history if agent is not None else [], reward, error)

SkyRL.skyrl-train.examples.swe_agent.swe_generator.init_and_run_sb

init_and_run_sb(instance, litellm_model_name, sweagent_config, generator_cfg, data_source, sampling_params, trajectory_id, global_step, training_phase)

Initialize and run the sandbox agent loop for the given instance. Similar to init_and_run_container but uses minisandbox environment.

Source code in SkyRL/skyrl-train/examples/swe_agent/swe_generator.py

def init_and_run_sb(
    instance: BatchInstance,
    litellm_model_name: str,
    sweagent_config: dict,
    generator_cfg: DictConfig,
    data_source: str,
    sampling_params: dict,
    trajectory_id: TrajectoryID,
    global_step: int,
    training_phase: TrainingPhase,
):
    """Initialize and run the sandbox agent loop for the given instance.

    Similar to init_and_run_container but uses the minisandbox environment.
    The sandbox is started (with CPU monitoring), the agent is run against it,
    and the whole attempt is retried up to 10 times on any exception. Timing
    and CPU records, plus one exception log per failed attempt, are written
    under ``<output_dir>/step_<global_step>/<training_phase>/<instance id>``.

    Args:
        instance: Batch instance providing ``problem_statement`` and ``env``.
        litellm_model_name: Not referenced in this function body.
        sweagent_config: Config object; ``agent`` and ``output_dir`` are read.
            Note that ``sweagent_config.agent.model`` is replaced in place.
        generator_cfg: Not referenced in this function body.
        data_source: Not referenced in this function body.
        sampling_params: Sampling parameters applied to the agent's model
            config and also stored as its ``completion_kwargs``.
        trajectory_id: Not referenced in this function body.
        global_step: Training step; used for the output path and passed to the
            rollout signal actor's ``wait_for_step``.
        training_phase: Used as a path component of the output directory.

    Returns:
        A tuple ``(history, reward, error)`` where ``history`` is
        ``agent.history`` (or ``[]`` if no agent was ever created), ``reward``
        is ``result.info["reward"]`` when available (otherwise ``0``), and
        ``error`` is currently always the literal string ``"error"``.
    """
    from loguru import logger

    agent_config = sweagent_config.agent

    # Use new sampling parameters.
    # Can also have custom sampling parameters per trajectory (ex: custom max tokens).
    agent_config.model = agent_config.model.model_copy(update=sampling_params)
    agent_config.model.completion_kwargs = sampling_params

    agent = None
    env = None
    result = None
    reward = 0
    error = None

    single_run_replay_config = RunSingleConfig(
        agent=agent_config,
        problem_statement=instance.problem_statement,
        env=instance.env,
    )

    instance.env.name = f"{instance.problem_statement.id}"

    # The output location does not depend on the attempt, so compute it once.
    # Binding it before the loop also guarantees the except handler and the
    # post-loop writes can always reference it.
    output_dir = (
        Path(sweagent_config.output_dir)
        / f"step_{global_step}"
        / training_phase
        / instance.problem_statement.id
    )

    # Retry loop: any exception during env startup or the agent run triggers
    # another attempt, up to max_retries.
    successful = False
    max_retries = 10
    num_retries = 0
    time_records = {}
    cpu_records = {}
    start_time = time.time()
    while num_retries < max_retries:
        num_retries += 1
        # Reset per-attempt state. Without this, a failure before the sandbox
        # is created would (a) hit an unbound/stale env_startup_time in the
        # except handler and (b) re-close the env of a previous attempt.
        env = None
        env_startup_time = time.time()
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            (output_dir / f"{instance.problem_statement.id}.config.yaml").write_text(
                yaml.dump(single_run_replay_config.model_dump_json(), indent=2)
            )
            bundles = agent_config.tools.bundles

            def start_env_logic(instance):
                # Create the sandbox via the remote task, then start it.
                sandbox = ray.get(start_sandbox_remote.remote(instance, bundles))
                startup_timing = sandbox.start()
                return sandbox, startup_timing

            (env, env_timing), cpu_samples, env_startup_time, env_ready_time = (
                monitor_cpu_with_children(start_env_logic, instance)
            )
            cpu_records[num_retries] = cpu_samples
            time_records[num_retries] = {
                'env_start_time': env_startup_time,
                'env_ready_time': env_ready_time,
                'time_taken': env_ready_time - env_startup_time,
                'detailed_time_data': env_timing,
            }

            # The rollout signal actor is optional (alternatively it could be
            # taken from self.rollout_signal); ray.get_actor raises ValueError
            # when no actor with that name exists.
            try:
                rollout_signal = ray.get_actor("rollout_signal_actor")
            except ValueError:
                rollout_signal = None

            if rollout_signal is not None:
                # Call the async remote method wait_for_step(step_id) and
                # block in the current task until it returns.
                ray.get(rollout_signal.wait_for_step.remote(global_step))

            agent = get_agent_from_config(agent_config)
            agent.replay_config = single_run_replay_config  # type: ignore[attr-defined]
            agent_start_time = time.time()
            result = agent.run(
                problem_statement=instance.problem_statement,
                env=env,
                output_dir=output_dir,
            )
            agent_end_time = time.time()

            time_records[num_retries].update({
                'agent_start_time': agent_start_time,
                'agent_end_time': agent_end_time,
                'agent_time_taken': agent_end_time - agent_start_time,
            })

            # A history this short is treated as a failed run and retried.
            if len(agent.history) <= 2:
                raise Exception("Agent history too short, likely failed run.")
            successful = True

            break  # Exit the retry loop if successful
        except Exception as e:
            # Record how long the failed attempt lasted; this replaces any
            # record written earlier in the same attempt.
            failure_time = time.time()
            time_records[num_retries] = {
                'env_start_time': env_startup_time,
                'env_ready_time': failure_time,
                'time_taken': failure_time - env_startup_time,
            }
            # Write the exception to a per-attempt log file.
            (output_dir / f"exception_{num_retries}.log").write_text(traceback.format_exc())
            logger.error("Error processing instance {}: {}", instance.problem_statement.id, e, exc_info=True)

            result = None
            error = str(e)
        finally:
            # Only close an env that this attempt actually created; closing
            # stays best-effort so a close failure does not abort the retries.
            if env is not None:
                try:
                    env.close()
                except Exception:
                    print("fail to close env")
    end_time = time.time()
    time_records['total_time'] = {
        'start_time': start_time,
        'end_time': end_time,
        'time_taken': end_time - start_time,
    }
    if successful:
        save_predictions(output_dir, instance.problem_statement.id, result)

    # Timing and CPU usage records are written whether or not a run succeeded.
    (output_dir / "time_records.yaml").write_text(
        yaml.dump(time_records, indent=2)
    )
    (output_dir / "cpu_usage_records.json").write_text(
        json.dumps(cpu_records, indent=2)
    )

    info = result.info if result is not None else {}
    reward = info.get("reward", reward)
    # NOTE(review): error is unconditionally overwritten here, so callers always
    # receive "error" even after a successful run. Kept as-is because the
    # container variant does the same -- confirm against callers before changing.
    error = "error"
    return (agent.history if agent is not None else [], reward, error)

API Reference

SkyRL.skyrl-train.examples.swe_agent.main_swe.MiniSWEPPOExp

get_generator

get_trainer

SkyRL.skyrl-train.examples.swe_agent.swe_generator.SweAgentGenerator

generate async

minisweagent_agent_loop async

SkyRL.skyrl-train.examples.swe_agent.swe_generator.init_and_run_container

SkyRL.skyrl-train.examples.swe_agent.swe_generator.init_and_run_sb

generate `async`

minisweagent_agent_loop `async`