From 2bd615d119a9e9e5362d8633fcccf59129660f15 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Sun, 24 Mar 2024 16:31:11 +0300
Subject: [PATCH 01/16] Add specialist agent that answers as if it were a

---
 experiments/run_mmlu.py                    |   4 +-
 swarm/environment/agents/__init__.py       |   2 +
 .../agents/mmlu/specialist_agent.py        |  15 +++
 swarm/environment/operations/__init__.py   |   2 +
 .../operations/specialist_answer.py        | 119 ++++++++++++++++++
 .../agents/test_specialist_agent.py        |  32 +++++
 6 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 swarm/environment/agents/mmlu/specialist_agent.py
 create mode 100644 swarm/environment/operations/specialist_answer.py
 create mode 100644 test/swarm/environment/agents/test_specialist_agent.py

diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index e105c75..f05426d 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -16,7 +16,7 @@ def parse_args():
                         choices=['DirectAnswer', 'FullConnectedSwarm', 'RandomSwarm', 'OptimizedSwarm'],
                         help="Mode of operation. Default is 'OptimizedSwarm'.")
 
-    parser.add_argument('--num-truthful-agents', type=int, default=1,
+    parser.add_argument('--num-truthful-agents', type=int, default=3, # 1
                         help="Number of truthful agents. The total will be N truthful and N adversarial.")
 
     parser.add_argument('--num-iterations', type=int, default=200,
@@ -60,7 +60,7 @@ async def main():
     else:
         N = args.num_truthful_agents
         M = N
-        agent_name_list = N * ["IO"] + M * ["AdversarialAgent"]
+        agent_name_list = N * ["SpecialistAgent"] + M * ["AdversarialAgent"]
 
         swarm_name = f"{N}true_{M}adv"
 
diff --git a/swarm/environment/agents/__init__.py b/swarm/environment/agents/__init__.py
index cdd00c4..e577525 100644
--- a/swarm/environment/agents/__init__.py
+++ b/swarm/environment/agents/__init__.py
@@ -12,6 +12,7 @@
 from swarm.environment.agents.gaia.normal_io import NormalIO
 from swarm.environment.agents.humaneval.code_io import CodeIO
 # from swarm.environment.agents.humaneval.code_reflection import CodeReflection
+from swarm.environment.agents.mmlu.specialist_agent import SpecialistAgent
 
 __all__ = [
     "IO",
@@ -27,4 +28,5 @@
     "NormalIO",
     "WebIO",
     "CodeIO",
+    "SpecialistAgent",
 ]
\ No newline at end of file
diff --git a/swarm/environment/agents/mmlu/specialist_agent.py b/swarm/environment/agents/mmlu/specialist_agent.py
new file mode 100644
index 0000000..c5d8885
--- /dev/null
+++ b/swarm/environment/agents/mmlu/specialist_agent.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from swarm.graph import Graph
+from swarm.environment.operations import SpecialistAnswer
+from swarm.environment.agents.agent_registry import AgentRegistry
+
+
+@AgentRegistry.register('SpecialistAgent')
+class SpecialistAgent(Graph):
+    def build_graph(self):
+        io = SpecialistAnswer(self.domain, self.model_name)
+        self.add_node(io)
+        self.input_nodes = [io]
+        self.output_nodes = [io]
diff --git a/swarm/environment/operations/__init__.py b/swarm/environment/operations/__init__.py
index 38ef351..796015b 100644
--- a/swarm/environment/operations/__init__.py
+++ b/swarm/environment/operations/__init__.py
@@ -1,6 +1,7 @@
 from swarm.environment.operations.combine_answer import CombineAnswer
 from swarm.environment.operations.generate_query import GenerateQuery
 from swarm.environment.operations.direct_answer import DirectAnswer
+from swarm.environment.operations.specialist_answer import SpecialistAnswer
 from swarm.environment.operations.file_analyse import FileAnalyse
 from swarm.environment.operations.web_search import WebSearch
 from swarm.environment.operations.reflect import Reflect
@@ -13,6 +14,7 @@
     "CombineAnswer",
     "GenerateQuery",
     "DirectAnswer",
+    "SpecialistAnswer",
     "FileAnalyse",
     "WebSearch",
     "Reflect",
diff --git a/swarm/environment/operations/specialist_answer.py b/swarm/environment/operations/specialist_answer.py
new file mode 100644
index 0000000..4218078
--- /dev/null
+++ b/swarm/environment/operations/specialist_answer.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from copy import deepcopy
+from collections import defaultdict
+from swarm.llm.format import Message
+from swarm.graph import Node
+from swarm.memory.memory import GlobalMemory
+from typing import List, Any, Optional
+from swarm.utils.log import logger, swarmlog
+from swarm.utils.globals import Cost
+from swarm.environment.prompt.prompt_set_registry import PromptSetRegistry
+from swarm.llm.format import Message
+from swarm.llm import LLMRegistry
+from swarm.optimizer.node_optimizer import MetaPromptOptimizer
+
+
+"""
+Imagine someone who has to answer questions.
+They can be any person.
+Make a list of their possible specializations or social roles.
+Make the list as diverse as possible so that you expect them to answer the same question differently.
+Make a list of 20, list items only, no need for a description.
+"""
+
+class SpecialistAnswer(Node):
+    role_list = [
+        "Botanist",
+        "Data Scientist",
+        "Social Worker",
+        "Journalist",
+        "Pilot",
+        "Anthropologist",
+        "Fitness Coach",
+        "Politician",
+        "Artist",
+        "Marine Biologist",
+        "Ethicist",
+        "Entrepreneur",
+        "Linguist",
+        "Archaeologist",
+        "Nurse",
+        "Graphic Designer",
+        "Philanthropist",
+        "Meteorologist",
+        "Sommelier",
+        "Cybersecurity Expert"
+    ]
+
+    def __init__(self,
+                 domain: str,
+                 model_name: Optional[str],
+                 operation_description: str = "Aswer as if you were a specialist in .",
+                 max_token: int = 50,
+                 id=None):
+        super().__init__(operation_description, id, True)
+        self.domain = domain
+        self.model_name = model_name
+        self.llm = LLMRegistry.get(model_name)
+        self.max_token = max_token
+        self.prompt_set = PromptSetRegistry.get(domain)
+
+    @property
+    def node_name(self):
+        return self.__class__.__name__
+
+    async def node_optimize(self, input, meta_optmize=False):
+        task = input["task"]
+        self.prompt_set = PromptSetRegistry.get(self.domain)
+        role = self.prompt_set.get_role()
+        constraint = self.prompt_set.get_constraint()
+
+        if meta_optmize:
+            update_role = role
+            node_optmizer = MetaPromptOptimizer(self.model_name, self.node_name)
+            update_constraint = await node_optmizer.generate(constraint, task)
+            return update_role, update_constraint
+
+        return role, constraint
+
+
+    async def _execute(self, inputs: List[Any] = [], **kwargs):
+
+        node_inputs = self.process_input(inputs)
+        outputs = []
+
+        for input in node_inputs:
+            task = input["task"]
+            _, constraint = await self.node_optimize(input, meta_optmize=False)
+
+            # Override role with a specialist role.
+            idx_role = hash(self.id) % len(self.role_list)
+            role = self.role_list[idx_role]
+            print(role)
+
+            system_message = f"You are a {role}. {constraint}. Aswer with one of the 4 letters only: A, B, C or D."
+
+            prompt = self.prompt_set.get_answer_prompt(question=task)
+            message = [Message(role="system", content=system_message),
+                       Message(role="user", content=prompt)]
+            response = await self.llm.agen(message, max_tokens=self.max_token)
+
+            execution = {
+                "operation": self.node_name,
+                "task": task,
+                "files": input.get("files", []),
+                "input": task,
+                "role": role,
+                "constraint": constraint,
+                "prompt": prompt,
+                "output": response,
+                "ground_truth": input.get("GT", []),
+                "format": "natural language"
+            }
+            outputs.append(execution)
+            self.memory.add(self.id, execution)
+
+        # self.log()
+        return outputs
\ No newline at end of file
diff --git a/test/swarm/environment/agents/test_specialist_agent.py b/test/swarm/environment/agents/test_specialist_agent.py
new file mode 100644
index 0000000..bbd2f98
--- /dev/null
+++ b/test/swarm/environment/agents/test_specialist_agent.py
@@ -0,0 +1,32 @@
+import pytest
+
+from swarm.environment.agents.mmlu.specialist_agent import SpecialistAgent
+
+
+@pytest.mark.parametrize("model_name", [
+    pytest.param('mock', marks=pytest.mark.mock_llm),
+    pytest.param(None),
+])
+@pytest.mark.asyncio
+async def test_io(model_name):
+    task = """
+What is love?
+A: a feeling
+B: a utopia
+C: a chemical process
+D: baby don't hurt me no more
+"""
+    responses = []
+    for _ in range(10):
+        io = SpecialistAgent("mmlu", model_name)
+        response = await io.run([{"task": task}])
+        print(response)
+        responses.append(response)
+
+    print(responses[0])
+
+    print()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-s", "-m", "not mock_llm"])

From 1127d93afe1101c369b42340149d9cfc6c887946 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Sun, 24 Mar 2024 16:37:06 +0300
Subject: [PATCH 02/16] Fix prints

---
 test/swarm/environment/agents/test_specialist_agent.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/swarm/environment/agents/test_specialist_agent.py b/test/swarm/environment/agents/test_specialist_agent.py
index bbd2f98..ddd62c5 100644
--- a/test/swarm/environment/agents/test_specialist_agent.py
+++ b/test/swarm/environment/agents/test_specialist_agent.py
@@ -20,10 +20,10 @@ async def test_io(model_name):
     for _ in range(10):
         io = SpecialistAgent("mmlu", model_name)
         response = await io.run([{"task": task}])
-        print(response)
-        responses.append(response)
+        print(response[0])
+        responses.append(response[0])
 
-    print(responses[0])
+    print(responses)
 
     print()
From 29f6e7998a56a449a4ced0bba72362773ed992ff Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Sun, 24 Mar 2024 21:26:06 +0300
Subject: [PATCH 03/16] Debug DirectAnswer first

---
 experiments/evaluator/evaluator.py              |  2 +-
 experiments/run_mmlu.py                         | 12 ++++++----
 swarm/environment/operations/combine_answer.py  |  2 +-
 swarm/environment/operations/file_analyse.py    |  2 +-
 swarm/environment/operations/final_decision.py  |  2 +-
 .../operations/specialist_answer.py             | 24 ++++++++++---------
 swarm/environment/operations/web_search.py      |  2 +-
 swarm/graph/graph.py                            |  4 ++--
 8 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py
index 53d2733..994a135 100644
--- a/experiments/evaluator/evaluator.py
+++ b/experiments/evaluator/evaluator.py
@@ -99,7 +99,7 @@ async def evaluate_swarm(
             ],
         edge_probs: Optional[torch.Tensor] = None,
         limit_questions: Optional[int] = None,
-        eval_batch_size: int = 4,
+        eval_batch_size: int = 1, # 4,
         ) -> float:
 
         assert self._swarm is not None
diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index f05426d..30a6471 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -16,10 +16,10 @@ def parse_args():
                         choices=['DirectAnswer', 'FullConnectedSwarm', 'RandomSwarm', 'OptimizedSwarm'],
                         help="Mode of operation. Default is 'OptimizedSwarm'.")
 
-    parser.add_argument('--num-truthful-agents', type=int, default=3, # 1
+    parser.add_argument('--num-truthful-agents', type=int, default=5, # 1
                         help="Number of truthful agents. The total will be N truthful and N adversarial.")
 
-    parser.add_argument('--num-iterations', type=int, default=200,
+    parser.add_argument('--num-iterations', type=int, default=5, # 200,
                         help="Number of optimization iterations. Default 200.")
 
     parser.add_argument('--model_name', type=str, default=None,
@@ -59,10 +59,12 @@ async def main():
         swarm = None
     else:
         N = args.num_truthful_agents
-        M = N
-        agent_name_list = N * ["SpecialistAgent"] + M * ["AdversarialAgent"]
+        # M = N
+        # agent_name_list = N * ["SpecialistAgent"]
+        agent_name_list = N * ["IO"]
 
-        swarm_name = f"{N}true_{M}adv"
+        # swarm_name = f"{N}true_{M}adv"
+        swarm_name = f"{N}specialist"
 
         swarm = Swarm(
             agent_name_list,
diff --git a/swarm/environment/operations/combine_answer.py b/swarm/environment/operations/combine_answer.py
index b7c2e1a..794429b 100644
--- a/swarm/environment/operations/combine_answer.py
+++ b/swarm/environment/operations/combine_answer.py
@@ -77,7 +77,7 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
 
         self.memory.add(self.id, executions)
 
-        self.log()
+        # self.log()
         return [executions]
 
     #return executions
diff --git a/swarm/environment/operations/file_analyse.py b/swarm/environment/operations/file_analyse.py
index 030eaf4..cf242e1 100644
--- a/swarm/environment/operations/file_analyse.py
+++ b/swarm/environment/operations/file_analyse.py
@@ -57,7 +57,7 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
             outputs.append(executions)
             self.memory.add(self.id, executions)
 
-        self.log()
+        # self.log()
         return outputs
diff --git a/swarm/environment/operations/final_decision.py b/swarm/environment/operations/final_decision.py
index 6ee63a4..18f32fe 100644
--- a/swarm/environment/operations/final_decision.py
+++ b/swarm/environment/operations/final_decision.py
@@ -142,7 +142,7 @@ async def _execute(self, inputs: List[Any] = [],
                       "format": "natural language"}
 
         self.memory.add(self.id, executions)
-        self.log()
+        # self.log()
 
         return executions
diff --git a/swarm/environment/operations/specialist_answer.py b/swarm/environment/operations/specialist_answer.py
index 4218078..ffdfc3b 100644
--- a/swarm/environment/operations/specialist_answer.py
+++ b/swarm/environment/operations/specialist_answer.py
@@ -50,7 +50,7 @@ class SpecialistAnswer(Node):
     def __init__(self,
                  domain: str,
                  model_name: Optional[str],
-                 operation_description: str = "Aswer as if you were a specialist in .",
+                 operation_description: str = "Answer as if you were a specialist in .",
                  max_token: int = 50,
                  id=None):
         super().__init__(operation_description, id, True)
@@ -60,9 +60,14 @@ def __init__(self,
         self.max_token = max_token
         self.prompt_set = PromptSetRegistry.get(domain)
 
+        # Override role with a specialist role.
+        idx_role = hash(self.id) % len(self.role_list)
+        self.role = self.role_list[idx_role]
+        print(f"Creating a node with specialization {self.role}")
+
     @property
     def node_name(self):
-        return self.__class__.__name__
+        return f"{self.__class__.__name__}_{self.role}"
 
     async def node_optimize(self, input, meta_optmize=False):
         task = input["task"]
@@ -78,7 +83,6 @@ async def node_optimize(self, input, meta_optmize=False):
 
         return role, constraint
 
-
     async def _execute(self, inputs: List[Any] = [], **kwargs):
 
         node_inputs = self.process_input(inputs)
@@ -88,12 +92,10 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
             task = input["task"]
             _, constraint = await self.node_optimize(input, meta_optmize=False)
 
-            # Override role with a specialist role.
-            idx_role = hash(self.id) % len(self.role_list)
-            role = self.role_list[idx_role]
-            print(role)
-
-            system_message = f"You are a {role}. {constraint}. Aswer with one of the 4 letters only: A, B, C or D."
+            system_message = (
+                f"You are a {self.role}. {constraint}. "
+                "Answer with one of the 4 letters: A, B, C or D. "
+                "And then elaborate in a separate sentense.")
 
             prompt = self.prompt_set.get_answer_prompt(question=task)
             message = [Message(role="system", content=system_message),
@@ -105,7 +107,7 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
                 "task": task,
                 "files": input.get("files", []),
                 "input": task,
-                "role": role,
+                "role": self.role,
                 "constraint": constraint,
                 "prompt": prompt,
                 "output": response,
@@ -116,4 +118,4 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
             self.memory.add(self.id, execution)
 
             # self.log()
-            return outputs
\ No newline at end of file
+            return outputs
diff --git a/swarm/environment/operations/web_search.py b/swarm/environment/operations/web_search.py
index 126cf70..4a81b58 100644
--- a/swarm/environment/operations/web_search.py
+++ b/swarm/environment/operations/web_search.py
@@ -74,7 +74,7 @@ async def _execute(self, inputs: List[Any] = [], max_keywords: int = 5, **kwargs
             self.memory.add(self.id, executions)
             outputs.append(executions)
 
-        self.log()
+        # self.log()
         return outputs
 
     def web_search(self, query):
diff --git a/swarm/graph/graph.py b/swarm/graph/graph.py
index e5ea61d..c666b02 100644
--- a/swarm/graph/graph.py
+++ b/swarm/graph/graph.py
@@ -52,7 +52,7 @@ def __init__(self,
         self.domain = domain
         self.model_name = model_name
         self.meta_prompt = meta_prompt
-        self.nodes = {}
+        self.nodes: Dict[str, Node] = {}
         self.memory = GlobalMemory.instance()
         self.is_aggregate = False
         self.input_nodes: List[Node] = []
@@ -167,7 +167,7 @@ def is_node_useful(node):
             final_answers.append("No answer since there are no inputs provided")
         return final_answers
 
-    def find_node(self, id: str):
+    def find_node(self, id: str) -> Node:
        for node in self.nodes.values():
            if node.id == id:
                return node
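A note on the role selection above: `hash(self.id) % len(self.role_list)` is only deterministic within one process. If node ids are strings, Python salts `str` hashes per interpreter run (see PYTHONHASHSEED), so the same node id can map to a different specialist on every launch. A minimal sketch of a run-stable alternative, using a hypothetical `stable_role` helper that is not part of this patch series:

    import hashlib
    from typing import List

    def stable_role(node_id: str, roles: List[str]) -> str:
        # md5 of the node id is identical across processes, unlike built-in hash().
        digest = hashlib.md5(node_id.encode("utf-8")).hexdigest()
        return roles[int(digest, 16) % len(roles)]

    roles = ["Botanist", "Pilot", "Nurse"]
    assert stable_role("node-42", roles) == stable_role("node-42", roles)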
From e38c480b2dc52c74927cc446382adaa79d0bf27d Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Sun, 24 Mar 2024 21:27:11 +0300
Subject: [PATCH 04/16] Enable potential edges towards input nodes

---
 swarm/graph/node.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/swarm/graph/node.py b/swarm/graph/node.py
index e2a14cd..0f5d257 100644
--- a/swarm/graph/node.py
+++ b/swarm/graph/node.py
@@ -110,25 +110,27 @@ async def execute(self, **kwargs):
         self.outputs = []
         tasks = []
 
-        if not self.inputs and self.predecessors:
+        # if not self.inputs and self.predecessors:
+        # if len(self.inputs) == 0 and len(self.predecessors) > 0:
+        if True:
             if self.combine_inputs_as_one:
-                combined_inputs = []
+                combined_inputs = self.inputs
                 for predecessor in self.predecessors:
                     predecessor_outputs = predecessor.outputs
                     if predecessor_outputs is not None and isinstance(predecessor_outputs, list):
                         combined_inputs.extend(predecessor_outputs)
                 tasks.append(asyncio.create_task(self._execute(combined_inputs, **kwargs)))
             else:
-                for predecessor in self.predecessors:
+                for predecessor in self.predecessors: # TODO fix this branch as well, IT IS BROKEN
                     predecessor_outputs = predecessor.outputs
                     if isinstance(predecessor_outputs, list) and predecessor_outputs:
                         for predecessor_output in predecessor_outputs:
                             tasks.append(asyncio.create_task(self._execute(predecessor_output, **kwargs)))
-        elif self.inputs:
-            tasks = [asyncio.create_task(self._execute(input, **kwargs)) for input in self.inputs]
-        else:
-            warnings.warn("No input received.")
-            return
+        # elif self.inputs:
+        #     tasks = [asyncio.create_task(self._execute(input, **kwargs)) for input in self.inputs]
+        # else:
+        #     warnings.warn("No input received.")
+        #     return
 
         if tasks:
             results = await asyncio.gather(*tasks, return_exceptions=True)
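The change above makes every node merge its direct swarm inputs with whatever its predecessors produced, instead of consulting predecessors only when there is no direct input; this is what lets optimized edges point at input nodes. A simplified, self-contained sketch of the combining branch (TinyNode is a hypothetical stand-in, not the real swarm.graph.Node):

    import asyncio

    class TinyNode:
        def __init__(self, inputs=None, predecessors=None):
            self.inputs = inputs or []          # direct (swarm) inputs
            self.predecessors = predecessors or []
            self.outputs = []

        async def execute(self):
            # Direct inputs and predecessor outputs are now always combined,
            # so an input node can also receive edges from other agents.
            combined = list(self.inputs)
            for pred in self.predecessors:
                if isinstance(pred.outputs, list):
                    combined.extend(pred.outputs)
            self.outputs = combined
            return self.outputs

    async def demo():
        a = TinyNode(inputs=[{"task": "Q1"}])
        a.outputs = [{"operation": "DirectAnswer", "output": "B"}]
        b = TinyNode(inputs=[{"task": "Q1"}], predecessors=[a])
        print(await b.execute())  # both the task and A's opinion

    asyncio.run(demo())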
From 6e7858ed3731e251c2e47d7530cd2ed1006ff18d Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Mon, 25 Mar 2024 14:32:22 +0300
Subject: [PATCH 05/16] Parametrize is_async

---
 experiments/evaluator/evaluator.py | 32 ++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py
index 994a135..60c33a6 100644
--- a/experiments/evaluator/evaluator.py
+++ b/experiments/evaluator/evaluator.py
@@ -99,7 +99,8 @@ async def evaluate_swarm(
             ],
         edge_probs: Optional[torch.Tensor] = None,
         limit_questions: Optional[int] = None,
-        eval_batch_size: int = 1, # 4,
+        eval_batch_size: int = 4,
+        is_async: bool = True,
         ) -> float:
 
         assert self._swarm is not None
@@ -143,7 +144,7 @@ def eval_loader(batch_size: int) -> Iterator[List[Any]]:
 
             start_ts = time.time()
 
-            future_answers = []
+            maybe_future_answers = []
             for record in record_batch:
                 if mode == 'randomly_connected_swarm':
                     realized_graph, _ = self._swarm.connection_dist.realize(self._swarm.composite_graph)
@@ -153,9 +154,16 @@ def eval_loader(batch_size: int) -> Iterator[List[Any]]:
                 print(input_dict)
 
                 future_answer = self._swarm.arun(input_dict, realized_graph)
-                future_answers.append(future_answer)
+                if is_async:
+                    maybe_future_answer = future_answer
+                else:
+                    maybe_future_answer = await future_answer
+                maybe_future_answers.append(maybe_future_answer)
 
-            raw_answers = await asyncio.gather(*future_answers)
+            if is_async:
+                raw_answers = await asyncio.gather(*maybe_future_answers)
+            else:
+                raw_answers = maybe_future_answers
 
             print(f"Batch time {time.time() - start_ts:.3f}")
 
@@ -206,6 +214,7 @@ async def optimize_swarm(
         num_iters: int,
         lr: float,
         batch_size: int = 4,
+        is_async: bool = True,
         ) -> torch.Tensor:
 
         assert self._swarm is not None
@@ -240,7 +249,7 @@ def infinite_data_loader() -> Iterator[pd.DataFrame]:
 
             start_ts = time.time()
 
-            future_answers = []
+            maybe_future_answers = []
             log_probs = []
             correct_answers = []
             for i_record, record in zip(range(batch_size), loader):
@@ -251,13 +260,20 @@ def infinite_data_loader() -> Iterator[pd.DataFrame]:
                     )
 
                 input_dict = dataset.record_to_swarm_input(record)
-                answer = self._swarm.arun(input_dict, realized_graph)
-                future_answers.append(answer)
+                future_answer = self._swarm.arun(input_dict, realized_graph)
+                if is_async:
+                    maybe_future_answer = future_answer
+                else:
+                    maybe_future_answer = await future_answer
+                maybe_future_answers.append(maybe_future_answer)
                 log_probs.append(log_prob)
                 correct_answer = dataset.record_to_target_answer(record)
                 correct_answers.append(correct_answer)
 
-            raw_answers = await asyncio.gather(*future_answers)
+            if is_async:
+                raw_answers = await asyncio.gather(*maybe_future_answers)
+            else:
+                raw_answers = maybe_future_answers
 
             print(f"Batch time {time.time() - start_ts:.3f}")
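The `is_async` switch above trades throughput for debuggability: `asyncio.gather` interleaves all swarm runs in a batch, while the sequential path awaits them one at a time, which keeps printed logs in order and stops at the first failure with a clean traceback. A generic sketch of the same pattern (a hypothetical helper, not part of the patch):

    import asyncio
    from typing import Awaitable, Callable, List, TypeVar

    T = TypeVar("T")

    async def run_batch(coro_fns: List[Callable[[], Awaitable[T]]],
                        is_async: bool = True) -> List[T]:
        if is_async:
            # Launch everything concurrently; results come back together.
            return await asyncio.gather(*(fn() for fn in coro_fns))
        # One at a time; easier to step through when debugging.
        return [await fn() for fn in coro_fns]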
{constraint}" + + message = [Message(role="system", content=system_message), + Message(role="user", content=user_message)] + response = await self.llm.agen(message, max_tokens=self.max_token) + + execution = { + "operation": self.node_name, + "task": task, + "files": input.get("files", []), + "input": task, + "role": role, + "constraint": constraint, + "prompt": prompt, + "output": response, + "ground_truth": input.get("GT", []), + "format": "natural language" + } + outputs.append(execution) + self.memory.add(self.id, execution) # self.log() return outputs \ No newline at end of file diff --git a/swarm/environment/prompt/mmlu_prompt_set.py b/swarm/environment/prompt/mmlu_prompt_set.py index a9a735f..a916a24 100644 --- a/swarm/environment/prompt/mmlu_prompt_set.py +++ b/swarm/environment/prompt/mmlu_prompt_set.py @@ -20,14 +20,14 @@ def get_role(): @staticmethod def get_constraint(): return """ - I will ask you a question. - I will also give you 4 answers enumerated as A, B, C and D. - Only one answer out of the offered 4 is correct. - You must choose the correct answer to the question. - Your response must be one of the 4 letters: A, B, C or D, - corresponding to the correct answer. - Only one letter (A, B, C or D) is allowed in your answer. - """ +I will ask you a question. +I will also give you 4 answers enumerated as A, B, C and D. +Only one answer out of the offered 4 is correct. +You must choose the correct answer to the question. +Your response must be one of the 4 letters: A, B, C or D, +corresponding to the correct answer. +Only one letter (A, B, C or D) is allowed in your answer. +""" @staticmethod def get_format(): @@ -51,7 +51,7 @@ def get_websearch_prompt(query): @staticmethod def get_adversarial_answer_prompt(question): - return f"""Answer a lie to the following question: {question}. """ + return f"Answer a lie to the following question: {question}. " @staticmethod def get_distill_websearch_prompt(query, results): From fce3a65f686cb1020983a08894752a574916d29d Mon Sep 17 00:00:00 2001 From: Dmitrii Khizbullin Date: Mon, 25 Mar 2024 15:05:55 +0300 Subject: [PATCH 07/16] Allow agent to elaborate the single-letter answer --- experiments/run_mmlu.py | 7 +- .../agents/mmlu/specialist_agent.py | 8 +-- .../operations/specialist_answer.py | 69 +++++++++++-------- swarm/environment/prompt/mmlu_prompt_set.py | 4 +- .../agents/test_specialist_agent.py | 6 +- 5 files changed, 55 insertions(+), 39 deletions(-) diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py index 30a6471..895b356 100644 --- a/experiments/run_mmlu.py +++ b/experiments/run_mmlu.py @@ -19,7 +19,7 @@ def parse_args(): parser.add_argument('--num-truthful-agents', type=int, default=5, # 1 help="Number of truthful agents. The total will be N truthful and N adversarial.") - parser.add_argument('--num-iterations', type=int, default=5, # 200, + parser.add_argument('--num-iterations', type=int, default=50, # 200 help="Number of optimization iterations. 
Default 200.") parser.add_argument('--model_name', type=str, default=None, @@ -60,10 +60,11 @@ async def main(): else: N = args.num_truthful_agents # M = N - # agent_name_list = N * ["SpecialistAgent"] - agent_name_list = N * ["IO"] + # agent_name_list = N * ["IO"] + agent_name_list = N * ["SpecialistAgent"] # swarm_name = f"{N}true_{M}adv" + # swarm_name = f"{N}io" swarm_name = f"{N}specialist" swarm = Swarm( diff --git a/swarm/environment/agents/mmlu/specialist_agent.py b/swarm/environment/agents/mmlu/specialist_agent.py index c5d8885..287e8e7 100644 --- a/swarm/environment/agents/mmlu/specialist_agent.py +++ b/swarm/environment/agents/mmlu/specialist_agent.py @@ -9,7 +9,7 @@ @AgentRegistry.register('SpecialistAgent') class SpecialistAgent(Graph): def build_graph(self): - io = SpecialistAnswer(self.domain, self.model_name) - self.add_node(io) - self.input_nodes = [io] - self.output_nodes = [io] + sa = SpecialistAnswer(self.domain, self.model_name) + self.add_node(sa) + self.input_nodes = [sa] + self.output_nodes = [sa] diff --git a/swarm/environment/operations/specialist_answer.py b/swarm/environment/operations/specialist_answer.py index ffdfc3b..b5cabaf 100644 --- a/swarm/environment/operations/specialist_answer.py +++ b/swarm/environment/operations/specialist_answer.py @@ -67,7 +67,7 @@ def __init__(self, @property def node_name(self): - return f"{self.__class__.__name__}_{self.role}" + return f"{self.__class__.__name__} {self.role}" async def node_optimize(self, input, meta_optmize=False): task = input["task"] @@ -88,34 +88,47 @@ async def _execute(self, inputs: List[Any] = [], **kwargs): node_inputs = self.process_input(inputs) outputs = [] + task: Optional[str] = None + additional_knowledge: List[str] = [] for input in node_inputs: - task = input["task"] - _, constraint = await self.node_optimize(input, meta_optmize=False) - - system_message = ( - f"You are a {self.role}. {constraint}. " - "Answer with one of the 4 letters: A, B, C or D. " - "And then elaborate in a separate sentense.") - - prompt = self.prompt_set.get_answer_prompt(question=task) - message = [Message(role="system", content=system_message), - Message(role="user", content=prompt)] - response = await self.llm.agen(message, max_tokens=self.max_token) - - execution = { - "operation": self.node_name, - "task": task, - "files": input.get("files", []), - "input": task, - "role": self.role, - "constraint": constraint, - "prompt": prompt, - "output": response, - "ground_truth": input.get("GT", []), - "format": "natural language" - } - outputs.append(execution) - self.memory.add(self.id, execution) + if len(input) == 1 and 'task' in input: # Swarm input + task = input['task'] + else: # All other incoming edges + extra_knowledge = f"Opinion of {input['operation']} is \"{input['output']}\"." + additional_knowledge.append(extra_knowledge) + + if task is None: + raise ValueError(f"{self.__class__.__name__} expects swarm input among inputs") + + user_message = "\n\n" + if len(additional_knowledge) > 0: + for extra_knowledge in additional_knowledge: + user_message = user_message + extra_knowledge + "\n\n" + + prompt = self.prompt_set.get_answer_prompt(question=task) + user_message = user_message + prompt + + _, constraint = await self.node_optimize(input, meta_optmize=False) + system_message = f"You are a {self.role}. 
{constraint}" + + message = [Message(role="system", content=system_message), + Message(role="user", content=user_message)] + response = await self.llm.agen(message, max_tokens=self.max_token) + + execution = { + "operation": self.node_name, + "task": task, + "files": input.get("files", []), + "input": task, + "role": self.role, + "constraint": constraint, + "prompt": prompt, + "output": response, + "ground_truth": input.get("GT", []), + "format": "natural language" + } + outputs.append(execution) + self.memory.add(self.id, execution) # self.log() return outputs diff --git a/swarm/environment/prompt/mmlu_prompt_set.py b/swarm/environment/prompt/mmlu_prompt_set.py index a916a24..15b5180 100644 --- a/swarm/environment/prompt/mmlu_prompt_set.py +++ b/swarm/environment/prompt/mmlu_prompt_set.py @@ -24,9 +24,9 @@ def get_constraint(): I will also give you 4 answers enumerated as A, B, C and D. Only one answer out of the offered 4 is correct. You must choose the correct answer to the question. -Your response must be one of the 4 letters: A, B, C or D, +Your response must start with one of the 4 letters: A, B, C or D, corresponding to the correct answer. -Only one letter (A, B, C or D) is allowed in your answer. +After the single-letter answer, add an short explaination of why you gave this answer. """ @staticmethod diff --git a/test/swarm/environment/agents/test_specialist_agent.py b/test/swarm/environment/agents/test_specialist_agent.py index ddd62c5..94dc625 100644 --- a/test/swarm/environment/agents/test_specialist_agent.py +++ b/test/swarm/environment/agents/test_specialist_agent.py @@ -12,19 +12,21 @@ async def test_io(model_name): task = """ What is love? A: a feeling -B: a utopia +B: all you need C: a chemical process D: baby don't hurt me no more """ responses = [] for _ in range(10): io = SpecialistAgent("mmlu", model_name) - response = await io.run([{"task": task}]) + response = await io.run({"task": task}) print(response[0]) responses.append(response[0]) print(responses) + print() + print([r[0] for r in responses]) print() From 3be92428e914102a94c976ade57437f58385d4f0 Mon Sep 17 00:00:00 2001 From: Dmitrii Khizbullin Date: Mon, 25 Mar 2024 17:10:03 +0300 Subject: [PATCH 08/16] Fix MajorityVote for single letter --- swarm/environment/operations/final_decision.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/swarm/environment/operations/final_decision.py b/swarm/environment/operations/final_decision.py index 18f32fe..ffde8e9 100644 --- a/swarm/environment/operations/final_decision.py +++ b/swarm/environment/operations/final_decision.py @@ -87,6 +87,10 @@ async def _execute(self, inputs: List[Any] = [], if len(inputs) == 0: raise Exception("No inputs is not supported for MajorityVote") answers = [input.get("output") for input in inputs] + + # Quick hack + answers = [s[0].upper() for s in answers] + counter = Counter(answers) sorted_counter = counter.most_common() max_freq = sorted_counter[0][1] From 8bde68e9e6b25eec92caf2474bc17dceefc74bb3 Mon Sep 17 00:00:00 2001 From: Dmitrii Khizbullin Date: Mon, 25 Mar 2024 18:25:45 +0300 Subject: [PATCH 09/16] Better opinion template --- .../operations/specialist_answer.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/swarm/environment/operations/specialist_answer.py b/swarm/environment/operations/specialist_answer.py index b5cabaf..6a0df66 100644 --- a/swarm/environment/operations/specialist_answer.py +++ b/swarm/environment/operations/specialist_answer.py @@ -94,19 +94,25 @@ async def _execute(self, 
From 8bde68e9e6b25eec92caf2474bc17dceefc74bb3 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Mon, 25 Mar 2024 18:25:45 +0300
Subject: [PATCH 09/16] Better opinion template

---
 .../operations/specialist_answer.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/swarm/environment/operations/specialist_answer.py b/swarm/environment/operations/specialist_answer.py
index b5cabaf..6a0df66 100644
--- a/swarm/environment/operations/specialist_answer.py
+++ b/swarm/environment/operations/specialist_answer.py
@@ -94,19 +94,25 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
             if len(input) == 1 and 'task' in input: # Swarm input
                 task = input['task']
             else: # All other incoming edges
-                extra_knowledge = f"Opinion of {input['operation']} is \"{input['output']}\"."
+                extra_knowledge = f"Opinion of {input['operation']} is {input['output']}."
                 additional_knowledge.append(extra_knowledge)
 
         if task is None:
             raise ValueError(f"{self.__class__.__name__} expects swarm input among inputs")
 
-        user_message = "\n\n"
+        opinions = ""
         if len(additional_knowledge) > 0:
             for extra_knowledge in additional_knowledge:
-                user_message = user_message + extra_knowledge + "\n\n"
+                opinions = opinions + extra_knowledge + "\n\n"
 
-        prompt = self.prompt_set.get_answer_prompt(question=task)
-        user_message = user_message + prompt
+        question = self.prompt_set.get_answer_prompt(question=task)
+        user_message = question
+        if len(opinions) > 0:
+            user_message = f"""{user_message}
+
+Take into account the following opinions which may or may not be true:
+
+{opinions}"""
 
         _, constraint = await self.node_optimize(input, meta_optmize=False)
         system_message = f"You are a {self.role}. {constraint}"
@@ -122,7 +128,7 @@ async def _execute(self, inputs: List[Any] = [], **kwargs):
             "input": task,
             "role": self.role,
             "constraint": constraint,
-            "prompt": prompt,
+            "prompt": user_message,
             "output": response,
             "ground_truth": input.get("GT", []),
             "format": "natural language"
Default 200.") - parser.add_argument('--model_name', type=str, default=None, + parser.add_argument('--model_name', type=str, default='gpt-3.5-turbo-1106', # None, 'gpt-35-turbo-0301' help="Model name, None runs the default ChatGPT4.") parser.add_argument('--domain', type=str, default="mmlu", @@ -81,7 +81,10 @@ async def main(): download() dataset_train = MMLUDataset('dev') - dataset_val = MMLUDataset('val') + # dataset_val = MMLUDataset('val') + dataset_val = MMLUDataset('test') + + print(len(dataset_val)) evaluator = Evaluator( swarm, @@ -92,7 +95,7 @@ async def main(): enable_artifacts=True, tensorboard_tag=tag) - limit_questions = 5 if debug else 153 + limit_questions = 5 if debug else 2808 # 14042*20%=2808 # 153 is 10% of val if mode == 'DirectAnswer': score = await evaluator.evaluate_direct_answer( From 5911d90aade2362e97f3503c3f4c64fcc075725a Mon Sep 17 00:00:00 2001 From: Dmitrii Khizbullin Date: Wed, 27 Mar 2024 16:55:32 +0300 Subject: [PATCH 11/16] Batch 32 and normalization of utility --- experiments/evaluator/evaluator.py | 5 +++-- experiments/run_mmlu.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py index 98a2936..0a8a541 100644 --- a/experiments/evaluator/evaluator.py +++ b/experiments/evaluator/evaluator.py @@ -215,7 +215,7 @@ async def optimize_swarm( self, num_iters: int, lr: float, - batch_size: int = 4, + batch_size: int = 32, # 4, is_async: bool = True, ) -> torch.Tensor: @@ -289,7 +289,8 @@ def infinite_data_loader() -> Iterator[pd.DataFrame]: accuracy.update(answer, correct_answer) utility = accuracy.get() utilities.append(utility) - single_loss = - log_prob * utility + offset_utility = utility - 0.59 # ATTENTION HARDCODE + single_loss = - log_prob * offset_utility loss_list.append(single_loss) print("utilities:", utilities) diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py index 7a9e38d..95ec379 100644 --- a/experiments/run_mmlu.py +++ b/experiments/run_mmlu.py @@ -16,7 +16,7 @@ def parse_args(): choices=['DirectAnswer', 'FullConnectedSwarm', 'RandomSwarm', 'OptimizedSwarm'], help="Mode of operation. Default is 'OptimizedSwarm'.") - parser.add_argument('--num-truthful-agents', type=int, default=5, # 1 + parser.add_argument('--num-truthful-agents', type=int, default=7, help="Number of truthful agents. 
diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index 7a9e38d..95ec379 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -16,7 +16,7 @@ def parse_args():
                         choices=['DirectAnswer', 'FullConnectedSwarm', 'RandomSwarm', 'OptimizedSwarm'],
                         help="Mode of operation. Default is 'OptimizedSwarm'.")
 
-    parser.add_argument('--num-truthful-agents', type=int, default=5, # 1
+    parser.add_argument('--num-truthful-agents', type=int, default=7,
                         help="Number of truthful agents. The total will be N truthful and N adversarial.")
 
     parser.add_argument('--num-iterations', type=int, default=50, # 200

From 0ba0ea9df401946b210539813da5090ae7730f94 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Wed, 27 Mar 2024 17:36:15 +0300
Subject: [PATCH 12/16] Get back to val 10% and gpt4

---
 experiments/evaluator/evaluator.py |  5 ++---
 experiments/run_mmlu.py            | 10 ++++------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py
index 0a8a541..fd0f70a 100644
--- a/experiments/evaluator/evaluator.py
+++ b/experiments/evaluator/evaluator.py
@@ -215,7 +215,7 @@ async def optimize_swarm(
         self,
         num_iters: int,
         lr: float,
-        batch_size: int = 32, # 4,
+        batch_size: int = 4, # 32
         is_async: bool = True,
         ) -> torch.Tensor:
 
@@ -289,8 +289,7 @@ def infinite_data_loader() -> Iterator[pd.DataFrame]:
                 accuracy.update(answer, correct_answer)
                 utility = accuracy.get()
                 utilities.append(utility)
-                offset_utility = utility - 0.59 # ATTENTION HARDCODE
-                single_loss = - log_prob * offset_utility
+                single_loss = - log_prob * utility
                 loss_list.append(single_loss)
 
             print("utilities:", utilities)
diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index 95ec379..557ade3 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -22,7 +22,7 @@ def parse_args():
     parser.add_argument('--num-iterations', type=int, default=50, # 200
                         help="Number of optimization iterations. Default 200.")
 
-    parser.add_argument('--model_name', type=str, default='gpt-3.5-turbo-1106', # None, 'gpt-35-turbo-0301'
+    parser.add_argument('--model_name', type=str, default=None, # None, 'gpt-35-turbo-0301' 'gpt-3.5-turbo-1106'
                         help="Model name, None runs the default ChatGPT4.")
 
     parser.add_argument('--domain', type=str, default="mmlu",
@@ -81,10 +81,8 @@ async def main():
         download()
 
     dataset_train = MMLUDataset('dev')
-    # dataset_val = MMLUDataset('val')
-    dataset_val = MMLUDataset('test')
-
-    print(len(dataset_val))
+    dataset_val = MMLUDataset('val')
+    # dataset_val = MMLUDataset('test')
 
     evaluator = Evaluator(
         swarm,
@@ -95,7 +93,7 @@ async def main():
         enable_artifacts=True,
         tensorboard_tag=tag)
 
-    limit_questions = 5 if debug else 2808 # 14042*20%=2808 # 153 is 10% of val
+    limit_questions = 5 if debug else 153 # 14042*20%=2808 # 153 is 10% of val
 
     if mode == 'DirectAnswer':
         score = await evaluator.evaluate_direct_answer(
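Patch 11 subtracted a hardcoded 0.59 from the utility before weighting the edge log-probability, and patch 12 reverts it. The underlying idea is a REINFORCE baseline for variance reduction; a common hardcode-free variant uses the batch mean of the utilities as the baseline. A minimal sketch of that alternative (not what either patch ships):

    import torch
    from typing import List

    def reinforce_loss(log_probs: List[torch.Tensor],
                       utilities: List[float]) -> torch.Tensor:
        u = torch.tensor(utilities)
        advantages = u - u.mean()  # batch-mean baseline instead of 0.59
        return -(torch.stack(log_probs) * advantages).mean()

With a baseline, graphs that score above the batch average push their edge probabilities up and below-average graphs push them down, instead of every positive utility reinforcing whatever graph happened to be sampled.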
From 1f9943d64dfbc6621bddae5d4fbeaff56427fd5f Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Wed, 27 Mar 2024 17:49:42 +0300
Subject: [PATCH 13/16] Use cost calculation, save to the report

---
 experiments/evaluator/evaluator.py | 10 +++++++++-
 experiments/run_mmlu.py            |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py
index fd0f70a..cf75cca 100644
--- a/experiments/evaluator/evaluator.py
+++ b/experiments/evaluator/evaluator.py
@@ -17,6 +17,8 @@
 from experiments.evaluator.datasets.base_dataset import BaseDataset
 from experiments.evaluator.accuracy import Accuracy
 
+from swarm.utils.globals import Time, Cost, CompletionTokens, PromptTokens
+
 
 class Evaluator():
     def __init__(
@@ -183,7 +185,13 @@ def eval_loader(batch_size: int) -> Iterator[List[Any]]:
 
         self._dump_eval_results(dict(
             accuracy=accuracy.get(),
-            limit_questions=limit_questions))
+            limit_questions=limit_questions,
+            total_cost=dict(
+                Cost=Cost.instance().value,
+                PromptTokens=PromptTokens.instance().value,
+                CompletionTokens=CompletionTokens.instance().value,
+                ),
+            ))
 
         return accuracy.get()
 
diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index 557ade3..223143a 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -108,7 +108,7 @@ async def main():
             limit_questions=limit_questions)
 
     elif mode == 'OptimizedSwarm':
-        num_iters = 5 if debug else args.num_iterations
+        num_iters = 2 if debug else args.num_iterations
 
         lr = 0.1

From 6aefa57e977627f38225068923da169c9d208bd0 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Wed, 27 Mar 2024 18:12:15 +0300
Subject: [PATCH 14/16] Count cost for train and eval

---
 experiments/evaluator/evaluator.py | 43 +++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/experiments/evaluator/evaluator.py b/experiments/evaluator/evaluator.py
index cf75cca..ab961d1 100644
--- a/experiments/evaluator/evaluator.py
+++ b/experiments/evaluator/evaluator.py
@@ -52,10 +52,14 @@ def __init__(
         else:
             self._logger = None
 
+        self._optimization_cost: Optional[Dict[str, float]] = None
+
     async def evaluate_direct_answer(self,
             limit_questions: Optional[int] = None,
             ) -> float:
 
+        self._reset_cost()
+
         dataset = self._val_dataset
 
         print(f"Evaluating DirectAnswer on {dataset.get_domain()} split {dataset.split}")
@@ -89,7 +93,9 @@ async def evaluate_direct_answer(self,
 
         self._dump_eval_results(dict(
             accuracy=accuracy.get(),
-            limit_questions=limit_questions))
+            limit_questions=limit_questions,
+            eval_cost=self._get_cost(),
+            ))
 
         print("Done!")
         return accuracy.get()
@@ -109,6 +115,8 @@ async def evaluate_swarm(
 
         assert self._swarm is not None
 
+        self._reset_cost()
+
         dataset = self._val_dataset
 
         print(f"Evaluating swarm on {dataset.__class__.__name__} split {dataset.split}")
@@ -182,16 +190,15 @@ def eval_loader(batch_size: int) -> Iterator[List[Any]]:
         accuracy.print()
         print("Done!")
 
-        self._dump_eval_results(dict(
+        result_dict = dict(
             accuracy=accuracy.get(),
             limit_questions=limit_questions,
-            total_cost=dict(
-                Cost=Cost.instance().value,
-                PromptTokens=PromptTokens.instance().value,
-                CompletionTokens=CompletionTokens.instance().value,
-                ),
-            ))
+            eval_cost=self._get_cost(),
+            )
+        if self._optimization_cost is not None:
+            result_dict['train_cost'] = self._optimization_cost
+        self._dump_eval_results(result_dict)
 
         return accuracy.get()
 
@@ -219,6 +226,20 @@ def _print_conns(self, edge_probs: torch.Tensor, save_to_file: bool = False):
             with open(txt_name, "w") as f:
                 f.writelines(msgs)
 
+    @staticmethod
+    def _reset_cost():
+        Cost.instance().reset()
+        PromptTokens.instance().reset()
+        CompletionTokens.instance().reset()
+
+    @staticmethod
+    def _get_cost() -> Dict[str, float]:
+        return dict(
+            Cost=Cost.instance().value,
+            PromptTokens=PromptTokens.instance().value,
+            CompletionTokens=CompletionTokens.instance().value,
+            )
+
     async def optimize_swarm(
         self,
         num_iters: int,
@@ -229,6 +250,8 @@ async def optimize_swarm(
 
         assert self._swarm is not None
 
+        self._reset_cost()
+
         dataset = self._train_dataset
 
         print(f"Optimizing swarm on {dataset.__class__.__name__} split {dataset.split}")
@@ -329,6 +352,8 @@ def infinite_data_loader() -> Iterator[pd.DataFrame]:
         if edge_probs is not None:
            self._print_conns(edge_probs, save_to_file=True)
 
+        self._optimization_cost = self._get_cost()
+
        print("Done!")
        edge_probs = torch.sigmoid(self._swarm.connection_dist.edge_logits)
        return edge_probs
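With `_reset_cost`/`_get_cost` in place, the call order is what keeps the two totals disjoint: optimization resets the counters and stores its own total in `_optimization_cost`, and each evaluation resets them again before accumulating. A usage sketch under that assumption (names as in this patch series; the argument values are examples):

    # Inside an async driver:
    edge_probs = await evaluator.optimize_swarm(num_iters=50, lr=0.1)  # resets, then accumulates train cost
    score = await evaluator.evaluate_swarm(                            # resets again; reports eval_cost
        mode='external_edge_probs',
        edge_probs=edge_probs,
        limit_questions=153,
    )
    # The dumped report now carries both eval_cost and train_cost dicts
    # (Cost, PromptTokens, CompletionTokens).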
From 661c1c174b3f3ad60743e012588fed1f8b681cd4 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Thu, 28 Mar 2024 15:04:21 +0300
Subject: [PATCH 15/16] Specialist + Adversarial experiments

---
 experiments/run_mmlu.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index 223143a..19bd616 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -59,13 +59,15 @@ async def main():
         swarm = None
     else:
         N = args.num_truthful_agents
-        # M = N
+        M = N
         # agent_name_list = N * ["IO"]
-        agent_name_list = N * ["SpecialistAgent"]
+        # agent_name_list = N * ["SpecialistAgent"]
+        agent_name_list = N * ["SpecialistAgent"] + M * ["AdversarialAgent"]
 
         # swarm_name = f"{N}true_{M}adv"
         # swarm_name = f"{N}io"
-        swarm_name = f"{N}specialist"
+        # swarm_name = f"{N}specialist"
+        swarm_name = f"{N}S{M}A"
 
         swarm = Swarm(
             agent_name_list,

From 3ed01461e5e1247070f91163d1b2f44af59a1289 Mon Sep 17 00:00:00 2001
From: Dmitrii Khizbullin
Date: Sun, 31 Mar 2024 12:33:30 +0300
Subject: [PATCH 16/16] Added LR

---
 experiments/run_mmlu.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/experiments/run_mmlu.py b/experiments/run_mmlu.py
index 19bd616..3e3174a 100644
--- a/experiments/run_mmlu.py
+++ b/experiments/run_mmlu.py
@@ -31,6 +31,9 @@ def parse_args():
     parser.add_argument('--debug', action='store_true', default=False,
                         help="Set for a quick debug cycle")
 
+    parser.add_argument('--lr', type=float, default=0.1,
+                        help="Learning rate")
+
     args = parser.parse_args()
     return args
 
@@ -60,14 +63,15 @@ async def main():
     else:
         N = args.num_truthful_agents
         M = N
+        # agent_name_list = N * ["IO"] + M * ["AdversarialAgent"]
         # agent_name_list = N * ["IO"]
-        # agent_name_list = N * ["SpecialistAgent"]
-        agent_name_list = N * ["SpecialistAgent"] + M * ["AdversarialAgent"]
+        agent_name_list = N * ["SpecialistAgent"]
+        # agent_name_list = N * ["SpecialistAgent"] + M * ["AdversarialAgent"]
 
         # swarm_name = f"{N}true_{M}adv"
         # swarm_name = f"{N}io"
-        # swarm_name = f"{N}specialist"
-        swarm_name = f"{N}S{M}A"
+        swarm_name = f"{N}specialist"
+        # swarm_name = f"{N}S{M}A"
 
         swarm = Swarm(
             agent_name_list,
@@ -112,9 +116,7 @@ async def main():
 
         num_iters = 2 if debug else args.num_iterations
 
-        lr = 0.1
-
-        edge_probs = await evaluator.optimize_swarm(num_iters=num_iters, lr=lr)
+        edge_probs = await evaluator.optimize_swarm(num_iters=num_iters, lr=args.lr)
 
         score = await evaluator.evaluate_swarm(
             mode='external_edge_probs',
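With patch 16 applied, the learning rate joins the other experiment knobs on the command line; a hypothetical invocation combining the flags introduced in this series (values are examples, not recommendations): python experiments/run_mmlu.py --mode OptimizedSwarm --num-truthful-agents 3 --num-iterations 50 --lr 0.05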