From 4a464006e2526e724033c638e4dc9290eea9951e Mon Sep 17 00:00:00 2001
From: Shiqi HE
Date: Mon, 10 Feb 2025 14:38:53 -0500
Subject: [PATCH 1/3] add Mordal arXiv

---
 source/_data/SymbioticLab.bib | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/source/_data/SymbioticLab.bib b/source/_data/SymbioticLab.bib
index 6b4f4329..1a72492f 100644
--- a/source/_data/SymbioticLab.bib
+++ b/source/_data/SymbioticLab.bib
@@ -1955,4 +1955,23 @@ @Article{mercury:arxiv24
 
 In this paper, we present Mercury, a QoS-aware tiered memory system that ensures predictable performance for coexisting memory-intensive applications with different SLOs. Mercury enables per-tier page reclamation for application-level resource management and uses a proactive admission control algorithm to satisfy SLOs via per-tier memory capacity allocation and intra- and inter-tier bandwidth interference mitigation. It reacts to dynamic requirement changes via real-time adaptation. Extensive evaluations show that Mercury improves application performance by up to 53.4% and 20.3% compared to TPP and Colloid, respectively.
 }
-}
\ No newline at end of file
+}
+
+
+@Article{mordal:arxiv25,
+  author = {Shiqi He and Insu Jang Mosharaf Chowdhury},
+  title     = {{Mordal}: Automated Pretrained Model Selection for Vision Language Models},
+  year          = {2025},
+  month         = {Feb},
+  volume        = {abs/2502.00241},
+  archiveprefix = {arXiv},
+  eprint        = {2502.00241},
+  url           = {https://arxiv.org/abs/2502.00241},
+  publist_confkey = {arXiv:2502.00241},
+  publist_link = {paper || https://arxiv.org/abs/2502.00241},
+  publist_topic = {Systems + AI},
+  publist_abstract = {
+Incorporating multiple modalities into large language models (LLMs) is a powerful way to enhance their understanding of non-textual data, enabling them to perform multimodal tasks. Vision language models (VLMs) form the fastest growing category of multimodal models because of their many practical use cases, including in healthcare, robotics, and accessibility. Unfortunately, even though different VLMs in the literature demonstrate impressive visual capabilities in different benchmarks, they are handcrafted by human experts; there is no automated framework to create task-specific multimodal models.
+We introduce Mordal, an automated multimodal model search framework that efficiently finds the best VLM for a user-defined task without manual intervention. Mordal achieves this both by reducing the number of candidates to consider during the search process and by minimizing the time required to evaluate each remaining candidate. Our evaluation shows that Mordal can find the best VLM for a given problem using up to 8.9×-11.6× lower GPU hours than grid search. In the process of our evaluation, we have also discovered new VLMs that outperform their state-of-the-art counterparts.
+  }
+}

From 46ef15c76b85ed69b3b63d3517cc4c5a33a0d02a Mon Sep 17 00:00:00 2001
From: Shiqi HE
Date: Mon, 10 Feb 2025 14:54:40 -0500
Subject: [PATCH 2/3] fix Mordal arxiv

---
 source/_data/SymbioticLab.bib | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/_data/SymbioticLab.bib b/source/_data/SymbioticLab.bib
index 1a72492f..40b6af5a 100644
--- a/source/_data/SymbioticLab.bib
+++ b/source/_data/SymbioticLab.bib
@@ -1959,8 +1959,8 @@ @Article{mercury:arxiv24
 
 
 @Article{mordal:arxiv25,
-  author = {Shiqi He and Insu Jang Mosharaf Chowdhury},
-  title     = {{Mordal}: Automated Pretrained Model Selection for Vision Language Models},
+  author = {Shiqi He and Insu Jang and Mosharaf Chowdhury},
+  title = {{Mordal}: Automated Pretrained Model Selection for Vision Language Models},
   year          = {2025},
   month         = {Feb},
   volume        = {abs/2502.00241},

From 20ace1dc4ac326e5213dc3a82b90579ad1ae03fa Mon Sep 17 00:00:00 2001
From: Shiqi HE
Date: Mon, 10 Feb 2025 14:59:24 -0500
Subject: [PATCH 3/3] fix Mordal arxiv bib format

---
 source/_data/SymbioticLab.bib | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/source/_data/SymbioticLab.bib b/source/_data/SymbioticLab.bib
index 40b6af5a..bc6f3333 100644
--- a/source/_data/SymbioticLab.bib
+++ b/source/_data/SymbioticLab.bib
@@ -1957,21 +1957,21 @@ @Article{mercury:arxiv24
 }
 }
 
-
 @Article{mordal:arxiv25,
-  author = {Shiqi He and Insu Jang and Mosharaf Chowdhury},
-  title = {{Mordal}: Automated Pretrained Model Selection for Vision Language Models},
-  year          = {2025},
-  month         = {Feb},
-  volume        = {abs/2502.00241},
-  archiveprefix = {arXiv},
-  eprint        = {2502.00241},
-  url           = {https://arxiv.org/abs/2502.00241},
+  author = {Shiqi He and Insu Jang and Mosharaf Chowdhury},
+  title = {{Mordal}: Automated Pretrained Model Selection for Vision Language Models},
+  year = {2025},
+  month = {Feb},
+  volume = {abs/2502.00241},
+  archivePrefix = {arXiv},
+  eprint = {2502.00241},
+  url = {https://arxiv.org/abs/2502.00241},
   publist_confkey = {arXiv:2502.00241},
-  publist_link = {paper || https://arxiv.org/abs/2502.00241},
-  publist_topic = {Systems + AI},
+  publist_link = {paper || https://arxiv.org/abs/2502.00241},
+  publist_topic = {Systems + AI},
   publist_abstract = {
-Incorporating multiple modalities into large language models (LLMs) is a powerful way to enhance their understanding of non-textual data, enabling them to perform multimodal tasks. Vision language models (VLMs) form the fastest growing category of multimodal models because of their many practical use cases, including in healthcare, robotics, and accessibility. Unfortunately, even though different VLMs in the literature demonstrate impressive visual capabilities in different benchmarks, they are handcrafted by human experts; there is no automated framework to create task-specific multimodal models.
-We introduce Mordal, an automated multimodal model search framework that efficiently finds the best VLM for a user-defined task without manual intervention. Mordal achieves this both by reducing the number of candidates to consider during the search process and by minimizing the time required to evaluate each remaining candidate. Our evaluation shows that Mordal can find the best VLM for a given problem using up to 8.9×-11.6× lower GPU hours than grid search. In the process of our evaluation, we have also discovered new VLMs that outperform their state-of-the-art counterparts.
+  Incorporating multiple modalities into large language models (LLMs) is a powerful way to enhance their understanding of non-textual data, enabling them to perform multimodal tasks. Vision language models (VLMs) form the fastest growing category of multimodal models because of their many practical use cases, including in healthcare, robotics, and accessibility. Unfortunately, even though different VLMs in the literature demonstrate impressive visual capabilities in different benchmarks, they are handcrafted by human experts; there is no automated framework to create task-specific multimodal models.
+
+  We introduce Mordal, an automated multimodal model search framework that efficiently finds the best VLM for a user-defined task without manual intervention. Mordal achieves this both by reducing the number of candidates to consider during the search process and by minimizing the time required to evaluate each remaining candidate. Our evaluation shows that Mordal can find the best VLM for a given problem using up to 8.9×-11.6× lower GPU hours than grid search. In the process of our evaluation, we have also discovered new VLMs that outperform their state-of-the-art counterparts.
   }
 }