{"id":1170659,"date":"2026-05-04T13:42:02","date_gmt":"2026-05-04T20:42:02","guid":{"rendered":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/?post_type=msr-research-item&#038;p=1170659"},"modified":"2026-05-04T13:42:02","modified_gmt":"2026-05-04T20:42:02","slug":"forestcoll-throughput-optimal-collective-communications-on-heterogeneous-network-fabrics","status":"publish","type":"msr-research-item","link":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/publication\/forestcoll-throughput-optimal-collective-communications-on-heterogeneous-network-fabrics\/","title":{"rendered":"ForestColl: Throughput-Optimal Collective Communications on Heterogeneous Network Fabrics"},"content":{"rendered":"<p>s modern DNN models grow ever larger, collective commu\u0002nications between the accelerators (allreduce, etc.) emerge as a significant performance bottleneck. Designing efficient com\u0002munication schedules is challenging, given today\u2019s heteroge\u0002neous and diverse network fabrics. We present ForestColl, a tool that generates throughput-optimal schedules for any net\u0002work topology. ForestColl constructs broadcast\/aggregation<br \/>\nspanning trees as the communication schedule, achieving the\u0002oretical optimality. Its schedule generation runs in polynomial time and is highly scalable. ForestColl supports any network fabric, including both switching fabrics and direct accelerator connections. We evaluated ForestColl on AMD MI250 and NVIDIA DGX A100 & H100 clusters. ForestColl shows significant improvements over the vendors\u2019 own optimized communication libraries across various settings and in LLM training. ForestColl also outperforms other state-of-the-art schedule generation techniques with both more efficient gen\u0002erated schedules and substantially faster generation speed.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>s modern DNN models grow ever larger, collective commu\u0002nications between the accelerators (allreduce, etc.) emerge as a significant performance bottleneck. Designing efficient com\u0002munication schedules is challenging, given today\u2019s heteroge\u0002neous and diverse network fabrics. We present ForestColl, a tool that generates throughput-optimal schedules for any net\u0002work topology. ForestColl constructs broadcast\/aggregation spanning trees as the communication schedule, [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"","msr_page_range_end":"","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2026-05","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":false,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":null,"footnotes":""},"msr-research-highlight":[],"research-area":[13547],"msr-publication-type":[193726],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[269142],"msr-field-of-study":[268170],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-1170659","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-systems-and-networking","msr-locale-en_us","msr-post-option-include-in-river","msr-field-of-study-systems-and-networking"],"msr_publishername":"","msr_edition":"","msr_affiliation":"","msr_published_date":"2026-05","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":0,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/www.usenix.org\/system\/files\/nsdi26-zhao-liangyu.pdf","label_id":"243109","label":0}],"msr_related_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/www.usenix.org\/conference\/nsdi26\/presentation\/zhao-liangyu","label_id":"243118","label":0}],"msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[],"msr-author-ordering":[{"type":"text","value":"Liangyu Zhao","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Saeed Maleki","user_id":36131,"rest_url":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Saeed Maleki"},{"type":"text","value":"Yuanhong Wang","user_id":0,"rest_url":false},{"type":"text","value":"Zezhou Wang","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Ziyue Yang","user_id":41653,"rest_url":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Ziyue Yang"},{"type":"text","value":"Hossein Pourreza","user_id":0,"rest_url":false},{"type":"text","value":"Arvind Krishnamurthy","user_id":0,"rest_url":false}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"unpublished","related_content":[],"_links":{"self":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1170659","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":1,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1170659\/revisions"}],"predecessor-version":[{"id":1170660,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1170659\/revisions\/1170660"}],"wp:attachment":[{"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1170659"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=1170659"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1170659"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=1170659"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=1170659"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=1170659"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1170659"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=1170659"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=1170659"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=1170659"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=1170659"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1170659"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/cm-edgetun.pages.dev\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=1170659"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}