Publications

@inproceedings{miao26flexllm,
  author    = {Oliaro, Gabriele and Miao, Xupeng and Cheng, Xinhao and Kada, Vineeth and Gao, Ruohan and Huang, Yingyi and Delacourt, Remi and Yang, April and Wang, Yingcheng and Wu, Mengdi and Unger, Colin and Jia, Zhihao},
  title     = {{FlexLLM}: Token-Level Co-Serving of {LLM} Inference and Fine-Tuning with {SLO} Guarantees},
  booktitle = {Proceedings of NSDI Conference},
  year      = {2026},
  cofirst   = {true},
}

EuroSys

AdaServe: Accelerating Multi-SLO LLM Serving with SLO-Customized Speculative Decoding

Zikun Li, Zhuofu Chen, Remi Delacourt, Gabriele Oliaro and 10 more authors

Proceedings of EuroSys Conference 2026

@inproceedings{li26adaserve,
  author    = {Li, Zikun and Chen, Zhuofu and Delacourt, Remi and Oliaro, Gabriele and Wang, Zeyu and Chen, Qinghan and Lin, Shuhuai and Yang, April and Zhang, Zhihao and Chen, Zhuoming and Lai, Sean and Cheng, Xinhao and Miao, Xupeng and Jia, Zhihao},
  title     = {{AdaServe}: Accelerating Multi-{SLO} {LLM} Serving with {SLO}-Customized Speculative Decoding},
  booktitle = {Proceedings of EuroSys Conference},
  year      = {2026},
}

CSUR

Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 3 more authors

ACM Computing Surveys 2026

@article{miao26serving,
  author  = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Jin, Hongyi and Chen, Tianqi and Jia, Zhihao},
  title   = {Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems},
  journal = {ACM Computing Surveys},
  year    = {2026},
}

2025

OSDI

Mirage: A Multi-Level Superoptimizer for Tensor Programs

Mengdi Wu, Xinhao Cheng, Shengyu Liu, Chunan Shi and 6 more authors

Proceedings of OSDI Conference 2025

@inproceedings{wang25mirage,
  author    = {Wu, Mengdi and Cheng, Xinhao and Liu, Shengyu and Shi, Chunan and Ji, Jianan and Ao, Kit and Velliengiri, Praveen and Miao, Xupeng and Padan, Oded and Jia, Zhihao},
  title     = {Mirage: A Multi-Level Superoptimizer for Tensor Programs},
  booktitle = {Proceedings of OSDI Conference},
  year      = {2025},
}

ASPLOS

Helix: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous GPUs

Yixuan Mei, Yonghao Zhuang, Xupeng Miao, Juncheng Yang and 2 more authors

Proceedings of ASPLOS Conference 2025

@inproceedings{mei25helix,
  author    = {Mei, Yixuan and Zhuang, Yonghao and Miao, Xupeng and Yang, Juncheng and Jia, Zhihao and Vinayak, Rashmi},
  title     = {Helix: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous {GPUs}},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

ASPLOS

GraphPipe: Improving Performance and Scalability of DNN Training with Graph Pipeline Parallelism

Byungsoo Jeon, Mengdi Wu, Shiyi Cao, Sunghyun Kim and 10 more authors

Proceedings of ASPLOS Conference 2025

@inproceedings{jeon25graphpipe,
  author    = {Jeon, Byungsoo and Wu, Mengdi and Cao, Shiyi and Kim, Sunghyun and Park, Sunghyun and Aggarwal, Neeraj and Unger, Colin and Arfeen, Daiyaan and Liao, Peiyuan and Miao, Xupeng and Alizadeh, Mohammad and Ganger, Gregory R. and Chen, Tianqi and Jia, Zhihao},
  title     = {{GraphPipe}: Improving Performance and Scalability of {DNN} Training with Graph Pipeline Parallelism},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

ASPLOS

Spindle: Efficient Distributed Training of Multi-Task Large Models via Wavefront Scheduling

Yujie Wang, Shenhan Zhu, Fangcheng Fu, Xupeng Miao and 5 more authors

Proceedings of ASPLOS Conference 2025

@inproceedings{wang25spindle,
  author    = {Wang, Yujie and Zhu, Shenhan and Fu, Fangcheng and Miao, Xupeng and Zhang, Jie and Zhu, Juan and Hong, Fan and Li, Yong and Cui, Bin},
  title     = {Spindle: Efficient Distributed Training of Multi-Task Large Models via Wavefront Scheduling},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

SIGMOD

PQCache: Product Quantization-based KVCache for Long Context LLM Inference

Hailin Zhang, Xiaodong Ji, Yilin Chen, Fangcheng Fu and 4 more authors

Proceedings of SIGMOD Conference 2025

@inproceedings{zhang25pqcache,
  author    = {Zhang, Hailin and Ji, Xiaodong and Chen, Yilin and Fu, Fangcheng and Miao, Xupeng and Nie, Xiaonan and Chen, Weipeng and Cui, Bin},
  title     = {{PQCache}: Product Quantization-based {KVCache} for Long Context {LLM} Inference},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2025},
}

ICLR

NetMoE: Accelerating MoE Training through Dynamic Sample Placement (Spotlight)

Xinyi Liu, Yujie Wang, Fangcheng Fu, Xupeng Miao and 3 more authors

Proceedings of ICLR Conference 2025

Bib

@inproceedings{liu2025netmoe,
  author    = {Liu, Xinyi and Wang, Yujie and Fu, Fangcheng and Miao, Xupeng and Zhu, Shenhan and Nie, Xiaonan and Cui, Bin},
  title     = {{NetMoE}: Accelerating {MoE} Training through Dynamic Sample Placement},
  booktitle = {Proceedings of ICLR Conference},
  year      = {2025},
}

ICML

Demystifying Cost-Efficiency in LLM Serving over Heterogeneous GPUs

Youhe Jiang, Fangcheng Fu, Xiaozhe Yao, Guoliang He and 5 more authors

Proceedings of ICML Conference 2025

@inproceedings{jiang2025demy,
  author    = {Jiang, Youhe and Fu, Fangcheng and Yao, Xiaozhe and He, Guoliang and Miao, Xupeng and Klimovic, Ana and Cui, Bin and Yuan, Binhang and Yoneki, Eiko},
  title     = {Demystifying Cost-Efficiency in {LLM} Serving over Heterogeneous {GPUs}},
  booktitle = {Proceedings of ICML Conference},
  year      = {2025},
}

VLDBJ

Efficient and Scalable Huge Embedding Model Training via Distributed Cache Management (Special Issue on Best Papers of VLDB 2022)

Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 4 more authors

The VLDB Journal 2025

@article{miao2025het,
  author  = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Jiang, Jie and Cui, Bin},
  title   = {Efficient and Scalable Huge Embedding Model Training via Distributed Cache Management},
  journal = {The VLDB Journal},
  year    = {2025},
}

2024

NeurIPS

LSH-MoE: Communication-efficient MoE Training via Locality-Sensitive Hashing

Xiaonan Nie, Qibin Liu, Fangcheng Fu, Shenhan Zhu and 5 more authors

Proceedings of NeurIPS Conference 2024

@inproceedings{nie2024lshmoe,
  author    = {Nie, Xiaonan and Liu, Qibin and Fu, Fangcheng and Zhu, Shenhan and Miao, Xupeng and Li, Xiaoyang and Zhang, Yang and Liu, Shouda and Cui, Bin},
  title     = {{LSH-MoE}: Communication-efficient {MoE} Training via Locality-Sensitive Hashing},
  booktitle = {Proceedings of NeurIPS Conference},
  year      = {2024},
}

SOSP

Enabling Parallelism Hot Switching for Efficient Training of Large Language Models

Hao Ge, Fangcheng Fu, Haoyang Li, Xuanyu Wang and 6 more authors

Proceedings of SOSP Conference 2024

@inproceedings{ge24hotspa,
  author    = {Ge, Hao and Fu, Fangcheng and Li, Haoyang and Wang, Xuanyu and Lin, Sheng and Wang, Yujie and Nie, Xiaonan and Zhang, Hailin and Miao, Xupeng and Cui, Bin},
  title     = {Enabling Parallelism Hot Switching for Efficient Training of Large Language Models},
  booktitle = {Proceedings of SOSP Conference},
  year      = {2024},
  publisher = {{ACM}},
}

Atlas: Hierarchical Partitioning for Quantum Circuit Simulation on GPUs

Mingkuan Xu, Shiyi Cao, Xupeng Miao, Umut Acar and 1 more author

Proceedings of SC Conference 2024

@inproceedings{xu24atlas,
  author    = {Xu, Mingkuan and Cao, Shiyi and Miao, Xupeng and Acar, Umut and Jia, Zhihao},
  title     = {Atlas: Hierarchical Partitioning for Quantum Circuit Simulation on {GPUs}},
  booktitle = {Proceedings of SC Conference},
  year      = {2024},
}

SIGMOD

Demystifying Data Management for Large Language Models (Tutorial)

Xupeng Miao, Zhihao Jia, and Bin Cui

Proceedings of SIGMOD Conference 2024

@inproceedings{miao24dm4llm,
  author    = {Miao, Xupeng and Jia, Zhihao and Cui, Bin},
  title     = {Demystifying Data Management for Large Language Models},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2024},
  publisher = {{ACM}},
}

ASPLOS

SpotServe: Serving Generative Large Language Models on Preemptible Instances (Distinguished Artifact Award) (IEEE Micro Top Picks Honorable Mention)

Xupeng Miao, Chunan Shi, Jiangfei Duan, Xiaoli Xi and 3 more authors

Proceedings of ASPLOS Conference 2024

@inproceedings{miao24spotserve,
  author    = {Miao, Xupeng and Shi, Chunan and Duan, Jiangfei and Xi, Xiaoli and Lin, Dahua and Cui, Bin and Jia, Zhihao},
  title     = {{SpotServe}: Serving Generative Large Language Models on Preemptible Instances},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
}

ASPLOS

SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 10 more authors

Proceedings of ASPLOS Conference 2024

@inproceedings{miao23specinfer,
  author    = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Wang, Zeyu and Wong, Rae Ying Yee and Zhu, Alan and Yang, Lijie and Shi, Xiaoxiang and Shi, Chunan and Chen, Zhuoming and Arfeen, Daiyaan and Abhyankar, Reyna and Jia, Zhihao},
  title     = {{SpecInfer}: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
  doi       = {10.48550/arXiv.2305.09781},
}

ASPLOS

Optimal Kernel Orchestration for Tensor Programs with Korch

Muyan Hu, Ashwin Venkatram, Shreyashri Biswas, Balamurugan Marimuthu and 7 more authors

Proceedings of ASPLOS Conference 2024

@inproceedings{hu24korch,
  author    = {Hu, Muyan and Venkatram, Ashwin and Biswas, Shreyashri and Marimuthu, Balamurugan and Hou, Bohan and Oliaro, Gabriele and Wang, Haojie and Zheng, Liyan and Miao, Xupeng and Zhai, Jidong and Jia, Zhihao},
  title     = {Optimal Kernel Orchestration for Tensor Programs with {Korch}},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
}

NSDI

Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances

Jiangfei Duan¹, Ziang Song¹, Xupeng Miao¹, Xiaoli Xi and 4 more authors

Proceedings of NSDI Conference 2024

@inproceedings{nsdi24parcae,
  author    = {Duan, Jiangfei and Song, Ziang and Miao, Xupeng and Xi, Xiaoli and Lin, Dahua and Xu, Harry and Zhang, Minjia and Jia, Zhihao},
  title     = {Parcae: Proactive, Liveput-Optimized {DNN} Training on Preemptible Instances},
  booktitle = {Proceedings of NSDI Conference},
  year      = {2024},
  cofirst   = {true},
}

ACL

Quantized Side Tuning: Fast and Memory-Efficient Tuning of Quantized Large Language Models (Outstanding Paper Award)

Zhengxin Zhang, Dan Zhao, Xupeng Miao, Gabriele Oliaro and 3 more authors

Proceedings of ACL Conference 2024

@inproceedings{acl24qst,
  author    = {Zhang, Zhengxin and Zhao, Dan and Miao, Xupeng and Oliaro, Gabriele and Li, Qing and Jiang, Yong and Jia, Zhihao},
  title     = {Quantized Side Tuning: Fast and Memory-Efficient Tuning of Quantized Large Language Models},
  booktitle = {Proceedings of ACL Conference},
  year      = {2024},
}

IJCAI

X-former Elucidator: Reviving Efficient Attention for Long Context Language Modeling

Xupeng Miao, Shenhan Zhu, Fangcheng Fu, Ziyu Guo and 4 more authors

Proceedings of IJCAI Conference 2024

@inproceedings{ijcai24xformer,
  author    = {Miao, Xupeng and Zhu, Shenhan and Fu, Fangcheng and Guo, Ziyu and Yang, Zhi and Tu, Yaofeng and Jia, Zhihao and Cui, Bin},
  title     = {X-former Elucidator: Reviving Efficient Attention for Long Context Language Modeling},
  booktitle = {Proceedings of IJCAI Conference},
  year      = {2024},
}

VLDB

Experimental Analysis of Large-scale Learnable Vector Storage Compression

Hailin Zhang, Penghao Zhao, Xupeng Miao, Yingxia Shao and 3 more authors

Proc. VLDB Endow. 2024

@article{vldb24eazhang,
  author  = {Zhang, Hailin and Zhao, Penghao and Miao, Xupeng and Shao, Yingxia and Liu, Zirui and Yang, Tong and Cui, Bin},
  title   = {Experimental Analysis of Large-scale Learnable Vector Storage Compression},
  journal = {Proc. {VLDB} Endow.},
  year    = {2024},
}

ICDE

MFIX: An Efficient and Reliable Index Advisor via Multi-Fidelity Bayesian Optimization

Zhuo Chang, Xinyi Zhang, Yang Li, Xupeng Miao and 2 more authors

Proceedings of ICDE Conference 2024

@inproceedings{icde24mfix,
  author    = {Chang, Zhuo and Zhang, Xinyi and Li, Yang and Miao, Xupeng and Qin, Yanzhao and Cui, Bin},
  title     = {{MFIX}: An Efficient and Reliable Index Advisor via Multi-Fidelity {Bayesian} Optimization},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2024},
}

TKDE

Improving Automatic Parallel Training via Balanced Memory Workload Optimization

Yujie Wang, Youhe Jiang, Xupeng Miao, Fangcheng Fu and 4 more authors

IEEE Transactions on Knowledge and Data Engineering 2024

@article{wang2024galvatronbmw,
  author    = {Wang, Yujie and Jiang, Youhe and Miao, Xupeng and Fu, Fangcheng and Zhu, Shenhan and Nie, Xiaonan and Tu, Yaofeng and Cui, Bin},
  title     = {Improving Automatic Parallel Training via Balanced Memory Workload Optimization},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {2024},
  publisher = {IEEE},
  doi       = {10.1109/TKDE.2024.3370614},
}

AAAI

Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference

Zihao Yu, Haoyang Li, Fangcheng Fu, Xupeng Miao and 1 more author

In Proceedings of AAAI Conference 2024

@inproceedings{aaai24fisedit,
  title     = {Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference},
  author    = {Yu, Zihao and Li, Haoyang and Fu, Fangcheng and Miao, Xupeng and Cui, Bin},
  booktitle = {Proceedings of AAAI Conference},
  year      = {2024},
}

EACL

Generative Dense Retrieval: Memory Can Be a Burden

Peiwen Yuan, Xinglin Wang, Shaoxiong Feng, Boyuan Pan and 4 more authors

Proceedings of EACL Conference 2024

@inproceedings{eacl24yuan,
  author    = {Yuan, Peiwen and Wang, Xinglin and Feng, Shaoxiong and Pan, Boyuan and Li, Yiwei and Wang, Heda and Miao, Xupeng and Li, Kan},
  title     = {Generative Dense Retrieval: Memory Can Be a Burden},
  booktitle = {Proceedings of EACL Conference},
  year      = {2024},
}

CSUR

Distributed Graph Neural Network Training: A Survey

Yingxia Shao, Hongzheng Li, Xizhi Gu, Hongbo Yin and 5 more authors

ACM Computing Surveys 2024

@article{shao2022distributed,
  author  = {Shao, Yingxia and Li, Hongzheng and Gu, Xizhi and Yin, Hongbo and Li, Yawen and Miao, Xupeng and Zhang, Wentao and Cui, Bin and Chen, Lei},
  title   = {Distributed Graph Neural Network Training: A Survey},
  journal = {ACM Computing Surveys},
  year    = {2024},
  doi     = {10.48550/arXiv.2211.00216},
}

2023

arXiv

Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 3 more authors

arXiv preprint arXiv:2312.15234 2023

@article{miao23efficient,
  author        = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Jin, Hongyi and Chen, Tianqi and Jia, Zhihao},
  title         = {Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems},
  journal       = {arXiv preprint arXiv:2312.15234},
  year          = {2023},
  eprint        = {2312.15234},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2312.15234},
}

OSDI

EinNet: Optimizing Tensor Programs with Derivation-Based Transformations

Liyan Zheng, Haojie Wang, Jidong Zhai, Muyan Hu and 7 more authors

Proceedings of OSDI Conference 2023

@inproceedings{osdi23einnet,
  author    = {Zheng, Liyan and Wang, Haojie and Zhai, Jidong and Hu, Muyan and Ma, Zixuan and Wang, Tuowei and Huang, Shuhong and Miao, Xupeng and Tang, Shizhi and Huang, Kezhao and Jia, Zhihao},
  title     = {{EinNet}: Optimizing Tensor Programs with Derivation-Based Transformations},
  booktitle = {Proceedings of OSDI Conference},
  year      = {2023},
}

VLDB

SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training

Xupeng Miao, Yining Shi, Zhi Yang, Bin Cui and 1 more author

Proc. VLDB Endow. 2023

@article{miao2023sdpipe,
  author    = {Miao, Xupeng and Shi, Yining and Yang, Zhi and Cui, Bin and Jia, Zhihao},
  title     = {{SDPipe}: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {16},
  year      = {2023},
  publisher = {VLDB Endowment},
}

VLDB

Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism

Xupeng Miao, Yujie Wang, Youhe Jiang, Chunan Shi and 3 more authors

Proc. VLDB Endow. 2023

@article{miao2023galvatron,
  author    = {Miao, Xupeng and Wang, Yujie and Jiang, Youhe and Shi, Chunan and Nie, Xiaonan and Zhang, Hailin and Cui, Bin},
  title     = {Galvatron: Efficient Transformer Training over Multiple {GPUs} Using Automatic Parallelism},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {16},
  number    = {3},
  pages     = {470--479},
  year      = {2023},
  publisher = {VLDB Endowment},
  doi       = {10.14778/3570690.3570697},
}

VLDB

Angel-PTM: A Scalable and Economical Large-scale Pre-training System in Tencent

Xiaonan Nie, Yi Liu, Fangcheng Fu, Jinbao Xue and 4 more authors

Proc. VLDB Endow. (Industry) 2023

@article{DBLP:journals/corr/abs-2303-02868,
  author  = {Nie, Xiaonan and Liu, Yi and Fu, Fangcheng and Xue, Jinbao and Jiao, Dian and Miao, Xupeng and Tao, Yangyu and Cui, Bin},
  title   = {{Angel-PTM}: A Scalable and Economical Large-scale Pre-training System in {Tencent}},
  journal = {Proc. {VLDB} Endow. (Industry)},
  year    = {2023},
  doi     = {10.48550/arXiv.2303.02868},
}

SIGMOD

FlexMoE: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement

Xiaonan Nie, Xupeng Miao, Zilong Wang, Jilong Xue and 4 more authors

Proceedings of SIGMOD Conference 2023

@inproceedings{nie2023flexmoe,
  author    = {Nie, Xiaonan and Miao, Xupeng and Wang, Zilong and Xue, Jilong and Ma, Lingxiao and Yang, Zichao and Cao, Gang and Cui, Bin},
  title     = {{FlexMoE}: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2023},
  publisher = {{ACM}},
}

IJCAI

OSDP: Optimal Sharded Data Parallel for Distributed Deep Learning

Youhe Jiang, Fangcheng Fu, Xupeng Miao, Xiaonan Nie and 1 more author

Proceedings of IJCAI Conference 2023

@inproceedings{jiang2023osdp,
  author    = {Jiang, Youhe and Fu, Fangcheng and Miao, Xupeng and Nie, Xiaonan and Cui, Bin},
  title     = {{OSDP}: Optimal Sharded Data Parallel for Distributed Deep Learning},
  booktitle = {Proceedings of IJCAI Conference},
  year      = {2023},
}

NeurIPS

Model-enhanced Vector Index

Hailin Zhang, Yujing Wang, Qi Chen, Ruiheng Chang and 15 more authors

Proceedings of NeurIPS Conference 2023

@inproceedings{zhang2023mevi,
  author    = {Zhang, Hailin and Wang, Yujing and Chen, Qi and Chang, Ruiheng and Zhang, Ting and Miao, Ziming and Hou, Yingyan and Ding, Yang and Miao, Xupeng and Wang, Haonan and Pang, Bochen and Zhan, Yuefeng and Sun, Hao and Deng, Weiwei and Zhang, Qi and Yang, Fan and Xie, Xing and Yang, Mao and Cui, Bin},
  title     = {Model-enhanced Vector Index},
  booktitle = {Proceedings of NeurIPS Conference},
  year      = {2023},
}

NeurIPS

Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference

Zihao Yu, Haoyang Li, Fangcheng Fu, Xupeng Miao and 1 more author

In Proceedings of NeurIPS ML for Systems (MLSys) Workshop 2023

arXiv Bib HTML

@inproceedings{fisedit2023,
  title     = {Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference},
  author    = {Yu, Zihao and Li, Haoyang and Fu, Fangcheng and Miao, Xupeng and Cui, Bin},
  booktitle = {Proceedings of NeurIPS ML for Systems (MLSys) Workshop},
  year      = {2023},
}

AAAI

CALIP: Zero-Shot Enhancement of CLIP with Parameter-free Attention

Ziyu Guo, Renrui Zhang, Longtian Qiu, Xianzheng Ma and 3 more authors

Proceedings of AAAI Conference 2023

@inproceedings{guo2023calip,
  author    = {Guo, Ziyu and Zhang, Renrui and Qiu, Longtian and Ma, Xianzheng and Miao, Xupeng and He, Xuming and Cui, Bin},
  title     = {{CALIP}: Zero-Shot Enhancement of {CLIP} with Parameter-free Attention},
  booktitle = {Proceedings of AAAI Conference},
  year      = {2023},
  publisher = {{AAAI}},
}

2022

SCIS

Hetu: A highly efficient automatic parallel distributed deep learning system

Xupeng Miao, Xiaonan Nie, Hailin Zhang, Tong Zhao and 1 more author

Sci. China Inf. Sci. 2022

Bib PDF CCF-T1

@article{DBLP:journals/chinaf/MiaoXP22,
  author  = {Miao, Xupeng and Nie, Xiaonan and Zhang, Hailin and Zhao, Tong and Cui, Bin},
  title   = {Hetu: A highly efficient automatic parallel distributed deep learning system},
  journal = {Sci. China Inf. Sci.},
  year    = {2022},
  doi     = {10.1007/s11432-022-3581-9},
  url     = {http://engine.scichina.com/doi/10.1007/s11432-022-3581-9},
}

VLDB

HET: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework (Best Scalable Data Science Paper Award)

Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 3 more authors

Proc. VLDB Endow. 2022

@article{miao2021het,
  author    = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Cui, Bin},
  title     = {{HET:} Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {15},
  number    = {2},
  pages     = {312--320},
  year      = {2022},
  publisher = {VLDB Endowment},
}

VLDB

Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates

Fangcheng Fu, Xupeng Miao, Jiawei Jiang, Huanran Xue and 1 more author

Proc. VLDB Endow. 2022

@article{DBLP:journals/corr/abs-2207-14628,
  title   = {Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates},
  author  = {Fu, Fangcheng and Miao, Xupeng and Jiang, Jiawei and Xue, Huanran and Cui, Bin},
  journal = {Proc. {VLDB} Endow.},
  year    = {2022},
  doi     = {10.48550/arXiv.2207.14628},
}

SIGMOD

HET-GMP: A Graph-based System Approach to Scaling Large Embedding Model Training

Xupeng Miao, Yining Shi, Hailin Zhang, Xin Zhang and 3 more authors

In Proceedings of SIGMOD Conference 2022

@inproceedings{miao2022hetgmp,
  title     = {{HET-GMP:} {A} Graph-based System Approach to Scaling Large Embedding Model Training},
  author    = {Miao, Xupeng and Shi, Yining and Zhang, Hailin and Zhang, Xin and Nie, Xiaonan and Yang, Zhi and Cui, Bin},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2022},
  pages     = {470--480},
  publisher = {{ACM}},
  doi       = {10.1145/3514221.3517902},
}

VLDBJ

P2CG: A Privacy Preserving Collaborative Graph Neural Network Training Framework

Xupeng Miao, Wentao Zhang, Yuezihan Jiang, Fangcheng Fu and 5 more authors

The VLDB Journal 2022

@article{miao2022p2cg,
  author  = {Miao, Xupeng and Zhang, Wentao and Jiang, Yuezihan and Fu, Fangcheng and Shao, Yingxia and Chen, Lei and Tao, Yangyu and Cao, Gang and Cui, Bin},
  title   = {{P2CG}: A Privacy Preserving Collaborative Graph Neural Network Training Framework},
  journal = {The VLDB Journal},
  year    = {2022},
}

ICDE

TSPLIT: Fine-grained GPU Memory Management for Efficient DNN Training via Tensor Splitting

Xiaonan Nie, Xupeng Miao, Zhi Yang, and Bin Cui

In Proceedings of ICDE Conference 2022

@inproceedings{DBLP:conf/icde/NieMYC22,
  title     = {{TSPLIT:} Fine-grained {GPU} Memory Management for Efficient {DNN} Training via Tensor Splitting},
  author    = {Nie, Xiaonan and Miao, Xupeng and Yang, Zhi and Cui, Bin},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2022},
  pages     = {2615--2628},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE53745.2022.00241},
}

ICDE

HET-KG: Communication-Efficient Knowledge Graph Embedding Training via Hotness-Aware Cache

Sicong Dong¹, Xupeng Miao¹, Pengkai Liu, Xin Wang and 2 more authors

In Proceedings of ICDE Conference 2022

@inproceedings{DBLP:conf/icde/DongMLWCL22,
  title     = {{HET-KG:} Communication-Efficient Knowledge Graph Embedding Training via Hotness-Aware Cache},
  author    = {Dong, Sicong and Miao, Xupeng and Liu, Pengkai and Wang, Xin and Cui, Bin and Li, Jianxin},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2022},
  pages     = {1754--1766},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE53745.2022.00177},
  cofirst   = {true},
}

ICDE

Zoomer: Boosting Retrieval on Web-scale Graphs by Regions of Interest

Yuezihan Jiang, Yu Cheng, Hanyu Zhao, Wentao Zhang and 5 more authors

In Proceedings of ICDE Conference 2022

@inproceedings{DBLP:conf/icde/JiangCZZMHWYC22,
  title     = {Zoomer: Boosting Retrieval on Web-scale Graphs by Regions of Interest},
  author    = {Jiang, Yuezihan and Cheng, Yu and Zhao, Hanyu and Zhang, Wentao and Miao, Xupeng and He, Yu and Wang, Liang and Yang, Zhi and Cui, Bin},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2022},
  pages     = {2224--2236},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE53745.2022.00212},
}

CIKM

Scalable Graph Sampling on GPUs with Compressed Graph

Hongbo Yin, Yingxia Shao, Xupeng Miao, Yawen Li and 1 more author

In Proceedings of CIKM Conference 2022

Bib HTML CCF-B

@inproceedings{DBLP:conf/cikm/YinSMLC22,
  author    = {Yin, Hongbo and Shao, Yingxia and Miao, Xupeng and Li, Yawen and Cui, Bin},
  title     = {Scalable Graph Sampling on {GPUs} with Compressed Graph},
  booktitle = {Proceedings of CIKM Conference},
  pages     = {2383--2392},
  year      = {2022},
  doi       = {10.1145/3511808.3557443},
}

ICDE Poster

Lasagne: A Multi-Layer Graph Convolutional Network Framework via Node-aware Deep Architecture (Extended Abstract)

Xupeng Miao, Wentao Zhang, Yingxia Shao, Bin Cui and 3 more authors

In Proceedings of ICDE Conference 2022

Bib PDF

@inproceedings{DBLP:conf/icde/MiaoZSCCZJ22,
  title     = {Lasagne: {A} Multi-Layer Graph Convolutional Network Framework via Node-aware Deep Architecture (Extended Abstract)},
  author    = {Miao, Xupeng and Zhang, Wentao and Shao, Yingxia and Cui, Bin and Chen, Lei and Zhang, Ce and Jiang, Jiawei},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2022},
  pages     = {1561--1562},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE53745.2022.00157},
}

软件学报

Graph Neural Network Training Acceleration over Multi-GPUs

Xupeng Miao, Yujie Wang, Jia Shen, Yingxia Shao and 1 more author

In Journal of Software (Chinese) 2022

Bib PDF CCF-T1

@article{jos2022gnn,
  author  = {Miao, Xupeng and Wang, Yujie and Shen, Jia and Shao, Yingxia and Cui, Bin},
  title   = {Graph Neural Network Training Acceleration over Multi-{GPUs}},
  journal = {Journal of Software (Chinese)},
  year    = {2022},
  doi     = {10.13328/j.cnki.jos.006647},
}

arXiv

HetuMoE: An Efficient Trillion-scale Mixture-of-Expert Distributed Training System

Xiaonan Nie, Pinxue Zhao, Xupeng Miao, Tong Zhao and 1 more author

arXiv preprint arXiv:2203.14685 2022

@article{DBLP:journals/corr/abs-2203-14685,
  author        = {Nie, Xiaonan and Zhao, Pinxue and Miao, Xupeng and Zhao, Tong and Cui, Bin},
  title         = {{HetuMoE}: An Efficient Trillion-scale Mixture-of-Expert Distributed Training System},
  journal       = {arXiv preprint arXiv:2203.14685},
  year          = {2022},
  eprint        = {2203.14685},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2203.14685},
}

CVPR

PointCLIP: Point Cloud Understanding by CLIP

Renrui Zhang, Ziyu Guo, Wei Zhang, Kunchang Li and 5 more authors

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition 2022

@inproceedings{zhang2022pointclip,
  author    = {Zhang, Renrui and Guo, Ziyu and Zhang, Wei and Li, Kunchang and Miao, Xupeng and Cui, Bin and Qiao, Yu and Gao, Peng and Li, Hongsheng},
  title     = {{PointCLIP}: Point Cloud Understanding by {CLIP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {8552--8562},
  year      = {2022},
}

ICML

OSDP: Optimal Sharded Data Parallel for Distributed Deep Learning

Youhe Jiang, Xupeng Miao, Xiaonan Nie, and Bin Cui

In Proceedings of ICML Hardware Aware Efficient Training (HAET) Workshop 2022

Bib HTML

@inproceedings{osdp2022,
  author    = {Jiang, Youhe and Miao, Xupeng and Nie, Xiaonan and Cui, Bin},
  title     = {{OSDP}: Optimal Sharded Data Parallel for Distributed Deep Learning},
  booktitle = {Proceedings of ICML Hardware Aware Efficient Training (HAET) Workshop},
  year      = {2022},
}

2021

SIGMOD

Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce

Xupeng Miao, Xiaonan Nie, Yingxia Shao, Zhi Yang and 3 more authors

In Proceedings of SIGMOD Conference 2021

@inproceedings{DBLP:conf/sigmod/MiaoNSYJM021,
  title     = {Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce},
  author    = {Miao, Xupeng and Nie, Xiaonan and Shao, Yingxia and Yang, Zhi and Jiang, Jiawei and Ma, Lingxiao and Cui, Bin},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2021},
  pages     = {2262--2270},
  publisher = {{ACM}},
  doi       = {10.1145/3448016.3452773},
}

ICDE Poster

CuWide: Towards Efficient Flow-based Training for Sparse Wide Models on GPUs (Extended Abstract)

Xupeng Miao, Lingxiao Ma, Zhi Yang, Yingxia Shao and 3 more authors

In Proceedings of ICDE Conference 2021

Bib PDF

@inproceedings{DBLP:conf/icde/MiaoMYS0YJ21,
  author    = {Miao, Xupeng and Ma, Lingxiao and Yang, Zhi and Shao, Yingxia and Cui, Bin and Yu, Lele and Jiang, Jiawei},
  title     = {{CuWide}: Towards Efficient Flow-based Training for Sparse Wide Models on {GPUs} (Extended Abstract)},
  booktitle = {Proceedings of ICDE Conference},
  pages     = {2330--2331},
  year      = {2021},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE51399.2021.00251},
}

TKDE

Lasagne: A multi-layer graph convolutional network framework via node-aware deep architecture

Xupeng Miao, Wentao Zhang, Yingxia Shao, Bin Cui and 3 more authors

IEEE Transactions on Knowledge and Data Engineering 2021

@article{miao2021lasagne,
  author    = {Miao, Xupeng and Zhang, Wentao and Shao, Yingxia and Cui, Bin and Chen, Lei and Zhang, Ce and Jiang, Jiawei},
  title     = {Lasagne: A multi-layer graph convolutional network framework via node-aware deep architecture},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {2021},
  publisher = {IEEE},
  doi       = {10.1109/TKDE.2021.3103984},
}

SIGKDD

DeGNN: Improving Graph Neural Networks with Graph Decomposition

Xupeng Miao, Nezihe Merve Gürel, Wentao Zhang, Zhichao Han and 16 more authors

In Proceedings of SIGKDD Conference 2021

@inproceedings{DBLP:conf/kdd/MiaoGZHLMRRSSWW21,
  author    = {Miao, Xupeng and G{\"{u}}rel, Nezihe Merve and Zhang, Wentao and Han, Zhichao and Li, Bo and Min, Wei and Rao, Susie Xi and Ren, Hansheng and Shan, Yinan and Shao, Yingxia and Wang, Yujie and Wu, Fan and Xue, Hui and Yang, Yaming and Zhang, Zitao and Zhao, Yang and Zhang, Shuai and Wang, Yujing and Cui, Bin and Zhang, Ce},
  title     = {{DeGNN}: Improving Graph Neural Networks with Graph Decomposition},
  booktitle = {Proceedings of SIGKDD Conference},
  pages     = {1223--1233},
  year      = {2021},
  publisher = {{ACM}},
  doi       = {10.1145/3447548.3467312},
}

SIGKDD

ROD: Reception-aware Online Distillation for Sparse Graphs

Wentao Zhang, Yuezihan Jiang, Yang Li, Zeang Sheng and 5 more authors

In Proceedings of SIGKDD Conference 2021

@inproceedings{DBLP:conf/kdd/ZhangJLSSMWY021,
  title     = {{ROD:} Reception-aware Online Distillation for Sparse Graphs},
  author    = {Zhang, Wentao and Jiang, Yuezihan and Li, Yang and Sheng, Zeang and Shen, Yu and Miao, Xupeng and Wang, Liang and Yang, Zhi and Cui, Bin},
  booktitle = {Proceedings of SIGKDD Conference},
  year      = {2021},
  pages     = {2232--2242},
  publisher = {{ACM}},
  doi       = {10.1145/3447548.3467221},
}

VLDBJ

Memory-aware framework for fast and scalable second-order random walk over billion-edge natural graphs

Yingxia Shao, Shiyue Huang, Yawen Li, Xupeng Miao and 2 more authors

The VLDB Journal 2021

@article{DBLP:journals/vldb/ShaoHLMCC21,
  title   = {Memory-aware framework for fast and scalable second-order random walk over billion-edge natural graphs},
  author  = {Shao, Yingxia and Huang, Shiyue and Li, Yawen and Miao, Xupeng and Cui, Bin and Chen, Lei},
  journal = {The VLDB Journal},
  year    = {2021},
  volume  = {30},
  number  = {5},
  pages   = {769--797},
  doi     = {10.1007/s00778-021-00669-2},
}

arXiv

EvoMoE: An Evolutional Mixture-of-Experts Training Framework via Dense-To-Sparse Gate

Xiaonan Nie, Xupeng Miao, Shijie Cao, Lingxiao Ma and 6 more authors

arXiv preprint arXiv:2112.14397 2021

@article{DBLP:journals/corr/abs-2112-14397,
  author        = {Nie, Xiaonan and Miao, Xupeng and Cao, Shijie and Ma, Lingxiao and Liu, Qibin and Xue, Jilong and Miao, Youshan and Liu, Yi and Yang, Zhi and Cui, Bin},
  title         = {{EvoMoE}: An Evolutional Mixture-of-Experts Training Framework via Dense-To-Sparse Gate},
  journal       = {arXiv preprint arXiv:2112.14397},
  year          = {2021},
  eprint        = {2112.14397},
  archiveprefix = {arXiv},
}

2020

TKDE

Cuwide: Towards efficient flow-based training for sparse wide models on gpus

Xupeng Miao, Lingxiao Ma, Zhi Yang, Yingxia Shao and 3 more authors

IEEE Transactions on Knowledge and Data Engineering 2020

@article{miao2020cuwide,
  author    = {Miao, Xupeng and Ma, Lingxiao and Yang, Zhi and Shao, Yingxia and Cui, Bin and Yu, Lele and Jiang, Jiawei},
  title     = {{CuWide}: Towards Efficient Flow-based Training for Sparse Wide Models on {GPUs}},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {2020},
  publisher = {IEEE},
}

SIGMOD

Reliable Data Distillation on Graph Convolutional Network

Wentao Zhang¹, Xupeng Miao¹, Yingxia Shao, Jiawei Jiang and 3 more authors

In Proceedings of SIGMOD 2020

@inproceedings{DBLP:conf/sigmod/ZhangMSJCR020,
  author    = {Zhang, Wentao and Miao, Xupeng and Shao, Yingxia and Jiang, Jiawei and Chen, Lei and Ruas, Olivier and Cui, Bin},
  title     = {Reliable Data Distillation on Graph Convolutional Network},
  booktitle = {Proceedings of SIGMOD Conference},
  pages     = {1399--1414},
  year      = {2020},
  publisher = {{ACM}},
  doi       = {10.1145/3318464.3389706},
  cofirst   = {true},
}

SIGMOD

Memory-Aware Framework for Efficient Second-Order Random Walk on Large Graphs

Yingxia Shao, Shiyue Huang, Xupeng Miao, Bin Cui and 1 more author

In Proceedings of SIGMOD 2020

@inproceedings{DBLP:conf/sigmod/ShaoHM0020,
  author    = {Shao, Yingxia and Huang, Shiyue and Miao, Xupeng and Cui, Bin and Chen, Lei},
  title     = {Memory-Aware Framework for Efficient Second-Order Random Walk on Large Graphs},
  booktitle = {Proceedings of SIGMOD Conference},
  pages     = {1797--1812},
  year      = {2020},
  publisher = {{ACM}},
  doi       = {10.1145/3318464.3380562},
}

ICDE

PSGraph: How Tencent trains extremely large-scale graphs with Spark?

Jiawei Jiang, Pin Xiao, Lele Yu, Xiaosen Li and 4 more authors

In Proceedings of ICDE Conference 2020

@inproceedings{DBLP:conf/icde/JiangXYLCMZ020,
  author    = {Jiang, Jiawei and Xiao, Pin and Yu, Lele and Li, Xiaosen and Cheng, Jiefeng and Miao, Xupeng and Zhang, Zhipeng and Cui, Bin},
  title     = {{PSGraph}: How {Tencent} trains extremely large-scale graphs with {Spark}?},
  booktitle = {Proceedings of ICDE Conference},
  pages     = {1549--1557},
  year      = {2020},
  publisher = {{IEEE}},
  doi       = {10.1109/ICDE48307.2020.00137},
}

2019

SIGMOD

PS2: Parameter Server on Spark

Zhipeng Zhang, Bin Cui, Yingxia Shao, Lele Yu and 2 more authors

In Proceedings of SIGMOD 2019