@inproceedings{osdi23einnet,title={EinNet: Optimizing Tensor Programs with Derivation-Based Transformations},author={Zheng, Liyan and Wang, Haojie and Zhai, Jidong and Hu, Muyan and Ma, Zixuan and Wang, Tuowei and Huang, Shuhong and Miao, Xupeng and Tang, Shizhi and Huang, Kezhao and Jia, Zhihao},booktitle={Proceedings of OSDI Conference},year={2023}}
VLDB
SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training
Xupeng Miao, Yining Shi, Zhi Yang, Bin Cui and 1 more author
@article{miao2023sdpipe,title={SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training},author={Miao, Xupeng and Shi, Yining and Yang, Zhi and Cui, Bin and Jia, Zhihao},journal={Proc. {VLDB} Endow.},volume={16},year={2023},publisher={VLDB Endowment},}
arXiv
Angel-PTM: A Scalable and Economical Large-scale Pre-training System in Tencent
Xiaonan Nie, Yi Liu, Fangcheng Fu, Jinbao Xue and 4 more authors
@article{DBLP:journals/corr/abs-2303-02868,title={Angel-PTM: A Scalable and Economical Large-scale Pre-training System in Tencent},author={Nie, Xiaonan and Liu, Yi and Fu, Fangcheng and Xue, Jinbao and Jiao, Dian and Miao, Xupeng and Tao, Yangyu and Cui, Bin},journal={arXiv preprint arXiv:2303.02868},year={2023},doi={10.48550/arXiv.2303.02868}}
VLDB
Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism
Xupeng Miao, Yujie Wang, Youhe Jiang, Chunan Shi and 3 more authors
@article{miao2023galvatron,title={Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism},author={Miao, Xupeng and Wang, Yujie and Jiang, Youhe and Shi, Chunan and Nie, Xiaonan and Zhang, Hailin and Cui, Bin},journal={Proc. {VLDB} Endow.},volume={16},number={3},pages={470--479},year={2023},doi={10.14778/3570690.3570697},publisher={VLDB Endowment},}
SIGMOD
FlexMoE: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement
Xiaonan Nie, Xupeng Miao, Zilong Wang, Jilong Xue and 4 more authors
@inproceedings{nie2023flexmoe,title={FlexMoE: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement},author={Nie, Xiaonan and Miao, Xupeng and Wang, Zilong and Xue, Jilong and Ma, Lingxiao and Yang, Zichao and Cao, Gang and Cui, Bin},booktitle={Proceedings of SIGMOD Conference},year={2023},publisher={{ACM}}}
AAAI
CALIP: Zero-Shot Enhancement of CLIP with Parameter-free Attention
Ziyu Guo, Renrui Zhang, Longtian Qiu, Xianzheng Ma and 3 more authors
@inproceedings{guo2023calip,title={CALIP: Zero-Shot Enhancement of CLIP with Parameter-free Attention},author={Guo, Ziyu and Zhang, Renrui and Qiu, Longtian and Ma, Xianzheng and Miao, Xupeng and He, Xuming and Cui, Bin},booktitle={Proceedings of AAAI Conference},year={2023},publisher={{AAAI}}}
2022
SCIS
Hetu: A highly efficient automatic parallel distributed deep learning system
Xupeng Miao, Xiaonan Nie, Hailin Zhang, Tong Zhao and 1 more author
@article{DBLP:journals/chinaf/MiaoXP22,author={Miao, Xupeng and Nie, Xiaonan and Zhang, Hailin and Zhao, Tong and Cui, Bin},title={Hetu: A highly efficient automatic parallel distributed deep learning system},journal={Sci. China Inf. Sci.},url={http://engine.scichina.com/doi/10.1007/s11432-022-3581-9},doi={10.1007/s11432-022-3581-9},year={2022},}
VLDB
HET: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework (Best Scalable Data Science Paper)
Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 3 more authors
@article{miao2021het,title={{HET:} Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework},author={Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Cui, Bin},journal={Proc. {VLDB} Endow.},volume={15},number={2},pages={312--320},year={2022},publisher={VLDB Endowment},notes={(Best Scalable Data Science Paper)},}
VLDB
Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates
Fangcheng Fu, Xupeng Miao, Jiawei Jiang, Huanran Xue and 1 more author
@article{DBLP:journals/corr/abs-2207-14628,author={Fu, Fangcheng and Miao, Xupeng and Jiang, Jiawei and Xue, Huanran and Cui, Bin},title={Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates},journal={Proc. {VLDB} Endow.},year={2022},doi={10.48550/arXiv.2207.14628},}
SIGMOD
HET-GMP: A Graph-based System Approach to Scaling Large Embedding Model Training
Xupeng Miao, Yining Shi, Hailin Zhang, Xin Zhang and 3 more authors
@inproceedings{miao2022hetgmp,author={Miao, Xupeng and Shi, Yining and Zhang, Hailin and Zhang, Xin and Nie, Xiaonan and Yang, Zhi and Cui, Bin},title={{HET-GMP:} {A} Graph-based System Approach to Scaling Large Embedding Model Training},booktitle={Proceedings of SIGMOD Conference},pages={470--480},publisher={{ACM}},year={2022},doi={10.1145/3514221.3517902},}
VLDBJ
P2CG: A Privacy Preserving Collaborative Graph Neural Network Training Framework
Xupeng Miao, Wentao Zhang, Yuezihan Jiang, Fangcheng Fu and 5 more authors
@article{miao2022p2cg,title={P2CG: A Privacy Preserving Collaborative Graph Neural Network Training Framework},author={Miao, Xupeng and Zhang, Wentao and Jiang, Yuezihan and Fu, Fangcheng and Shao, Yingxia and Chen, Lei and Tao, Yangyu and Cao, Gang and Cui, Bin},journal={The VLDB Journal},year={2022}}
ICDE
TSPLIT: Fine-grained GPU Memory Management for Efficient DNN Training via Tensor Splitting
Xiaonan Nie, Xupeng Miao, Zhi Yang, and Bin Cui
@inproceedings{DBLP:conf/icde/NieMYC22,author={Nie, Xiaonan and Miao, Xupeng and Yang, Zhi and Cui, Bin},title={{TSPLIT:} Fine-grained {GPU} Memory Management for Efficient {DNN} Training via Tensor Splitting},booktitle={Proceedings of ICDE Conference},pages={2615--2628},publisher={{IEEE}},year={2022},doi={10.1109/ICDE53745.2022.00241},}
ICDE
HET-KG: Communication-Efficient Knowledge Graph Embedding Training via Hotness-Aware Cache
Sicong Dong, Xupeng Miao, Pengkai Liu, Xin Wang and 2 more authors
@inproceedings{DBLP:conf/icde/DongMLWCL22,author={Dong, Sicong and Miao, Xupeng and Liu, Pengkai and Wang, Xin and Cui, Bin and Li, Jianxin},title={{HET-KG:} Communication-Efficient Knowledge Graph Embedding Training via Hotness-Aware Cache},booktitle={Proceedings of ICDE Conference},pages={1754--1766},publisher={{IEEE}},year={2022},doi={10.1109/ICDE53745.2022.00177},cofirst={true},}
ICDE
Zoomer: Boosting Retrieval on Web-scale Graphs by Regions of Interest
Yuezihan Jiang, Yu Cheng, Hanyu Zhao, Wentao Zhang and 5 more authors
@inproceedings{DBLP:conf/icde/JiangCZZMHWYC22,author={Jiang, Yuezihan and Cheng, Yu and Zhao, Hanyu and Zhang, Wentao and Miao, Xupeng and He, Yu and Wang, Liang and Yang, Zhi and Cui, Bin},title={Zoomer: Boosting Retrieval on Web-scale Graphs by Regions of Interest},booktitle={Proceedings of ICDE Conference},pages={2224--2236},publisher={{IEEE}},year={2022},doi={10.1109/ICDE53745.2022.00212}}
CIKM
Scalable Graph Sampling on GPUs with Compressed Graph
Hongbo Yin, Yingxia Shao, Xupeng Miao, Yawen Li and 1 more author
@inproceedings{DBLP:conf/cikm/YinSMLC22,author={Yin, Hongbo and Shao, Yingxia and Miao, Xupeng and Li, Yawen and Cui, Bin},title={Scalable Graph Sampling on GPUs with Compressed Graph},booktitle={Proceedings of CIKM Conference},pages={2383--2392},year={2022},doi={10.1145/3511808.3557443}}
ICDE Poster
Lasagne: A Multi-Layer Graph Convolutional Network Framework via Node-aware Deep Architecture (Extended Abstract)
Xupeng Miao, Wentao Zhang, Yingxia Shao, Bin Cui and 3 more authors
@inproceedings{DBLP:conf/icde/MiaoZSCCZJ22,author={Miao, Xupeng and Zhang, Wentao and Shao, Yingxia and Cui, Bin and Chen, Lei and Zhang, Ce and Jiang, Jiawei},title={Lasagne: {A} Multi-Layer Graph Convolutional Network Framework via Node-aware Deep Architecture (Extended Abstract)},booktitle={Proceedings of ICDE Conference},pages={1561--1562},publisher={{IEEE}},year={2022},doi={10.1109/ICDE53745.2022.00157},}
Journal of Software
Graph Neural Network Training Acceleration over Multi-GPUs
Xupeng Miao, Yujie Wang, Jia Shen, Yingxia Shao and 1 more author
@article{jos2022gnn,author={Miao, Xupeng and Wang, Yujie and Shen, Jia and Shao, Yingxia and Cui, Bin},title={Graph Neural Network Training Acceleration over Multi-GPUs},journal={Journal of Software (Chinese)},year={2022},doi={10.13328/j.cnki.jos.006647},}
arXiv
HetuMoE: An Efficient Trillion-scale Mixture-of-Expert Distributed Training System
Xiaonan Nie, Pinxue Zhao, Xupeng Miao, Tong Zhao and 1 more author
@article{DBLP:journals/corr/abs-2203-14685,author={Nie, Xiaonan and Zhao, Pinxue and Miao, Xupeng and Zhao, Tong and Cui, Bin},title={HetuMoE: An Efficient Trillion-scale Mixture-of-Expert Distributed Training System},journal={arXiv preprint arXiv:2203.14685},year={2022},doi={10.48550/arXiv.2203.14685}}
arXiv
Distributed Graph Neural Network Training: A Survey
Yingxia Shao, Hongzheng Li, Xizhi Gu, Hongbo Yin and 5 more authors
@article{shao2022distributed,title={Distributed Graph Neural Network Training: A Survey},author={Shao, Yingxia and Li, Hongzheng and Gu, Xizhi and Yin, Hongbo and Li, Yawen and Miao, Xupeng and Zhang, Wentao and Cui, Bin and Chen, Lei},journal={arXiv preprint arXiv:2211.00216},year={2022},doi={10.48550/arXiv.2211.00216}}
CVPR
PointCLIP: Point Cloud Understanding by CLIP
Renrui Zhang, Ziyu Guo, Wei Zhang, Kunchang Li and 5 more authors
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition 2022
@inproceedings{zhang2022pointclip,author={Zhang, Renrui and Guo, Ziyu and Zhang, Wei and Li, Kunchang and Miao, Xupeng and Cui, Bin and Qiao, Yu and Gao, Peng and Li, Hongsheng},title={PointCLIP: Point Cloud Understanding by {CLIP}},booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},pages={8552--8562},year={2022},}
ICML Workshop
OSDP: Optimal Sharded Data Parallel for Distributed Deep Learning
Youhe Jiang, Xupeng Miao, Xiaonan Nie, and Bin Cui
In Proceedings of ICML Hardware Aware Efficient Training (HAET) Workshop 2022
@inproceedings{osdp2022,author={Jiang, Youhe and Miao, Xupeng and Nie, Xiaonan and Cui, Bin},title={OSDP: Optimal Sharded Data Parallel for Distributed Deep Learning},booktitle={Proceedings of ICML Hardware Aware Efficient Training (HAET) Workshop},year={2022},}
2021
SIGMOD
Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce
Xupeng Miao, Xiaonan Nie, Yingxia Shao, Zhi Yang and 3 more authors
@inproceedings{DBLP:conf/sigmod/MiaoNSYJM021,author={Miao, Xupeng and Nie, Xiaonan and Shao, Yingxia and Yang, Zhi and Jiang, Jiawei and Ma, Lingxiao and Cui, Bin},title={Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce},booktitle={Proceedings of SIGMOD Conference},pages={2262--2270},publisher={{ACM}},year={2021},doi={10.1145/3448016.3452773},}
ICDE Poster
CuWide: Towards Efficient Flow-based Training for Sparse Wide Models on GPUs (Extended Abstract)
Xupeng Miao, Lingxiao Ma, Zhi Yang, Yingxia Shao and 3 more authors
@inproceedings{DBLP:conf/icde/MiaoMYS0YJ21,author={Miao, Xupeng and Ma, Lingxiao and Yang, Zhi and Shao, Yingxia and Cui, Bin and Yu, Lele and Jiang, Jiawei},title={CuWide: Towards Efficient Flow-based Training for Sparse Wide Models on GPUs (Extended Abstract)},booktitle={Proceedings of ICDE Conference},pages={2330--2331},publisher={{IEEE}},year={2021},doi={10.1109/ICDE51399.2021.00251},}
TKDE
Lasagne: A Multi-Layer Graph Convolutional Network Framework via Node-aware Deep Architecture
Xupeng Miao, Wentao Zhang, Yingxia Shao, Bin Cui and 3 more authors
IEEE Transactions on Knowledge and Data Engineering 2021
@article{miao2021lasagne,title={Lasagne: A multi-layer graph convolutional network framework via node-aware deep architecture},author={Miao, Xupeng and Zhang, Wentao and Shao, Yingxia and Cui, Bin and Chen, Lei and Zhang, Ce and Jiang, Jiawei},journal={IEEE Transactions on Knowledge and Data Engineering},year={2021},publisher={IEEE},doi={10.1109/TKDE.2021.3103984},}
SIGKDD
DeGNN: Improving Graph Neural Networks with Graph Decomposition
Xupeng Miao, Nezihe Merve Gürel, Wentao Zhang, Zhichao Han and 16 more authors
@inproceedings{DBLP:conf/kdd/MiaoGZHLMRRSSWW21,author={Miao, Xupeng and G{\"{u}}rel, Nezihe Merve and Zhang, Wentao and Han, Zhichao and Li, Bo and Min, Wei and Rao, Susie Xi and Ren, Hansheng and Shan, Yinan and Shao, Yingxia and Wang, Yujie and Wu, Fan and Xue, Hui and Yang, Yaming and Zhang, Zitao and Zhao, Yang and Zhang, Shuai and Wang, Yujing and Cui, Bin and Zhang, Ce},title={DeGNN: Improving Graph Neural Networks with Graph Decomposition},booktitle={Proceedings of SIGKDD Conference},pages={1223--1233},publisher={{ACM}},year={2021},doi={10.1145/3447548.3467312},}
SIGKDD
ROD: Reception-aware Online Distillation for Sparse Graphs
Wentao Zhang, Yuezihan Jiang, Yang Li, Zeang Sheng and 5 more authors
@inproceedings{DBLP:conf/kdd/ZhangJLSSMWY021,author={Zhang, Wentao and Jiang, Yuezihan and Li, Yang and Sheng, Zeang and Shen, Yu and Miao, Xupeng and Wang, Liang and Yang, Zhi and Cui, Bin},title={{ROD:} Reception-aware Online Distillation for Sparse Graphs},booktitle={Proceedings of SIGKDD Conference},pages={2232--2242},publisher={{ACM}},year={2021},doi={10.1145/3447548.3467221}}
VLDBJ
Memory-aware framework for fast and scalable second-order random walk over billion-edge natural graphs
Yingxia Shao, Shiyue Huang, Yawen Li, Xupeng Miao and 2 more authors
@article{DBLP:journals/vldb/ShaoHLMCC21,author={Shao, Yingxia and Huang, Shiyue and Li, Yawen and Miao, Xupeng and Cui, Bin and Chen, Lei},title={Memory-aware framework for fast and scalable second-order random walk over billion-edge natural graphs},journal={The VLDB Journal},volume={30},number={5},pages={769--797},year={2021},doi={10.1007/s00778-021-00669-2}}
arXiv
EvoMoE: An Evolutional Mixture-of-Experts Training Framework via Dense-To-Sparse Gate
Xiaonan Nie, Xupeng Miao, Shijie Cao, Lingxiao Ma and 6 more authors
@article{DBLP:journals/corr/abs-2112-14397,author={Nie, Xiaonan and Miao, Xupeng and Cao, Shijie and Ma, Lingxiao and Liu, Qibin and Xue, Jilong and Miao, Youshan and Liu, Yi and Yang, Zhi and Cui, Bin},title={EvoMoE: An Evolutional Mixture-of-Experts Training Framework via Dense-To-Sparse Gate},journal={arXiv preprint arXiv:2112.14397},year={2021},}
2020
TKDE
CuWide: Towards Efficient Flow-based Training for Sparse Wide Models on GPUs
Xupeng Miao, Lingxiao Ma, Zhi Yang, Yingxia Shao and 3 more authors
IEEE Transactions on Knowledge and Data Engineering 2020
@article{miao2020cuwide,title={Cuwide: Towards efficient flow-based training for sparse wide models on gpus},author={Miao, Xupeng and Ma, Lingxiao and Yang, Zhi and Shao, Yingxia and Cui, Bin and Yu, Lele and Jiang, Jiawei},journal={IEEE Transactions on Knowledge and Data Engineering},year={2020},publisher={IEEE},}
SIGMOD
Reliable Data Distillation on Graph Convolutional Network
Wentao Zhang, Xupeng Miao, Yingxia Shao, Jiawei Jiang and 3 more authors
@inproceedings{DBLP:conf/sigmod/ZhangMSJCR020,author={Zhang, Wentao and Miao, Xupeng and Shao, Yingxia and Jiang, Jiawei and Chen, Lei and Ruas, Olivier and Cui, Bin},title={Reliable Data Distillation on Graph Convolutional Network},booktitle={Proceedings of SIGMOD},pages={1399--1414},publisher={{ACM}},year={2020},doi={10.1145/3318464.3389706},cofirst={true}}
SIGMOD
Memory-Aware Framework for Efficient Second-Order Random Walk on Large Graphs
Yingxia Shao, Shiyue Huang, Xupeng Miao, Bin Cui and 1 more author
@inproceedings{DBLP:conf/sigmod/ShaoHM0020,author={Shao, Yingxia and Huang, Shiyue and Miao, Xupeng and Cui, Bin and Chen, Lei},title={Memory-Aware Framework for Efficient Second-Order Random Walk on Large Graphs},booktitle={Proceedings of SIGMOD},pages={1797--1812},publisher={{ACM}},year={2020},doi={10.1145/3318464.3380562}}
ICDE
PSGraph: How Tencent trains extremely large-scale graphs with Spark?
Jiawei Jiang, Pin Xiao, Lele Yu, Xiaosen Li and 4 more authors
@inproceedings{DBLP:conf/icde/JiangXYLCMZ020,author={Jiang, Jiawei and Xiao, Pin and Yu, Lele and Li, Xiaosen and Cheng, Jiefeng and Miao, Xupeng and Zhang, Zhipeng and Cui, Bin},title={PSGraph: How Tencent trains extremely large-scale graphs with Spark?},booktitle={Proceedings of ICDE Conference},pages={1549--1557},publisher={{IEEE}},year={2020},doi={10.1109/ICDE48307.2020.00137}}
2019
SIGMOD
PS2: Parameter Server on Spark
Zhipeng Zhang, Bin Cui, Yingxia Shao, Lele Yu and 2 more authors
@inproceedings{DBLP:conf/sigmod/Zhang0SYJM19,author={Zhang, Zhipeng and Cui, Bin and Shao, Yingxia and Yu, Lele and Jiang, Jiawei and Miao, Xupeng},title={{PS2:} Parameter Server on Spark},booktitle={Proceedings of SIGMOD},pages={376--388},publisher={{ACM}},year={2019},doi={10.1145/3299869.3314038}}