@inproceedings{wang2025putteacher,
    author = {Wang, Maolin and Chu, Jun and Xie, Sicong and Zang, Xiaoling and Zhao, Yao and Zhong, Wenliang and Zhao, Xiangyu},
    title = {Put Teacher in Student's Shoes: Cross-Distillation for Ultra-compact Model Compression Framework},
    year = {2025},
    isbn = {9798400714542},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    url = {https://doi.org/10.1145/3711896.3737257},
    doi = {10.1145/3711896.3737257},
    abstract = {In the era of mobile computing, deploying efficient Natural Language Processing (NLP) models in resource-restricted edge settings presents significant challenges, particularly in environments requiring strict privacy compliance, real-time responsiveness, and diverse multi-tasking capabilities. These challenges create a fundamental need for ultra-compact models that maintain strong performance across various NLP tasks while adhering to stringent memory constraints. To this end, we introduce the Edge ultra-lIte BERT framework (EI-BERT) with a novel cross-distillation method. EI-BERT efficiently compresses models through a comprehensive pipeline including hard token pruning, cross-distillation, parameter quantization, and plug-and-play deployment. Specifically, the cross-distillation method uniquely positions the teacher model to understand the student model's perspective, ensuring efficient knowledge transfer through parameter integration and the mutual interplay between models. Through extensive experiments, we achieve a remarkably compact BERT-based model of only 1.91 MB, the smallest to date for Natural Language Understanding (NLU) tasks. This ultra-compact model has been successfully deployed across multiple scenarios within the Alipay ecosystem, demonstrating significant improvements in real-world applications. For example, it has been integrated into Alipay's live Edge Recommendation system since January 2024, currently serving the app's recommendation traffic across 8.4 million daily active devices.},
    booktitle = {Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2},
    pages = {4975--4985},
    numpages = {11},
    keywords = {alipay, knowledge distillation, language model compression, model deployment, natural language processing, natural language understanding},
    location = {Toronto, ON, Canada},
    series = {KDD '25}
}