Publications

% arXiv preprint; stored as an article, so the venue goes in the journal field.
@article{dong2024internlmxcomposer2,
  title = {{InternLM-XComposer2}: Mastering Free-form Text-Image Composition and Comprehension in Vision-Language Large Model},
  author = {Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Wang, Bin and Ouyang, Linke and Wei, Xilin and Zhang, Songyang and Duan, Haodong and Cao, Maosong and Zhang, Wenwei and Li, Yining and Yan, Hang and Gao, Yang and Zhang, Xinyue and Li, Wei and Li, Jingwen and Chen, Kai and He, Conghui and Zhang, Xingcheng and Qiao, Yu and Lin, Dahua and Wang, Jiaqi},
  journal = {arXiv preprint},
  year = {2024},
}

CVPR

From Pixels to Graphs: Open-Vocabulary Scene Graph Generation with Vision-Language Models

Rongjie Li, Songyang Zhang, Dahua Lin, Kai Chen, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2024

NAACL

Fake Alignment: Are LLMs Really Aligned Well?

Yixu Wang, Yan Teng, Kexin Huang, Chengqi Lyu, Songyang Zhang, and 3 more authors

In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), 2024

Bib

% Conference paper (NAACL 2024). Punctuation between venue and year is left to the style.
@inproceedings{wang2023fake,
  title = {Fake Alignment: Are {LLMs} Really Aligned Well?},
  author = {Wang, Yixu and Teng, Yan and Huang, Kexin and Lyu, Chengqi and Zhang, Songyang and Zhang, Wenwei and Ma, Xingjun and Wang, Yingchun},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year = {2024},
}

T-PAMI

SGTR+: End-to-end Scene Graph Generation with Transformer

Rongjie Li, Songyang Zhang, and Xuming He

In IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2024

Bib

% Journal paper (TPAMI): an article entry with a journal field, not a proceedings entry.
@article{li2023sgtrplus,
  title = {{SGTR+}: End-to-end Scene Graph Generation with Transformer},
  author = {Li, Rongjie and Zhang, Songyang and He, Xuming},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year = {2024},
}

NAACL

BotChat: Evaluating LLMs’ Capabilities of Having Multi-Turn Dialogues

Haodong Duan, Jueqi Wei, Chonghua Wang, Hongwei Liu, Yixiao Fang, and 3 more authors

In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), 2024

% Conference paper (NAACL 2024). Punctuation between venue and year is left to the style.
@inproceedings{duan2023botchat,
  title = {{BotChat}: Evaluating {LLMs}' Capabilities of Having Multi-Turn Dialogues},
  author = {Duan, Haodong and Wei, Jueqi and Wang, Chonghua and Liu, Hongwei and Fang, Yixiao and Zhang, Songyang and Lin, Dahua and Chen, Kai},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year = {2024},
}

TMLR

PixMIM: Rethinking Pixel Reconstruction in Masked Image Modeling

Yuan Liu, Songyang Zhang, Jiacheng Chen, Kai Chen, and Dahua Lin

In Transactions on Machine Learning Research (TMLR), 2024

% Journal paper (TMLR): an article entry with a journal field, not a proceedings entry.
@article{liu2023pixmim,
  title = {{PixMIM}: Rethinking Pixel Reconstruction in Masked Image Modeling},
  author = {Liu, Yuan and Zhang, Songyang and Chen, Jiacheng and Chen, Kai and Lin, Dahua},
  journal = {Transactions on Machine Learning Research (TMLR)},
  year = {2024},
}

2023

ArXiv

InternLM-XComposer: A Vision-Language Large Model for Advanced Text-image Comprehension and Composition

Pan Zhang, Xiaoyi Dong, Bin Wang, Yuhang Cao, Chao Xu, and 16 more authors

2023

% arXiv preprint; stored as an article, so the venue goes in the journal field.
@article{zhang2023internlmxcomposer,
  title = {{InternLM-XComposer}: A Vision-Language Large Model for Advanced Text-image Comprehension and Composition},
  author = {Zhang, Pan and Dong, Xiaoyi and Wang, Bin and Cao, Yuhang and Xu, Chao and Ouyang, Linke and Zhao, Zhiyuan and Duan, Haodong and Zhang, Songyang and Ding, Shuangrui and Zhang, Wenwei and Yan, Hang and Zhang, Xinyue and Li, Wei and Li, Jingwen and Chen, Kai and He, Conghui and Zhang, Xingcheng and Qiao, Yu and Lin, Dahua and Wang, Jiaqi},
  journal = {arXiv preprint},
  year = {2023},
}

ArXiv

LawBench: Benchmarking Legal Knowledge of Large Language Models

Zhiwei Fei, Xiaoyu Shen, Dawei Zhu, Fengzhe Zhou, Zhuo Han, and 4 more authors

arXiv preprint arXiv:2309.16289, 2023

% arXiv preprint. The identifier lives in the eprint fields instead of the journal string.
@article{fei2023lawbench,
  title = {{LawBench}: Benchmarking Legal Knowledge of Large Language Models},
  author = {Fei, Zhiwei and Shen, Xiaoyu and Zhu, Dawei and Zhou, Fengzhe and Han, Zhuo and Zhang, Songyang and Chen, Kai and Shen, Zongwen and Ge, Jidong},
  journal = {arXiv preprint},
  eprint = {2309.16289},
  archiveprefix = {arXiv},
  year = {2023},
}

ArXiv

MMBench: Is Your Multi-modal Model an All-around Player?

Yuan Liu, Haodong Duan, Yuanhan Zhang, Bo Li, Songyang Zhang, and 7 more authors

In arXiv Preprint, 2023

% arXiv preprint, not a proceedings paper; stored as an article with the venue in the journal field.
@article{liu2023mmbench,
  title = {{MMBench}: Is Your Multi-modal Model an All-around Player?},
  author = {Liu, Yuan and Duan, Haodong and Zhang, Yuanhan and Li, Bo and Zhang, Songyang and Zhao, Wangbo and Yuan, Yike and Wang, Jiaqi and He, Conghui and Liu, Ziwei and Chen, Kai and Lin, Dahua},
  journal = {arXiv preprint},
  year = {2023},
}

IJCAI

TG-VQA: Ternary Game of Video Question Answering

Hao Li, Peng Jin, Zesen Cheng, Songyang Zhang, Kai Chen, and 3 more authors

In Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2023

% Conference paper (IJCAI 2023).
@inproceedings{li2023tgvqa,
  title = {{TG-VQA}: Ternary Game of Video Question Answering},
  author = {Li, Hao and Jin, Peng and Cheng, Zesen and Zhang, Songyang and Chen, Kai and Wang, Zhennan and Liu, Chang and Chen, Jie},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year = {2023},
}

ICCV

Improving Pixel-based MIM by Reducing Wasted Modeling Capability

Yuan Liu, Songyang Zhang, Jiacheng Chen, Zhaohui Yu, Kai Chen, and 1 more author

In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2023

% Conference paper (ICCV 2023).
@inproceedings{liu2023mff,
  title = {Improving Pixel-based {MIM} by Reducing Wasted Modeling Capability},
  author = {Liu, Yuan and Zhang, Songyang and Chen, Jiacheng and Yu, Zhaohui and Chen, Kai and Lin, Dahua},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  year = {2023},
}

CVPR

RIFormer: Keep Your Vision Backbone Effective But Removing Token Mixer

Jiahao Wang, Songyang Zhang, Yong Liu, Taiqiang Wu, Yujiu Yang, and 4 more authors

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023

% Conference paper (CVPR 2023).
@inproceedings{wang2022riformer,
  title = {{RIFormer}: Keep Your Vision Backbone Effective But Removing Token Mixer},
  author = {Wang, Jiahao and Zhang, Songyang and Liu, Yong and Wu, Taiqiang and Yang, Yujiu and Liu, Xihui and Chen, Kai and Luo, Ping and Lin, Dahua},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2023},
}

2022

ECCV

Learning Semantic Correspondence with Sparse Annotations

Shuaiyi Huang, Luyu Yang, Bo He, Songyang Zhang, Xuming He, and 1 more author

In Proceedings of the European Conference on Computer Vision (ECCV), 2022

% Conference paper (ECCV 2022).
@inproceedings{huang2022learning,
  title = {Learning Semantic Correspondence with Sparse Annotations},
  author = {Huang, Shuaiyi and Yang, Luyu and He, Bo and Zhang, Songyang and He, Xuming and Shrivastava, Abhinav},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year = {2022},
}

ECCV

Action Quality Assessment with Temporal Parsing Transformer

Yang Bai, Desen Zhou, Songyang Zhang, Jian Wang, Errui Ding, and 2 more authors

In Proceedings of the European Conference on Computer Vision (ECCV), 2022

% Conference paper (ECCV 2022).
@inproceedings{bai2022action,
  title = {Action Quality Assessment with Temporal Parsing Transformer},
  author = {Bai, Yang and Zhou, Desen and Zhang, Songyang and Wang, Jian and Ding, Errui and Long, Yang and Wang, Jingdong},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year = {2022},
}

CVPR

SGTR: End-to-end Scene Graph Generation with Transformer

Rongjie Li, Songyang Zhang, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022

% Conference paper (CVPR 2022).
@inproceedings{li2022sgtr,
  title = {{SGTR}: End-to-end Scene Graph Generation with Transformer},
  author = {Li, Rongjie and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2022},
}

2021

NeurIPS

Dynamic Grained Encoder for Vision Transformers

Lin Song*, Songyang Zhang*, Songtao Liu, Zeming Li, Xuming He, and 3 more authors

In Advances in Neural Information Processing Systems (NeurIPS), 2021

% Conference paper (NeurIPS 2021). The title is taken from the heading shown with this entry.
% NOTE(review): the asterisks after the first two surnames are kept verbatim; presumably they
% mark equal contribution -- confirm, and consider moving the marker out of the author field.
@inproceedings{lin2021dynamic,
  title = {Dynamic Grained Encoder for Vision Transformers},
  author = {Song*, Lin and Zhang*, Songyang and Liu, Songtao and Li, Zeming and He, Xuming and Sun, Hongbin and Sun, Jian and Zheng, Nanning},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year = {2021},
}

ACM MM

An EM Framework for Online Incremental Learning of Semantic Segmentation

Shipeng Yan*, Jiale Zhou*, Jiangwei Xie, Songyang Zhang, and Xuming He

In Proceedings of the 29th ACM International Conference on Multimedia (ACM MM), 2021

% Conference paper (ACM MM 2021).
% NOTE(review): the asterisks after the first two surnames are kept verbatim; presumably they
% mark equal contribution -- confirm, and consider moving the marker out of the author field.
@inproceedings{yan2021anem,
  title = {An {EM} Framework for Online Incremental Learning of Semantic Segmentation},
  author = {Yan*, Shipeng and Zhou*, Jiale and Xie, Jiangwei and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the 29th ACM International Conference on Multimedia (ACM MM)},
  year = {2021},
}

IJCAI

Learning Implicit Temporal Alignment for Few-shot Video Classification

Songyang Zhang*, Jiale Zhou*, and Xuming He

In Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2021

% Conference paper (IJCAI 2021).
% NOTE(review): the asterisks after the first two surnames are kept verbatim; presumably they
% mark equal contribution -- confirm, and consider moving the marker out of the author field.
@inproceedings{zhang2021learning,
  title = {Learning Implicit Temporal Alignment for Few-shot Video Classification},
  author = {Zhang*, Songyang and Zhou*, Jiale and He, Xuming},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year = {2021},
}

CVPR

Bipartite Graph Network with Adaptive Message Passing for Unbiased Scene Graph Generation

Rongjie Li, Songyang Zhang, Bo Wan, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021

% Conference paper (CVPR 2021).
@inproceedings{li2021bipartite,
  title = {Bipartite Graph Network with Adaptive Message Passing for Unbiased Scene Graph Generation},
  author = {Li, Rongjie and Zhang, Songyang and Wan, Bo and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2021},
}

CVPR

Distribution Alignment: A Unified Framework for Long-tail Visual Recognition

Songyang Zhang, Zeming Li, Shipeng Yan, Xuming He, and Jian Sun

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021

% Conference paper (CVPR 2021). Five authors, matching the byline shown with this entry.
@inproceedings{zhang2021distribution,
  title = {Distribution Alignment: A Unified Framework for Long-tail Visual Recognition},
  author = {Zhang, Songyang and Li, Zeming and Yan, Shipeng and He, Xuming and Sun, Jian},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2021},
}

2020

InterSpeech

Transformer with Bidirectional Decoder for Speech Recognition

Xi Chen, Songyang Zhang, Dandan Song, Peng Ouyang, and Shouyi Yin

In The Conference of the International Speech Communication Association (InterSpeech), 2020

% Conference paper (InterSpeech 2020). The title is taken from the heading shown with this entry.
@inproceedings{chen2020transformer,
  title = {Transformer with Bidirectional Decoder for Speech Recognition},
  author = {Chen, Xi and Zhang, Songyang and Song, Dandan and Ouyang, Peng and Yin, Shouyi},
  booktitle = {The Conference of the International Speech Communication Association (InterSpeech)},
  year = {2020},
}

ECCV

Part-aware Prototype Network for Few-shot Semantic Segmentation

Yongfei Liu*, Xiangyi Zhang*, Songyang Zhang, and Xuming He

In Proceedings of the European Conference on Computer Vision (ECCV), 2020

% Conference paper (ECCV 2020).
% NOTE(review): the asterisks after the first two surnames are kept verbatim; presumably they
% mark equal contribution -- confirm, and consider moving the marker out of the author field.
@inproceedings{liu2020part,
  title = {Part-aware Prototype Network for Few-shot Semantic Segmentation},
  author = {Liu*, Yongfei and Zhang*, Xiangyi and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year = {2020},
}

2019

ICML

LatentGNN: Learning Efficient Non-local Relations for Visual Recognition

Songyang Zhang, Shipeng Yan, and Xuming He

In Proceedings of the 36th International Conference on Machine Learning (ICML), 2019

% Conference paper (ICML 2019).
@inproceedings{zhang2019latent,
  title = {{LatentGNN}: Learning Efficient Non-local Relations for Visual Recognition},
  author = {Zhang, Songyang and Yan, Shipeng and He, Xuming},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning (ICML)},
  year = {2019},
}

AAAI

A Dual Attention Network With Semantic Embedding for Few-shot Learning

Shipeng Yan*, Songyang Zhang*, and Xuming He

In Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), 2019

% Conference paper (AAAI 2019).
% NOTE(review): the asterisks after the first two surnames are kept verbatim; presumably they
% mark equal contribution -- confirm, and consider moving the marker out of the author field.
@inproceedings{yan2019adual,
  title = {A Dual Attention Network With Semantic Embedding for Few-shot Learning},
  author = {Yan*, Shipeng and Zhang*, Songyang and He, Xuming},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
  year = {2019},
}

ICCV

Dynamic Context Correspondence Network for Semantic Alignment

Shuaiyi Huang, Qiuyue Wang, Songyang Zhang, and Xuming He

In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2019

% Conference paper (ICCV 2019).
@inproceedings{huang2019dynamic,
  title = {Dynamic Context Correspondence Network for Semantic Alignment},
  author = {Huang, Shuaiyi and Wang, Qiuyue and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  year = {2019},
}

2017

CVPR

Predicting Salient Face in Multiple-face Videos

Yufan Liu, Songyang Zhang, Mai Xu, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2017