Skip to content

Latest commit

 

History

History
48 lines (42 loc) · 1.13 KB

README.md

File metadata and controls

48 lines (42 loc) · 1.13 KB

DeBERTaV3

Train DeBERTaV3 with JAX/Flax

Citation

@misc{he2021debertav3,
    title         = {{DeBERTaV3}: Improving {DeBERTa} using {ELECTRA}-Style Pre-Training with Gradient-Disentangled Embedding Sharing},
    author        = {He, Pengcheng and Gao, Jianfeng and Chen, Weizhu},
    year          = {2021},
    eprint        = {2111.09543},
    archiveprefix = {arXiv},
    primaryclass  = {cs.CL},
}
@inproceedings{clark2020electra,
    title     = {{ELECTRA}: Pre-training Text Encoders as Discriminators Rather Than Generators},
    author    = {Clark, Kevin and Luong, Minh-Thang and Le, Quoc V. and Manning, Christopher D.},
    booktitle = {International Conference on Learning Representations ({ICLR})},
    year      = {2020},
    url       = {https://openreview.net/pdf?id=r1xMH1BtvB},
}
@misc{electra_pytorch,
    title        = {{PyTorch} implementation of {ELECTRA}},
    author       = {Wang, Richard},
    year         = {2020},
    howpublished = {\url{https://github.com/richarddwang/electra_pytorch}},
    note         = {GitHub repository},
}
@misc{sahajBERT,
    title        = {{sahajBERT}},
    author       = {{tanmoyio}},
    year         = {2021},
    howpublished = {\url{https://github.com/tanmoyio/sahajbert}},
    note         = {GitHub repository},
}