
Commit

update
armancohan committed Nov 9, 2023
1 parent 6aeeac7 commit 085a47b
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions _data/lectures.yml
@@ -259,8 +259,15 @@
lecturer: Arman
title:
- Modeling long sequences
- Hierarchical and graph-based methods
- Recurrence and memory
readings:
- TBA
- Higher-order Coreference Resolution with Coarse-to-fine Inference (2018) <a href="https://arxiv.org/pdf/1804.05392.pdf" target="_blank">[link]</a>
- Entity, Relation, and Event Extraction with Contextualized Span Representations (2019) <a href="https://arxiv.org/abs/1909.03546" target="_blank">[link]</a>
- Memorizing Transformers (2022) <a href="https://arxiv.org/abs/2203.08913" target="_blank">[link]</a>
- Hierarchical Graph Network for Multi-hop Question Answering (2020) <a href="https://arxiv.org/pdf/1911.03631.pdf" target="_blank">[link]</a>
- Compressive Transformers for Long-Range Sequence Modelling (2020) <a href="https://arxiv.org/pdf/1911.05507.pdf" target="_blank">[link]</a>
- Efficient Transformers - A Survey (2022) <a href="https://arxiv.org/pdf/2009.06732.pdf" target="_blank">[link]</a>
slides:
- https://yaleedu-my.sharepoint.com/:b:/g/personal/arman_cohan_yale_edu/EWukrTCiFJtJnwXhsnX8rrYBnsj9zpCyZe01rbnCw6VAPw?e=IR7Dgy
logistics:
@@ -269,10 +276,16 @@
- date: Thu 11/09/23
title:
- Modeling long sequences
- Sparse attention patterns
- Approximating attention
- Hardware aware efficiency
- Quantization
readings:
- TBA
- Longformer - The Long-Document Transformer (2020) <a href="https://arxiv.org/pdf/2004.05150.pdf" target="_blank">[link]</a>
- BigBird - Transformers for Longer Sequences (2020) <a href="https://arxiv.org/pdf/2007.14062.pdf" target="_blank">[link]</a>
- Performer - Rethinking Attention with Performers (2021) <a href="https://arxiv.org/pdf/2009.14794.pdf" target="_blank">[link]</a>
- Reformer - The Efficient Transformer (2020) <a href="https://arxiv.org/pdf/2001.04451.pdf" target="_blank">[link]</a>
- LongT5 - Efficient Text-To-Text Transformer for Long Sequences (2022) <a href="https://arxiv.org/pdf/2103.06336.pdf" target="_blank">[link]</a>
- FlashAttention - Fast and Memory-Efficient Exact Attention with IO-Awareness (2022) <a href="https://arxiv.org/abs/2205.14135" target="_blank">[link]</a>

- date: Tue 11/14/23
lecturer: Arman
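Each reading string in this file embeds a raw HTML anchor with target="_blank", so a site template that consumes _data/lectures.yml has to emit those strings unescaped for the links to render. A minimal Liquid sketch of such a loop, assuming a standard Jekyll data-file setup (illustrative only; this template is not taken from the repository):

    {% for lecture in site.data.lectures %}
      <h3>{{ lecture.date }} ({{ lecture.lecturer }})</h3>
      <ul>
        {% for topic in lecture.title %}<li>{{ topic }}</li>{% endfor %}
      </ul>
      <ul>
        {% comment %} Liquid output is not HTML-escaped by default, so the embedded <a> tags render as links. {% endcomment %}
        {% for reading in lecture.readings %}<li>{{ reading }}</li>{% endfor %}
      </ul>
    {% endfor %}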

