Commit
update
armancohan committed Nov 29, 2024
1 parent 8c8318d commit 22184ee
Showing 7 changed files with 568 additions and 50 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,3 +1,3 @@
# CPSC 477/577 Website
Course website for CPSC 477/577 Natural Language Processing Spring 2024 at Yale University
Course website for CPSC 477/577 Natural Language Processing Spring 2025 at Yale University

4 changes: 2 additions & 2 deletions _config.yml
@@ -6,7 +6,7 @@ name: CPSC 477/577
title: Natural Language Processing
email: arman.cohan@yale.edu
description: >
Natural Language Processing - Yale University - Spring 2024
Natural Language Processing - Yale University - Spring 2025
footer_text: >
Powered by <a href="http://jekyllrb.com/" target="_blank">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio">al-folio</a>
<br/>Theme based on <a href="https://github.com/cmudeeprl"> CMU Deep RL</a>.
@@ -17,7 +17,7 @@ baseurl: # the subpath of your site, e.g. /blog/
# -----------------------------------------------------------------------------
# Social integration
# -----------------------------------------------------------------------------
github_username: armancohan
github_username:
google_analytics:
youtube_channel:
rss: # notes rss
288 changes: 288 additions & 0 deletions _data/lectures.yml
@@ -0,0 +1,288 @@
- date: Tue 01/14/25
lecturer:
- Arman
title:
- Course Introduction
- Logistics
slides:
readings:
optional:
logistics:

- date: Thu 01/16/25
lecturer:
title:
- Word embeddings and vector semantics
slides:
readings:
optional:
logistics:

- date: Tue 01/21/25
lecturer:
title:
- Word embeddings and vector semantics (cont.)
slides:
readings:
optional:
logistics:

- date: Thu 01/23/25
lecturer:
title:
- Basics of Neural Networks and Language Model Training
slides:
readings:
- The Matrix Calculus You Need For Deep Learning (Terence Parr and Jeremy Howard) <a href="https://arxiv.org/pdf/1802.01528.pdf" target="_blank">[link]</a>
- Little book of deep learning (François Fleuret) - Ch 3
optional:
logistics:

- date: Tue 01/28/25
lecturer:
- Arman
title:
- Autograd
- Building blocks of Neural Networks
- Convolutional layers
- Network layers and optimizers
slides:
readings:
- Little book of deep learning (François Fleuret) - Ch 4
optional:
logistics:

- date: Thu 01/30/25
lecturer:
- Arman
title:
- Building blocks of Neural Networks for NLP
- Task-specific neural network architectures
- RNNs
slides:
- https://yaleedu-my.sharepoint.com/:b:/g/personal/arman_cohan_yale_edu/ERiCgJVHJoxMreRomSuVlHkBc2IfZPv7K6JRV7JfsSW5OQ?e=1HGdDK
readings:
- Goldberg Chapter 9
optional:
logistics:

- date: Tue 02/04/25
lecturer:
- Arman
title:
- RNNs (contd.)
- Machine translation
slides:
readings:
- Understanding LSTM Networks (Christopher Olah) <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/" target="_blank">[link]</a>
- Eisenstein, Chapter 18
optional:
- Neural Machine Translation and Sequence-to-sequence Models- A Tutorial (Graham Neubig) <a href="https://arxiv.org/pdf/1703.01619.pdf" target="_blank">[link]</a>
logistics:

- date: Thu 02/06/25
lecturer:
title:
- Machine translation (contd.)
- Attention
- Transformers
slides:
readings:
- Statistical Machine Translation (Koehn) <a href="https://www.statmt.org/book/" target="_blank">[link]</a>
- Neural Machine Translation and Sequence-to-sequence Models- A Tutorial (Graham Neubig) <a href="https://arxiv.org/pdf/1703.01619.pdf" target="_blank">[link]</a>
- Learning to Align and Translate with Attention (Bahdanau et al., 2015) <a href="https://arxiv.org/pdf/1409.0473.pdf" target="_blank">[link]</a>
- Luong et al. (2015) Effective Approaches to Attention-based Neural Machine Translation <a href="https://arxiv.org/pdf/1508.04025.pdf" target="_blank">[link]</a>
- Attention is All You Need (Vaswani et al., 2017) <a href="https://arxiv.org/pdf/1706.03762.pdf" target="_blank">[link]</a>
- Illustrated Transformer <a href="http://jalammar.github.io/illustrated-transformer/" target="_blank">[link]</a>
logistics:

- date: Tue 02/11/25
lecturer:
title:
- Machine translation (contd.)
- Attention
- Transformers
slides:
readings:
- Statistical Machine Translation (Koehn) <a href="https://www.statmt.org/book/" target="_blank">[link]</a>
- Neural Machine Translation and Sequence-to-sequence Models- A Tutorial (Graham Neubig) <a href="https://arxiv.org/pdf/1703.01619.pdf" target="_blank">[link]</a>
- Learning to Align and Translate with Attention (Bahdanau et al., 2015) <a href="https://arxiv.org/pdf/1409.0473.pdf" target="_blank">[link]</a>
- Luong et al. (2015) Effective Approaches to Attention-based Neural Machine Translation <a href="https://arxiv.org/pdf/1508.04025.pdf" target="_blank">[link]</a>
- Attention is All You Need (Vaswani et al., 2017) <a href="https://arxiv.org/pdf/1706.03762.pdf" target="_blank">[link]</a>
- Illustrated Transformer <a href="http://jalammar.github.io/illustrated-transformer/" target="_blank">[link]</a>
optional:

- date: Thu 02/13/25
lecturer:
- Arman
title:
- Transformers (cont'd.)
- Language modeling with Transformers
slides:
readings:
- Illustrated Transformer <a href="http://jalammar.github.io/illustrated-transformer/" target="_blank">[link]</a>
- Attention is All You Need (Vaswani et al., 2017) <a href="https://arxiv.org/pdf/1706.03762.pdf" target="_blank">[link]</a>
- The Annotated Transformer (Harvard NLP) <a href="http://nlp.seas.harvard.edu/2018/04/03/attention.html" target="_blank">[link]</a>
- GPT-2 (Radford et al., 2019) <a href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf" target="_blank">[link]</a>
optional:
logistics:

- date: Tue 02/18/25
lecturer:
- Arman
title:
- Pre-training and transfer learning
- Objective functions for pre-training
- Model architectures
- ELMo, BERT, GPT, T5
slides:
readings:
- The Illustrated BERT, ELMo, and co. (Jay Alammar) <a href="http://jalammar.github.io/illustrated-bert/" target="_blank">[link]</a>
- BERT- Pre-training of Deep Bidirectional Transformers for Language Understanding (Devlin et al., 2018) <a href="https://arxiv.org/pdf/1810.04805.pdf" target="_blank">[link]</a>
- GPT-2 (Radford et al., 2019) <a href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf" target="_blank">[link]</a>
optional:
logistics:

- date: Thu 02/20/25
lecturer:
- Arman
title:
- Transfer learning (contd.)
- Encoder-decoder pretrained models
- Architecture and pretraining objectives
slides:
readings:
- T5- Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer (Raffel et al., 2020) <a href="https://arxiv.org/pdf/1910.10683.pdf" target="_blank">[link]</a>
- BART- Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension (Lewis et al., 2019) <a href="https://arxiv.org/pdf/1910.13461.pdf" target="_blank">[link]</a>
- What Language Model Architecture and Pretraining Objective Work Best for Zero-Shot Generalization? (Wang et al., 2022) <a href="https://arxiv.org/abs/2204.05832" target="_blank">[link]</a>
optional:
logistics:

- date: Tue 02/25/25
lecturer:
- Arman
title: >
<strong> Midterm Exam 1 </strong>
- date: Thu 02/27/25
lecturer:
- Arman
title:
- Decoding and generation
- Large language models and impact of scale
- In-context learning and prompting
slides:
readings:
- The Curious Case of Neural Text Degeneration (Holtzman et al., 2019) <a href="https://arxiv.org/pdf/1904.09751.pdf" target="_blank">[link]</a>
- How to generate text- using different decoding methods for language generation with Transformers <a href="https://huggingface.co/blog/how-to-generate" target="_blank">[link]</a>
- Scaling Laws for Neural Language Models (Kaplan et al., 2020) <a href="https://arxiv.org/pdf/2001.08361.pdf" target="_blank">[link]</a>
- Training Compute-Optimal Large Language Models (Hoffmann et al., 2022) <a href="https://arxiv.org/pdf/2203.15556.pdf" target="_blank">[link]</a>
- GPT3 paper - Language Models are Few-Shot Learners (Brown et al., 2020) <a href="https://arxiv.org/pdf/2005.14165.pdf" target="_blank">[link]</a>
optional:
logistics:

- date: Tue 03/04/25
lecturer: Arman
title:
- In-context learning and prompting (cont'd)
- Improving instruction following and few-shot learning
slides:
readings:
- Language Models are Few-Shot Learners (Brown et al., 2020) <a href="https://arxiv.org/pdf/2005.14165.pdf" target="_blank">[link]</a>
- Finetuned Language Models Are Zero-Shot Learners (Wei et al., 2022) <a href="https://arxiv.org/abs/2109.01652" target="_blank">[link]</a>
- Multitask Prompted Training Enables Zero-Shot Task Generalization (Sanh et al., 2021) <a href="https://arxiv.org/abs/2110.08207" target="_blank">[link]</a>
- Scaling Instruction-Finetuned Language Models (Chung et al., 2022) <a href="https://arxiv.org/abs/2210.11416" target="_blank">[link]</a>
- Are Emergent Abilities of Large Language Models a Mirage? (Schaeffer et al., 2023) <a href="https://arxiv.org/pdf/2304.15004.pdf" target="_blank">[link]</a>
- Emergent Abilities of Large Language Models (Wei et al., 2022) <a href="https://arxiv.org/abs/2206.07682" target="_blank">[link]</a>
logistics:

- date: 03/07/25 - 03/24/25
title: >
<strong> Spring recess - No classes </strong>
- date: Tue 03/25/25
title:
- Post-training
- Reinforcement learning from Human Feedback
- Alignment
slides:
readings:
- Training language models to follow instructions with human feedback (Ouyang et al., 2022) <a href="https://arxiv.org/abs/2203.02155" target="_blank">[link]</a>
- Fine-Tuning Language Models from Human Preferences (Ziegler et al., 2019) <a href="https://arxiv.org/abs/1909.08593" target="_blank">[link]</a>
- Direct Preference Optimization- Your Language Model is Secretly a Reward Model (Rafailov et al., 2023) <a href="https://arxiv.org/abs/2305.18290" target="_blank">[link]</a>
- RLAIF- Scaling Reinforcement Learning from Human Feedback with AI Feedback (Lee et al., 2023) <a href="https://arxiv.org/abs/2309.00267" target="_blank">[link]</a>
optional:

- date: Thu 03/27/25
title:
- Post-training (cont'd)
slides:
readings:
optional:

- date: Tue 04/01/25
lecturer:
- Arman
title: >
<strong> Midterm Exam 2 </strong>
- date: Thu 04/03/25
lecturer:
title:
- Evaluation
slides:
readings:
optional:
logistics:

- date: Tue 04/08/25
lecturer:
title:
- Parameter-efficient Fine-Tuning
slides:
readings:
optional:
logistics:

- date: Thu 04/10/25
lecturer:
title:
- Safety
- Noncompliance
slides:
readings:
optional:
logistics:

- date: Tue 04/15/25
lecturer:
title:
- Agent-based systems
slides:
readings:
optional:
logistics:

- date: Thu 04/17/25
guest:
- name: TBD
title:
slides:
readings:
optional:

- date: Tue 04/22/25
guest:
- name: TBD
title:
slides:
readings:
optional:

- date: Thu 04/24/25
guest:
- name: TBD
title:
slides:
readings:
optional:
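
For reference, a minimal sketch of how the schedule entries in _data/lectures.yml above could be loaded and sanity-checked with PyYAML outside of Jekyll. The key names mirror the file; the script itself, its default path, and the EXPECTED_KEYS set are illustrative assumptions, not part of this commit.

# Hypothetical helper (an assumption, not part of this commit): parse
# _data/lectures.yml and flag entries whose keys differ from the schema
# used above (date, lecturer, guest, title, slides, readings, optional,
# logistics).
import yaml  # PyYAML, assumed to be installed

EXPECTED_KEYS = {"date", "lecturer", "guest", "title", "slides",
                 "readings", "optional", "logistics"}

def load_lectures(path="_data/lectures.yml"):
    """Return the list of lecture entries, warning on unexpected keys."""
    with open(path, encoding="utf-8") as f:
        lectures = yaml.safe_load(f) or []
    for i, entry in enumerate(lectures, start=1):
        unknown = set(entry) - EXPECTED_KEYS
        if unknown:
            print(f"entry {i} ({entry.get('date')}): unexpected keys {sorted(unknown)}")
    return lectures

if __name__ == "__main__":
    schedule = load_lectures()
    print(f"{len(schedule)} lecture entries loaded")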