Skip to content

Commit

Permalink
Add Büble
Browse files Browse the repository at this point in the history
  • Loading branch information
alanakbik committed Dec 5, 2024
1 parent 355d991 commit d1618a6
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 13 deletions.
56 changes: 56 additions & 0 deletions app/research/(projects)/bueble/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import React from "react";

import BlockImage from "@/components/shared/BlockImage";
import H2 from "@/components/shared/H2";
import InlineLink from "@/components/shared/InlineLink";
import P from "@/components/shared/P";
import OL from "@/components/shared/UL";

/**
 * Page metadata. The `title` value is the text shown in the browser tab for this page.
 */
export const metadata = {
  title: "Büble-LM - Alan Akbik",
};

/**
* Insert the content for a project on this page. You don't need to repeat the information already provided in the
* array in @/content/Research.tsx; it is inserted automatically. If you need HTML elements such as a table or an
* unordered list, check whether the shared components listed below provide the functionality you need. If so, use them
* instead of the standard HTML elements.
*
* UnorderedList: <UL/>
* OrderedList: <OL/>
* Link: <InlineLink/>
* Paragraph: <P/>
* Table: <Table/>
* Image: <BlockImage/>
*
* These components can be imported from @/components/shared.
*/
export default function Page() {
  // Renders the Büble-LM project page: a short description of the model followed by
  // a "Getting Started" list of links to the model card on Hugging Face.
  return (
    <>
      <H2>Büble-LM</H2>
      <P>
        BübleLM is a state-of-the-art German language model based on Gemma-2B, adapted using trans-tokenization
        with a custom German SentencePiece tokenizer.
      </P>
      <P>
        Büble significantly outperforms other German LMs like Sauerkraut-2B and LLäMmlein-1B on most
        benchmarks we tried. It was trained with a novel trans-tokenization approach by Pieter Delobelle
        when he was a guest researcher at our chair!
      </P>
      <P>
        More details on this model coming soon!
      </P>
      <H2>Getting Started</H2>
      {/*
        The list is rendered as a sibling of the paragraphs instead of being wrapped in <P>.
        Assuming <P> renders a <p> element (confirm against @/components/shared/P), a list inside
        a paragraph is invalid HTML: the browser closes the <p> before the list, which makes the
        server-rendered markup differ from the React tree and triggers DOM-nesting/hydration warnings.
        NOTE(review): OL is imported as the default export of @/components/shared/UL, so this
        presumably renders an unordered list — confirm which list type is intended.
      */}
      <OL>
        <li>Try out the <InlineLink href="https://huggingface.co/flair/bueble-lm-2b" target="_blank">model</InlineLink></li>
        <li>Check the <InlineLink href="https://huggingface.co/flair/bueble-lm-2b#performance" target="_blank">evaluation results</InlineLink></li>
      </OL>
    </>
  );
}
10 changes: 9 additions & 1 deletion content/LatestNews.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ import type { PinnedMessage, TimelineEntryData } from "@/content/types";
*/

const LatestNews: TimelineEntryData[] = [
{
date: "2024-12-05",
category: "New Model",
content: <>
Announcing 🐤 <InlineLink href="https://huggingface.co/flair/bueble-lm-2b" target="_blank">Büble-LM</InlineLink> 🐤,
our new state-of-the-art 2 billion parameter language model (LM) for German!
</>,
},
{
date: "2024-11-26",
category: "New Project",
Expand All @@ -49,7 +57,7 @@ const LatestNews: TimelineEntryData[] = [
{
date: "2024-10-22",
category: "Paper accepted",
conference: "BabyLM 2025",
conference: "BabyLM 2024",
content: <>
Our paper &quot;BabyHGRN: Exploring RNNs for Sample-Efficient Language Modeling&quot; accepted to BabyLM 2024!
</>,
Expand Down
33 changes: 22 additions & 11 deletions content/Research.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ const Research: ResearchProject[] = [
categories: ["Featured", "Libraries"],
uriComponent: "flair",
},
{
title: "Büble-LM",
image: "/büble-lm.jpg",
imagePadding: 0.0,
imageFit: "contain",
introductoryText: <>
Büble-LM is our new state-of-the-art 2 billion parameter language model (LM) for German!
</>,
categories: ["Featured"],
uriComponent: "bueble",
},
{
title: "Zitatsuchmaschine",
image: "/zitatsuchmaschine_logo.svg",
Expand Down Expand Up @@ -71,6 +82,17 @@ const Research: ResearchProject[] = [
categories: ["Libraries", "Featured"],
uriComponent: "transformer-ranker",
},
{
title: "OpinionGPT",
image: "/opiniongpt.png",
imageFit: "contain",
imagePadding: 1,
introductoryText: <>
<InlineLink href="https://opiniongpt.informatik.hu-berlin.de/">OpinionGPT</InlineLink> is a ChatGPT-style model trained specifically to be biased and opinionated!
</>,
categories: ["Featured", "Applications"],
uriComponent: "opiniongpt",
},
{
title: "CleanCoNLL",
image: "/CleanCoNLL_example_sentence.jpg",
Expand All @@ -84,17 +106,6 @@ const Research: ResearchProject[] = [
categories: ["Featured", "Datasets"],
uriComponent: "cleanconll",
},
{
title: "OpinionGPT",
image: "/opiniongpt.png",
imageFit: "contain",
imagePadding: 1,
introductoryText: <>
<InlineLink href="https://opiniongpt.informatik.hu-berlin.de/">OpinionGPT</InlineLink> is a ChatGPT-style model trained specifically to be biased and opinionated!
</>,
categories: ["Featured", "Applications"],
uriComponent: "opiniongpt",
},
{
title: "LM Pub Quiz",
image: "/BEAR-probe.svg",
Expand Down
2 changes: 1 addition & 1 deletion content/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export type ConferenceAndYear = `${Conference} 20${number}${number}`
*
* If you need to add a new one you can extend this type.
*/
export type NewsCategory = "New Paper" | "Paper accepted" | "Senior Area Chair" | "New Lab Member" | "New Research Grant" | "New Startup Grant" | "New Project"
export type NewsCategory = "New Paper" | "Paper accepted" | "Senior Area Chair" | "New Lab Member" | "New Research Grant" | "New Startup Grant" | "New Project" | "New Model"

export interface TimelineEntryData {
date: IsoDate,
Expand Down
Binary file added public/büble-lm.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d1618a6

Please sign in to comment.