diff --git a/internal/data/data.go b/internal/data/data.go
index 68a8a20..b40981b 100644
--- a/internal/data/data.go
+++ b/internal/data/data.go
@@ -16,21 +16,147 @@ limitations under the License.
 package data
 
+// SyllableType is the class of a single jamo rune as reported by
+// jamo.ClassifySyllables.
+type SyllableType int
+
+const (
+	// Undefined marks a rune that is found in neither Consonants nor JungSung.
+	Undefined SyllableType = iota
+	// Consonant marks a rune that is listed in Consonants.
+	Consonant
+	// Vowel marks a rune that is listed in JungSung.
+	Vowel
+)
+
+// SungType names the part of a syllable block that jamo.ComposeHangeul
+// expects to read next.
+type SungType int
+
+const (
+	// ChosungType: an initial consonant is expected.
+	ChosungType SungType = iota
+	// JungsungType: a medial vowel is expected.
+	JungsungType
+	// JongsungType: a final consonant and/or the next initial consonant is expected.
+	JongsungType
+)
+
 const (
 	StartHangeul = rune(0xAC00) // '가'
 	EndHangeul   = rune(0xD7A3) // '힣'
 )
 
 var (
+	// Consonants lists every consonant jamo accepted as input: the 19 initial
+	// consonants followed by the 11 compound final consonants.
+	Consonants = []rune{
+		'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ', 'ㄳ', 'ㄵ', 'ㄶ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅄ',
+	}
+	// ChoSung lists the initial consonants. The position of a jamo in this
+	// slice is used as its index when a syllable code point is computed, so
+	// the order must not change.
 	ChoSung = []rune{
 		'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ',
 	}
+	// ChosungComposer maps a typed run of consonants to the single initial
+	// consonant it stands for. Every initial consonant maps to itself, and a
+	// doubled basic consonant maps to its tense form (for example "ㄱㄱ" -> "ㄲ").
+	ChosungComposer = map[string]string{
+		"ㄱ":  "ㄱ",
+		"ㄲ":  "ㄲ",
+		"ㄱㄱ": "ㄲ",
+		"ㄴ":  "ㄴ",
+		"ㄷ":  "ㄷ",
+		"ㄸ":  "ㄸ",
+		"ㄷㄷ": "ㄸ",
+		"ㄹ":  "ㄹ",
+		"ㅁ":  "ㅁ",
+		"ㅂ":  "ㅂ",
+		"ㅃ":  "ㅃ",
+		"ㅂㅂ": "ㅃ",
+		"ㅅ":  "ㅅ",
+		"ㅆ":  "ㅆ",
+		"ㅅㅅ": "ㅆ",
+		"ㅇ":  "ㅇ",
+		"ㅈ":  "ㅈ",
+		"ㅉ":  "ㅉ",
+		"ㅈㅈ": "ㅉ",
+		"ㅊ":  "ㅊ",
+		"ㅋ":  "ㅋ",
+		"ㅌ":  "ㅌ",
+		"ㅍ":  "ㅍ",
+		"ㅎ":  "ㅎ",
+	}
+	// JungSung lists the medial vowels. The position of a jamo in this slice
+	// is used as its index when a syllable code point is computed, so the
+	// order must not change.
 	JungSung = []rune{
 		'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ',
 	}
+	// JungsungComposer maps a typed run of vowels to the single medial vowel
+	// it stands for. Every medial vowel maps to itself; two- and three-vowel
+	// sequences map to the compound vowel they build (for example "ㅗㅏ" -> "ㅘ").
+	JungsungComposer = map[string]string{
+		"ㅏ":   "ㅏ",
+		"ㅐ":   "ㅐ",
+		"ㅏㅣ":  "ㅐ",
+		"ㅑ":   "ㅑ",
+		"ㅒ":   "ㅒ",
+		"ㅑㅣ":  "ㅒ",
+		"ㅐㅐ":  "ㅒ",
+		"ㅓ":   "ㅓ",
+		"ㅔ":   "ㅔ",
+		"ㅓㅣ":  "ㅔ",
+		"ㅕ":   "ㅕ",
+		"ㅖ":   "ㅖ",
+		"ㅕㅣ":  "ㅖ",
+		"ㅔㅔ":  "ㅖ",
+		"ㅗ":   "ㅗ",
+		"ㅘ":   "ㅘ",
+		"ㅗㅏ":  "ㅘ",
+		"ㅙ":   "ㅙ",
+		"ㅗㅐ":  "ㅙ",
+		"ㅗㅏㅣ": "ㅙ",
+		"ㅚ":   "ㅚ",
+		"ㅗㅣ":  "ㅚ",
+		"ㅛ":   "ㅛ",
+		"ㅜ":   "ㅜ",
+		"ㅝ":   "ㅝ",
+		"ㅜㅓ":  "ㅝ",
+		"ㅞ":   "ㅞ",
+		"ㅜㅔ":  "ㅞ",
+		"ㅜㅓㅣ": "ㅞ",
+		"ㅟ":   "ㅟ",
+		"ㅜㅣ":  "ㅟ",
+		"ㅠ":   "ㅠ",
+		"ㅡ":   "ㅡ",
+		"ㅢ":   "ㅢ",
+		"ㅡㅣ":  "ㅢ",
+		"ㅣ":   "ㅣ",
+	}
+	// JongSung lists the final consonants. Index 0 holds the zero rune and
+	// stands for "no final consonant". The position of a jamo in this slice
+	// is used as its index when a syllable code point is computed, so the
+	// order must not change.
 	JongSung = []rune{
 		0, 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ',
 	}
+	// JongsungComposer maps a typed run of consonants to the single final
+	// consonant it stands for. Every final consonant maps to itself; the
+	// second group of entries maps a pair of basic consonants to the compound
+	// final consonant they build (for example "ㅂㅅ" -> "ㅄ").
+	JongsungComposer = map[string]string{
+		"ㄱ": "ㄱ",
+		"ㄲ": "ㄲ",
+		"ㄳ": "ㄳ",
+		"ㄴ": "ㄴ",
+		"ㄵ": "ㄵ",
+		"ㄶ": "ㄶ",
+		"ㄷ": "ㄷ",
+		"ㄹ": "ㄹ",
+		"ㄺ": "ㄺ",
+		"ㄻ": "ㄻ",
+		"ㄼ": "ㄼ",
+		"ㄽ": "ㄽ",
+		"ㄾ": "ㄾ",
+		"ㄿ": "ㄿ",
+		"ㅀ": "ㅀ",
+		"ㅁ": "ㅁ",
+		"ㅂ": "ㅂ",
+		"ㅄ": "ㅄ",
+		"ㅅ": "ㅅ",
+		"ㅆ": "ㅆ",
+		"ㅇ": "ㅇ",
+		"ㅈ": "ㅈ",
+		"ㅊ": "ㅊ",
+		"ㅋ": "ㅋ",
+		"ㅌ": "ㅌ",
+		"ㅍ": "ㅍ",
+		"ㅎ": "ㅎ",
+
+		"ㄱㄱ": "ㄲ",
+		"ㄱㅅ": "ㄳ",
+		"ㄴㅈ": "ㄵ",
+		"ㄴㅎ": "ㄶ",
+		"ㄹㄱ": "ㄺ",
+		"ㄹㅁ": "ㄻ",
+		"ㄹㅂ": "ㄼ",
+		"ㄹㅅ": "ㄽ",
+		"ㄹㅌ": "ㄾ",
+		"ㄹㅍ": "ㄿ",
+		"ㄹㅎ": "ㅀ",
+		"ㅂㅅ": "ㅄ",
+		"ㅅㅅ": "ㅆ",
+	}
 )
 
 var (
 	// QWERTY Keyboard
diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go
new file mode 100644
index 0000000..4847f44
--- /dev/null
+++ b/pkg/errors/errors.go
@@ -0,0 +1,7 @@
+package JamoError
+
+import "errors"
+
+var (
+	// ErrImpossibleToCompose is returned by jamo.ComposeHangeul when the
+	// given jamo cannot be arranged into hangeul syllables.
+	ErrImpossibleToCompose = errors.New("impossible to compose to hangeul")
+)
diff --git a/pkg/jamo/jamo.go b/pkg/jamo/jamo.go
index d34cfdb..d9bf051 100644
--- a/pkg/jamo/jamo.go
+++ b/pkg/jamo/jamo.go
@@ -17,7 +17,10 @@ limitations under the License.
 package jamo
 
 import (
+	"slices"
+
 	"github.com/ymw0407/jamo/internal/data"
+	JamoError "github.com/ymw0407/jamo/pkg/errors"
 	"github.com/ymw0407/jamo/pkg/options"
 )
 
@@ -61,3 +64,175 @@ func DecomposeHangeul(hangeuls string, opts ...options.Options) (decomposedHange
 
 	return decomposedHangeul
 }
+
+// ComposeHangeul composes a sequence of jamo into a hangeul word (one word, not several words).
+/*
+	// example
+	fmt.Println(ComposeHangeul("ㅎㅏㄴㄱㅡㄹ")) // [한글] <nil>
+*/
+
+// ComposeHangeul composes a string of hangeul jamo into complete hangeul
+// syllables and returns every reading the input allows.
+//
+// The input must be one word made only of jamo: consecutive consonants and
+// consecutive vowels are first merged into groups ("ㄱㄱ" -> "ㄲ",
+// "ㅗㅏ" -> "ㅘ"), then the groups are consumed as chosung, jungsung,
+// jongsung in turn. A consonant group between two vowels can be ambiguous
+// (it may close the previous syllable, open the next one, or both), in which
+// case one result per reading is returned:
+//
+//	ComposeHangeul("ㄷㅗㄴ")      // ["돈"]
+//	ComposeHangeul("ㄱㅏㅂㅂㅜ")   // ["가뿌", "갑부"]
+//	ComposeHangeul("ㄱㅏㅂㅅㅅㅏㄴ") // ["값산", "갑싼"]
+//
+// An empty slice and JamoError.ErrImpossibleToCompose are returned when the
+// input contains anything that is not a jamo (spaces, latin letters,
+// pre-composed syllables), when a group is not a valid chosung, jungsung or
+// jongsung, or when the input ends on a consonant that has no vowel.
+func ComposeHangeul(syllables string) (composedHangeuls []string, err error) {
+	sungType := data.ChosungType
+	composedHangeuls = append(composedHangeuls, "")
+	runeSyllables := []rune(syllables)
+
+	for i := 0; i < len(runeSyllables); i++ {
+		// Merge the jamo at i with every directly following jamo of the same class.
+		syllableType := ClassifySyllables(runeSyllables[i])
+		end := i + 1
+		for end < len(runeSyllables) && ClassifySyllables(runeSyllables[end]) == syllableType {
+			end++
+		}
+		group := runeSyllables[i:end]
+		stringSyllable := string(group)
+		isLast := end == len(runeSyllables)
+		// Skip the whole group, also when it runs to the end of the input;
+		// otherwise its tail would be read a second time as a new group.
+		i = end - 1
+
+		switch sungType {
+		case data.ChosungType: // first chosung only
+			chosung, ok := data.ChosungComposer[stringSyllable]
+			if !ok {
+				return []string{}, JamoError.ErrImpossibleToCompose
+			}
+			appendToAll(composedHangeuls, chosung)
+			sungType = data.JungsungType
+
+		case data.JungsungType:
+			jungsung, ok := data.JungsungComposer[stringSyllable]
+			if !ok {
+				return []string{}, JamoError.ErrImpossibleToCompose
+			}
+			if isLast { // ㄱㅣ -> 기: pad the missing jongsung so every syllable is 3 runes
+				jungsung += " "
+			}
+			appendToAll(composedHangeuls, jungsung)
+			sungType = data.JongsungType
+
+		case data.JongsungType:
+			if isLast { // ㄷㅗㄴ -> 돈: nothing follows, the whole group is the jongsung
+				jongsung, ok := data.JongsungComposer[stringSyllable]
+				if !ok {
+					return []string{}, JamoError.ErrImpossibleToCompose
+				}
+				appendToAll(composedHangeuls, jongsung)
+				break
+			}
+
+			// A vowel follows: the group is "jongsung + next chosung".
+			readings := splitConsonantRun(group)
+			if len(readings) == 0 {
+				return []string{}, JamoError.ErrImpossibleToCompose
+			}
+			// Keep the first reading in place and append one copy of every
+			// existing candidate per additional reading.
+			base := len(composedHangeuls)
+			for _, reading := range readings[1:] {
+				for ii := 0; ii < base; ii++ {
+					composedHangeuls = append(composedHangeuls, composedHangeuls[ii]+reading)
+				}
+			}
+			appendToAll(composedHangeuls[:base], readings[0])
+			sungType = data.JungsungType
+		}
+	}
+
+	// The input stopped right after a chosung: the last syllable has no vowel.
+	if sungType == data.JungsungType {
+		return []string{}, JamoError.ErrImpossibleToCompose
+	}
+
+	for i, composedHangeul := range composedHangeuls {
+		composedHangeuls[i] = combineHangulSyllables([]rune(composedHangeul))
+	}
+
+	return composedHangeuls, nil
+}
+
+// appendToAll appends suffix to every element of words in place.
+func appendToAll(words []string, suffix string) {
+	for i := range words {
+		words[i] += suffix
+	}
+}
+
+// splitConsonantRun lists every way a consonant run that sits between two
+// vowels can be read as "jongsung of the previous syllable + chosung of the
+// next one". Each reading is exactly two runes; a space stands for "no
+// jongsung". It returns nil when the run has no valid reading.
+func splitConsonantRun(run []rune) []string {
+	var readings []string
+	switch len(run) {
+	case 1: // no jongsung + 1 chosung // ㄱㅏㅂㅏㅇ -> 가방
+		if chosung, ok := data.ChosungComposer[string(run)]; ok {
+			readings = append(readings, " "+chosung)
+		}
+	case 2: // 2 chosung / 1 jongsung + 1 chosung // ㄱㅏㅂㅂㅜ -> 가뿌 / 갑부
+		if chosung, ok := data.ChosungComposer[string(run)]; ok {
+			readings = append(readings, " "+chosung)
+		}
+		if reading, ok := joinJongCho(string(run[:1]), string(run[1:])); ok {
+			readings = append(readings, reading)
+		}
+	case 3: // 2 jongsung + 1 chosung / 1 jongsung + 2 chosung // ㄱㅏㅂㅅㅅㅏㄴ -> 값산 / 갑싼
+		if reading, ok := joinJongCho(string(run[:2]), string(run[2:])); ok {
+			readings = append(readings, reading)
+		}
+		if reading, ok := joinJongCho(string(run[:1]), string(run[1:])); ok {
+			readings = append(readings, reading)
+		}
+	case 4: // 2 jongsung + 2 chosung // ㄱㅏㅅㅅㅅㅅㅜ -> 갔쑤
+		if reading, ok := joinJongCho(string(run[:2]), string(run[2:])); ok {
+			readings = append(readings, reading)
+		}
+	}
+	return readings
+}
+
+// joinJongCho composes jong as a jongsung and cho as a chosung and returns
+// the two resulting jamo joined together. ok is false when either half is not
+// valid in its position (for example ㄸ as a jongsung or ㄳ as a chosung).
+func joinJongCho(jong, cho string) (joined string, ok bool) {
+	jongsung, ok := data.JongsungComposer[jong]
+	if !ok {
+		return "", false
+	}
+	chosung, ok := data.ChosungComposer[cho]
+	if !ok {
+		return "", false
+	}
+	return jongsung + chosung, true
+}
+
+// ClassifySyllables reports whether syllable is a consonant jamo, a vowel
+// jamo, or neither (data.Undefined).
+func ClassifySyllables(syllable rune) data.SyllableType {
+	switch {
+	case slices.Contains(data.Consonants, syllable):
+		return data.Consonant
+	case slices.Contains(data.JungSung, syllable):
+		return data.Vowel
+	default:
+		return data.Undefined
+	}
+}
+
+// combineHangulSyllables turns a flat list of (chosung, jungsung, jongsung)
+// triples into pre-composed hangeul syllables. A space in the jongsung slot
+// means the syllable has no final consonant. An incomplete trailing triple is
+// ignored instead of being read past the end of the slice.
+func combineHangulSyllables(jamos []rune) string {
+	result := make([]rune, 0, len(jamos)/3)
+	for i := 0; i+2 < len(jamos); i += 3 {
+		choIndex := indexOfRune(data.ChoSung, jamos[i])
+		jungIndex := indexOfRune(data.JungSung, jamos[i+1])
+		jongIndex := indexOfRune(data.JongSung, jamos[i+2])
+
+		// 21 jungsung * 28 jongsung slots per chosung, 28 jongsung slots per jungsung.
+		syllableCode := data.StartHangeul + (rune(choIndex) * 21 * 28) + (rune(jungIndex) * 28) + rune(jongIndex)
+		result = append(result, syllableCode)
+	}
+	return string(result)
+}
+
+// indexOfRune returns the position of value in slice, or -1 when it is
+// absent. A space is the "empty slot" placeholder and always maps to index 0.
+func indexOfRune(slice []rune, value rune) int {
+	if value == ' ' {
+		return 0
+	}
+	return slices.Index(slice, value)
+}
diff --git a/pkg/jamo/jamo_test.go b/pkg/jamo/jamo_test.go
index 64a7960..ce10a0a 100644
--- a/pkg/jamo/jamo_test.go
+++ b/pkg/jamo/jamo_test.go
@@ -45,3 +45,157 @@ func TestDecomposeHangeul(t *testing.T) {
 		assert.Equal(t, expected, res, res+" : `"+expected+"` is expected!")
 	})
 }
+
+func TestComposeHangeul(t *testing.T) {
+	t.Run("깎가-깍까", func(t *testing.T) {
+		expected := []string{"깎가", "깍까"}
+		res, _ := jamo.ComposeHangeul("ㄱㄱㅏㄱㄱㄱㅏ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("까까-깍가", func(t *testing.T) {
+		expected := []string{"까까", "깍가"}
+		res, _ := jamo.ComposeHangeul("ㄱㄱㅏㄱㄱㅏ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("갃소-각쏘", func(t *testing.T) {
+		expected := []string{"갃소", "각쏘"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㄱㅅㅅㅗ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("테스트1", func(t *testing.T) {
+		expected := []string{"테스트"}
+		res, _ := jamo.ComposeHangeul("ㅌㅔㅅㅡㅌㅡ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("테스트2", func(t *testing.T) {
+		expected := []string{"테스트"}
+		res, _ := jamo.ComposeHangeul("ㅌㅓㅣㅅㅡㅌㅡ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("기", func(t *testing.T) {
+		expected := []string{"기"}
+		res, _ := jamo.ComposeHangeul("ㄱㅣ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("끼", func(t *testing.T) {
+		expected := []string{"끼"}
+		res, _ := jamo.ComposeHangeul("ㄲㅣ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("돈", func(t *testing.T) {
+		expected := []string{"돈"}
+		res, _ := jamo.ComposeHangeul("ㄷㅗㄴ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("가방", func(t *testing.T) {
+		expected := []string{"가방"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㅂㅏㅇ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("갑옷", func(t *testing.T) {
+		expected := []string{"갑옷"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㅂㅇㅗㅅ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("갑부-가뿌", func(t *testing.T) {
+		expected := []string{"가뿌", "갑부"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㅂㅂㅜ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("값산-갑싼", func(t *testing.T) {
+		expected := []string{"값산", "갑싼"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㅂㅅㅅㅏㄴ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("갔쑤", func(t *testing.T) {
+		expected := []string{"갔쑤"}
+		res, _ := jamo.ComposeHangeul("ㄱㅏㅅㅅㅅㅅㅜ")
+
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("err 갔 쑤", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("ㄱㅏㅅㅅ ㅅㅅㅜ")
+
+		assert.Error(t, err, err)
+	})
+
+	t.Run("err 가e수", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("ㄱㅏeㅅㅜ")
+
+		assert.Error(t, err, err)
+	})
+
+	t.Run("err 가 수", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("ㄱㅏ ㅅㅜ")
+
+		assert.Error(t, err, err)
+	})
+
+	t.Run("err 가수", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("가ㅅㅜ")
+
+		assert.Error(t, err, err)
+	})
+
+	t.Run("값 (trailing compound jongsung)", func(t *testing.T) {
+		expected := []string{"값"}
+		res, err := jamo.ComposeHangeul("ㄱㅏㅂㅅ")
+
+		assert.NoError(t, err)
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("과 (trailing compound jungsung)", func(t *testing.T) {
+		expected := []string{"과"}
+		res, err := jamo.ComposeHangeul("ㄱㅗㅏ")
+
+		assert.NoError(t, err)
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("각따 (1 jongsung + 2 chosung)", func(t *testing.T) {
+		expected := []string{"각따"}
+		res, err := jamo.ComposeHangeul("ㄱㅏㄱㄷㄷㅏ")
+
+		assert.NoError(t, err)
+		assert.Equal(t, expected, res)
+	})
+
+	t.Run("err ㄱ (chosung without vowel)", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("ㄱ")
+
+		assert.Error(t, err, err)
+	})
+
+	t.Run("err ㄸ as jongsung", func(t *testing.T) {
+		_, err := jamo.ComposeHangeul("ㄱㅏㄸㅇㅗ")
+
+		assert.Error(t, err, err)
+	})
+
+}