Skip to content

Commit

Permalink
Add support for additional code pages
Browse files Browse the repository at this point in the history
(437, 874, 950, 10001, 10004, 10005, 10006, 10007, 10010, 10017, 10021,
10029, 10079, 10081, 10082)
  • Loading branch information
joniles committed Jan 5, 2017
1 parent 87c1892 commit 49da4bf
Show file tree
Hide file tree
Showing 12 changed files with 1,190 additions and 15 deletions.
30 changes: 15 additions & 15 deletions RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Encoding
// Comment lines based on: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx

// 037 IBM037 IBM EBCDIC US-Canada
// 437 IBM437 OEM United States
LOCALEID_MAPPING.put("437", "Cp437"); // IBM437 OEM United States
// 500 IBM500 IBM EBCDIC International
// 708 ASMO-708 Arabic (ASMO 708)
// 709 Arabic (ASMO-449+, BCON V4)
Expand All @@ -57,12 +57,12 @@ class Encoding
// 866 cp866 OEM Russian; Cyrillic (DOS)
// 869 ibm869 OEM Modern Greek; Greek, Modern (DOS)
// 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
// 874 windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
LOCALEID_MAPPING.put("874", "Cp874"); // windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
// 875 cp875 IBM EBCDIC Greek Modern
LOCALEID_MAPPING.put("932", "SJIS"); // Japanese
LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese
LOCALEID_MAPPING.put("949", "Cp949"); // Korean
// 950 big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
LOCALEID_MAPPING.put("950", "Cp950"); // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia)
LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian
LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan)
Expand Down Expand Up @@ -136,21 +136,21 @@ class Encoding
LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman)
LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen)
LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman
// 10001 x-mac-japanese Japanese (Mac)
LOCALEID_MAPPING.put("10001", "Shift_JIS"); // x-mac-japanese Japanese (Mac)
// 10002 x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
// 10003 x-mac-korean Korean (Mac)
// 10004 x-mac-arabic Arabic (Mac)
// 10005 x-mac-hebrew Hebrew (Mac)
// 10006 x-mac-greek Greek (Mac)
// 10007 x-mac-cyrillic Cyrillic (Mac)
// Mac OS code pages 10004-10007, each mapped to the matching x-Mac* charset name.
LOCALEID_MAPPING.put("10004", "x-MacArabic"); // x-mac-arabic Arabic (Mac)
LOCALEID_MAPPING.put("10005", "x-MacHebrew"); // x-mac-hebrew Hebrew (Mac)
LOCALEID_MAPPING.put("10006", "x-MacGreek"); // x-mac-greek Greek (Mac) -- must not reuse the Hebrew charset
LOCALEID_MAPPING.put("10007", "x-MacCyrillic"); // x-mac-cyrillic Cyrillic (Mac)
// 10008 x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
// 10010 x-mac-romanian Romanian (Mac)
// 10017 x-mac-ukrainian Ukrainian (Mac)
// 10021 x-mac-thai Thai (Mac)
// 10029 x-mac-ce MAC Latin 2; Central European (Mac)
// 10079 x-mac-icelandic Icelandic (Mac)
// 10081 x-mac-turkish Turkish (Mac)
// 10082 x-mac-croatian Croatian (Mac)
LOCALEID_MAPPING.put("10010", "x-MacRomania"); // x-mac-romanian Romanian (Mac)
LOCALEID_MAPPING.put("10017", "x-MacUkraine"); // x-mac-ukrainian Ukrainian (Mac)
LOCALEID_MAPPING.put("10021", "x-MacThai"); // x-mac-thai Thai (Mac)
LOCALEID_MAPPING.put("10029", "x-MacCentralEurope"); // x-mac-ce MAC Latin 2; Central European (Mac)
LOCALEID_MAPPING.put("10079", "x-MacIceland"); // x-mac-icelandic Icelandic (Mac)
LOCALEID_MAPPING.put("10081", "x-MacTurkish"); // x-mac-turkish Turkish (Mac)
LOCALEID_MAPPING.put("10082", "x-MacCroatian"); // x-mac-croatian Croatian (Mac)
LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria)
LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan)
// 12000 utf-32 Unicode UTF-32, little endian byte order
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,36 @@ public void testGreekEncoding() throws Exception
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGreekEncoding");
}

/**
 * Passes a new StandardRtfParser and the fixture name "test437Encoding"
 * to TestUtilities.assertRtfParserDumpMatches.
 */
@Test
public void test437Encoding() throws Exception
{
   final StandardRtfParser parser = new StandardRtfParser();
   TestUtilities.assertRtfParserDumpMatches(this, parser, "test437Encoding");
}

/**
 * Passes a new StandardRtfParser and the fixture name "test874Encoding"
 * to TestUtilities.assertRtfParserDumpMatches.
 */
@Test
public void test874Encoding() throws Exception
{
   final StandardRtfParser parser = new StandardRtfParser();
   TestUtilities.assertRtfParserDumpMatches(this, parser, "test874Encoding");
}

/**
 * Passes a new StandardRtfParser and the fixture name "test950Encoding"
 * to TestUtilities.assertRtfParserDumpMatches.
 */
@Test
public void test950Encoding() throws Exception
{
   final StandardRtfParser parser = new StandardRtfParser();
   TestUtilities.assertRtfParserDumpMatches(this, parser, "test950Encoding");
}

/**
 * Passes a new StandardRtfParser and the fixture name "test10001Encoding"
 * to TestUtilities.assertRtfParserDumpMatches.
 */
@Test
public void test10001Encoding() throws Exception
{
   final StandardRtfParser parser = new StandardRtfParser();
   TestUtilities.assertRtfParserDumpMatches(this, parser, "test10001Encoding");
}

/**
 * Passes a new StandardRtfParser and the fixture name "test10007Encoding"
 * to TestUtilities.assertRtfParserDumpMatches.
 */
@Test
public void test10007Encoding() throws Exception
{
   final StandardRtfParser parser = new StandardRtfParser();
   TestUtilities.assertRtfParserDumpMatches(this, parser, "test10007Encoding");
}

@Test
public void testKoreanEncoding() throws Exception
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{\rtf1\mac\ansicpg10001\cocoartf824\cocoasubrtf230
{\fonttbl\f0\fnil\fcharset78 HiraKakuPro-W6;\f1\fswiss\fcharset77 Helvetica-Bold;\f2\fswiss\fcharset77 Helvetica;
\f3\fnil\fcharset78 HiraKakuPro-W3;}
{\colortbl;\red255\green255\blue255;}
{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc2\leveljcn2\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid0\'02\'05.;}{\levelnumbers\'01;}}{\listname ;}\listid1}}
{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}}
\margl1440\margr1440\vieww12240\viewh8980\viewkind0
\pard\tx220\tx720\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\li720\fi-720\ql\qnatural\pardirnatural
\ls1\ilvl0
\f0\b\fs50 \cf0 \'82\'a8\'93\'c7\'82\'dd\'82\'ad\'82\'be\'82\'b3\'82\'a2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?xml version="1.0" encoding="UTF-8"?>
<rtf>
<group>
<command name="rtf" parameter="1"/>
<command name="cocoartf" parameter="824"/>
<command name="cocoasubrtf" parameter="230"/>
<group>
<command name="fonttbl"/>
<command name="f" parameter="0"/>
<command name="fnil"/>
<command name="fcharset" parameter="78"/>
<chars>HiraKakuPro-W6;</chars>
<command name="f" parameter="1"/>
<command name="fswiss"/>
<command name="fcharset" parameter="77"/>
<chars>Helvetica-Bold;</chars>
<command name="f" parameter="2"/>
<command name="fswiss"/>
<command name="fcharset" parameter="77"/>
<chars>Helvetica;</chars>
<command name="f" parameter="3"/>
<command name="fnil"/>
<command name="fcharset" parameter="78"/>
<chars>HiraKakuPro-W3;</chars>
</group>
<group>
<command name="colortbl"/>
<chars>;</chars>
<command name="red" parameter="255"/>
<command name="green" parameter="255"/>
<command name="blue" parameter="255"/>
<chars>;</chars>
</group>
<group>
<command name="listtable" optional="true"/>
<group>
<command name="list"/>
<command name="listtemplateid" parameter="1"/>
<command name="listhybrid"/>
<group>
<command name="listlevel"/>
<command name="levelnfc" parameter="23"/>
<command name="levelnfcn" parameter="23"/>
<command name="leveljc" parameter="2"/>
<command name="leveljcn" parameter="2"/>
<command name="levelfollow" parameter="0"/>
<command name="levelstartat" parameter="1"/>
<command name="levelspace" parameter="360"/>
<command name="levelindent" parameter="0"/>
<group>
<chars>{disc}</chars>
</group>
<group>
<command name="leveltext"/>
<command name="leveltemplateid" parameter="0"/>
<chars>.;</chars>
</group>
<group>
<command name="levelnumbers"/>
<chars>;</chars>
</group>
</group>
<group>
<command name="listname"/>
<chars>;</chars>
</group>
<command name="listid" parameter="1"/>
</group>
</group>
<group>
<command name="listoverridetable" optional="true"/>
<group>
<command name="listoverride"/>
<command name="listid" parameter="1"/>
<command name="listoverridecount" parameter="0"/>
<command name="ls" parameter="1"/>
</group>
</group>
<command name="margl" parameter="1440"/>
<command name="margr" parameter="1440"/>
<command name="viewkind" parameter="0"/>
<command name="pard"/>
<command name="tx" parameter="220"/>
<command name="tx" parameter="720"/>
<command name="tx" parameter="1133"/>
<command name="tx" parameter="1700"/>
<command name="tx" parameter="2267"/>
<command name="tx" parameter="2834"/>
<command name="tx" parameter="3401"/>
<command name="tx" parameter="3968"/>
<command name="tx" parameter="4535"/>
<command name="tx" parameter="5102"/>
<command name="tx" parameter="5669"/>
<command name="tx" parameter="6236"/>
<command name="tx" parameter="6803"/>
<command name="li" parameter="720"/>
<command name="fi" parameter="-720"/>
<command name="ql"/>
<command name="ls" parameter="1"/>
<command name="ilvl" parameter="0"/>
<command name="f" parameter="0"/>
<command name="b"/>
<command name="fs" parameter="50"/>
<command name="cf" parameter="0"/>
<chars>お読みください</chars>
</group>
</rtf>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{\rtf1\mac\ansicpg10007\cocoartf102
{\fonttbl\f0\fnil\fcharset77 LucidaGrande;\f1\fnil\fcharset77 Georgia;\f2\fnil\fcharset77 Verdana;
}
{\colortbl;\red255\green255\blue255;}
\margl1440\margr1440\vieww16780\viewh13600\viewkind0
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural

\f0\fs36 \cf0 \uc0\u1050 \u1086 \u1084 \u1084 \u1072 \u1085 \u1076 \u1072 \u1088 \u1072 \u1079 \u1088 \u1072 \u1073 \u1086 \u1090 \u1095 \u1080 \u1082 \u1086 \u1074
\f1 \
}


Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<rtf>
<group>
<command name="rtf" parameter="1"/>
<command name="cocoartf" parameter="102"/>
<group>
<command name="fonttbl"/>
<command name="f" parameter="0"/>
<command name="fnil"/>
<command name="fcharset" parameter="77"/>
<chars>LucidaGrande;</chars>
<command name="f" parameter="1"/>
<command name="fnil"/>
<command name="fcharset" parameter="77"/>
<chars>Georgia;</chars>
<command name="f" parameter="2"/>
<command name="fnil"/>
<command name="fcharset" parameter="77"/>
<chars>Verdana;</chars>
</group>
<group>
<command name="colortbl"/>
<chars>;</chars>
<command name="red" parameter="255"/>
<command name="green" parameter="255"/>
<command name="blue" parameter="255"/>
<chars>;</chars>
</group>
<command name="margl" parameter="1440"/>
<command name="margr" parameter="1440"/>
<command name="viewkind" parameter="0"/>
<command name="pard"/>
<command name="tx" parameter="720"/>
<command name="tx" parameter="1440"/>
<command name="tx" parameter="2160"/>
<command name="tx" parameter="2880"/>
<command name="tx" parameter="3600"/>
<command name="tx" parameter="4320"/>
<command name="tx" parameter="5040"/>
<command name="tx" parameter="5760"/>
<command name="tx" parameter="6480"/>
<command name="tx" parameter="7200"/>
<command name="tx" parameter="7920"/>
<command name="tx" parameter="8640"/>
<command name="ql"/>
<command name="f" parameter="0"/>
<command name="fs" parameter="36"/>
<command name="cf" parameter="0"/>
<chars>Комманда разработчиков </chars>
<command name="f" parameter="1"/>
<command name="par"/>
</group>
</rtf>
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{\rtf1 \mac \ansicpg437 \cocoartf102 {\fonttbl {\f0 \fnil \fcharset77 Times New Roman{\*\falt Times}
;}
{\f1 \fnil \fcharset77 Helvetica-Bold{\*\falt Helvetica}
;}
{\f2 \fnil \fcharset77 Helvetica;}
{\f3 \fnil \fcharset77 LucidaGrande{\*\falt Lucida Grande}
;}
}
{\colortbl ;\red0 \green0 \blue0 ;}
{\stylesheet {\*\cs335 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.}
\super footnote reference;}
{\*\cs336 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.}
\super endnote reference;}
{\s337 \nisusnoteplacement0 \nisusreferencestyle335 {\*\nsmpltxt Some text goes here so you can see what your style will look like.}
\f3 footnote text;}
{\s338 \nisusnoteplacement1 \nisusreferencestyle336 {\*\nsmpltxt Sample text for Foot/End Notes Style}
\f3 endnote text;}
}
\deftab720 \defformat \viewkind1 \viewzk1 {\*\nisuswindow \x70 \y194 \w741 \h638 }
\nshwinv0 \nshwpg1 \hyphauto0 \ftnnar \endnotes \aendnotes \aftnnar \fet2 \ftnbj \paperw12240 \paperh15840 \margl1440 \margr1440 \margt1440 \margb1440 \gutter0 \pgnstart1 \nocolbal \sectd \sbknone \cols1 \ltrsect \colbalsxn0 \marglsxn1440 \margrsxn1440 \margtsxn1440 \margbsxn1440 \guttersxn0 \headery720 \footery720 \pgnstarts1 \pgnrestart \pgndec \sxnstarts1 \sxnrestart \sxndec {\header \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par }
{\footer \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par }
{\pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 {\f1 \fs24 \b \cf1 Test Document\par
\f2 \b0 \par
\b Test Title: \tab (Test Subtitle)\par
\b0 \par
\b Test Heading:\b0 \par
Test Text. \par}
}
}
Loading

0 comments on commit 49da4bf

Please sign in to comment.