Clam- · vladkorotnev · Feb 21, 2021 · Feb 22, 2021 · Feb 22, 2021 · Feb 22, 2021
diff --git a/.gitignore b/.gitignore
@@ -32,4 +32,5 @@ proguard/
 
 #Other
 assets/dict.properties
-t9build.properties
+t9build.properties
+*.keystore
diff --git a/AndroidManifest.xml b/AndroidManifest.xml
@@ -4,7 +4,8 @@
     android:versionCode="4"
     android:versionName="git" >
 
-	<uses-sdk android:minSdkVersion="8" />
+	<uses-sdk android:minSdkVersion="11" />
+	<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
 	<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
 	<uses-configuration
 		android:reqFiveWayNav="true"
@@ -18,6 +19,7 @@
 		/>
     <application
         android:allowBackup="false"
+		android:requestLegacyExternalStorage="true"
         android:icon="@drawable/ic_launcher"
         android:label="@string/ime_name"
         android:theme="@style/AppTheme" >

diff --git a/NewDicts/generate.py b/NewDicts/generate.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+import msgpack
+import gzip
+import sys
+import pdb
+import codecs
+import unicodedata
+
+filename = sys.argv[1]  # argv[1]: path to the gzip-compressed msgpack input
+langno = int(sys.argv[2])  # argv[2]: language number written on every output line (english=1, russian=2)
+outfile = sys.argv[3]  # argv[3]: path of the UTF-8 text file to write
+border = int(sys.argv[4])  # argv[4]: minimum wfreq; iteration stops once wfreq drops below it
+# Read the input, validate its header, then write one "word wfreq langno" line per accepted word.
+with gzip.open(filename, 'rb') as infile:
+    data = msgpack.load(infile, raw=False)  # raw=False: unpack msgpack raw strings as str, not bytes
+    header = data[0]  # first element is the header; the remaining elements are word buckets
+    if (
+        not isinstance(header, dict) or header.get('format') != 'cB'
+        or header.get('version') != 1
+    ):
+        raise ValueError("Unexpected header: %r" % header)  # only format 'cB', version 1 is accepted
+    dict_data = data[1:]
+    buckets = len(dict_data)  # total bucket count, empty buckets included
+    with codecs.open(outfile, "w", "utf-8") as outfile:  # NOTE: rebinds `outfile` from the path string to the open file
+        for bucket_no,bucket in enumerate(dict_data):
+            if len(bucket) == 0:  # empty buckets produce no output
+                continue
+            wfreq = buckets - bucket_no  # earlier buckets score higher: first bucket -> buckets, last -> 1
+            if wfreq < border:
+                break  # wfreq only decreases, so every later bucket is below the border too
+            for word in bucket:
+                word = unicodedata.normalize('NFKD', word.lower())  # NOTE(review): NFKD splits precomposed letters (e.g. U+0439 -> U+0438 U+0306) - confirm intended
+                if " " in word:
+                    print("spaces not allowed")  # entry is skipped; the message does not name the word
+                    continue
+                elif any(char.isdigit() for char in word):# or any(unicodedata.category(c) not in ["Lu", "Ll", "Pc", "Pd", "Po"] for c in word): # http://www.unicode.org/reports/tr44/tr44-6.html#General_Category_Values
+                    print("Weird:", word)  # words containing any digit are reported and skipped
+                    continue
+                else:
+                    outfile.write(word+u" "+str(wfreq)+u" "+str(langno)+u"\n")  # line format: "<word> <wfreq> <langno>"
diff --git a/NewDicts/large_en.msgpack.gz b/NewDicts/large_en.msgpack.gz
diff --git a/NewDicts/large_ru.msgpack.gz b/NewDicts/large_ru.msgpack.gz
diff --git a/NewDicts/readme.txt b/NewDicts/readme.txt
@@ -0,0 +1,6 @@
+A simple English + Russian pre-populated ("preheated") dictionary backup that is reasonably usable from the get-go.
+
+Generated from data by LuminosoInsight:
+https://github.com/LuminosoInsight/wordfreq/tree/02c3cbe3fb13fd133fb602997aa30ccc59c24c24/wordfreq/data
+
+The generated output was then manually edited to remove erroneous values.