app.py
from io import StringIO
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
# Pin the figure fullscreen toggle to the left edge of each chart.
style_fullscreen_button_css = """
button[title="View fullscreen"] {
    left: 0px;
}
button[title="Exit fullscreen"] {
    left: 0px;
}
"""
st.markdown(
    "<style>" + style_fullscreen_button_css + "</style>",
    unsafe_allow_html=True,
)

def minmax_scale(x):
    """Scale values linearly to the [0, 1] range."""
    return (x - np.min(x)) / (np.max(x) - np.min(x))


def from_zero(x):
    """Shift values so that the minimum becomes 0."""
    return x - min(x)
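
# Sketch of the raw .elian layout that the parser below assumes (inferred from
# the parsing logic; the concrete numbers are only illustrative):
#
#   ...header lines...
#   [DATA]
#   410 220 1031 640    <- x y time(ms) z(pressure), space separated
#   412 223 1046 655
#   0 0 0               <- stroke boundary
#   405 300 2210 610
#   ...
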
class Elian:
    data_boundary = "[DATA]"
    stroke_boundary = "0 0 0"

    def __init__(self, row_data: List[str], n_aoi):
        self.row_data = row_data
        self.n_aoi = n_aoi

    @property
    def header(self):
        """Lines preceding the "[DATA]" marker."""
        content: List[str] = [x.strip() for x in self.row_data]
        begins_at: int = content.index(self.data_boundary)
        return content[:begins_at]

    @property
    def data(self):
        """
        :return: the lines following the "[DATA]" marker; strokes are derived
            later by splitting at the '0 0 0' boundary.
        """
        content: List[str] = [x.strip() for x in self.row_data]
        begins_at: int = content.index(self.data_boundary) + 1
        data = content[begins_at:]
        return data
    @staticmethod
    def stroke_validation(stroke: list):
        """Check whether a stroke contains meaningful pen movement.

        Args:
            stroke (list):
                [[stroke_i x y time z(pressure)],
                 [stroke_i x y time z(pressure)],
                 ...]

        Returns:
            bool: True if the stroke moves in both x and y and has pressure.
        """
        if len(stroke) < 2:
            return False
        stroke = np.array(stroke)
        x_diff = np.sum(np.abs(np.diff(stroke[:, 1])))
        y_diff = np.sum(np.abs(np.diff(stroke[:, 2])))
        z_mean = np.mean(stroke[:, 4])
        if x_diff > 1 and y_diff > 1 and z_mean > 1:
            return True
        return False
    @property
    def df(self):
        data = self.data
        stroke_id = 0
        stroke, strokes = [], []
        for xytz in data:
            if xytz != self.stroke_boundary:
                sxytz = [stroke_id] + list(np.fromstring(xytz, dtype=int, sep=" "))
                stroke.append(np.array(sxytz, dtype=int))
            else:  # "0 0 0" closes the current stroke
                if self.stroke_validation(stroke):
                    strokes.append(np.array(stroke))
                    stroke_id += 1
                stroke = []
        x_y_ms_z = np.concatenate(strokes)
        df = pd.DataFrame(x_y_ms_z, columns=["stroke", "x", "y", "ms", "z"])
        pipe = Pipeline([("cluster", KMeans(n_clusters=self.n_aoi, random_state=42))])
        df["cluster"] = pipe.fit_predict(df[["x", "y"]])
        return df
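
# Minimal usage sketch outside the Streamlit app (the filename "sample.elian"
# is only illustrative):
#
#   with open("sample.elian", encoding="utf-8") as f:
#       df = Elian(f.readlines(), n_aoi=4).df
#   df.head()  # columns: stroke, x, y, ms, z, cluster
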
st.header("Elian Visualizer")
st.subheader("How to use")
st.write(
    """
    1. Upload `.elian` files via "Drag and drop files here" (multiple files at once are fine)
    1. Adjust the appearance (use Figure size and Font size in the left sidebar)
    1. Adjust the number of splits (use Number of ... in the left sidebar)
    1. If you do not need the time information, uncheck Alpha for time
    (The first run takes a moment to start up.)
    """
)
# Hyperparameters
figure_size = st.sidebar.slider("Figure size", 5, 10, 8)
font_size = st.sidebar.slider("Font size", 12, 32, 24)
n_components = st.sidebar.slider("Number of pictures in a file", 1, 5, 4)
use_alpha = st.sidebar.checkbox("Alpha for time", True)
st.subheader("Upload")
# Collect the uploaded files
uploaded_files = st.file_uploader("Choose elian files", accept_multiple_files=True)
if len(uploaded_files) == 0:
    raise FileNotFoundError("Please upload some elian files!")
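# Streamlit re-runs this script on every interaction, so the exception above is
# simply shown as an error box until at least one file has been uploaded.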
st.subheader("Results")
st.write(
    """
    * A fullscreen toggle appears at the top left of each image (Esc exits fullscreen).
    * Font size and other settings can also be adjusted in the left sidebar.
    """
)
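
# For each uploaded file: parse it with Elian, then draw each KMeans cluster
# (area of interest) in its own column. Stroke IDs are renumbered from zero per
# cluster, and per-stroke time is min-max scaled to drive the text transparency.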
for uploaded_file in uploaded_files:
    string_data = StringIO(uploaded_file.getvalue().decode("utf-8")).readlines()
    st.write(f"filename: {uploaded_file.name}")
    df = Elian(string_data, n_aoi=n_components).df
    cols = st.columns(n_components)
    for i in range(n_components):
        with cols[i]:
            df_i = df.query(f"cluster == {i}").copy()
            df_i["stroke"] = from_zero(df_i["stroke"])
            df_i["ms"] = df_i.groupby("stroke").transform(minmax_scale)["ms"]
            plt.figure(figsize=(figure_size, figure_size))
            X = list(df_i["x"])
            Y = list(df_i["y"] * -1)  # the raw y axis is upside down
            S = list(df_i["stroke"])
            T = list(df_i["ms"] * -1 + 1)  # fade later points within a stroke
            plt.xlim(min(X) - 5, max(X) + 5)
            plt.ylim(min(Y) - 5, max(Y) + 5)
            for j in range(len(df_i)):
                if use_alpha:
                    plt.text(
                        X[j],
                        Y[j],
                        str(S[j]),
                        color=f"C{S[j]}",
                        alpha=T[j],
                        fontsize=font_size,
                    )  # colors such as C67 do not exist
                else:
                    plt.text(
                        X[j], Y[j], str(S[j]), color=f"C{S[j]}", fontsize=font_size
                    )  # colors such as C67 do not exist
            st.pyplot(plt)
st.subheader("Appendix")
st.write(
    """
    The online version is not that powerful, so you can also run this on your own PC.
    That setup is a bit involved, though, so instructions will follow later.
    If you have any other requests, please contact Kishiyama via Slack or email ( kishiyama.t@gmail.com ).
    """
)