forked from nickliqian/cnn_captcha
-
Notifications
You must be signed in to change notification settings - Fork 0
/
recognition_object.py
145 lines (124 loc) · 5.86 KB
/
recognition_object.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""
识别图像的类,为了快速进行多次识别可以调用此类下面的方法:
R = Recognizer(image_height, image_width, max_captcha)
for i in range(10):
r_img = Image.open(str(i) + ".jpg")
t = R.rec_image(r_img)
简单的图片每张基本上可以达到毫秒级的识别速度
"""
import tensorflow as tf
import numpy as np
from PIL import Image
from sample import sample_conf
class Recognizer(object):
def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
self.w_alpha = 0.01
self.b_alpha = 0.1
self.image_height = image_height
self.image_width = image_width
self.max_captcha = max_captcha
self.char_set = char_set
self.char_set_len = len(self.char_set)
self.model_save_dir = model_save_dir
# tf初始化占位符
self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width]) # 特征向量
self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len]) # 标签
self.keep_prob = tf.placeholder(tf.float32) # dropout值
self.sess = tf.Session()
self.y_predict = self.model()
saver = tf.train.Saver()
saver.restore(self.sess, self.model_save_dir)
def __del__(self):
self.sess.close()
print("session close")
@staticmethod
def convert2gray(img):
"""
图片转为灰度图,如果是3通道图则计算,单通道图则直接返回
:param img:
:return:
"""
if len(img.shape) > 2:
r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
return gray
else:
return img
def text2vec(self, text):
"""
转标签为oneHot编码
:param text: str
:return: numpy.array
"""
text_len = len(text)
if text_len > self.max_captcha:
raise ValueError('验证码最长{}个字符'.format(self.max_captcha))
vector = np.zeros(self.max_captcha * self.char_set_len)
for i, ch in enumerate(text):
idx = i * self.char_set_len + self.char_set.index(ch)
vector[idx] = 1
return vector
def model(self):
x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
print(">>> input x: {}".format(x))
# 卷积层1
wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
initializer=tf.contrib.layers.xavier_initializer())
bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv1 = tf.nn.dropout(conv1, self.keep_prob)
# 卷积层2
wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
initializer=tf.contrib.layers.xavier_initializer())
bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv2 = tf.nn.dropout(conv2, self.keep_prob)
# 卷积层3
wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
initializer=tf.contrib.layers.xavier_initializer())
bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
conv3 = tf.nn.dropout(conv3, self.keep_prob)
print(">>> convolution 3: ", conv3.shape)
next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]
# 全连接层1
wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
initializer=tf.contrib.layers.xavier_initializer())
bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
dense = tf.nn.dropout(dense, self.keep_prob)
# 全连接层2
wout = tf.get_variable('name', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
initializer=tf.contrib.layers.xavier_initializer())
bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))
y_predict = tf.add(tf.matmul(dense, wout), bout)
return y_predict
def rec_image(self, img):
img_array = np.array(img)
test_image = self.convert2gray(img_array)
test_image = test_image.flatten() / 255
predict = tf.argmax(tf.reshape(self.y_predict, [-1, self.max_captcha, self.char_set_len]), 2)
text_list = self.sess.run(predict, feed_dict={self.X: [test_image], self.keep_prob: 1.})
predict_text = text_list[0].tolist()
p_text = ""
for p in predict_text:
p_text += str(self.char_set[p])
# 返回识别结果
return p_text
def main():
image_height = sample_conf["image_height"]
image_width = sample_conf["image_width"]
max_captcha = sample_conf["max_captcha"]
char_set = sample_conf["char_set"]
model_save_dir = sample_conf["model_save_dir"]
R = Recognizer(image_height, image_width, max_captcha, char_set, model_save_dir)
r_img = Image.open("./sample/test/2b3n_6915e26c67a52bc0e4e13d216eb62b37.jpg")
t = R.rec_image(r_img)
print(t)
if __name__ == '__main__':
main()