索引地址:系列索引
手写字符识别:
使用Qt/C++/Linux实现手写字符(主要是界面);使用MNIST手写字符集作为训练源;使用OpenCV/SVM/KNN训练MNIST数据集。MNIST字符集读取与训练:MNIST介绍见 SVM+MNIST。
将代码简单修改就是本文使用的训练测试源码,在这里就不赘述了,具体可看源码。
手写字符界面所谓手写其实是模拟手写,毕竟一般开发的笔记本和PC没有手写功能,就是用鼠标画图。
先定义两个点lastpoint,endpoint。
鼠标拖动的时候更新两个点数据
1 2 3 4 5 6 7 void Drawing::mousePressEvent (QMouseEvent *event) { if (event->button () == Qt::LeftButton){ lastPoint = event->pos (); } endPoint = lastPoint; }
而Qt会自动调用paintEvent绘图,我们使用lastpoint,endpoint划线,同时两个点数据一直在更新,这样就模拟出手写的效果。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 void Drawing::paintEvent (QPaintEvent *event) { Q_UNUSED (event) QPainter pp (&pix) ; pp.setPen (pen); pp.setFont (font); pp.drawLine (lastPoint,endPoint); pp.setRenderHint (QPainter::HighQualityAntialiasing,true ); lastPoint = endPoint; QPainter painter (this ) ; painter.drawPixmap (0 ,0 ,pix); }
这里为了效果使用缓冲技术,先将线画在一张图片上,再将图片绘制在界面上。
手写字符处理:鼠标模拟手写结束之后,需要将我们绘制的图片处理一下。我们绘制的图片显示是黑白的,但是实际上它是彩色的。而且训练的数据是1*784的向量,我们的图片也要转换成这个尺寸,否则会报错。
处理流程为:
保存界面图片;图片缩放至28*28(这是字符集的尺寸);灰度化图片;二值化图片;将28*28转换为1*784(28*28);将图片的数据转换为CV_32F;得到结果。源码为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 QImage drawImg = ui->wgtDrawing->getImage (); QImage scaleImg = drawImg.scaled (28 ,28 ); cv::Mat img = toMat (scaleImg); cv::Mat gray = getGrayImg (img); cv::Mat bin = getBinImg (gray);cv::Mat temp (1 ,28 * 28 , CV_8UC1) ;for (int i=0 ;i<bin.rows;i++){ for (int j=0 ;j<bin.cols;j++){ uchar a=bin.at <uchar>(i,j); temp.at <uchar>(0 ,i*28 +j)=a; } } temp.convertTo (temp,CV_32F);
然后就可以对这个图片进行识别了
结果预测预测流程是:
代码为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 std::cout << "svm预测开始" << std::endl; file.open ("svm.xml" );if (!file.is_open ()) { std::cout << "->SVM训练结果文件svm.xml不存在" << std::endl; } std::cout << "->开始加载svm模型" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->svm模型加载完毕" << std::endl; predicted = svm->predict (temp); result = static_cast <int >(predicted); std::cout << "svm预测结束" << std::endl;
源码使用源码有三个文件夹:
data是已经解压的mnist数据集,src是QT的手写字符识别软件,tools里面是SVM/KNN/OpenCV训练测试MNIST工具。
先到tools文件夹下,可以看到:
将data文件夹中的数据集复制到此文件夹下:
然后编译:
knntt
和svmtt
就是训练和测试工具(svmtraintest)。
训练SVM,执行:
输出为:
svm.xml就是训练结果文件,将其复制到手写字符软件编译可执行文件文件夹下。
同理可得knn结果文件:
编译运行手写字符软件:
使用鼠标绘制字符(相当于手写),点击Type下拉框选择SVM/KNN模型(暂时只有这两个),点击GO就会从软件文件夹加载之前训练的*.xml文件,然后预测结果:
到此结束,源码注释都有,可自行优化。
源码 knntt1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 #include <iostream> #include <string> #include <fstream> #include <time.h> #include <opencv2/opencv.hpp> class KNNTT {public : KNNTT () {} float getRate () { return _rate; } int getPredictResult () { return _predictResult; } inline int reverseDigit (int num) { unsigned char c1, c2, c3, c4; c1 = num & 255 ; c2 = (num >> 8 ) & 255 ; c3 = (num >> 16 ) & 255 ; c4 = (num >> 24 ) & 255 ; return ((int )c1 << 24 ) + ((int )c2 << 16 ) + ((int )c3 << 8 ) + c4; } cv::Mat readImagesData (int mode) { switch (mode) { case 0 : f.open ("train-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取训练用的图像数据." << std::endl; break ; case 1 : f.open ("t10k-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取测试用的图像数据." 
<< std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取图像数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_images = 0 ; int height = 0 ; int width = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_images, sizeof (number_of_images)); number_of_images = reverseDigit (number_of_images); std::cout << "->图像数量是:" << number_of_images << std::endl; f.read ((char *)&height, sizeof (height)); height = reverseDigit (height); f.read ((char *)&width, sizeof (width)); width = reverseDigit (width); cv::Mat train_images = cv::Mat (number_of_images, height * width, CV_8UC1); for (int i = 0 ; i < number_of_images; i++) { for (int r = 0 ; r < height; ++r) { for (int c = 0 ; c < width; ++c) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); train_images.at <uchar>(i, r * width + c) = (int )temp; } } } train_images.convertTo (train_images, CV_32F); f.close (); std::cout << "->数据集图像数据读取完毕。" << std::endl; return train_images; } cv::Mat readLabelsData (int mode) { switch (mode) { case 0 : f.open ("train-labels.idx1-ubyte" ); std::cout << "->读取训练标签。" << std::endl; break ; case 1 : f.open ("t10k-labels.idx1-ubyte" ); std::cout << "->读取测试标签。" << std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取标签数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_labels = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_labels, sizeof (number_of_labels)); number_of_labels = reverseDigit (number_of_labels); std::cout << "->标签数量为:" << number_of_labels << std::endl; cv::Mat labels = cv::Mat (number_of_labels, 1 , CV_8UC1); for (long int i = 0 ; i < number_of_labels; i++) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); labels.at <uchar>(i, 0 ) = temp; } labels.convertTo (labels, CV_32S); f.close (); std::cout << "->数据集标签数据读取完毕." 
<< std::endl; return labels; } void train () { std::cout << "KNN方式训练数据开始" << std::endl; cv::Mat train_images = readImagesData (0 ); if (train_images.size == 0 ) return ; cv::Mat train_labels = readLabelsData (0 ); if (train_labels.size == 0 ) return ; std::cout << "->成功读取图像和标签" << std::endl; std::cout << "->KNN训练开始" << std::endl; time_start = (double )clock (); cv::Ptr<cv::ml::KNearest> knn = cv::ml::KNearest::create (); cv::Ptr<cv::ml::TrainData> tdata = cv::ml::TrainData::create (train_images, cv::ml::ROW_SAMPLE, train_labels); knn->train (tdata); knn->setDefaultK (5 ); knn->setIsClassifier (true ); knn->save ("knn.xml" ); time_end = (double )clock (); std::cout << "->KNN训练数据已成功保存" << std::endl; std::cout << "->KNN训练耗时:" << (time_end - time_start) / 1000.0 <<"ms" << std::endl; std::cout << "KNN训练结束。" << std::endl; } void test () { std::cout << "KNN测试开始" << std::endl; std::ifstream file ("knn.xml" ) ; if (!file.is_open ()) { std::cout << "->没有训练结果文件" << std::endl; return ; } std::cout << "->开始导入KNN训练结果文件" << std::endl; cv::Ptr<cv::ml::KNearest> knn = cv::Algorithm::load <cv::ml::KNearest>("knn.xml" ); std::cout << "->已成功导入KNN训练结果文件" << std::endl; std::cout << "->开始导入测试数据" << std::endl; cv::Mat tData = readImagesData (1 ); if (tData.size == 0 ) return ; cv::Mat tLabel = readLabelsData (1 ); if (tLabel.size == 0 ) return ; std::cout << "->已成功导入测试数据" << std::endl; float total = tData.rows; float correct = 0 ; std::cout << "->KNN测试开始" << std::endl; time_start = (double )clock (); cv::Rect rect; rect.x = 0 ; rect.height = 1 ; rect.width = (28 * 28 ); for (int i = 0 ; i < total; i++) { int actual = tLabel.at <int >(i); rect.y = i; cv::Mat oneImage = tData (rect); cv::Mat result; float predicted = knn->predict (oneImage, result); int digit = static_cast <int >(predicted); if (digit == actual) { correct++; } } time_end = (double )clock (); _rate = correct / total * 100.0 ; std::cout << "->识别准确率是:" << _rate << std::endl; std::cout << "->KNN训练耗时:" << (time_end - time_start) 
/ 1000.0 << "ms" << std::endl; std::cout << "KNN测试结束" << std::endl; } int predict (cv::Mat &img) { std::cout << "knn预测开始" << std::endl; std::ifstream file ("knn.xml" ) ; if (!file.is_open ()) { std::cout << "->KNN训练结果文件knn.xml不存在" << std::endl; return -1 ; } std::cout << "->开始加载knn模型" << std::endl; cv::Ptr<cv::ml::KNearest> knn = cv::Algorithm::load <cv::ml::KNearest>("knn.xml" ); std::cout << "->knn模型加载完毕" << std::endl; cv::Mat result; float predicted = knn->predict (img, result); _predictResult = static_cast <int >(predicted); std::cout << "knn预测结束" << std::endl; return 0 ; }private : std::ifstream f; float _rate; int _predictResult; double time_start; double time_end; };int main () { KNNTT knntt; knntt.train (); knntt.test (); }
svmtt1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 #include <iostream> #include <string> #include <fstream> #include <time.h> #include <opencv2/opencv.hpp> class SVMTT {public : SVMTT () {} float getRate () { return _rate; } int getPredictResult () { return _predictResult; } inline int reverseDigit (int num) { unsigned char c1, c2, c3, c4; c1 = num & 255 ; c2 = (num >> 8 ) & 255 ; c3 = (num >> 16 ) & 255 ; c4 = (num >> 24 ) & 255 ; return ((int )c1 << 24 ) + ((int )c2 << 16 ) + ((int )c3 << 8 ) + c4; } cv::Mat readImagesData (int mode) { switch (mode) { case 0 : f.open ("train-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取训练用的图像数据." << std::endl; break ; case 1 : f.open ("t10k-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取测试用的图像数据." 
<< std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取图像数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_images = 0 ; int height = 0 ; int width = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_images, sizeof (number_of_images)); number_of_images = reverseDigit (number_of_images); std::cout << "->图像数量是:" << number_of_images << std::endl; f.read ((char *)&height, sizeof (height)); height = reverseDigit (height); f.read ((char *)&width, sizeof (width)); width = reverseDigit (width); cv::Mat train_images = cv::Mat (number_of_images, height * width, CV_8UC1); for (int i = 0 ; i < number_of_images; i++) { for (int r = 0 ; r < height; ++r) { for (int c = 0 ; c < width; ++c) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); train_images.at <uchar>(i, r * width + c) = (int )temp; } } } train_images.convertTo (train_images, CV_32F); f.close (); std::cout << "->数据集图像数据读取完毕。" << std::endl; return train_images; } cv::Mat readLabelsData (int mode) { switch (mode) { case 0 : f.open ("train-labels.idx1-ubyte" ); std::cout << "->读取训练标签。" << std::endl; break ; case 1 : f.open ("t10k-labels.idx1-ubyte" ); std::cout << "->读取测试标签。" << std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取标签数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_labels = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_labels, sizeof (number_of_labels)); number_of_labels = reverseDigit (number_of_labels); std::cout << "->标签数量为:" << number_of_labels << std::endl; cv::Mat labels = cv::Mat (number_of_labels, 1 , CV_8UC1); for (long int i = 0 ; i < number_of_labels; i++) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); labels.at <uchar>(i, 0 ) = temp; } labels.convertTo (labels, CV_32S); f.close (); std::cout << "->数据集标签数据读取完毕." 
<< std::endl; return labels; } void train () { std::cout << "svm方式训练数据开始" << std::endl; cv::Mat train_images = readImagesData (0 ); if (train_images.size == 0 ) return ; cv::Mat train_labels = readLabelsData (0 ); if (train_labels.size == 0 ) return ; std::cout << "->成功读取图像和标签" << std::endl; std::cout << "->svm训练开始" << std::endl; time_start = (double )clock (); cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create (); svm->setType (cv::ml::SVM::C_SVC); svm->setKernel (cv::ml::SVM::LINEAR); svm->setDegree (5 ); svm->setGamma (0.01 ); cv::Ptr<cv::ml::TrainData> tdata = cv::ml::TrainData::create (train_images, cv::ml::ROW_SAMPLE, train_labels); svm->train (tdata); svm->save ("svm.xml" ); time_end = (double )clock (); std::cout << "->svm训练数据已成功保存" << std::endl; std::cout << "->svm训练耗时:" << (time_end - time_start) / 1000.0 <<"ms" << std::endl; std::cout << "svm训练结束。" << std::endl; } void test () { std::cout << "svm测试开始" << std::endl; std::ifstream file ("svm.xml" ) ; if (!file.is_open ()) { std::cout << "->没有训练结果文件" << std::endl; return ; } std::cout << "->开始导入svm训练结果文件" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->已成功导入svm训练结果文件" << std::endl; std::cout << "->开始导入测试数据" << std::endl; cv::Mat tData = readImagesData (1 ); if (tData.size == 0 ) return ; cv::Mat tLabel = readLabelsData (1 ); if (tLabel.size == 0 ) return ; std::cout << "->已成功导入测试数据" << std::endl; float total = tData.rows; float correct = 0 ; std::cout << "->svm测试开始" << std::endl; time_start = (double )clock (); float count =0 ; for (int i=0 ;i<tData.rows;i++){ cv::Mat sample = tData.row (i); float res = svm->predict (sample); res = std::abs (res-tLabel.at <unsigned int >(i,0 ))<=FLT_EPSILON?1.0f :0.0f ; count += res; } time_end = (double )clock (); _rate = (count + 0.0 )/10000 *100.0 ; std::cout << "->识别准确率是:" << _rate << std::endl; std::cout << "->svm测试耗时:" << (time_end - time_start) / 1000.0 << "ms" << std::endl; std::cout << "svm测试结束" << std::endl; } 
int predict (cv::Mat &img) { std::cout << "svm预测开始" << std::endl; std::ifstream file ("svm.xml" ) ; if (!file.is_open ()) { std::cout << "->SVM训练结果文件svm.xml不存在" << std::endl; return -1 ; } std::cout << "->开始加载svm模型" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->svm模型加载完毕" << std::endl; float predicted = svm->predict (img); _predictResult = static_cast <int >(predicted); std::cout << "svm预测结束" << std::endl; return 0 ; }private : std::ifstream f; float _rate; int _predictResult; double time_start; double time_end; };int main () { SVMTT svmtt; svmtt.train (); svmtt.test (); }