索引地址:系列索引
手写字符识别:
- 使用Qt/C++/Linux实现手写字符(主要是界面)
- 使用MNIST手写字符集作为训练源
- 使用OpenCV/SVM/KNN训练MNIST数据集

MNIST字符集读取与训练

MNIST介绍:SVM+MNIST
将代码简单修改就是本文使用的训练测试源码,在这里就不赘述了,具体可看源码。
手写字符界面

所谓手写其实是模拟手写,毕竟一般开发用的笔记本和PC没有手写功能,这里就是用鼠标画图。
先定义两个点lastPoint、endPoint。
鼠标拖动的时候更新两个点数据
1 2 3 4 5 6 7 void Drawing::mousePressEvent (QMouseEvent *event) { if (event->button () == Qt::LeftButton){ lastPoint = event->pos (); } endPoint = lastPoint; }
而Qt会自动调用paintEvent绘图,我们使用lastPoint、endPoint画线,同时两个点的数据一直在更新,这样就模拟出手写的效果。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 void Drawing::paintEvent (QPaintEvent *event) { Q_UNUSED (event) QPainter pp (&pix) ; pp.setPen (pen); pp.setFont (font); pp.drawLine (lastPoint,endPoint); pp.setRenderHint (QPainter::HighQualityAntialiasing,true ); lastPoint = endPoint; QPainter painter (this ) ; painter.drawPixmap (0 ,0 ,pix); }
这里为了效果使用缓冲技术,先将线画在一张图片上,再将图片绘制在界面上。
手写字符处理

鼠标模拟手写结束之后,需要将我们绘制的图片处理一下。我们绘制的图片显示是黑白的,但实际上它是彩色的。而且训练的数据是1*784的向量,我们的图片也要转换成这个尺寸,否则会报错。
处理流程为:
1. 保存界面图片
2. 图片缩放至28*28(这是字符集的尺寸)
3. 灰度化图片
4. 二值化图片
5. 将28*28转换为1*784(28*28=784)
6. 将图片的数据转换为CV_32F
7. 得到结果

源码为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 QImage drawImg = ui->wgtDrawing->getImage (); QImage scaleImg = drawImg.scaled (28 ,28 ); cv::Mat img = toMat (scaleImg); cv::Mat gray = getGrayImg (img); cv::Mat bin = getBinImg (gray);cv::Mat temp (1 ,28 * 28 , CV_8UC1) ;for (int i=0 ;i<bin.rows;i++){ for (int j=0 ;j<bin.cols;j++){ uchar a=bin.at <uchar>(i,j); temp.at <uchar>(0 ,i*28 +j)=a; } } temp.convertTo (temp,CV_32F);
然后就可以对这个图片进行识别了。
结果预测

预测流程是:
代码为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 std::cout << "svm预测开始" << std::endl; file.open ("svm.xml" );if (!file.is_open ()) { std::cout << "->SVM训练结果文件svm.xml不存在" << std::endl; } std::cout << "->开始加载svm模型" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->svm模型加载完毕" << std::endl; predicted = svm->predict (temp); result = static_cast <int >(predicted); std::cout << "svm预测结束" << std::endl;
源码使用

源码有三个文件夹:
data是已经解压的mnist数据集,src是QT的手写字符识别软件,tools里面是SVM/KNN/OpenCV训练测试MNIST工具。
先到tools文件夹下,可以看到:
将data文件夹中的数据集复制到此文件夹下:
然后编译:
knntt
和svmtt
就是训练和测试工具(svmtraintest)。
训练SVM,执行:
输出为:
svm.xml就是训练结果文件,将其复制到手写字符软件编译可执行文件文件夹下。
同理可得knn结果文件:
编译运行手写字符软件:
使用鼠标绘制字符(相当于手写),点击Type下拉框选择SVM/KNN模型(暂时只有这两个)点击GO就会从软件文件夹加载之前训练的*.xml文件然后预测结果:
到此结束,源码注释都有,可自行优化。
源码 knnttinclude <iostream> #include <string> #include <fstream> #include <time.h> #include <opencv2/opencv.hpp> class KNNTT {public : KNNTT () {} float getRate () { return _rate; } int getPredictResult () { return _predictResult; } inline int reverseDigit (int num) { unsigned char c1, c2, c3, c4; c1 = num & 255 ; c2 = (num >> 8 ) & 255 ; c3 = (num >> 16 ) & 255 ; c4 = (num >> 24 ) & 255 ; return ((int )c1 << 24 ) + ((int )c2 << 16 ) + ((int )c3 << 8 ) + c4; } cv::Mat readImagesData (int mode) { switch (mode) { case 0 : f.open ("train-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取训练用的图像数据." << std::endl; break ; case 1 : f.open ("t10k-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取测试用的图像数据." << std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取图像数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_images = 0 ; int height = 0 ; int width = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_images, sizeof (number_of_images)); number_of_images = reverseDigit (number_of_images); std::cout << "->图像数量是:" << number_of_images << std::endl; f.read ((char *)&height, sizeof (height)); height = reverseDigit (height); f.read ((char *)&width, sizeof (width)); width = reverseDigit (width); cv::Mat train_images = cv::Mat (number_of_images, height * width, CV_8UC1); for (int i = 0 ; i < number_of_images; i++) { for (int r = 0 ; r < height; ++r) { for (int c = 0 ; c < width; ++c) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); train_images.at <uchar>(i, r * width + c) = (int )temp; } } } train_images.convertTo (train_images, CV_32F); f.close (); std::cout << "->数据集图像数据读取完毕。" << std::endl; return train_images; } cv::Mat readLabelsData (int mode) { switch (mode) { case 0 : f.open ("train-labels.idx1-ubyte" ); std::cout << "->读取训练标签。" << std::endl; break ; case 1 : f.open ("t10k-labels.idx1-ubyte" ); std::cout << "->读取测试标签。" << std::endl; 
break ; } if (!f.is_open ()) { std::cout << "->无法读取标签数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_labels = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_labels, sizeof (number_of_labels)); number_of_labels = reverseDigit (number_of_labels); std::cout << "->标签数量为:" << number_of_labels << std::endl; cv::Mat labels = cv::Mat (number_of_labels, 1 , CV_8UC1); for (long int i = 0 ; i < number_of_labels; i++) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); labels.at <uchar>(i, 0 ) = temp; } labels.convertTo (labels, CV_32S); f.close (); std::cout << "->数据集标签数据读取完毕." << std::endl; return labels; } void train () { std::cout << "KNN方式训练数据开始" << std::endl; cv::Mat train_images = readImagesData (0 ); if (train_images.size == 0 ) return ; cv::Mat train_labels = readLabelsData (0 ); if (train_labels.size == 0 ) return ; std::cout << "->成功读取图像和标签" << std::endl; std::cout << "->KNN训练开始" << std::endl; time_start = (double )clock (); cv::Ptr<cv::ml::KNearest> knn = cv::ml::KNearest::create (); cv::Ptr<cv::ml::TrainData> tdata = cv::ml::TrainData::create (train_images, cv::ml::ROW_SAMPLE, train_labels); knn->train (tdata); knn->setDefaultK (5 ); knn->setIsClassifier (true ); knn->save ("knn.xml" ); time_end = (double )clock (); std::cout << "->KNN训练数据已成功保存" << std::endl; std::cout << "->KNN训练耗时:" << (time_end - time_start) / 1000.0 <<"ms" << std::endl; std::cout << "KNN训练结束。" << std::endl; } void test () { std::cout << "KNN测试开始" << std::endl; std::ifstream file ("knn.xml" ) ; if (!file.is_open ()) { std::cout << "->没有训练结果文件" << std::endl; return ; } std::cout << "->开始导入KNN训练结果文件" << std::endl; cv::Ptr<cv::ml::KNearest> knn = cv::Algorithm::load <cv::ml::KNearest>("knn.xml" ); std::cout << "->已成功导入KNN训练结果文件" << std::endl; std::cout << "->开始导入测试数据" << std::endl; cv::Mat tData = readImagesData (1 ); if (tData.size == 0 ) return ; cv::Mat tLabel = readLabelsData 
(1 ); if (tLabel.size == 0 ) return ; std::cout << "->已成功导入测试数据" << std::endl; float total = tData.rows; float correct = 0 ; std::cout << "->KNN测试开始" << std::endl; time_start = (double )clock (); cv::Rect rect; rect.x = 0 ; rect.height = 1 ; rect.width = (28 * 28 ); for (int i = 0 ; i < total; i++) { int actual = tLabel.at <int >(i); rect.y = i; cv::Mat oneImage = tData (rect); cv::Mat result; float predicted = knn->predict (oneImage, result); int digit = static_cast <int >(predicted); if (digit == actual) { correct++; } } time_end = (double )clock (); _rate = correct / total * 100.0 ; std::cout << "->识别准确率是:" << _rate << std::endl; std::cout << "->KNN训练耗时:" << (time_end - time_start) / 1000.0 << "ms" << std::endl; std::cout << "KNN测试结束" << std::endl; } int predict (cv::Mat &img) { std::cout << "knn预测开始" << std::endl; std::ifstream file ("knn.xml" ) ; if (!file.is_open ()) { std::cout << "->KNN训练结果文件knn.xml不存在" << std::endl; return -1 ; } std::cout << "->开始加载knn模型" << std::endl; cv::Ptr<cv::ml::KNearest> knn = cv::Algorithm::load <cv::ml::KNearest>("knn.xml" ); std::cout << "->knn模型加载完毕" << std::endl; cv::Mat result; float predicted = knn->predict (img, result); _predictResult = static_cast <int >(predicted); std::cout << "knn预测结束" << std::endl; return 0 ; }private : std::ifstream f; float _rate; int _predictResult; double time_start; double time_end; };int main () { KNNTT knntt; knntt.train (); knntt.test (); }
svmttinclude <iostream> #include <string> #include <fstream> #include <time.h> #include <opencv2/opencv.hpp> class SVMTT {public : SVMTT () {} float getRate () { return _rate; } int getPredictResult () { return _predictResult; } inline int reverseDigit (int num) { unsigned char c1, c2, c3, c4; c1 = num & 255 ; c2 = (num >> 8 ) & 255 ; c3 = (num >> 16 ) & 255 ; c4 = (num >> 24 ) & 255 ; return ((int )c1 << 24 ) + ((int )c2 << 16 ) + ((int )c3 << 8 ) + c4; } cv::Mat readImagesData (int mode) { switch (mode) { case 0 : f.open ("train-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取训练用的图像数据." << std::endl; break ; case 1 : f.open ("t10k-images.idx3-ubyte" , std::ios::binary); std::cout << "->读取测试用的图像数据." << std::endl; break ; } if (!f.is_open ()) { std::cout << "->无法读取图像数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_images = 0 ; int height = 0 ; int width = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_images, sizeof (number_of_images)); number_of_images = reverseDigit (number_of_images); std::cout << "->图像数量是:" << number_of_images << std::endl; f.read ((char *)&height, sizeof (height)); height = reverseDigit (height); f.read ((char *)&width, sizeof (width)); width = reverseDigit (width); cv::Mat train_images = cv::Mat (number_of_images, height * width, CV_8UC1); for (int i = 0 ; i < number_of_images; i++) { for (int r = 0 ; r < height; ++r) { for (int c = 0 ; c < width; ++c) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); train_images.at <uchar>(i, r * width + c) = (int )temp; } } } train_images.convertTo (train_images, CV_32F); f.close (); std::cout << "->数据集图像数据读取完毕。" << std::endl; return train_images; } cv::Mat readLabelsData (int mode) { switch (mode) { case 0 : f.open ("train-labels.idx1-ubyte" ); std::cout << "->读取训练标签。" << std::endl; break ; case 1 : f.open ("t10k-labels.idx1-ubyte" ); std::cout << "->读取测试标签。" << std::endl; break 
; } if (!f.is_open ()) { std::cout << "->无法读取标签数据" << std::endl; exit (-1 ); } int magic_number = 0 ; int number_of_labels = 0 ; f.read ((char *)&magic_number, sizeof (magic_number)); magic_number = reverseDigit (magic_number); f.read ((char *)&number_of_labels, sizeof (number_of_labels)); number_of_labels = reverseDigit (number_of_labels); std::cout << "->标签数量为:" << number_of_labels << std::endl; cv::Mat labels = cv::Mat (number_of_labels, 1 , CV_8UC1); for (long int i = 0 ; i < number_of_labels; i++) { unsigned char temp = 0 ; f.read ((char *)&temp, sizeof (temp)); labels.at <uchar>(i, 0 ) = temp; } labels.convertTo (labels, CV_32S); f.close (); std::cout << "->数据集标签数据读取完毕." << std::endl; return labels; } void train () { std::cout << "svm方式训练数据开始" << std::endl; cv::Mat train_images = readImagesData (0 ); if (train_images.size == 0 ) return ; cv::Mat train_labels = readLabelsData (0 ); if (train_labels.size == 0 ) return ; std::cout << "->成功读取图像和标签" << std::endl; std::cout << "->svm训练开始" << std::endl; time_start = (double )clock (); cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create (); svm->setType (cv::ml::SVM::C_SVC); svm->setKernel (cv::ml::SVM::LINEAR); svm->setDegree (5 ); svm->setGamma (0.01 ); cv::Ptr<cv::ml::TrainData> tdata = cv::ml::TrainData::create (train_images, cv::ml::ROW_SAMPLE, train_labels); svm->train (tdata); svm->save ("svm.xml" ); time_end = (double )clock (); std::cout << "->svm训练数据已成功保存" << std::endl; std::cout << "->svm训练耗时:" << (time_end - time_start) / 1000.0 <<"ms" << std::endl; std::cout << "svm训练结束。" << std::endl; } void test () { std::cout << "svm测试开始" << std::endl; std::ifstream file ("svm.xml" ) ; if (!file.is_open ()) { std::cout << "->没有训练结果文件" << std::endl; return ; } std::cout << "->开始导入svm训练结果文件" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->已成功导入svm训练结果文件" << std::endl; std::cout << "->开始导入测试数据" << std::endl; cv::Mat tData = readImagesData (1 ); if (tData.size == 0 ) 
return ; cv::Mat tLabel = readLabelsData (1 ); if (tLabel.size == 0 ) return ; std::cout << "->已成功导入测试数据" << std::endl; float total = tData.rows; float correct = 0 ; std::cout << "->svm测试开始" << std::endl; time_start = (double )clock (); float count =0 ; for (int i=0 ;i<tData.rows;i++){ cv::Mat sample = tData.row (i); float res = svm->predict (sample); res = std::abs (res-tLabel.at <unsigned int >(i,0 ))<=FLT_EPSILON?1.0f :0.0f ; count += res; } time_end = (double )clock (); _rate = (count + 0.0 )/10000 *100.0 ; std::cout << "->识别准确率是:" << _rate << std::endl; std::cout << "->svm测试耗时:" << (time_end - time_start) / 1000.0 << "ms" << std::endl; std::cout << "svm测试结束" << std::endl; } int predict (cv::Mat &img) { std::cout << "svm预测开始" << std::endl; std::ifstream file ("svm.xml" ) ; if (!file.is_open ()) { std::cout << "->SVM训练结果文件svm.xml不存在" << std::endl; return -1 ; } std::cout << "->开始加载svm模型" << std::endl; cv::Ptr<cv::ml::SVM> svm = cv::Algorithm::load <cv::ml::SVM>("svm.xml" ); std::cout << "->svm模型加载完毕" << std::endl; float predicted = svm->predict (img); _predictResult = static_cast <int >(predicted); std::cout << "svm预测结束" << std::endl; return 0 ; }private : std::ifstream f; float _rate; int _predictResult; double time_start; double time_end; };int main () { SVMTT svmtt; svmtt.train (); svmtt.test (); }