// src/digit_classifier.cpp



#include "digit_classifier.h"
#include "digit_classifier_data.h"

using namespace std;
// Shorthand for an unsigned 8-bit value; used throughout this file for pixel bytes.
using uchar = unsigned char;

// Grayscale intensity of pixel (x, y) of `image`: the integer mean of the
// pixel's r, g and b channels (alpha is not read).
// NOTE: `image` is taken by value, so every call copies the sf::Image argument;
// that copy is the likely reason the original author found this overload
// "incredibly slow". The raw-buffer overload of getPixel_uint8 avoids it.
uchar getPixel_uint8(sf::Image image, int x, int y){
    const sf::Color px = image.getPixel(x, y);
    const int channel_sum = static_cast<int>(px.r)
                          + static_cast<int>(px.g)
                          + static_cast<int>(px.b);
    return static_cast<uchar>(channel_sum / 3);
}
// Grayscale intensity of pixel (x, y) read from a raw pixel buffer.
//   pByteBuffer: pixels stored row by row, 4 bytes per pixel; the first three
//                bytes (red, green, blue) are averaged, the fourth is skipped.
//   cols:        number of pixels per row, used to locate row y.
// Returns the integer mean of the three colour bytes.
uchar getPixel_uint8(const uchar* pByteBuffer, int x, int y, int cols){
    const int kBytesPerPixel = 4;
    const uchar* px = pByteBuffer + (y*cols + x)*kBytesPerPixel;
    const int channel_sum = static_cast<int>(px[0])
                          + static_cast<int>(px[1])
                          + static_cast<int>(px[2]);
    return static_cast<uchar>(channel_sum / 3);
}

// Classify a grayscale digit image and return the index of the best-scoring label.
//
// The model constants used here (image_length, patch_length, stride_size,
// num_centers, patch_centers, x_means, x_stds, WT, num_reduced_features,
// final_weight, num_labels, _test_image) are defined outside this function.
//
// Steps:
//   1. Slide a patch_length x patch_length window over the image in steps of
//      stride_size and compute the Euclidean distance from each patch to every
//      centroid column of patch_centers.
//   2. Convert each distance d to max(0, mean_d - d), where mean_d is the mean
//      distance of that patch to all centroids, and sum the result into one of
//      4 sub-regions (halves of the patch grid in each direction).
//   3. Concatenate the 4 pooled vectors, subtract x_means, project with WT,
//      divide by x_stds and prepend a constant 1 as bias input.
//   4. Multiply by final_weight and pick the label with the highest score.
//
// image_for_classify: rows of pixel values. Only the first image_length rows
//     and columns are read. An empty image selects the built-in _test_image.
// Returns: the winning label index, or -1 if the (non-empty) image has fewer
//     than image_length rows or any row has fewer than image_length columns.
int classify_digit(vector<vector<uchar>> image_for_classify){

    // No input given: use the test image generated by Python.
    if (image_for_classify.empty()){
        for (auto &row : _test_image){
            image_for_classify.push_back(
                vector<uchar>(row, row+image_length));
        }
    }

    // Reject undersized input; the patch loops below index up to
    // image_length-1 in both directions and would otherwise read out of bounds.
    const auto required_length=static_cast<vector<uchar>::size_type>(image_length);
    if (image_for_classify.size()<required_length){
        return -1;
    }
    for (const auto &row : image_for_classify){
        if (row.size()<required_length){
            return -1;
        }
    }

    // Patch-grid geometry.
    const int NUM_PATCH_OF_SIDE=(image_length-patch_length)/stride_size+1; // e.g. (28-6)/2+1=12
    const int PATCH_MAX_IDX=image_length-patch_length+1; // exclusive limit of a patch's first index
    const int NUM_SUB=4;
    const int num_patch_mid=(NUM_PATCH_OF_SIDE+1)/2; // e.g. 6

    // Per-sub-region sums of the distance features.
    double pooled_dis[NUM_SUB][num_centers]={0};
    // Distances of the current patch to every centroid (reused for each patch).
    double patch_dis[num_centers]={0};

    for (int i=0; i<PATCH_MAX_IDX; i+=stride_size){
        for (int j=0; j<PATCH_MAX_IDX; j+=stride_size){
            const int patch_row=i/stride_size, patch_col=j/stride_size;

            // Sub-region of this patch: +2 for the second half of the rows,
            // +1 for the second half of the columns (same 0..3 numbering as before).
            const int idx_sub=(patch_row>=num_patch_mid ? 2 : 0)
                             +(patch_col>=num_patch_mid ? 1 : 0);

            // Euclidean distance from this patch to each centroid.
            double dis_sum=0;
            for (int kth_center=0; kth_center<num_centers; kth_center++){
                double dis_element_square_sum=0;
                int nth_element=0; // patch pixels are flattened row by row
                for (int pi=0; pi<patch_length; pi++){
                    for (int pj=0; pj<patch_length; pj++){
                        const double dis=patch_centers[nth_element++][kth_center]
                            -static_cast<int>(image_for_classify[i+pi][j+pj]);
                        dis_element_square_sum+=dis*dis;
                    }
                }
                patch_dis[kth_center]=sqrt(dis_element_square_sum);
                dis_sum+=patch_dis[kth_center];
            }
            const double dis_mean=dis_sum/num_centers;

            // Distance -> feature: only centroids closer than average contribute.
            for (int kth_center=0; kth_center<num_centers; kth_center++){
                pooled_dis[idx_sub][kth_center]+=max(0.0, dis_mean-patch_dis[kth_center]);
            }
        }
    }

    // Concatenate the 4 sub-region vectors into one feature vector.
    const int NUM_FEATURES=NUM_SUB*num_centers;
    double features[NUM_FEATURES]={0}, *p_features=features;
    for ( const auto &row : pooled_dis ){
        for ( double val : row )*p_features++ = val;
    }

    // Standardize features by ZCA:
    //      features_new = W.T*(features-x_means)/x_stds
    //      W.T has num_reduced_features rows and NUM_FEATURES columns.
    for (int j=0; j<NUM_FEATURES; j++){
        features[j]-=x_means[j]; // remove mean
    }
    double features_new[num_reduced_features+1]={0};
    features_new[0]=1; // constant input that multiplies the bias row of final_weight
    for (int i=0; i<num_reduced_features; i++){
        double sum_matmul=0;
        for (int j=0; j<NUM_FEATURES; j++){
            sum_matmul+=WT[i][j]*features[j];
        }
        features_new[i+1]=sum_matmul/x_stds[i];
    }

    // Fully connected layer: score_i = features_new . final_weight[:, i]; keep the argmax.
    double max_score=-1e10;
    int max_score_idx=0;
    for (int i=0; i<num_labels; i++){
        double ith_score=0;
        for (int j=0; j<num_reduced_features+1; j++){
            ith_score+=features_new[j]*final_weight[j][i];
        }
        if (ith_score>max_score){
            max_score=ith_score;
            max_score_idx=i;
        }
    }

    return max_score_idx;
}


// Crop the dark (digit) pixels out of `image` and resample them into a small
// image_length x image_length grayscale image.
//
// Steps:
//   1. Ignoring a LENGTH_BOARDER-pixel margin on every side, find the bounding
//      box of all pixels whose gray value is below 127. If there are none, the
//      whole interior is used.
//   2. Grow the box to a square around its centre (it may extend past the
//      interior; such samples are treated as white).
//   3. Sample the square on a regular grid (integer-truncated coordinates) into
//      the centre of the output, leaving a BOARD_LENGTH-pixel white frame.
//
// image: source picture; width and height may differ.
// Returns: image_length x image_length pixel values, 255 where nothing was sampled.
myImage resize_sfImage_to_28x28(sf::Image image){

    // -------------- get image size --------------
    const sf::Vector2u image_ori_size = image.getSize();
    const int rows=static_cast<int>(image_ori_size.y);
    const int cols=static_cast<int>(image_ori_size.x);

    // --------------  Find the smallest bounding box around the digit --------------
    // This bounding box might exceed the range of the original image.

    const int LENGTH_BOARDER=10; // margin (in pixels) ignored on every side
    // Exclusive limits of the usable interior. x is limited by the width and y
    // by the height, so non-square images are scanned and sampled correctly.
    const int x_limit=cols-LENGTH_BOARDER;
    const int y_limit=rows-LENGTH_BOARDER;

    int x_left=INT_MAX, x_right=INT_MIN;
    int y_up=INT_MAX, y_down=INT_MIN;
    bool has_black_pixel=false;

    // Read straight from the pixel buffer; the sf::Image overload of
    // getPixel_uint8 was measured by the original author to be far too slow.
    const uchar* pByteBuffer = image.getPixelsPtr();
    for (int i=LENGTH_BOARDER; i<y_limit; i++){
        for (int j=LENGTH_BOARDER; j<x_limit; j++){
            const uchar v = getPixel_uint8(pByteBuffer, j, i, cols);
            if (v<127){ // black, the color of digit
                has_black_pixel=true;
                x_left=min(x_left, j);
                x_right=max(x_right, j);
                y_up=min(y_up, i);
                y_down=max(y_down, i);
            }
        }
    }
    if (!has_black_pixel){ // nothing drawn: fall back to the whole interior
        x_left=LENGTH_BOARDER;
        x_right=x_limit;
        y_up=LENGTH_BOARDER;
        y_down=y_limit;
    }

    // Make the box square around its centre.
    const int x_center=(x_left+x_right)/2;
    const int y_center=(y_up+y_down)/2;
    const int radius=max(x_right-x_left, y_down-y_up)/2;
    x_left=x_center-radius;
    x_right=x_center+radius;
    y_up=y_center-radius;
    y_down=y_center+radius;

    // -------------- resize the image --------------
    const int BOARD_LENGTH=2; // keep some blank at borders

    myImage image_for_classify(image_length,
        vector<uchar>(image_length, 255)
    ); // create the new small image, all white

    const int IMAGE_LENGTH_REDUCED=image_length-2*BOARD_LENGTH;
    for (int i=0; i<IMAGE_LENGTH_REDUCED; i++){
        for (int j=0; j<IMAGE_LENGTH_REDUCED; j++){
            // mapping: (j, i) = (0, 0) -> (x_left, y_up)
            //          (j, i) = (IMAGE_LENGTH_REDUCED-1, IMAGE_LENGTH_REDUCED-1) -> (x_right, y_down)
            const int new_x=x_left+(x_right-x_left)*j/(IMAGE_LENGTH_REDUCED-1);
            const int new_y=y_up+(y_down-y_up)*i/(IMAGE_LENGTH_REDUCED-1);

            // Samples that fall in the margin or outside the image are white.
            const bool outside=
                new_x<LENGTH_BOARDER || new_x>=x_limit ||
                new_y<LENGTH_BOARDER || new_y>=y_limit;
            const uchar pixel_val = outside
                ? static_cast<uchar>(255)
                : getPixel_uint8(pByteBuffer, new_x, new_y, cols);
            image_for_classify[i+BOARD_LENGTH][j+BOARD_LENGTH]=pixel_val;
        }
    }
    return image_for_classify;
}