|
function tracker = create_csr_tracker(img, init_bbox, init_params)
% CREATE_CSR_TRACKER  Initialise a CSR tracker structure on the first frame.
%
%   tracker = create_csr_tracker(img, init_bbox)
%   tracker = create_csr_tracker(img, init_bbox, init_params)
%
%   Inputs:
%     img         - first frame; a single-channel image, or a 3-channel
%                   image whose channels are (nearly) identical, disables
%                   segmentation and the 'cn' feature.
%     init_bbox   - either a 4-element box [x, y, width, height], or a
%                   polygon with more than 4 elements given as interleaved
%                   coordinates [x1, y1, x2, y2, ...]. A polygon is
%                   converted to its axis-aligned bounding box and also
%                   rasterised into an initial object mask.
%     init_params - (optional) parameter struct; when omitted it is obtained
%                   from read_default_csr_parameters(). Fields read here:
%                   padding, learning_rate, feature_type, hist_lr, nbins,
%                   seg_colorspace, use_segmentation, mask_diletation_type,
%                   mask_diletation_sz, y_sigma, currentScaleFactor,
%                   n_scales, scale_model_factor, scale_sigma_factor,
%                   scale_step, scale_model_max_area, scale_lr,
%                   channels_weight_lr, use_channel_weights.
%
%   Output:
%     tracker     - struct holding the learned filter H, channel weights,
%                   masks, scale model and all constants stored below.
%
%   Raises:
%     create_csr_tracker:invalidBBox - init_bbox has fewer than 4 elements
%                   or yields a non-positive width/height after rounding.
%     'Unknown colorspace parameter' - seg_colorspace is neither 'rgb' nor
%                   'hsv' while segmentation is enabled.

    if nargin < 3
        init_params = read_default_csr_parameters();
    end

    if numel(init_bbox) < 4
        error('create_csr_tracker:invalidBBox', ...
            'init_bbox must contain at least 4 elements.');
    end

    % transform polygon to axis-aligned bbox
    if numel(init_bbox) > 4
        poly = round(init_bbox(:));
        poly_x = poly(1:2:end);
        poly_y = poly(2:2:end);
        % coordinates are already integers, so no further rounding is needed
        bb = [min(poly_x), min(poly_y), ...
              max(poly_x) - min(poly_x), max(poly_y) - min(poly_y)];

        % polygon rasterised relative to the top-left corner of the bbox
        init_mask = poly2mask(poly_x - bb(1), poly_y - bb(2), bb(4), bb(3));
    else
        % force a row vector so that the arithmetic with base_target_sz
        % below is well-defined for column-vector input too
        bb = round(init_bbox(:).');
        init_mask = ones(bb(4), bb(3));
    end

    if any(bb(3:4) < 1)
        error('create_csr_tracker:invalidBBox', ...
            'init_bbox must have positive width and height after rounding.');
    end

    % filter parameters
    padding = init_params.padding;              % padding
    learning_rate = init_params.learning_rate;  % learning rate for updating filter
    feature_type = init_params.feature_type;

    % load and store pre-computed lookup table for colornames
    w2c = [];
    if any(strcmp(feature_type, 'cn'))
        w2c = load('w2crs.mat');
        w2c = w2c.w2crs;
    end

    % segmentation parameters
    % (the "diletation" spelling is part of the init_params field names)
    hist_lr = init_params.hist_lr;
    nbins = init_params.nbins;                                % N bins for segmentation
    seg_colorspace = init_params.seg_colorspace;              % 'rgb' or 'hsv'
    use_segmentation = init_params.use_segmentation;          % false to disable use of segmentation
    mask_diletation_type = init_params.mask_diletation_type;  % for function strel (square, disk, ...)
    mask_diletation_sz = init_params.mask_diletation_sz;

    % grayscale check: either a single channel, or three channels that
    % are (almost) equal, i.e. each channel barely deviates from the mean
    img0 = bsxfun(@minus, double(img), mean(img,3));
    if size(img,3) < 3 || sum(abs(img0(:))) < 10
        use_segmentation = false;
        % also do not use colornames
        [isused_cn, cn_idx] = ismember('cn', feature_type);
        if isused_cn
            feature_type(cn_idx) = [];
        end
    end

    % features parameters
    cell_size = 1.0;
    if any(strcmp(feature_type, 'hog'))
        % cell size grows with target area, limited to the range [1, 4]
        cell_size = min(4, max(1, ceil((bb(3)*bb(4))/400)));
    end

    % size parameters
    % reference target size: [width, height]
    base_target_sz = [bb(3), bb(4)];
    % reference template size: [w, h], does not change during tracking;
    % made square by averaging the padded width and height
    template_size = floor(base_target_sz + padding*sqrt(prod(base_target_sz)));
    template_size = mean(template_size);
    template_size = [template_size, template_size];

    % rescale template after extracting to have fixed area
    rescale_ratio = sqrt((200^2) / (template_size(1) * template_size(2)));
    if rescale_ratio > 1  % if already smaller - do not rescale
        rescale_ratio = 1;
    end

    rescale_template_size = floor(rescale_ratio * template_size);

    % position of the target center
    c = bb([1,2]) + base_target_sz/2;

    % create gaussian shaped labels
    sigma = init_params.y_sigma;
    Y = fft2(gaussian_shaped_labels(1, sigma, floor(rescale_template_size([2,1]) / cell_size)));

    % store pre-computed cosine window
    cos_win = hann(size(Y,1)) * hann(size(Y,2))';

    % scale adaptation parameters (from DSST)
    currentScaleFactor = init_params.currentScaleFactor;
    n_scales = init_params.n_scales;
    scale_model_factor = init_params.scale_model_factor;
    scale_sigma_factor = init_params.scale_sigma_factor;
    scale_step = init_params.scale_step;
    scale_model_max_area = init_params.scale_model_max_area;
    scale_sigma = sqrt(n_scales) * scale_sigma_factor;
    scale_lr = init_params.scale_lr;  % learning rate parameter

    % label function for the scales
    ss = (1:n_scales) - ceil(n_scales/2);
    ys = exp(-0.5 * (ss.^2) / scale_sigma^2);
    ysf = single(fft(ys));

    if mod(n_scales,2) == 0
        % even count: build a window one sample longer and drop the first
        scale_window = single(hann(n_scales+1));
        scale_window = scale_window(2:end);
    else
        scale_window = single(hann(n_scales));
    end

    ss = 1:n_scales;
    scaleFactors = scale_step.^(ceil(n_scales/2) - ss);

    % shrink the scale model so that its area stays below the limit
    if scale_model_factor^2 * prod(template_size) > scale_model_max_area
        scale_model_factor = sqrt(scale_model_max_area/prod(template_size));
    end

    scale_model_sz = floor(template_size * scale_model_factor);
    scaleSizeFactors = scaleFactors;
    min_scale_factor = scale_step ^ ceil(log(max(5 ./ template_size)) / log(scale_step));
    max_scale_factor = scale_step ^ floor(log(min([size(img,1) size(img,2)] ./ base_target_sz)) / log(scale_step));

    % create dummy mask (approximation for segmentation)
    % size of the object in feature space
    obj_size = floor(rescale_ratio * (base_target_sz/cell_size));
    dm_x0 = floor((size(Y,2) - obj_size(1)) / 2);
    dm_y0 = floor((size(Y,1) - obj_size(2)) / 2);
    dm_x1 = dm_x0 + obj_size(1);
    dm_y1 = dm_y0 + obj_size(2);
    % Clamp to the label grid: an object as large as (or larger than) the
    % grid along one axis would otherwise give index <= 0 (an error) or an
    % index past the end (which silently enlarges the mask beyond size(Y)).
    dm_x0 = max(1, dm_x0);
    dm_y0 = max(1, dm_y0);
    dm_x1 = min(size(Y,2), dm_x1);
    dm_y1 = min(size(Y,1), dm_y1);
    target_dummy_mask = zeros(size(Y));
    % note: the range is inclusive at both ends
    target_dummy_mask(dm_y0:dm_y1, dm_x0:dm_x1) = 1;
    target_dummy_mask = single(target_dummy_mask);

    target_dummy_area = sum(target_dummy_mask(:));

    if use_segmentation
        % convert image in desired colorspace
        if strcmp(seg_colorspace, 'rgb')
            seg_img = img;
        elseif strcmp(seg_colorspace, 'hsv')
            seg_img = rgb2hsv(img) * 255;
        else
            error('Unknown colorspace parameter');
        end

        % object rectangle region (to zero-based coordinates)
        obj_reg = [bb(1), bb(2), bb(1)+bb(3), bb(2)+bb(4)] - [1 1 1 1];

        % extract histograms
        hist_fg = mex_extractforeground(seg_img, obj_reg, nbins);
        hist_bg = mex_extractbackground(seg_img, obj_reg, nbins);

        % extract masked patch: mask out parts outside image
        [seg_patch, valid_pixels_mask] = get_patch(seg_img, c, currentScaleFactor, template_size);

        % segmentation
        patch_wh = [size(seg_patch,2), size(seg_patch,1)];
        [fg_p, bg_p] = get_location_prior([1 1 patch_wh], base_target_sz, patch_wh);
        [~, fg, ~] = mex_segment(seg_patch, hist_fg, hist_bg, nbins, fg_p, bg_p);

        % cut out regions outside from image
        mask = single(fg).*single(valid_pixels_mask);
        mask = binarize_softmask(mask);

        % use mask from init pose: paste init_mask centred into a canvas of
        % the same size as the segmentation mask
        init_mask_padded = zeros(size(mask));
        pm_x0 = floor(size(init_mask_padded,2) / 2 - size(init_mask,2) / 2);
        pm_y0 = floor(size(init_mask_padded,1) / 2 - size(init_mask,1) / 2);
        % Copy only the part that overlaps the canvas, so that an init_mask
        % as large as (or larger than) the patch cannot produce an invalid
        % index or enlarge the canvas.
        dst_x = max(1, pm_x0) : min(size(init_mask_padded,2), pm_x0 + size(init_mask,2) - 1);
        dst_y = max(1, pm_y0) : min(size(init_mask_padded,1), pm_y0 + size(init_mask,1) - 1);
        src_x = dst_x - pm_x0 + 1;
        src_y = dst_y - pm_y0 + 1;
        init_mask_padded(dst_y, dst_x) = init_mask(src_y, src_x);
        mask = mask.*single(init_mask_padded);

        % resize to filter size
        mask = imresize(mask, size(Y), 'nearest');

        % check if mask is too small (probably segmentation is not ok then)
        if mask_normal(mask, target_dummy_area)
            if mask_diletation_sz > 0
                D = strel(mask_diletation_type, mask_diletation_sz);
                mask = imdilate(mask, D);
            end
        else
            mask = target_dummy_mask;
        end

    else
        mask = target_dummy_mask;
    end

    % extract features
    f = get_csr_features(img, c, currentScaleFactor, template_size, ...
        rescale_template_size, cos_win, feature_type, w2c, cell_size);

    % create filter - using segmentation mask
    H = create_csr_filter(f, Y, single(mask));

    % calculate per-channel feature weights: peak filter response per channel
    response = real(ifft2(fft2(f).*conj(H)));
    chann_w = max(reshape(response, [size(response,1)*size(response,2), size(response,3)]), [], 1);
    % normalize: sum = 1; fall back to uniform weights when the sum is not
    % positive (or is NaN), which would otherwise yield NaN/Inf weights
    chann_sum = sum(chann_w);
    if chann_sum > 0
        chann_w = chann_w / chann_sum;
    else
        chann_w(:) = 1 / numel(chann_w);
    end

    % make a scale search model as well
    xs = get_scale_subwindow(img, c([2,1]), base_target_sz([2,1]), ...
        currentScaleFactor * scaleSizeFactors, scale_window, scale_model_sz([2,1]), []);
    % fft over the scale dim
    xsf = fft(xs,[],2);
    new_sf_num = bsxfun(@times, ysf, conj(xsf));
    new_sf_den = sum(xsf .* conj(xsf), 1);

    % store all important const's and variables to the tracker structure
    tracker.feature_type = feature_type;
    tracker.padding = padding;
    tracker.learning_rate = learning_rate;  % filter learning rate
    tracker.cell_size = cell_size;
    tracker.H = H;
    tracker.weight_lr = init_params.channels_weight_lr;
    tracker.use_channel_weights = init_params.use_channel_weights;
    tracker.chann_w = chann_w;
    tracker.Y = Y;
    tracker.mask_diletation_type = mask_diletation_type;
    tracker.mask_diletation_sz = mask_diletation_sz;
    tracker.target_dummy_mask = target_dummy_mask;
    tracker.target_dummy_area = target_dummy_area;
    tracker.use_segmentation = use_segmentation;
    tracker.bb = bb;
    tracker.cos_win = cos_win;
    tracker.w2c = w2c;
    tracker.template_size = template_size;
    tracker.obj_size = obj_size;
    tracker.c = c;
    tracker.nbins = nbins;
    tracker.currentScaleFactor = currentScaleFactor;
    tracker.rescale_template_size = rescale_template_size;
    tracker.rescale_ratio = rescale_ratio;
    % histogram state exists only when segmentation is active
    if use_segmentation
        tracker.hist_fg = hist_fg;
        tracker.hist_bg = hist_bg;
        tracker.hist_lr = hist_lr;
        tracker.seg_colorspace = seg_colorspace;
    end
    tracker.ysf = ysf;
    tracker.sf_num = new_sf_num;
    tracker.sf_den = new_sf_den;
    tracker.scale_lr = scale_lr;
    tracker.base_target_sz = base_target_sz;
    tracker.scaleSizeFactors = scaleSizeFactors;
    tracker.scale_window = scale_window;
    tracker.scale_model_sz = scale_model_sz;
    tracker.scaleFactors = scaleFactors;
    tracker.min_scale_factor = min_scale_factor;
    tracker.max_scale_factor = max_scale_factor;
    tracker.mask = mask;

    tracker.H_prev = H;

    tracker.img_prev = img;

end  % endfunction
0 commit comments