From d583bd7661e551c5373db7267ccc9b13efe77337 Mon Sep 17 00:00:00 2001
From: ProGamerGov <ProGamerGov@users.noreply.github.com>
Date: Tue, 10 Nov 2020 08:36:30 -0700
Subject: [PATCH] Transform improvements & DeepDream vs Direction clarification
 (#18)

**New Features:**

* The `vis.py` script now differentiates between direction visualization and DeepDream with the new `-layer_vis` parameter. The new parameter has two options: `deepdream` or `direction`. The default is `deepdream`; `direction` restores the behavior from before this update. This parameter only has an effect when no `-channel` value is specified.

**Improvements:**

* Improved random scaling, which now uses affine grid matrices (an approach I learned about while working on https://github.com/pytorch/captum/pull/500).

* Improvements to tensor normalization.

* Center neuron extraction in the `vis.py` script now works for layer targets without specifying channels, though I'm not sure how useful this change will be.
---
 README.md              |  1 +
 utils/decorrelation.py | 19 ++++++++++++-------
 utils/vis_utils.py     |  5 ++++-
 vis.py                 |  4 ++++
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 581703d..bcd6720 100644
--- a/README.md
+++ b/README.md
@@ -255,6 +255,7 @@ This script lets you create DeepDream hallucinations with trained GoogleNet mode
 * `-random_scale`: Whether or not to use random scaling. Optionally provide a comma separated list of values for scales to be randomly selected from. If no values are provided, then scales will be randomly selected from the following list: `1, 0.975, 1.025, 0.95, 1.05`.
 * `-random_rotation`: Whether or not to use random rotations. Optionally provide a comma separated list of degree values for rotations to be randomly selected from or a single value to use for randomly selecting degrees from `[-value, value]`. If no values are provided, then a range of `[-5, 5]` wil be used.
 * `-padding`: The amount of padding to use before random scaling and random rotations to prevent edge artifacts. The padding is then removed after the transforms. Default is set to `0` to disable it.
+* `-layer_vis`: Whether to use DeepDream or direction visualization when not visualizing specific layer channels. One of `deepdream` or `direction`; default is `deepdream`.
 
 **Only Required If Model Doesn't Contain Them, Options**:
 * `-data_mean`: Your precalculated list of mean values that was used to train the model, if they weren't saved inside the model.
diff --git a/utils/decorrelation.py b/utils/decorrelation.py
index 67b0188..2c9659b 100644
--- a/utils/decorrelation.py
+++ b/utils/decorrelation.py
@@ -136,20 +136,20 @@ def forward(self, input):
 # Preprocess input after decorrelation
 class TransformLayer(torch.nn.Module):
 
-    def __init__(self, input_mean=[1,1,1], r=255, device='cpu'):
+    def __init__(self, input_mean=[1,1,1], input_sd=[1,1,1], r=255, device='cpu'):
         super(TransformLayer, self).__init__()
-        self.input_mean = torch.as_tensor(input_mean).to(device)
-        self.input_sd = torch.as_tensor([1,1,1]).to(device)
+        self.input_mean = torch.as_tensor(input_mean).view(3, 1, 1).to(device)
+        self.input_sd = torch.as_tensor(input_sd).view(3, 1, 1).to(device)
         self.r = r
         self.activ = lambda x: torch.sigmoid(x)
 
     def untransform(self, input):
-        input = input.add(self.input_mean[None, :, None, None]).mul(self.input_sd[None, :, None, None])
+        input = (input + self.input_mean) * self.input_sd
         return input / self.r
 
     def forward(self, input):
         input = self.activ(input) * self.r
-        return input.sub(self.input_mean[None, :, None, None]).div(self.input_sd[None, :, None, None])
+        return (input - self.input_mean) / self.input_sd
 
 
 # Randomly scale an input
@@ -161,8 +161,13 @@ def __init__(self, scale_list=(1, 0.975, 1.025, 0.95, 1.05)):
         scale_list = [float(s) for s in scale_list.split(',')] if ',' in scale_list else scale_list
         self.scale_list = scale_list
 
-    def rescale_tensor(self, input, scale, align_corners=True):
-        return torch.nn.functional.interpolate(input, scale_factor=scale, mode='bilinear', align_corners=align_corners)
+    def get_scale_mat(self, m, device, dtype):
+        return torch.tensor([[m, 0.0, 0.0], [0.0, m, 0.0]], device=device, dtype=dtype)
+
+    def rescale_tensor(self, x, scale):
+        scale_matrix = self.get_scale_mat(scale, x.device, x.dtype)[None, ...].repeat(x.shape[0], 1, 1)
+        grid = F.affine_grid(scale_matrix, x.size())
+        return F.grid_sample(x, grid)
 
     def forward(self, input):
         n = random.randint(0, len(self.scale_list)-1)
diff --git a/utils/vis_utils.py b/utils/vis_utils.py
index 46825bb..1eff2c3 100644
--- a/utils/vis_utils.py
+++ b/utils/vis_utils.py
@@ -209,6 +209,7 @@ def __init__(self, channel=-1, loss_func=mean_loss, mode='loss', neuron=False):
         self.get_loss = loss_func
         self.mode = mode
         self.get_neuron = neuron
+        self.power = 2
 
     def extract_neuron(self, input):
         x = input.size(2) // 2
@@ -222,9 +223,11 @@ def forward_feature(self, input):
         self.feature = input
 
     def forward(self, module, input, output):
+        output = self.extract_neuron(output) if self.get_neuron == True else output
         if self.channel > -1:
-            output = self.extract_neuron(output) if self.get_neuron == True else output
             output = output[:,self.channel]
+        else:
+            output = output ** self.power
         if self.mode == 'loss':
             self.forward_loss(output)
         elif self.mode == 'feature':
diff --git a/vis.py b/vis.py
index 6ed45a6..0ac32fd 100644
--- a/vis.py
+++ b/vis.py
@@ -38,6 +38,7 @@ def main():
     parser.add_argument("-random_scale", help="", nargs="?", type=str, const="none")
     parser.add_argument("-random_rotation", help="", nargs="?", type=str, const="none")
     parser.add_argument("-padding", type=int, default=0)
+    parser.add_argument("-layer_vis", choices=['deepdream', 'direction'], default='deepdream')
 
     # Tiling options
     parser.add_argument("-tile_size", default='0')
@@ -121,6 +122,9 @@ def main_func(params):
     loss_func = mean_loss
     loss_modules = register_simple_hook(net.net, params.layer, params.channel, loss_func=loss_func, neuron=params.extract_neuron)
 
+    if params.layer_vis == 'direction':
+        loss_modules[0].power = 1
+
     # Create input image
     if params.content_image == '':
         if params.fft_decorrelation: