Commit 604321d

Extractor docs, fix Bitmap single channel
1 parent 86edf08 commit 604321d

4 files changed: +173 −74 lines changed

TensorStack.Extractors/Common/ExtractorStreamOptions.cs

Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@ namespace TensorStack.Extractors.Common
 {
     public record ExtractorStreamOptions : ExtractorOptions
     {
-        public IAsyncEnumerable<VideoFrame> Stream { get; }
+        public IAsyncEnumerable<VideoFrame> Stream { get; init; }
     }
 }

TensorStack.Extractors/Common/ExtractorVideoOptions.cs

Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,6 @@ namespace TensorStack.Extractors.Common
 {
     public record ExtractorVideoOptions : ExtractorOptions
     {
-        public VideoTensor Video { get; }
+        public VideoTensor Video { get; init; }
     }
 }
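
Note: the `get;` to `get; init;` change in both records is what allows the object-initializer style used throughout the new README. A get-only auto-property can only be assigned from a constructor, so initializer assignment fails to compile; `init` permits it while keeping the record immutable after construction. A minimal sketch (`inputVideo` as in the README examples):

```csharp
// Against the old get-only declaration this fails with CS0200
// ("Property or indexer ... cannot be assigned to -- it is read only").
var options = new ExtractorVideoOptions
{
    Video = inputVideo // compiles once the property has an `init` accessor
};
```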

TensorStack.Extractors/README.md

Lines changed: 140 additions & 67 deletions
@@ -1,101 +1,174 @@
 # TensorStack.Extractors
+High-performance ONNX-based feature extraction for AI workflows. Includes models for edge detection, depth estimation, background removal, and other visual analysis tasks — designed for seamless integration with image and video processing pipelines.

-### Canny
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/canny.onnx

-### Hed
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/hed.onnx
+## Quick Start
+This minimal example demonstrates how to extract depth from images and video using `TensorStack.Extractors`.

-### Depth
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/depth.onnx
-* https://huggingface.co/Xenova/depth-anything-large-hf/onnx/model.onnx
-* https://huggingface.co/julienkay/sentis-MiDaS
-
-### Background Removal
-* https://huggingface.co/briaai/RMBG-1.4/resolve/main/onnx/model.onnx
+```csharp
+[nuget: TensorStack.Extractors]
+[nuget: TensorStack.Providers.DML]
+[nuget: TensorStack.Image.Bitmap]
+[nuget: TensorStack.Video.Windows]
+[nuget: TensorStack.Audio.Windows]
+[model: https://huggingface.co/TensorStack/TensorStack/resolve/main/Extractor/Depth.onnx]
+
+static async Task QuickStartAsync()
+{
+    // 1. Create the Extractor Pipeline
+    var pipeline = ExtractorPipeline.Create(new ExtractorConfig
+    {
+        IsDynamicOutput = true,
+        Normalization = Normalization.OneToOne,
+        OutputNormalization = Normalization.MinMaxOneToOne,
+        ExecutionProvider = Provider.GetProvider(),
+        Path = @"M:\TensorStack\Extractor\Depth.onnx"
+    });
+
+    // 2. Extract Depth map from Image
+    var inputImage = new ImageInput("ImageInput.png");
+    var depthMapImage = await pipeline.RunAsync(new ExtractorImageOptions
+    {
+        Image = inputImage
+    });
+    await depthMapImage.SaveAsync("ImageOutput.png");
+
+    // 3. Extract Depth map from Video (Streaming mode)
+    var inputStream = await VideoInputStream.CreateAsync("VideoInput.mp4");
+    var depthMapVideo = pipeline.RunAsync(new ExtractorStreamOptions
+    {
+        Stream = inputStream.GetAsync()
+    });
+    await depthMapVideo.SaveAync("VideoOutput.mp4");
+
+    // 4. Add audio from the source video (optional)
+    await AudioManager.AddAudioAsync("VideoOutput.mp4", "VideoInput.mp4");
+}
+```

+## Creating an Extractor Pipeline

-# Image Example
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Extractors]
+[nuget: TensorStack.Providers.DML]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
+// Create the pipeline
+var pipeline = ExtractorPipeline.Create(new ExtractorConfig
 {
-    // Load Pipeline
-    await pipeline.LoadAsync();
+    Normalization = Normalization.ZeroToOne,
+    ExecutionProvider = Provider.GetProvider(),
+    Path = @"M:\TensorStack\Extractor\Depth.onnx"
+});
+```

-    // Read input image
-    var input = new ImageInput("Input.png");
+**Configuration Options:**

-    // Options
-    var options = new ExtractorImageOptions(input);
+- `Normalization` — Input value normalization (`ZeroToOne` or `OneToOne`)
+- `ExecutionProvider` — Hardware provider (CPU, GPU, DirectML, etc.)
+- `Path` — Path to the ONNX model

-    // Run Extractor Pipeline
-    var outputTensor = await pipeline.RunAsync(options);
+---

-    // Save Output image
-    await outputTensor.SaveAsync("Output.png");
-}
-```
+## Extract Image Features
+```csharp
+[nuget: TensorStack.Image.Bitmap]
+
+// Read Image
+var inputImage = new ImageInput("Input.png");

+// Extract Image
+var output = await pipeline.RunAsync(new ExtractorImageOptions
+{
+    Image = inputImage
+});
+
+// Write Image
+await output.SaveAsync("Output.png");
+```

+---

-# Video Example
+## Extract Video Features (Buffered)
+Buffers all frames in memory. Suitable for short-duration videos, AI-generated content, low-resolution videos, or GIFs.
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Video.Windows]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
-{
-    // Load Pipeline
-    await pipeline.LoadAsync();
+// Read Video
+var inputVideo = await VideoInput.CreateAsync("Input.gif");

-    // Read input video
-    var video = new VideoInput("Input.mp4");
+// Extract Video
+var outputVideo = await pipeline.RunAsync(new ExtractorVideoOptions
+{
+    Video = inputVideo
+});

-    // Get video input
-    var input = await video.GetTensorAsync();
+// Write Video
+await outputVideo.SaveAync("Output.mp4");
+```

-    // Options
-    var options = new ExtractorVideoOptions(input);
+---

-    // Run Extractor Pipeline
-    var outputTensor = await pipeline.RunAsync(inputTensor);
+## Extract Video Features (Stream)
+Processes frames one-by-one for minimal memory usage. Ideal for high-resolution or long-duration videos.
+```csharp
+[nuget: TensorStack.Video.Windows]

-    // Save Output video
-    await outputTensor.SaveAync("Output.mp4");
-}
+// Read Stream
+var inputStream = await VideoInputStream.CreateAsync("Input.mp4");
+
+// Extract Stream
+var outputStream = pipeline.RunAsync(new ExtractorStreamOptions
+{
+    Stream = inputStream.GetAsync()
+});
+
+// Write Stream
+await outputStream.SaveAync("Output.mp4");
 ```


+---
+
+## Audio Support
+TensorStack.Video only processes video frames, so audio will be missing from the final result.

-# Video Stream Example
+You can use the TensorStack.Audio package to restore audio from the source video:
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Audio.Windows]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
-{
-    // Load Pipeline
-    await pipeline.LoadAsync();
+await AudioManager.AddAudioAsync("TargetVideo.mp4", "SourceVideo.mp4");
+```
+---

-    // Read input video
-    var video = new VideoInput("Input.mp4");

-    // Get video stream
-    var videoStream = video.GetStreamAsync();
+## Tiling Support
+Tiling allows images and video frames to be processed in smaller sections (tiles) instead of all at once. This helps reduce memory usage and can improve performance when working with very large images or high-resolution videos.

-    // Options
-    var options = new ExtractorStreamOptions(videoStream);
+The `TileMode` determines how these tiles are handled:

-    // Get Extractor stream
-    videoStream = pipeline.RunAsync(options);
+* **None:** Processes the entire image/frame in a single pass.
+* **Overlap:** Tiles have overlapping edges to avoid visible seams.
+* **Blend:** Overlapping tiles are blended together for smooth transitions.
+* **Clip:** Tiles are cut without blending.
+* **Clip + Blend:** Combines clipping and blending for high-quality results.

-    // Save Video Steam
-    await videoStream.SaveAync("Output.mp4");
-}
-```
+Additional options include:
+
+* **MaxTileSize:** The maximum size of each tile in pixels. Smaller tiles reduce memory usage but may take longer to process.
+* **TileOverlap:** The number of overlapping pixels between tiles. More overlap can prevent visible seams and improve output quality.
+
+Adjusting these settings allows you to balance memory usage, processing speed, and visual quality for your extractor tasks.
+
+---
+
+## Extractor Models
+
+Here is a list of some known and tested models compatible with `TensorStack.Extractors`:
+
+- [Xenova/depth-anything-large-hf](https://huggingface.co/Xenova/depth-anything-large-hf)
+- [julienkay/sentis-MiDaS](https://huggingface.co/julienkay/sentis-MiDaS)
+- [axodoxian/controlnet_onnx](https://huggingface.co/axodoxian/controlnet_onnx)
+- [briaai/RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4)
+- [TensorStack/FeatureExtractor-amuse](https://huggingface.co/TensorStack/FeatureExtractor-amuse)
+
+---
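
Note: the new Tiling Support section describes `TileMode`, `MaxTileSize` and `TileOverlap` in prose only. A minimal sketch of how they might be combined follows; placing these properties on `ExtractorConfig` is an assumption based on the option names above, not confirmed API.

```csharp
// Hypothetical tiling configuration (property placement assumed, not confirmed API)
var pipeline = ExtractorPipeline.Create(new ExtractorConfig
{
    TileMode = TileMode.Blend, // blend overlapping tiles to hide seams
    MaxTileSize = 512,         // cap each tile at 512px to bound memory usage
    TileOverlap = 32,          // 32px of overlap between adjacent tiles
    ExecutionProvider = Provider.GetProvider(),
    Path = @"M:\TensorStack\Extractor\Depth.onnx"
});
```

Smaller `MaxTileSize` values trade speed for memory; larger `TileOverlap` values cost extra computation but reduce visible seams.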

TensorStack.Image.Bitmap/Extensions.cs

Lines changed: 31 additions & 5 deletions
@@ -112,14 +112,40 @@ internal static Bitmap ToBitmapImage(this ImageTensor tensor)
                 byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);
                 for (int x = 0; x < width; x++)
                 {
-                    row[x * 4 + 3] = channels == 4 ? GetByteValue(tensor[0, 3, y, x]) : byte.MaxValue; // A
-                    row[x * 4 + 2] = GetByteValue(tensor[0, 0, y, x]); // R
-                    row[x * 4 + 1] = GetByteValue(tensor[0, 1, y, x]); // G
-                    row[x * 4 + 0] = GetByteValue(tensor[0, 2, y, x]); // B
+                    byte r, g, b, a;
+                    switch (channels)
+                    {
+                        case 1: // Grayscale
+                            r = g = b = GetByteValue(tensor[0, 0, y, x]);
+                            a = byte.MaxValue;
+                            break;
+
+                        case 3: // RGB
+                            r = GetByteValue(tensor[0, 0, y, x]);
+                            g = GetByteValue(tensor[0, 1, y, x]);
+                            b = GetByteValue(tensor[0, 2, y, x]);
+                            a = byte.MaxValue;
+                            break;
+
+                        case 4: // RGBA
+                            r = GetByteValue(tensor[0, 0, y, x]);
+                            g = GetByteValue(tensor[0, 1, y, x]);
+                            b = GetByteValue(tensor[0, 2, y, x]);
+                            a = GetByteValue(tensor[0, 3, y, x]);
+                            break;
+
+                        default:
+                            throw new NotSupportedException($"Unsupported channel count: {channels}");
+                    }
+
+                    row[x * 4 + 0] = b; // B
+                    row[x * 4 + 1] = g; // G
+                    row[x * 4 + 2] = r; // R
+                    row[x * 4 + 3] = a; // A
                 }
             }
-            bitmap.UnlockBits(bitmapData);
         }
+        bitmap.UnlockBits(bitmapData);
         return bitmap;
     }
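
Note: before this fix, a single-channel tensor (e.g. a depth or edge map) was written as if it had three channels, reading G and B from out-of-range channel indices. The new switch makes the channel-expansion rule explicit. A standalone restatement of that mapping (helper name and byte-array shape are illustrative, not library API):

```csharp
// Channel expansion rule implemented by the new switch:
// 1 channel  -> replicate into R/G/B, opaque alpha (grayscale)
// 3 channels -> RGB with opaque alpha
// 4 channels -> RGBA passed through
static (byte R, byte G, byte B, byte A) ExpandChannels(byte[] px) => px.Length switch
{
    1 => (px[0], px[0], px[0], byte.MaxValue),
    3 => (px[0], px[1], px[2], byte.MaxValue),
    4 => (px[0], px[1], px[2], px[3]),
    _ => throw new NotSupportedException($"Unsupported channel count: {px.Length}")
};
```

The commit also hoists `bitmap.UnlockBits(bitmapData)` out one scope level so the lock is released after all pixel writes complete, keeping the `LockBits`/`UnlockBits` pair at the same nesting depth.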
