Commit 604321d

Extractor docs, fix Bitmap single channel
1 parent 86edf08 commit 604321d

4 files changed: +173 −74 lines changed

TensorStack.Extractors/Common/ExtractorStreamOptions.cs

Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@ namespace TensorStack.Extractors.Common
 {
     public record ExtractorStreamOptions : ExtractorOptions
     {
-        public IAsyncEnumerable<VideoFrame> Stream { get; }
+        public IAsyncEnumerable<VideoFrame> Stream { get; init; }
     }
 }

TensorStack.Extractors/Common/ExtractorVideoOptions.cs

Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,6 @@ namespace TensorStack.Extractors.Common
 {
     public record ExtractorVideoOptions : ExtractorOptions
     {
-        public VideoTensor Video { get; }
+        public VideoTensor Video { get; init; }
     }
 }
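
Note: the `get;` to `get; init;` change in both records is what allows the object-initializer style used throughout the new README. A get-only auto-property can only be assigned from a constructor, so initializer assignment fails to compile; `init` permits it while keeping the record immutable after construction. A minimal sketch (`inputVideo` as in the README examples):

```csharp
// Against the old get-only declaration this fails with CS0200
// ("Property or indexer ... cannot be assigned to -- it is read only").
var options = new ExtractorVideoOptions
{
    Video = inputVideo // compiles once the property has an `init` accessor
};
```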

TensorStack.Extractors/README.md

Lines changed: 140 additions & 67 deletions
@@ -1,101 +1,174 @@
 # TensorStack.Extractors
+High-performance ONNX-based feature extraction for AI workflows. Includes models for edge detection, depth estimation, background removal, and other visual analysis tasks — designed for seamless integration with image and video processing pipelines.

-### Canny
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/canny.onnx

-### Hed
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/hed.onnx
+## Quick Start
+This minimal example demonstrates how to extract depth from images and video using `TensorStack.Extractors`.

-### Depth
-* https://huggingface.co/axodoxian/controlnet_onnx/resolve/main/annotators/depth.onnx
-* https://huggingface.co/Xenova/depth-anything-large-hf/onnx/model.onnx
-* https://huggingface.co/julienkay/sentis-MiDaS
-
-### Background Removal
-* https://huggingface.co/briaai/RMBG-1.4/resolve/main/onnx/model.onnx
+```csharp
+[nuget: TensorStack.Extractors]
+[nuget: TensorStack.Providers.DML]
+[nuget: TensorStack.Image.Bitmap]
+[nuget: TensorStack.Video.Windows]
+[nuget: TensorStack.Audio.Windows]
+[model: https://huggingface.co/TensorStack/TensorStack/resolve/main/Extractor/Depth.onnx]
+
+static async Task QuickStartAsync()
+{
+    // 1. Create the Extractor Pipeline
+    var pipeline = ExtractorPipeline.Create(new ExtractorConfig
+    {
+        IsDynamicOutput = true,
+        Normalization = Normalization.OneToOne,
+        OutputNormalization = Normalization.MinMaxOneToOne,
+        ExecutionProvider = Provider.GetProvider(),
+        Path = @"M:\TensorStack\Extractor\Depth.onnx"
+    });
+
+    // 2. Extract Depth map from Image
+    var inputImage = new ImageInput("ImageInput.png");
+    var depthMapImage = await pipeline.RunAsync(new ExtractorImageOptions
+    {
+        Image = inputImage
+    });
+    await depthMapImage.SaveAsync("ImageOutput.png");
+
+    // 3. Extract Depth map from Video (Streaming mode)
+    var inputStream = await VideoInputStream.CreateAsync("VideoInput.mp4");
+    var depthMapVideo = pipeline.RunAsync(new ExtractorStreamOptions
+    {
+        Stream = inputStream.GetAsync()
+    });
+    await depthMapVideo.SaveAync("VideoOutput.mp4");
+
+    // 4. Add audio from the source video (optional)
+    await AudioManager.AddAudioAsync("VideoOutput.mp4", "VideoInput.mp4");
+}
+```

+## Creating an Extractor Pipeline

-# Image Example
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Extractors]
+[nuget: TensorStack.Providers.DML]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
+// Create the pipeline
+var pipeline = ExtractorPipeline.Create(new ExtractorConfig
 {
-    // Load Pipeline
-    await pipeline.LoadAsync();
+    Normalization = Normalization.ZeroToOne,
+    ExecutionProvider = Provider.GetProvider(),
+    Path = @"M:\TensorStack\Extractor\Depth.onnx"
+});
+```

-    // Read input image
-    var input = new ImageInput("Input.png");
+**Configuration Options:**

-    // Options
-    var options = new ExtractorImageOptions(input);
+- `Normalization` — Input value normalization (`ZeroToOne` or `OneToOne`)
+- `ExecutionProvider` — Hardware provider (CPU, GPU, DirectML, etc.)
+- `Path` — Path to the ONNX model

-    // Run Extractor Pipeline
-    var outputTensor = await pipeline.RunAsync(options);
+---

-    // Save Output image
-    await outputTensor.SaveAsync("Output.png");
-}
-```
+## Extract Image Features
+```csharp
+[nuget: TensorStack.Image.Bitmap]
+
+// Read Image
+var inputImage = new ImageInput("Input.png");

+// Extract Image
+var output = await pipeline.RunAsync(new ExtractorImageOptions
+{
+    Image = inputImage
+});
+
+// Write Image
+await output.SaveAsync("Output.png");
+```

+---

-# Video Example
+## Extract Video Features (Buffered)
+Buffers all frames in memory. Suitable for short-duration videos, AI-generated content, low-resolution videos, or GIFs.
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Video.Windows]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
-{
-    // Load Pipeline
-    await pipeline.LoadAsync();
+// Read Video
+var inputVideo = await VideoInput.CreateAsync("Input.gif");

-    // Read input video
-    var video = new VideoInput("Input.mp4");
+// Extract Video
+var outputVideo = await pipeline.RunAsync(new ExtractorVideoOptions
+{
+    Video = inputVideo
+});

-    // Get video input
-    var input = await video.GetTensorAsync();
+// Write Video
+await outputVideo.SaveAync("Output.mp4");
+```

-    // Options
-    var options = new ExtractorVideoOptions(input);
+---

-    // Run Extractor Pipeline
-    var outputTensor = await pipeline.RunAsync(inputTensor);
+## Extract Video Features (Stream)
+Processes frames one-by-one for minimal memory usage. Ideal for high-resolution or long-duration videos.
+```csharp
+[nuget: TensorStack.Video.Windows]

-    // Save Output video
-    await outputTensor.SaveAync("Output.mp4");
-}
+// Read Stream
+var inputStream = await VideoInputStream.CreateAsync("Input.mp4");
+
+// Extract Stream
+var outputStream = pipeline.RunAsync(new ExtractorStreamOptions
+{
+    Stream = inputStream.GetAsync()
+});
+
+// Write Stream
+await outputStream.SaveAync("Output.mp4");
 ```


+---
+
+## Audio Support
+TensorStack.Video only processes video frames, so audio will be missing from the final result.

-# Video Stream Example
+You can use the TensorStack.Audio package to restore audio from the source video:
 ```csharp
-// Extractor config
-var config = new ExtractorConfig("hed.onnx", Provider.DirectML);
+[nuget: TensorStack.Audio.Windows]

-// Create Pipeline
-using (var pipeline = ExtractorPipeline.Create(config))
-{
-    // Load Pipeline
-    await pipeline.LoadAsync();
+await AudioManager.AddAudioAsync("TargetVideo.mp4", "SourceVideo.mp4");
+```
+---

-    // Read input video
-    var video = new VideoInput("Input.mp4");

-    // Get video stream
-    var videoStream = video.GetStreamAsync();
+## Tiling Support
+Tiling allows images and video frames to be processed in smaller sections (tiles) instead of all at once. This helps reduce memory usage and can improve performance when working with very large images or high-resolution videos.

-    // Options
-    var options = new ExtractorStreamOptions(videoStream);
+The `TileMode` determines how these tiles are handled:

-    // Get Extractor stream
-    videoStream = pipeline.RunAsync(options);
+* **None:** Processes the entire image/frame in a single pass.
+* **Overlap:** Tiles have overlapping edges to avoid visible seams.
+* **Blend:** Overlapping tiles are blended together for smooth transitions.
+* **Clip:** Tiles are cut without blending.
+* **Clip + Blend:** Combines clipping and blending for high-quality results.

-    // Save Video Steam
-    await videoStream.SaveAync("Output.mp4");
-}
-```
+Additional options include:
+
+* **MaxTileSize:** The maximum size of each tile in pixels. Smaller tiles reduce memory usage but may take longer to process.
+* **TileOverlap:** The number of overlapping pixels between tiles. More overlap can prevent visible seams and improve output quality.
+
+Adjusting these settings allows you to balance memory usage, processing speed, and visual quality for your extractor tasks.
+
+---
+
+## Extractor Models
+
+Here is a list of some known and tested models compatible with `TensorStack.Extractors`:
+
+- [Xenova/depth-anything-large-hf](https://huggingface.co/Xenova/depth-anything-large-hf)
+- [julienkay/sentis-MiDaS](https://huggingface.co/julienkay/sentis-MiDaS)
+- [axodoxian/controlnet_onnx](https://huggingface.co/axodoxian/controlnet_onnx)
+- [briaai/RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4)
+- [TensorStack/FeatureExtractor-amuse](https://huggingface.co/TensorStack/FeatureExtractor-amuse)
+
+---
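
Note: the new Tiling Support section describes `TileMode`, `MaxTileSize` and `TileOverlap` in prose only. A minimal sketch of how they might be combined follows; placing these properties on `ExtractorConfig` is an assumption based on the option names above, not confirmed API.

```csharp
// Hypothetical tiling configuration (property placement assumed, not confirmed API)
var pipeline = ExtractorPipeline.Create(new ExtractorConfig
{
    TileMode = TileMode.Blend, // blend overlapping tiles to hide seams
    MaxTileSize = 512,         // cap each tile at 512px to bound memory usage
    TileOverlap = 32,          // 32px of overlap between adjacent tiles
    ExecutionProvider = Provider.GetProvider(),
    Path = @"M:\TensorStack\Extractor\Depth.onnx"
});
```

Smaller `MaxTileSize` values trade speed for memory; larger `TileOverlap` values cost extra computation but reduce visible seams.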

TensorStack.Image.Bitmap/Extensions.cs

Lines changed: 31 additions & 5 deletions
@@ -112,14 +112,40 @@ internal static Bitmap ToBitmapImage(this ImageTensor tensor)
                 byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);
                 for (int x = 0; x < width; x++)
                 {
-                    row[x * 4 + 3] = channels == 4 ? GetByteValue(tensor[0, 3, y, x]) : byte.MaxValue; // A
-                    row[x * 4 + 2] = GetByteValue(tensor[0, 0, y, x]); // R
-                    row[x * 4 + 1] = GetByteValue(tensor[0, 1, y, x]); // G
-                    row[x * 4 + 0] = GetByteValue(tensor[0, 2, y, x]); // B
+                    byte r, g, b, a;
+                    switch (channels)
+                    {
+                        case 1: // Grayscale
+                            r = g = b = GetByteValue(tensor[0, 0, y, x]);
+                            a = byte.MaxValue;
+                            break;
+
+                        case 3: // RGB
+                            r = GetByteValue(tensor[0, 0, y, x]);
+                            g = GetByteValue(tensor[0, 1, y, x]);
+                            b = GetByteValue(tensor[0, 2, y, x]);
+                            a = byte.MaxValue;
+                            break;
+
+                        case 4: // RGBA
+                            r = GetByteValue(tensor[0, 0, y, x]);
+                            g = GetByteValue(tensor[0, 1, y, x]);
+                            b = GetByteValue(tensor[0, 2, y, x]);
+                            a = GetByteValue(tensor[0, 3, y, x]);
+                            break;
+
+                        default:
+                            throw new NotSupportedException($"Unsupported channel count: {channels}");
+                    }
+
+                    row[x * 4 + 0] = b; // B
+                    row[x * 4 + 1] = g; // G
+                    row[x * 4 + 2] = r; // R
+                    row[x * 4 + 3] = a; // A
                 }
             }
-            bitmap.UnlockBits(bitmapData);
         }
+        bitmap.UnlockBits(bitmapData);
         return bitmap;
     }
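
Note: before this fix, a single-channel tensor (e.g. a depth or edge map) was written as if it had three channels, reading G and B from out-of-range channel indices. The new switch makes the channel-expansion rule explicit. A standalone restatement of that mapping (helper name and byte-array shape are illustrative, not library API):

```csharp
// Channel expansion rule implemented by the new switch:
// 1 channel  -> replicate into R/G/B, opaque alpha (grayscale)
// 3 channels -> RGB with opaque alpha
// 4 channels -> RGBA passed through
static (byte R, byte G, byte B, byte A) ExpandChannels(byte[] px) => px.Length switch
{
    1 => (px[0], px[0], px[0], byte.MaxValue),
    3 => (px[0], px[1], px[2], byte.MaxValue),
    4 => (px[0], px[1], px[2], px[3]),
    _ => throw new NotSupportedException($"Unsupported channel count: {px.Length}")
};
```

The commit also hoists `bitmap.UnlockBits(bitmapData)` out one scope level so the lock is released after all pixel writes complete, keeping the `LockBits`/`UnlockBits` pair at the same nesting depth.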
