Skip to content

Commit 0dd5723

Browse files
mthrok and facebook-github-bot
authored and committed
Refactor FilterGraph interface (#2508)
Summary: FilterGraph is necessary for StreamWriter when saving video, as the Tensor array format cannot express common video formats like yuv420. The current implementation of FilterGraph is specific to StreamReader, as it takes an AVCodecParameters object rather than individual parameters. This PR refactors the FilterGraph interface so that it can be constructed from more primitive information. Pull Request resolved: #2508 Reviewed By: hwangjeff Differential Revision: D37466033 Pulled By: mthrok fbshipit-source-id: 8414e985da7579c2dfe260b4dccd2afe113bb573
1 parent 0ad03ad commit 0dd5723

File tree

7 files changed

+121
-89
lines changed

7 files changed

+121
-89
lines changed

torchaudio/csrc/ffmpeg/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,11 @@ decoder::~Decoder();
8989

9090
```c++
9191
// Default construction (no memory allocation)
92-
filter_graph = FilterGraph();
92+
filter_graph = FilterGraph(AVMEDIA_TYPE_AUDIO);
9393
// Filter configuration
94-
...
94+
filter_graph.add_audio_src(..);
95+
filter_graph.add_sink(..);
96+
filter_graph.add_process("<filter expression>");
9597
filter_graph.create_filter();
9698
// Apply filter
9799
filter_graph.add_frame(pFrame);

torchaudio/csrc/ffmpeg/filter_graph.cpp

Lines changed: 46 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,93 +4,87 @@
44
namespace torchaudio {
55
namespace ffmpeg {
66

7-
FilterGraph::FilterGraph(
8-
AVRational time_base,
9-
AVCodecParameters* codecpar,
10-
const c10::optional<std::string>& filter_description)
11-
: input_time_base(time_base),
12-
codecpar(codecpar),
13-
filter_description(filter_description.value_or(
14-
codecpar->codec_type == AVMEDIA_TYPE_AUDIO ? "anull" : "null")),
15-
media_type(codecpar->codec_type) {
16-
init();
7+
FilterGraph::FilterGraph(AVMediaType media_type) : media_type(media_type) {
8+
switch (media_type) {
9+
case AVMEDIA_TYPE_AUDIO:
10+
case AVMEDIA_TYPE_VIDEO:
11+
break;
12+
default:
13+
throw std::runtime_error("Only audio and video type is supported.");
14+
}
1715
}
1816

19-
////////////////////////////////////////////////////////////////////////////////
20-
// Query method
21-
////////////////////////////////////////////////////////////////////////////////
22-
std::string FilterGraph::get_description() const {
23-
return filter_description;
24-
};
25-
2617
////////////////////////////////////////////////////////////////////////////////
2718
// Configuration methods
2819
////////////////////////////////////////////////////////////////////////////////
2920
namespace {
3021
std::string get_audio_src_args(
22+
AVSampleFormat format,
3123
AVRational time_base,
32-
AVCodecParameters* codecpar) {
24+
int sample_rate,
25+
uint64_t channel_layout) {
3326
char args[512];
3427
std::snprintf(
3528
args,
3629
sizeof(args),
3730
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
3831
time_base.num,
3932
time_base.den,
40-
codecpar->sample_rate,
41-
av_get_sample_fmt_name(static_cast<AVSampleFormat>(codecpar->format)),
42-
codecpar->channel_layout);
33+
sample_rate,
34+
av_get_sample_fmt_name(format),
35+
channel_layout);
4336
return std::string(args);
4437
}
4538

4639
std::string get_video_src_args(
40+
AVPixelFormat format,
4741
AVRational time_base,
48-
AVCodecParameters* codecpar) {
42+
int width,
43+
int height,
44+
AVRational sample_aspect_ratio) {
4945
char args[512];
5046
std::snprintf(
5147
args,
5248
sizeof(args),
5349
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:pixel_aspect=%d/%d",
54-
codecpar->width,
55-
codecpar->height,
56-
av_get_pix_fmt_name(static_cast<AVPixelFormat>(codecpar->format)),
50+
width,
51+
height,
52+
av_get_pix_fmt_name(format),
5753
time_base.num,
5854
time_base.den,
59-
codecpar->sample_aspect_ratio.num,
60-
codecpar->sample_aspect_ratio.den);
55+
sample_aspect_ratio.num,
56+
sample_aspect_ratio.den);
6157
return std::string(args);
6258
}
6359

6460
} // namespace
6561

66-
void FilterGraph::init() {
67-
add_src();
68-
add_sink();
69-
add_process();
70-
create_filter();
62+
void FilterGraph::add_audio_src(
63+
AVSampleFormat format,
64+
AVRational time_base,
65+
int sample_rate,
66+
uint64_t channel_layout) {
67+
TORCH_CHECK(
68+
media_type == AVMEDIA_TYPE_AUDIO, "The filter graph is not audio type.");
69+
std::string args =
70+
get_audio_src_args(format, time_base, sample_rate, channel_layout);
71+
add_src(args);
7172
}
7273

73-
void FilterGraph::reset() {
74-
pFilterGraph.reset();
75-
buffersrc_ctx = nullptr;
76-
buffersink_ctx = nullptr;
77-
78-
init();
74+
void FilterGraph::add_video_src(
75+
AVPixelFormat format,
76+
AVRational time_base,
77+
int width,
78+
int height,
79+
AVRational sample_aspect_ratio) {
80+
TORCH_CHECK(
81+
media_type == AVMEDIA_TYPE_VIDEO, "The filter graph is not video type.");
82+
std::string args =
83+
get_video_src_args(format, time_base, width, height, sample_aspect_ratio);
84+
add_src(args);
7985
}
8086

81-
void FilterGraph::add_src() {
82-
std::string args;
83-
switch (media_type) {
84-
case AVMEDIA_TYPE_AUDIO:
85-
args = get_audio_src_args(input_time_base, codecpar);
86-
break;
87-
case AVMEDIA_TYPE_VIDEO:
88-
args = get_video_src_args(input_time_base, codecpar);
89-
break;
90-
default:
91-
throw std::runtime_error("Only audio/video are supported.");
92-
}
93-
87+
void FilterGraph::add_src(const std::string& args) {
9488
const AVFilter* buffersrc = avfilter_get_by_name(
9589
media_type == AVMEDIA_TYPE_AUDIO ? "abuffer" : "buffer");
9690
int ret = avfilter_graph_create_filter(
@@ -103,9 +97,6 @@ void FilterGraph::add_src() {
10397
}
10498

10599
void FilterGraph::add_sink() {
106-
if (media_type == AVMEDIA_TYPE_UNKNOWN) {
107-
throw std::runtime_error("Source buffer is not allocated.");
108-
}
109100
if (buffersink_ctx) {
110101
throw std::runtime_error("Sink buffer is already allocated.");
111102
}
@@ -158,7 +149,7 @@ class InOuts {
158149

159150
} // namespace
160151

161-
void FilterGraph::add_process() {
152+
void FilterGraph::add_process(const std::string& filter_description) {
162153
// Note
163154
// The official example and other derived codes out there use
164155
// https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html#_a37

torchaudio/csrc/ffmpeg/filter_graph.h

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,17 @@ namespace torchaudio {
55
namespace ffmpeg {
66

77
class FilterGraph {
8-
// Parameters required for `reset`
9-
// Recreats the underlying filter_graph struct
10-
AVRational input_time_base;
11-
AVCodecParameters* codecpar;
12-
std::string filter_description;
13-
14-
// Constant just for convenient access.
158
AVMediaType media_type;
169

1710
AVFilterGraphPtr pFilterGraph;
11+
1812
// AVFilterContext is freed as a part of AVFilterGraph
1913
// so we do not manage the resource.
2014
AVFilterContext* buffersrc_ctx = nullptr;
2115
AVFilterContext* buffersink_ctx = nullptr;
2216

2317
public:
24-
FilterGraph(
25-
AVRational time_base,
26-
AVCodecParameters* codecpar,
27-
const c10::optional<std::string>& filter_desc);
18+
explicit FilterGraph(AVMediaType media_type);
2819
// Custom destructor to release AVFilterGraph*
2920
~FilterGraph() = default;
3021
// Non-copyable
@@ -34,24 +25,27 @@ class FilterGraph {
3425
FilterGraph(FilterGraph&&) = default;
3526
FilterGraph& operator=(FilterGraph&&) = default;
3627

37-
//////////////////////////////////////////////////////////////////////////////
38-
// Query method
39-
//////////////////////////////////////////////////////////////////////////////
40-
std::string get_description() const;
41-
4228
//////////////////////////////////////////////////////////////////////////////
4329
// Configuration methods
4430
//////////////////////////////////////////////////////////////////////////////
45-
void init();
31+
void add_audio_src(
32+
AVSampleFormat format,
33+
AVRational time_base,
34+
int sample_rate,
35+
uint64_t channel_layout);
4636

47-
void reset();
37+
void add_video_src(
38+
AVPixelFormat format,
39+
AVRational time_base,
40+
int width,
41+
int height,
42+
AVRational sample_aspect_ratio);
4843

49-
private:
50-
void add_src();
44+
void add_src(const std::string& arg);
5145

5246
void add_sink();
5347

54-
void add_process();
48+
void add_process(const std::string& filter_description);
5549

5650
void create_filter();
5751

torchaudio/csrc/ffmpeg/sink.cpp

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,64 @@ std::unique_ptr<Buffer> get_buffer(
2323
av_get_media_type_string(type));
2424
}
2525
}
26-
} // namespace
2726

28-
Sink::Sink(
27+
std::unique_ptr<FilterGraph> get_filter_graph(
2928
AVRational input_time_base,
3029
AVCodecParameters* codecpar,
30+
const std::string& filter_description) {
31+
auto p = std::make_unique<FilterGraph>(codecpar->codec_type);
32+
33+
switch (codecpar->codec_type) {
34+
case AVMEDIA_TYPE_AUDIO:
35+
p->add_audio_src(
36+
static_cast<AVSampleFormat>(codecpar->format),
37+
input_time_base,
38+
codecpar->sample_rate,
39+
codecpar->channel_layout);
40+
break;
41+
case AVMEDIA_TYPE_VIDEO:
42+
p->add_video_src(
43+
static_cast<AVPixelFormat>(codecpar->format),
44+
input_time_base,
45+
codecpar->width,
46+
codecpar->height,
47+
codecpar->sample_aspect_ratio);
48+
break;
49+
default:
50+
throw std::runtime_error("Only audio/video are supported.");
51+
}
52+
p->add_sink();
53+
p->add_process(filter_description);
54+
p->create_filter();
55+
return p;
56+
}
57+
58+
} // namespace
59+
60+
Sink::Sink(
61+
AVRational input_time_base_,
62+
AVCodecParameters* codecpar_,
3163
int frames_per_chunk,
3264
int num_chunks,
33-
const c10::optional<std::string>& filter_description,
65+
const c10::optional<std::string>& filter_description_,
3466
const torch::Device& device)
35-
: filter(input_time_base, codecpar, filter_description),
67+
: input_time_base(input_time_base_),
68+
codecpar(codecpar_),
69+
filter_description(filter_description_.value_or(
70+
codecpar->codec_type == AVMEDIA_TYPE_AUDIO ? "anull" : "null")),
71+
filter(get_filter_graph(input_time_base_, codecpar_, filter_description)),
3672
buffer(get_buffer(
37-
codecpar->codec_type,
73+
codecpar_->codec_type,
3874
frames_per_chunk,
3975
num_chunks,
4076
device)) {}
4177

4278
// 0: some kind of success
4379
// <0: Some error happened
4480
int Sink::process_frame(AVFrame* pFrame) {
45-
int ret = filter.add_frame(pFrame);
81+
int ret = filter->add_frame(pFrame);
4682
while (ret >= 0) {
47-
ret = filter.get_frame(frame);
83+
ret = filter->get_frame(frame);
4884
// AVERROR(EAGAIN) means that new input data is required to return new
4985
// output.
5086
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
@@ -56,12 +92,16 @@ int Sink::process_frame(AVFrame* pFrame) {
5692
return ret;
5793
}
5894

95+
std::string Sink::get_filter_description() const {
96+
return filter_description;
97+
}
98+
5999
bool Sink::is_buffer_ready() const {
60100
return buffer->is_ready();
61101
}
62102

63103
void Sink::flush() {
64-
filter.reset();
104+
filter = get_filter_graph(input_time_base, codecpar, filter_description);
65105
buffer->flush();
66106
}
67107

torchaudio/csrc/ffmpeg/sink.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ namespace ffmpeg {
1010
class Sink {
1111
AVFramePtr frame;
1212

13+
// Parameters for recreating FilterGraph
14+
AVRational input_time_base;
15+
AVCodecParameters* codecpar;
16+
std::string filter_description;
17+
std::unique_ptr<FilterGraph> filter;
18+
1319
public:
14-
FilterGraph filter;
1520
std::unique_ptr<Buffer> buffer;
1621
Sink(
1722
AVRational input_time_base,
@@ -21,6 +26,7 @@ class Sink {
2126
const c10::optional<std::string>& filter_description,
2227
const torch::Device& device);
2328

29+
std::string get_filter_description() const;
2430
int process_frame(AVFrame* frame);
2531
bool is_buffer_ready() const;
2632

torchaudio/csrc/ffmpeg/stream_processor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ void StreamProcessor::remove_stream(KeyType key) {
5353
// Query methods
5454
////////////////////////////////////////////////////////////////////////////////
5555
std::string StreamProcessor::get_filter_description(KeyType key) const {
56-
return sinks.at(key).filter.get_description();
56+
return sinks.at(key).get_filter_description();
5757
}
5858

5959
bool StreamProcessor::is_buffer_ready() const {

torchaudio/csrc/ffmpeg/stream_reader.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#pragma once
22
#include <torchaudio/csrc/ffmpeg/decoder.h>
3-
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
43
#include <torchaudio/csrc/ffmpeg/stream_processor.h>
54
#include <torchaudio/csrc/ffmpeg/typedefs.h>
65
#include <vector>

0 commit comments

Comments
 (0)