diff --git a/vod/subtitle/dfxp_format.c b/vod/subtitle/dfxp_format.c index f3608ca5..d7302f3e 100644 --- a/vod/subtitle/dfxp_format.c +++ b/vod/subtitle/dfxp_format.c @@ -17,6 +17,7 @@ #define DFXP_ELEMENT_P (u_char*)"p" #define DFXP_ELEMENT_BR (u_char*)"br" #define DFXP_ELEMENT_SPAN (u_char*)"span" +#define DFXP_ELEMENT_DIV (u_char*)"div" #define DFXP_ATTR_BEGIN (u_char*)"begin" #define DFXP_ATTR_END (u_char*)"end" @@ -223,46 +224,58 @@ dfxp_parse_timestamp(u_char* ts) return -1; } -static int64_t -dfxp_get_end_time(xmlNode *cur_node) +typedef struct{ + int64_t start_time; + int64_t end_time; +} dfxp_timestamp_t; + +static int +dfxp_parse_timestamp0(xmlNode* node, xmlChar* name, int64_t* ts) { - xmlChar* attr; - int64_t begin; - int64_t dur; - - // prefer the end attribute - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_END); - if (attr != NULL) + *ts = -1; + xmlChar* attr = dfxp_get_xml_prop(node, name); + if (attr == NULL) { - return dfxp_parse_timestamp(attr); + return 0; } - - // fall back to dur + start - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_DUR); - if (attr == NULL) + *ts = dfxp_parse_timestamp(attr); + return *ts >= 0; +} + +static int +dfxp_extract_time(xmlNode* node, dfxp_timestamp_t* t, int try_end_only) +{ + if (dfxp_parse_timestamp0(node, DFXP_ATTR_END, &t->end_time) && try_end_only) { - return -1; + return 1; // wanted end only } - - dur = dfxp_parse_timestamp(attr); - if (dur < 0) + + if (dfxp_parse_timestamp0(node, DFXP_ATTR_BEGIN, &t->start_time) && !try_end_only) { - return -1; + return 1; // wanted start, end } - - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_BEGIN); - if (attr == NULL) + + // need to look at duration, but only if start exists + if (t->start_time < 0 || !dfxp_parse_timestamp0(node, DFXP_ATTR_DUR, &t->end_time)) { - return -1; + return 0; // either dur or start doesn't exist } - - begin = dfxp_parse_timestamp(attr); - if (begin < 0) + t->end_time += t->start_time; + return 1; +} + +static int64_t +dfxp_clamp(int64_t v, int64_t lo, int64_t hi) +{ + if (v < lo) { - return -1; + return lo; } - - return begin + dur; + if (v > hi) + { + return hi; + } + return v; } static uint64_t @@ -274,7 +287,7 @@ dfxp_get_duration(xmlDoc *doc) unsigned node_stack_pos = 0; int nodes_left = DFXP_DURATION_ESTIMATE_NODES; int64_t result = 0; - int64_t cur; + dfxp_timestamp_t ts = {0}; for (cur_node = xmlDocGetRootElement(doc); ; cur_node = cur_node->prev) { @@ -295,6 +308,16 @@ dfxp_get_duration(xmlDoc *doc) continue; } + // timestamp information can be inside a div tag too + if (vod_strcmp(cur_node->name, DFXP_ELEMENT_DIV) == 0) + { + dfxp_extract_time(cur_node, &ts, 1); + if (ts.end_time > result) + { + result = ts.end_time; + } + } + // recurse into non-p nodes if (vod_strcmp(cur_node->name, DFXP_ELEMENT_P) != 0) { @@ -310,11 +333,11 @@ dfxp_get_duration(xmlDoc *doc) continue; } - // get the end time of this p node - cur = dfxp_get_end_time(cur_node); - if (cur > result) + // timestamp information can be inside a p tag + dfxp_extract_time(cur_node, &ts, 1); + if (ts.end_time > result) { - result = cur; + result = ts.end_time; } nodes_left--; @@ -695,13 +718,13 @@ dfxp_parse_frames( uint64_t start; uint64_t end; int64_t last_start_time = 0; - int64_t start_time = 0; - int64_t end_time = 0; - int64_t duration; + + dfxp_timestamp_t t = {0}; + xmlNode* node_stack[DFXP_MAX_STACK_DEPTH]; xmlNode* cur_node; + xmlNode* last_div = NULL; xmlNode temp_node; - xmlChar* attr; unsigned node_stack_pos = 0; vod_str_t text; vod_status_t rc; @@ -754,13 +777,17 @@ dfxp_parse_frames( { if (cur_frame != NULL) { - cur_frame->duration = end_time - start_time; - track->total_frames_duration = end_time - track->first_frame_time_offset; + cur_frame->duration = t.end_time - t.start_time; + track->total_frames_duration = t.end_time - track->first_frame_time_offset; } break; } cur_node = node_stack[--node_stack_pos]; + if (cur_node == last_div) + { + last_div = NULL; + } continue; } @@ -771,106 +798,46 @@ dfxp_parse_frames( if (vod_strcmp(cur_node->name, DFXP_ELEMENT_P) != 0) { - if (cur_node->children == NULL || - node_stack_pos >= vod_array_entries(node_stack)) + if (cur_node->children == NULL || node_stack_pos >= vod_array_entries(node_stack)) { continue; } + // NOTE(as): It's a div, so save it in case the p-tag doesn't have the time inside + if (vod_strcmp(cur_node->name, DFXP_ELEMENT_DIV) == 0) + { + last_div = cur_node; + } + node_stack[node_stack_pos++] = cur_node; temp_node.next = cur_node->children; cur_node = &temp_node; continue; } - // handle p element - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_END); - if (attr != NULL) + // handle p element or the last-visited div + if (!dfxp_extract_time(cur_node, &t, 0)) { - // has end time - end_time = dfxp_parse_timestamp(attr); - if (end_time < 0) - { - continue; - } - - if ((uint64_t)end_time < start) - { - track->first_frame_index++; - continue; - } - - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_BEGIN); - if (attr == NULL) - { - continue; - } - - start_time = dfxp_parse_timestamp(attr); - if (start_time < 0) + if (last_div == NULL || !dfxp_extract_time(last_div, &t, 0)) { continue; } } - else - { - // no end time - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_DUR); - if (attr == NULL) - { - continue; - } - - duration = dfxp_parse_timestamp(attr); - if (duration < 0) - { - continue; - } - - attr = dfxp_get_xml_prop(cur_node, DFXP_ATTR_BEGIN); - if (attr == NULL) - { - continue; - } - - start_time = dfxp_parse_timestamp(attr); - if (start_time < 0) - { - continue; - } - end_time = start_time + duration; - if ((uint64_t)end_time < start) - { + if ((uint64_t)t.end_time < start) + { track->first_frame_index++; continue; - } } - if (start_time >= end_time) + if (t.start_time >= t.end_time) { continue; } // apply clipping - if (start_time >= (int64_t)base_time) - { - start_time -= base_time; - if ((uint64_t)start_time > clip_to) - { - start_time = clip_to; - } - } - else - { - start_time = 0; - } - - end_time -= base_time; - if ((uint64_t)end_time > clip_to) - { - end_time = clip_to; - } + t.start_time = dfxp_clamp(t.start_time - base_time, 0, clip_to); + t.end_time = dfxp_clamp(t.end_time - base_time, 0, clip_to); // get the text rc = dfxp_get_frame_body( @@ -892,16 +859,16 @@ dfxp_parse_frames( // adjust the duration of the previous frame if (cur_frame != NULL) { - cur_frame->duration = start_time - last_start_time; + cur_frame->duration = t.start_time - last_start_time; } else { - track->first_frame_time_offset = start_time; + track->first_frame_time_offset = t.start_time; } - if ((uint64_t)start_time >= end) + if ((uint64_t)t.start_time >= end) { - track->total_frames_duration = start_time - track->first_frame_time_offset; + track->total_frames_duration = t.start_time - track->first_frame_time_offset; break; } @@ -916,11 +883,11 @@ dfxp_parse_frames( cur_frame->offset = (uintptr_t)text.data; cur_frame->size = text.len; - cur_frame->pts_delay = end_time - start_time; + cur_frame->pts_delay = t.end_time - t.start_time; cur_frame->key_frame = 0; track->total_frames_size += cur_frame->size; - last_start_time = start_time; + last_start_time = t.start_time; } track->frame_count = frames.nelts;