diff --git a/src/Tests/Logic/StrippableTextTest.cs b/src/Tests/Logic/StrippableTextTest.cs index 0a8e6987c2..0a9a53fded 100644 --- a/src/Tests/Logic/StrippableTextTest.cs +++ b/src/Tests/Logic/StrippableTextTest.cs @@ -57,9 +57,9 @@ public void StrippableTextItalic3() public void StrippableTextFontDontTouch() { var st = new StrippableText("{MAN} Hi, how are you today!"); - Assert.AreEqual("", st.Pre); + Assert.AreEqual("{", st.Pre); Assert.AreEqual("!", st.Post); - Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText); + Assert.AreEqual("MAN} Hi, how are you today", st.StrippedText); } [TestMethod] diff --git a/src/libse/Common/HtmlUtil.cs b/src/libse/Common/HtmlUtil.cs index cb9487c96f..b81b560019 100644 --- a/src/libse/Common/HtmlUtil.cs +++ b/src/libse/Common/HtmlUtil.cs @@ -1527,5 +1527,17 @@ public static string GetClosingPair(string tag) /// The character to check. /// True if the character is a start tag symbol; otherwise, false. public static bool IsStartTagSymbol(char ch) => ch == '<' || ch == '{'; + + private static readonly HashSet KnownHtmlTags = new HashSet(StringComparer.OrdinalIgnoreCase) + { + "", "", "" //, "", "", "", "" + }; + + public static bool IsKnownHtmlTag(string tag) + { + return KnownHtmlTags.Contains(tag) || + tag.StartsWith(" 0 && ("<{" + stripStartCharacters).Contains(text[0])) + var len = input.Length; + var l = 0; + for (var r = 0; r < len; r++) { - int beginLength; - do + if (input[r] == '<' || input[r] == '{') + { + l = r; + } + else if ((input[r] == '>' && input[l] == '<') || (input[r] == '}' && input[l] == '{')) { - beginLength = text.Length; + // get the tag from l to r + var tag = input.Substring(l, r - l + 1); - while (text.Length > 0 && stripStartCharacters.Contains(text[0])) + // {man} etc... + if (!HtmlUtil.IsKnownHtmlTag(tag) && !Utilities.IsKnownAssTags(tag)) { - Pre += text[0]; - text = text.Remove(0, 1); - } + // try to find non-visible char + l++; + while (l < r && stripStartCharacters.Contains(input[l]) || input[l] == '{' || input[l] == '<') + { + l++; + } - // ASS/SSA codes like {\an9} - int endIndex = text.IndexOf('}'); - if (endIndex > 0 && text.StartsWith("{\\", StringComparison.Ordinal)) - { - int nextStartIndex = text.IndexOf('{', 2); - if (nextStartIndex == -1 || nextStartIndex > endIndex) + // non-special char found in between l and r + if (input[l] != '<' && input[l] != '{') { - endIndex++; - Pre += text.Substring(0, endIndex); - text = text.Remove(0, endIndex); + break; } } - // tags like or - endIndex = text.IndexOf('>'); - if (text.StartsWith('<') && endIndex >= 2) - { - endIndex++; - Pre += text.Substring(0, endIndex); - text = text.Remove(0, endIndex); - } + l = r + 1; + } + else if (input[l] != '<' && stripStartCharacters.Contains(input[r])) + { + l = r + 1; + } + else if (input[l] != '<' && input[l] != '{') + { + break; } - while (text.Length < beginLength); } - Post = string.Empty; - if (text.Length > 0 && (">" + stripEndCharacters).Contains(text[text.Length - 1])) + var k = len - 1; + for (int j = len - 1; j >= l; j--) { - int beginLength; - do + if (input[j] == '>') { - beginLength = text.Length; - - while (text.Length > 0 && stripEndCharacters.Contains(text[text.Length - 1])) - { - Post = text[text.Length - 1] + Post; - text = text.Substring(0, text.Length - 1); - } - - if (text.EndsWith('>')) + k = j; + } + else if (input[j] == '<' && input[k] == '>') + { + if (!HtmlUtil.IsKnownHtmlTag(input.Substring(j, k - j + 1))) { - // tags - if (text.EndsWith("", StringComparison.OrdinalIgnoreCase) || - text.EndsWith("", StringComparison.OrdinalIgnoreCase) || - text.EndsWith("", StringComparison.OrdinalIgnoreCase)) - { - Post = text.Substring(text.Length - 4) + Post; - text = text.Substring(0, text.Length - 4); - } - - // tag - if (text.EndsWith("", StringComparison.OrdinalIgnoreCase)) + while (j > k && stripEndCharacters.Contains(input[k]) || input[k] == '>' || input[k] == '}') { - Post = text.Substring(text.Length - 7) + Post; - text = text.Substring(0, text.Length - 7); + k--; } - if (text.EndsWith('>')) + if (input[k] != '>') { - var lastIndexOfStart = text.LastIndexOf("<"); - if (lastIndexOfStart >= 0) - { - var tag = text.Substring(lastIndexOfStart); - tag = tag.TrimStart('<').TrimEnd('>'); - if (tag.StartsWith("/c.", StringComparison.Ordinal) && !tag.Contains(' ') && !tag.Contains('\n')) - { - Post = text.Substring(lastIndexOfStart) + Post; - text = text.Substring(0, lastIndexOfStart); - } - } + break; } } + + k = j - 1; + } + else if (input[k] != '>' && stripEndCharacters.Contains(input[j])) + { + k = j - 1; + } + else if (input[k] != '>') + { + break; } - while (text.Length < beginLength); } - StrippedText = text; + StrippedText = input.Substring(l, k - l + 1); + Pre = input.Substring(0, l); + Post = input.Substring(k + 1); } private static string GetAndInsertNextId(List replaceIds, List replaceNames, string name, int idName) diff --git a/src/libse/Common/Utilities.cs b/src/libse/Common/Utilities.cs index aada8425eb..59ba135a33 100644 --- a/src/libse/Common/Utilities.cs +++ b/src/libse/Common/Utilities.cs @@ -3313,5 +3313,10 @@ public static SubtitleFormat GetSubtitleFormatByFriendlyName(object value) { throw new NotImplementedException(); } + + public static bool IsKnownAssTags(string tag) + { + return tag.StartsWith("{\\", StringComparison.Ordinal); + } } }