diff --git a/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags.slnx b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags.slnx new file mode 100644 index 00000000..6c8f156f --- /dev/null +++ b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags.slnx @@ -0,0 +1,3 @@ + + + diff --git a/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Program.cs b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Program.cs new file mode 100644 index 00000000..77039157 --- /dev/null +++ b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Program.cs @@ -0,0 +1,49 @@ +using System.Text.RegularExpressions; + + +HashSet allowedTags = new HashSet(StringComparer.OrdinalIgnoreCase) + { + "font", "b", "i", "u", "sub", "br", "p" + }; +string htmlText = "\r\n

This is supported text with inline span.

\r\nBold text and italic text are supported.\r\nThis tag is deprecated and unsupported in modern HTML.\r\nBlink tag is unsupported and ignored by browsers.\r\n\r\n"; +Console.WriteLine("Input Text: " + htmlText + "\n"); +string text1 = FilterSupportedHtml(htmlText); +Console.WriteLine("Sanitize tags: " + text1 + "\n"); + +string FilterSupportedHtml(string input) +{ + if (string.IsNullOrEmpty(input)) + return input; + + // Step 1: Decode HTML entities (< > etc.) + string decodedHtml = System.Net.WebUtility.HtmlDecode(input); + + // Step 2: Remove script tags completely (security) + decodedHtml = Regex.Replace( + decodedHtml, + @"]*>.*?", + string.Empty, + RegexOptions.IgnoreCase | RegexOptions.Singleline + ); + + string output = Regex.Replace( + decodedHtml, + @"]*)?>", + match => + { + string tagName = match.Groups[1].Value; + + // If supported tag → keep as is + if (allowedTags.Contains(tagName)) + { + return match.Value; + } + + // Else → remove tag + return string.Empty; + }, + RegexOptions.IgnoreCase + ); + + return output; +} \ No newline at end of file diff --git a/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Remove-unsupported-HTML-tags.csproj b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Remove-unsupported-HTML-tags.csproj new file mode 100644 index 00000000..ac7f5b57 --- /dev/null +++ b/Text/Remove-unsupported-HTML-tags/.NET/Remove-unsupported-HTML-tags/Remove-unsupported-HTML-tags.csproj @@ -0,0 +1,13 @@ + + + + Exe + net10.0 + Remove_unsupported_HTML_tags + enable + enable + + + + +