-
截取HTML C#代码
香水坏坏 发表于2007-7-7 [ASP.NET]。。。。。。。头晕 肯定还没考虑完善 闷。。。
// Matches either a complete element (<tag ...> ... </tag>, where the closing
// name must equal the opening name) or a self-closing element (<tag ... />).
// Built once and reused, because the pattern never changes between calls.
private static readonly Regex TrimHtmlElementPattern = new Regex(
    @"(<([a-z0-9]*)([^>]*)>(.*)<\s*/\s*\2>)|(<([a-z0-9]*)([^>]*)/\s*>)",
    RegexOptions.IgnoreCase);

/// <summary>
/// Truncates an HTML fragment to at most <paramref name="length"/> characters
/// and removes tags that the cut left without a matching partner.
/// </summary>
/// <param name="input">The HTML fragment. Null or empty yields an empty string.</param>
/// <param name="length">
/// Maximum number of characters kept from the trimmed input before clean-up.
/// Negative values are treated as zero.
/// </param>
/// <returns>
/// The trimmed input unchanged when it already fits in <paramref name="length"/>.
/// Otherwise the truncated text in which every complete or self-closing element
/// is kept verbatim and, in the text around those elements, everything between
/// an opening and a closing angle bracket is removed. When the truncated text
/// contains no complete element at all it is returned as is (only trimmed).
/// </returns>
/// <remarks>
/// The element pattern does not use single-line mode, so an element whose
/// content spans several lines is not recognised as complete.
/// </remarks>
protected string TrimHtml(string input, int length)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    string inputHTML = input.Trim();
    if (length >= inputHTML.Length)
    {
        // Already short enough: nothing to cut, nothing to repair.
        return inputHTML;
    }

    // Cut at the requested length. (This used to be a hard-coded 505, which
    // ignored the parameter and threw for inputs shorter than 505 characters.)
    inputHTML = inputHTML.Substring(0, length < 0 ? 0 : length);

    Match m = TrimHtmlElementPattern.Match(inputHTML);
    if (!m.Success)
    {
        // No complete element found: keep the truncated text untouched.
        return inputHTML.Trim();
    }

    StringBuilder outHTML = new StringBuilder(inputHTML.Length);

    // Text in front of the first complete element, scanned right to left.
    outHTML.Append(StripTagsBackward(inputHTML.Substring(0, m.Index)));

    // Walk the matches in order; keep each element verbatim and keep the
    // tag-free text that lies between two consecutive elements.
    int consumed = m.Index;
    while (m.Success)
    {
        AppendOutsideTags(outHTML, inputHTML, consumed, m.Index);
        outHTML.Append(m.Value);
        consumed = m.Index + m.Length;
        m = m.NextMatch();
    }

    // Text after the last complete element; a tag chopped in half by the
    // truncation is dropped here because its '<' is never followed by '>'.
    AppendOutsideTags(outHTML, inputHTML, consumed, inputHTML.Length);

    return outHTML.ToString().Trim();
}

// Returns text without the characters enclosed in angle brackets, scanning from
// the end towards the start: '>' opens a skipped region and '<' closes it.
private static string StripTagsBackward(string text)
{
    char[] kept = new char[text.Length];
    int writeAt = text.Length; // buffer is filled from the back to keep order
    bool insideTag = false;

    for (int i = text.Length - 1; i >= 0; i--)
    {
        char c = text[i];
        if (c == '>')
        {
            insideTag = true;
        }
        else if (c == '<')
        {
            insideTag = false;
        }
        else if (!insideTag)
        {
            kept[--writeAt] = c;
        }
    }

    return new string(kept, writeAt, text.Length - writeAt);
}

// Appends text[start..end) to output, skipping everything from a '<' up to and
// including the next '>' (or up to end when no '>' follows).
private static void AppendOutsideTags(StringBuilder output, string text, int start, int end)
{
    bool insideTag = false;

    for (int i = start; i < end; i++)
    {
        char c = text[i];
        if (c == '<')
        {
            insideTag = true;
        }
        else if (c == '>')
        {
            insideTag = false;
        }
        else if (!insideTag)
        {
            output.Append(c);
        }
    }
}

