asp.net 截取文章摘要(无损返回html)的代码

发布时间:2020-03-17 编辑:脚本学堂
asp.net代码实现截取文章的前几行作为列表摘要,并无损返回HTML代码,避免了截取不全或乱码的问题。

asp.net 截取文章前几行作为列表摘要,无损返回HTML代码。

代码示例:

// Truncates an HTML fragment to a short summary without breaking its markup.
// Source: http://www.jb200.com
/// <summary>
/// Returns the leading part of an HTML fragment for use as a list summary.
/// Markup is copied through unchanged, only characters outside tags count
/// toward the limit, and every element still open at the cut point is closed
/// again so the result stays well formed.
/// </summary>
/// <param name="str">HTML fragment to shorten. <c>null</c> yields an empty string.</param>
/// <param name="l">
/// Maximum number of characters outside markup to keep. When it is greater than
/// or equal to the raw length of <paramref name="str"/>, the input is returned as is.
/// </param>
/// <returns>
/// The unchanged input when nothing has to be cut; otherwise the kept prefix,
/// followed by the closing tags of all still-open elements and a trailing "...".
/// If a tag is never terminated, the first <paramref name="l"/> raw characters are
/// returned HTML-escaped (plus "...") so broken markup is never emitted.
/// </returns>
/// <remarks>
/// Limitations: a '&gt;' inside a quoted attribute value ends the tag early, and the
/// bodies of script/style elements are treated as ordinary text.
/// </remarks>
public static string LeftH(string str, int l)
{
    if (str == null)
    {
        return "";
    }

    int length = str.Length;
    if (l >= length)
    {
        // The limit covers the whole raw string: nothing to cut.
        return str;
    }

    if (l <= 0)
    {
        // No visible character may be kept; only the ellipsis remains.
        return "...";
    }

    // Names of the elements opened so far and not yet closed (innermost last).
    System.Collections.Generic.List<string> openTags = new System.Collections.Generic.List<string>();
    int visible = 0; // characters outside markup that have been kept
    int pos = 0;     // everything before this index is part of the result

    while (pos < length)
    {
        // A '<' only starts markup when followed by a letter, '/', '!', '?' or '%';
        // otherwise it is literal text such as "1 < 2".
        bool atTag = str[pos] == '<' && pos + 1 < length && LeftHStartsMarkup(str[pos + 1]);
        if (!atTag)
        {
            if (visible >= l)
            {
                break;
            }

            visible++;
            pos++;
            continue;
        }

        bool isClosing = str[pos + 1] == '/';
        if (visible >= l && !isClosing)
        {
            // Limit reached: closing tags directly after the cut are still taken,
            // but no new element is opened.
            break;
        }

        int tagEnd = LeftHFindTagEnd(str, pos);
        if (tagEnd < 0)
        {
            // Unterminated tag: fall back to escaped plain text. Truncating before
            // escaping keeps entities such as "&lt;" from being cut in half.
            return LeftHEscape(str.Substring(0, l)) + "...";
        }

        // Comments, declarations and <% %> / <? ?> blocks have no element name
        // and therefore never affect the open-tag stack.
        string name = LeftHReadTagName(str, isClosing ? pos + 2 : pos + 1, tagEnd);
        if (name.Length > 0)
        {
            if (isClosing)
            {
                LeftHCloseTag(openTags, name);
            }
            else if (str[tagEnd - 1] != '/' && !LeftHIsVoidElement(name))
            {
                openTags.Add(name);
            }
        }

        pos = tagEnd + 1;
    }

    if (pos >= length)
    {
        // All text fitted within the limit (only markup made the raw string longer).
        return str;
    }

    System.Text.StringBuilder result = new System.Text.StringBuilder(str, 0, pos, pos + 32);
    for (int k = openTags.Count - 1; k >= 0; k--)
    {
        result.Append("</").Append(openTags[k]).Append('>');
    }

    result.Append("...");
    return result.ToString();
}

// Tells whether the character following '<' can begin a tag, comment or server block.
private static bool LeftHStartsMarkup(char next)
{
    return char.IsLetter(next) || next == '/' || next == '!' || next == '?' || next == '%';
}

// Returns the index of the '>' that ends the markup starting at tagStart, or -1 if it never ends.
private static int LeftHFindTagEnd(string html, int tagStart)
{
    if (string.CompareOrdinal(html, tagStart, "<!--", 0, 4) == 0)
    {
        int commentEnd = html.IndexOf("-->", tagStart + 4, System.StringComparison.Ordinal);
        return commentEnd < 0 ? -1 : commentEnd + 2;
    }

    char next = html[tagStart + 1];
    if (next == '%' || next == '?')
    {
        // Server-side blocks end with the matching "%>" / "?>" and may contain '>'.
        int blockEnd = html.IndexOf(next + ">", tagStart + 2, System.StringComparison.Ordinal);
        return blockEnd < 0 ? -1 : blockEnd + 1;
    }

    return html.IndexOf('>', tagStart + 1);
}

// Reads the element name that starts at nameStart; returns "" when no name is present.
private static string LeftHReadTagName(string html, int nameStart, int tagEnd)
{
    int end = nameStart;
    while (end < tagEnd)
    {
        char ch = html[end];
        if (!char.IsLetterOrDigit(ch) && ch != ':' && ch != '-' && ch != '_')
        {
            break;
        }

        end++;
    }

    return html.Substring(nameStart, end - nameStart);
}

// Tells whether the element never takes a closing tag (HTML void elements).
private static bool LeftHIsVoidElement(string name)
{
    switch (name.ToLowerInvariant())
    {
        case "area":
        case "base":
        case "br":
        case "col":
        case "embed":
        case "hr":
        case "img":
        case "input":
        case "link":
        case "meta":
        case "param":
        case "source":
        case "track":
        case "wbr":
            return true;
        default:
            return false;
    }
}

// Pops the innermost open element with the given name together with everything opened inside it.
private static void LeftHCloseTag(System.Collections.Generic.List<string> openTags, string name)
{
    for (int k = openTags.Count - 1; k >= 0; k--)
    {
        if (string.Equals(openTags[k], name, System.StringComparison.OrdinalIgnoreCase))
        {
            openTags.RemoveRange(k, openTags.Count - k);
            return;
        }
    }

    // A closing tag without a matching opener is left in place and otherwise ignored.
}

// Escapes the characters that would let plain text be interpreted as markup.
private static string LeftHEscape(string text)
{
    return text.Replace("<", "&lt;").Replace(">", "&gt;").Replace("\"", "&quot;").Replace("'", "&#39;");
}