命名空间
using System.Text.RegularExpressions;
1.采集到的数据中,有些日期格式很不规则并带有中文,比如"2013年5月8日"、"2013年5月"。这里提供把这类不规则日期转换成 DateTime 类型的方法:
/// <summary>
/// Converts a loosely formatted date string (for example "2013年5月8日",
/// "2013年5月", "2013年" or text containing "2013-5-8") into a <see cref="DateTime"/>.
/// </summary>
/// <remarks>
/// Parsing is attempted in three stages:
/// 1. the string as-is, using the current culture;
/// 2. a dash-separated date embedded in the text, via <c>ClearDate</c>;
/// 3. Chinese year/month/day markers, with missing month/day defaulting to 1.
/// </remarks>
/// <param name="a">Raw text that contains a date.</param>
/// <returns>
/// The parsed date. <see cref="DateTime.MinValue"/> is returned for a null input,
/// which is what <c>Convert.ToDateTime((string)null)</c> yields.
/// </returns>
/// <exception cref="FormatException">No stage could extract a valid date.</exception>
public DateTime ConvertTime(string a)
{
    // Convert.ToDateTime(null) returns DateTime.MinValue; keep that contract explicit.
    if (a == null)
    {
        return DateTime.MinValue;
    }

    // Stage 1: anything the current culture can parse directly.
    // TryParse uses the same culture and styles as Convert.ToDateTime, without throwing.
    DateTime direct;
    if (DateTime.TryParse(a, out direct))
    {
        return direct;
    }

    // Stage 2: a yyyy-M-d style date buried in surrounding text.
    try
    {
        return ClearDate(a);
    }
    catch (FormatException)
    {
        // No usable dash-separated date; fall through to the Chinese-marker parser.
    }

    // Stage 3: Chinese markers. The pattern must stay on a single line: a line break
    // inside a verbatim string becomes part of the regex. The year-only alternative
    // requires the 年 suffix so that stray digit runs (times, ids) in front of the
    // real date are not mistaken for a year.
    Regex re = new Regex(@"(\d{2,4})年(\d{1,2})月(\d{1,2})日*|(\d{2,4})年(\d{1,2})月*|(\d{1,2})月(\d{1,2})日*|(\d{2,4})年", RegexOptions.None);

    // Regex.Split interleaves the text between matches with the captured groups,
    // so the first four pieces are "text before" plus up to three date components.
    string[] pieces = re.Split(a);
    int limit = Math.Min(pieces.Length, 4);

    List<string> dates = new List<string>();
    int ignored;
    for (int i = 0; i < limit; i++)
    {
        if (int.TryParse(pieces[i], out ignored))
        {
            dates.Add(pieces[i]); // numeric component
        }
    }

    if (dates.Count == 0)
    {
        throw new FormatException("No date could be recognized in: " + a);
    }

    string newStr;
    if (dates.Count == 1)
    {
        // Only a year was found: default to January 1st.
        newStr = dates[0] + "-1-1";
    }
    else
    {
        // A zero component (e.g. "0月", "00日") is treated as 1.
        for (int s = 0; s < dates.Count; s++)
        {
            if (dates[s] == "0" || dates[s] == "00")
            {
                dates[s] = "1";
            }
        }
        newStr = string.Join("-", dates.ToArray());
    }

    return Convert.ToDateTime(newStr);
}
/// <summary>
/// Extracts the first dash-separated date (digits-digits-digits, starting with a
/// non-zero digit) from <paramref name="a"/> and converts it to a <see cref="DateTime"/>.
/// </summary>
/// <remarks>
/// Only the first match is used. Concatenating every match, as was done before,
/// turned text with two dates into an unparseable string such as "2013-5-82013-6-9".
/// </remarks>
/// <param name="a">Text that may contain a date such as "2013-5-8".</param>
/// <returns>The date parsed from the first match, using the current culture.</returns>
/// <exception cref="FormatException">
/// No dash-separated date is present, or the matched text is not a valid date.
/// </exception>
public DateTime ClearDate(string a)
{
    Match first = Regex.Match(a, @"([1-9]\d*\-{1}\d*\-{1}\d*)");
    if (!first.Success)
    {
        throw new FormatException("No dash-separated date found in: " + a);
    }

    return Convert.ToDateTime(first.Value);
}
直接调用ConvertTime就可完成转换
2.带中文的字符串只保留数字和小数点
/// <summary>
/// Keeps only the digits and decimal point found in <paramref name="a"/> and
/// converts the result to a <see cref="double"/>.
/// </summary>
/// <remarks>
/// All numeric fragments are concatenated, so "1,234.5" becomes 1234.5. The pattern
/// hard-codes '.' as the decimal separator, so the text is parsed with the invariant
/// culture; parsing with the current culture misreads "12.5" wherever '.' is a
/// group separator.
/// </remarks>
/// <param name="a">Text containing a number, possibly mixed with other characters.</param>
/// <returns>The extracted number, or 0.0 when the text contains no numeric fragment.</returns>
/// <exception cref="FormatException">
/// The concatenated fragments do not form a single number (for example "3.5到4.5"
/// yields "3.54.5").
/// </exception>
public double ConvertNumber(string a)
{
    System.Text.StringBuilder digits = new System.Text.StringBuilder();
    foreach (Match fragment in Regex.Matches(a, @"([1-9]\d*\.?\d*)|(0\.\d*[1-9])"))
    {
        digits.Append(fragment.Value);
    }

    if (digits.Length == 0)
    {
        return 0.0;
    }

    return double.Parse(digits.ToString(), System.Globalization.CultureInfo.InvariantCulture);
}
3.去除HTML标签
/// <summary>
/// Strips script blocks, tags and comments from an HTML fragment, decodes a small
/// set of common entities, and returns the remaining plain text trimmed.
/// </summary>
/// <remarks>
/// Any '&lt;' or '&gt;' left after entity decoding is removed, so entity-encoded
/// markup such as "&amp;lt;script&amp;gt;" cannot reappear as a live tag in the result.
/// </remarks>
/// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
/// <returns>The text with markup removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="Htmlstring"/> is null.</exception>
public static string NoHTML(string Htmlstring)
{
    // Remove scripts. Singleline lets '.' span line breaks so multi-line script
    // bodies are removed together with their tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Pattern / replacement pairs, applied in order.
    string[,] rules =
    {
        { @"<(.[^>]*)>", "" },            // remaining tags
        { @"([\r\n])[\s]+", "" },         // line break followed by whitespace
        { @"-->", "" },                   // dangling comment terminator
        { @"<!--.*", "" },                // unterminated comment up to end of line
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\xa1" },
        { @"&(cent|#162);", "\xa2" },
        { @"&(pound|#163);", "\xa3" },
        { @"&(copy|#169);", "\xa9" },
        { @"&#(\d+);", "" }               // any other numeric entity
    };

    for (int i = 0; i < rules.GetLength(0); i++)
    {
        Htmlstring = Regex.Replace(Htmlstring, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: the result of Replace must be assigned back,
    // otherwise these three clean-up steps have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    return Htmlstring.Trim();
}