From 7cf43844e3cb8ad96cae71aff54ca86dd2d1cc28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E5=A4=B4?= Date: Sat, 10 Aug 2024 18:09:26 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=92=E4=BB=B6=E4=B8=8B=E8=BD=BD=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=93=88=E5=B8=8C=E6=A0=A1=E9=AA=8C=EF=BC=8C?= =?UTF-8?q?=E5=85=85=E5=88=86=E5=88=A9=E7=94=A8=E6=9C=AC=E5=9C=B0=E7=BC=93?= =?UTF-8?q?=E5=AD=98=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NewLife.Core/Web/Link.cs | 47 +++++++++++++++++++++++++--------- NewLife.Core/Web/WebClientX.cs | 27 ++++++++++++++++--- Test/Program.cs | 4 ++- 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/NewLife.Core/Web/Link.cs b/NewLife.Core/Web/Link.cs index 92c1d3fd5..2e4da0951 100644 --- a/NewLife.Core/Web/Link.cs +++ b/NewLife.Core/Web/Link.cs @@ -28,13 +28,16 @@ public class Link /// 时间 public DateTime Time { get; set; } + /// 哈希 + public String? Hash { get; set; } + /// 原始Html public String? Html { get; set; } #endregion #region 方法 - static readonly Regex _regA = new("[^>]*) href=?\"(?<链接>[^>\"]*)?\"(?<其它2>[^>]*)>(?<名称>[^<]*)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); - static readonly Regex _regTitle = new("title=(\"?)(?<标题>[^ \']*?)\\1", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + static readonly Regex _regA = new("""]* href=?"(?<链接>[^>"]*)?"[^>]*>(?<名称>[^<]*)\s*[^>]*]*>(?<哈希>[^<]*)""", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + static readonly Regex _regTitle = new("""title=("?)(?<标题>[^ ']*?)\1""", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); /// 分析HTML中的链接 /// Html文本 @@ -58,8 +61,10 @@ public static Link[] Parse(String html, String? baseUrl = null, Func 0) link.Name = link.Name[..p]; + // 去掉后缀,特殊处理.tar.gz双后缀 + var name = link.Name; + if (name.EndsWithIgnoreCase(".tar.gz")) + link.Name = name[..^7]; + else + { + var p = name.LastIndexOf('.'); + if (p > 0) link.Name = name[..p]; + } list.Add(link); } @@ -156,9 +167,15 @@ private static Link[] ParseFTP(String html, String? baseUrl, Func idx = link.ParseVersion(); if (idx > 0) link.Title = link.Title[..idx]; - // 去掉后缀 - var p = link.Name.LastIndexOf('.'); - if (p > 0) link.Name = link.Name[..p]; + // 去掉后缀,特殊处理.tar.gz双后缀 + var name = link.Name; + if (name.EndsWithIgnoreCase(".tar.gz")) + link.Name = name[..^7]; + else + { + var p = name.LastIndexOf('.'); + if (p > 0) link.Name = name[..p]; + } list.Add(link); } @@ -178,9 +195,15 @@ public Link Parse(String file) ParseTime(); ParseVersion(); - // 去掉后缀 - var p = Name.LastIndexOf('.'); - if (p > 0) Name = Name[..p]; + // 去掉后缀,特殊处理.tar.gz双后缀 + var name = Name; + if (name.EndsWithIgnoreCase(".tar.gz")) + Name = name[..^7]; + else + { + var p = name.LastIndexOf('.'); + if (p > 0) Name = name[..p]; + } // 时间 if (Time.Year < 2000) diff --git a/NewLife.Core/Web/WebClientX.cs b/NewLife.Core/Web/WebClientX.cs index 918391e45..d81cb4f63 100644 --- a/NewLife.Core/Web/WebClientX.cs +++ b/NewLife.Core/Web/WebClientX.cs @@ -1,6 +1,7 @@ using System.Diagnostics; using System.Net; using System.Net.Http; +using System.Security.Cryptography; using NewLife.Http; using NewLife.Log; @@ -216,13 +217,33 @@ public String DownloadLink(String urls, String name, String destdir) // 已经提前检查过,这里几乎不可能有文件存在 if (File.Exists(file2)) { - // 如果连接名所表示的文件存在,并且带有时间,那么就智能是它啦 + // 如果连接名所表示的文件存在,并且带有时间,那么就只能是它啦 var p = linkName.LastIndexOf("_"); if (p > 0 && (p + 8 + 1 == linkName.Length || p + 14 + 1 == linkName.Length)) { - Log.Info("分析得到文件 {0},目标文件已存在,无需下载 {1}", linkName, link.Url); + Log.Info("分析得到文件:{0},目标文件已存在,无需下载:{1}", linkName, link.Url); return file2; } + // 校验哈希是否一致 + if (!link.Hash.IsNullOrEmpty() && link.Hash.Length == 32) + { + var hash = file2.AsFile().MD5().ToHex(); + if (link.Hash.EqualIgnoreCase(hash)) + { + Log.Info("分析得到文件:{0},目标文件已存在,且MD5哈希一致", linkName, link.Url); + return file2; + } + } + if (!link.Hash.IsNullOrEmpty() && link.Hash.Length == 128) + { + using var fs = file2.AsFile().OpenRead(); + var hash = SHA512.Create().ComputeHash(fs).ToHex(); + if (link.Hash.EqualIgnoreCase(hash)) + { + Log.Info("分析得到文件:{0},目标文件已存在,且SHA512哈希一致", linkName, link.Url); + return file2; + } + } } Log.Info("分析得到文件 {0},准备下载 {1},保存到 {2}", linkName, link.Url, file2); @@ -230,7 +251,7 @@ public String DownloadLink(String urls, String name, String destdir) file2 = file2.EnsureDirectory(); var sw = Stopwatch.StartNew(); - Task.Run(() => DownloadFileAsync(link.Url, file2)).Wait(); + Task.Run(() => DownloadFileAsync(link.Url, file2)).Wait(Timeout); sw.Stop(); if (File.Exists(file2)) diff --git a/Test/Program.cs b/Test/Program.cs index ae77e2c50..aaa6cd7a2 100644 --- a/Test/Program.cs +++ b/Test/Program.cs @@ -100,7 +100,9 @@ private static void Test1() { var client = new WebClientX { Log = XTrace.Log }; //var rs = client.DownloadLink("http://sh03.newlifex.com,http://x.newlifex.com", "ip.gz", "tt/"); - var rs = client.DownloadLink("http://sh03.newlifex.com,http://x.newlifex.com", "leaf.png", "tt/"); + //var rs = client.DownloadLink("http://sh03.newlifex.com,http://x.newlifex.com", "leaf", "tt/"); + var rs = client.DownloadLink("http://sh03.newlifex.com,https://x.newlifex.com/dotNet/8.0.7", "dotnet-runtime-8.0.7-linux-x64", "tt/"); + XTrace.WriteLine(rs); } private static void Test2()