Merge pull request #6 from Guovin/dev
feat:recent_days
Guovin authored Mar 13, 2024
2 parents fe41d10 + 4ae3000 commit b4f5843
Showing 4 changed files with 69 additions and 19 deletions.
10 changes: 8 additions & 2 deletions README-EN.md
@@ -27,11 +27,12 @@ Customize channel menus, automatically fetch and update the latest live source i
- source_file: Template file, default value: demo.txt
- final_file: Generated file, default value: result.txt
- favorite_list: List of favorite channel names
- favorite_page_num: Number of pages fetched for favorite channels, default value: 5
- default_page_num: Number of pages fetched for regular channels, default value: 3
- favorite_page_num: Number of pages fetched for favorite channels, default value: 8
- default_page_num: Number of pages fetched for regular channels, default value: 5
- urls_limit: Number of interfaces, default value: 15
- response_time_weight: Response time weight value, default value: 0.5
- resolution_weight: Resolution weight value, default value: 0.5
- recent_days: Only keep interfaces updated within the most recent N days, default value: 60 (see the sample config sketch below)
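
For reference, a minimal sketch of a `config.py` that uses the options documented above. The values mirror the defaults from this commit; the `favorite_list` entries are just the sample channels that appear in the repository's config.

```python
# Illustrative sketch of config.py; values mirror the defaults documented above.
source_file = "demo.txt"             # template file with the channel menu
final_file = "result.txt"            # generated live source file
favorite_list = ["湖南卫视", "翡翠台"]   # favorite channels, fetched with more pages
favorite_page_num = 8                # pages fetched per favorite channel
default_page_num = 5                 # pages fetched per regular channel
urls_limit = 15                      # interfaces kept per channel
response_time_weight = 0.5           # weight of response time when sorting interfaces
resolution_weight = 0.5              # weight of resolution when sorting interfaces
recent_days = 60                     # only keep interfaces updated within the last N days
```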

#### .github/workflows/main.yml:

@@ -44,6 +45,11 @@ Customize channel menus, automatically fetch and update the latest live source i

## Update Log

### 2024/3/13

- Added configuration item: recent_days, which filters for the most recently updated interfaces, defaulting to the last 60 days
- Adjusted default values: fetch 8 pages for favorite channels and 5 pages for regular channels

### 2024/3/6

- Updated the file proxy description
10 changes: 8 additions & 2 deletions README.md
@@ -27,11 +27,12 @@
- source_file: Template file, default value: demo.txt
- final_file: Generated file, default value: result.txt
- favorite_list: List of favorite channel names
- favorite_page_num: Number of pages fetched for favorite channels, default value: 5
- default_page_num: Number of pages fetched for regular channels, default value: 3
- favorite_page_num: Number of pages fetched for favorite channels, default value: 8
- default_page_num: Number of pages fetched for regular channels, default value: 5
- urls_limit: Number of interfaces, default value: 15
- response_time_weight: Response time weight, default value: 0.5
- resolution_weight: Resolution weight, default value: 0.5
- recent_days: Only keep interfaces updated within the most recent N days, default value: 60

#### .github/workflows/main.yml:

@@ -44,6 +45,11 @@

## Update Log

### 2024/3/13

- Added configuration item: recent_days, which filters for the most recently updated interfaces, defaulting to the last 60 days
- Adjusted default values: fetch 8 pages for favorite channels and 5 pages for regular channels

### 2024/3/6

- Updated the file proxy description
5 changes: 3 additions & 2 deletions config.py
@@ -15,8 +15,9 @@
"湖南卫视",
"翡翠台",
]
favorite_page_num = 5
default_page_num = 3
favorite_page_num = 8
default_page_num = 5
urls_limit = 15
response_time_weight = 0.5
resolution_weight = 0.5
recent_days = 60
63 changes: 50 additions & 13 deletions main.py
@@ -11,6 +11,7 @@
import asyncio
from bs4 import BeautifulSoup
import re
import datetime


class GetSource:
@@ -80,7 +81,7 @@ async def getSpeed(self, url):

async def compareSpeedAndResolution(self, infoList):
response_times = await asyncio.gather(
*(self.getSpeed(url) for url, _ in infoList)
*(self.getSpeed(url) for url, _, _ in infoList)
)
valid_responses = [
(info, rt)
@@ -114,16 +115,15 @@ def extract_resolution(resolution_str):
resolution_weight = default_resolution_weight

def combined_key(item):
(_, resolution), response_time = item
(_, _, resolution), response_time = item
resolution_value = extract_resolution(resolution) if resolution else 0
return (
-(response_time_weight * response_time[1])
+ resolution_weight * resolution_value
)

sorted_res = sorted(valid_responses, key=combined_key)
urls = [url for (url, _), _ in sorted_res]
return urls
return sorted_res
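
To make the weighting in `combined_key` above concrete, here is a small standalone sketch (not part of this commit). It passes the response time and resolution value in directly as numbers, whereas the real code pulls them out of tuples and through `extract_resolution`.

```python
# Illustrative only: larger keys mean faster responses and/or higher resolution values.
response_time_weight = 0.5
resolution_weight = 0.5

def combined_key(response_time, resolution_value):
    return -(response_time_weight * response_time) + resolution_weight * resolution_value

fast_hd = combined_key(0.2, 1080)   # -(0.5 * 0.2) + 0.5 * 1080 = 539.9
slow_sd = combined_key(1.5, 480)    # -(0.5 * 1.5) + 0.5 * 480  = 239.25
print(fast_hd > slow_sd)            # True: the faster, higher-resolution candidate scores higher
```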

def removeFile(self):
if os.path.exists(config.final_file):
@@ -139,6 +139,34 @@ def outputTxt(self, cate, channelUrls):
f.write(name + "," + url + "\n")
f.write("\n")

def filterByDate(self, data):
default_recent_days = 60
use_recent_days = getattr(config, "recent_days", 60)
if (
not isinstance(use_recent_days, int)
or use_recent_days <= 0
or use_recent_days > 365
):
use_recent_days = default_recent_days
start_date = datetime.datetime.now() - datetime.timedelta(days=use_recent_days)
recent_data = []
for (url, date, resolution), response_time in data:
if date:
date = datetime.datetime.strptime(date, "%d-%m-%Y")
if date >= start_date:
recent_data.append(((url, date, resolution), response_time))
return recent_data

def getTotalUrls(self, data):
total_urls = []
if len(data) > config.urls_limit:
total_urls = [
url for (url, _, _), _ in self.filterByDate(data)[: config.urls_limit]
]
else:
total_urls = [url for (url, _, _), _ in data]
return list(dict.fromkeys(total_urls))
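
A self-contained sketch of how the date filter above behaves on sample data (not part of this commit). The URLs, dates, and timings are made up; the tuples follow the `((url, date, resolution), response_time)` shape used in this commit.

```python
import datetime

recent_days = 60  # same default as config.recent_days in this commit

def filter_by_date(data):
    # Mirrors the filtering condition above: keep entries dated within the window.
    start_date = datetime.datetime.now() - datetime.timedelta(days=recent_days)
    recent = []
    for (url, date, resolution), response_time in data:
        if date and datetime.datetime.strptime(date, "%d-%m-%Y") >= start_date:
            recent.append(((url, date, resolution), response_time))
    return recent

def days_ago(n):
    # Helper that builds sample dates in the "%d-%m-%Y" format filterByDate expects.
    return (datetime.datetime.now() - datetime.timedelta(days=n)).strftime("%d-%m-%Y")

sample = [
    (("http://example.com/a.m3u8", days_ago(10), "1920x1080"), 0.31),  # inside the window
    (("http://example.com/b.m3u8", days_ago(400), "1280x720"), 0.52),  # too old, dropped
    (("http://example.com/c.m3u8", None, None), 0.44),                 # no date, dropped
]
print([url for (url, _, _), _ in filter_by_date(sample)])
# -> ['http://example.com/a.m3u8']
```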

async def visitPage(self, channelItems):
self.removeFile()
for cate, channelObj in channelItems.items():
@@ -175,23 +203,32 @@ async def visitPage(self, channelItems):
info_div = (
m3u8_div.find_next_sibling("div") if m3u8_div else None
)
resolution = None
date = resolution = None
if info_div:
info_text = info_div.text.strip()
resolution = (
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
date, resolution = (
(
info_text.partition(" ")[0]
if info_text.partition(" ")[0]
else None
),
(
info_text.partition(" ")[2].partition("•")[2]
if info_text.partition(" ")[2].partition("•")[2]
else None
),
)
infoList.append((url, resolution))
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on page {page}: {e}")
continue
try:
urls = list(
dict.fromkeys(await self.compareSpeedAndResolution(infoList))
sorted_data = await self.compareSpeedAndResolution(
infoList
) # Sort by speed and resolution
channelUrls[name] = (urls or channelObj[name])[: config.urls_limit]
channelUrls[name] = (
self.getTotalUrls(sorted_data) or channelObj[name]
) # Get the total urls with filter by date and limit
except Exception as e:
print(f"Error on sorting: {e}")
continue
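Finally, a hedged illustration of the partition-based parsing added to `visitPage` above. The exact format of the result page's info line is not shown in this diff, so the sample string below is an assumption shaped like `<date> <label> • <resolution>`.

```python
# Hypothetical info line; the real page markup is not part of this diff.
info_text = "13-03-2024 直播源 • 1920x1080"

date = info_text.partition(" ")[0] or None                           # "13-03-2024"
resolution = info_text.partition(" ")[2].partition("•")[2] or None   # " 1920x1080" (leading space kept)
print(date, resolution)
```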
