-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGeneration 1.py
60 lines (56 loc) · 1.92 KB
/
Generation 1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Sina News (新浪新闻)
#coding:utf-8
import requests
from bs4 import BeautifulSoup
import re
# Fetch the front page once and parse it; the report sections further down
# all query this single `soup` tree.
url = "https://news.sina.com.cn/china/"
# requests applies no timeout unless one is given, so without it a stalled
# connection would block the script forever.
wbdata = requests.get(url, timeout=10)
# Override whatever encoding requests inferred from the response headers
# before reading `.text`.
wbdata.encoding = 'utf-8'
soup = BeautifulSoup(wbdata.text, 'html.parser')
# Nothing in the visible script reads this value; kept so the module-level
# name still exists.
i = 0
def Nofind(title):
    """Report whether *title* contains one of the excluded keywords.

    Args:
        title: Headline text to scan.

    Returns:
        -1 if any excluded keyword occurs in ``title``; otherwise ``None``.
        Callers only compare the result against -1.
    """
    # The original if-chain tested "对外" twice; the second test could never
    # fire, so each keyword is listed exactly once here.
    excluded = ("降低", "对外", "整齐")
    if any(word in title for word in excluded):
        return -1
    return None
def Select(title):
    """Decide whether a headline should be printed.

    Args:
        title: Headline text to test.

    Returns:
        1 when ``title`` contains the literal "xx" and ``Nofind`` does not
        reject it (i.e. does not return -1); ``None`` in every other case.
    """
    # NOTE(review): "xx" looks like a placeholder for a real search keyword —
    # confirm with the author.
    if "xx" not in title:
        return None
    # Only consult the exclusion list once the keyword itself has matched.
    if Nofind(title) == -1:
        return None
    return 1
# Date patterns used to pull a date-like token out of each item.
# NOTE(review): both patterns are pinned to the year 2019, so anything from
# another year yields an empty list — confirm whether that is still intended.
_IMAGE_DATE_RE = re.compile(r"2019\d+")
_HREF_DATE_RE = re.compile(r"2019-\d+-\d+")


def _print_section(selector, limit, date_from_image=False, trim_title=False):
    """Print the selected headlines found under one CSS selector.

    For every element of the module-level ``soup`` matching ``selector``,
    look at its first ``limit`` links and print one line per link whose
    title passes ``Select``.

    Args:
        selector: CSS selector of the container elements to scan.
        limit: Maximum number of links inspected per container.
        date_from_image: If true, the date list is extracted from the ``src``
            of the container's first ``<img>``; otherwise from each link's
            ``href``.
        trim_title: If true, the first two and last two characters of the
            title are dropped before printing.
    """
    for container in soup.select(selector):
        # Query the links once and slice instead of indexing, so a container
        # with fewer than `limit` links is processed in full rather than
        # raising IndexError.
        anchors = container.select('a')[:limit]

        image_dates = []
        if date_from_image:
            images = container.select('img')
            # A container without an image simply gets an empty date list.
            if images:
                image_dates = _IMAGE_DATE_RE.findall(images[0].get('src', ''))

        for anchor in anchors:
            title = anchor.text
            if Select(title) != 1:
                continue
            # Links without an href are reported with an empty URL instead of
            # aborting the whole run with KeyError.
            href = anchor.get('href', '')
            dates = image_dates if date_from_image else _HREF_DATE_RE.findall(href)
            shown = title[2:-2] if trim_title else title
            print(dates, shown, ':', href)


print("\n-----News in the front page.-----\n\n-----1.news in the left-content.-----\n")
_print_section('.left-content', 7, date_from_image=True, trim_title=True)

print('\n-----2.news in the right-content-----\n')
_print_section('.right-content', 10)

print('\n-----3.news in the news-2 -----','\n')
_print_section('.news-2', 11)

print("\n-----4.news in the switch-text-----\n")
_print_section('.switch-text', 8, date_from_image=True, trim_title=True)