-
-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathfinancial.py
181 lines (169 loc) · 7.35 KB
/
financial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python
# encoding: utf-8
'''
#-------------------------------------------------------------------
# CONFIDENTIAL --- CUSTOM STUDIOS
#-------------------------------------------------------------------
#
# @Project Name : 上市公司股票财务下载助手
#
# @File Name : main.py
#
# @Programmer : tinygeeker
#
# @Start Date : 2022/01/10 13:14
#
# @Last Update : 2022/01/10 13:14
#
#-------------------------------------------------------------------
'''
import sys, re, pymysql, requests
from bs4 import BeautifulSoup
class financial():
'''
This is a main Class, the file contains all documents.
One document contains paragraphs that have several sentences
It loads the original file and converts the original file to new content
Then the new content will be saved by this class
'''
def __init__(self):
#服务器域名
self.server = 'http://quotes.money.163.com/'
self.cwnb = 'http://quotes.money.163.com/hkstock/cwsj_'
#主要财务指标
self.cwzb_dict = {'EPS':'基本每股收益','EPS_DILUTED':'摊薄每股收益','GROSS_MARGIN':'毛利率',
'CAPITAL_ADEQUACY':'资本充足率','LOANS_DEPOSITS':'贷款回报率','ROTA':'总资产收益率',
'ROEQUITY':'净资产收益率','CURRENT_RATIO':'流动比率','QUICK_RATIO':'速动比率',
'ROLOANS':'存贷比','INVENTORY_TURNOVER':'存货周转率','GENERAL_ADMIN_RATIO':'管理费用比率',
'TOTAL_ASSET2TURNOVER':'资产周转率','FINCOSTS_GROSSPROFIT':'财务费用比率','TURNOVER_CASH':'销售现金比率','YEAREND_DATE':'报表日期'}
#利润表
self.lrb_dict = {'TURNOVER':'总营收','OPER_PROFIT':'经营利润','PBT':'除税前利润',
'NET_PROF':'净利润','EPS':'每股基本盈利','DPS':'每股派息',
'INCOME_INTEREST':'利息收益','INCOME_NETTRADING':'交易收益','INCOME_NETFEE':'费用收益','YEAREND_DATE':'报表日期'}
#资产负债表
self.fzb_dict = {
'FIX_ASS':'固定资产','CURR_ASS':'流动资产','CURR_LIAB':'流动负债',
'INVENTORY':'存款','CASH':'现金及银行存结','OTHER_ASS':'其他资产',
'TOTAL_ASS':'总资产','TOTAL_LIAB':'总负债','EQUITY':'股东权益',
'CASH_SHORTTERMFUND':'库存现金及短期资金','DEPOSITS_FROM_CUSTOMER':'客户存款',
'FINANCIALASSET_SALE':'可供出售之证券','LOAN_TO_BANK':'银行同业存款及贷款',
'DERIVATIVES_LIABILITIES':'金融负债','DERIVATIVES_ASSET':'金融资产','YEAREND_DATE':'报表日期'}
#现金流表
self.llb_dict = {
'CF_NCF_OPERACT':'经营活动产生的现金流','CF_INT_REC':'已收利息','CF_INT_PAID':'已付利息',
'CF_INT_REC':'已收股息','CF_DIV_PAID':'已派股息','CF_INV':'投资活动产生现金流',
'CF_FIN_ACT':'融资活动产生现金流','CF_BEG':'期初现金及现金等价物','CF_CHANGE_CSH':'现金及现金等价物净增加额',
'CF_END':'期末现金及现金等价物','CF_EXCH':'汇率变动影响','YEAREND_DATE':'报表日期'}
#总表
self.table_dict = {'cwzb':self.cwzb_dict,'lrb':self.lrb_dict,'fzb':self.fzb_dict,'llb':self.llb_dict}
#请求头
self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.8',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36'}
def hello(self):
'''
This is a welcome speech
:return: self
'''
print('*' * 50)
print(' ' * 10 + '上市公司股票财务下载助手')
print(' ' * 5 + '作者: tinygeeker Date: 2022-01-10 13:14')
print(' ' * 5 + '主页: https://tinygeeker.blog.csdn.net')
print('*' * 50)
return self
def get_information(self, url):
'''
get the info of page
:return: str
'''
req = requests.get(url = url, headers = self.headers)
req.encoding = 'utf-8'
html = req.text
page_bf = BeautifulSoup(html, 'lxml')
#股票名称,股票代码
name = page_bf.find_all('span', class_ = 'name')[0].string
# code = page_bf.find_all('span', class_ = 'code')[0].string
# code = re.findall('\d+',code)[0]
#存储各个表名的列表
table_name_list = []
table_date_list = []
each_date_list = []
url_list = []
#表名和表时间
table_name = page_bf.find_all('div', class_ = 'titlebar3')
for each_table_name in table_name:
#表名
table_name_list.append(each_table_name.span.string)
#表时间
for each_table_date in each_table_name.div.find_all('select', id = re.compile('.+1$')):
url_list.append(re.findall('(\w+)1',each_table_date.get('id'))[0])
for each_date in each_table_date.find_all('option'):
each_date_list.append(each_date.string)
table_date_list.append(each_date_list)
each_date_list = []
return name,table_name_list,table_date_list,url_list
def insert_tables(self, name, table_name_list,table_date_list, url_list):
'''
Insert the info to database
'''
#打开数据库连接:host-连接主机地址,port-端口号,user-用户名,passwd-用户密码,db-数据库名,charset-编码
conn = pymysql.connect(
host='127.0.0.1',
port=3306, user='root',
passwd='yourpasswd',
db='financialdata',
charset='utf8'
)
#使用cursor()方法获取操作游标
cursor = conn.cursor()
#插入信息
for i in range(len(table_name_list)):
sys.stdout.write(' [正在下载 ] %s' % table_name_list[i] + '\r')
#获取数据地址
url = self.server + 'hk/service/cwsj_service.php?symbol={}&start={}&end={}&type={}&unit=yuan'.format(code,table_date_list[i][-1],table_date_list[i][0],url_list[i])
req_table = requests.get(url = url, headers = self.headers)
table = req_table.json()
nums = len(table)
value_dict = {}
for num in range(nums):
sys.stdout.write(' [正在下载 %.2f%%] ' % (((num+1) / nums)*100) + '\r')
sys.stdout.flush()
value_dict['股票名'] = name
value_dict['股票代码'] = code
for key, value in table[i].items():
if key in self.table_dict[url_list[i]]:
value_dict[self.table_dict[url_list[i]][key]] = value
sql1 = """
INSERT INTO %s (`股票名`,`股票代码`,`报表日期`) VALUES ('%s','%s','%s')""" % (url_list[i],value_dict['股票名'],value_dict['股票代码'],value_dict['报表日期'])
try:
cursor.execute(sql1)
# 执行sql语句
conn.commit()
except:
# 发生错误时回滚
conn.rollback()
for key, value in value_dict.items():
if key not in ['股票名','股票代码','报表日期']:
sql2 = """
UPDATE %s SET %s='%s' WHERE `股票名`='%s' AND `报表日期`='%s'""" % (url_list[i],key,value,value_dict['股票名'],value_dict['报表日期'])
try:
cursor.execute(sql2)
# 执行sql语句
conn.commit()
except:
# 发生错误时回滚
conn.rollback()
value_dict = {}
print(' [下载完成 ')
# 关闭数据库连接
cursor.close()
conn.close()
def run(self):
code = input('请输入上市股票代码:')
name, table_name_list, table_date_list, url_list = self.get_information(self.cwnb + code + '.html')
print('\n %s:(%s)财务数据下载中!\n' % (name, code))
self.insert_tables(name, table_name_list, table_date_list, url_list)
print('\n %s:(%s)财务数据下载完成!' % (name, code))
if __name__ == '__main__':
financial().hello().run()