'''
Python获取A股上市公司财报数据 (Fetch financial-statement data of A-share listed companies with Python)
'''
import pandas as pd
import tushare as ts
from time import sleep
'''
下面是参数说明:
ts_code 股票代码
end_date 报告期
report_type 报表类型,1合并报表 2单季合并 3调整单季合并表 4调整合并报表 5调整前合并报表 6母公司报表 7母公司单季报 8 母公司调整单季表 9母公司调整表 10母公司调整前报表 11调整前合并报表 12母公司调整前报表
basic_eps 基本每股收益
diluted_eps 稀释每股收益
total_revenue 营业总收入
sell_exp 销售费用
admin_exp 管理费用
fin_exp 财务费用
total_profit 利润总额
income_tax 所得税费用
n_income 净利润(含少数股东损益)
'''
# Connection settings, defined up front so they are easy to change and reuse.
# A tushare account can be registered at:
#   https://tushare.pro/register?reg=229016
# NOTE: the value below is a placeholder; replace it with the API token shown
# in the tushare account back end before running the script.
token='**这个需要大家去注册一个tushare,免费的,然后从后台获取**'
# API client used by all data requests below.
pro = ts.pro_api(token)
# Query parameters: stock code and the reporting-period window.
my_code = '603858.SH'
fbegdate = '20150101'
fenddate = '20201231'

# Comma-separated field lists requested from each endpoint.
# The parentheses are required: adjacent string literals are only joined
# into one string when they belong to the same expression. Without them
# every continuation line is a separate no-op statement and the field list
# is silently cut off after its first line.

# Income statement fields.
lrb_files = (
    'ts_code,end_date,report_type,basic_eps,diluted_eps,total_revenue,'
    'sell_exp,admin_exp,fin_exp,total_profit,income_tax,n_income'
)
# Balance sheet fields.
zcfzb_files = (
    'ts_code,end_date,report_type,money_cap,notes_receiv,accounts_receiv,oth_receiv,inventories,total_cur_assets,'
    'cip,r_and_d,total_assets,total_cur_liab,total_liab,minority_int'
)
# Cash-flow statement fields.
xjllb_files = (
    'ts_code,end_date,report_type,net_profit,finan_exp,c_fr_sale_sg,c_inf_fr_operate_a,c_paid_to_for_empl,'
    'st_cash_out_act,n_cashflow_act,n_cashflow_inv_act,n_cash_flows_fnc_act'
)
# Financial indicator fields.
cwzb_files = (
    'ts_code,end_date,'
    'profit_dedt,gross_margin,inv_turn,ar_turn,ca_turn,fa_turn,assets_turn,fcff,'
    'ocfps,netprofit_margin,grossprofit_margin,cogs_of_sales,expense_of_sales,roe,'
    'roe_yearly,debt_to_assets,fixed_assets,rd_exp'
)
# Download the four tables from tushare. A failed round is retried a
# bounded number of times; the previous unbounded `while True` loop would
# spin forever (with no delay) on a permanent error such as a bad token.
max_attempts = 5
for attempt in range(1, max_attempts + 1):
    try:
        # Income statement
        lr_df = pro.income(ts_code=my_code, start_date=fbegdate, end_date=fenddate, fields=lrb_files)
        # Pause between requests -- presumably to stay under the API rate
        # limit; TODO confirm against the account's quota.
        sleep(2)
        # Balance sheet
        zcfz_df = pro.balancesheet(ts_code=my_code, start_date=fbegdate, end_date=fenddate, fields=zcfzb_files)
        sleep(2)
        # Cash-flow statement
        xjll_df = pro.cashflow(ts_code=my_code, start_date=fbegdate, end_date=fenddate, fields=xjllb_files)
        sleep(2)
        # Main financial indicators
        cwzb_df = pro.fina_indicator(ts_code=my_code, start_date=fbegdate, end_date=fenddate, fields=cwzb_files)
        sleep(2)
        break
    except Exception as e:
        print(e)
        if attempt == max_attempts:
            # Give up and surface the last error instead of looping forever.
            raise
        # Wait a little before starting the next round of requests.
        sleep(2)
# Derive year and month columns from the report date and insert them at
# fixed positions (index 0 is the first column). The date is sliced as a
# YYYYMMDD string; previously both columns were plain copies of end_date.
end_date_text = lr_df['end_date'].astype(str)
lr_df.insert(1, 'fyear', end_date_text.str[:4])
lr_df.insert(2, 'fperiod', end_date_text.str[4:6])

# Drop duplicate rows from every table before merging. The cash-flow and
# indicator tables were previously merged under names that were never
# assigned (nxjll_df / ncwzb_df), which raised NameError.
nlr_df = lr_df.drop_duplicates(subset=['ts_code', 'end_date', 'report_type'], keep='first')
nzcfz_df = zcfz_df.drop_duplicates(subset=['ts_code', 'end_date', 'report_type'], keep='first')
nxjll_df = xjll_df.drop_duplicates(subset=['ts_code', 'end_date', 'report_type'], keep='first')
# The indicator table is requested without report_type, so it is keyed on
# stock code and report date only.
ncwzb_df = cwzb_df.drop_duplicates(subset=['ts_code', 'end_date'], keep='first')

# Join the four tables on stock code and report date. The first merge
# suffixes the clashing report_type columns with _x / _y; the cash-flow
# table's report_type then keeps its plain name.
hz_df1 = pd.merge(nlr_df, nzcfz_df, on=['ts_code', 'end_date'])
hz_df2 = pd.merge(hz_df1, nxjll_df, on=['ts_code', 'end_date'])
hz_df3 = pd.merge(hz_df2, ncwzb_df, on=['ts_code', 'end_date'])
# Optional convenience step: translate the tushare field names of the merged
# table into Chinese column labels.
# Keys are the column names present after merging; report_type_x and
# report_type_y are the suffixed report_type columns of the income statement
# and balance sheet, while the plain report_type comes from the cash-flow
# statement.
_COLUMN_NAMES_ZH = {
    'ts_code': '股票代码',
    'end_date': '时间',
    'fyear': '年度',
    'fperiod': '月份',
    # Income statement
    'report_type_x': '利润表类型',
    'basic_eps': '基本每股收益',
    'diluted_eps': '稀释每股收益',
    'total_revenue': '营业总收入',
    'sell_exp': '销售费用',
    'admin_exp': '管理费用',
    'fin_exp': '财务费用',
    'total_profit': '利润总额',
    'income_tax': '所得税费用',
    'n_income': '净利润(含少数股东损益)',
    # Balance sheet
    'report_type_y': '资产负债表类型',
    'money_cap': '货币资金',
    'notes_receiv': '应收票据',
    'accounts_receiv': '应收账款',
    'oth_receiv': '其他应收款',
    'inventories': '存货',
    'total_cur_assets': '流动资产合计',
    'cip': '在建工程',
    'r_and_d': '研发支出',
    'total_assets': '资产总计',
    'total_cur_liab': '流动负债合计',
    'total_liab': '负债合计',
    'minority_int': '少数股东权益',
    # Cash-flow statement
    'report_type': '现金流量表类型',
    'net_profit': '净利润',
    'finan_exp': '(现)财务费用',
    'c_fr_sale_sg': '销售商品、提供劳务收到的现金',
    'c_inf_fr_operate_a': '经营活动现金流入小计',
    'c_paid_to_for_empl': '支付给职工以及为职工支付的现金',
    'st_cash_out_act': '经营活动现金流出小计',
    'n_cashflow_act': '经营活动产生的现金流量净额',
    'n_cashflow_inv_act': '投资活动产生的现金流量净额',
    'n_cash_flows_fnc_act': '筹资活动产生的现金流量净额',
    # Financial indicators
    'profit_dedt': '扣除非经常性损益后的净利润',
    'gross_margin': '毛利',
    'inv_turn': '存货周转率',
    'ar_turn': '应收账款周转率',
    'ca_turn': '流动资产周转率',
    'fa_turn': '固定资产周转率',
    'assets_turn': '总资产周转率',
    'fcff': '企业自由现金流量',
    'ocfps': '每股经营活动产生的现金流量净额',
    'netprofit_margin': '销售净利率',
    'grossprofit_margin': '销售毛利率',
    'cogs_of_sales': '销售成本率',
    'expense_of_sales': '销售期间费用率',
    'roe': '净资产收益率',
    'roe_yearly': '年化净资产收益率',
    'debt_to_assets': '资产负债率',
    'fixed_assets': '固定资产合计',
    'rd_exp': '研发费用',
}


def renname(df):
    """Rename known tushare columns of ``df`` to Chinese labels, in place.

    Each original column name is printed before renaming. Columns that have
    no entry in the mapping keep their current name.

    Args:
        df: pandas DataFrame whose columns should be relabelled.

    Returns:
        The same DataFrame object, with its columns renamed.
    """
    # Echo the original column names, as the script has always done.
    for column in df.columns:
        print(column)
    # A single rename call with the full mapping replaces the per-column
    # if/elif chain; unknown columns are left untouched by pandas.
    df.rename(columns=_COLUMN_NAMES_ZH, inplace=True)
    return df
# Relabel the merged table's columns via the rename helper; this step is
# optional and can be left out.
renname(hz_df3)
# Save the merged result as an Excel workbook for later use; the file name
# is built from the stock code.
output_path = 'C:/Test/%s_cbhz.xlsx' % my_code
hz_df3.to_excel(output_path)
print('完成')