Home > 如何下载上市公司年报

如何下载上市公司年报

0

一、高速公路公司名单
http://summary.jrj.com.cn/hybk/400128959.shtml

http://data.eastmoney.com/bkzj/421.html

二、下载数据
沪市:
http://www.sse.com.cn/disclosure/listedinfo/regular/

深市:
http://www.szse.cn/disclosure/listed/fixed/index.html

三、源码下载

#coding:utf-8
import re
import urllib.request
import random
import os


def html_f(code):
    """Fetch the Sina Finance bulletin listing page for one stock.

    The page requested is the ``page_type/ndbg`` listing for the given
    stock id (presumably the annual-report list -- confirm against the
    site).  The body is decoded as GBK, echoed to stdout and returned.

    Args:
        code: Stock code inserted into the listing URL, e.g. ``'600035'``.

    Returns:
        The decoded HTML of the listing page as a ``str``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response is not valid GBK.
    """
    url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCB_Bulletin/stockid/%s/page_type/ndbg.phtml'%code
    # Use the response as a context manager so the connection is closed
    # even when read()/decode() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('gbk')
    print(html)
    return html

def targrt_f(html):
    """Extract every ``&id=`` fragment with a seven-digit id from *html*.

    Args:
        html: Text of a bulletin listing page.

    Returns:
        A list of the matched substrings (each of the form
        ``&id=NNNNNNN``) in the order they occur.  The list is also
        printed to stdout.
    """
    id_pattern = re.compile(r'&id=[0-9]{7}')
    fragments = [match.group(0) for match in id_pattern.finditer(html)]
    print(fragments)
    return fragments

def title_f(html):
    """Extract the link texts that are followed by ``</a><br>`` in *html*.

    Args:
        html: Text of a bulletin listing page.

    Returns:
        A list with the captured text of every ``>...</a><br>`` match,
        in document order.  The list is also printed to stdout.
    """
    title_pattern = re.compile(r">(.*?)</a><br>")
    titles = [match.group(1) for match in title_pattern.finditer(html)]
    print(titles)
    return titles




def download(target,path,name):
    """Download the PDF linked from one bulletin detail page.

    Fetches the detail page addressed by *target*, takes the first PDF
    link found in it and saves that file as ``<name>.pdf`` inside
    *path*.  The download is best-effort: when the page contains no
    matching link, or the transfer/file write fails, ``pass`` is printed
    and the function returns normally.

    Args:
        target: Query-string fragment such as ``&id=1234567``; it is
            appended verbatim to the detail-page URL.
        path: Directory in which the PDF is stored.
        name: File name to use, without the ``.pdf`` extension.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the detail page itself cannot be fetched.
        UnicodeDecodeError: If the detail page is not valid GBK.
    """
    # NOTE(review): stockid is fixed to 000048 no matter which stock is
    # being processed; presumably the page is resolved by the id inside
    # `target` -- confirm.
    url = 'http://vip.stock.finance.sina.com.cn/corp/view/vCB_AllBulletinDetail.php?stockid=000048%s'%target
    with urllib.request.urlopen(url) as response:
        url_html = response.read().decode('gbk')

    # Dots are escaped so they only match literal dots in the host name
    # and the file extension.
    target_pdf = re.findall(
        r"http://file\.finance\.sina\.com\.cn/211\.154\.219\.97:9494/MRGG/CNSESZ_STOCK/.*?\.PDF",
        url_html,
    )

    if not target_pdf:
        # No PDF link on the page: nothing to download.
        print("pass")
        return

    print(target_pdf[0])
    try:
        # os.path.join picks the right separator for the current platform.
        urllib.request.urlretrieve(target_pdf[0], os.path.join(path, '%s.pdf' % name))
    except OSError:
        # Covers URLError/HTTPError/ContentTooShortError and local file
        # errors, without hiding KeyboardInterrupt or programming mistakes.
        print("pass")

# Stock codes processed by the download loop below.
codes=['000885','600035','600012','600033','000900','600003','200429','000916','601188','000548','600350']

# Second batch of stock codes; it is not used by the loop below.
code2 = ['601518','000755','600106','601107','000828','001965','600269','000429','600368','600548','600020','600377']

for code in codes:
    # One output directory per stock code.
    mkpath="C:\\Users\\Administrator\\Desktop\\e\\{}".format(code)
    # exist_ok lets the script be re-run without failing on directories
    # created by a previous run.
    os.makedirs(mkpath, exist_ok=True)
    html = html_f(code)
    target_list = targrt_f(html)
    name = title_f(html)

    # zip() stops at the shorter of the two lists, so surplus titles or
    # ids are ignored without any explicit truncation.
    for each,title in zip(target_list,name):
        print(each,title)
        download(each,mkpath,title)

四、生成股票列表

import sys
# Collect the first comma-separated field of every line in stock.txt.
with open('stock.txt','r') as stock_file:
    result = [row.strip('\n').split(',')[0] for row in stock_file]
print(result)
本文暂无标签

发表评论

*

*