import sqlite3
con=sqlite3.connect('posted.db')
print('Opened database successfully')
cursor = con.cursor()
# cur.execute('''CREATE TABLE IF NOT EXISTS herbel
#(ID INT PRIMARY KEY NOT NULL,
#ch_name TEXT NOT NULL,
# age INT NOT NULL,
# address CHAR(50),
# salary REAL
# );''')
cursor.execute('CREATE TABLE IF NOT EXISTS posturl(id INTEGER PRIMARY KEY,url TEXT,url_md5 TEXT)') #插入有逗号的内容,用两个单引号替换一个单引号
print('Table created successfully')
con.commit()
#connection.close()
#查看数据库中是否已经存在url
def get_data_from_db(url_md5):
cursor.execute('''select * from posturl where url_md5='%s' ''' % (url_md5))
results = cursor.fetchall()
if len(results) > 0:
return results
else:
return None
#将采集完成的url写入本地数据posted数据库中,防止重复采集。
def posted_url_to_db(url,url_md5):
# url_md5 = get_md5(url)
sql ="INSERT or IGNORE INTO posturl(url,url_md5) VALUES ('%s','%s')"%(url,url_md5) #最前面的引号要变成双引号,ignore表示忽略重复数据,不过先要设定索引
try:
cursor.execute(sql)
tag_id = cursor.lastrowid
con.commit()
print('url为{}写入数据库成功!md5为{}'.format(url,url_md5))
print("*"*150)
except:
con.rollback()
traceback.print_exc()
print('url为{}写入数据库失败!'.format(url))
tag_id = 0
print("*"*150)
cursor.close()
con.close()