import reimport jsonimport timeimport pymysqlimport requests
# Endpoint that get_html() is expected to be called with; the query string
# is supplied separately through PARAMS.
URL = 'https://c.y.qq.com/base/fcgi-bin/fcg_global_comment_h5.fcg?'

# Request headers: only a desktop browser user-agent is sent.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Query-string parameters for the comment request. All values are strings.
# NOTE(review): 'pagenum' / 'lasthotcommentid' look like paging controls —
# confirm against the caller that updates them.
PARAMS = {
    'g_tk': '5381',
    'jsonpCallback': 'jsoncallback4823183319594757',
    'loginUin': '0',
    'hostUin': '0',
    'format': 'jsonp',
    'inCharset': 'utf8',
    'outCharset': 'GB2312',
    'notice': '0',
    'platform': 'yqq',
    'needNewCode': '0',
    'cid': '205360772',
    'reqtype': '2',
    'biztype': '1',
    'topid': '213910991',
    'cmd': '8',
    'needmusiccrit': '0',
    'pagenum': '0',
    'pagesize': '25',
    'lasthotcommentid': '',
    'callback': 'jsoncallback4823183319594757',
    'domain': 'qq.com',
    'ct': '24',
    'cv': '101010',
}

# Not referenced by any function visible in this part of the file.
LAST_COMMENT_ID = ''

# Module-level MySQL connection and cursor shared by to_mysql().
# NOTE(review): credentials are hard-coded and the connection is opened at
# import time — consider moving both behind configuration.
db = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='774110919',
    port=3306,
    db='QQ_Music',
    charset='utf8mb4',
)
cursor = db.cursor()
def get_html(url, headers, params=None, tries=3):
    """Fetch ``url`` with an HTTP GET, retrying when the server answers with an error status.

    Args:
        url: Address to request.
        headers: HTTP headers sent with the request.
        params: Optional query-string parameters; the same parameters are
            sent again on every retry.
        tries: Number of retries allowed after the first failed attempt.

    Returns:
        The response object with its encoding set to ``'utf-8'``, or ``None``
        when the first attempt and every retry ended in an HTTP error status.

    Only ``requests.HTTPError`` (raised by ``raise_for_status``) triggers a
    retry; any other exception raised by ``requests.get`` propagates.
    """
    retries_left = tries
    # A loop instead of recursion: the retry must reuse the original params
    # and must actually count down, and its result must be what is returned.
    while True:
        try:
            response = requests.get(url=url, headers=headers, params=params)
            response.raise_for_status()
        except requests.HTTPError:
            print("connect failed")
            if retries_left <= 0:
                print("3 times failure")
                return None
            print("reconnect...")
            retries_left -= 1
        else:
            response.encoding = 'utf-8'
            return response
def paras_html(html):
    """Parse a wrapped JSON comment payload and yield one record per comment.

    Args:
        html: Response text. The JSON document is taken from ``html[29:-3]``;
            it must contain ``comment.commentlist``, a list of comment objects.

    Yields:
        A new dict for every comment with the keys ``"nike"``, ``"comment"``,
        ``"praisenum"``, ``"comment_id"`` and ``"time"`` (in that order).
        ``"comment"`` has newlines replaced by spaces and is cut to 255
        characters; ``"time"`` is the comment's epoch timestamp formatted as
        local time ``YYYY-MM-DD HH:MM:SS``.
    """
    # Fixed-width unwrap: drops 29 leading and 3 trailing characters.
    # Presumably the JSONP callback name plus "(" and the closing wrapper —
    # TODO confirm against a real response.
    content = json.loads(html[29:-3])
    for item in content['comment']['commentlist']:
        # Replace both the two-character sequence backslash+n and real
        # newline characters, then truncate to 255 characters.
        text = re.sub(r"\\n", " ", item.get("rootcommentcontent"))
        text = re.sub(r"\n", " ", text)[0:255]
        posted_at = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(int(item.get("time")))
        )
        # Build a fresh dict per comment so records collected by the caller
        # do not alias (and overwrite) one another.
        yield {
            "nike": item.get("nick"),
            "comment": text,
            "praisenum": item.get("praisenum"),
            "comment_id": item.get("commentid"),
            "time": posted_at,
        }
def to_mysql(data):
    """Insert ``data`` into the ``comments`` table, updating the row on a duplicate key.

    Args:
        data: Mapping of column name to value. The keys are interpolated into
            the SQL text as column names, so they must come from trusted
            code; the values are passed as bound parameters.

    Uses the module-level ``cursor`` and ``db``. Prints ``'Successful'`` when
    ``cursor.execute`` reports a truthy result and ``'Failed'`` when the
    statement raises. The transaction is committed when the statement ran
    without error and rolled back otherwise.
    """
    table = 'comments'
    columns = ', '.join(data)
    placeholders = ', '.join(['%s'] * len(data))
    assignments = ','.join(" {key} = %s".format(key=key) for key in data)
    sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE'.format(
        table=table, keys=columns, values=placeholders
    ) + assignments
    try:
        # Values are bound twice: once for VALUES (...), once for the
        # ON DUPLICATE KEY UPDATE assignments.
        if cursor.execute(sql, tuple(data.values()) * 2):
            print('Successful')
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        print('Failed')
        db.rollback()
    else:
        db.commit()
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported.
# NOTE(review): main() is not defined in the visible part of this file —
# confirm it exists elsewhere.
if __name__ == '__main__': main()
# 评论 (Comments)