import time import pymysql import pymongo from get_json import getsplit, get_json_str from sql import user, hosp import os import json def save(filename, contents): fd = filename.strip(filename.split('/')[-1]) if not os.path.exists(fd): os.makedirs(fd) with open(filename, 'w', encoding = 'utf-8') as fh: fh.write(contents) fh.close() mysqldb = pymysql.connect(host = '122.112.141.214', user = 'chyy_demo', passwd = 'Waw8DXh3fJJ-', port = 3306, db = 'shchyy_dw', charset = 'utf8') mysqlcur = mysqldb.cursor() # 生成游标对象 mysqlcur.execute(user) # # 创建mongodb数据库连接 # mongodb = pymongo.MongoClient(host='localhost', port=27017) # # 获取数据库 # db = mongodb['cancer'] # 或者db=client.pp,相当于数据库中的use pp; dinfo0 = [] # 定义一个函数来处理字段拆分 dinfo1 = [] # 定义一个函数来处理字段拆分 dinfo2 = [] # 定义一个函数来处理字段拆分 def handle_json_str(str, hos): data = json.loads(str) data['reportContent']['isPrivate'] = 0 data['hospitalBean'] = hos data['progressStatus'] = 0 data['auditStatus'] = 4 data['auditRecords'] = [ { "create_time": "2018-02-08 17:11:11", "status": 1, "auditUserId": "superadmin", "auditUserName": "超级管理员" } ] data['noticeFlag'] = 0 data['dateSource'] = 1 data['isPrivate'] = 0 data['isDeleted'] = 0 data['handler'] = 'tsh' # print(data) return data def get_hospital_bean(patient_id, hospitalId, hosp_list): # print(patient_id) qur2 = mysqldb.cursor() qur2.execute(hosp(patient_id)) hos = qur2.fetchone() # print(get_hos_oid(hosp_list, '上海长海医院')) # print(get_hos_other_info(hosp_list, '上海长海医院', 'shortName')) # print(get_hos_other_info(hosp_list, '上海长海医院', 'hospitalNumber').rjust(6, '0')) return { "hospitalId": get_hos_oid(hosp_list, hos[2]) if hos is not None else '', "hospitalName": hos[2] if hos is not None else '', "shortName": get_hos_other_info(hosp_list, hos[2], 'shortName') if hos is not None else '', "hospitalNumber": get_hos_other_info(hosp_list, hos[2], 'hospitalNumber').rjust(6, '0') if hos is not None else hospitalId, "ownership": get_hos_other_info(hosp_list, hos[2], 'ownership') if hos is not None else '', "mechanismCategory": get_hos_other_info(hosp_list, hos[2], 'mechanismCategory') if hos is not None else '', "hospitalRank": get_hos_other_info(hosp_list, hos[2], 'hospitalRank') if hos is not None else '', "hospitalHierarchy": get_hos_other_info(hosp_list, hos[2], 'hospitalHierarchy') if hos is not None else '', "provinceId": get_hos_other_info(hosp_list, hos[2], 'provinceId') if hos is not None else '', "provinceName": get_hos_other_info(hosp_list, hos[2], 'provinceName') if hos is not None else '', "prefectureId": get_hos_other_info(hosp_list, hos[2], 'prefectureId') if hos is not None else '', "prefectureName": get_hos_other_info(hosp_list, hos[2], 'prefectureName') if hos is not None else '', "roles": [ { "roleId": "5db654b23bce1ecbbfc96dff", "roleCode": "1001", "roleName": "超级管理员" } ] } def get_hos_oid(json_array, name): try: return [obj for obj in json_array if obj['hospitalName'] == name][0]['_id']['$oid'] except IndexError as e: return None def get_hos_other_info(json_array, name, rt_field): try: return [obj for obj in json_array if obj['hospitalName'] == name][0][rt_field] except IndexError as e1: return None except KeyError as e2: return "" if __name__ == '__main__': hosp_list = [] with open('./data/hospital.jsonl', 'r') as f: for i in f.readlines(): hosp_list.append(json.loads(i.split('\n')[0])) count = 0 err_num = 0 json_list = [] for row in mysqlcur.fetchall(): # 循环处理所有记录 count += 1 try: diagnose_info0 = row[33] # 筛查 diagnose_info1 = row[53] # 第一次随访 diagnose_info2 = row[73] # 第二次随访 if diagnose_info0: dinfo0 = getsplit(diagnose_info0) # 定义一个函数来处理字段拆分 if diagnose_info1: dinfo1 = getsplit(diagnose_info1) # 定义一个函数来处理字段拆分 if diagnose_info2: dinfo2 = getsplit(diagnose_info2) # 定义一个函数来处理字段拆分 patient_id = row[0] hospitalId = row[2] hos = get_hospital_bean(patient_id, hospitalId, hosp_list) json_list.append(handle_json_str(get_json_str(row, dinfo0, dinfo1, dinfo2), hos)) # print(handle_json_str(get_json_str(row, dinfo0, dinfo1, dinfo2), hos)) print("\r%d : %s ✓" % (count, row[1]), end = "") # save('/Users/Bureaux/Documents/workspace/PyCharmProjects/mongodb2mysql/data/tb_lab_inspect/' + row[1] + '.json', # json.dumps(handle_json_str(get_json_str(row, dinfo0, dinfo1, dinfo2), hos), ensure_ascii = False)) except Exception as e: print("%d : %s %s Error! msg: %s" % (count, row[0], row[1], str(e))) err_num += 1 save('/Users/Bureaux/Documents/workspace/PyCharmProjects/mongodb2mysql/data/tb_lab_inspect/test.json', json.dumps(json_list, ensure_ascii = False)) print('\nhandled: %d, success: %d, error: %d' % (count, count - err_num, err_num))