import sys, os import pathlib current_dir = pathlib.Path(__file__).parent.resolve() while "cls_train" != current_dir.name: current_dir = current_dir.parent sys.path.append(current_dir.as_posix()) from cls_utils.log_utils import get_logger from sqlalchemy import create_engine, and_ from sqlalchemy.orm import sessionmaker, scoped_session import sys import os import numpy as np import argparse import threading from tqdm import tqdm import pandas as pd from datetime import datetime import json import re import threading from pathlib import Path import scipy import SimpleITK as sitk from joblib import Parallel, delayed from scipy.ndimage import rotate as scipy_rotate import torch import torchio as tio import torch.nn.functional as F from multiprocessing import Process from torchvision import transforms as T import cupy as cp import radiomics from cupyx.scipy.ndimage import rotate as cupy_rotate from data.domain import DicomStudy, PatientInfo, UserLabel, UserLabelDelineation, DicomSeries from data.data_process_utils.test_sitk_utils import CTSeries, base64_to_list, meta_to_list from PIL import Image from sklearn.preprocessing import StandardScaler from collections import defaultdict logger = get_logger(log_file="/df_lung/ai-project/cls_train/log/data/get_db_data_to_feat.log") # from cls_utils.sitk_utils import CTSeries # from cls_utils.data_utils import crop_ct_data, get_crop_data_padding, get_crop_data_2d # from cls_utils.utils import hu_value_to_uint8, normalize, base64_to_list # from cls_utils.data import save_supplement_data_csv, save_data_to_npy, load_npy_to_data, create_cls_train_csv, \ # load_all_dicom_file, load_json, create_cls_train_all_csv, create_cls_train_csv_3d, \ # replace_label_ids, add_label_ids, create_cls_train_last_3d MYSQL_SERVER = 'mysql+pymysql://lung:lung1qaz2wsx@127.0.0.1:3306/ct_file?charset=utf8' """ 连接数据库,返回一个session """ def conect_mysql(): engine = create_engine(MYSQL_SERVER, pool_recycle=3600) #onnection = engine.connect() db_session = sessionmaker(bind=engine) session = scoped_session(db_session) return session def get_cts(dicom_path=None): cts = CTSeries() cts.load_dicoms(dicom_path) return cts def generate_node_all_label_id_df(node_time=None): ''' 查询条件: 1、系统显示 dicom_file_study.status != 5 patient_info.status != 1 2、标注状态是正常 user_label.status != 1 关联查询 user_label.study_id = dicom_file_study.id # user_label.pid == Null dicom_file_study.patient_info_id = patient_info.id 查询步骤: 1、先查询user_label所有数据,再过滤 2、根据dicom_file_study、patient_info 筛选数据 3、筛选条件: user_label.study_id = dicom_file_study.id dicom_file_study.patient_info_id = patient_info.id dicom_file_study.status != 5 patient_info.status != 1 user_label.status != 1 user_label.deleted_time == None user_label.node_time == node_time 返回值: label_ids: 所有label_id ''' if node_time is None: return None session = conect_mysql() logger.info(f"start query") query = session.query( UserLabel.node_time, UserLabel.id, PatientInfo.patient_id, UserLabel.study_id, UserLabel.series_id, DicomStudy.study_uid, DicomStudy.folder_name, DicomSeries.series_instance_uid ).join( DicomStudy, UserLabel.study_id == DicomStudy.id ).join( PatientInfo, DicomStudy.patient_info_id == PatientInfo.id ).join( DicomSeries, UserLabel.series_id == DicomSeries.id ).filter( and_( DicomStudy.status != 5, PatientInfo.status != 1, UserLabel.status != 1, UserLabel.deleted_time == None, UserLabel.node_time == node_time ) ) result = query.all() node_times = [row[0] for row in result] label_ids = [row[1] for row in result] patient_ids = [row[2] for row in result] study_ids = [row[3] for row in result] series_ids = [row[4] for row in result] study_uids = [row[5] for row in result] folder_names = [row[6] for row in result] series_instance_uids = [row[7] for row in result] session.close() df = pd.DataFrame({'node_time': node_times, 'label_id': label_ids, 'patient_id': patient_ids, 'study_id': study_ids, 'series_id': series_ids, 'study_uid': study_uids, 'folder_name': folder_names, 'series_instance_uid': series_instance_uids}) df["patient_id"] = df["patient_id"].astype(str) df["study_id"] = df["study_id"].astype(str) df["series_id"] = df["series_id"].astype(str) df["study_uid"] = df["study_uid"].astype(str) df["folder_name"] = df["folder_name"].astype(str) df["series_instance_uid"] = df["series_instance_uid"].astype(str) return df def select_single_label_id(label_id=None): session = conect_mysql() label = session.query(UserLabel).filter( and_(UserLabel.id == label_id)).first() if label is None: return None, f"{label_id}, 标注数据不存在" node_time = label.node_time bundle = session.query(DicomSeries).filter( and_(DicomSeries.id == label.series_id)).first() if bundle is None: return None, f"{label_id}, 关联的dicom数据不存在" delineations = session.query(UserLabelDelineation).filter( and_(UserLabelDelineation.label_id == label_id, UserLabelDelineation.status == 0)).order_by( UserLabelDelineation.z_index.asc()).all() session.close() return (label, bundle, delineations), "success" def generate_single_series_raw_data_3d_by_label_id(label_id=None, dicom_folder=""): (label, bundle, delineations), result = select_single_label_id(label_id=label_id) patient_id = bundle.patient_id series_instance_uid = bundle.series_instance_uid data, selected_box, node_time, raw_cood_3d, z_index_list_3d = None, None, None, None, None if result != "success": return data, selected_box, node_time, patient_id, series_instance_uid, raw_cood_3d, z_index_list_3d dicom_path = f"{dicom_folder}/{patient_id}-{series_instance_uid}" cts = get_cts(dicom_path) data = cts.get_raw_data() spacing = cts.get_raw_spacing() mask = np.zeros((len(data), len(data[1]), len(data[1][1])), np.uint8) node_time = label.node_time z_count = 0 for delineation in delineations: if (delineation.contour is None or len(delineation.contour) == 0) and delineation.meta is None: continue indexlist, indexs, img_np = base64_to_list(delineation.contour) if delineation.contour is None and delineation.meta: indexlist, indexs, img_np = meta_to_list(delineation.meta, mask[0].copy()) mask[delineation.z_index] = img_np z_count += 1 if mask is not None and np.sum(mask == 1) > 0: coords = np.asarray(np.where(mask == 1)) selected_box = np.zeros((3, 2), np.float32) selected_box[:, 0] = coords.min(axis=1) selected_box[:, 1] = coords.max(axis=1) + 1 select_box_z_count = selected_box[0][1] - selected_box[0][0] logger.info(f"selected_box: {selected_box}, select_box_z_count: {select_box_z_count}, z_count: {z_count}, data: {data.shape}, cood: {mask.shape}") raw_cood_3d = mask z_index_list_3d = list(range(int(selected_box[0][0]), int(selected_box[0][0]) + z_count)) return data, selected_box, node_time, patient_id, series_instance_uid, raw_cood_3d, z_index_list_3d def generate_raw_data_3d_npy_data_by_single_label_id(label_id=None, dicom_folder="", save_path="", is_save_flag=False): patient_id, series_instance_uid, raw_data_3d_npy_file, raw_cood_3d_npy_file, z_index_list_3d = None, None, None, None, None if label_id is None: return patient_id, series_instance_uid, raw_data_3d_npy_file, raw_cood_3d_npy_file, z_index_list_3d data, selected_box, node_time, patient_id, series_instance_uid, raw_cood_3d, z_index_list_3d = generate_single_series_raw_data_3d_by_label_id(label_id=label_id, dicom_folder=dicom_folder) if not os.path.exists(save_path): Path(save_path).mkdir(parents=True, exist_ok=True) raw_data_3d_npy_file = f"{save_path}/{node_time}_{label_id}_raw_data_3d.npy" raw_cood_3d_npy_file = f"{save_path}/{node_time}_{label_id}_raw_cood_3d.npy" if is_save_flag: np.save(raw_data_3d_npy_file, data) np.save(raw_cood_3d_npy_file, raw_cood_3d) logger.info(f"save 3d npy data -> {raw_data_3d_npy_file}\n{raw_cood_3d_npy_file}") return patient_id, series_instance_uid, raw_data_3d_npy_file, raw_cood_3d_npy_file, z_index_list_3d def generate_raw_data_3d_npy_data_by_all_label_id_df(csv_file=None, node_raw_data_3d_npy_file=None, dicom_folder="/opt/lung/ai", save_path="", is_save_flag=False): node_df = pd.read_csv(csv_file) data_3d_node_list = [] data_3d_label_id_list = [] raw_data_3d_npy_file_list = [] raw_cood_3d_npy_file_list = [] data_3d_z_index_list = [] data_3d_patient_id_list = [] data_3d_series_instance_uid_list = [] for idx in tqdm(range(len(node_df))): node_time = node_df.loc[idx, 'node_time'] label_id = node_df.loc[idx, 'label_id'] idx_patient_id = node_df.loc[idx, 'patient_id'] idx_series_instance_uid = node_df.loc[idx, 'series_instance_uid'] patient_id, series_instance_uid, raw_data_3d_npy_file, raw_cood_3d_npy_file, z_index_list_3d = generate_raw_data_3d_npy_data_by_single_label_id( label_id=label_id, dicom_folder=dicom_folder, save_path=save_path, is_save_flag=is_save_flag ) if patient_id is None and series_instance_uid is None: raise Exception(f"node_df, idx: {idx}, is None, {node_df[idx:idx+1]}") data_3d_node_list += [node_time] data_3d_label_id_list += [label_id] raw_data_3d_npy_file_list += [raw_data_3d_npy_file] raw_cood_3d_npy_file_list += [raw_cood_3d_npy_file] data_3d_z_index_list += [[z_index_list_3d]] data_3d_patient_id_list += [patient_id] data_3d_series_instance_uid_list += [series_instance_uid] raw_data_3d_df = pd.DataFrame({ 'node': data_3d_node_list, 'label_id': data_3d_label_id_list, 'patient_id': data_3d_patient_id_list, 'raw_data_3d_npy_file': raw_data_3d_npy_file_list, 'raw_cood_3d_npy_file': raw_cood_3d_npy_file_list, 'z_index': data_3d_z_index_list, 'series_instance_uid': data_3d_series_instance_uid_list }) if is_save_flag: raw_data_3d_df.to_csv(node_raw_data_3d_npy_file, index=False, encoding="utf-8") logger.info(f"generate raw data_3d, save to {node_raw_data_3d_npy_file}") return def get_feature_excutor(): config = { 'binWidth': 25, 'resampledPixelSpacing': None, 'normalize': True, 'normalizeScale': 1.0, 'featureClass': { 'shape': True, 'firstorder': True, 'glcm': True, 'glrlm': True, 'glszm': True, 'ngtdm': True, 'gldm': True } } extractor = radiomics.featureextractor.RadiomicsFeatureExtractor(**config) extractor.settings['normalize'] = True extractor.settings['normalizeScale'] = 1.0 return extractor def process_features(args): csv_file = args[0] node_feature_file = args[1] is_save_flag = args[2] df = pd.read_csv(csv_file, header=0, encoding = 'utf-8') from collections import OrderedDict def parse_feat(feature): result = OrderedDict() for idx_feature, idx_score in feature.items(): if isinstance(idx_score, np.ndarray): idx_score = idx_score.tolist() elif isinstance(idx_score, (np.float16, np.float32, np.float64)): idx_score = float(idx_score) elif isinstance(idx_score, (np.int32, np.int64)): idx_score = int(idx_score) result[idx_feature] = idx_score return result node_list = [] label_id_list = [] patient_id_list = [] feature_list = [] extractor = get_feature_excutor() for idx in range(len(df)): idx_node = df.loc[idx, 'node'] idx_label_id = df.loc[idx, 'label_id'] idx_patient_id = df.loc[idx, 'patient_id'] idx_data_3d_npy_file = df.loc[idx, 'raw_data_3d_npy_file'] idx_cood_3d_npy_file = df.loc[idx, 'raw_cood_3d_npy_file'] idx_data_3d = np.load(idx_data_3d_npy_file) idx_cood = np.load(idx_cood_3d_npy_file) idx_data_3d_sitk = sitk.GetImageFromArray(idx_data_3d) idx_cood_sitk = sitk.GetImageFromArray(idx_cood.astype(np.uint8)) idx_feature = extractor.execute(idx_data_3d_sitk, idx_cood_sitk) idx_feature = parse_feat(idx_feature) idx_feature = json.dumps(idx_feature) node_list += [idx_node] label_id_list += [idx_label_id] patient_id_list += [idx_patient_id] feature_list += [idx_feature] node_feature_df = pd.DataFrame( { 'node': node_list, 'label_id': label_id_list, 'patient_id': patient_id_list, 'feature': feature_list } ) if is_save_flag: node_feature_df.to_csv(node_feature_file, index=False, encoding="utf-8") logger.info(f"process node feature, save to node_feature_file: {node_feature_file}") def get_data_3d_feature(node_csv_file_list=None, is_save_flag=False): process_args_list = [] for node_time, csv_file in node_csv_file_list: idx_node_feature_file = f"{csv_file.replace('.csv', f'_feature')}.csv" process_args_list.append( [ csv_file, idx_node_feature_file, is_save_flag ] ) process_count = len(process_args_list) process_list = [] for idx in range(process_count): process_args = process_args_list[idx] idx_process = Process(target=process_features, args=(process_args,)) idx_process.start() process_list.append(idx_process) for idx_process in process_list: idx_process.join() return def get_node_time_all_label_ids_df(node_time=None, csv_data_dir=""): if node_time is None: return None df = generate_node_all_label_id_df(node_time=node_time) task_info = datetime.now().strftime("%Y%m%d_%H%M%S") csv_file = f"{csv_data_dir}/{node_time}/{node_time}_{task_info}_rotate_10.csv" Path(csv_file).parent.mkdir(parents=True, exist_ok=True) df.to_csv(csv_file, index=False, encoding="utf-8") logger.info(f"save csv data -> {csv_file}") return csv_file def generate_feature_train_npy_csv_file(node_csv_file_list=None, csv_data_dir="", train_csv_dir="", train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, is_pad_df=True, is_save_csv=False, seed=100004): node_csv_file_dict = { idx[0]: idx[1] for idx in node_csv_file_list } node_list = list(node_csv_file_dict.keys()) from sklearn.model_selection import train_test_split def pad_df(df, max_len): if len(df) == max_len: return df elif len(df) > max_len: return df[:max_len] else: pad_df_list = [df] lens = len(df) while lens < max_len: pad_df_list.append(df) lens += len(df) pad_df = pd.concat(pad_df_list, ignore_index=True) return pad_df[:max_len] def get_expand_feature(df=None, is_norm_flag=False): expanded_data = defaultdict(list) for index, row in df.iterrows(): try: feature_dict = json.loads(row['feature']) except json.JSONDecodeError: print(f"Failed to decode JSON at index {index}") continue for key, value in feature_dict.items(): if isinstance(value, list): for i, item in enumerate(value): col_name = f"{key}_{i}" expanded_data[col_name].append(item) else: expanded_data[key].append(value) expanded_df = pd.DataFrame(expanded_data) expanded_df.index = df.index df = pd.concat([df.drop(columns=['feature']), expanded_df], axis=1) filter_col_list = [ 'node', 'label_id', 'patient_id', 'class', 'diagnostics_Versions_PyRadiomics', 'diagnostics_Versions_Numpy', 'diagnostics_Versions_SimpleITK', 'diagnostics_Versions_PyWavelet', 'diagnostics_Versions_Python', 'diagnostics_Configuration_Settings', 'diagnostics_Configuration_EnabledImageTypes', 'diagnostics_Image-original_Hash', 'diagnostics_Image-original_Dimensionality', 'diagnostics_Mask-original_Hash', ] feature_list = df.columns.tolist() filter_col_list = list(set(feature_list)-set(filter_col_list)) if is_norm_flag: scaler = StandardScaler() selected_features = df[filter_col_list] normalized_features = scaler.fit_transform(selected_features) normalized_features_df = pd.DataFrame(normalized_features, columns=[f"{col}_zscore" for col in selected_features.columns]) df = pd.concat([df, normalized_features_df], axis=1) return df # 遍历正负node, 返回对应的训练数据 train_csv_file_list = [] print(f"node_list: {node_list}") for idx_pos_node in node_list: for idx_neg_node in node_list: if idx_pos_node == idx_neg_node: continue idx_train_file = f"{idx_neg_node}_{idx_pos_node}_data_3d_feature_train.csv" idx_val_file = f"{idx_neg_node}_{idx_pos_node}_data_3d_feature_val.csv" idx_test_file = f"{idx_neg_node}_{idx_pos_node}_data_3d_feature_test.csv" idx_train_file = os.path.join(train_csv_dir, idx_train_file) idx_val_file = os.path.join(train_csv_dir, idx_val_file) idx_test_file = os.path.join(train_csv_dir, idx_test_file) idx_pos_df = pd.read_csv(node_csv_file_dict[idx_pos_node]) idx_neg_df = pd.read_csv(node_csv_file_dict[idx_neg_node]) idx_pos_df['class'] = 1 idx_neg_df['class'] = 0 lens_pos = len(idx_pos_df) lens_neg = len(idx_neg_df) if lens_pos < 3 or lens_neg < 3: logger.info(f"{idx_pos_node}_{idx_neg_node} count < 3, skip") continue idx_df = pd.concat([idx_pos_df, idx_neg_df], ignore_index=True) idx_df = get_expand_feature(df=idx_df, is_norm_flag=True) idx_pos_df = idx_df[idx_df['class']==1] idx_neg_df = idx_df[idx_df['class']==0] idx_pos_df = idx_pos_df.reset_index(drop=True) idx_neg_df = idx_neg_df.reset_index(drop=True) idx_pos_train_df, idx_pos_test_val_df = train_test_split(idx_pos_df, test_size=1-train_ratio, random_state=seed) idx_pos_val_df, idx_pos_test_df = train_test_split(idx_pos_test_val_df, test_size=test_ratio/(val_ratio+test_ratio), random_state=seed) idx_neg_train_df, idx_neg_test_val_df = train_test_split(idx_neg_df, test_size=1-train_ratio, random_state=seed) idx_neg_val_df, idx_neg_test_df = train_test_split(idx_neg_test_val_df, test_size=test_ratio/(val_ratio+test_ratio), random_state=seed) lens_pos_train = len(idx_pos_train_df) lens_neg_train = len(idx_neg_train_df) logger.info(f"generate_feature_train, before pos: {idx_pos_node}, {lens_pos_train}, {len(idx_pos_val_df)}, {len(idx_pos_test_df)}, neg: {idx_neg_node}, {lens_neg_train}, {len(idx_neg_val_df)}, {len(idx_neg_test_df)}") if is_pad_df: if lens_pos_train < lens_neg_train: idx_pos_train_df = pad_df(idx_pos_train_df, lens_neg_train) elif lens_pos_train > lens_neg_train: idx_neg_train_df = pad_df(idx_neg_train_df, lens_pos_train) logger.info(f"generate_feature_train, after pos: {idx_pos_node}, {len(idx_pos_train_df)}, {len(idx_pos_val_df)}, {len(idx_pos_test_df)}, neg: {idx_neg_node}, {len(idx_neg_train_df)}, {len(idx_neg_val_df)}, {len(idx_neg_test_df)}") idx_train_df = pd.concat([idx_pos_train_df, idx_neg_train_df], ignore_index=True) idx_val_df = pd.concat([idx_pos_val_df, idx_neg_val_df], ignore_index=True) idx_test_df = pd.concat([idx_pos_test_df, idx_neg_test_df], ignore_index=True) if is_save_csv: idx_train_df.to_csv(idx_train_file, index=False, encoding='utf-8') idx_val_df.to_csv(idx_val_file, index=False, encoding='utf-8') idx_test_df.to_csv(idx_test_file, index=False, encoding='utf-8') train_csv_file_list.append((idx_train_file, idx_val_file, idx_test_file)) logger.info(f"generate_feature_train, save to : {idx_train_file}\n{idx_val_file}\n{idx_test_file}") print(f"训练集信息: {train_csv_file_list}") return csv_data_dir = "/df_lung/cls_train_data/csv_data" npy_data_dir = "/df_lung/cls_train_data/npy_data" def get_train_data_info_csv(node_time_list=[]): for node_time in node_time_list: csv_file = get_node_time_all_label_ids_df(node_time=node_time, csv_data_dir=csv_data_dir) logger.info(f"{node_time}: {csv_file}\n") def process_npy(args): generate_raw_data_3d_npy_data_by_all_label_id_df( csv_file=args[0], node_raw_data_3d_npy_file=args[1], dicom_folder=args[2], save_path=args[3], is_save_flag=args[4] ) logger.info(f"process_raw_data_3d_npy finished: csv_file: {args[0]}") def get_npy_data(node_csv_file_list=[], is_save_flag=False): if False in [False for _ in node_csv_file_list if f"{_[0]}_" not in _[1]]: raise ValueError(f"node_csv_file_list: {node_csv_file_list}") process_args_list = [] for node_time, csv_file in node_csv_file_list: save_path = f"{npy_data_dir}/{node_time}" Path(save_path).mkdir(parents=True, exist_ok=True) idx_node_raw_data_3d_npy_file = f"{csv_file.replace('.csv', f'_feat_raw_data_3d_npy')}.csv" idx_node_raw_data_3d_npy_file = os.path.join(save_path, idx_node_raw_data_3d_npy_file) process_args_list.append( [ csv_file, idx_node_raw_data_3d_npy_file, "/opt/lung/ai", save_path, is_save_flag ] ) process_count = len(process_args_list) process_list = [] for idx in range(process_count): process_args = process_args_list[idx] idx_process = Process(target=process_npy, args=(process_args,)) idx_process.start() process_list.append(idx_process) for idx_process in process_list: idx_process.join() return # # 生成csv数据 # node_time_list = [2046, 2047, 2048, 2060, 2061, 2062, 3001, 4001, 5001, 6001, 1016] # get_train_data_info_csv(node_time_list=node_time_list) ''' 2021: /df_lung/cls_train_data/csv_data/2021/2021_20241204_094025_rotate_10.csv 2031: /df_lung/cls_train_data/csv_data/2031/2031_20241204_094025_rotate_10.csv 2041: /df_lung/cls_train_data/csv_data/2041/2041_20241204_094026_rotate_10.csv 1010: /df_lung/cls_train_data/csv_data/1010/1010_20241204_093726_rotate_10.csv 1020: /df_lung/cls_train_data/csv_data/1020/1020_20241204_093726_rotate_10.csv 2011: /df_lung/cls_train_data/csv_data/2011/2011_20241204_093726_rotate_10.csv 2046: /df_lung/cls_train_data/csv_data/2046/2046_20241211_155642_rotate_10.csv 2047: /df_lung/cls_train_data/csv_data/2047/2047_20241211_155642_rotate_10.csv 2048: /df_lung/cls_train_data/csv_data/2048/2048_20241211_155642_rotate_10.csv 2060: /df_lung/cls_train_data/csv_data/2060/2060_20241211_155643_rotate_10.csv 2061: /df_lung/cls_train_data/csv_data/2061/2061_20241211_155643_rotate_10.csv 2062: /df_lung/cls_train_data/csv_data/2062/2062_20241211_155643_rotate_10.csv 3001: /df_lung/cls_train_data/csv_data/3001/3001_20241211_155643_rotate_10.csv 4001: /df_lung/cls_train_data/csv_data/4001/4001_20241211_155643_rotate_10.csv 5001: /df_lung/cls_train_data/csv_data/5001/5001_20241211_155643_rotate_10.csv 6001: /df_lung/cls_train_data/csv_data/6001/6001_20241211_155643_rotate_10.csv 1016: /df_lung/cls_train_data/csv_data/1016/1016_20241211_155643_rotate_10.csv ''' # # 生成npy数据 # node_csv_file_list = [ # (2021, "/df_lung/cls_train_data/csv_data/2021/2021_20241204_094025_rotate_10.csv"), # (2031, "/df_lung/cls_train_data/csv_data/2031/2031_20241204_094025_rotate_10.csv"), # (2041, "/df_lung/cls_train_data/csv_data/2041/2041_20241204_094026_rotate_10.csv"), # (1010, "/df_lung/cls_train_data/csv_data/1010/1010_20241204_093726_rotate_10.csv"), # (1020, "/df_lung/cls_train_data/csv_data/1020/1020_20241204_093726_rotate_10.csv"), # (2011, "/df_lung/cls_train_data/csv_data/2011/2011_20241204_093726_rotate_10.csv"), # (2046, "/df_lung/cls_train_data/csv_data/2046/2046_20241211_155642_rotate_10.csv"), # (2047, "/df_lung/cls_train_data/csv_data/2047/2047_20241211_155642_rotate_10.csv"), # (2048, "/df_lung/cls_train_data/csv_data/2048/2048_20241211_155642_rotate_10.csv"), # (2060, "/df_lung/cls_train_data/csv_data/2060/2060_20241211_155643_rotate_10.csv"), # (2061, "/df_lung/cls_train_data/csv_data/2061/2061_20241211_155643_rotate_10.csv"), # (2062, "/df_lung/cls_train_data/csv_data/2062/2062_20241211_155643_rotate_10.csv"), # (3001, "/df_lung/cls_train_data/csv_data/3001/3001_20241211_155643_rotate_10.csv"), # (4001, "/df_lung/cls_train_data/csv_data/4001/4001_20241211_155643_rotate_10.csv"), # (5001, "/df_lung/cls_train_data/csv_data/5001/5001_20241211_155643_rotate_10.csv"), # (6001, "/df_lung/cls_train_data/csv_data/6001/6001_20241211_155643_rotate_10.csv"), # (1016, "/df_lung/cls_train_data/csv_data/1016/1016_20241211_155643_rotate_10.csv") # ] # is_save_flag = True # get_npy_data(node_csv_file_list=node_csv_file_list, is_save_flag=is_save_flag) ''' ''' # # 影像组学特征 # node_csv_file_list = [ # (2021, "/df_lung/cls_train_data/csv_data/2021/2021_20241204_094025_rotate_10_feat_raw_data_3d_npy.csv"), # (2031, "/df_lung/cls_train_data/csv_data/2031/2031_20241204_094025_rotate_10_feat_raw_data_3d_npy.csv"), # (2041, "/df_lung/cls_train_data/csv_data/2041/2041_20241204_094026_rotate_10_feat_raw_data_3d_npy.csv"), # (1010, "/df_lung/cls_train_data/csv_data/1010/1010_20241204_093726_rotate_10_feat_raw_data_3d_npy.csv"), # (1020, "/df_lung/cls_train_data/csv_data/1020/1020_20241204_093726_rotate_10_feat_raw_data_3d_npy.csv"), # (2011, "/df_lung/cls_train_data/csv_data/2011/2011_20241204_093726_rotate_10_feat_raw_data_3d_npy.csv"), # (2046, "/df_lung/cls_train_data/csv_data/2046/2046_20241211_155642_rotate_10_feat_raw_data_3d_npy.csv"), # (2047, "/df_lung/cls_train_data/csv_data/2047/2047_20241211_155642_rotate_10_feat_raw_data_3d_npy.csv"), # (2048, "/df_lung/cls_train_data/csv_data/2048/2048_20241211_155642_rotate_10_feat_raw_data_3d_npy.csv"), # (2060, "/df_lung/cls_train_data/csv_data/2060/2060_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (2061, "/df_lung/cls_train_data/csv_data/2061/2061_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (2062, "/df_lung/cls_train_data/csv_data/2062/2062_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (3001, "/df_lung/cls_train_data/csv_data/3001/3001_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (4001, "/df_lung/cls_train_data/csv_data/4001/4001_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (5001, "/df_lung/cls_train_data/csv_data/5001/5001_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (6001, "/df_lung/cls_train_data/csv_data/6001/6001_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv"), # (1016, "/df_lung/cls_train_data/csv_data/1016/1016_20241211_155643_rotate_10_feat_raw_data_3d_npy.csv") # ] # is_save_flag = True # get_data_3d_feature(node_csv_file_list=node_csv_file_list, is_save_flag=is_save_flag) # 生成训练数据 node_csv_file_list = [ (2021, "/df_lung/cls_train_data/csv_data/2021/2021_20241204_094025_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2031, "/df_lung/cls_train_data/csv_data/2031/2031_20241204_094025_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2041, "/df_lung/cls_train_data/csv_data/2041/2041_20241204_094026_rotate_10_feat_raw_data_3d_npy_feature.csv"), (1010, "/df_lung/cls_train_data/csv_data/1010/1010_20241204_093726_rotate_10_feat_raw_data_3d_npy_feature.csv"), (1020, "/df_lung/cls_train_data/csv_data/1020/1020_20241204_093726_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2011, "/df_lung/cls_train_data/csv_data/2011/2011_20241204_093726_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2046, "/df_lung/cls_train_data/csv_data/2046/2046_20241211_155642_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2047, "/df_lung/cls_train_data/csv_data/2047/2047_20241211_155642_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2048, "/df_lung/cls_train_data/csv_data/2048/2048_20241211_155642_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2060, "/df_lung/cls_train_data/csv_data/2060/2060_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2061, "/df_lung/cls_train_data/csv_data/2061/2061_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (2062, "/df_lung/cls_train_data/csv_data/2062/2062_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (3001, "/df_lung/cls_train_data/csv_data/3001/3001_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (4001, "/df_lung/cls_train_data/csv_data/4001/4001_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (5001, "/df_lung/cls_train_data/csv_data/5001/5001_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (6001, "/df_lung/cls_train_data/csv_data/6001/6001_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv"), (1016, "/df_lung/cls_train_data/csv_data/1016/1016_20241211_155643_rotate_10_feat_raw_data_3d_npy_feature.csv") ] csv_data_dir = "/df_lung/cls_train_data/csv_data" train_csv_dir = "/df_lung/cls_train_data/train_csv_data" is_pad_df = True is_save_csv = True seed = 100004 generate_feature_train_npy_csv_file( node_csv_file_list = node_csv_file_list, csv_data_dir=csv_data_dir, train_csv_dir=train_csv_dir, is_pad_df=is_pad_df, is_save_csv=is_save_csv, seed=seed )