tag 标签: 数据清洗

相关博文
  • 热度 2
    2023-10-23 10:24
    79 次阅读|
    0 个评论
    import pandas as pd import re file_path = r'D: DOE5.csv' spec_path = r'D: \spec.csv' wafer_column_name = "WAFER" rawdata = pd.read_csv(file_path , encoding = "utf-8" ) def read_e (file , sheet): to_read= pd.read_excel(file , sheet) return to_read file_name = r'D: \IGBT lot5.xlsx' sheet = sheet1 = read_e(file_name , sheet ) sheet2 = read_e(file_name , sheet ) sheet3 = read_e(file_name , sheet ) merged_sheet = pd.concat( ) merged_sheet =merged_sheet .map( lambda x:x ) merged_sheet =merged_sheet .map( lambda x:x ) del merged_sheet ft1 =merged_sheet.pivot_table( index = , columns = 'axis' , values = 'result_string' ) ft1_1 = ft1.reset_index() ft1_1 = 0 def dic_cd (x , x_f): dic ={} for i in range ( 0 , len (x_f)): dic ] =x return dic y_c = range ( 5 , 23 , 1 ) y_ft = range (- 35 , - 17 , 1 ) y_cd = dic_cd(y_c , y_ft) x_c = range ( 5 , 27 , 1 ) x_ft = range ( 21 , - 1 , - 1 ) x_cd =dic_cd(x_c , x_ft) ft1_1 = ft1_1 .map( lambda x:x_cd ) ft1_1 = ft1_1 .map( lambda x:y_cd ) key1 = for key in key1: ft1_1 = ft1_1 .astype(rawdata .dtype) merged1 = pd.merge(rawdata , ft1_1 , on =key1 , how = 'left' ) waferlist = sorted (merged1 .unique()) bin0 = bin2 = bin4 = def caculate_CP_bin (rawdata , split , key , bin): if rawdata == key) & (rawdata == bin)] is not None : bin_rate = rawdata == key) & (rawdata == bin)] .size/rawdata == key] .size else : bin_rate= 0 return bin_rate for Yield_wafer_number in waferlist: bin0.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 0 )) bin1.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 1 )) bin2.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 2 )) bin3.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 3 )) bin4.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 4 )) 
bin5.append(caculate_CP_bin( rawdata =merged1 , split =wafer_column_name , key =Yield_wafer_number , bin = 5 )) CP_Yield ={ 'Wafer' : waferlist , 'Yield' : bin0 , 'IGES_Fail' :bin1 , 'ICES_Fail' :bin2 , 'VTH_Fail' : bin3 , 'Delta_Vth fail' :bin4 , 'VCESAT_Fail' :bin5} Yield_key_table = pd.DataFrame(CP_Yield) merged1.to_csv( r'D: final2_py.csv' , index = False ) -------------代码2----------------------- import pandas as pd import os folder_path = r'D:\ Summary' file_names = combined_df = combined_df = df.drop(row , inplace = True ) df.reset_index( drop = True, inplace = True ) df = str (file_name) df = int ( str (file_name) ) combined_df.append(df) combined_df1 = pd.concat(combined_df) return combined_df1 folder_path = r'D: \IGBT' savedfolder = os.path.join(folder_path , 'merged.CSV' ) df2 = read_to_df(folder_path) df2.to_csv(savedfolder , index = False )
相关资源
  • 所需E币: 3
    时间: 2019-6-12 19:45
    大小: 1.04MB
    上传者: royalark_912907664
    文中针对生产车间现场信息准确采集及有效应用等关键问题，研究资源信息标识方法，支持对生产信息的采集以及生产车间物联环境的构建；提出制造物联环境下的数据清洗方法，在保证数据准确率的基础上能够更好适应标签移动的生产场景；提出集成数据融合估计理论的卡尔曼滤波数据处理方法，支持对多传感器的数据融合。并在实验室以惯组测试过程为应用对象进行数据处理模拟分析，验证了本文所提技术的可行性和有效性。