1.导入:
1 import numpy as np2 import pandas as pd
原始文件:
1 xlsx=pd.ExcelFile('1.xlsx')2 df=pd.read_excel(xlsx,0,index_col=None,na_values=['NA'])3 print(df.head())
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py Name Index num cell0 C1 GB0 1 11 C4 GBC2 3 52 C7 GBC4 5 93 C10 GBC6 7 134 C13 GBC8 9 17Process finished with exit code 0
2.数据筛选和选择:
1 print(df.shape) 2 print("*"*111) 3 print(df.index) 4 print("*"*111) 5 print(df.index.values) 6 print("*"*111) 7 print(df.columns) 8 print("*"*111) 9 print(df.columns.values)10 print("*"*111)11 print(df.columns.get_loc("num"))12 print("*"*111)13 print(df.columns.get_indexer(["num","cell"]))14 print("*"*111)15 print(df["num"])16 print("*"*111)17 print(df[["num","cell"]])18 print("*"*111)19 print(df[["num","cell"]].values)20 print("*"*111)21 print(df.iloc[2:6])22 print("*"*111)23 print(df.iloc[2:6,2:4])24 print("*"*111)25 print(df.iloc[2:6,df.columns.get_indexer(["cell","num"])])26 print("*"*111)27 print(df.loc[2:6,["cell","num"]])28 print("*"*111)29 print(df.loc[2:6,"num":"cell"])30 print("*"*111)31 print(df.loc[2:8:2,["cell","num"]])32 print("*"*111)33 print(df[(df["cell"]==9)|(df["num"]==9)])34 print("*"*111)35 print(df[(df["num"]==9)|(df["num"]==1)])36 print("*"*111)37 print(df.loc[df["num"]<7])38 print("*"*111)39 print(df.loc[df.num<7,['cell','num']])
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py(12, 4)***************************************************************************************************************RangeIndex(start=0, stop=12, step=1)***************************************************************************************************************[ 0 1 2 3 4 5 6 7 8 9 10 11]***************************************************************************************************************Index(['Name', 'Index', 'num', 'cell'], dtype='object')***************************************************************************************************************['Name' 'Index' 'num' 'cell']***************************************************************************************************************2***************************************************************************************************************[2 3]***************************************************************************************************************0 11 32 53 74 95 116 137 158 179 1910 2111 23Name: num, dtype: int64*************************************************************************************************************** num cell0 1 11 3 52 5 93 7 134 9 175 11 216 13 257 15 298 17 339 19 3710 21 4111 23 45***************************************************************************************************************[[ 1 1] [ 3 5] [ 5 9] [ 7 13] [ 9 17] [11 21] [13 25] [15 29] [17 33] [19 37] [21 41] [23 45]]*************************************************************************************************************** Name Index num cell2 C7 GBC4 5 93 C10 GBC6 7 134 C13 GBC8 9 175 C16 GBC10 11 21*************************************************************************************************************** num cell2 5 93 7 134 9 175 11 21*************************************************************************************************************** cell num2 9 53 13 74 17 95 21 11*************************************************************************************************************** cell num2 9 53 13 74 17 95 21 116 25 13*************************************************************************************************************** num cell2 5 93 7 134 9 175 11 216 13 25*************************************************************************************************************** cell num2 9 54 17 96 25 138 33 17*************************************************************************************************************** Name Index num cell2 C7 GBC4 5 94 C13 GBC8 9 17*************************************************************************************************************** Name Index num cell0 C1 GB0 1 14 C13 GBC8 9 17*************************************************************************************************************** Name Index num cell0 C1 GB0 1 11 C4 GBC2 3 52 C7 GBC4 5 9*************************************************************************************************************** cell num0 1 11 5 32 9 5Process finished with exit code 0
按照行列遍历
1 import numpy as np 2 import pandas as pd 3 xlsx=pd.ExcelFile('1.xlsx') 4 df=pd.read_excel(xlsx,0,index_col=None,na_values=['NA']) 5 print(df) 6 print("*"*111) 7 #三种迭代函数,interrows()返回(index,Series)行对 8 for index,row in df.iterrows(): 9 print(index,'\n',row)10 print("*"*111)11 #返回本身就是一个series,可以按照series的情况提取具体元素12 for index,row in df.iterrows():13 print(row[2:3])14 print("*"*111)15 #interitems()返回(column,series)列对16 for index,col in df.iteritems():17 print(col[2:3])18 print("*"*111)19 #以上方式遍历不能对数据进行修改,要对数据进行修改,数据小的话直接按照行数遍历修改20 #如果很大,建议使用apply函数或者转为list进行处理21 for i in df.index:22 print(df.loc[i,:])23 print("*"*111)24 # 也可以用shape[0]表示行数25 for i in range(df.shape[0]):26 print(df.loc[i,:])
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py Name Index num cell0 C1 GB0 1 11 C4 GBC2 3 52 C7 GBC4 5 93 C10 GBC6 7 134 C13 GBC8 9 175 C16 GBC10 11 216 C19 GBC12 13 257 C4 GBC14 15 298 C7 GBC16 17 339 C10 GBC18 19 3710 C13 GBC20 21 4111 C16 GBC22 23 45***************************************************************************************************************0 Name C1Index GB0num 1cell 1Name: 0, dtype: object1 Name C4Index GBC2num 3cell 5Name: 1, dtype: object2 Name C7Index GBC4num 5cell 9Name: 2, dtype: object3 Name C10Index GBC6num 7cell 13Name: 3, dtype: object4 Name C13Index GBC8num 9cell 17Name: 4, dtype: object5 Name C16Index GBC10num 11cell 21Name: 5, dtype: object6 Name C19Index GBC12num 13cell 25Name: 6, dtype: object7 Name C4Index GBC14num 15cell 29Name: 7, dtype: object8 Name C7Index GBC16num 17cell 33Name: 8, dtype: object9 Name C10Index GBC18num 19cell 37Name: 9, dtype: object10 Name C13Index GBC20num 21cell 41Name: 10, dtype: object11 Name C16Index GBC22num 23cell 45Name: 11, dtype: object***************************************************************************************************************num 1Name: 0, dtype: objectnum 3Name: 1, dtype: objectnum 5Name: 2, dtype: objectnum 7Name: 3, dtype: objectnum 9Name: 4, dtype: objectnum 11Name: 5, dtype: objectnum 13Name: 6, dtype: objectnum 15Name: 7, dtype: objectnum 17Name: 8, dtype: objectnum 19Name: 9, dtype: objectnum 21Name: 10, dtype: objectnum 23Name: 11, dtype: object***************************************************************************************************************2 C7Name: Name, dtype: object2 GBC4Name: Index, dtype: object2 5Name: num, dtype: int642 9Name: cell, dtype: int64***************************************************************************************************************Name C1Index GB0num 1cell 1Name: 0, dtype: objectName C4Index GBC2num 3cell 5Name: 1, dtype: objectName C7Index GBC4num 5cell 9Name: 2, dtype: objectName C10Index GBC6num 7cell 13Name: 3, dtype: objectName C13Index GBC8num 9cell 17Name: 4, dtype: objectName C16Index GBC10num 11cell 21Name: 5, dtype: objectName C19Index GBC12num 13cell 25Name: 6, dtype: objectName C4Index GBC14num 15cell 29Name: 7, dtype: objectName C7Index GBC16num 17cell 33Name: 8, dtype: objectName C10Index GBC18num 19cell 37Name: 9, dtype: objectName C13Index GBC20num 21cell 41Name: 10, dtype: objectName C16Index GBC22num 23cell 45Name: 11, dtype: object***************************************************************************************************************Name C1Index GB0num 1cell 1Name: 0, dtype: objectName C4Index GBC2num 3cell 5Name: 1, dtype: objectName C7Index GBC4num 5cell 9Name: 2, dtype: objectName C10Index GBC6num 7cell 13Name: 3, dtype: objectName C13Index GBC8num 9cell 17Name: 4, dtype: objectName C16Index GBC10num 11cell 21Name: 5, dtype: objectName C19Index GBC12num 13cell 25Name: 6, dtype: objectName C4Index GBC14num 15cell 29Name: 7, dtype: objectName C7Index GBC16num 17cell 33Name: 8, dtype: objectName C10Index GBC18num 19cell 37Name: 9, dtype: objectName C13Index GBC20num 21cell 41Name: 10, dtype: objectName C16Index GBC22num 23cell 45Name: 11, dtype: objectProcess finished with exit code 0
重要:
1 #如果数据量较大,可以在list中转下 2 modify_result=[] #优化遍历速度,使用list进行 3 column_names=list(df.columns.values) 4 modify_result.append(column_names) #添加标题行 5 for i ,row in df.iterrows(): 6 newrows=list(row) 7 if i>=4: 8 newrows[column_names.index("num")]=99 9 modify_result.append(newrows) #添加每一行数据10 df=pd.DataFrame(modify_result[1:],columns=modify_result[0]) #重新变回df11 print(df)
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py Name Index num cell0 C1 GB0 1 11 C4 GBC2 3 52 C7 GBC4 5 93 C10 GBC6 7 134 C13 GBC8 99 175 C16 GBC10 99 216 C19 GBC12 99 257 C4 GBC14 99 298 C7 GBC16 99 339 C10 GBC18 99 3710 C13 GBC20 99 4111 C16 GBC22 99 45Process finished with exit code 0
赋值修改:
1 for i in df.index:2 if i>2:3 df.loc[i,"cell"]=284 5 print(df)
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py Name Index num cell0 C1 GB0 1 11 C4 GBC2 3 52 C7 GBC4 5 93 C10 GBC6 7 284 C13 GBC8 9 285 C16 GBC10 11 286 C19 GBC12 13 287 C4 GBC14 15 288 C7 GBC16 17 289 C10 GBC18 19 2810 C13 GBC20 21 2811 C16 GBC22 23 28Process finished with exit code 0
矢量运算:
1 df['num']=502 print(df)
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py Name Index num cell0 C1 GB0 50 11 C4 GBC2 50 52 C7 GBC4 50 93 C10 GBC6 50 134 C13 GBC8 50 175 C16 GBC10 50 216 C19 GBC12 50 257 C4 GBC14 50 298 C7 GBC16 50 339 C10 GBC18 50 3710 C13 GBC20 50 4111 C16 GBC22 50 45Process finished with exit code 0
组建新的形式
1 for m ,row in df.iterrows(): 2 excel_id="{}-{}-{}".format(row['cell'],row['num'],row['Name']) 3 print(excel_id) 4 print("*" * 111) 5 for i, row in df.iterrows(): 6 print(row[['num','Name']]) 7 print("*"*111) 8 for t in df.index: 9 excel_id="{}-{}-{}".format(df.loc[t,'cell'],df.loc[t,'num'],df.loc[t,'Name'])10 print(excel_id)
1 df['Col_sum'] = df.apply(lambda x: x.sum(), axis=1)2 3 df.loc['Row_sum'] = df.apply(lambda x: x.sum())4 print(df)
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py1-1-C15-3-C49-5-C713-7-C1017-9-C1321-11-C1625-13-C1929-15-C433-17-C737-19-C1041-21-C1345-23-C16***************************************************************************************************************num 1Name C1Name: 0, dtype: objectnum 3Name C4Name: 1, dtype: objectnum 5Name C7Name: 2, dtype: objectnum 7Name C10Name: 3, dtype: objectnum 9Name C13Name: 4, dtype: objectnum 11Name C16Name: 5, dtype: objectnum 13Name C19Name: 6, dtype: objectnum 15Name C4Name: 7, dtype: objectnum 17Name C7Name: 8, dtype: objectnum 19Name C10Name: 9, dtype: objectnum 21Name C13Name: 10, dtype: objectnum 23Name C16Name: 11, dtype: object***************************************************************************************************************1-1-C15-3-C49-5-C713-7-C1017-9-C1321-11-C1625-13-C1929-15-C433-17-C737-19-C1041-21-C1345-23-C16Process finished with exit code 0
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py name index num cell Col_sum0 1 0 1 1 31 4 2 3 5 142 7 4 5 9 253 10 6 7 13 364 13 8 9 17 475 16 10 5 21 526 10 8 9 5 327 13 10 13 7 438 16 12 17 9 549 19 14 3 5 4110 4 20 5 41 7011 16 22 7 45 90Row_sum 129 116 84 178 507Process finished with exit code 0
排序:
1 1 print(df)2 2 print("*"*111)3 3 print(df.sort_values(by="cell"))4 4 print("*"*111)5 5 print(df)6 6 print("*"*111)7 7 print(df.sort_values(by=['cell','num','name']).reset_index(drop=True))8 8 print("*"*111)9 9 print(df.sort_values(by=['cell','num','name']).reset_index(drop=False))
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py name index num cell0 1 0 1 11 4 2 3 52 7 4 5 93 10 6 7 134 13 8 9 175 16 10 5 216 10 8 9 57 13 10 13 78 16 12 17 99 19 14 3 510 4 20 5 4111 16 22 7 45*************************************************************************************************************** name index num cell0 1 0 1 11 4 2 3 56 10 8 9 59 19 14 3 57 13 10 13 72 7 4 5 98 16 12 17 93 10 6 7 134 13 8 9 175 16 10 5 2110 4 20 5 4111 16 22 7 45*************************************************************************************************************** name index num cell0 1 0 1 11 4 2 3 52 7 4 5 93 10 6 7 134 13 8 9 175 16 10 5 216 10 8 9 57 13 10 13 78 16 12 17 99 19 14 3 510 4 20 5 4111 16 22 7 45*************************************************************************************************************** name index num cell0 1 0 1 11 4 2 3 52 19 14 3 53 10 8 9 54 13 10 13 75 7 4 5 96 16 12 17 97 10 6 7 138 13 8 9 179 16 10 5 2110 4 20 5 4111 16 22 7 45*************************************************************************************************************** level_0 name index num cell0 0 1 0 1 11 1 4 2 3 52 9 19 14 3 53 6 10 8 9 54 7 13 10 13 75 2 7 4 5 96 8 16 12 17 97 3 10 6 7 138 4 13 8 9 179 5 16 10 5 2110 10 4 20 5 4111 11 16 22 7 45Process finished with exit code 0
合并方法:注意表头一致
1 import numpy as np 2 import pandas as pd 3 excel1 = pd.ExcelFile('1.xlsx') 4 df1 = pd.read_excel(excel1 ,0,index_col=None,na_values=['NA']) 5 excel2 = pd.ExcelFile('2.xlsx') 6 df2 = pd.read_excel(excel2 ,0,index_col=None,na_values=['NA']) 7 print(df1) 8 print("*"*111) 9 print(df2)10 print("*"*111)11 print(df1[:3])12 print("*"*111)13 pieces=[df1[1:4],df1[2:4],df2[2:3]]14 print( pd.concat(pieces,sort=True))15 print("*"*111)16 print( pd.concat(pieces,sort=False))
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py N IN num cell0 1 0 1 11 4 2 3 52 7 4 5 93 10 6 7 134 13 8 9 175 16 10 5 216 10 8 9 57 13 10 13 78 16 12 17 99 19 14 3 510 4 20 5 4111 16 22 7 45*************************************************************************************************************** name index num cell0 1 6 1 11 2 7 3 52 3 8 5 93 4 9 7 134 5 10 9 175 6 11 11 216 7 12 13 257 8 13 15 298 9 14 17 339 10 15 19 3710 11 16 21 4111 12 17 23 45*************************************************************************************************************** N IN num cell0 1 0 1 11 4 2 3 52 7 4 5 9*************************************************************************************************************** IN N cell index name num1 2.0 4.0 5 NaN NaN 32 4.0 7.0 9 NaN NaN 53 6.0 10.0 13 NaN NaN 72 4.0 7.0 9 NaN NaN 53 6.0 10.0 13 NaN NaN 72 NaN NaN 9 8.0 3.0 5*************************************************************************************************************** N IN num cell name index1 4.0 2.0 3 5 NaN NaN2 7.0 4.0 5 9 NaN NaN3 10.0 6.0 7 13 NaN NaN2 7.0 4.0 5 9 NaN NaN3 10.0 6.0 7 13 NaN NaN2 NaN NaN 5 9 3.0 8.0Process finished with exit code 0
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py name index num cell0 1 0 1 11 4 2 3 52 7 4 5 93 10 6 7 134 13 8 9 175 16 10 5 216 10 8 9 57 13 10 13 78 16 12 17 99 19 14 3 510 4 20 5 4111 16 22 7 45*************************************************************************************************************** name index num cell0 1 6 1 11 2 7 3 52 3 8 5 93 4 9 7 134 5 10 9 175 6 11 11 216 7 12 13 257 8 13 15 298 9 14 17 339 10 15 19 3710 11 16 21 4111 12 17 23 45*************************************************************************************************************** name index num cell0 1 0 1 11 4 2 3 52 7 4 5 9*************************************************************************************************************** cell index name num1 5 2 4 32 9 4 7 53 13 6 10 72 9 4 7 53 13 6 10 72 9 8 3 5*************************************************************************************************************** name index num cell1 4 2 3 52 7 4 5 93 10 6 7 132 7 4 5 93 10 6 7 132 3 8 5 9Process finished with exit code 0
第一个表头不一致,第二个一致
三种合并的方法:
1 import numpy as np 2 import pandas as pd 3 excel1 = pd.ExcelFile('1.xlsx') 4 df1 = pd.read_excel(excel1 ,0,index_col=None,na_values=['NA']) 5 excel2 = pd.ExcelFile('2.xlsx') 6 df2 = pd.read_excel(excel2 ,0,index_col=None,na_values=['NA']) 7 excel3 = pd.ExcelFile('3.xlsx') 8 df3 = pd.read_excel(excel3 ,0,index_col=None,na_values=['NA']) 9 excel4 = pd.ExcelFile('4.xlsx')10 df4 = pd.read_excel(excel4 ,0,index_col=None,na_values=['NA'])11 print(df1)12 print(df2)13 print(df3)14 print(df4)
表格结果如下:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/12.py name index num cell0 1 1 5 51 2 1 2 42 3 1 3 43 4 2 2 24 5 2 1 1 name index num cell0 6 6 7 101 7 7 8 72 8 8 9 83 9 9 7 94 10 6 9 10 name index num cell0 11 12 13 101 12 13 12 132 13 10 13 123 14 12 10 104 15 14 12 12 name index num cell0 16 20 16 171 17 17 17 172 18 18 18 183 19 19 19 204 20 16 17 16Process finished with exit code 0
concat参数
1 """ 2 concat参数 3 pd.concat(objs, axis=0, join='outer', join_axes=None, 4 ignore_index=False,keys=None,levels=None, 5 names=None,verify_integrity=False, 6 copy=True) 7 1、objs : #输入,DataFrame格式或list(多个合并). 8 9 2、axis: {0, 1, ...}, default 0. #控制连接的方向,0代表列纵向,1代表行横向10 11 3、join : {‘inner’, ‘outer’}, default ‘outer’. #控制连接的索引,inner表示取表索引之间的交集,outer表示取索引的并集12 13 4、ignore_index: boolean, default False. #是否使用原索引,选舍弃便对新表重新进行索引排序。14 15 5、join_axes : list of Index objects. #设定使用的索引,例以df1的索引为标准,join_axes=[df1.index]16 17 6、keys : sequence, default None. #类似Multiindex,设立另一层索引18 """19 pieces=[df1[1:4],df2[2:4],df3[2:],df4[4:]]20 print( pd.concat(pieces,axis=0))21 print("*"*111)22 print( pd.concat(pieces,axis=1))23 print("*"*111)24 list=(df1,df2,df3,df4)25 print( pd.concat(list))26 print("*"*111)27 print( pd.concat(list,axis=1))28 print("*"*111)29 print( pd.concat(list,axis =0, ignore_index=False))30 print("*"*111)31 print( pd.concat(list,axis =0, ignore_index=True))
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py name index num cell1 2 1 2 42 3 1 3 43 4 2 2 22 8 8 9 83 9 9 7 92 13 10 13 123 14 12 10 104 15 14 12 124 20 16 17 16*************************************************************************************************************** name index num cell name ... cell name index num cell1 2.0 1.0 2.0 4.0 NaN ... NaN NaN NaN NaN NaN2 3.0 1.0 3.0 4.0 8.0 ... 12.0 NaN NaN NaN NaN3 4.0 2.0 2.0 2.0 9.0 ... 10.0 NaN NaN NaN NaN4 NaN NaN NaN NaN NaN ... 12.0 20.0 16.0 17.0 16.0[4 rows x 16 columns]*************************************************************************************************************** name index num cell0 1 1 5 51 2 1 2 42 3 1 3 43 4 2 2 24 5 2 1 10 6 6 7 101 7 7 8 72 8 8 9 83 9 9 7 94 10 6 9 100 11 12 13 101 12 13 12 132 13 10 13 123 14 12 10 104 15 14 12 120 16 20 16 171 17 17 17 172 18 18 18 183 19 19 19 204 20 16 17 16*************************************************************************************************************** name index num cell name index ... num cell name index num cell0 1 1 5 5 6 6 ... 13 10 16 20 16 171 2 1 2 4 7 7 ... 12 13 17 17 17 172 3 1 3 4 8 8 ... 13 12 18 18 18 183 4 2 2 2 9 9 ... 10 10 19 19 19 204 5 2 1 1 10 6 ... 12 12 20 16 17 16[5 rows x 16 columns]*************************************************************************************************************** name index num cell0 1 1 5 51 2 1 2 42 3 1 3 43 4 2 2 24 5 2 1 10 6 6 7 101 7 7 8 72 8 8 9 83 9 9 7 94 10 6 9 100 11 12 13 101 12 13 12 132 13 10 13 123 14 12 10 104 15 14 12 120 16 20 16 171 17 17 17 172 18 18 18 183 19 19 19 204 20 16 17 16*************************************************************************************************************** name index num cell0 1 1 5 51 2 1 2 42 3 1 3 43 4 2 2 24 5 2 1 15 6 6 7 106 7 7 8 77 8 8 9 88 9 9 7 99 10 6 9 1010 11 12 13 1011 12 13 12 1312 13 10 13 1213 14 12 10 1014 15 14 12 1215 16 20 16 1716 17 17 17 1717 18 18 18 1818 19 19 19 2019 20 16 17 16Process finished with exit code 0
merge的参数(暂时没有用到,用到时补充)merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, suffixes=('_x', '_y'), copy=True, indicator=False)1、on:列名,join用来对齐的那一列的名字,用到这个参数的时候一定要保证左表和右表用来对齐的那一列都有相同的列名。2、left_on:左表对齐的列,可以是列名,也可以是和dataframe同样长度的arrays。3、right_on:右表对齐的列,可以是列名,也可以是和dataframe同样长度的arrays。4、left_index/ right_index: 如果是True的haunted以index作为对齐的key5、how:数据融合的方法。6、 sort:根据dataframe合并的keys按字典顺序排序,默认是,如果置false可以提高表现。
Append用法: df1.append([df2,df3,df4])= pd.concat([df1,df2,df3,df4])
判断是否重复:
df1 = pd.read_excel(excel1 ,0,index_col=None,na_values=['NA'])print(df1)print("*"*111)print(df1.duplicated())
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py name index num cell0 1 1 5 51 2 1 2 42 3 3 3 33 4 2 2 24 4 2 2 2***************************************************************************************************************0 False1 False2 False3 False4 Truedtype: boolProcess finished with exit code 0
剔除重复:
1 print(df1)2 print("*"*111)3 print(df1.drop_duplicates("index"))
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py name index num cell0 1 1 5 51 2 1 2 42 3 3 3 33 4 2 2 24 4 2 2 2*************************************************************************************************************** name index num cell0 1 1 5 52 3 3 3 33 4 2 2 2Process finished with exit code 0
分类汇总:
1 import numpy as np 2 import pandas as pd 3 excel1 = pd.ExcelFile('1.xlsx') 4 df1 = pd.read_excel(excel1 ,0,index_col=None,na_values=['NA']) 5 print("原始数据:",df1) 6 print("*"*111) 7 print(df1.groupby(['num'])) 8 print("*"*111) 9 print(df1.groupby(['num']).groups)10 print("*"*111)11 for name ,group in df1.groupby(['num']):12 print("group:",name)13 print(group)14 print("*"*111)15 print(df1.groupby(['num']).sum())16 print("*"*111)17 grouped=df1.groupby(['num'])18 print(grouped.size())19 print("*"*111)20 print(len(grouped))21 print("*"*111)22 print(grouped.first())23 print("*"*111)24 print(grouped.last())25 print("*"*111)26 print(grouped.get_group(3))27 print("*"*111)28 grouped2=df1.groupby(['num','cell'])29 print(grouped2.groups)30 print("*"*111)31 print(grouped2.get_group((2,4)))32 print("*"*111)33 for (k1,k2),group_t in grouped2:34 print("group:",k1,k2)35 print(group_t)
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py原始数据: name index num cell0 1 1 5 51 2 1 2 42 3 3 3 33 4 2 2 24 4 2 2 2******************************************************************************************************************************************************************************************************************************{ 2: Int64Index([1, 3, 4], dtype='int64'), 3: Int64Index([2], dtype='int64'), 5: Int64Index([0], dtype='int64')}***************************************************************************************************************group: 2 name index num cell1 2 1 2 43 4 2 2 24 4 2 2 2group: 3 name index num cell2 3 3 3 3group: 5 name index num cell0 1 1 5 5*************************************************************************************************************** name index cellnum 2 10 5 83 3 3 35 1 1 5***************************************************************************************************************num2 33 15 1dtype: int64***************************************************************************************************************3*************************************************************************************************************** name index cellnum 2 2 1 43 3 3 35 1 1 5*************************************************************************************************************** name index cellnum 2 4 2 23 3 3 35 1 1 5*************************************************************************************************************** cell index name2 3 3 3***************************************************************************************************************{(2, 2): Int64Index([3, 4], dtype='int64'), (2, 4): Int64Index([1], dtype='int64'), (3, 3): Int64Index([2], dtype='int64'), (5, 5): Int64Index([0], dtype='int64')}*************************************************************************************************************** name index num cell1 2 1 2 4***************************************************************************************************************group: 2 2 name index num cell3 4 2 2 24 4 2 2 2group: 2 4 name index num cell1 2 1 2 4group: 3 3 name index num cell2 3 3 3 3group: 5 5 name index num cell0 1 1 5 5Process finished with exit code 0
数据透视表
1 import numpy as np 2 import pandas as pd 3 excel1 = pd.ExcelFile('1.xlsx') 4 df1 = pd.read_excel(excel1 ,0,index_col=None,na_values=['NA']) 5 print(df1) 6 print("*"*111) 7 pvt=pd.pivot_table(df1,index=["name"],values=['num','cell'],aggfunc=np.sum) 8 print(pvt) 9 print("*"*111)10 print(pvt.loc[('A'),'num'])11 print("*"*111)12 print(pvt.reset_index())13 print("*"*111)14 print(df1)15 print("*"*111)16 pvt2=pd.pivot_table(df1,index=["name"],values=['num'],columns=['dex'],aggfunc=np.sum)17 print(pvt2)18 print("*"*111)19 df2=pvt2.reset_index()20 print(df2)21 print("*"*111)22 print(df2.columns.values)23 print("*"*111)24 print(df2.shape[1])
结果:
C:\Users\zte\AppData\Local\Programs\Python\Python37-32\python.exe C:/Users/zte/PycharmProjects/radioparametercheck/csvfiles/txt.py name dex num cell0 A 1 5 51 A 2 2 42 B 3 3 33 B 4 2 24 E 5 2 2*************************************************************************************************************** cell numname A 9 7B 5 5E 2 2***************************************************************************************************************7*************************************************************************************************************** name cell num0 A 9 71 B 5 52 E 2 2*************************************************************************************************************** name dex num cell0 A 1 5 51 A 2 2 42 B 3 3 33 B 4 2 24 E 5 2 2*************************************************************************************************************** num dex 1 2 3 4 5name A 5.0 2.0 NaN NaN NaNB NaN NaN 3.0 2.0 NaNE NaN NaN NaN NaN 2.0*************************************************************************************************************** name num dex 1 2 3 4 50 A 5.0 2.0 NaN NaN NaN1 B NaN NaN 3.0 2.0 NaN2 E NaN NaN NaN NaN 2.0***************************************************************************************************************[('name', '') ('num', 1) ('num', 2) ('num', 3) ('num', 4) ('num', 5)]***************************************************************************************************************6Process finished with exit code 0