1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
| import numpy as np import pandas as pd import matplotlib.pyplot as plt
arr = np.array([1,2,3], dtype=np.float64) np.zeros((3,6)) np.empty((2,3,2)) np.arange(15) arr.dtype arr.ndim arr.shape arr.astype(np.int32) arr * arr arr - arr 1/arr arr= np.arange(32).reshape((8,4)) arr[1:3, : ] arr[[1,2,3]] arr.T arr.transpose((...)) arr.swapaxes(...) arr.dot np.sqrt(arr) np.exp(arr) randn(8)#正态分布值 np.maximum(x,y) np.where(cond, xarr, yarr) #当cond为真,取xarr,否则取yarr arr.mean() arr.mean(axis=1) arr.sum() arr.std() arr.var() arr.min() arr.max() arr.argmin() arr.argmax() arr.cumsum() arr.cumprod() arr.all() arr.any() arr.sort() arr.sort(1) np.unique(arr) np.in1d(arr1, arr2) np.load() np.loadtxt() np.save() np.savez() #读取、保存文件 np.concatenate([arr, arr], axis=1) #连接两个arr,按行的方向
ser = Series() ser = Series([...], index=[...]) ser.values ser.index ser.reindex([...], fill_value=0) ser.isnull() pd.isnull(ser) pd.notnull(ser) ser.name= ser.index.name= ser.drop('x') ser +ser ser.sort_index() ser.order() #按索引排序、按值排序 df = DataFrame(data, columns=[...], index=[...]) df.ix['x'] del df['ly'] df.T df.index.name df.columns.name df.values df.drop([...]) df + df df1.add(df2, fill_value=0) df -ser f=lambda x: x.max()-x.min() df.apply(f) df.sort_index(axis=1, ascending=False) df.sort_index(by=['a','b']) ser.rank() df.rank(axis=1) df.sum() df.sum(axis=1) df.mean(axis=1, skipna=False) df.idxmax() df.cumsum() df.describe() ser.describe() ser.unique() ser.value_counts() df.value_counts() #返回一个series,其索引为唯一值,值为频率 ser.isin(['x', 'y']) ser.dropna() ser.isnull() ser.notnull() ser.fillna(0) df.unstack() df.swaplevel('key1','key2') df.sortlevel(1) df.set_index(['c','d'], drop=False) read_csv read_table read_fwf pd.read_csv('...', nrows=5) pd.read_csv('...', chunksize=1000) pd.load() pd.ExcelFile('...xls').parse('Sheet1') df.to_csv('...csv', sep='|', index=False, header=False) pd.merge(df1, df2, on='key', suffixes=('_left', '_right')) pd.merge(df1, df2, left_on='lkey', right_on='rkey') pd.merge(df1, df2, how='outer') df1.join(df2, on='key', how='outer') pd.concat([ser1, ser2, ser3], axis=1) ser1.combine_first(ser2) df1.combine_first(df2) df.stack() df.unstack() df.pivot() df.duplicated() df.drop_duplicates() df[''].map(lambda x: abs(x)) ser.replace(-999, np.nan) df.rename(index={}, columns={}, inplace=True) pd.cut(ser, bins) df[(np.abs(df)>3).any(1)] permutation take pd.get_dummies(df['key'], prefix='key') df[...].str.contains() df[...].str.findall(pattern, flags=re.IGNORECASE) df[...].str.match(pattern, flags=...) df[...].str.get()
ser.plot() df.plot() kind='kde' kind='bar' kind='barh' ser.hist(bins=50) plt.scatter(x,y) pd.scatter_matrix(df, diagonal='kde', color='k', alpha=0.3)
groupby() 默认在axis=0轴上分组,也可以在axis=1轴上分组;可以用for进行分组迭代 df.groupby(df['key1']) df['key2'].groupby(df['key1']) df['key3'].groupby([df['key1'], df['key2']]) df['key2'].groupby(df['key1']).size() df.groupby(df['key1'])['data1'] 等价于 df['data1'].groupby(df['key1']) df.groupby(df['key1'])[['data1']] 等价于 df[['data1']].groupby(df['key1']) df.groupby(mapping, axis=1) ser(mapping) df.groupby(len) df.groupby(level='...', axis=1) df.groupby([], as_index=False) df.groupby(...).agg(['mean', 'std']) df.groupby(...).transform(np.mean) df.groupby().apply()
df.pivot_table(['',''], rows=['',''], cols='', margins=True) pd.crosstab(df.col1, df.col2, margins=True)
fig=plt.figure() #图像所在的基对象 ax=fig.add_subplot(2,2,1) fig, axes = plt.subplots(nrows, ncols, sharex, sharey) plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
ax.plot(x, y, linestyle='--', color='g') ax.set_xticks([...]) ax.set_xticklabels([...]) ax.set_xlabel('...') ax.set_title('....') #设置图名 ax.legend(loc='best') ax.text(x,y, 'hello', family='monospace', fontsize=10) ax.add_patch() plt.savefig('...png', dpi=400, bbox_inches='tight')
from mpl_toolkits.basemap import Basemap import matplotlib.pyplot as plt
pd.to_datetime(datestrs) pd.date_range('1/1/2000', periods=1000) ts.resample('D', how='mean') #降采样会聚合,即将短频率(日)变成长频率(月),对应的值叠加; #升采样会插值,即将长频率变为短频率,中间产生新值 ts.shift(2, freq='D') ts.shift(-2, freq='D') now+Day() now+MonthEnd() import pytz pytz.timezone('US/Eastern') pd.Period('2010', freq='A-DEC') #period表示时间区间,叫做时期 pd.PeriodIndex ts.to_period('M') pd.rolling_mean(...) pd.rolling_std(...)
|