# -*- coding: utf-8 -*-
import numpy
import pandas
# Load the data set from its hard-coded location.
data = pandas.read_csv('D:\\PDA\\4.9\\data.csv')

# Seed NumPy's global random generator before any sampling is done.
numpy.random.seed(2)

# The three calls below are demonstrations: their results are not stored.
# They are kept in this exact order because each draw advances the seeded
# generator that the later sampling steps also rely on.

# Simple random sample of a fixed number of rows.
data.sample(10)

# Simple random sample of a fraction of the rows (2 %).
data.sample(frac=0.02)

# Sampling with or without replacement:
#   replace=True  -> a row may be drawn more than once,
#   replace=False -> every row is drawn at most once.
data.sample(10, replace=True)

# Typical (stratified) sampling starts by grouping the rows on "class".
gbr = data.groupby("class")
# Mapping of each group key to the row labels belonging to it; the value
# is not stored.
gbr.groups
# Number of rows to draw per class: 2 from class 1, 4 from class 2 and
# 6 from class 3.
typicalNDict = {1: 2, 2: 4, 3: 6}
def typicalSampling(group, typicalNDict):
    """Draw a fixed number of rows from one group.

    Parameters
    ----------
    group
        Object exposing a ``name`` attribute (the group key) and a
        ``sample(n=...)`` method.
    typicalNDict
        Mapping from group key to the number of rows to draw.

    Returns
    -------
    Whatever ``group.sample(n=...)`` returns for the looked-up count.

    Raises
    ------
    KeyError
        If ``group.name`` is not a key of ``typicalNDict``.
    """
    # Resolve the per-group quota first, then sample that many rows.
    quota = typicalNDict[group.name]
    return group.sample(n=quota)
# Stratified sample by count: apply() calls typicalSampling once per
# "class" group, forwarding typicalNDict as the extra positional argument.
# group_keys=False is passed so the group key is not added to the index.
# NOTE(review): pandas >= 2.2 deprecates apply() operating on the grouping
# column - confirm the installed version still passes "class" to each group.
result = (
    data
    .groupby('class', group_keys=False)
    .apply(typicalSampling, typicalNDict)
)
# Fraction of rows to draw per class: 20 % of class 1, 40 % of class 2
# and 60 % of class 3.
typicalFracDict = {1: 0.2, 2: 0.4, 3: 0.6}
def typicalSampling(group, typicalFracDict):
    """Draw a fixed fraction of the rows from one group.

    This definition rebinds the name ``typicalSampling``.

    Parameters
    ----------
    group
        Object exposing a ``name`` attribute (the group key) and a
        ``sample(frac=...)`` method.
    typicalFracDict
        Mapping from group key to the fraction of rows to draw.

    Returns
    -------
    Whatever ``group.sample(frac=...)`` returns for the looked-up fraction.

    Raises
    ------
    KeyError
        If ``group.name`` is not a key of ``typicalFracDict``.
    """
    # Resolve the per-group share first, then sample that proportion.
    share = typicalFracDict[group.name]
    return group.sample(frac=share)
# Stratified sample by fraction: apply() calls typicalSampling once per
# "class" group, forwarding typicalFracDict as the extra positional argument.
# This overwrites any earlier value of `result`.
# NOTE(review): pandas >= 2.2 deprecates apply() operating on the grouping
# column - confirm the installed version still passes "class" to each group.
result = (
    data
    .groupby('class', group_keys=False)
    .apply(typicalSampling, typicalFracDict)
)
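# Example output (appears to be `result` from the fraction-based stratified
# sampling above; the first, unlabeled column is the original row index):
#
#     id  class  score
# 39  40      1     45
#  4   5      1     63
# 53  54      1     95
# 25  26      1     64
# 37  38      1    107
# 70  71      1     75
# 85  86      2     77
# 81  82      2     63
# 54  55      2    121
# 68  69      2     56
# 13  14      2     69
# 86  87      2     93
# 57  58      2     82
# 84  85      2     85
# 94  95      2    103
# 96  97      2    108
# 35  36      2    101
# 89  90      2     86
# 45  46      2     95
# 80  81      2     81
# 20  21      2    138
# 65  66      3     83
# 83  84      3     52
# 34  35      3     66
#  6   7      3     87
# 77  78      3     77
# 82  83      3     54
# 55  56      3    126
# 17  18      3     58
# 67  68      3     93
# 10  11      3     89
# 26  27      3     64
# 61  62      3    103
# 88  89      3     89
# 69  70      3     96
#  0   1      3     77
# 90  91      3     91
# 91  92      3     59
# 48  49      3     98
#  7   8      3     48
# 52  53      3     62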