加載樣本數據
from sklearn import datasets

# Load the bundled digits dataset, then split it into the feature
# matrix (x) and the target vector (y).
digits = datasets.load_digits()
features, target = digits.data, digits.target
'''
load_boston
包含 506 個波士頓房價的觀察值。這是一個用于研究回歸算法的優質數據集。（注意：scikit-learn 1.2 起已移除 load_boston。）
load_iris
包含 150 個鳶尾花尺寸的觀察值。這是一個用于研究分類算法的優質數據集。
load_digits
包含 1797 個手寫數字圖片的觀察值。這是一個用于研究圖像分類算法的優質數據集。
'''
數據仿真
回歸
from sklearn.datasets import make_regression  # regression simulation

# Generate a synthetic regression problem: 100 samples with 3 features,
# all of them informative, a single target and no added noise.
# coef=True makes the call return a third value, unpacked here as
# `coefficients`; random_state pins the generator so the data is reproducible.
features, target, coefficients = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=3,
    n_targets=1,
    noise=0.0,
    coef=True,
    random_state=1,
)
分類
from sklearn.datasets import make_classification  # classification simulation

# Generate a synthetic two-class problem: 100 samples with 3 features,
# all informative and none redundant. `weights` requests an imbalanced
# 25% / 75% class split; random_state pins the generator so the data is
# reproducible.
features, target = make_classification(
    n_samples=100,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    weights=[.25, .75],
    random_state=1,
)
聚類
from sklearn.datasets import make_blobs  # clustering simulation

# Generate synthetic clustering data: 100 two-feature samples drawn around
# 3 centers with a cluster standard deviation of 0.5. The samples are
# shuffled, and random_state pins the generator so the data is reproducible.
features, target = make_blobs(
    n_samples=100,
    n_features=2,
    centers=3,
    cluster_std=0.5,
    shuffle=True,
    random_state=1,
)
讀取數據
csv
# Load a CSV file into a DataFrame.
# NOTE(review): assumes `pd` is pandas and `url` is a path or URL defined
# elsewhere; neither is defined before this line — confirm.
dataframe = pd.read_csv(url)
excel
# Load an Excel worksheet into a DataFrame.
# sheet_name=0 selects the first sheet; a list such as
# [0, 1, 2, "Monthly Sales"] may be passed instead to load several sheets.
# header=1 takes the column labels from the row at index 1.
# The keyword is `sheet_name`: the old `sheetname` spelling is no longer
# accepted by pandas.
# NOTE(review): assumes `pd` is pandas and `url` is a path or URL defined
# elsewhere — confirm.
dataframe = pd.read_excel(url, sheet_name=0, header=1)
json
# Load a JSON document into a DataFrame.
# NOTE(review): assumes `pd` is pandas and `url` is a path or URL defined
# elsewhere — confirm.
dataframe = pd.read_json(url, orient='columns') # orient: structure of the JSON file
# json_normalize can convert semi-structured JSON data into a pandas DataFrame.
SQL
import pandas as pd
from sqlalchemy import create_engine

# Build an engine for the SQLite database file `sample.db`.
database_connection = create_engine('sqlite:///sample.db')

# Run the query through that engine and collect every row of the `data`
# table into a DataFrame.
dataframe = pd.read_sql_query(
    sql='SELECT * FROM data',
    con=database_connection,
)