小白的自我救贖
書接上回,在action recognition中,我們已經學會了視頻數據中幀圖像的讀取,並將讀取到的幀圖像保存在文件夾frames_of_video中。今天將學習如何在這些幀圖像中隨機讀取並加載其中連續的16幀圖像,保存為張量形式,然後進行隨機裁剪等操作,實現pytorch中幀圖像的加載,以便完成動作識別。
參考文檔:
http://pytorch123.com/ThirdSection/DataLoding/
http://www.reibang.com/p/4ebf2a82017b
這段代碼的總體思想就是索引到存儲幀圖像的文件夾,按照train_list挨個索引到對應的幀圖像,隨機取初始幀,然後讀取連續16幀,存儲到4維張量中,格式為[z,c,h,w]
z:幀數(shù)=16
c:圖像深度
h:圖像高度
w:圖像寬度
代碼如下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 26 21:35:32 2019
@author: xuguangying
"""
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import pandas as pd
import os
import random
# Silence every warning raised from here on (process-wide side effect of
# importing this module).
import warnings
warnings.filterwarnings("ignore")
# NOTE(review): looks like a leftover debug print; it runs on every import of
# this module -- confirm it can be removed.
print('a')
#%%
class UCF101(Dataset):
    """Video-clip dataset that reads pre-extracted frame images from disk.

    Every row of the info list holds a video path (column 0) and its label
    (column 1), separated by a single space. The frames of a video are
    expected under ``<root_dir>/<video path without extension>/`` with the
    names ``image_00001.jpg``, ``image_00002.jpg``, ...

    A sample is a dict with two entries:

    * ``'video_x'``: float64 array of shape ``(clip_len, H, W, C)`` holding
      ``clip_len`` consecutive frames starting at a random position.
    * ``'video_label'``: the label taken from the info list.
    """

    def __init__(self, info_list, root_dir, transform=None, clip_len=16):
        """
        Args:
            info_list (string): Path to the space-separated list file with
                one ``<video path> <label>`` pair per line (no header).
            root_dir (string): Directory with all the video frame folders.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            clip_len (int, optional): Number of consecutive frames per
                sample. Defaults to 16.

        Raises:
            ValueError: If ``clip_len`` is smaller than 1.
        """
        if clip_len < 1:
            raise ValueError('clip_len must be at least 1, got %r' % (clip_len,))
        self.landmarks_frame = pd.read_csv(info_list, delimiter=' ', header=None)
        self.root_dir = root_dir
        self.transform = transform
        self.clip_len = clip_len

    def __len__(self):
        """Return the number of videos listed in the info list."""
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        """Load one random clip of video ``idx`` and apply the transform."""
        video_name = self.landmarks_frame.iloc[idx, 0]
        video_label = self.landmarks_frame.iloc[idx, 1]
        video_x = self.get_single_video_x(video_name)
        sample = {'video_x': video_x, 'video_label': video_label}
        if self.transform:
            sample = self.transform(sample)
        return sample

    def get_single_video_x(self, aaa):
        """Read ``clip_len`` consecutive frames of one video.

        Args:
            aaa (string): Video path as written in the info list; its file
                extension is dropped to obtain the frame folder.

        Returns:
            numpy.ndarray: float64 array of shape ``(clip_len,) + frame.shape``
            where ``frame.shape`` is taken from the first frame read.

        Raises:
            ValueError: If the folder holds fewer than ``clip_len`` frames.
        """
        name, _ = os.path.splitext(aaa)
        # Resolve against the instance's root_dir, not a module-level global,
        # so the dataset also works when imported from another script.
        pic_path = os.path.join(self.root_dir, name)

        # Count only files that follow the frame naming scheme; stray files
        # in the folder must not widen the range of start positions.
        frame_count = sum(
            1 for entry in os.listdir(pic_path)
            if entry.startswith('image_') and entry.endswith('.jpg')
        )
        if frame_count < self.clip_len:
            raise ValueError(
                '%s holds %d frame(s) but a clip needs %d'
                % (pic_path, frame_count, self.clip_len)
            )

        # Frames are numbered from 1 and randint is inclusive on both ends,
        # so the last admissible start is frame_count - clip_len + 1.
        image_start = random.randint(1, frame_count - self.clip_len + 1)

        video_x = None
        for offset in range(self.clip_len):
            image_name = 'image_%05d.jpg' % (image_start + offset)
            frame = io.imread(os.path.join(pic_path, image_name))
            if video_x is None:
                # Size the buffer from the first frame instead of assuming
                # a fixed 240x320x3 resolution.
                video_x = np.zeros((self.clip_len,) + frame.shape)
            video_x[offset] = frame
        return video_x
#%%
class ClipSubstractMean(object):
    """Subtract a fixed per-channel mean from every pixel of a clip.

    The constructor takes the means in (b, g, r) order but stores them as
    (r, g, b); the stored triple is subtracted along the last axis of
    ``sample['video_x']``.
    """

    def __init__(self, b=104, g=117, r=123):
        self.means = np.array((r, g, b))

    def __call__(self, sample):
        """Return a new sample dict with the channel means removed."""
        centered = sample['video_x'] - self.means
        return {'video_x': centered, 'video_label': sample['video_label']}
#%%
class Rescale(object):
    """Resize every frame of a clip to a common size.

    Args:
        output_size (tuple or int): Desired output size. A tuple is used as
            ``(height, width)``. With an int, the shorter side of the frame
            becomes ``output_size`` and the longer side is scaled by the
            same ratio.
    """

    def __init__(self, output_size=(182, 242)):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        """Return a new sample whose ``video_x`` frames are resized.

        ``sample['video_x']`` must be a 4-D array laid out as
        ``(frames, height, width, channels)``; the result is a float64
        array of shape ``(frames, new_h, new_w, channels)``.
        """
        video_x, video_label = sample['video_x'], sample['video_label']
        # Read all four extents from the shape (the width used to be taken
        # from video_x[2], i.e. a frame, which broke the int branch below).
        num_frames, h, w, channels = video_x.shape

        if isinstance(self.output_size, int):
            # Keep the aspect ratio: the shorter side becomes output_size.
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size
        new_h, new_w = int(new_h), int(new_w)

        new_video_x = np.zeros((num_frames, new_h, new_w, channels))
        for i in range(num_frames):
            new_video_x[i] = transform.resize(video_x[i], (new_h, new_w))
        return {'video_x': new_video_x, 'video_label': video_label}
#%%
class RandomCrop(object):
    """Randomly crop the frames of a clip.

    The same window is applied to every frame so the clip stays spatially
    aligned over time.

    Args:
        output_size (tuple or int): Desired output size. A tuple is used as
            ``(height, width)``; an int produces a square crop.
    """

    def __init__(self, output_size=(160, 160)):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        """Return a new sample whose ``video_x`` is cropped.

        ``sample['video_x']`` must be laid out as
        ``(frames, height, width, channels)``; the result is a float64 copy
        of shape ``(frames, new_h, new_w, channels)``.

        Raises:
            ValueError: If the crop is larger than the frames.
        """
        video_x, video_label = sample['video_x'], sample['video_label']
        h, w = video_x.shape[1], video_x.shape[2]
        new_h, new_w = self.output_size
        if new_h > h or new_w > w:
            raise ValueError(
                'crop size %r exceeds frame size %r' % ((new_h, new_w), (h, w))
            )

        # np.random.randint excludes the upper bound, so add 1: this makes
        # the last offset reachable and allows a crop equal to the frame.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        # One slice over all frames; astype copies, so the result does not
        # alias the input and is float64 like the former zero-filled buffer.
        window = video_x[:, top: top + new_h, left: left + new_w, :]
        new_video_x = window.astype(np.float64)
        return {'video_x': new_video_x, 'video_label': video_label}
#%%
class ToTensor(object):
    """Turn the numpy clip and its label into torch tensors."""

    def __call__(self, sample):
        """Return ``{'video_x': Tensor, 'video_label': FloatTensor}``.

        The clip axes are reordered from numpy's
        ``frames x H x W x C`` to torch's ``frames x C x H x W``; the label
        is wrapped into a one-element float tensor.
        """
        channels_first = np.transpose(sample['video_x'], (0, 3, 1, 2))
        # np.array makes a copy of the transposed view before it is handed
        # to torch.from_numpy.
        clip_tensor = torch.from_numpy(np.array(channels_first))
        label_tensor = torch.FloatTensor([sample['video_label']])
        return {'video_x': clip_tensor, 'video_label': label_tensor}
#%%
if __name__ == '__main__':
    # Smoke test: iterate over the training split once and print what each
    # batch looks like.
    root_list = '/media/xuguangying/action recogniton/database/UTH/frames_of_video'
    info_list = '/media/xuguangying/action recogniton/database/UTH/ucfTrainTestlist/trainlist01.txt'

    # Mean subtraction -> resize -> random crop -> tensor conversion.
    # (For raw, untransformed clips use UCF101(info_list, root_list).)
    pipeline = transforms.Compose([
        ClipSubstractMean(),
        Rescale(),
        RandomCrop(),
        ToTensor(),
    ])
    myUCF101 = UCF101(info_list, root_list, transform=pipeline)
    dataloader = DataLoader(myUCF101, batch_size=8, shuffle=True, num_workers=6)

    for i_batch, sample_batched in enumerate(dataloader):
        clip_size = sample_batched['video_x'].size()
        batch_labels = sample_batched['video_label']
        # Print the size of each batch and the size of its label tensor.
        print(i_batch, clip_size, batch_labels.size())
        # Print the size of each batch and the labels of its samples.
        print(i_batch, clip_size, batch_labels)