dataset的設置

製作數據集

    # Wrap the training dataset in a DataLoader. Batch size, worker count and
    # the shuffle flag are all read from the train_data_args mapping.
    # NOTE(review): train_dataset, train_data_args and DataLoader are defined
    # outside this snippet — confirm they are in scope where this is pasted.
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=train_data_args["batch_size"],
        num_workers=train_data_args["num_workers"],
        shuffle=train_data_args["shuffle"]
    )
import os
import glob
import torch
import numpy as np
import scipy.io.wavfile as wav
from scipy.io.wavfile import read
from utils_mfcc import computer_feature
from torch.utils.data import Dataset
import pdb
class MyDataset_train(Dataset):
    """Speech/music training set built from fixed-length feature segments.

    Every ``*.wav`` file under ``<path>/speech`` and ``<path>/music`` is passed
    to ``computer_feature``; the result is transposed and cut along its first
    axis into non-overlapping segments of ``frames_per_segment`` rows. Rows
    left over at the end of a file (fewer than one full segment) are dropped.
    Speech segments are labelled 0 and music segments are labelled 1.

    Args:
        path: Root directory holding the ``speech`` and ``music`` folders.
        frames_per_segment: Number of rows in every segment (must be >= 1).

    Raises:
        ValueError: If ``frames_per_segment`` is smaller than 1.
    """

    def __init__(self, path: str = './5000', frames_per_segment: int = 20):
        super().__init__()
        if frames_per_segment < 1:
            raise ValueError('frames_per_segment must be >= 1')

        # List of (segment, label) tuples, speech first and then music.
        self.data = []
        speech_wav_files = glob.glob(os.path.join(path, 'speech', '*.wav'))
        music_wav_files = glob.glob(os.path.join(path, 'music', '*.wav'))

        # speech label: 0
        for speech in speech_wav_files:
            self._append_segments(speech, 0, frames_per_segment)

        print('the number of speech for training: {}'.format(len(self.data)))
        speech_data = len(self.data)

        # music label: 1
        for music in music_wav_files:
            self._append_segments(music, 1, frames_per_segment)

        print('the number of music for training: {}'.format(len(self.data) - speech_data))

    def _append_segments(self, wav_file, label, frames_per_segment):
        """Extract features from one wav file and store its labelled segments."""
        mfcc_features = np.transpose(computer_feature(wav_file))
        self.data.extend(
            (segment, label)
            for segment in self._split_segments(mfcc_features, frames_per_segment)
        )

    @staticmethod
    def _split_segments(features, frames_per_segment):
        """Cut ``features`` along axis 0 into full, non-overlapping segments."""
        num = features.shape[0] // frames_per_segment
        return [
            features[i * frames_per_segment:(i + 1) * frames_per_segment]
            for i in range(num)
        ]

    def __getitem__(self, idx):
        """Return ``{'inputs': float tensor of the segment, 'label': int}``."""
        (m_feat, label) = self.data[idx]

        return {'inputs': torch.FloatTensor(m_feat), 'label': label}

    def __len__(self):
        """Return the total number of stored segments."""
        return len(self.data)

 

 

 

import numpy as np

from torch.utils.data import Dataset


class Single_SNR_Train_Dataset(Dataset):
    """Training dataset for a single signal-to-noise ratio (SNR).

    Each constructor argument is the path of a ``.npy`` file that stores a
    pickled ``dict``. The dict values are concatenated along axis 0 (in dict
    order) into one array per dataset, and samples are addressed by row index.

    Args:
        mixture_dataset: Path of the mixture ``.npy`` file.
        clean_dataset: Path of the clean ``.npy`` file.
        target_1_dataset: Path of the first target ``.npy`` file.
        target_2_dataset: Path of the second target ``.npy`` file.
    """

    def __init__(self, mixture_dataset, clean_dataset, target_1_dataset, target_2_dataset):
        super(Single_SNR_Train_Dataset, self).__init__()
        print("Loading Mixture Dataset...")
        self.mixture_dataset_7_frames_wise = self._load_concatenated(mixture_dataset)
        print(f"Loaded: {self.mixture_dataset_7_frames_wise.shape}")

        print("Loading Clean Dataset...")
        self.clean_dataset_frame_wise = self._load_concatenated(clean_dataset)
        print(f"Loaded: {self.clean_dataset_frame_wise.shape}")

        print("Loading Target 1 Dataset...")
        self.target_1_dataset_frame_wise = self._load_concatenated(target_1_dataset)
        print(f"Loaded: {self.target_1_dataset_frame_wise.shape}")

        print("Loading Target 2 Dataset...")
        self.target_2_dataset_frame_wise = self._load_concatenated(target_2_dataset)
        print(f"Loaded: {self.target_2_dataset_frame_wise.shape}")

    @staticmethod
    def _load_concatenated(npy_path):
        """Load a pickled dict from ``npy_path`` and concatenate its values."""
        # The file holds a dict wrapped in a 0-d object array, which NumPy
        # refuses to load unless allow_pickle=True (default is False since
        # NumPy 1.16.3).
        # SECURITY: unpickling can execute arbitrary code — only load dataset
        # files that come from a trusted source.
        stored = np.load(npy_path, allow_pickle=True).item()
        return np.concatenate(list(stored.values()))

    def __len__(self):
        """Return the number of rows in the mixture array."""
        return self.mixture_dataset_7_frames_wise.shape[0]

    def __getitem__(self, item):
        """Return the ``(mixture, target_1, target_2, clean)`` rows at ``item``."""
        return (
            self.mixture_dataset_7_frames_wise[item],
            self.target_1_dataset_frame_wise[item],
            self.target_2_dataset_frame_wise[item],
            self.clean_dataset_frame_wise[item]
        )

 

發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章