-
Notifications
You must be signed in to change notification settings - Fork 1
/
search_data.py
50 lines (42 loc) · 1.68 KB
/
search_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# search_data.py
# Construct and preprocess data for model
import torch
import torchvision
from torchvision import transforms, datasets
from utils import *
import os
import random
'''
Retrieve images from CIFAR-10 dataset using torchvision
'''
def get_cifar_data(root='../data', batch_size=1, num_workers=1):
    """Build CIFAR-10 training and test DataLoaders through torchvision.

    Each image is resized to 224x224, converted to a tensor and normalized
    per channel with the mean/std constants below. The dataset files are
    downloaded into ``root`` when they are not already present.

    Args:
        root: Directory in which the CIFAR-10 files are stored/downloaded.
        batch_size: Samples per batch for both loaders.
        num_workers: Worker subprocesses used by each loader.

    Returns:
        A tuple ``(trainloader, testloader, trainset)``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(
        root=root, train=True, download=True, transform=transform)
    testset = torchvision.datasets.CIFAR10(
        root=root, train=False, download=True, transform=transform)

    # Construct DataLoaders for training and test sets. shuffle=False makes
    # the loaders walk the datasets in their stored order.
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return trainloader, testloader, trainset
def index_dataset(trainset):
    """Fill a fresh Indexer from ``trainset`` and print its size.

    A new ``Indexer`` is created and handed, together with ``trainset``, to
    ``add_dataset_features``; the resulting number of indexed entries is
    printed. Nothing is returned.
    """
    indexer = Indexer()
    add_dataset_features(trainset, indexer)
    num_entries = len(indexer)
    print(num_entries)
'''
Parse and retrieve data from train_path. Also construct an image indexer from index to image_path
'''
def get_ml_data(train_path, train_cutoff=.95):
    """Split the entries of ``train_path`` into shuffled train/test index lists.

    Every directory entry (in sorted name order) is joined with
    ``train_path`` and registered with a new ``Indexer`` via ``get_index``.
    The positions ``0..n-1`` of those entries are then shuffled and cut at
    ``int(n * train_cutoff)``.

    Args:
        train_path: Directory whose entries make up the data set.
        train_cutoff: Fraction of entries assigned to the training split.

    Returns:
        A tuple ``(train_data, test_data, indexer)`` where the first two are
        lists of integer positions and ``indexer`` is the populated Indexer.
    """
    image_indexer = Indexer()
    image_paths = []
    for entry in sorted(os.listdir(train_path)):
        full_path = os.path.join(train_path, entry)
        image_paths.append(full_path)
    for full_path in image_paths:
        image_indexer.get_index(full_path)

    # Generate training and test sets: shuffle the positions, then cut at
    # the requested fraction (95% training / 5% test by default).
    total = len(image_paths)
    order = list(range(total))
    random.shuffle(order)
    split_at = int(total * train_cutoff)

    return order[:split_at], order[split_at:], image_indexer