- 第0步:导入数据集
- 第1步:检测人
- 第2步:检测狗
- 第3步:创建CNN以对狗品种进行分类(from Scratch,即从零开始)
- 第4步:创建CNN以对狗品种进行分类(使用迁移学习)
- 第5步:测试算法
import os
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files


def load_dataset(path, num_classes=133):
    """Load image file paths and one-hot encoded labels from a directory.

    Args:
        path: string. Directory handed to ``sklearn.datasets.load_files``.
        num_classes: int. Width of the one-hot encoding. Defaults to 133,
            the value that was previously hard-coded.

    Returns:
        tuple ``(dog_files, dog_targets)``: a numpy array built from the
        loaded ``'filenames'`` entry and the one-hot encoding of the
        ``'target'`` entry.
    """
    data = load_files(path)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return dog_files, dog_targets


def _breed_name(class_dir):
    """Return the breed name encoded in a class directory path.

    The trailing separator is dropped, the last path component is taken and
    everything up to the first ``.`` is removed. A component without a dot
    is returned unchanged.
    NOTE(review): assumes folders are named like ``001.Affenpinscher`` --
    confirm against the extracted dataset.
    """
    folder = os.path.basename(os.path.normpath(class_dir))
    return folder.split('.', 1)[-1]


# load train, test, and validation datasets
train_files, train_targets = load_dataset('dog_images/train')
valid_files, valid_targets = load_dataset('dog_images/valid')
test_files, test_targets = load_dataset('dog_images/test')

# load list of dog names
# A fixed slice such as item[20:-1] silently depends on the length of the
# "dog_images/train/" prefix and left a leading "." on every name; deriving
# the name from the path components does not.
dog_names = [_breed_name(item) for item in sorted(glob("dog_images/train/*/"))]
下载狗数据集(https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/dogImages.zip)。解压缩文件,路径示例:`path/to/dog_images`。
import random

# Fixed seed so the shuffle below produces the same order on every run.
random.seed(8675309)

# load filenames in shuffled human dataset
# glob() yields paths in arbitrary, filesystem-dependent order; sorting first
# makes the seeded shuffle reproducible across machines.
human_files = np.array(sorted(glob("lfw/*/*")))
random.shuffle(human_files)
下载人数据集(https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/lfw.zip)。解压缩文件,路径示例:`path/to/lfw`。
def face_detector(img_path):
    """Report whether at least one face is detected in an image file.

    The image is read with OpenCV, converted from BGR to grayscale and passed
    to ``face_cascade.detectMultiScale``. ``cv2`` and ``face_cascade`` are
    expected to be defined elsewhere in the file.

    Args:
        img_path: string. a file path to an image.

    Returns:
        bool, True when the detector returns one or more detections.
    """
    bgr_pixels = cv2.imread(img_path)
    grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(grayscale)
    return len(detections) > 0
# Evaluate the face detector on the first 100 human and dog images.
human_files_short = human_files[:100]
dog_files_short = train_files[:100]

# The raw hit count is printed with a "%" suffix; it reads as a percentage
# only when each slice really holds 100 files.
print("human_files: "
      + str(sum(bool(face_detector(human_file)) for human_file in human_files_short))
      + "%")
print("dog_files: "
      + str(sum(bool(face_detector(dog_file)) for dog_file in dog_files_short))
      + "%")
from keras.applications.resnet50 import ResNet50

# Module-level ResNet-50 instance created with the 'imagenet' weights; it is
# the model queried by ResNet50_predict_labels.
ResNet50_model = ResNet50(
    weights='imagenet',
)
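当使用TensorFlow作为后端时,Keras CNN需要一个4D数组(我们也将其称为4D张量)作为输入,其形状为 $(\text{n\_samples}, \text{rows}, \text{columns}, \text{channels})$,其中 n_samples 为图像(或样本)的总数,rows、columns 和 channels 分别为每张图像的行数、列数和通道数。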
下面的函数path_to_tensor将图像文件路径作为输入,并返回适合提供给Keras CNN的4D张量。该函数首先加载图像并将其大小调整为224×224像素的正方形图像。接下来,将图像转换为数组,然后将其调整为4D张量。在这种情况下,由于我们正在处理彩色图像,因此每个图像都有三个通道。同样,由于我们正在处理单个图像(或样本),因此返回的张量将始终具有形状 $(1, 224, 224, 3)$。
from keras.preprocessing import image
from tqdm import tqdm


def path_to_tensor(img_path):
    """Turn one image file into a single-sample 4D tensor for a Keras CNN.

    Args:
        img_path: string. a file path to a color image.

    Returns:
        numpy.array, a 4D tensor with a leading sample axis of length 1.
        For a color image the shape is (1, 224, 224, 3).
    """
    # Load the file as a PIL image resized to 224x224.
    pil_image = image.load_img(img_path, target_size=(224, 224))
    # PIL image -> 3D array (rows, columns, channels).
    pixels = image.img_to_array(pil_image)
    # Prepend the sample axis so the result can be fed to a model directly.
    return np.expand_dims(pixels, axis=0)


def paths_to_tensor(img_paths):
    """Stack the tensors of several image files into one 4D tensor.

    A tqdm progress bar is shown while the files are converted.

    Args:
        img_paths: iterable of strings. file paths to color images.

    Returns:
        numpy.array, a 4D tensor of shape (n_samples, 224, 224, 3).
    """
    per_image_tensors = []
    for single_path in tqdm(img_paths):
        per_image_tensors.append(path_to_tensor(single_path))
    return np.vstack(per_image_tensors)
from keras.applications.resnet50 import preprocess_input, decode_predictions


def ResNet50_predict_labels(img_path):
    """Return the index of the class ResNet50_model scores highest.

    The image is converted with ``path_to_tensor``, run through
    ``preprocess_input`` and passed to ``ResNet50_model.predict``. The
    position of the largest entry of the prediction is returned.

    Args:
        img_path: string. a file path to a color image.

    Returns:
        integer index (``np.argmax`` result) of the top-scoring category,
        not the category name.
    """
    image_batch = path_to_tensor(img_path)
    class_scores = ResNet50_model.predict(preprocess_input(image_batch))
    return np.argmax(class_scores)
在查看ImageNet类别字典时,您会注意到与狗对应的类别以不间断的顺序出现,并与字典键151-268对应,包括从“Chihuahua”到“Mexican hairless”的所有类别。因此,为了检查预训练的ResNet-50模型是否预测图像包含狗,我们只需要检查上面的`ResNet50_predict_labels`函数是否返回151到268之间的值(含两端)。
def dog_detector(img_path):
    """Tell whether the predicted label of an image falls in the dog range.

    The label returned by ``ResNet50_predict_labels`` is compared against the
    closed interval [151, 268], which the surrounding notes identify as the
    dog categories.

    Args:
        img_path: string. a file path to a color image.

    Returns:
        boolean, True when the predicted label lies between 151 and 268
        (inclusive), False otherwise.
    """
    label = ResNet50_predict_labels(img_path)
    return 151 <= label <= 268
# Evaluate the dog detector on the first 100 human and dog images.
human_files_short = human_files[:100]
dog_files_short = train_files[:100]

# The raw hit count is printed with a "%" suffix; it reads as a percentage
# only when each slice really holds 100 files.
print('human_files: '
      + str(sum(1 for human_file in human_files_short if dog_detector(human_file)))
      + '%')
print('dog_files: '
      + str(sum(1 for dog_file in dog_files_short if dog_detector(dog_file)))
      + '%')
第3步:创建卷积神经网络(CNN)以对狗品种进行分类(from Scratch)
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# From-scratch CNN: two blocks of (conv, conv, max-pool, dropout), then
# global average pooling and a dense classifier with 133 softmax outputs.
model = Sequential([
    # Block 1: 32 filters, 224x224x3 input.
    Conv2D(32, 3, padding='same', activation='relu', input_shape=(224, 224, 3)),
    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),
    # Block 2: 64 filters.
    Conv2D(64, 3, padding='same', activation='relu'),
    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.2),
    # Classifier head.
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(133, activation='softmax'),
])
定义了Keras CNN的训练和测试
from keras.callbacks import ModelCheckpoint


def train_model(model, train_tensors, train_targets, valid_tensors, valid_targets, save_filepath, epochs=20, batch_size=20):
    """Fit a model on the training set while validating on the validation set.

    A ``ModelCheckpoint`` callback with ``save_best_only=True`` writes the
    model to ``save_filepath`` during training.

    Args:
        model: compiled keras model
        train_tensors: train dataset
        train_targets: train targets
        valid_tensors: validation dataset
        valid_targets: validation targets
        save_filepath: file path the checkpoint callback writes to
        epochs: number of epochs, default 20
        batch_size: batch size, default 20
    """
    checkpointer = ModelCheckpoint(filepath=save_filepath, verbose=1, save_best_only=True)
    model.fit(
        train_tensors,
        train_targets,
        validation_data=(valid_tensors, valid_targets),
        epochs=epochs,
        # Forward the caller's value; a literal 20 here would silently
        # ignore the batch_size argument.
        batch_size=batch_size,
        callbacks=[checkpointer],
        verbose=2)


def test_model(model, test_tensors, test_targets):
    """Print the accuracy of a model on the test set.

    Args:
        model: compiled keras model
        test_tensors: test dataset
        test_targets: one-hot test targets
    """
    # get index of predicted dog breed for each image in test set
    predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in test_tensors]

    # report test accuracy: share of predictions equal to the target index
    test_accuracy = 100*np.sum(np.array(predictions)==np.argmax(test_targets, axis=1))/len(predictions)
    print('Test accuracy: %.4f%%' % test_accuracy)
# Configure training: RMSprop optimizer, categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
首先,定义用于提取ResNet-50的bottleneck 特征的函数,这些特征将作为我们的狗品种分类模型的输入。参数include_top表示是否包含网络顶部的全连接层,这里设为False,因为在我们的应用程序中,网络顶部的全连接层是专门用来对犬种进行分类的,需要由我们自己定义。
注意:没有全连接层的ResNet50会返回由ImageNet数据集预先训练的bottleneck 特征。
from keras.applications.resnet50 import ResNet50, preprocess_input

# Headless ResNet-50 shared by all extract_Resnet50 calls; built on first use.
_RESNET50_BOTTLENECK_MODEL = None


def _get_bottleneck_model():
    """Return the cached ResNet-50 without its top layers, building it once."""
    global _RESNET50_BOTTLENECK_MODEL
    if _RESNET50_BOTTLENECK_MODEL is None:
        _RESNET50_BOTTLENECK_MODEL = ResNet50(weights='imagenet', include_top=False)
    return _RESNET50_BOTTLENECK_MODEL


def extract_Resnet50(tensor):
    """Extract ResNet-50 bottleneck features to feed the breed classifier.

    The input is run through ``preprocess_input`` and then through a
    ResNet-50 created with ``weights='imagenet'`` and ``include_top=False``.
    The network is built once and reused, so repeated calls (for example one
    per predicted image) do not rebuild the model and reload its weights.

    Args:
        tensor: numpy.array. a 4D tensor suitable for supplying to a Keras CNN.

    Returns:
        numpy.array, bottleneck features of ResNet-50.
    """
    return _get_bottleneck_model().predict(preprocess_input(tensor))
然后,我们提取bottleneck 特征
def _bottleneck_features_for(file_paths):
    """Load the given image files as float32 tensors and extract their features."""
    image_batch = paths_to_tensor(file_paths).astype('float32')
    return extract_Resnet50(image_batch)


# Bottleneck features for the train, validation and test splits.
train_Resnet50 = _bottleneck_features_for(train_files)
valid_Resnet50 = _bottleneck_features_for(valid_files)
test_Resnet50 = _bottleneck_features_for(test_files)
# Transfer-learning classifier: global average pooling over the bottleneck
# features followed by a dense layer with 133 softmax outputs.
Resnet50_model = Sequential([
    GlobalAveragePooling2D(input_shape=train_Resnet50.shape[1:]),
    Dense(133, activation='softmax'),
])
这就是我们使用迁移学习所做的一切!然后我们编译我们的狗品种分类模型,并用bottleneck 特征训练和测试模型。尽管训练时使用的参数与第3步相同,最终测试精度却达到了82.1770%。
def Resnet50_predict_breed(img_path):
    """Return the dog breed predicted by the transfer learning model.

    Args:
        img_path: string. a file path to a color image.

    Returns:
        tuple of (entry of ``dog_names`` at the top-scoring index, the
        model's score at that index).
    """
    features = extract_Resnet50(path_to_tensor(img_path))
    breed_scores = Resnet50_model.predict(features)
    best = np.argmax(breed_scores)
    return dog_names[best], breed_scores.flatten()[best]


def img_show(img_path):
    """Display an image file with matplotlib.

    The file is read with OpenCV and converted from BGR to RGB before being
    shown. ``cv2`` and ``plt`` are expected to be imported elsewhere in the
    file.

    Args:
        img_path: string. a file path to a color image.
    """
    bgr_pixels = cv2.imread(img_path)
    plt.imshow(cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB))
    plt.show()


def _present_match(img_path, greeting, lead_in):
    """Predict the breed, then print a greeting, the image and the result."""
    name, prob = Resnet50_predict_breed(img_path)
    print(greeting)
    img_show(img_path)
    print(lead_in)
    print(name + " (Probability: " + str(round(prob, 2)) + ")")


def show_dog(img_path):
    """Show a dog image together with its predicted breed.

    Args:
        img_path: string. a file path to a color image.
    """
    _present_match(img_path, "hello, dog!", "The dog looks like a ...")


def show_human(img_path):
    """Show a human image together with the dog breed it resembles.

    Args:
        img_path: string. a file path to a color image.
    """
    _present_match(img_path, "hello, human!", "You look like a ...")


def show_error():
    """Print the message used when neither a dog nor a human is detected."""
    print("sorry, you look like neither dog or human...")


def main(img_path):
    """Entry point of the application: dispatch on what the image shows.

    The dog detector is tried first, then the face detector; if neither
    fires, the error message is printed.

    Args:
        img_path: string. a file path to a color image.
    """
    if dog_detector(img_path):
        show_dog(img_path)
        return
    if face_detector(img_path):
        show_human(img_path)
        return
    show_error()