DeepLab 源碼分析之 input_preprocess.py

這次我們分析 input_preprocess.py,它主要負責預處理數據,供 DeepLab 訓練或驗證使用。

使用了 core/preprocess_utils.py 的大量函數

首先 import 必要的庫

import tensorflow as tf
from deeplab.core import feature_extractor
from deeplab.core import preprocess_utils

# Probability passed to the left-right flip that is applied to the image and
# label during training.
_PROB_OF_FLIP = 0.5

函數 preprocess_image_and_label 返回三個值:
原圖(若指定了 min_resize_value 或 max_resize_value,則為 resize 之後的圖)
處理後的圖片 [crop_height, crop_width, 3]
標籤 [crop_height, crop_width, 1]

def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses an image and its (optional) label for DeepLab.

  The steps run in this order: cast, optional resize to a size range, scaling
  by a factor drawn from the scale arguments, padding so that both sides are
  at least the crop size, then (training only) a random crop and a random
  left-right flip. The heavy lifting is delegated to helpers in
  `preprocess_utils`.

  Args:
    image: Input image.
    label: Ground truth annotation label. May be None when not training.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation. It is also the value used to pad the label.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: The input image, or the resized image when
      `min_resize_value` or `max_resize_value` is given.
    processed_image: Preprocessed image, with static shape set to
      [crop_height, crop_width, 3].
    label: Preprocessed ground truth segmentation label; when present its
      static shape is set to [crop_height, crop_width, 1].

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  # Training is impossible without ground truth, so fail before building ops.
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')

  # A missing model_variant is allowed, but the caller gets a warning.
  if model_variant is None:
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  # Keep a handle on the untouched input; it is returned unless a resize
  # replaces it below.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Optionally bring image and label into the requested size range.
  resize_requested = not (min_resize_value is None and
                          max_resize_value is None)
  if resize_requested:
    processed_image, label = preprocess_utils.resize_to_range(
        image=processed_image,
        label=label,
        min_size=min_resize_value,
        max_size=max_resize_value,
        factor=resize_factor,
        align_corners=True)
    # From here on the "original" image is the resized one.
    original_image = tf.identity(processed_image)

  # Scale augmentation. Both helpers are called regardless of `is_training`.
  random_scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  scaled_pair = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, random_scale)
  processed_image, label = scaled_pair
  processed_image.set_shape([None, None, 3])

  # Work out padded dimensions: each side grows to the crop size when it is
  # smaller, and is left alone otherwise.
  current_dims = tf.shape(processed_image)
  current_height, current_width = current_dims[0], current_dims[1]
  padded_height = current_height + tf.maximum(crop_height - current_height, 0)
  padded_width = current_width + tf.maximum(crop_width - current_width, 0)

  # The image is padded with the model's mean pixel, the label with
  # `ignore_label`.
  pad_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, padded_height, padded_width, pad_pixel)
  if label is not None:
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, padded_height, padded_width, ignore_label)

  # Random crop of image and label together, training only.
  if is_training and label is not None:
    cropped_pair = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)
    processed_image, label = cropped_pair

  # Record the static shapes on the outputs.
  processed_image.set_shape([crop_height, crop_width, 3])
  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  # Random left-right flip of image and label, training only.
  if is_training:
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章