From 8c8a125147bd5d59a8bafc34e2d97441b791b008 Mon Sep 17 00:00:00 2001
From: Nataniel Ruiz <nruiz@Nataniels-MacBook-Pro.local>
Date: Wed, 25 Dec 2019 19:12:55 -0400
Subject: [PATCH] next

---
 cyclegan/.gitignore                           |  1 -
 cyclegan/datasets/bibtex/cityscapes.tex       |  6 ++
 cyclegan/datasets/bibtex/facades.tex          |  7 ++
 cyclegan/datasets/bibtex/handbags.tex         | 13 +++
 cyclegan/datasets/bibtex/shoes.tex            | 14 +++
 cyclegan/datasets/bibtex/transattr.tex        |  8 ++
 cyclegan/datasets/combine_A_and_B.py          | 48 +++++++++
 .../datasets/download_cyclegan_dataset.sh     | 21 ++++
 cyclegan/datasets/download_pix2pix_dataset.sh | 22 +++++
 cyclegan/datasets/make_dataset_aligned.py     | 63 ++++++++++++
 .../datasets/prepare_cityscapes_dataset.py    | 99 +++++++++++++++++++
 11 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 cyclegan/datasets/bibtex/cityscapes.tex
 create mode 100644 cyclegan/datasets/bibtex/facades.tex
 create mode 100644 cyclegan/datasets/bibtex/handbags.tex
 create mode 100644 cyclegan/datasets/bibtex/shoes.tex
 create mode 100644 cyclegan/datasets/bibtex/transattr.tex
 create mode 100644 cyclegan/datasets/combine_A_and_B.py
 create mode 100755 cyclegan/datasets/download_cyclegan_dataset.sh
 create mode 100755 cyclegan/datasets/download_pix2pix_dataset.sh
 create mode 100644 cyclegan/datasets/make_dataset_aligned.py
 create mode 100644 cyclegan/datasets/prepare_cityscapes_dataset.py

diff --git a/cyclegan/.gitignore b/cyclegan/.gitignore
index 4fdef3e..8d9fb64 100644
--- a/cyclegan/.gitignore
+++ b/cyclegan/.gitignore
@@ -1,6 +1,5 @@
 .DS_Store
 debug*
-datasets/
 checkpoints/
 results/
 build/
diff --git a/cyclegan/datasets/bibtex/cityscapes.tex b/cyclegan/datasets/bibtex/cityscapes.tex
new file mode 100644
index 0000000..a87bdbf
--- /dev/null
+++ b/cyclegan/datasets/bibtex/cityscapes.tex
@@ -0,0 +1,6 @@
+@inproceedings{Cordts2016Cityscapes,
+title={The Cityscapes Dataset for Semantic Urban Scene Understanding},
+author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt},
+booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+year={2016}
+}
diff --git a/cyclegan/datasets/bibtex/facades.tex b/cyclegan/datasets/bibtex/facades.tex
new file mode 100644
index 0000000..08b773e
--- /dev/null
+++ b/cyclegan/datasets/bibtex/facades.tex
@@ -0,0 +1,7 @@
+@INPROCEEDINGS{Tylecek13,
+  author = {Radim Tyle{\v c}ek and Radim {\v S}{\' a}ra},
+  title = {Spatial Pattern Templates for Recognition of Objects with Regular Structure},
+  booktitle = {Proc. GCPR},
+  year = {2013},
+  address = {Saarbrucken, Germany},
+}
diff --git a/cyclegan/datasets/bibtex/handbags.tex b/cyclegan/datasets/bibtex/handbags.tex
new file mode 100644
index 0000000..b79710c
--- /dev/null
+++ b/cyclegan/datasets/bibtex/handbags.tex
@@ -0,0 +1,13 @@
+@inproceedings{zhu2016generative,
+  title={Generative Visual Manipulation on the Natural Image Manifold},
+  author={Zhu, Jun-Yan and Kr{\"a}henb{\"u}hl, Philipp and Shechtman, Eli and Efros, Alexei A.},
+  booktitle={Proceedings of European Conference on Computer Vision (ECCV)},
+  year={2016}
+}
+
+@InProceedings{xie15hed,
+  author = {Xie, Saining and Tu, Zhuowen},
+  Title = {Holistically-Nested Edge Detection},
+  Booktitle = "Proceedings of IEEE International Conference on Computer Vision",
+  Year  = {2015},
+}
diff --git a/cyclegan/datasets/bibtex/shoes.tex b/cyclegan/datasets/bibtex/shoes.tex
new file mode 100644
index 0000000..e67e158
--- /dev/null
+++ b/cyclegan/datasets/bibtex/shoes.tex
@@ -0,0 +1,14 @@
+@InProceedings{fine-grained,
+  author = {A. Yu and K. Grauman},
+  title = {{F}ine-{G}rained {V}isual {C}omparisons with {L}ocal {L}earning},
+  booktitle = {Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2014}
+}
+
+@InProceedings{xie15hed,
+  author = {Xie, Saining and Tu, Zhuowen},
+  Title = {Holistically-Nested Edge Detection},
+  Booktitle = "Proceedings of IEEE International Conference on Computer Vision",
+  Year  = {2015},
+}
diff --git a/cyclegan/datasets/bibtex/transattr.tex b/cyclegan/datasets/bibtex/transattr.tex
new file mode 100644
index 0000000..0585849
--- /dev/null
+++ b/cyclegan/datasets/bibtex/transattr.tex
@@ -0,0 +1,8 @@
+@article {Laffont14,
+    title = {Transient Attributes for High-Level Understanding and Editing of Outdoor Scenes},
+    author = {Pierre-Yves Laffont and Zhile Ren and Xiaofeng Tao and Chao Qian and James Hays},
+    journal = {ACM Transactions on Graphics (proceedings of SIGGRAPH)},
+    volume = {33},
+    number = {4},
+    year = {2014}
+}
diff --git a/cyclegan/datasets/combine_A_and_B.py b/cyclegan/datasets/combine_A_and_B.py
new file mode 100644
index 0000000..2eebdaf
--- /dev/null
+++ b/cyclegan/datasets/combine_A_and_B.py
@@ -0,0 +1,48 @@
+import os
+import numpy as np
+import cv2
+import argparse
+
+# Pair same-named images from fold_A/<split> and fold_B/<split> side by side into fold_AB/<split>.
+parser = argparse.ArgumentParser('create image pairs')
+parser.add_argument('--fold_A', dest='fold_A', help='input directory for image A', type=str, default='../dataset/50kshoes_edges')
+parser.add_argument('--fold_B', dest='fold_B', help='input directory for image B', type=str, default='../dataset/50kshoes_jpg')
+parser.add_argument('--fold_AB', dest='fold_AB', help='output directory', type=str, default='../dataset/test_AB')
+parser.add_argument('--num_imgs', dest='num_imgs', help='number of images', type=int, default=1000000)
+parser.add_argument('--use_AB', dest='use_AB', help='if true: (0001_A, 0001_B) to (0001_AB)', action='store_true')
+args = parser.parse_args()
+
+for arg in vars(args):
+    print('[%s] = ' % arg, getattr(args, arg))
+
+# Sorted so that --num_imgs selects a reproducible subset; os.listdir order is arbitrary.
+splits = sorted(os.listdir(args.fold_A))
+
+for sp in splits:
+    img_fold_A = os.path.join(args.fold_A, sp)
+    img_fold_B = os.path.join(args.fold_B, sp)
+    if not os.path.isdir(img_fold_A):
+        continue  # stray files next to the split folders (e.g. .DS_Store) are not splits
+    img_list = sorted(os.listdir(img_fold_A))
+    if args.use_AB:
+        img_list = [img_path for img_path in img_list if '_A.' in img_path]
+
+    num_imgs = min(args.num_imgs, len(img_list))
+    print('split = %s, use %d/%d images' % (sp, num_imgs, len(img_list)))
+    img_fold_AB = os.path.join(args.fold_AB, sp)
+    if not os.path.isdir(img_fold_AB):
+        os.makedirs(img_fold_AB)
+    for name_A in img_list[:num_imgs]:
+        name_B = name_A.replace('_A.', '_B.') if args.use_AB else name_A
+        path_A = os.path.join(img_fold_A, name_A)
+        path_B = os.path.join(img_fold_B, name_B)
+        if not (os.path.isfile(path_A) and os.path.isfile(path_B)):
+            continue
+        # With --use_AB the output name drops the _A suffix: 0001_A.jpg -> 0001.jpg
+        name_AB = name_A.replace('_A.', '.') if args.use_AB else name_A
+        im_A = cv2.imread(path_A, 1)  # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
+        im_B = cv2.imread(path_B, 1)  # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
+        if im_A is None or im_B is None:  # imread returns None instead of raising
+            print('skipping unreadable pair: %s, %s' % (path_A, path_B))
+            continue
+        cv2.imwrite(os.path.join(img_fold_AB, name_AB), np.concatenate([im_A, im_B], 1))
diff --git a/cyclegan/datasets/download_cyclegan_dataset.sh b/cyclegan/datasets/download_cyclegan_dataset.sh
new file mode 100755
index 0000000..5cae447
--- /dev/null
+++ b/cyclegan/datasets/download_cyclegan_dataset.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Usage: bash ./datasets/download_cyclegan_dataset.sh <dataset_name>, run from the directory containing datasets/.
+FILE=$1
+if [[ $FILE != "ae_photos" && $FILE != "apple2orange" && $FILE != "summer2winter_yosemite" &&  $FILE != "horse2zebra" && $FILE != "monet2photo" && $FILE != "cezanne2photo" && $FILE != "ukiyoe2photo" && $FILE != "vangogh2photo" && $FILE != "maps" && $FILE != "cityscapes" && $FILE != "facades" && $FILE != "iphone2dslr_flower" && $FILE != "mini" && $FILE != "mini_pix2pix" && $FILE != "mini_colorization" ]]; then
+    echo "Available datasets are: apple2orange, summer2winter_yosemite, horse2zebra, monet2photo, cezanne2photo, ukiyoe2photo, vangogh2photo, maps, cityscapes, facades, iphone2dslr_flower, ae_photos"
+    exit 1
+fi
+if [[ $FILE == "cityscapes" ]]; then
+    echo "Due to license issue, we cannot provide the Cityscapes dataset from our repository. Please download the Cityscapes dataset from https://cityscapes-dataset.com, and use the script ./datasets/prepare_cityscapes_dataset.py."
+    echo "You need to download gtFine_trainvaltest.zip and leftImg8bit_trainvaltest.zip. For further instruction, please read ./datasets/prepare_cityscapes_dataset.py"
+    exit 1
+fi
+
+echo "Specified [$FILE]"
+URL=https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/$FILE.zip
+ZIP_FILE=./datasets/$FILE.zip
+TARGET_DIR=./datasets/$FILE/
+wget "$URL" -O "$ZIP_FILE" || { rm -f "$ZIP_FILE"; exit 1; }  # -N dropped: it has no effect together with -O
+mkdir -p "$TARGET_DIR"
+unzip "$ZIP_FILE" -d ./datasets/
+rm "$ZIP_FILE"
diff --git a/cyclegan/datasets/download_pix2pix_dataset.sh b/cyclegan/datasets/download_pix2pix_dataset.sh
new file mode 100755
index 0000000..4cfbfb1
--- /dev/null
+++ b/cyclegan/datasets/download_pix2pix_dataset.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Usage: bash ./datasets/download_pix2pix_dataset.sh <dataset_name>, run from the directory containing datasets/.
+FILE=$1
+if [[ $FILE != "cityscapes" && $FILE != "night2day" && $FILE != "edges2handbags" && $FILE != "edges2shoes" && $FILE != "facades" && $FILE != "maps" ]]; then
+  echo "Available datasets are cityscapes, night2day, edges2handbags, edges2shoes, facades, maps"
+  exit 1
+fi
+
+if [[ $FILE == "cityscapes" ]]; then
+    echo "Due to license issue, we cannot provide the Cityscapes dataset from our repository. Please download the Cityscapes dataset from https://cityscapes-dataset.com, and use the script ./datasets/prepare_cityscapes_dataset.py."
+    echo "You need to download gtFine_trainvaltest.zip and leftImg8bit_trainvaltest.zip. For further instruction, please read ./datasets/prepare_cityscapes_dataset.py"
+    exit 1
+fi
+
+echo "Specified [$FILE]"
+URL=http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/$FILE.tar.gz
+TAR_FILE=./datasets/$FILE.tar.gz
+TARGET_DIR=./datasets/$FILE/
+wget "$URL" -O "$TAR_FILE" || { rm -f "$TAR_FILE"; exit 1; }  # -N dropped: it has no effect together with -O
+mkdir -p "$TARGET_DIR"
+tar -zxvf "$TAR_FILE" -C ./datasets/
+rm "$TAR_FILE"
diff --git a/cyclegan/datasets/make_dataset_aligned.py b/cyclegan/datasets/make_dataset_aligned.py
new file mode 100644
index 0000000..739c767
--- /dev/null
+++ b/cyclegan/datasets/make_dataset_aligned.py
@@ -0,0 +1,63 @@
+import os
+
+from PIL import Image
+
+
+def get_file_paths(folder):
+    """Return the sorted absolute paths of the .png/.jpg files directly inside folder."""
+    top_level = next(os.walk(folder), None)  # first entry only: never descends into subfolders
+    if top_level is None:
+        return []
+    root, _, filenames = top_level
+    base_dir = os.path.abspath(root)
+    return [
+        os.path.join(base_dir, filename)
+        for filename in sorted(filenames)
+        if filename.endswith(('.png', '.jpg'))
+    ]
+
+
+def align_images(a_file_paths, b_file_paths, target_path):
+    """Paste the i-th images of both lists side by side (A left, B right) into target_path/NNNN.jpg."""
+    if not os.path.exists(target_path):
+        os.makedirs(target_path)
+    for i, a_path in enumerate(a_file_paths):
+        b_path = b_file_paths[i]
+        # Image.open keeps the file open; `with` closes both sources once the pair is saved.
+        with Image.open(a_path) as img_a, Image.open(b_path) as img_b:
+            assert img_a.size == img_b.size, "size mismatch: %s vs %s" % (a_path, b_path)
+            aligned_image = Image.new("RGB", (img_a.size[0] * 2, img_a.size[1]))
+            aligned_image.paste(img_a, (0, 0))
+            aligned_image.paste(img_b, (img_a.size[0], 0))
+            aligned_image.save(os.path.join(target_path, '{:04d}.jpg'.format(i)))
+
+
+if __name__ == '__main__':
+    import argparse
+    # Merge <dataset>/{test,train}A with <dataset>/{test,train}B into <dataset>/{test,train}.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--dataset-path', dest='dataset_path', required=True,  # None would crash os.path.join below
+        help='Which folder to process (it should have subfolders testA, testB, trainA and trainB)'
+    )
+    args = parser.parse_args()
+
+    dataset_folder = args.dataset_path
+    print(dataset_folder)
+
+    test_a_path = os.path.join(dataset_folder, 'testA')
+    test_b_path = os.path.join(dataset_folder, 'testB')
+    test_a_file_paths = get_file_paths(test_a_path)
+    test_b_file_paths = get_file_paths(test_b_path)
+    assert len(test_a_file_paths) == len(test_b_file_paths), 'testA and testB differ in image count'
+    test_path = os.path.join(dataset_folder, 'test')
+
+    train_a_path = os.path.join(dataset_folder, 'trainA')
+    train_b_path = os.path.join(dataset_folder, 'trainB')
+    train_a_file_paths = get_file_paths(train_a_path)
+    train_b_file_paths = get_file_paths(train_b_path)
+    assert len(train_a_file_paths) == len(train_b_file_paths), 'trainA and trainB differ in image count'
+    train_path = os.path.join(dataset_folder, 'train')
+
+    align_images(test_a_file_paths, test_b_file_paths, test_path)
+    align_images(train_a_file_paths, train_b_file_paths, train_path)
diff --git a/cyclegan/datasets/prepare_cityscapes_dataset.py b/cyclegan/datasets/prepare_cityscapes_dataset.py
new file mode 100644
index 0000000..2079139
--- /dev/null
+++ b/cyclegan/datasets/prepare_cityscapes_dataset.py
@@ -0,0 +1,99 @@
+import os
+import glob
+from PIL import Image
+
+help_msg = """
+The dataset can be downloaded from https://cityscapes-dataset.com.
+Please download the datasets [gtFine_trainvaltest.zip] and [leftImg8bit_trainvaltest.zip] and unzip them.
+gtFine contains the semantics segmentations. Use --gtFine_dir to specify the path to the unzipped gtFine_trainvaltest directory. 
+leftImg8bit contains the dashcam photographs. Use --leftImg8bit_dir to specify the path to the unzipped leftImg8bit_trainvaltest directory. 
+The processed images will be placed at --output_dir.
+
+Example usage:
+
+python prepare_cityscapes_dataset.py --gtFine_dir ./gtFine/ --leftImg8bit_dir ./leftImg8bit --output_dir ./datasets/cityscapes/
+"""
+
+def load_resized_img(path):  # Open the image at `path`, convert it to RGB and resize it to 256x256.
+    return Image.open(path).convert('RGB').resize((256, 256))
+
+def check_matching_pair(segmap_path, photo_path):
+    """Raise AssertionError unless both file names agree once their Cityscapes suffixes are removed."""
+    segmap_name, photo_name = os.path.basename(segmap_path), os.path.basename(photo_path)
+    is_pair = segmap_name.replace('_gtFine_color', '') == photo_name.replace('_leftImg8bit', '')
+    assert is_pair, \
+        "[%s] and [%s] don't seem to be matching. Aborting." % (segmap_path, photo_path)
+    
+
+def process_cityscapes(gtFine_dir, leftImg8bit_dir, output_dir, phase):
+    """Convert one Cityscapes phase into pix2pix and CycleGAN training images.
+
+    Each (photo, segmentation map) pair of `phase` is resized and written as JPEG:
+    a 512x256 side-by-side image (photo left, segmap right) under output_dir/<split>,
+    the photo under output_dir/<split>A and the segmap under output_dir/<split>B,
+    where <split> is 'test' for the 'val' phase and 'train' for any other phase.
+    """
+    save_phase = 'test' if phase == 'val' else 'train'
+    savedir = os.path.join(output_dir, save_phase)
+    os.makedirs(savedir, exist_ok=True)
+    os.makedirs(savedir + 'A', exist_ok=True)
+    os.makedirs(savedir + 'B', exist_ok=True)
+    print("Directory structure prepared at %s" % output_dir)
+
+    segmap_expr = os.path.join(gtFine_dir, phase) + "/*/*_color.png"
+    segmap_paths = sorted(glob.glob(segmap_expr))
+
+    photo_expr = os.path.join(leftImg8bit_dir, phase) + "/*/*_leftImg8bit.png"
+    photo_paths = sorted(glob.glob(photo_expr))
+
+    assert len(segmap_paths) == len(photo_paths), \
+        "%d images that match [%s], and %d images that match [%s]. Aborting." % (len(segmap_paths), segmap_expr, len(photo_paths), photo_expr)
+
+    # Report progress roughly every 10%; max() avoids a modulo by zero when
+    # fewer than 10 images are found.
+    progress_step = max(1, len(segmap_paths) // 10)
+    for i, (segmap_path, photo_path) in enumerate(zip(segmap_paths, photo_paths)):
+        check_matching_pair(segmap_path, photo_path)
+        segmap = load_resized_img(segmap_path)
+        photo = load_resized_img(photo_path)
+
+        # data for pix2pix where the two images are placed side-by-side
+        sidebyside = Image.new('RGB', (512, 256))
+        sidebyside.paste(segmap, (256, 0))
+        sidebyside.paste(photo, (0, 0))
+        savepath = os.path.join(savedir, "%d.jpg" % i)
+        sidebyside.save(savepath, format='JPEG', subsampling=0, quality=100)
+
+        # data for cyclegan where the two images are stored at two distinct directories
+        savepath = os.path.join(savedir + 'A', "%d_A.jpg" % i)
+        photo.save(savepath, format='JPEG', subsampling=0, quality=100)
+        savepath = os.path.join(savedir + 'B', "%d_B.jpg" % i)
+        segmap.save(savepath, format='JPEG', subsampling=0, quality=100)
+
+        if i % progress_step == 0:
+            print("%d / %d: last image saved at %s, " % (i, len(segmap_paths), savepath))
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gtFine_dir', type=str, required=True,
+                        help='Path to the Cityscapes gtFine directory.')
+    parser.add_argument('--leftImg8bit_dir', type=str, required=True,
+                        help='Path to the Cityscapes leftImg8bit_trainvaltest directory.')
+    # Not required: a required option could never fall back to its default.
+    parser.add_argument('--output_dir', type=str, default='./datasets/cityscapes',
+                        help='Directory the output images will be written to.')
+    opt = parser.parse_args()
+
+    print(help_msg)
+
+    print('Preparing Cityscapes Dataset for val phase')
+    process_cityscapes(opt.gtFine_dir, opt.leftImg8bit_dir, opt.output_dir, "val")
+    print('Preparing Cityscapes Dataset for train phase')
+    process_cityscapes(opt.gtFine_dir, opt.leftImg8bit_dir, opt.output_dir, "train")
+
+    print('Done')
+
+    
+