# Source code for nyuki.geotiff_chopper

# -*- coding: utf-8 -*-

"""Console script for geotiff_chopper."""
import sys
import os
import click
import numpy as np
import rasterio
import rasterio.features
from PIL import Image
import PIL
from osgeo import gdal
from tqdm import trange, tqdm
import geopandas as gpd
import pandas as pd

# Disable Pillow's pixel-count size check (MAX_IMAGE_PIXELS = None) so that very
# large rasters converted to PNG can still be opened for slicing.
PIL.Image.MAX_IMAGE_PIXELS = None

@click.command()
@click.option('--sourcetiff', type=click.Path(exists=True), required=True,
              prompt="Enter the path for the source tiff image",
              help="Path to the GEOTIFF raster image")
@click.option('--sourcemask', type=click.Path(), required=True,
              prompt="Enter the path for the source mask file",
              help="Path to the labels geojson file that corresponds to the GEOTIFF file")
@click.option('--size', type=click.IntRange(min=1), required=True, default=512,
              prompt="Enter the size of the sliced images in pixels",
              help="The size in pixels of the chopped images, such as 512 for 512x512 images")
def main(sourcetiff, sourcemask, size):
    """Application: Geotiff Chopper.

        This tool will slice a raster GEOTIFF image into size x size PNG images with
        corresponding labels.
        The user enters the source GEOTIFF filename, the mask filename, and the sliced
        image size. Output files are saved to 'images' and 'labels' folders in the
        working directory.

        This tool is used as follows. The User should first use a tool like QGIS or
        similar to clip the raster layer using the vector (mask) layer--or alternatively
        clipping the vector layer using the raster layer. Then these two equally sized
        layers are exported as a GEOTIFF and geojson layer. It is critical that both
        files respect the same exact geographical boundaries, otherwise there will be
        misalignments when the images are sliced.

        Note that the mask and source tiff file must be in the same EPSG / coordinate
        projection. The mask geometries are rasterized onto the GEOTIFF's pixel grid
        without being reprojected, so a mismatch produces misaligned labels.

        Examples:

        Commandline app:\n
        >>> geotiff-slicer --sourcetiff file1.tif --sourcemask file1.geojson --size 512

        Invoke interactive mode:\n
        >>> geotiff-slicer

        """
    # NOTE: the docstring above doubles as the --help text rendered by click.
    # --size is validated as a positive integer up front: a size of 0 would
    # otherwise fail later inside range(), and a negative size would silently
    # produce no tiles.
    chopper(sourcetiff, sourcemask, size)

    return 0

def _describe_crs(crs):
    """Return a printable name for a GeoDataFrame ``crs`` attribute."""
    if crs is None:
        return "undefined"
    try:
        # Legacy geopandas exposes the CRS as a dict such as {'init': 'epsg:4326'}.
        return str(crs['init']).upper()
    except (TypeError, KeyError, IndexError):
        # Newer geopandas releases hand back a CRS object that is not
        # subscriptable (or a dict without an 'init' key); fall back to its
        # string form instead of crashing before any work is done.
        return str(crs)


def chopper(sourcetiff, sourcemask, size):
    """Slice a GEOTIFF and its vector masks into ``size`` x ``size`` PNG tiles.

    The masks are rasterized onto the raster's grid, the GEOTIFF is converted
    to an 8-bit PNG with GDAL, and both PNGs are handed to ``img_chopper``,
    which writes the tiles and an index CSV under the current working
    directory. Two temporary PNG files are created in the working directory
    and removed again before returning, including when a step fails.

    Args:
        sourcetiff: Path to the source GEOTIFF raster.
        sourcemask: Path to the vector mask file readable by geopandas.
        size: Edge length, in pixels, of the square output tiles.

    Raises:
        click.Abort: If the user declines the projection confirmation prompt.
    """
    # filenames for temporary files
    tmp_imagefilename = 'tmp_sourceimage.png'
    tmp_masksfilename = 'tmp_masks.png'
    img_master_directory = os.getcwd()
    img_prefix = os.path.basename(sourcetiff).split('.')[0]

    # The dataset is only needed for its CRS, shape and transform; the context
    # manager guarantees it is closed even if the user aborts at the prompt.
    with rasterio.open(sourcetiff) as raster:
        buildings = gpd.read_file(sourcemask)

        click.echo("Application Settings:\n")
        click.echo(f"source image: {sourcetiff}")
        click.echo(f"source mask: {sourcemask}")
        click.echo(f"output image size: {size} x {size} ")
        click.echo(f"source epsg: {raster.crs}")
        click.echo(f"masks epsg: {_describe_crs(buildings.crs)}")

        click.confirm('The image and masks files must be in the same coordinate projection. \n Please convert the files so that the coordinate projections match.\n', abort=True)

        # remove empty geometries from the masks
        buildings = buildings[~buildings.is_empty]

        # rasterize the masks onto the source raster's pixel grid
        print('[INFO] Rasterizing the masks')
        tfl_raster = rasterize_masks(buildings, raster)

    try:
        # write masks to new png file.
        img = Image.fromarray(tfl_raster)
        img.save(tmp_masksfilename)

        options_list = [
            '-ot Byte',
            '-of PNG',
            '-scale'
        ]
        options_string = " ".join(options_list)

        print('[INFO] Converting image from TIFF to PNG format for chopping.\n This might take some time depending upon compression of the original TIFF file.')

        gdal.Translate(tmp_imagefilename,
                       sourcetiff,
                       options=options_string)

        print('[INFO] Beginning the image slicing process.')
        max_height, max_width = img_chopper(tmp_imagefilename,
                                            tmp_masksfilename,
                                            img_master_directory,
                                            prefix=img_prefix,
                                            height=size,
                                            width=size)
        print(f"max height slices: {max_height}, max width slices: {max_width}")

        print('[INFO] Complete slicing image.')
    finally:
        # Always remove the intermediate PNGs so a failed run does not leave
        # large temporary files behind in the working directory.
        print('[INFO] Cleaning up files.')
        for tmp_path in (tmp_imagefilename, tmp_masksfilename):
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                # The step that creates this file never ran; nothing to clean.
                pass
    print('[INFO] Done.')

def rasterize_masks(masks, raster):
    """Burn the mask geometries into a uint8 array aligned with ``raster``.

    Args:
        masks: Table of mask features exposing a ``geometry`` column
            (a GeoDataFrame in this module).
        raster: Open raster dataset; its ``shape`` and ``transform`` define
            the output grid.

    Returns:
        Array of ``raster.shape`` in which every pixel touched by a geometry
        is 255 and every other pixel is 0.
    """
    # Iterate the geometry column directly: iterrows() would build a full
    # Series for every feature only to read one field from it.
    shapes = [(geometry, 255) for geometry in masks.geometry]
    return rasterio.features.rasterize(
        shapes,
        out_shape=raster.shape,
        transform=raster.transform,
        fill=0,
        all_touched=True,
        dtype=rasterio.uint8)

def check_empty_pixels_below_threshold(image_array, threshold=0.50):
    """Tell whether a tile holds enough non-extreme pixel values to be kept.

    Args:
        image_array: Numpy array of pixel values for one tile.
        threshold: Fraction of the array's elements, checked separately for
            the value 0 and the value 255, at which the tile is rejected.

    Returns:
        ``False`` when the share of elements equal to 0, or the share equal
        to 255, is greater than or equal to ``threshold``; ``True`` otherwise.
    """
    element_count = image_array.size
    # 0 and 255 are tested independently; either one reaching the threshold
    # is enough to reject the tile.
    for extreme_value in (0, 255):
        share = np.sum(image_array == extreme_value) / element_count
        if share >= threshold:
            return False
    return True


def img_chopper(img, label,
                images_master_directory,
                prefix=None,
                height=512,
                width=512,
                image_directory_name='images',
                label_directory_name='labels'):
    """Cut an image and its label image into tiles and write an index CSV.

    Both images are walked on the same ``height`` x ``width`` grid. A tile is
    kept only when ``check_empty_pixels_below_threshold`` accepts the image
    crop; kept tiles are saved as ``<prefix>_<n>.png`` in the image and label
    directories, and one row per kept tile is written to
    ``<prefix>_data_index.csv`` in ``images_master_directory``. The CSV lists
    only tiles that were actually saved.

    Args:
        img: Path of the source image to slice.
        label: Path of the label image; must have the same size as ``img``.
        images_master_directory: Directory under which the output folders and
            the index CSV are created.
        prefix: File-name prefix for tiles and the CSV. When omitted it is
            derived from the base name of ``img`` up to the first dot, which
            requires ``img`` to be a path.
        height: Tile height in pixels.
        width: Tile width in pixels.
        image_directory_name: Name of the output folder for image tiles.
        label_directory_name: Name of the output folder for label tiles.

    Returns:
        Tuple ``(rows, columns)`` giving the number of grid positions along
        the height and width of the image.

    Raises:
        AssertionError: If the image and label sizes differ.
    """
    if prefix is None:
        # The None default used to break path building (None + '/'); derive a
        # usable prefix from the image file name instead.
        prefix = os.path.basename(os.fspath(img)).split('.')[0]

    img_dir = os.path.join(images_master_directory, image_directory_name)
    label_dir = os.path.join(images_master_directory, label_directory_name)
    csv_dir = images_master_directory
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    index_columns = ['image', 'label', 'naive_test_train_split90_10']
    index_rows = []
    counter = 1

    with Image.open(img) as im, Image.open(label) as lb:
        imgwidth, imgheight = im.size
        print(f'image size {im.size}')
        print(f'label size {lb.size}')
        # Raised explicitly (not via ``assert``) so the check survives ``-O``
        # while keeping the exception type callers already see.
        if im.size != lb.size:
            raise AssertionError("image and label sizes don't match")

        row_starts = range(0, imgheight, height)
        col_starts = range(0, imgwidth, width)
        image_max_height = len(row_starts)
        image_max_width = len(col_starts)

        for i in tqdm(row_starts, desc='height_dimension'):
            for j in tqdm(col_starts, desc='width_dimension', leave=False):
                # The box is not clamped, so edge tiles may extend past the
                # image bounds.
                box = (j, i, j + width, i + height)
                a = im.crop(box)
                if not check_empty_pixels_below_threshold(np.asarray(a)):
                    continue
                b = lb.crop(box)
                tile_name = f"{prefix}_{counter}.png"
                try:
                    a.save(os.path.join(img_dir, tile_name))
                    b.save(os.path.join(label_dir, tile_name))
                except (OSError, ValueError) as err:
                    # Stay best-effort for a tile that cannot be written, but
                    # report it instead of silently swallowing every error.
                    tqdm.write(f"[WARN] Skipping tile at x={j}, y={i}: {err}")
                    continue
                index_rows.append({
                    'image': os.path.join(prefix + '/' + image_directory_name, tile_name),
                    'label': os.path.join(prefix + '/' + label_directory_name, tile_name),
                    'naive_test_train_split90_10': "Train" if np.random.uniform() <= 0.90 else "Test",
                })
                # Advance only after a successful save so tile numbers stay
                # contiguous.
                counter += 1

    # Build the index from the collected rows rather than a zero-filled frame:
    # skipped tiles no longer leave placeholder 0.0 rows, and no strings are
    # written into float columns.
    csv_export = pd.DataFrame(index_rows, columns=index_columns)
    csv_export.to_csv(os.path.join(csv_dir, f'{prefix}' + '_data_index.csv'), header=True)
    return (image_max_height, image_max_width)

if __name__ == "__main__":  # pragma: no cover
    # Run the command-line entry point and hand its return value to the
    # interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)