Tutorial¶
In this tutorial, we will walk through the process of creating a simple OME-Zarr converter. For this example, we will use the hiPSC 3D Tiny dataset.
Step 0: Parse the necessary metadata from the raw data¶
The Fractal converters tools do not provide any tooling for this step, since it is highly dependent on the raw data format, the instrument, and other factors.
In the example dataset we have a single well, with two fields of view and two z-slices.
To simplify the tutorial, we have already pre-parsed the metadata and saved it in:
- `metadata.json`: a file with the global metadata for the dataset (pixel size, channel names, etc.).
- `data/`: a directory with the raw data files, which in this case are 4 PNG images (2 fields of view, 2 z-slices).
- `tiles.csv`: a file with the metadata for each of the PNG files.
These files are just an example; you can adapt them to whatever format you prefer, as long as you can parse the necessary metadata from the raw data.
import pandas as pd
# Load the table with the metadata of each PNG file
tiles_meta = pd.read_csv("../tests/hiPSC_Tiny/tiles.csv")
tiles_meta
path | row | column | acquisition_id | fov | channel | z | t | pos_x | pos_y | shape_x | shape_y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20200812-CardiomyocyteDifferentiation14-Cycle1... | B | 3 | 0 | 1 | 0 | 0 | 0 | -1448.3 | 1517.7 | 2560 | 2160 |
1 | 20200812-CardiomyocyteDifferentiation14-Cycle1... | B | 3 | 0 | 1 | 0 | 1 | 0 | -1448.3 | 1517.7 | 2560 | 2160 |
2 | 20200812-CardiomyocyteDifferentiation14-Cycle1... | B | 3 | 0 | 2 | 0 | 0 | 0 | -1032.3 | 1517.7 | 2560 | 2160 |
3 | 20200812-CardiomyocyteDifferentiation14-Cycle1... | B | 3 | 0 | 2 | 0 | 1 | 0 | -1032.3 | 1517.7 | 2560 | 2160 |
import json
from pprint import pprint
# Read the global metadata of the dataset. The encoding is given explicitly
# so the result does not depend on the platform default.
with open("../tests/hiPSC_Tiny/metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)
pprint(metadata)
{'channel_names': ['DAPI'], 'pixel_size': {'t': 1.0, 'x': 0.1625, 'y': 0.1625, 'z': 0.5}, 'plate_name': 'hiPSC_Tiny', 'wavelength_ids': ['DAPI']}
Step 1: Create the TiledImage object¶
- Group together the metadata for each `TiledImage` object. In this case we have a single `TiledImage` object, so we can just use the global metadata.
- Create a `TiledImage` object with the metadata and the path to the raw data files.
from ome_zarr_converters_tools import PlatePathBuilder, TiledImage
# Where the image is placed inside the plate: well B3, acquisition 0.
# You can also use the SimplePathBuilder if the data is not part of a plate.
plate_path = PlatePathBuilder(
    plate_name=metadata.get("plate_name"),
    row="B",
    column=3,
    acquisition_id=0,
)

# The channel information comes straight from the global metadata.
tiled_image = TiledImage(
    name=f"{metadata.get('plate_name')}_B3_0",
    path_builder=plate_path,
    channel_names=metadata.get("channel_names", []),
    wavelength_ids=metadata.get("wavelength_ids", []),
)
tiled_image
TiledImage(name=hiPSC_Tiny_B3_0, path=hiPSC_Tiny.zarr/B/3/0)
Step 2: Create the Loader object¶
We need to create a `Loader` object to load the raw data files.
The `Loader` object should implement the `TileLoader` protocol, which defines the interface for loading the tile data.
class TileLoader(Protocol):
    """Tile loader interface.

    A loader has to provide a ``load`` method and a ``dtype`` property with
    the signatures below.
    """

    def load(self) -> np.ndarray | Array:
        """Load the tile data into a numpy array in the format (t, c, z, y, x)."""
        ...

    @property
    def dtype(self) -> str:
        """Return the dtype of the tile."""
        ...
import numpy as np
import PIL.Image
class PngLoader:
    """Png loader for tiled images.

    Provides the ``load`` method and the ``dtype`` property described by the
    ``TileLoader`` protocol.
    """

    def __init__(self, dir: str, paths: list[str]):
        """Initialize the PngLoader.

        Args:
            dir (str): Directory where the PNG files are located.
            paths (list[str]): List of PNG file paths relative to the directory.
        """
        self.dir = dir
        self.paths = paths

    def load(self) -> np.ndarray:
        """Load the PNG files and stack them into a numpy array.

        Returns:
            np.ndarray: The images stacked in the order of ``paths``, with two
                singleton axes prepended.
        """
        arrays = []
        for path in self.paths:
            full_path = f"{self.dir}/{path}"
            # Convert to an array inside the ``with`` block so the pixel data
            # is read before the underlying file is closed.
            with PIL.Image.open(full_path) as img:
                arrays.append(np.array(img))
        # Stack along a new first axis, then prepend two singleton axes.
        # Assumes 2D (single-channel) images, giving (1, 1, n, y, x).
        return np.stack(arrays, axis=0)[None, None, :, :, :]

    @property
    def dtype(self) -> str:
        """Return the data type of the loaded images."""
        # NOTE(review): hard-coded; confirm the PNG files are 8-bit.
        return "uint8"
Step 3: Add the tiles to the TiledImage object¶
- Now we need to add `Tiles` to our `TiledImage` object.
# Group the tiles metadata so that there is one row per field of view;
# every other column becomes the list of values of the grouped tiles.
fov_keys = ["row", "column", "acquisition_id", "fov"]
agg_tiles_meta = tiles_meta.groupby(fov_keys).agg(list)
agg_tiles_meta
path | channel | z | t | pos_x | pos_y | shape_x | shape_y | ||||
---|---|---|---|---|---|---|---|---|---|---|---|
row | column | acquisition_id | fov | ||||||||
B | 3 | 0 | 1 | [20200812-CardiomyocyteDifferentiation14-Cycle... | [0, 0] | [0, 1] | [0, 0] | [-1448.3, -1448.3] | [1517.7, 1517.7] | [2560, 2560] | [2160, 2160] |
2 | [20200812-CardiomyocyteDifferentiation14-Cycle... | [0, 0] | [0, 1] | [0, 0] | [-1032.3, -1032.3] | [1517.7, 1517.7] | [2560, 2560] | [2160, 2160] |
from ngio import PixelSize
from ome_zarr_converters_tools import OriginDict, Point, Tile
# Build the pixel size from the global metadata; missing entries default to 1.
px_size_meta = metadata.get("pixel_size", {})
pixel_size = PixelSize(
    x=px_size_meta.get("x", 1),
    y=px_size_meta.get("y", 1),
    z=px_size_meta.get("z", 1),  # Assuming a constant pixel size in z
    t=px_size_meta.get("t", 1),  # Assuming a constant pixel size in t
)

# Each row of the aggregated table becomes one Tile.
for tile in agg_tiles_meta.itertuples():
    # All tiles should have the same pos_x and pos_y
    # This is a sanity check
    assert all(tile.pos_x[0] == x for x in tile.pos_x)
    assert all(tile.pos_y[0] == y for y in tile.pos_y)

    pos_x, pos_y = tile.pos_x[0], tile.pos_y[0]
    # Extent of the tile: shape in pixels times the pixel size
    size_x = tile.shape_x[0] * pixel_size.x
    size_y = tile.shape_y[0] * pixel_size.y

    # Find top-left corner of the tile
    # NOTE(review): z and t are raw indices here but are scaled by the pixel
    # size in the bottom-right corner below; the two agree in this dataset
    # because the minimum index is 0 - confirm the units Point expects.
    min_t, min_z, min_c = min(tile.t), min(tile.z), min(tile.channel)
    top_left = Point(
        x=pos_x,
        y=pos_y,
        z=min_z,
        t=min_t,
        c=min_c,
    )

    # Find bottom-right corner of the tile
    max_t, max_z, max_c = max(tile.t), max(tile.z), max(tile.channel)
    bottom_right = Point(
        x=pos_x + size_x,
        y=pos_y + size_y,
        t=int((max_t + 1) * pixel_size.t),  # +1 because we want to include the last pixel
        z=(max_z + 1) * pixel_size.z,  # +1 because we want to include the last pixel
        c=max_c + 1,
    )

    # The origin in this case is basically the top-left corner of the tile,
    # but in more complex cases it can be different.
    # For example, t and z should be integers in the Point object,
    # while here ideally we would like to keep the original microscope position.
    origin = OriginDict(
        x_micrometer_original=pos_x,
        y_micrometer_original=pos_y,
        z_micrometer_original=0,
    )

    # Create a loader object for the tile; tile.path is the list of PNG files
    # that were grouped into this row.
    loader = PngLoader("../tests/hiPSC_Tiny/data/", tile.path)

    tile_obj = Tile.from_points(
        top_l=top_left,
        bot_r=bottom_right,
        pixel_size=PixelSize(
            x=pixel_size.x, y=pixel_size.y, z=pixel_size.z, t=pixel_size.t
        ),
        origin=origin,
        data_loader=loader,
    )
    tiled_image.add_tile(tile_obj)
Step 4: Set up the Fractal task¶
Init task
from ome_zarr_converters_tools import (
AdvancedComputeOptions,
build_parallelization_list,
initiate_ome_zarr_plates,
)
# Settings shared by both calls of the init phase
zarr_dir = "./test_zarr"
overwrite = True

# Build parallelization list for the compute phase
parallelization_list = build_parallelization_list(
    zarr_dir=zarr_dir,
    tiled_images=[tiled_image],
    overwrite=overwrite,
    advanced_compute_options=AdvancedComputeOptions(),
)

# Initiate the OME-Zarr plates with the tiled image
initiate_ome_zarr_plates(
    zarr_dir=zarr_dir,
    tiled_images=[tiled_image],
    overwrite=overwrite,
)
Compute task
from ome_zarr_converters_tools import ConvertParallelInitArgs, generic_compute_task
# Run the compute task once for every entry of the parallelization list.
for task in parallelization_list:
    print(f"Running task: {task}")
    generic_compute_task(
        zarr_url=task["zarr_url"],
        # Rebuild the init arguments object from its dictionary form
        init_args=ConvertParallelInitArgs(**task["init_args"]),
    )
Running task: {'zarr_url': 'test_zarr/hiPSC_Tiny.zarr/B/3/0', 'init_args': {'tiled_image_pickled_path': 'test_zarr/_tmp_converter_dir/738bfcc1-f77a-4ad4-8bd5-bb458bba1dbf.pkl', 'overwrite': True, 'advanced_compute_options': {'num_levels': 5, 'tiling_mode': 'auto', 'swap_xy': False, 'invert_x': False, 'invert_y': False, 'max_xy_chunk': 4096, 'z_chunk': 10, 'c_chunk': 1, 't_chunk': 1}}}
Check the results¶
from ngio import open_ome_zarr_plate
# Open the plate written under ./test_zarr by the tasks above
plate = open_ome_zarr_plate("./test_zarr/hiPSC_Tiny.zarr")
plate
Plate([rows x columns] (1 x 1))
# Get the image stored at path "0" of well B3
container = plate.get_image(row="B", column=3, image_path="0")
container
OmeZarrContainer(levels=5, tables=['well_ROI_table', 'FOV_ROI_table'])