Skip to content

utils

plantdb.commons.utils Link

This module contains utility functions.

fsdb_file_from_local_file Link

fsdb_file_from_local_file(path)

Creates a temporary fsdb.File object from a local file.

Parameters:

Name Type Description Default
path Path or str

The file path to use to create the temporary local database.

required

Returns:

Type Description
File

The temporary fsdb.File.

Source code in plantdb/commons/utils.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def fsdb_file_from_local_file(path):
    """Build a throw-away ``fsdb.File`` describing a file of the local filesystem.

    A minimal ``FSDB`` is initialised in a temporary directory (by creating the
    database marker file), then a ``Scan`` named ``"tmp"`` and a ``Fileset``
    identified by the parent directory of `path` are instantiated to host the
    returned ``File``.

    Parameters
    ----------
    path : pathlib.Path or str
        The file path to use to create the temporary local database.

    Returns
    -------
    plantdb.commons.fsdb.File
        The temporary ``fsdb.File``. Its ``filename`` attribute is set to the name
        of `path` and its ``metadata`` attribute is set to ``None``.

    Notes
    -----
    The temporary directory backing the database is removed as soon as the
    ``with`` block exits, that is before the ``File`` is returned.
    """
    from plantdb.commons.fsdb import FSDB, File, Fileset, Scan
    from plantdb.commons.fsdb.core import MARKER_FILE_NAME

    local_path = Path(path)
    file_name = local_path.name
    file_id = local_path.stem  # file name without its last suffix

    with tempfile.TemporaryDirectory() as db_root:
        # The marker file is what makes the directory a valid `FSDB` root:
        Path(f"{db_root}/{MARKER_FILE_NAME}").touch()
        tmp_db = FSDB(db_root)
        # Chain the `Scan` > `Fileset` > `File` hierarchy:
        tmp_scan = Scan(tmp_db, "tmp")
        tmp_fileset = Fileset(tmp_scan, local_path.parent)
        db_file = File(db=tmp_db, fileset=tmp_fileset, f_id=file_id)
        db_file.filename = file_name
        db_file.metadata = None
    return db_file

is_radians Link

is_radians(angles)

Guess if the sequence of angles is in radians.

Parameters:

Name Type Description Default
angles list of float

Sequence of angle values.

required

Returns:

Type Description
bool

True if the sequence is in radians, else False.

Notes

This assumes that the angles cannot be greater than 360 degrees or its equivalent in radians.

Source code in plantdb/commons/utils.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def is_radians(angles):
    """Guess whether a sequence of angles is expressed in radians.

    Parameters
    ----------
    angles : list of float
        Sequence of angle values.

    Returns
    -------
    bool
        ``True`` if every value is strictly below the radian equivalent of
        360 degrees, else ``False``. An empty sequence yields ``True``.

    Notes
    -----
    This assumes that the angles cannot be greater than 360 degrees or its equivalent in radians.
    """
    import math

    # Radian equivalent of a full turn, computed once for the whole sequence:
    full_turn = math.radians(360)
    below_full_turn = [angle < full_turn for angle in angles]
    return all(below_full_turn)

locate_task_filesets Link

locate_task_filesets(scan, tasks)

Map the task names to task filesets.

Parameters:

Name Type Description Default
scan Scan

A Scan instance from a local plant database (FSDB).

required
tasks list of str

A list of task names to look up in the scan's list of filesets.

required

Returns:

Type Description
dict

A task indexed dictionary of fileset ids, value may be "None" if no matching fileset was found.

Notes

If more than one fileset id matches a task name, only the first one (found) will be returned!

Examples:

>>> from plantdb.commons.utils import locate_task_filesets
>>> from plantdb.commons.fsdb import FSDB
>>> from plantdb.commons.test_database import test_database
>>> db = test_database('real_plant_analyzed')
>>> db.connect()
>>> scan = db.get_scan('real_plant_analyzed')
>>> tasks_fs = locate_task_filesets(scan, ['Masks', 'PointCloud', 'UnknownTask'])
>>> print(tasks_fs)
{'Masks': 'Masks_1__0__1__0____channel____rgb_5619aa428d', 'PointCloud': 'PointCloud_1_0_1_0_10_0_7ee836e5a9', 'UnknownTask': 'None'}
>>> db.disconnect()
Source code in plantdb/commons/utils.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def locate_task_filesets(scan, tasks):
    """Associate each task name with the id of a matching fileset of the scan.

    A fileset id matches a task when it starts with the task name.

    Parameters
    ----------
    scan : plantdb.commons.fsdb.Scan
        A ``Scan`` instance from a local plant database (FSDB).
    tasks : list of str
        A list of task names to look up in the scan's list of filesets.

    Returns
    -------
    dict
        A task indexed dictionary of fileset ids, the value is the string "None" if no matching fileset was found.

    Notes
    -----
    If more than one fileset id matches a task name, only the first one (found) will be returned!

    Examples
    --------
    >>> from plantdb.commons.utils import locate_task_filesets
    >>> from plantdb.commons.fsdb import FSDB
    >>> from plantdb.commons.test_database import test_database
    >>> db = test_database('real_plant_analyzed')
    >>> db.connect()
    >>> scan = db.get_scan('real_plant_analyzed')
    >>> tasks_fs = locate_task_filesets(scan, ['Masks', 'PointCloud', 'UnknownTask'])
    >>> print(tasks_fs)
    {'Masks': 'Masks_1__0__1__0____channel____rgb_5619aa428d', 'PointCloud': 'PointCloud_1_0_1_0_10_0_7ee836e5a9', 'UnknownTask': 'None'}
    >>> db.disconnect()
    """
    # Fileset ids available in the scan dataset:
    available = scan.list_filesets()
    # TODO: could be improved by using the saved config ('pipeline.toml') and recreate the name hash from luigi...
    # For each task keep the first id prefixed by its name, or the "None" placeholder:
    return {
        task: next((fs_id for fs_id in available if fs_id.startswith(task)), "None")
        for task in tasks
    }

partial_match Link

partial_match(reference, target, fuzzy=False)

Partial matching of a reference dictionary against a target, potentially using regexp.

Parameters:

Name Type Description Default
reference dict

The reference dictionary with partial information to test against the target.

required
target dict

The target dictionary.

required
fuzzy bool

Whether to use fuzzy matching or not, that is the use of regular expressions.

False

Returns:

Type Description
bool

Whether the partial match is found or not.

Examples:

>>> from plantdb.commons.utils import partial_match
>>> ref = {"object": {"environment":"virtual"}}
>>> target = {'object': {'environment': 'virtual', 'plant_id': 'arabidopsis000', 'species': 'Arabidopsis Thaliana'}, 'scanner': {'workspace': {'x': [-200, 200], 'y': [-200, 200], 'z': [10, 1000]}}}
>>> partial_match(ref, target)
True
>>> ref = {"object": {"species":"Arabidopsis.*"}}
>>> partial_match(ref, target, fuzzy=True)
True
>>> ref = {"species":"Arabidopsis.*"}
>>> partial_match(ref, target, fuzzy=True)
False
Source code in plantdb/commons/utils.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def partial_match(reference, target, fuzzy=False):
    """Test whether a reference structure is partially found in a target, optionally using regexp.

    The comparison is recursive:

    - two dictionaries match if every key of `reference` exists in `target` with a matching value;
    - two lists match if `reference` is not longer than `target` and each of its items matches at least one item of `target`;
    - with `fuzzy`, two strings match if `reference`, used as a regular expression, matches at the beginning of `target`;
    - anything else is compared with ``==``.

    Parameters
    ----------
    reference : dict
        The reference dictionary with partial information to test against the target.
    target : dict
        The target dictionary.
    fuzzy : bool
        Whether to use fuzzy matching or not, that is the use of regular expressions.

    Returns
    -------
    bool
        Whether the partial match is found or not.

    Examples
    --------
    >>> from plantdb.commons.utils import partial_match
    >>> ref = {"object": {"environment":"virtual"}}
    >>> target = {'object': {'environment': 'virtual', 'plant_id': 'arabidopsis000', 'species': 'Arabidopsis Thaliana'}, 'scanner': {'workspace': {'x': [-200, 200], 'y': [-200, 200], 'z': [10, 1000]}}}
    >>> partial_match(ref, target)
    True
    >>> ref = {"object": {"species":"Arabidopsis.*"}}
    >>> partial_match(ref, target, fuzzy=True)
    True
    >>> ref = {"species":"Arabidopsis.*"}
    >>> partial_match(ref, target, fuzzy=True)
    False

    """
    import re

    # Dictionaries: every reference key must be present with a matching value.
    if isinstance(reference, dict) and isinstance(target, dict):
        for key, expected in reference.items():
            if key not in target:
                return False
            if not partial_match(expected, target[key], fuzzy):
                return False
        return True

    # Lists: each reference item must match at least one target item.
    if isinstance(reference, list) and isinstance(target, list):
        if len(reference) > len(target):
            return False
        for wanted in reference:
            if not any(partial_match(wanted, candidate, fuzzy) for candidate in target):
                return False
        return True

    # Strings in fuzzy mode: the reference is used as a regexp anchored at the start.
    if fuzzy and isinstance(reference, str) and isinstance(target, str):
        return re.match(reference, target) is not None

    # Anything else: plain equality.
    return reference == target

read_image_from_file Link

read_image_from_file(filename)

Read an image from a file and return it.

Parameters:

Name Type Description Default
filename str or Path

The path to the image to read.

required

Returns:

Name Type Description
image Image

The loaded image.

Examples:

>>> from plantdb.commons.utils import read_image_from_file
>>> from plantdb.commons.test_database import test_database
>>> db = test_database('real_plant')
>>> db.connect()
>>> scan = db.get_scan('real_plant')
>>> fileset = scan.get_fileset('images')
>>> img_path = fileset.get_file('00000_rgb').path()
>>> image = read_image_from_file(img_path)
>>> print(image.size)
(1440, 1080)
>>> db.disconnect()
Source code in plantdb/commons/utils.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def read_image_from_file(filename):
    """Open an image file, load its pixel data and return the image.

    Parameters
    ----------
    filename : str or pathlib.Path
        The path to the image to read.

    Returns
    -------
    image : PIL.Image.Image
        The loaded image.

    Examples
    --------
    >>> from plantdb.commons.utils import read_image_from_file
    >>> from plantdb.commons.test_database import test_database
    >>> db = test_database('real_plant')
    >>> db.connect()
    >>> scan = db.get_scan('real_plant')
    >>> fileset = scan.get_fileset('images')
    >>> img_path = fileset.get_file('00000_rgb').path()
    >>> image = read_image_from_file(img_path)
    >>> print(image.size)
    (1440, 1080)
    >>> db.disconnect()
    """
    img = Image.open(filename)
    # Explicitly load the image data before handing the image back to the caller:
    img.load()
    return img

tmpdir_from_fileset Link

tmpdir_from_fileset(fileset)

Creates a temporary directory to host the Fileset object and write files.

Parameters:

Name Type Description Default
fileset Fileset

The fileset to use to create the temporary local database.

required

Returns:

Type Description
TemporaryDirectory

The temporary directory hosting the fileset and file(s), if any.

Source code in plantdb/commons/utils.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def tmpdir_from_fileset(fileset):
    """Export the files of a ``Fileset`` to a newly created temporary directory.

    Each file returned by ``fileset.get_files()`` is written, using ``to_file``,
    directly under the temporary directory with its ``filename`` as file name.

    Parameters
    ----------
    fileset : plantdb.commons.fsdb.Fileset
        The fileset to use to create the temporary local database.

    Returns
    -------
    tempfile.TemporaryDirectory
        The temporary directory hosting the fileset and file(s), if any.
        It is not cleaned up here, the object is handed to the caller.
    """
    tmpdir = tempfile.TemporaryDirectory()
    root = Path(tmpdir.name)
    for db_file in fileset.get_files():
        to_file(db_file, root / db_file.filename)
    return tmpdir

to_file Link

to_file(dbfile, path)

Write a dbfile to a file in the filesystem.

Parameters:

Name Type Description Default
dbfile File

The File instance to save under given path.

required
path Path or str

The file path to use to save the dbfile.

required
Source code in plantdb/commons/utils.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def to_file(dbfile, path):
    """Dump the raw content of a `dbfile` to a file in the filesystem.

    Parameters
    ----------
    dbfile : plantdb.commons.fsdb.File
        The ``File`` instance to save under given `path`.
    path : pathlib.Path or str
        The file path to use to save the `dbfile`.
    """
    # Read the content first, so the destination is only opened once data is available:
    payload = dbfile.read_raw()
    destination = Path(path)
    with destination.open(mode="wb") as stream:
        stream.write(payload)