assets

ArchiveError Link

Bases: RuntimeError

Base class for archive‑related errors.

Subclassed by:

Reference API server server services assets
- ExtractionError
- ValidationError

ExtractionError Link

Bases: ArchiveError

Raised when an error occurs while extracting an archive.

FileUploadError Link

Bases: RuntimeError

Raised when something goes wrong while persisting an uploaded file.

Raised by:

Reference API server server services assets validate_upload_headers

ValidationError Link

Bases: ArchiveError

Raised when an uploaded archive fails validation.

create_zip_for_scan Link

create_zip_for_scan(scan_path, logger)

Create a temporary ZIP file containing all files under scan_path (except any webcache directories).

Parameters:

Name	Type	Description	Default
`scan_path` Link	`Path`	The root directory of the scan to archive.	required
`logger` Link	`Logger`	Logger used for diagnostic messages.	required

Returns:

Type	Description
`Path`	The path to the created temporary ZIP file.

Raises:

Type	Description
`ArchiveError`	If the ZIP file cannot be created.

Source code in plantdb/server/services/assets.py

def create_zip_for_scan(scan_path: Path, logger: logging.Logger) -> Path:
    """Create a temporary ZIP file containing all files under ``scan_path`` (except any ``webcache`` directories).

    Parameters
    ----------
    scan_path:
        The root directory of the scan to archive.
    logger:
        Logger used for diagnostic messages.

    Returns
    -------
    pathlib.Path
        The path to the created temporary ZIP file.  The file is not removed
        by this function on success.

    Raises
    ------
    ArchiveError
        If the ZIP file cannot be created.  The temporary file is deleted
        before the error is raised.

    Notes
    -----
    Only directories named ``webcache`` located *below* ``scan_path`` are
    excluded.  The location of ``scan_path`` itself is irrelevant, so a scan
    stored under a parent directory called ``webcache`` is still archived.
    Archive member names are relative to ``scan_path``.
    """
    fd, zip_path_str = mkstemp(suffix=".zip")
    os.close(fd)  # we only need the path, not the open file descriptor
    zip_path = Path(zip_path_str)

    logger.info("Creating archive for scan at %s", scan_path)

    try:
        with ZipFile(zip_path, "w") as zip_f:
            for root, dirs, files in os.walk(scan_path):
                # Prune ``webcache`` sub-directories in place: ``os.walk``
                # (top-down) then never descends into them.  Filtering on the
                # directory names below ``scan_path`` avoids matching a
                # ``webcache`` component in the path leading to the scan.
                dirs[:] = [name for name in dirs if name != "webcache"]
                for file in files:
                    full_path = Path(root, file)
                    relative_path = full_path.relative_to(scan_path)
                    zip_f.write(full_path, arcname=str(relative_path))
    except Exception as exc:
        logger.error("Failed to create archive: %s", exc)
        # Do not leave a half-written archive behind.
        zip_path.unlink(missing_ok=True)
        raise ArchiveError(f"Could not create archive: {exc}") from exc

    return zip_path

extract_zip_to_scan Link

extract_zip_to_scan(zip_path, destination, logger)

Extract zip_path into destination while performing safety checks.

Parameters:

Name	Type	Description	Default
`zip_path` Link	`Path`	Path to the validated temporary ZIP file.	required
`destination` Link	`Path`	Directory where the archive contents should be placed.	required
`logger` Link	`Logger`	Logger for diagnostic output.	required

Returns:

Type	Description
`list[str]`	List of extracted absolute file paths.

Raises:

Type	Description
`ExtractionError`	If any step of the extraction fails.

Notes

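Safety checks include path-traversal protection and a post-write hash verification. Duplicate archive entries are not detected: a later entry overwrites an earlier one written to the same path.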

Source code in plantdb/server/services/assets.py

def extract_zip_to_scan(zip_path: Path, destination: Path, logger: logging.Logger) -> List[str]:
    """Extract ``zip_path`` into ``destination`` while performing safety checks.

    Parameters
    ----------
    zip_path:
        Path to the validated temporary ZIP file.
    destination:
        Directory where the archive contents should be placed.
    logger:
        Logger for diagnostic output.

    Returns
    -------
    list[str]
        List of extracted absolute file paths, in archive order.

    Raises
    ------
    ExtractionError
        If an entry would be written outside ``destination``, if an entry
        name cannot be encoded as UTF-8, if the data read back from disk does
        not match what was written, or if any other error occurs while
        reading the archive or writing files.

    Notes
    -----
    - If the archive contains exactly one explicit top-level directory entry
      *and every entry lives under it*, that leading directory is stripped so
      its content lands directly in ``destination``.
    - Each target path is resolved and must be located inside the resolved
      ``destination`` (path-traversal protection).
    - Every file is hashed (SHA-256) while being written and hashed again
      from disk; a mismatch aborts the extraction.
    - Duplicate entries are not detected: a later entry overwrites an earlier
      one that maps to the same path.
    """
    logger.debug(f"Preparing to extract {zip_path} into {destination}")

    # Detect a single top-level directory that should be stripped.
    with ZipFile(zip_path, "r") as zip_f:
        names = zip_f.namelist()
    top_level_dirs = [
        name
        for name in names
        if name.endswith("/") and name.count("/") == 1
    ]
    # Only strip when *every* entry sits below that lone directory.  Otherwise
    # entries outside of it would lose their first path component (or resolve
    # to ``destination`` itself when they are top-level files).
    strip_top_dir = len(top_level_dirs) == 1 and all(
        name.startswith(top_level_dirs[0]) for name in names
    )

    # Resolve once: used as the reference for the containment check below.
    dest_root = destination.resolve()
    extracted: List[str] = []

    try:
        with ZipFile(zip_path, "r") as zip_f:
            for member in zip_f.namelist():
                # Skip directory entries
                if member.endswith("/"):
                    continue

                # Ensure the filename can be represented as UTF-8.  Encoding a
                # ``str`` raises ``UnicodeEncodeError`` (e.g. lone surrogates),
                # so catch the common ``UnicodeError`` base class.
                try:
                    member.encode("utf-8")
                except UnicodeError as exc:
                    raise ExtractionError(
                        f"Filename encoding error in archive entry '{member}'."
                    ) from exc

                # Remove the top-level folder if present
                target_rel = Path(member)
                if strip_top_dir:
                    target_rel = Path(*target_rel.parts[1:])

                target_path = (destination / target_rel).resolve()

                # Safety: the target must stay inside ``destination``.  A
                # component-wise comparison is required here; a string prefix
                # test would accept a sibling such as ``<destination>_evil``.
                try:
                    target_path.relative_to(dest_root)
                except ValueError:
                    raise ExtractionError(
                        f"Path traversal attempt detected: {member}"
                    ) from None

                # Create parent directories
                target_path.parent.mkdir(parents=True, exist_ok=True)

                # Extract while hashing the bytes handed to the file object
                with zip_f.open(member) as src, target_path.open("wb") as dst:
                    src_hash = hashlib.sha256()
                    while chunk := src.read(8192):
                        src_hash.update(chunk)
                        dst.write(chunk)
                    extracted_hash = src_hash.hexdigest()

                # Verify the file on disk matches the hash we just computed
                with target_path.open("rb") as f:
                    dst_hash = hashlib.sha256()
                    while chunk := f.read(8192):
                        dst_hash.update(chunk)

                if extracted_hash != dst_hash.hexdigest():
                    # A corrupted file must not be reported as extracted.
                    logger.error("Hash mismatch after extracting %s", target_path)
                    raise ExtractionError(
                        f"Hash mismatch after extracting {target_path}"
                    )

                extracted.append(str(target_path))

    except ExtractionError as exc:
        # Already the documented error type: log it and propagate unchanged
        # instead of wrapping it in a second ExtractionError.
        logger.error("Extraction failed: %s", exc)
        raise
    except Exception as exc:
        logger.error("Extraction failed: %s", exc)
        raise ExtractionError(str(exc)) from exc

    return extracted

get_scan_path Link

get_scan_path(db, scan_id, **kwargs)

Resolve the absolute path of a scan.

Parameters:

Name	Type	Description	Default
`db` Link	`FSDB`	Database object exposing `get_scan`.	required
`scan_id` Link	`str`	Identifier of the scan.	required

Returns:

Type	Description
`Path`	Absolute directory of the scan.

Source code in plantdb/server/services/assets.py

def get_scan_path(db: plantdb.commons.fsdb.core.FSDB, scan_id: str, **kwargs) -> Path:
    """Return the directory of the scan identified by ``scan_id``.

    Parameters
    ----------
    db : plantdb.commons.fsdb.core.FSDB
        Database object; its ``get_scan`` method is used for the lookup.
    scan_id : str
        Identifier of the scan to look up.
    **kwargs
        Forwarded unchanged to ``db.get_scan``.

    Returns
    -------
    pathlib.Path
        The value of the scan's ``path()`` method wrapped in a ``Path``.
    """
    scan_dir = db.get_scan(scan_id, **kwargs).path()
    return Path(scan_dir)

is_valid_archive Link

is_valid_archive(archive_path)

Validate if a given archive meets specific directory and file requirements.

This function checks if the provided archive contains certain required directories and files, and verifies that the directory structure does not exceed a specified depth.

Parameters:

Name	Type	Description	Default
`archive_path` Link	`str or Path`	The path to the archive file (zip) to be validated.	required

Returns:

Type	Description
`bool`	Returns `True` if the archive meets all requirements, otherwise `False`.

Notes

The function currently assumes that the required directories and files are all at or above a specified depth in the archive.
Make sure that the provided archive_path points to a valid zip file.

See Also

zipfile.ZipFile : Python's built-in module for reading and writing ZIP files.

Examples:

>>> import os
>>> from zipfile import ZipFile
>>> from plantdb.server.services.assets import is_valid_archive
>>> # Creating a valid archive for demonstration purposes
>>> with ZipFile('test_archive.zip', 'w') as zip_ref:
...    zip_ref.writestr('images/', '')
...    zip_ref.writestr('images/test1.jpg', '')
...    zip_ref.writestr('metadata/', '')
...    zip_ref.writestr('metadata/data.json', '')
...    zip_ref.writestr('files.json', '')
>>> # Validate the archive
>>> is_valid_archive('test_archive.zip')
True
>>> is_valid_archive('/tmp/real_plant.zip')
True
>>> is_valid_archive('/tmp/real_plant_analyzed.zip')
True

Source code in plantdb/server/services/assets.py

def is_valid_archive(archive_path):
    """Validate if a given archive meets specific directory and file requirements.

    This function checks if the provided archive contains certain required directories and files,
    and verifies that the directory structure does not exceed a specified depth.

    Parameters
    ----------
    archive_path : str or pathlib.Path
        The path to the archive file (zip) to be validated.

    Returns
    -------
    bool
        Returns ``True`` if the archive meets all requirements, otherwise ``False``.

    Notes
    -----
    - The function currently assumes that the required directories and files are all at or above a specified depth in the archive.
    - Make sure that the provided `archive_path` points to a valid zip file.

    See Also
    --------
    zipfile.ZipFile : Python's built-in module for reading and writing ZIP files.

    Examples
    --------
    >>> import os
    >>> from zipfile import ZipFile
    >>> from plantdb.server.services.assets import is_valid_archive
    >>> # Creating a valid archive for demonstration purposes
    >>> with ZipFile('test_archive.zip', 'w') as zip_ref:
    ...    zip_ref.writestr('images/', '')
    ...    zip_ref.writestr('images/test1.jpg', '')
    ...    zip_ref.writestr('metadata/', '')
    ...    zip_ref.writestr('metadata/data.json', '')
    ...    zip_ref.writestr('files.json', '')
    >>> # Validate the archive
    >>> is_valid_archive('test_archive.zip')
    True
    >>> is_valid_archive('/tmp/real_plant.zip')
    True
    >>> is_valid_archive('/tmp/real_plant_analyzed.zip')
    True
    """
    req_dirs = ['images/', 'metadata/']
    req_files = ['files.json']

    top_dir = ''
    max_dir_dept = 2
    with ZipFile(archive_path, 'r') as zip_ref:
        zip_files = zip_ref.namelist()

    # List all top-level members in the zip file
    top_level_dirs = {name for name in zip_files if name.count('/') == 1 and name.endswith('/')}
    top_level_dirs |= {name.split('/')[0] + '/' for name in zip_files if name.count('/') == 1 and name.split('/')[0]}

    # If a lone directory is found at the top, move one step down
    if len(top_level_dirs) == 1:
        top_dir = next(iter(top_level_dirs))
        top_level_dirs = {name.replace(top_dir, '') for name in zip_files if
                          name.count('/') == 2 and name.endswith('/')}
        top_level_dirs |= {name.split('/')[1] + '/' for name in zip_files if
                           name.count('/') == 2 and '/'.join(name.split('/')[:-1])}

    # Check if the required file and directories are among them
    has_req_dirs = [rd in top_level_dirs for rd in req_dirs]
    has_req_files = [f'{top_dir}{rf}' in zip_files for rf in req_files]
    req_dir_depth = all(
        name.count('/') <= max_dir_dept + 1 if 'metadata' in name else name.count('/') <= max_dir_dept for name in
        zip_files)

    if all(has_req_dirs) and all(has_req_files) and req_dir_depth:
        return True
    else:
        # if not all(has_req_dirs):
        #    print(f"Missing required directories: {list(zip(req_dirs, [not r for r in has_req_dirs]))}")
        # if not all(has_req_files):
        #    print(f"Missing required files: {list(zip(req_files, [not r for r in has_req_files]))}")
        return False

validate_upload_headers Link

validate_upload_headers(headers)

Verify that the request contains all mandatory headers and return a dictionary with normalized values.

Parameters:

Name	Type	Description	Default
`headers` Link	`dict`	`request.headers` mapping.	required

Returns:

Type	Description
`dict`	`{ "rel_filename": str, "content_length": int, "chunk_size": int, }`

Raises:

Type	Description
`FileUploadError`	If a required header is missing or malformed.

Source code in plantdb/server/services/assets.py

def validate_upload_headers(headers: dict) -> dict:
    """
    Check the upload request headers and return their normalized values.

    Parameters
    ----------
    headers :
        ``request.headers`` mapping.

    Returns
    -------
    dict
        ``{
            "rel_filename": str,    # stripped value of ``X-File-Path``
            "content_length": int,  # ``Content-Length``, strictly positive
            "chunk_size": int,      # ``X-Chunk-Size``, zero or positive
        }``

    Raises
    ------
    FileUploadError
        If ``_missing_headers`` reports a missing header, if ``X-File-Path``
        is blank, if ``Content-Length`` is not a positive integer, or if
        ``X-Chunk-Size`` is not a non-negative integer.
    """
    absent = _missing_headers(headers)
    if absent:
        raise FileUploadError(f"Missing required header(s): {', '.join(absent)}")

    # Relative destination path, with surrounding whitespace removed.
    path_value = headers.get("X-File-Path", "").strip()
    if not path_value:
        raise FileUploadError("Header 'X-File-Path' must contain a non‑empty value")

    # Total payload size announced by the client.
    try:
        total_size = int(headers.get("Content-Length", "0"))
    except ValueError as exc:
        raise FileUploadError("Header 'Content-Length' must be an integer") from exc
    if total_size < 1:
        raise FileUploadError("Header 'Content-Length' must be > 0")

    # Chunk size; an absent header is read as 0.
    try:
        per_chunk = int(headers.get("X-Chunk-Size", "0"))
    except ValueError as exc:
        raise FileUploadError("Header 'X-Chunk-Size' must be an integer") from exc
    if per_chunk < 0:
        raise FileUploadError("Header 'X-Chunk-Size' cannot be negative")

    return dict(
        rel_filename=path_value,
        content_length=total_size,
        chunk_size=per_chunk,
    )

write_file Link

write_file(file_path, data)

Write data to file_path in a single operation.

Returns:

Type	Description
`int`	Number of bytes written.

Source code in plantdb/server/services/assets.py

def write_file(file_path: Path, data: bytes) -> int:
    """Write ``data`` to ``file_path`` with a single ``write`` call.

    The file is opened in binary write mode, so an existing file is truncated.

    Parameters
    ----------
    file_path : pathlib.Path
        Destination file.
    data : bytes
        Content to store.

    Returns
    -------
    int
        Number of bytes written, as reported by the file object's ``write``.
    """
    with file_path.open("wb") as handle:
        return handle.write(data)

write_streamed_file Link

write_streamed_file(file_path, content_length, chunk_size)

Persist a streamed upload. The Flask request object is accessed globally - this function only deals with the low‑level I/O.

Parameters:

Name	Type	Description	Default
`file_path` Link	`Path`	Destination file.	required
`content_length` Link	`int`	Expected total size (from the `Content‑Length` header).	required
`chunk_size` Link	`int`	Size of each chunk read from `request.stream`. `0` means “no streaming”, but the caller should have already handled that case.	required

Returns:

Type	Description
`int`	Number of bytes actually written.

Source code in plantdb/server/services/assets.py

def write_streamed_file(file_path: Path, content_length: int, chunk_size: int) -> int:
    """
    Persist a streamed upload.  The Flask ``request`` object is accessed
    globally - this function only deals with the low‑level I/O.

    Parameters
    ----------
    file_path : pathlib.Path
        Destination file.  It is opened in binary write mode, so an existing
        file is truncated.
    content_length : int
        Expected total size (from the ``Content‑Length`` header).  At most
        this many bytes are read from ``request.stream``.
    chunk_size : int
        Size of each chunk read from ``request.stream``.  A value of ``0``
        (or any non-positive value) means “no streaming”: the remaining
        payload is then requested in a single read instead of silently
        writing nothing.

    Returns
    -------
    int
        Number of bytes actually written.  This is smaller than
        ``content_length`` when the stream ends early; the caller is expected
        to compare both values.
    """
    # With a chunk size of 0 the loop below would request zero bytes, receive
    # an empty chunk and stop, leaving an empty file.  Fall back to asking for
    # everything that is still expected.
    step = chunk_size if chunk_size > 0 else content_length

    bytes_received = 0
    with file_path.open("wb") as fp:
        while bytes_received < content_length:
            # Never request more than what is still expected.
            to_read = min(step, content_length - bytes_received)
            chunk = request.stream.read(to_read)
            if not chunk:  # pragma: no cover - defensive
                # Stream exhausted before ``content_length`` bytes arrived.
                break
            fp.write(chunk)
            bytes_received += len(chunk)
    return bytes_received

assets

ArchiveError Link

ExtractionError Link

FileUploadError Link

ValidationError Link

create_zip_for_scan Link

`scan_path` Link

`logger` Link

extract_zip_to_scan Link

`zip_path` Link

`destination` Link

`logger` Link

get_scan_path Link

`db` Link

`scan_id` Link

is_valid_archive Link

`archive_path` Link

validate_upload_headers Link

`headers` Link

write_file Link

write_streamed_file Link

`file_path` Link

`content_length` Link

`chunk_size` Link

assets

ArchiveError Link

ExtractionError Link

FileUploadError Link

ValidationError Link

create_zip_for_scan Link

scan_path Link

logger Link

extract_zip_to_scan Link

zip_path Link

destination Link

logger Link

get_scan_path Link

db Link

scan_id Link

is_valid_archive Link

archive_path Link

validate_upload_headers Link

headers Link

write_file Link

write_streamed_file Link

file_path Link

content_length Link

chunk_size Link

`scan_path` Link

`logger` Link

`zip_path` Link

`destination` Link

`logger` Link

`db` Link

`scan_id` Link

`archive_path` Link

`headers` Link

`file_path` Link

`content_length` Link

`chunk_size` Link