rustling.elan#

ELAN (.eaf) file parsing.

Package Contents#

class rustling.elan.Annotation#

A single annotation within an ELAN tier.

property id: str#

Annotation ID (e.g. “a1”).

property start_time: int | None#

Start time in milliseconds, or None if unresolvable.

property end_time: int | None#

End time in milliseconds, or None if unresolvable.

property value: str#

The annotation text content.

property parent_id: str | None#

Parent annotation ID for REF_ANNOTATIONs, or None.

class rustling.elan.Tier#

An annotation tier (layer) within an ELAN file.

property id: str#

Tier ID (e.g. “G-jyutping”).

property participant: str#

Participant name.

property annotator: str#

Annotator name.

property linguistic_type_ref: str#

Linguistic type reference.

property parent_id: str | None#

Parent tier ID, or None for root tiers.

property child_ids: list[str] | None#

Child tier IDs, or None if no children.

property annotations: list[Annotation]#

Annotations in this tier.

class rustling.elan.ELAN#

ELAN (.eaf) data reader.

classmethod from_strs(strs: Sequence[str], ids: Sequence[str] | None = None, parallel: bool = True) → ELAN#

Parse ELAN data from in-memory strings.

classmethod from_files(paths: Sequence[str | os.PathLike[str]], *, parallel: bool = True) → ELAN#

Load ELAN data from file paths.

classmethod from_dir(path: str | os.PathLike[str], *, match: str | None = None, extension: str = '.eaf', parallel: bool = True) → ELAN#

Recursively load ELAN data from a directory.

classmethod from_zip(path: str | os.PathLike[str], *, match: str | None = None, extension: str = '.eaf', parallel: bool = True) → ELAN#

Load ELAN data from a ZIP archive.

classmethod from_git(url: str, *, rev: str | None = None, depth: int | None = None, match: str | None = None, extension: str = '.eaf', cache_dir: str | os.PathLike[str] | None = None, force_download: bool = False, parallel: bool = True) → ELAN#

Load ELAN data from a git repository.

Clones the repository (or uses a cached clone) and parses all matching files from the resulting directory.

Parameters:
  • url – Git repository URL.

  • rev – Branch, tag, or commit hash. If None, uses the repository’s default branch.

  • depth – Clone depth. If None, defaults to 1 (shallow clone). Ignored when rev is a commit hash.

  • match – Regex pattern to include only matching file paths.

  • extension – File extension to filter by (default: “.eaf”).

  • cache_dir – Directory for caching cloned repositories. Defaults to ~/.rustling/cache/.

  • force_download – If True, re-clone even if a cached copy exists.

  • parallel – If True, use parallel processing.

Returns:

A new ELAN reader with the parsed data.

classmethod from_url(url: str, *, match: str | None = None, extension: str = '.eaf', cache_dir: str | os.PathLike[str] | None = None, force_download: bool = False, parallel: bool = True) → ELAN#

Load ELAN data from a URL.

Downloads the file (or uses a cached copy) and parses it. ZIP files are automatically detected and extracted.

Parameters:
  • url – URL to download from.

  • match – Regex pattern to include only matching file paths (applicable for ZIP files).

  • extension – File extension to filter by (default: “.eaf”, applicable for ZIP files).

  • cache_dir – Directory for caching downloads. Defaults to ~/.rustling/cache/.

  • force_download – If True, re-download even if a cached copy exists.

  • parallel – If True, use parallel processing.

Returns:

A new ELAN reader with the parsed data.

property file_paths: list[str]#

Return the list of file paths.

property n_files: int#

Return the number of files.

tiers() → list[collections.OrderedDict[str, Tier]]#

Return tiers as a list of OrderedDicts, one per file.

to_strs() → list[str]#

Return EAF XML strings, one per file.

to_chat_strs(*, participants: Sequence[str] | None = None) → list[str]#

Return CHAT format strings, one per file.

Parameters:

participants – Participant codes (tier IDs) to treat as CHAT main tiers. If None, auto-detects parent tiers with a 3-character ID.

Returns:

A list of CHAT-formatted strings.

to_chat(*, participants: Sequence[str] | None = None) → rustling.chat.CHAT#

Convert to a CHAT object.

Each ELAN file produces one CHAT file. Parent tiers become CHAT main tiers, and child tiers matching {name}@{code} become dependent tiers (e.g., mor@CHI becomes %mor).

Parameters:

participants – Participant codes (tier IDs) to treat as CHAT main tiers. If None, auto-detects parent tiers with a 3-character ID.

Returns:

A CHAT object.

to_chat_files(dir_path: str | os.PathLike[str], /, *, participants: Sequence[str] | None = None, filenames: Sequence[str] | None = None) → None#

Write CHAT (.cha) files to a directory.

Parameters:
  • dir_path – Directory path to write .cha files to.

  • participants – Participant codes (tier IDs) to treat as CHAT main tiers. If None, auto-detects parent tiers with a 3-character ID.

  • filenames – Custom filenames for the output files. If None, filenames are derived from the original source file paths with the extension changed to .cha. Falls back to 0001.cha, 0002.cha, etc. when the data was parsed from in-memory strings.

Raises:
  • ValueError – If filenames count doesn’t match file count.

  • IOError – If writing fails.

to_files(dir_path: str | os.PathLike[str], /, *, filenames: Sequence[str] | None = None) → None#

Write ELAN (.eaf) files to a directory.

Parameters:
  • dir_path – Directory path to write .eaf files to.

  • filenames – Custom filenames for the output files. If None, filenames are derived from the original source file paths. Falls back to 0001.eaf, 0002.eaf, etc. when the data was parsed from in-memory strings.

Raises:
  • ValueError – If filenames count doesn’t match file count.

  • IOError – If writing fails.

to_srt_strs(*, participants: Sequence[str] | None = None) → list[str]#

Return SRT format strings, one per file.

Parameters:

participants – Participant codes (tier IDs) to include. If None, auto-detects parent tiers with a 3-character ID. Annotations without time marks are skipped.

Returns:

A list of SRT-formatted strings.

to_srt(*, participants: Sequence[str] | None = None) → rustling.srt.SRT#

Convert to an SRT object.

Each ELAN file produces one SRT file. When multiple tiers are selected, subtitle text is prefixed with the tier ID (e.g., "CHI: more cookie ."). Annotations without time marks are skipped.

Parameters:

participants – Participant codes (tier IDs) to include. If None, auto-detects parent tiers with a 3-character ID.

Returns:

An SRT object.

to_srt_files(dir_path: str | os.PathLike[str], /, *, participants: Sequence[str] | None = None, filenames: Sequence[str] | None = None) → None#

Write SRT (.srt) files to a directory.

Parameters:
  • dir_path – Directory path to write .srt files to.

  • participants – Participant codes (tier IDs) to include. If None, auto-detects parent tiers with a 3-character ID.

  • filenames – Custom filenames for the output files.

Raises:
  • ValueError – If filenames count doesn’t match file count.

  • IOError – If writing fails.

to_textgrid_strs() → list[str]#

Return TextGrid format strings, one per file.

to_textgrid() → rustling.textgrid.TextGrid#

Convert to a TextGrid object.

to_textgrid_files(dir_path: str | os.PathLike[str], /, *, filenames: Sequence[str] | None = None) → None#

Write TextGrid (.TextGrid) files to a directory.

append(other: ELAN, /) → None#

Append data from another ELAN reader.

append_left(other: ELAN, /) → None#

Left-append data from another ELAN reader, preserving order.

extend(others: Sequence[ELAN], /) → None#

Extend data from multiple ELAN readers.

pop() → ELAN#

Remove and return the last file as a new ELAN reader.

pop_left() → ELAN#

Remove and return the first file as a new ELAN reader.

clear() → None#

Remove all data from this reader.

rustling.elan.read_elan(path: str | os.PathLike[str], *, cls: type[ELAN] = ELAN) → ELAN#

Read ELAN data.

Parameters:
  • path – Path to a .zip file, a local directory containing .eaf files, a single .eaf file, a git repository URL (ending in .git), or an HTTP/HTTPS URL.

  • cls – The class used to create the reader. Must be ELAN or a subclass of it.

Returns:

An ELAN instance.

Raises:
  • TypeError – If cls is not ELAN or a subclass of it.

  • ValueError – If path does not point to a recognized source.