Source code for scorevideo_lib.transfer_lights_on_marks

# This file is part of scorevideo_lib: A library for working with scorevideo
# Use of this file is governed by the license in LICENSE.txt.

"""A tool that adds marks to scored log files based on a ``LIGHTS ON`` behavior

The marks are added with negative time and frame so as to accurately record
when, relative to the start of the scored log file, the lights were recorded
coming on.

When called directly, this script processes the log files found in the
``work`` directory (relative to the current directory). Files are partitioned
such that each partition holds the logs for one fish on one day. Afternoon
files are ignored, and the ``LIGHTS ON`` behavior in the ``_1`` or ``_2`` logs
is transferred to the ``_Morning`` log.

WARNING: This script is NOT general. It is specific to one particular
    experiment. It may, however, be a useful example for other researchers.

"""

import os
import re
from typing import List, Tuple, Optional
from scorevideo_lib.parse_log import Log, RawLog
from scorevideo_lib.add_marks import copy_mark, get_ending_mark, \
    get_ending_behav
from scorevideo_lib.base_utils import equiv_partition


class ExpectedFile:
    """Pattern describing which file names are acceptable

    Instances populate :py:const:`PART_REQUIRED` and
    :py:const:`PART_OPTIONAL`, where they spell out which files a partition
    must contain and which files it may contain.

    """

    def __init__(self, present: List[str] = None, absent: List[str] = None,
                 regex: str = None) -> None:
        """Build a description of a file name

        Args:
            present: Substrings that the file name must contain
            absent: Substrings that the file name must not contain
            regex: Regular expression that must match the entire file name
        """
        # A missing (or empty) argument is stored as a fresh empty list
        self.present = present if present else []
        self.absent = absent if absent else []
        self.regex = regex

    def match(self, to_test: str) -> bool:
        """Decide whether a file name fits this description

        Every constraint that was specified has to hold for the name to fit.

        >>> ExpectedFile(['a', 'b'], ['c']).match('ab')
        True
        >>> ExpectedFile(['a', 'b'], ['c']).match('abc')
        False
        >>> ExpectedFile(['a', 'b'], ['c']).match('ac')
        False
        >>> ExpectedFile(['a', 'b'], ['c']).match('a')
        False
        >>> ExpectedFile(['a', 'b'], ['c'], r'[abc]*.txt').match('ab')
        False
        >>> ExpectedFile(['a', 'b'], ['c'], r'[abc]*.txt').match('ab.txt')
        True
        >>> ExpectedFile(['a', 'b'], ['c'], r'[abc]*.txt').match('abc.txt')
        False

        Args:
            to_test: The file name to examine

        Returns:
            ``True`` when all constraints are satisfied, ``False`` otherwise.
        """
        # Constraints are checked in the order: present, absent, regex
        if not all(piece in to_test for piece in self.present):
            return False
        if any(piece in to_test for piece in self.absent):
            return False
        if not self.regex:
            return True
        return re.fullmatch(self.regex, to_test) is not None

    def __repr__(self) -> str:
        template = "ExpectedFile[present={}, absent={}, regex={}]"
        return template.format(self.present, self.absent, self.regex)

    def __str__(self) -> str:
        return repr(self)
# File descriptions (not regular expressions) of the logs that every
# partition is required to contain
PART_REQUIRED = [
    ExpectedFile(present=["_Morning."], absent=["_LIGHTSON.txt"]),
    ExpectedFile(present=["_1."], absent=["_LIGHTSON.txt"]),
]
# File descriptions of the logs that a partition is allowed to contain
PART_OPTIONAL = [
    ExpectedFile(present=["_2."], absent=["_LIGHTSON.txt"]),
    ExpectedFile(present=["_LIGHTSON.txt"]),
]
# The original note here states that files in a partition matching none of
# the descriptions above throw errors.
# NOTE(review): confirm that the validation code actually enforces this.

# Alternative configuration, kept for reference, in which the lights-on log
# is required instead of optional:
# PART_REQUIRED = [
#     ExpectedFile(present=["_Morning."], absent=["_LIGHTSON.txt"]),
#     ExpectedFile(present=["_1."], absent=["_LIGHTSON.txt"]),
#     ExpectedFile(present=["_LIGHTSON.txt"]),
# ]
# PART_OPTIONAL = [
#     ExpectedFile(present=["_2."], absent=["_LIGHTSON.txt"]),
# ]
def read_aggr_behav_list(path: str = 'fm_behaviors.txt') -> List[str]:
    """Read in the list of FM behaviors that are aggressive / submissive

    The file is read line by line; each line becomes one list entry with its
    trailing whitespace (including the newline) removed. Blank lines are kept
    as empty strings.

    Args:
        path: Path of the behavior list file. Defaults to
            ``fm_behaviors.txt``, resolved against the current working
            directory, which is the file this function always read before
            the parameter existed.

    Returns:
        The lines of the file, in order, without trailing whitespace.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(path, 'r') as f:
        return [line.rstrip() for line in f]
def copy_lights_on(aggr_logs: List[Log], scored_log: RawLog,
                   aggr_behav_des: List[str]) -> RawLog:
    """Copy a ``LIGHTS ON`` mark from aggression logs to the scored log

    Args:
        aggr_logs: Aggression logs are the ``_1`` or ``_2`` logs in which the
            researcher is looking for the first aggressive or submissive
            behavior by the focal male to begin scoring. Must contain at
            least one log, ordered from first video to last.
        scored_log: The scored log is the log from the video that was fully
            scored for behaviors.
        aggr_behav_des: List of behavior description sections that indicate
            that a particular behavior is considered aggressive or submissive
            for the purposes of beginning to fully score the video. This was
            previously written as ``aggr_behav_des=List[str]``, which made
            the typing object itself the default value; the argument is now
            a properly annotated required parameter.

    Returns:
        The result of :py:func:`copy_mark`: a copy of ``scored_log``, but
        with the ``LIGHTS ON`` mark inserted.

    Raises:
        ValueError: If ``aggr_logs`` is empty, or if no ending behavior can
            be found in the last aggression log.
    """
    if not aggr_logs:
        raise ValueError("At least one aggression log is required to copy "
                         "the LIGHTS ON mark")

    # For any video i except for the last video, video i+1 starts at the end
    # of video i.
    log_tuples = []
    for log in aggr_logs[:-1]:
        end_mark = get_ending_mark(log.marks)
        log_tuples.append((log, end_mark.time, end_mark.frame))

    # For the last video, the next video starts at the first aggressive
    # behavior because only the pre-scoring videos should be in aggr_logs
    last_log = aggr_logs[-1]
    try:
        s_behav = get_ending_behav(last_log.full, aggr_behav_des)
    except ValueError as error:
        msg = "No ending behavior in aggression logs {}: {}".format(
            aggr_logs, error)
        # Chain the cause so the original traceback is preserved
        raise ValueError(msg) from error
    log_tuples.append((last_log, s_behav.time, s_behav.frame))

    return copy_mark(log_tuples, 'LIGHTS ON', scored_log, 'LIGHTS ON')
def get_name_core(filename: str) -> str:
    """Extract the core of a file name

    The core is whatever comes before the final underscore-delimited element
    of the name, once directories and extensions have been stripped. That
    final element is what tells apart videos of the same fish on the same
    day.

    >>> get_name_core("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS")
    'log050118_OB5B030618_TA23_Dyad'
    >>> get_name_core("log050118_OB5B030618_TA23_Dyad_1.avi_CS.txt")
    'log050118_OB5B030618_TA23_Dyad'
    >>> get_name_core("tmp/log050118_OB5B030618_TA23_Dyad_Morning.avi_CS")
    'log050118_OB5B030618_TA23_Dyad'

    Args:
        filename: File name (optionally with a directory prefix) to process

    Returns:
        The core of the file name; empty if the name has no underscore
        before its first ``.``
    """
    base = os.path.basename(filename)
    # Everything from the first `.` onward is extension (e.g. .wmv_AA.txt)
    stem, _, _ = base.partition('.')
    # Drop the last `_`-delimited element (e.g. 1, 2, or Morning)
    core, _, _ = stem.rpartition('_')
    return core
def get_last_name_elem(filename: str) -> str:
    """Extract the final underscore-delimited element of a file name

    Directories and extensions are stripped first. The element that remains
    at the end is what tells apart videos of the same fish on the same day.

    >>> get_last_name_elem("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS")
    'Morning'
    >>> get_last_name_elem("log050118_OB5B030618_TA23_Dyad_2.avi_CS")
    '2'

    Args:
        filename: File name (optionally with a directory prefix) to process

    Returns:
        The text after the last ``_`` of the extension-free name, or the
        whole extension-free name if it has no ``_``
    """
    base = os.path.basename(filename)
    # Everything from the first `.` onward is extension (e.g. .wmv_AA.txt)
    stem, _, _ = base.partition('.')
    # Keep only what follows the last `_` (e.g. 1, 2, or Morning)
    _, _, last = stem.rpartition('_')
    return last
def same_fish_and_day(name1: str, name2: str) -> bool:
    """Decide whether two files belong to the same fish on the same day

    Each name is reduced to its final path component, normalized with
    :py:func:`normalize_name`, and then reduced to its core with
    :py:func:`get_name_core`. The files belong together when the cores agree.

    >>> same_fish_and_day("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS",
    ...                   "log050118_OB5B030618_TA23_Dyad_1.avi_CS")
    True
    >>> same_fish_and_day("050118_OB5B030618_TA23_Dyad_Morning.avi_CS",
    ...                   "log050118_OB5B030618_TA23_Dyad_1.avi_CS")
    True
    >>> same_fish_and_day("log050118_OB5B030618_TA25_Dyad_Morning.avi_CS",
    ...                   "log050118_OB5B030618_TA23_Dyad_1.avi_CS")
    False
    >>> same_fish_and_day("050118_OB5B030618_TA25_Dyad_Morning.avi_CS",
    ...                   "log050118_OB5B030618_TA23_Dyad_1.avi_CS")
    False

    Args:
        name1: First file name to compare
        name2: Second file name to compare

    Returns:
        Whether both names reduce to the same core
    """
    # Strip directories before normalizing so the ``log`` prefix check looks
    # at the file name rather than at the leading directory
    bare_names = [os.path.basename(name) for name in (name1, name2)]
    first, second = (normalize_name(name) for name in bare_names)
    return get_name_core(first) == get_name_core(second)
def is_scored(filename: str) -> bool:
    """Decide whether a file name denotes a full scoring log

    The decision is based on :py:func:`get_last_name_elem`: the name denotes
    a full scoring log when its last element is ``Morning`` or
    ``Afternoon``.

    >>> is_scored("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS")
    True
    >>> is_scored("log050118_OB5B030618_TA23_Dyad_1.avi_CS")
    False

    Args:
        filename: The file name to examine

    Returns:
        Whether the file name denotes a full scoring log
    """
    scored_labels = ("Morning", "Afternoon")
    return get_last_name_elem(filename) in scored_labels
def is_lights_on(filename: str) -> bool:
    """Decide whether a file name denotes a lights-on log

    A lights-on log is named like another log, except that its name ends
    with ``_LIGHTSON``. It signals that the ``LIGHTS ON`` behavior it holds
    should be transferred, keeping timestamp and frame number, to the log of
    the same name (minus ``_LIGHTSON``, and perhaps with different scoring
    initials). The terminal file extension (e.g. ``.txt``) is not
    considered.

    >>> is_lights_on("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS.txt")
    False
    >>> is_lights_on("log050118_OB5B030618_TA23_Dyad_1.avi_CS_LIGHTSON.txt")
    True

    Args:
        filename: Name of the log file to examine

    Returns:
        Whether the file name denotes a lights-on log
    """
    stem = os.path.splitext(filename)[0]
    # Compare only the text after the final `_` of the extension-free name
    _, _, suffix = stem.rpartition('_')
    return suffix == "LIGHTSON"
def normalize_name(filename: str) -> str:
    """Make sure a file name starts with the prefix ``log``

    >>> normalize_name("1.wmv_CS.txt")
    'log1.wmv_CS.txt'
    >>> normalize_name("log1.wmv_CS.txt")
    'log1.wmv_CS.txt'
    >>> normalize_name("logfoo")
    'logfoo'

    Args:
        filename: The file name to normalize

    Returns:
        The file name unchanged if it already starts with ``log``, otherwise
        the file name with ``log`` prepended.
    """
    return filename if filename.startswith("log") else "log" + filename
def name_filter(filename: str) -> bool:
    """Decide whether a file name should be processed

    Numbered log files and ``Morning`` log files are accepted. ``Afternoon``
    log files are rejected.

    >>> name_filter("log050118_OB5B030618_TA23_Dyad_Morning.avi_CS.txt")
    True
    >>> name_filter("log050118_OB5B030618_TA23_Dyad_Afternoon.avi_CS.txt")
    False
    >>> name_filter("log050118_OB5B030618_TA23_Dyad_3.avi_CS.txt")
    True

    A missing ``log`` prefix makes no difference

    >>> name_filter("050118_OB5B030618_TA23_Dyad_Morning.avi_CS.txt")
    True
    >>> name_filter("050118_OB5B030618_TA23_Dyad_Afternoon.avi_CS.txt")
    False
    >>> name_filter("050118_OB5B030618_TA23_Dyad_3.avi_CS.txt")
    True

    Args:
        filename: The file name to examine

    Returns:
        Whether the file should be included for analysis
    """
    # The prefix is added first so one pattern covers both naming variants
    candidate = normalize_name(filename)
    pattern = r"\Alog[0-9]{6}_[0-9A-Z]+[0-9]{6}_[0-9A-Z]+_Dyad_([0-9]+|(Morning)).*\Z"
    return bool(re.fullmatch(pattern, candidate))
def validate_partition(partition: List[str]) -> List[str]:
    """Validates a partitioning of files

    The following problems are reported:

    * a file that matches more than one element of
      :py:const:`PART_REQUIRED` and :py:const:`PART_OPTIONAL` combined,
    * an element of :py:const:`PART_REQUIRED` that no file matches,
    * an element of :py:const:`PART_REQUIRED` or :py:const:`PART_OPTIONAL`
      that more than one file matches.

    Files that match no element at all are not reported.

    Args:
        partition: The list of file names to validate

    Returns:
        A list of problem descriptions, one for each problem discovered. No
        problems are found if and only if ``[]`` is returned.
    """
    probs = []
    # required[i] / optional[i] collect the names matching the i-th
    # expectation of the corresponding constant
    required: List[List[str]] = [[] for _ in PART_REQUIRED]
    optional: List[List[str]] = [[] for _ in PART_OPTIONAL]

    for name in partition:
        matched = []
        for expected, bucket in zip(PART_REQUIRED, required):
            if expected.match(name):
                matched.append(expected)
                bucket.append(name)
        for expected, bucket in zip(PART_OPTIONAL, optional):
            if expected.match(name):
                matched.append(expected)
                bucket.append(name)
        if len(matched) > 1:
            probs.append("File {} matched multiple expectations: {}".
                         format(name, matched))

    for expected, files in zip(PART_REQUIRED, required):
        if not files:
            probs.append("No file found that matches: {}".format(expected))
        if len(files) > 1:
            probs.append("{} matched multiple files: {}".format(
                expected, files))

    for expected, files in zip(PART_OPTIONAL, optional):
        if len(files) > 1:
            # Report the optional expectation itself; indexing PART_REQUIRED
            # here named the wrong expectation in the message
            probs.append("{} matched multiple files: {}".format(
                expected, files))

    return probs
def find_scored_lights(partition: List[str]) -> \
        Tuple[str, Optional[str]]:
    """Locate the full scoring log and the lights-on log of a partition

    :py:func:`is_scored` decides which file is the full scoring log, and
    :py:func:`is_lights_on` decides which file is the lights-on log.

    Args:
        partition: The file names among which to look for the two logs.

    Returns:
        Pair of the full scoring log's name and the lights-on log's name.
        The second entry is ``None`` when the partition has no lights-on
        log.

    Raises:
        ValueError: If more than one full scoring log or more than one
            lights-on log is found, if there is no full scoring log, or if
            one file is both the full scoring log and the lights-on log.
    """
    full_log = None
    lights_log: Optional[str] = None

    for candidate in partition:
        # The two checks are independent: one name may satisfy both
        if is_scored(candidate):
            if full_log is not None:
                raise ValueError(
                    "Duplicate full scoring log: {}".format(full_log))
            full_log = candidate
        if is_lights_on(candidate):
            if lights_log is not None:
                raise ValueError(
                    "Duplicate lights-on log: {}".format(lights_log))
            lights_log = candidate

    if full_log is None:
        raise ValueError(
            "No full scoring log found for {}".format(partition))
    if lights_log and lights_log == full_log:
        raise ValueError(
            "Lights-on log {} same as the full scoring log {}".format(
                lights_log, full_log))
    return full_log, lights_log
def get_partitions(path_to_log_dir: str):
    """Collect and partition the log file names of a directory

    Names starting with ``.`` are dropped, and so are names rejected by
    :py:func:`name_filter`. The remaining names, each prefixed with the
    given path, are grouped by :py:func:`equiv_partition` using
    :py:func:`same_fish_and_day` as the equivalence test. Every group is then
    checked with :py:func:`validate_partition`; the problems of each invalid
    group are printed before the error is raised.

    Args:
        path_to_log_dir: Path to the directory holding the log files

    Returns:
        The partitioning of the file names, once all partitions passed
        validation.

    Raises:
        ValueError: If at least one partition fails validation
    """
    visible = (entry for entry in os.listdir(path_to_log_dir)
               if entry[0] != '.')
    paths = [os.path.join(path_to_log_dir, entry)
             for entry in visible if name_filter(entry)]
    partitions: List[List[str]] = equiv_partition(paths, same_fish_and_day)

    any_invalid = False
    for partition in partitions:
        part_probs = validate_partition(partition)
        if not part_probs:
            continue
        any_invalid = True
        print("Problems with partition: {}".format(partition))
        for prob in part_probs:
            print("\t{}".format(prob))

    # Raise only after every partition was checked so all problems get printed
    if any_invalid:
        raise ValueError("Some partitions are invalid.")
    return partitions
def batch_mark_lights_on(path_to_log_dir: str) -> None:
    """Transfer ``LIGHTS ON`` marks for every group of logs in a directory

    :py:func:`get_partitions` groups the logs so that each group pertains to
    one fish on one day. Within each group, the aggression logs are ordered
    by their numeric last name element and handed, together with the full
    scoring log, to :py:func:`copy_lights_on`. The result is written back
    over the full scoring log file.

    The ``LIGHTS ON`` behavior may instead live in a separate lights-on log
    (see :py:func:`is_lights_on`). Such a log should be named like the log
    in which the ``LIGHTS ON`` behavior would otherwise be, except that its
    name (before the terminal extension like ``.txt``) should end in
    ``_LIGHTSON`` and the initials of the scorer may differ. Its contents
    are appended to the aggression log with the same last name element.

    Args:
        path_to_log_dir: Path to the directory of logs to process

    Returns:
        None
    """
    def video_number(name: str) -> int:
        # Aggression logs are ordered by their numeric last name element
        return int(get_last_name_elem(name))

    for partition in get_partitions(path_to_log_dir):
        scored, lightson = find_scored_lights(partition)

        if lightson:
            with open(lightson, 'r') as f:
                lightson_log = Log.from_file(f)

        aggr_names = [name for name in partition
                      if name not in (scored, lightson)]
        aggr_names.sort(key=video_number)

        logs = []
        for name in aggr_names:
            with open(name, 'r') as f:
                log = Log.from_file(f)
            if lightson and \
                    get_last_name_elem(name) == get_last_name_elem(lightson):
                # Merge the separately recorded lights-on log into its match
                log.extend(lightson_log)
            logs.append(log)

        with open(scored, 'r') as f:
            scored_raw = RawLog.from_file(f)

        final = copy_lights_on(logs, scored_raw, read_aggr_behav_list())

        # The full scoring log is overwritten in place
        with open(scored, 'w') as f:
            for line in final.to_lines():
                f.write(line + "\n")
# Script entry point: process the logs found in the ``work`` directory,
# resolved against the current working directory.
if __name__ == "__main__":
    batch_mark_lights_on("work")