
"""
``sfftk.core.parser``
===========================

A large number of functions in ``sfftk`` take only two arguments: ``args``, the :py:class:`argparse.Namespace`
produced by Python's :py:class:`argparse.ArgumentParser`, and ``configs``, a dictionary of all persistent configs.
This module extends the parser object :py:class:`sfftkrw.core.parser.Parser` and includes a
:py:func:`sfftk.core.parser.parse_args` function which sanity-checks all command-line arguments.
"""
import argparse
import configparser
import os
import pathlib
import re
import sys
from copy import deepcopy

from sfftkrw.core import _dict_iter_keys, _decode, _input, _str, _xrange
# extend the sfftkrw Parser object
from sfftkrw.core.parser import Parser, subparsers, convert_parser, view_parser, tests_parser, tool_list, add_args
from sfftkrw.core.print_tools import print_date

from ..core.prep import check_mask_is_binary
from ..notes import RESOURCE_LIST

__author__ = 'Paul K. Korir, PhD'
__email__ = 'pkorir@ebi.ac.uk, paul.korir@gmail.com'
__date__ = '2016-06-10'
__updated__ = '2018-02-14'

VERBOSITY_RANGE = _xrange(4)
MULTI_FILE_FORMATS = ['stl', 'map', 'mrc', 'rec', 'star']
PREPABLE_FILE_FORMATS = ['mrc', 'map', 'rec']
RESCALABLE_FILE_FORMATS = ['stl']
# some file extensions are used by multiple file types;
# this dictionary lists indices that may be used for subtypes by extension;
# each value under an extension maps an index to a friendly name for the corresponding format
EXTENSION_SUBTYPE_INDICES = dict()
# .h5
EXTENSION_SUBTYPE_INDICES['h5'] = dict()
EXTENSION_SUBTYPE_INDICES['h5'][0] = 'SuRVoS'
EXTENSION_SUBTYPE_INDICES['h5'][1] = 'ilastik'
EXTENSION_SUBTYPE_INDICES['h5'][2] = 'EMDB-SFF'
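# A sketch of how another ambiguous extension could be registered (hypothetical — only .h5
# is disambiguated here); _set_subtype_index() below prompts the user to pick one of these
# indices when --subtype-index is not given:
#
#   EXTENSION_SUBTYPE_INDICES['xyz'] = {0: 'FormatA', 1: 'FormatB'}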

Parser.description = "The EMDB-SFF Toolkit (sfftk)"

# =========================================================================
# common arguments
# =========================================================================
sff_file = {
    'args': ['sff_file'],
    'kwargs': {
        'help': 'path (rel/abs) to an EMDB-SFF file',
    }
}
description = {
    'args': ['-d', '--description'],
    'kwargs': {
        'help': 'the description'
    }
}
details = {
    'args': ['-D', '--details'],
    'kwargs': {
        'help': "populates <details>...</details> in the XML file"
    }
}
external_ref_id = {
    'args': ['-e', '--external-ref-id'],
    'kwargs': {
        'type': int,
        'help': "the external reference ID as shown with the 'list' command",
    }
}
external_ref = {
    'args': ['-E', '--external-ref'],
    'kwargs': {
        'nargs': 3,
        'help': (
            "An external reference consists of three components: the "
            "name of the external reference, a URL to the particular external reference "
            "and the accession. If you use the sff notes search utility these will "
            "correspond to the resource, url and accession. The following is a list "
            "of valid external references: {}. You can also specify multiple external "
            "reference arguments e.g. sff notes add -i <int> -E r11 r12 r13 -E r21 r22 r23 "
            "file.json"
        ).format(', '.join(RESOURCE_LIST.keys())),
    }
}
FORMAT_LIST = [
    ('sff', 'XML'),
    ('hff', 'HDF5'),
    ('json', 'JSON'),
]
format_ = {
    'args': ['-f', '--format'],
    'kwargs': {
        'help': "output file format; valid options are: {} [default: sff]".format(
            ", ".join(map(lambda x: "{} ({})".format(x[0], x[1]), FORMAT_LIST))
        ),
    }
}
header = {
    'args': ['-H', '--header'],
    'kwargs': {
        'default': False,
        'action': 'store_true',
        'help': 'show EMDB-SFF header (global) attributes [default: False]'
    }
}
number_of_instances = {
    'args': ['-I', '--number-of-instances'],
    'kwargs': {
        'type': int,
        'help': 'the number of instances',
    }
}
segment_id = {
    'args': ['-i', '--segment-id'],
    'kwargs': {
        'help': 'refer to a segment by its ID'
    }
}
name = {
    'args': ['-N', '--name'],
    'kwargs': {
        'help': "the segmentation name"
    }
}
output = {
    'args': ['-o', '--output'],
    'kwargs': {
        'default': None,
        'help': "file to convert to; the extension (.sff, .hff, .json) determines the output format [default: None]"
    }
}
software_proc_details = {
    'args': ['-P', '--software-processing-details'],
    'kwargs': {
        'help': "details of how the segmentation was processed"
    }
}
config_path = {
    'args': ['-p', '--config-path'],
    'kwargs': {
        'help': "path to configs file"
    }
}
primary_descriptor = {
    'args': ['-R', '--primary-descriptor'],
    'kwargs': {
        'help': (
            "populates the <primaryDescriptor>...</primaryDescriptor> to this value [valid values:  "
            "three_d_volume, mesh_list, shape_primitive_list]"
        )
    }
}
software_name = {
    'args': ['-S', '--software-name'],
    'kwargs': {
        'help': "the name of the software used to create the segmentation"
    }
}
software_id = {
    'args': ['-s', '--software-id'],
    'kwargs': {
        'type': int,
        'help': "the software to edit",
    }
}
segment_name = {
    'args': ['-n', '--segment-name'],
    'kwargs': {
        'help': "the name of the segment"
    }
}
shipped_configs = {
    'args': ['-b', '--shipped-configs'],
    'kwargs': {
        'default': False,
        'action': 'store_true',
        'help': 'use shipped configs only if config path and user configs fail [default: False]'
    }
}
software_version = {
    'args': ['-T', '--software-version'],
    'kwargs': {
        'help': "the version of software used to create the segmentation"
    }
}
subtype_index = {
    'args': ['--subtype-index'],
    'kwargs': {
        'default': -1,
        'type': int,
        'help': """some file extensions are used by multiple file types"""
    }
}
verbose = {
    'args': ['-v', '--verbose'],
    'kwargs': {
        'action': 'store_true',
        'default': False,
        'help': "verbose output"
    },
}
transform_id = {
    'args': ['-x', '--transform-id'],
    'kwargs': {
        'type': int,
        'help': "the transform ID to edit"
    }
}
transform = {
    'args': ['-X', '--transform'],
    'kwargs': {
        'nargs': 12,
        'type': float,
        'help': "twelve (12) floats to specify the 3x4 matrix; rows first"
    }
}
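# Each of the dictionaries above bundles the positional and keyword arguments for a single
# :py:meth:`argparse.ArgumentParser.add_argument` call so the same option can be attached to
# many subparsers via ``add_args``. A minimal sketch of the expansion (``my_parser`` is
# hypothetical):
#
#   add_args(my_parser, verbose)
#   # is equivalent to:
#   my_parser.add_argument(*verbose['args'], **verbose['kwargs'])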

# =========================================================================
# prep subparser
# =========================================================================
prep_parser = subparsers.add_parser(
    'prep',
    description="Prepare a segmentation for conversion to EMDB-SFF",
    help="prepares a segmentation"
)
prep_subparsers = prep_parser.add_subparsers(
    title='Segmentation preparation utility',
    dest='prep_subcommand',
    description="The following commands provide a number of pre-processing steps for various segmentation file formats. "
                "Most only apply to one file type. See the help for each command by typing 'sff prep <command>'",
    metavar='Preparation steps:'
)
# =========================================================================
# prep: binmap
# =========================================================================
binmap_prep_parser = prep_subparsers.add_parser(
    'binmap',
    description='Bin the CCP4 file to reduce file size',
    help='bin a CCP4 map',
)
binmap_prep_parser.add_argument(
    'from_file', help='the name of the segmentation file'
)
add_args(binmap_prep_parser, config_path)
add_args(binmap_prep_parser, shipped_configs)
binmap_prep_parser.add_argument(
    '-m', '--mask-value',
    default=1, type=int,
    help='value to set thresholded voxels to; all other voxels are set to zero [default: 1]'
)
binmap_prep_parser.add_argument(
    '-o', '--output',
    default=None,
    help='output file name [default: <infile>_binned.<ext>]'
)
binmap_prep_parser.add_argument(
    '--overwrite',
    default=False,
    action='store_true',
    help='overwrite output file [default: False]'
)
binmap_prep_parser.add_argument(
    '-c', '--contour-level',
    default=0,
    type=float,
    help='threshold value (exclusive); by default voxels above this value are retained (see --negate) [default: 0.0]'
)
binmap_prep_parser.add_argument(
    '--negate',
    default=False,
    action='store_true',
    help='use values below the contour level [default: False]'
)
binmap_prep_parser.add_argument(
    '-B', '--bytes-per-voxel',
    default=1,
    type=int,
    choices=[1, 2, 4, 8, 16],
    help='number of bytes per voxel [default: 1]'
)
binmap_prep_parser.add_argument(
    '--infix',
    default='prep',
    help="infix to be added to filenames e.g. file.map -> file_<infix>.map [default: 'prep']",
)
add_args(binmap_prep_parser, verbose)

# =========================================================================
# prep: transform
# =========================================================================
transform_prep_parser = prep_subparsers.add_parser(
    'transform',
    description='Transform the STL mesh vertices by the given values',
    help='transform an STL mesh',
)
# todo: add a new option for the voxel coordinates e.g. --voxel-size <v_x> <v_y> <v_z> which is
#  mutually exclusive with --lengths and --indices
transform_prep_parser.add_argument(
    'from_file', help="the name of the segmentation file"
)
add_args(transform_prep_parser, config_path)
add_args(transform_prep_parser, shipped_configs)
transform_prep_parser.add_argument(
    '-L', '--lengths',
    nargs=3, type=float,
    required=True,
    help="the X, Y and Z physical lengths (in angstrom) of the space; three (3) space-separated values [required]"
)
transform_prep_parser.add_argument(
    '-I', '--indices',
    nargs=3, type=int,
    required=True,
    help="the I, J, and K image dimensions of the space, corresponding to X, Y and Z, respectively; three (3) "
         "space-separated integers [required]"
)
transform_prep_parser.add_argument(
    '-O', '--origin',
    nargs=3, type=float,
    default=[0.0, 0.0, 0.0],
    help="the origin position (in ångström); literally, the distance between the first voxel (lowest indices) and the "
         "physical origin; three (3) space-separated values [default: 0.0 0.0 0.0]"
)
transform_prep_parser.add_argument(
    '-o', '--output',
    default=None,
    help='output file name [default: <infile>_transformed.<ext>]'
)
transform_prep_parser.add_argument(
    '--infix',
    default='transformed',
    help="infix to be added to filenames e.g. file.stl -> file_<infix>.stl [default: 'transformed']",
)
add_args(transform_prep_parser, verbose)
# =========================================================================
# prep: mergemask
# =========================================================================
mergemask_prep_parser = prep_subparsers.add_parser(
    'mergemask',
    description='Merge two or more binary masks into one with integer labels',
    help='merge two or more binary masks',
)
add_args(mergemask_prep_parser, config_path)
add_args(mergemask_prep_parser, shipped_configs)
add_args(mergemask_prep_parser, verbose)
mergemask_prep_parser.add_argument(
    'masks',
    nargs='+',
    help="a sequence of two masks in a CCP4-like format e.g. .mrc, .map, .rec; "
         "the order of placement determines the order of labels"
)
mergemask_prep_parser.add_argument(
    '-P', '--output-prefix',
    default="merged_mask",
    help="the prefix to use for the output; two files are written to the output: merged_mask.mrc and merged_mask.json "
         "(metadata showing the mask-label relations)"
)
mergemask_prep_parser.add_argument(
    '--mask-extension',
    default="mrc",
    help="the file extension to use [default: 'mrc']"
)
mergemask_prep_parser.add_argument(
    '--overwrite',
    action='store_true',
    help="if the output already exists overwrite it [default: False]"
)
mergemask_prep_parser.add_argument(
    '--skip-assessment',
    action='store_true',
    help="use this option to avoid the lengthy task of assessing the masks to make sure they are binary [default: False]"
)
mergemask_prep_parser.add_argument(
    '--allow-overlap',
    action='store_true',
    help="allow masks to have overlapping voxels; this results in a complex hierarchy of labels described in the "
         "label_tree [default: False]"
)
# =========================================================================
# prep: starsplit
# =========================================================================
starsplit_prep_parser = prep_subparsers.add_parser(
    'starsplit',
    description='Split a composite star file into individual star files distinguished by the <rlnImageName> key',
    help='split a composite star file into individual star files',
)
add_args(starsplit_prep_parser, config_path)
add_args(starsplit_prep_parser, shipped_configs)
add_args(starsplit_prep_parser, verbose)
starsplit_prep_parser.add_argument(
    'star_file',
    help="the composite star file"
)
starsplit_prep_parser.add_argument(
    '--output-prefix',
    help="a prefix to use for the output files; the output files are "
         "named <prefix>_<rlnImageName>.star [default: '<composite-name>_']"
)
starsplit_prep_parser.add_argument(
    '--image-path',
    default='',
    help="the correct local path to the tomogram files [default: '']"
)
# tomogram prefix
starsplit_prep_parser.add_argument(
    '--image-extension',
    default='mrc',
    help="the file extension to use for the tomogram files [default: 'mrc']"
)
starsplit_prep_parser.add_argument(
    '--image-name-prefix',
    default="",
    help="in many star files, the <rlnImageName> values will be a local path; the actual image name (a .mrc file) may contain additional characters that makes it difficult to categorise the tomograms e.g. 'path/my_tomogram1_001.mrc', 'path/my_tomogram1_002.mrc', 'path/my_tomogram2_001.mrc'. In this example, we have two tomograms ('my_tomogram1' and 'my_tomogram2') but the additional characters ('_001', '_002') make it difficult to categorise the tomograms. This option allows you to specify a prefix to remove from the <rlnImageName> values. You can also use a REGEX in quotes e.g. 'my_tomogram\d'. [default: '']"
)
starsplit_prep_parser.add_argument(
    '--image-name-field',
    default='_rlnImageName',
    help="the field in the star file that contains the image name [default: '_rlnImageName']"
)

# =========================================================================
# prep: starcrop
# =========================================================================
starcrop_prep_parser = prep_subparsers.add_parser(
    'starcrop',
    description='Truncate a composite star file to the specified number of rows (default: 100)',
    help='truncate a star file to have as many rows as specified (excluding the header)',
)
add_args(starcrop_prep_parser, config_path)
add_args(starcrop_prep_parser, shipped_configs)
add_args(starcrop_prep_parser, verbose)
starcrop_prep_parser.add_argument(
    'star_file',
    help="the composite star file"
)
starcrop_prep_parser.add_argument(
    '-o', '--output',
    default=None,
    help='output file name [default: <infile>_cropped.star]'
)
starcrop_prep_parser.add_argument(
    '--infix',
    default='cropped',
    help="infix to be added to filenames e.g. file.star -> file_<infix>.star [default: 'cropped']",
)
starcrop_prep_parser.add_argument(
    '--rows',
    default=100,
    type=int,
    help="the number of rows to keep [default: 100]"
)
starcrop_prep_parser.add_argument(
    '--image-name-field',
    default='_rlnImageName',
    help="the field in the star file that contains the image name [default: '_rlnImageName']"
)
# =========================================================================
# convert subparser
# =========================================================================
# extend the sfftk-rw convert parser
convert_parser.description = "Perform conversions to EMDB-SFF"
convert_parser.help = "converts to EMDB-SFF"
add_args(convert_parser, config_path)
add_args(convert_parser, shipped_configs)
convert_parser.add_argument(
    '-a', '--all-levels',
    default=False,
    action='store_true',
    help="for segments structured hierarchically (e.g. Segger from UCSF Chimera and Chimera X) "
         "convert all segment leves in the hierarchy [default: False]"
)
multi_or_label_mutex_parser = convert_parser.add_mutually_exclusive_group(required=False)
# convert_parser.add_argument(
multi_or_label_mutex_parser.add_argument(
    '-m', '--multi-file',
    action='store_true',
    default=False,
    help=(
        "enables convert to treat multiple files as individual segments of a single segmentation; only works for the "
        "following filetypes: {} [default: False]"
    ).format(
        ', '.join(MULTI_FILE_FORMATS),
    )
)
add_args(convert_parser, subtype_index)
convert_parser.add_argument(
    '--image',
    help="specify the segmented EMDB MAP/MRC file from which to determine the correct image-to-physical transform"
)
multi_or_label_mutex_parser.add_argument(
    '--label-tree',
    help="a JSON file produced by running 'sff prep mergemask' which captures: "
         "1) the mask labels (key: 'mask_to_label') and "
         "2) the hierarchical relationship between labels (key: 'label_tree')"
)
# multi_or_label_mutex_parser.add_argument(
#     '--subtomogram-average',
#     help="the result of subtomogram averaging in CCP4 format (.mrc, .map, .rec)"
# )
convert_parser.add_argument(
    '--subtomogram-average',
    help="the result of subtomogram averaging or a particle mask for visualisation in CCP4 format (.mrc, .map, .rec)"
)
convert_parser.add_argument(
    '--image-name-field',
    default='_rlnImageName',
    help="the field in the star file that contains the image name [default: '_rlnImageName']"
)
class UpperAction(argparse.Action):
    """Custom argparse action that uppercases the stored value"""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, _str(values).upper())
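
# A minimal sketch of UpperAction's effect (parser and option names hypothetical):
#
#   p = argparse.ArgumentParser()
#   p.add_argument('--convention', action=UpperAction)
#   p.parse_args(['--convention', 'zyz']).convention  # -> 'ZYZ'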

convert_parser.add_argument(
    '--euler-angle-convention',
    default='zyz',
    type=str,
    choices=['zyz', 'zxz', 'xyx', 'xzx', 'yxy', 'yzy'],
    action=UpperAction,
    help="the Euler angle convention used in the subtomogram averaging [default: 'zyz' - case insensitive]"
)
convert_parser.add_argument(
    '--radians',
    action='store_true',
    help="use radians instead of degrees for Euler angles [default: False i.e. use degrees]"
)
# =========================================================================
# config subparser
# =========================================================================
config_parser = subparsers.add_parser(
    'config',
    description="Configuration utility",
    help="manage sfftk configs"
)
config_subparsers = config_parser.add_subparsers(
    title='sfftk configurations',
    dest='config_subcommand',
    description='Persistent configurations utility',
    metavar='Commands:'
)
# =============================================================================
# config: get
# =============================================================================
get_config_parser = config_subparsers.add_parser(
    'get',
    description='Get the value of a single configuration parameter',
    help='get single sfftk config'
)
get_config_parser.add_argument(
    'name',
    nargs="?",
    default=None,
    help="the name of the argument to retrieve",
)
add_args(get_config_parser, config_path)
add_args(get_config_parser, shipped_configs)
get_config_parser.add_argument(
    '-a', '--all',
    action='store_true',
    default=False,
    help='get all configs'
)
add_args(get_config_parser, verbose)
# =============================================================================
# config: set
# =============================================================================
set_config_parser = config_subparsers.add_parser(
    'set',
    description='Set the value of a single configuration parameter',
    help='set single sfftk config'
)
set_config_parser.add_argument(
    'name',
    help="the name of the argument to set",
)
set_config_parser.add_argument(
    'value',
    help="the value of the argument to set",
)
add_args(set_config_parser, config_path)
add_args(set_config_parser, shipped_configs)
add_args(set_config_parser, verbose)
set_config_parser.add_argument(
    '-f', '--force',
    action='store_true',
    default=False,
    help='force overwriting of an existing config; do not ask to confirm [default: False]'
)
# =============================================================================
# config: del
# =============================================================================
del_config_parser = config_subparsers.add_parser(
    'del',
    description='Delete the named configuration parameter',
    help='delete single sfftk config'
)
del_config_parser.add_argument(
    'name',
    nargs='?',
    default=None,
    help="the name of the argument to be deleted"
)
add_args(del_config_parser, config_path)
add_args(del_config_parser, shipped_configs)
del_config_parser.add_argument(
    '-a', '--all',
    action='store_true',
    default=False,
    help='delete all configs (asks the user to confirm before deleting) [default: False]'
)
del_config_parser.add_argument(
    '-f', '--force',
    action='store_true',
    default=False,
    help='force deletion; do not ask to confirm deletion [default: False]'
)
add_args(del_config_parser, verbose)
# =========================================================================
# view subparser
# =========================================================================
# extend the sfftk-rw view parser
# handle configs
add_args(view_parser, config_path)
add_args(view_parser, shipped_configs)
view_parser.add_argument(
    '-C', '--show-chunks',
    action='store_true',
    help="show sequence of chunks in IMOD file; only works with IMOD model files (.mod) [default: False]"
)
view_parser.add_argument(
    '-X', '--transform',
    action='store_true',
    help="when specified, the file should be the segmented EMDB MAP/MRC file from "
         "which to determine the correct image-to-physical transform"
)
view_format_mutex = view_parser.add_mutually_exclusive_group()
view_format_mutex.add_argument(
    '--print-array',
    default=True,
    action='store_true',
    help="display the implied image-to-physical transform as the raw numpy array"
)
view_format_mutex.add_argument(
    '--print-csv',
    action='store_true',
    help="display the implied image-to-physical transform in comma-separated values form"
)
view_format_mutex.add_argument(
    '--print-ssv',
    action='store_true',
    help="display the implied image-to-physical transform in space-separated values form"
)
# =============================================================================
# notes parser
# =============================================================================
notes_parser = subparsers.add_parser(
    'notes',
    description="The EMDB-SFF Annotation Toolkit",
    help="annotate an EMDB-SFF file",
)
notes_subparsers = notes_parser.add_subparsers(
    title='Annotation tools',
    dest='notes_subcommand',
    description='The EMDB-SFF Annotation Toolkit provides the following tools:',
    metavar="EMDB-SFF annotation tools",
)
# =========================================================================
# notes: search
# =========================================================================
search_notes_parser = notes_subparsers.add_parser(
    'search',
    description="Search ontologies for annotation by text labels",
    help="search for terms by labels",
)
search_notes_parser.add_argument(
    'search_term',
    nargs='?',
    default='',
    help="the term to search; add quotes if spaces are included"
)
add_args(search_notes_parser, config_path)
add_args(search_notes_parser, shipped_configs)
resources_list = list(RESOURCE_LIST.keys())
search_notes_parser.add_argument(
    '-R', '--resource',
    default=resources_list[0],
    choices=resources_list,
    help=(
        'the resource to search for terms or accessions; valid options are {resources} [default: {default}]'
    ).format(
        resources=resources_list,
        default=resources_list[0],
    )
)
search_notes_parser.add_argument(
    '--start',
    type=int,
    default=1,
    help="start index [default: 1]"
)
search_notes_parser.add_argument(
    '--rows',
    type=int,
    default=10,
    help="number of rows [default: 10]"
)
search_notes_parser.add_argument(
    '--as-text',
    action='store_true',
    help="output as CSV [default: False]"
)
search_notes_parser.add_argument(
    '--filter-rows',
    nargs='*',
    help="space-separated list of search result rows to display; "
         "only works when the --as-text flag is selected [default: False]"
)
search_notes_parser.add_argument(
    '--no-header',
    action='store_true',
    help="do not show the CSV header (useful when concatenating "
         "search results to an existing file which already has the header) [default: False]"
)
ols_parser = search_notes_parser.add_argument_group(
    title='EBI Ontology Lookup Service (OLS)',
    description='The Ontology Lookup Service (OLS) is a repository for biomedical ontologies that aims to provide a '
                'single point of access to the latest ontology versions. You can use the following options to modify '
                'your search against OLS by ensuring that the -R/--resource flag is set to \'ols\' (default).'
)
ols_parser.add_argument(
    '-O', '--ontology',
    default=None,
    help="the ontology to search [default: None]"
)
ols_parser.add_argument(
    '-x', '--exact',
    default=False,
    action='store_true',
    help="exact matches? [default: False]"
)
ols_parser.add_argument(
    '-o', '--obsoletes',
    default=False,
    action='store_true',
    help="include obsoletes? [default: False]"
)
ols_parser.add_argument(
    '-L', '--list-ontologies',
    default=False,
    action='store_true',
    help="list available ontologies [default: False]"
)
ols_parser.add_argument(
    '-l', '--short-list-ontologies',
    default=False,
    action='store_true',
    help="short list of available ontologies [default: False]"
)
# =========================================================================
# notes: list
# =========================================================================
list_notes_parser = notes_subparsers.add_parser(
    'list',
    description="List all available annotations present in an EMDB-SFF file",
    help="list available annotations",
)
add_args(list_notes_parser, sff_file)
add_args(list_notes_parser, header)
add_args(list_notes_parser, config_path)
add_args(list_notes_parser, shipped_configs)
long_format = {
    'args': ['-l', '--long-format'],
    'kwargs': {
        'default': False,
        'action': 'store_true',
        'help': "only show segment ID and description (if present) [default: False]"
    }
}
add_args(list_notes_parser, long_format)
list_notes_parser.add_argument(
    '-D', '--sort-by-name',
    default=False,
    action='store_true',
    help="sort listings by segment name [default: False (sorts by ID)]"
)
list_notes_parser.add_argument(
    '-r', '--reverse',
    default=False,
    action='store_true',
    help="reverse the sort order [default: False]"
)
list_notes_parser.add_argument(
    '-I', '--list-ids',
    default=False,
    action='store_true',
    help="only list the IDs for segments, one per line [default: False]"
)
add_args(list_notes_parser, verbose)
# =========================================================================
# notes: show
# =========================================================================
show_notes_parser = notes_subparsers.add_parser(
    'show',
    description="Show specific annotations by ID present in an EMDB-SFF file",
    help="show an annotation by ID",
)
add_args(show_notes_parser, sff_file)
add_args(show_notes_parser, config_path)
add_args(show_notes_parser, shipped_configs)
add_args(show_notes_parser, header)
add_args(show_notes_parser, long_format)
add_args(show_notes_parser, verbose)
show_segment_id = deepcopy(segment_id)
# todo: use nargs='+' instead of csv
show_segment_id['kwargs']['help'] += \
    "; pass more than one ID as a comma-separated list with no spaces e.g. 'id1,id2,...,idN'"
'id1,id2,...,idN'" show_notes_parser.add_argument( *show_segment_id['args'], **show_segment_id['kwargs']) # ========================================================================= # notes:add # ========================================================================= add_notes_parser = notes_subparsers.add_parser( 'add', description="Add a new annotation to an EMDB-SFF file", help="add new annotations", ) # all notes refer to some sff file add_args(add_notes_parser, sff_file) add_args(add_notes_parser, config_path) add_args(add_notes_parser, shipped_configs) # external references apply to both external_ref['kwargs']['action'] = 'append' add_args(add_notes_parser, external_ref) add_args(add_notes_parser, verbose) del external_ref['kwargs']['action'] # global notes add_global_notes_parser = add_notes_parser.add_argument_group( title="add global notes", description="add global attributes to an EMDB-SFF file" ) add_args(add_global_notes_parser, name) add_args(add_global_notes_parser, software_name) add_args(add_global_notes_parser, software_version) add_args(add_global_notes_parser, software_proc_details) add_args(add_global_notes_parser, transform) add_args(add_global_notes_parser, details) # segment notes add_segment_notes_parser = add_notes_parser.add_argument_group( title="add segment notes", description="add attributes to a single segment in an EMDB-SFF file" ) add_args(add_segment_notes_parser, segment_id) add_args(add_segment_notes_parser, segment_name) add_args(add_segment_notes_parser, description) add_args(add_segment_notes_parser, number_of_instances) # ========================================================================= # notes: edit # ========================================================================= edit_notes_parser = notes_subparsers.add_parser( 'edit', description="Edit an existing annotation to an EMDB-SFF file", help="edit existing annotations", ) add_args(edit_notes_parser, sff_file) add_args(edit_notes_parser, config_path) add_args(edit_notes_parser, shipped_configs) add_args(edit_notes_parser, external_ref_id) external_ref['kwargs']['action'] = 'append' add_args(edit_notes_parser, external_ref) add_args(edit_notes_parser, verbose) del external_ref['kwargs']['action'] # global notes edit_global_notes_parser = edit_notes_parser.add_argument_group( title="edit global notes", description="edit global attributes to an EMDB-SFF file" ) add_args(edit_global_notes_parser, name) add_args(edit_global_notes_parser, software_id) add_args(edit_global_notes_parser, software_name) add_args(edit_global_notes_parser, software_version) add_args(edit_global_notes_parser, software_proc_details) add_args(edit_global_notes_parser, transform_id) add_args(edit_global_notes_parser, transform) add_args(edit_global_notes_parser, details) # segment notes edit_segment_notes_parser = edit_notes_parser.add_argument_group( title="edit segment notes", description="edit attributes to a single segment in an EMDB-SFF file" ) add_args(edit_segment_notes_parser, segment_id) add_args(edit_segment_notes_parser, segment_name) add_args(edit_segment_notes_parser, description) add_args(edit_segment_notes_parser, number_of_instances) # ========================================================================= # notes: del # ========================================================================= # todo: sff notes del -e 1,3,4,5,6 file.json del_notes_parser = notes_subparsers.add_parser( 'del', description="Delete an existing annotation to an EMDB-SFF file", help="delete existing annotations", ) 
add_args(del_notes_parser, sff_file)
add_args(del_notes_parser, config_path)
add_args(del_notes_parser, shipped_configs)
add_args(del_notes_parser, verbose)
# for deleting notes we handle external ref IDs as a comma-separated string e.g. 1,2,3,4 therefore not an 'int'
del external_ref_id['kwargs']['type']
add_args(del_notes_parser, external_ref_id)
# put it back
external_ref_id['kwargs']['type'] = int
# global notes
del_global_notes_parser = del_notes_parser.add_argument_group(
    title="delete global notes",
    description="delete global attributes from an EMDB-SFF file"
)
# name['kwargs'] = {
#     'action': 'store_true',
#     'default': False,
#     'help': 'delete the name [default: False]',
# }
# add_args(del_global_notes_parser, name)
# we need a way to identify which software entity in the list is to be acted upon;
# remove type so that we can store a list of comma-separated ints
del software_id['kwargs']['type']
_software_id_help = software_id['kwargs']['help']
software_id['kwargs']['help'] = 'the software(s) to delete; what is deleted depends on whether -S, -T and -P are ' \
                                'specified (see below); if none are specified then the whole software entry is ' \
                                'deleted from the list'
# add it to the parser
add_args(del_global_notes_parser, software_id)
# return things to the way you found them
software_id['kwargs']['type'] = int
software_id['kwargs']['help'] = _software_id_help
software_name['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the software name for the specified software id(s) [default: False]'
}
add_args(del_global_notes_parser, software_name)
software_version['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the software version for the specified software id(s) [default: False]'
}
add_args(del_global_notes_parser, software_version)
software_proc_details['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the software processing details for the specified software id(s) [default: False]'
}
add_args(del_global_notes_parser, software_proc_details)
details['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the details [default: False]'
}
add_args(del_global_notes_parser, details)
# segment notes
del_segment_notes_parser = del_notes_parser.add_argument_group(
    title="delete segment notes",
    description="delete attributes from a single segment in an EMDB-SFF file"
)
add_args(del_segment_notes_parser, segment_id)
segment_name['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the segment name [default: False]'
}
add_args(del_segment_notes_parser, segment_name)
description['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the description [default: False]',
}
add_args(del_segment_notes_parser, description)
del number_of_instances['kwargs']['type']
number_of_instances['kwargs'] = {
    'action': 'store_true',
    'default': False,
    'help': 'delete the number of instances [default: False]',
}
add_args(del_segment_notes_parser, number_of_instances)
# we need a way to identify which transform entity in the list is to be acted upon;
# remove type so that we can store a list of comma-separated ints
del transform_id['kwargs']['type']
_transform_id_help = transform_id['kwargs']['help']
transform_id['kwargs']['help'] = 'the transform(s) to delete'
# add it to the parser
add_args(del_global_notes_parser, transform_id)
# return things to the way you found them
transform_id['kwargs']['type'] = int
transform_id['kwargs']['help'] = _transform_id_help
# =============================================================================
# notes: copy
# =============================================================================
copy_notes_parser = notes_subparsers.add_parser(
    'copy',
    description="Copy notes from one or more segments to one, several or all other segments within the same "
                "EMDB-SFF file",
    help="copy notes across segments within the same EMDB-SFF file"
)
add_args(copy_notes_parser, sff_file)
add_args(copy_notes_parser, config_path)
add_args(copy_notes_parser, shipped_configs)
# todo: merge with segment_id above
copy_notes_parser.add_argument(
    '-i', '--segment-id',
    help=(
        "segment ID or a comma-separated sequence of segment IDs of source segment(s); "
        "run 'sff notes list <file>' for a list of segment IDs"
    )
)
copy_global_notes_parse = copy_notes_parser.add_mutually_exclusive_group()
copy_global_notes_parse.add_argument(
    '--from-global',
    action='store_true',
    default=False,
    help="copy notes from global (metadata) to --to-segment segments"
)
copy_global_notes_parse.add_argument(
    '--to-global',
    action='store_true',
    default=False,
    help="copy notes from --segment-id segment to global (metadata)"
)
to_segment_or_all_copy_notes_parser = copy_notes_parser.add_mutually_exclusive_group()
to_segment_or_all_copy_notes_parser.add_argument(
    '-t', '--to-segment',
    help=(
        "segment ID or a comma-separated sequence of segment IDs of destination segment(s); "
        "run 'sff notes list <file>' for a list of segment IDs"
    ),
)
to_segment_or_all_copy_notes_parser.add_argument(
    '--to-all',
    action='store_true',
    default=False,
    help="copy notes from --segment-id segment to all (other) segments"
)
# =============================================================================
# notes: clear
# =============================================================================
clear_notes_parser = notes_subparsers.add_parser(
    'clear',
    description="Clear all notes for one or more segments in an EMDB-SFF file",
    help="clear notes in an EMDB-SFF file"
)
add_args(clear_notes_parser, config_path)
add_args(clear_notes_parser, shipped_configs)
add_args(clear_notes_parser, sff_file)
add_args(clear_notes_parser, verbose)
clear_notes_parser.add_argument(
    '--all',
    action='store_true',
    default=False,
    help="clear all notes; USE WITH CARE!"
)
clear_notes_parser.add_argument(
    '--from-global',
    action='store_true',
    default=False,
    help="clear notes from global (metadata)"
)
from_segment_or_all_clear_notes_parser = clear_notes_parser.add_mutually_exclusive_group()
from_segment_or_all_clear_notes_parser.add_argument(
    '-i', '--segment-id',
    help=(
        "segment ID or a comma-separated sequence of segment IDs of source segment(s); "
        "run 'sff notes list <file>' for a list of segment IDs"
    ),
)
from_segment_or_all_clear_notes_parser.add_argument(
    '--from-all-segments',
    action='store_true',
    default=False,
    help="clear notes from all segments"
)
# =============================================================================
# notes: merge
# =============================================================================
merge_notes_parser = notes_subparsers.add_parser(
    'merge',
    description="Merge notes from two EMDB-SFF files",
    help="merge notes from two EMDB-SFF files"
)
add_args(merge_notes_parser, config_path)
add_args(merge_notes_parser, shipped_configs)
merge_notes_parser.add_argument('--source', help="EMDB-SFF file from which to obtain notes", required=True)
merge_notes_parser.add_argument(
    'other',
    help="EMDB-SFF file whose content will be merged with notes from the file specified with --source"
)
output['kwargs']['help'] = (
    "file to convert to; the extension (.sff, .hff, .json) determines the output format; "
    "if not specified then NOTES IN OTHER ONLY will be overwritten [default: None]"
)
merge_notes_parser.add_argument(*output['args'], **output['kwargs'])
merge_notes_parser.add_argument(*verbose['args'], **verbose['kwargs'])
merge_notes_parser.add_argument(
    '--include-colour',
    action='store_true',
    help="use the segment colours from the source [default: False]"
)
# =========================================================================
# notes: save
# =========================================================================
save_notes_parser = notes_subparsers.add_parser(
    'save',
    description="Save all changes made to the actual file",
    help="write all changes made since the last 'save' action"
)
save_notes_parser.add_argument(*sff_file['args'], **sff_file['kwargs'])
add_args(save_notes_parser, config_path)
add_args(save_notes_parser, shipped_configs)
# =========================================================================
# notes: trash
# =========================================================================
trash_notes_parser = notes_subparsers.add_parser(
    'trash',
    description="Discard all notes by deleting the temporary file",
    help="discard all changes made since the last edit action (add, edit, del)",
)
trash_notes_parser.add_argument(*sff_file['args'], **sff_file['kwargs'])
add_args(trash_notes_parser, config_path)
add_args(trash_notes_parser, shipped_configs)

# get the full list of tools from the Parser object
tool_list = list(tool_list) + ['all_sfftk', 'main_sfftk', 'formats', 'notes', 'readers']

# tests
# on inspection the second action is the tests action
tests_parser_tools = tests_parser._actions[1]
test_help = "one or none of the following: {}".format(", ".join(tool_list))
# update the help
tests_parser_tools.help = test_help
# add config paths
add_args(tests_parser, config_path)
add_args(tests_parser, shipped_configs)


def _get_file_extension(fn):
    """Extract the file extension

    :param str fn: filename
    :return str ext: extension
    """
    return fn.split('.')[-1]

def check_multi_file_formats(file_names):
    """Check file names for file formats

    When working with multi-file segmentations, this function checks that all files are of a consistent format.

    :param list file_names: a list of file names
    :return: a tuple consisting of whether or not the set of file formats is valid, the single file format
        observed (or ``None`` if more than one was observed) and the set of invalid file formats
    :rtype: tuple[bool, str or None, set]
    """
    is_valid_format = True
    file_formats = set()
    invalid_formats = set()
    for fn in file_names:
        # ff = fn.split('.')[-1].lower()
        ff = _get_file_extension(fn)
        if ff in MULTI_FILE_FORMATS:
            file_formats.add(ff)
        else:
            invalid_formats.add(ff)
            is_valid_format = False
    if len(file_formats) == 1:
        file_format = file_formats.pop()
    else:
        file_format = None
    # record the leftover (mixed) formats; note: set.union() returns a new set, so |= is
    # required for the update to actually stick
    invalid_formats |= file_formats
    return is_valid_format, file_format, invalid_formats
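
# A minimal usage sketch for check_multi_file_formats (file names hypothetical):
#
#   check_multi_file_formats(['a.stl', 'b.stl'])   # -> (True, 'stl', set())
#   check_multi_file_formats(['a.stl', 'b.xlsx'])  # -> (False, 'stl', {'xlsx'})
#   check_multi_file_formats(['a.stl', 'b.mrc'])   # -> (True, None, {'stl', 'mrc'})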

def _set_subtype_index(args, ext):
    """Set the --subtype-index argument value

    :param args: an argument namespace
    :type args: :py:class:`argparse.Namespace`
    :param str ext: a file extension
    :return args: the updated argument namespace
    :rtype args: :py:class:`argparse.Namespace`
    """
    # if --subtype-index is set don't bother
    if args.subtype_index > -1:
        return args
    try:
        index_range = EXTENSION_SUBTYPE_INDICES[ext].keys()
    except KeyError:
        print_date("Invalid file extension: {ext}".format(ext=ext))
        return args
    print("The file extension {ext} has multiple formats associated with it.".format(
        ext=ext,
    ))
    print("(You can avoid this intercept by using the --subtype-index <value> option.)")
    for k, v in EXTENSION_SUBTYPE_INDICES[ext].items():
        print("\t[{k}] - {v}".format(k=k, v=v))
    try:
        index = int(_input("Please enter a valid choice [{min_index}-{max_index}]: ".format(
            min_index=min(index_range),
            max_index=max(index_range),
        )))
    except ValueError:
        print_date("Please enter a numeric value only.")
        return args
    try:
        assert index in index_range
    except AssertionError:
        print_date("Invalid index ({index})".format(index=index))
        return args
    args.subtype_index = index
    return args


def _masks_exist(args: argparse.Namespace) -> bool:
    """Test whether all mask files exist"""
    all_exist = True
    for mask in args.masks:
        if not os.path.exists(mask):
            all_exist = False
            if args.verbose:
                print_date(f"missing mask: {mask}")
    return all_exist


def _mask_all_correct_files(args: argparse.Namespace) -> bool:
    """Test that all masks have the right file format"""
    all_masks = True
    for mask in args.masks:
        if not re.search(r"(map|mrc|rec)$", mask, re.IGNORECASE):
            all_masks = False
            if args.verbose:
                print_date(f"invalid extension: {mask}")
    return all_masks


def _masks_have_same_dimensions(args: argparse.Namespace) -> bool:
    """Test that a list of paths to masks have the same dimensions"""
    dimensions = list()
    from ..readers.mapreader import Map
    if hasattr(args, 'masks'):
        masks = args.masks
    elif hasattr(args, 'from_file'):
        masks = args.from_file
    else:
        raise ValueError('args missing attribute with masks')
    for filename in masks:
        this_map = Map(filename, header_only=True)
        dimension = (this_map._nc, this_map._nr, this_map._ns)
        if args.verbose:
            print_date(f"info: mask {filename} has dimension {dimension}")
        dimensions.append(dimension)
    dimension_comparisons = list(map(lambda d: d == dimensions[0], dimensions))
    return all(dimension_comparisons)


# def _masks_have_same_mode(args: argparse.Namespace) -> bool:
#     """Test that a list of paths to masks have the same mode"""
#     modes = list()
#     from ..readers.mapreader import Map
#     for filename in args.masks:
#         this_map = Map(filename, header_only=True)
#         mode = this_map._mode
#         if args.verbose:
#             print_date(f"info: mask {filename} has mode {mode}")
#         modes.append(mode)
#     modes_comparisons = list(map(lambda d: d == modes[0], modes))
#     return all(modes_comparisons)


def _masks_have_mode_zero(args: argparse.Namespace) -> bool:
    """Test that mode must be zero to proceed"""
    from ..readers.mapreader import Map
    for filename in args.masks:
        this_map = Map(filename, header_only=True)
        if this_map._mode != 0:
            if args.verbose:
                print_date(f"error: mask {filename} has mode {this_map._mode}; mode must be 0")
            return False
    return True
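
# A minimal sketch of how the mask validators above are driven (paths hypothetical):
#
#   ns = argparse.Namespace(masks=['m1.mrc', 'm2.mrc'], verbose=True)
#   ok = _masks_exist(ns) and _mask_all_correct_files(ns) \
#        and _masks_have_same_dimensions(ns) and _masks_have_mode_zero(ns)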

# parser function
def parse_args(_args, use_shlex=False):
    """
    Parse and check command-line arguments and also return configs.

    This function does all the heavy lifting in ensuring that command-line arguments are properly formatted and
    checked for sanity. It also extracts configs from the config files. In this way command handlers (defined in
    :py:mod:`sfftk.sff` e.g. :py:meth:`sfftk.sff.handle_convert`) can assume correct argument values and
    concentrate on functionality, making the code more readable.

    :param _args: list of arguments (``use_shlex=False``); string of arguments (``use_shlex=True``)
    :type _args: list or str
    :param bool use_shlex: treat ``_args`` as a string to be split using the ``shlex`` module
    :return: parsed arguments and configs
    :rtype: tuple[:py:class:`argparse.Namespace`, :py:class:`sfftk.core.configs.Configs`]
    """
    if use_shlex:  # if we treat _args as a command string for shlex to process
        try:
            assert isinstance(_args, str)
        except AssertionError:
            return 64, None
        import shlex
        _args = shlex.split(_args)
    # if we have no subcommands then show the available tools
    if len(_args) == 0:
        Parser.print_help()
        return 0, None
    # if we only have a subcommand then show that subcommand's help
    elif len(_args) == 1:
        # print(_args[0])
        # print(Parser._actions[2].choices)
        # if _args[0] == 'tests':
        #     pass
        if _args[0] == '-V' or _args[0] == '--version':
            from .. import SFFTK_VERSION
            print_date("sfftk version: {}".format(SFFTK_VERSION))
            return 0, None
        # anytime a new argument is added to the base parser subparsers are bumped down in index
        elif _args[0] in _dict_iter_keys(Parser._actions[2].choices):
            exec('{}_parser.print_help()'.format(_args[0]))
            return 0, None
    # if we have 'notes' as the subcommand and a sub-subcommand show the
    # options for that sub-subcommand
    elif len(_args) == 2:
        if _args[0] == 'notes':
            if _args[1] in _dict_iter_keys(Parser._actions[2].choices['notes']._actions[1].choices):
                exec('{}_notes_parser.print_help()'.format(_args[1]))
                return 0, None
        elif _args[0] == 'prep':
            if _args[1] in _dict_iter_keys(Parser._actions[2].choices['prep']._actions[1].choices):
                exec('{}_prep_parser.print_help()'.format(_args[1]))
                return 0, None
        elif _args[0] == 'config':
            if _args[1] in _dict_iter_keys(Parser._actions[2].choices['config']._actions[1].choices):
                exec('{}_config_parser.print_help()'.format(_args[1]))
                return 0, None
    # parse arguments
    args = Parser.parse_args(_args)
    from .configs import get_config_file_path
    # get the file to use for configs given args
    config_file_path = get_config_file_path(args)
    if config_file_path is None:
        print_date("Invalid destination for configs. Omit --shipped-configs to write to user configs.")
        return 64, None
    from .configs import load_configs
    # now get configs to use
    configs = load_configs(config_file_path)
    # check values
    # config
    if args.subcommand == 'config':
        if args.verbose:
            print_date("Reading configs from {}...".format(config_file_path))
        # handle config-specific argument modifications here
        if args.config_subcommand == 'del':
            if args.name not in configs and not args.all:
                print_date("Missing config with name '{}'. Aborting...".format(args.name))
                return None, configs
            # if force pass
            if not args.force:
                default_choice = 'n'
                # get user choice
                user_choice = _input("Are you sure you want to delete config '{}' [y/N]? ".format(
                    args.name)).lower()
".format( args.name)).lower() if user_choice == '': choice = default_choice elif user_choice == 'n' or user_choice == 'N': choice = 'n' elif user_choice == 'y' or user_choice == 'Y': choice = 'y' else: print_date("Invalid choice: '{}'") return 64, configs # act on user choice if choice == 'n': print_date("You have opted to cancel deletion of '{}'".format(args.name)) return 64, configs elif choice == 'y': pass elif args.config_subcommand == 'set': if args.name in configs: # if force pass if not args.force: default_choice = 'n' # get user choice user_choice = _input("Are you sure you want to overwrite config '{}={}' [y/N]? ".format( args.name, configs[args.name])).lower() if user_choice == '': choice = default_choice elif user_choice == 'n' or user_choice == 'N': choice = 'n' elif user_choice == 'y' or user_choice == 'Y': choice = 'y' else: print_date("Invalid choice: '{}'") return 64, configs # act on user choice if choice == 'n': print_date("You have opted to cancel overwriting of '{}'".format(args.name)) return None, configs elif choice == 'y': pass # prep elif args.subcommand == 'prep': # binmap if args.prep_subcommand == 'binmap': ext = args.from_file.split('.')[-1] if ext.lower() not in PREPABLE_FILE_FORMATS: print_date("File format {} not available for prepping".format(ext.lower())) return 64, configs if args.output is None: if args.infix != '': args.output = '.'.join(args.from_file.split('.')[:-1]) + '_' + args.infix + '.' + ext else: print_date("Cannot overwrite input file") return 64, configs if args.verbose: print_date("Output will be written to {}".format(args.output)) # transform elif args.prep_subcommand == 'transform': ext = args.from_file.split('.')[-1] if ext.lower() not in RESCALABLE_FILE_FORMATS: print_date("File format {} not available for transforming".format(ext.lower())) return 64, configs if args.output is None: if args.infix != '': args.output = '.'.join(args.from_file.split('.')[:-1]) + '_' + args.infix + '.' 
                else:
                    print_date("Cannot overwrite input file")
                    return 64, configs
            if args.verbose:
                print_date("Output will be written to {}".format(args.output))
        # mergemask
        elif args.prep_subcommand == 'mergemask':
            if len(args.masks) < 2:
                print_date("error: mergemask requires two or more masks")
                return 64, configs
            if len(args.masks) > 255:
                print_date(f"error: mergemask can handle at most 255 masks ({len(args.masks)} provided)")
                return 64, configs
            if not _masks_exist(args):
                print_date("error: one or more masks missing; please verify that all paths are correct")
                return 65, configs
            if not _mask_all_correct_files(args):
                print_date("error: one or more invalid file formats; please retry")
                return 65, configs
            if not _masks_have_same_dimensions(args):
                print_date(
                    "error: inhomogeneous masks: dimension differs between masks (use --verbose to view details)")
                return 65, configs
            if not _masks_have_mode_zero(args):
                print_date("error: mode must be zero (0); please run `sff prep binmap` first on all masks")
                return 65, configs
        # starsplit
        elif args.prep_subcommand == 'starsplit':
            if args.output_prefix is None:
                args.output_prefix = os.path.splitext(args.star_file)[0] + "_"
            else:
                if args.output_prefix[-1] != '_':
                    args.output_prefix += '_'
        # starcrop
        elif args.prep_subcommand == 'starcrop':
            if args.output is None:
                args.output = f"{pathlib.Path(args.star_file).stem}_{args.infix}_{args.rows}.star"
            if args.rows <= 0:
                print_date("error: rows must be positive")
                return 65, configs
    # view
    elif args.subcommand == 'view':
        if args.show_chunks:
            if not re.match(r".*\.mod$", args.from_file, re.IGNORECASE):
                print_date("Invalid file type to view chunks. Only works with IMOD files")
                return 64, configs  # 64 = USAGE
        if args.transform:
            if not re.search(r".*\.(map|mrc|rec)$", args.from_file, re.IGNORECASE):
                print_date("Invalid file type to treat as image. Only works with .map/.mrc/.rec files")
                return 64, configs  # 64 = USAGE
    # convert
    elif args.subcommand == 'convert':
        # --image must be .map/.mrc/.rec
        if args.image:
            try:
                assert re.match(r".*\.(map|mrc|rec)$", args.image, re.IGNORECASE)
            except AssertionError:
                print_date("Invalid file type for --image. Please use .map, .mrc or .rec files only.")
                return 65, configs  # 65 = DATAERR
        else:
            print_date(
                "Warning: missing --image <file.map> option to accurately determine image-to-physical transform",
                stream=sys.stderr)
        # convert details to unicode
        if args.details is not None:
            args.details = _decode(args.details, 'utf-8')
        # single vs. multiple file names provided
multiple file names provided if len(args.from_file) == 1: args.from_file = args.from_file[0] try: assert os.path.exists(args.from_file) except AssertionError: print_date("File {} was not found".format(args.from_file)) return 64, configs # only bother handling extension disambiguation if the file exists ext = _get_file_extension(args.from_file) # check if this is an ambiguous extension if ext in EXTENSION_SUBTYPE_INDICES.keys(): args = _set_subtype_index(args, ext) # now, let's check that this file is strictly binary if re.match(r".*\.(map|mrc|rec)$", args.from_file, re.IGNORECASE): if not check_mask_is_binary(args.from_file, verbose=args.verbose) and args.label_tree is None: print_date( "Error: non-binary mask; either use a binary mask or include the label " "tree with --label-tree flag" ) return 65, configs else: if args.multi_file: is_valid_format, file_format, invalid_formats = check_multi_file_formats(args.from_file) if is_valid_format: file_missing = False for fn in args.from_file: try: assert os.path.exists(fn) # check that all files exist except AssertionError: print_date("File {} was not found".format(fn)) file_missing = True if file_missing: return 64, configs # none of the files are missing and the extensions are OK # now check for homogeneity of CCP4/MRC masks if re.search(r"(map|mrc|rec)$", file_format, re.IGNORECASE): if not _masks_have_same_dimensions(args): return 65, configs else: print_date("Invalid format(s) for multi-file segmentation: {}; should be only one of: {}".format( ', '.join(invalid_formats), ', '.join(MULTI_FILE_FORMATS), )) return 64, configs # now we check if this is an ambiguous extension if file_format in EXTENSION_SUBTYPE_INDICES.keys(): args = _set_subtype_index(args, file_format) else: print_date("Please use -m/--multi-file argument for multi-file segmentations") return 64, configs # set the output file if args.output is None: if args.multi_file: from_file = args.from_file[0] else: from_file = args.from_file dirname = os.path.dirname(from_file) if args.format: try: assert args.format in list(map(lambda x: x[0], FORMAT_LIST)) except AssertionError: print_date("Invalid output format: {}; valid values are: {}".format( args.format, ", ".join(map(lambda x: x[0], FORMAT_LIST)))) return 64, configs fn = ".".join(os.path.basename(from_file).split( '.')[:-1]) + '.{}'.format(args.format) args.__setattr__('output', os.path.join(dirname, fn)) # convert file.sff to file.hff elif re.match(r'.*\.sff$', from_file): fn = ".".join( os.path.basename(from_file).split('.')[:-1]) + '.hff' args.__setattr__('output', os.path.join(dirname, fn)) # convert file.hff to file.sff elif re.match(r'.*\.hff$', from_file): fn = ".".join( os.path.basename(from_file).split('.')[:-1]) + '.sff' args.__setattr__('output', os.path.join(dirname, fn)) else: fn = ".".join( os.path.basename(from_file).split('.')[:-1]) + '.sff' args.__setattr__('output', os.path.join(dirname, fn)) if args.verbose: print_date("Setting output file to {}".format(args.output)) else: print_date("Writing output to {}".format(args.output)) # ensure valid primary_descriptor if args.primary_descriptor: try: assert args.primary_descriptor in [ 'three_d_volume', 'mesh_list', 'shape_primitive_list'] except AssertionError: if args.verbose: print_date( "Invalid value for primary descriptor: {}".format(args.primary_descriptor)) return 64, configs if args.verbose: print_date( "Trying to set primary descriptor to {}".format(args.primary_descriptor)) # using -a/--all-levels if args.all_levels: if args.verbose: print_date("Writing out 
all levels of segment hierarchy") # tests elif args.subcommand == 'tests': # check if we have a temp-annotated file and complain then die if one exists if os.path.exists(configs['__TEMP_FILE']): print_date("Unable to run tests with {} in current path ({})".format(configs['__TEMP_FILE'], os.path.abspath(__file__))) print_date("Run 'sff notes save <file.sff>' or 'sff notes trash @' before proceeding.") return 64, configs # normalise tool list # if 'all' is specified together with others then it should simply be 'all' if 'all' in args.tool: args.tool = ['all'] # same for 'all_sfftk' but only if 'all' not present elif 'all_sfftk' in args.tool: args.tool = ['all_sfftk'] # if isinstance(args.tool, list): for tool in args.tool: try: assert tool in tool_list except AssertionError: print_date( "Unknown tool: {}; Available tools for test: {}".format(tool, ", ".join(tool_list)) ) return 64, configs if args.verbosity: try: assert args.verbosity in range(4) except AssertionError: print_date( "Verbosity should be in {}-{}: {} given".format( VERBOSITY_RANGE[0], VERBOSITY_RANGE[-1], args.verbosity ) ) return 64, configs # notes elif args.subcommand == 'notes': # convenience: the user can use '@' to refer to an EMDB-SFF file whch is the previous # file that was edited ('add', 'edit', 'del', 'copy', 'clear') temp_file = configs['__TEMP_FILE'] temp_file_ref = configs['__TEMP_FILE_REF'] if args.notes_subcommand in ['list', 'show', 'add', 'edit', 'del', 'save', 'trash', 'copy', 'clear']: # find, view if args.notes_subcommand in ['list', 'show', 'search']: if args.sff_file == temp_file_ref: if os.path.exists(temp_file): args.sff_file = temp_file if args.verbose: print_date( "Working on temp file {}".format(temp_file), stream=sys.stdout) else: print_date("Temporary file {} does not exist. 
    # notes
    elif args.subcommand == 'notes':
        # convenience: the user can use '@' to refer to the EMDB-SFF file which was
        # edited most recently ('add', 'edit', 'del', 'copy', 'clear')
        temp_file = configs['__TEMP_FILE']
        temp_file_ref = configs['__TEMP_FILE_REF']
        if args.notes_subcommand in ['list', 'show', 'add', 'edit', 'del', 'save', 'trash', 'copy', 'clear']:
            # find, view
            if args.notes_subcommand in ['list', 'show', 'search']:
                if args.sff_file == temp_file_ref:
                    if os.path.exists(temp_file):
                        args.sff_file = temp_file
                        if args.verbose:
                            print_date("Working on temp file {}".format(temp_file), stream=sys.stdout)
                    else:
                        print_date(
                            "Temporary file {} does not exist. Try invoking an edit ('add', 'edit', "
                            "'del') action on a valid EMDB-SFF file.".format(temp_file),
                            stream=sys.stdout)
                        return 64, configs
                else:
                    if args.verbose:
                        print_date("Reading directly from {}".format(args.sff_file), stream=sys.stdout)
            # modify
            elif args.notes_subcommand in ['save']:
                try:
                    assert os.path.exists(args.sff_file)
                except AssertionError:
                    print_date("Save-to file {} not found.".format(args.sff_file))
                    return 64, configs
        if args.notes_subcommand == "search":
            # ensure start is valid
            if args.start < 1:
                print_date("Invalid start value: {}; should be at least 1".format(args.start))
                return 64, configs
            # ensure rows is valid
            if args.rows < 1:
                print_date("Invalid rows value: {}; should be at least 1".format(args.rows))
                return 64, configs
            if args.resource != 'ols' and (
                    args.ontology is not None or args.exact or args.list_ontologies or
                    args.short_list_ontologies or args.obsoletes
            ):
                print_date("Invalid usage: -O, -x, -o, -L, -l can only be used with -R ols")
                return 64, configs
            if args.as_text:
                if args.filter_rows:
                    # make sure all the values are digits
                    try:
                        assert all(map(lambda i: i.isdecimal(), args.filter_rows))
                    except AssertionError:
                        print_date("Invalid filter rows: {}; should be a comma-separated list of digits".format(
                            args.filter_rows))
                        return 64, configs
                    # now validate the filter rows against --start and --rows: valid values
                    # must have a non-empty intersection with the range
                    # {--start, ..., (--start + --rows) - 1}
                    filter_row_values = set(map(int, args.filter_rows))
                    valid_index_values = set(range(args.start, args.start + args.rows))
                    if len(filter_row_values.intersection(valid_index_values)) == 0:
                        print_date(
                            f"Invalid filter rows: {args.filter_rows}; should be in range "
                            f"{args.start}-{args.start + args.rows - 1}")
                        return 64, configs
        elif args.notes_subcommand == "show":
            if args.segment_id is not None:
                args.segment_id = list(map(int, args.segment_id.split(',')))
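        # Illustrative sketch (comments only, not executed; the values are
        # hypothetical): --filter-rows values pass the check above when they
        # intersect the window {start, ..., start + rows - 1}. With start=1
        # and rows=10:
        #     >>> filter_row_values = {3, 12}
        #     >>> valid_index_values = set(range(1, 1 + 10))
        #     >>> sorted(filter_row_values.intersection(valid_index_values))
        #     [3]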
        elif args.notes_subcommand == "add":
            # if we want to add to a segment
            if args.segment_id is not None:
                args.segment_id = list(map(int, args.segment_id.split(',')))
                # ensure we have at least one item to add
                try:
                    assert (args.segment_name is not None) or (args.description is not None) or \
                           (args.number_of_instances is not None) or \
                           (args.external_ref is not None)
                except AssertionError:
                    print_date(
                        "Nothing specified to add. Use one or more of the following options:\n\t"
                        "-n <segment_name>\n\t-D <description>\n\t-E <extrefType> <extrefOtherType> <extrefValue>"
                        "\n\t-I <int>"
                    )
                    return 64, configs
            # unicode conversion
            if args.name is not None:
                args.name = _decode(args.name, 'utf-8')
            if args.details is not None:
                args.details = _decode(args.details, 'utf-8')
            if args.software_name is not None:
                args.software_name = _decode(args.software_name, 'utf-8')
            if args.software_version is not None:
                args.software_version = _decode(args.software_version, 'utf-8')
            if args.software_processing_details is not None:
                args.software_processing_details = _decode(args.software_processing_details, 'utf-8')
            if args.external_ref is not None:
                external_ref = list()
                for t, o, v in args.external_ref:
                    external_ref.append([_decode(t, 'utf-8'), _decode(o, 'utf-8'), _decode(v, 'utf-8')])
                args.external_ref = external_ref
            if args.segment_name is not None:
                args.segment_name = _decode(args.segment_name, 'utf-8')
            if args.description is not None:
                args.description = _decode(args.description, 'utf-8')
        elif args.notes_subcommand == "edit":
            # external references can be added globally (header) or to a segment
            if args.external_ref:
                try:
                    assert args.external_ref_id is not None
                except AssertionError:
                    print_date(
                        "Will not be able to edit an external reference without specifying an "
                        "external reference ID. Run 'list' or 'show' to see available external "
                        "reference IDs for segment {}".format(args.segment_id))
                    return 64, configs
                # consistency of format: a single triple arrives as a flat list of
                # strings and must be wrapped into a list of triples
                if len(args.external_ref) > 0 and isinstance(args.external_ref[0], _str):
                    args.external_ref = [args.external_ref]
            # software
            if args.software_name or args.software_version or args.software_processing_details:
                try:
                    assert args.software_id is not None
                except AssertionError:
                    print_date("Will not be able to edit a software instance without specifying a software ID. "
                               "Run 'show' to see the available software IDs.")
                    return 64, configs
            if args.segment_id is not None:
                args.segment_id = list(map(int, args.segment_id.split(',')))
            # transforms
            if args.transform:
                try:
                    assert args.transform_id is not None
                except AssertionError:
                    print_date("Will not be able to edit a transform without specifying a transform ID. "
                               "Run 'show' to see the available transform IDs.")
                    return 64, configs
            # unicode
            if args.name is not None:
                args.name = _decode(args.name, 'utf-8')
            if args.details is not None:
                args.details = _decode(args.details, 'utf-8')
            if args.software_name is not None:
                args.software_name = _decode(args.software_name, 'utf-8')
            if args.software_version is not None:
                args.software_version = _decode(args.software_version, 'utf-8')
            if args.software_processing_details is not None:
                args.software_processing_details = _decode(args.software_processing_details, 'utf-8')
            if args.external_ref is not None:
                external_ref = list()
                for t, o, v in args.external_ref:
                    external_ref.append([_decode(t, 'utf-8'), _decode(o, 'utf-8'), _decode(v, 'utf-8')])
                args.external_ref = external_ref
            if args.segment_name is not None:
                args.segment_name = _decode(args.segment_name, 'utf-8')
            if args.description is not None:
                args.description = _decode(args.description, 'utf-8')
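        # Illustrative sketch (comments only, not executed; the triple values are
        # hypothetical): wrapping a single external-reference triple gives
        # downstream code a uniform list-of-triples shape to iterate over:
        #     >>> external_ref = ['go', 'https://www.ebi.ac.uk/ols4', 'GO:0005634']
        #     >>> [external_ref] if isinstance(external_ref[0], str) else external_ref
        #     [['go', 'https://www.ebi.ac.uk/ols4', 'GO:0005634']]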
" "Run 'show' to see the available transform IDs.") return 64, configs # unicode if args.name is not None: args.name = _decode(args.name, 'utf-8') if args.details is not None: args.details = _decode(args.details, 'utf-8') if args.software_name is not None: args.software_name = _decode(args.software_name, 'utf-8') if args.software_version is not None: args.software_version = _decode(args.software_version, 'utf-8') if args.software_processing_details is not None: args.software_processing_details = _decode(args.software_processing_details, 'utf-8') if args.external_ref is not None: external_ref = list() for t, o, v in args.external_ref: external_ref.append([_decode(t, 'utf-8'), _decode(o, 'utf-8'), _decode(v, 'utf-8')]) args.external_ref = external_ref if args.segment_name is not None: args.segment_name = _decode(args.segment_name, 'utf-8') if args.description is not None: args.description = _decode(args.description, 'utf-8') elif args.notes_subcommand == "del": if args.segment_id is not None: try: assert args.segment_id is not None except AssertionError: print_date( "Please specify a segment ID", stream=sys.stdout) return 64, configs args.segment_id = list(map(int, args.segment_id.split(','))) # ensure we have at least one item to del try: assert args.segment_name or args.description or args.number_of_instances or \ (args.external_ref_id is not None) except AssertionError: print_date("Incorrect usage; please use -h for help") return 64, configs # convert from string to list of ints if args.external_ref_id is not None: ext_ref_ids = list(map(int, args.external_ref_id.split(','))) args.external_ref_id = ext_ref_ids # convert from string to list of ints for software if args.software_id is not None: software_ids = list(map(int, args.software_id.split(','))) args.software_id = software_ids # if missing -S, -T, and -P then set them since we have -s set if not args.software_name and not args.software_version and not args.software_processing_details: args.software_name = True args.software_version = True args.software_processing_details = True # convert from string to list of ints for transforms if args.transform_id is not None: transform_ids = list(map(int, args.transform_id.split(','))) args.transform_id = transform_ids elif args.notes_subcommand == "copy": # convert from and to to lists of ints if args.segment_id is not None: from_segment = list(map(int, args.segment_id.split(','))) if isinstance(from_segment, int): args.segment_id = [from_segment] else: args.segment_id = from_segment if args.to_segment is not None: to_segment = list(map(int, args.to_segment.split(','))) if isinstance(to_segment, int): args.to_segment = [to_segment] else: args.to_segment = to_segment if args.segment_id is not None and args.to_segment is not None: from_set = set(args.segment_id) to_set = set(args.to_segment) common = from_set.intersection(to_set) if len(common) > 0: print_date( "the following segment IDs appear in both --segment-id and --to-segment: {}".format( " ".join(map(str, common)) )) return 64, configs elif args.notes_subcommand == "clear": # where to clear notes from if args.segment_id is not None: from_segment = list(map(int, args.segment_id.split(','))) if isinstance(from_segment, int): args.segment_id = [from_segment] else: args.segment_id = from_segment elif args.all: args.from_global = True args.from_all_segments = True elif args.notes_subcommand == "merge": if args.output is None: args.output = args.other return args, configs
def cli(cmd: str) -> (argparse.Namespace, configparser.ConfigParser):
    """CLI function: split a shell-style command string into tokens, substitute it
    for ``sys.argv`` and hand it to :py:func:`parse_args`"""
    import shlex
    sys.argv = shlex.split(cmd)
    return parse_args(sys.argv)
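# Illustrative note (comments only, not executed; the command string is
# hypothetical): shlex.split honours shell quoting, so quoted arguments that
# contain spaces survive as single tokens, which is why cli() uses it rather
# than str.split:
#     >>> import shlex
#     >>> shlex.split("notes search 'nuclear membrane' file.sff")
#     ['notes', 'search', 'nuclear membrane', 'file.sff']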