"""Pattern-matching utility functions for Sphinx."""

import os.path
import re
from typing import Callable, Dict, Iterable, Iterator, List, Match, Optional, Pattern

from sphinx.util.osutil import canon_path, path_stabilize


def _translate_pattern(pat: str) -> str:
    """Convert a shell-style glob pattern into regular-expression source.

    Derived from the translation done by the fnmatch module, with these
    differences:

    * ``*`` and ``?`` never match a slash; ``**`` matches anything,
      slashes included.
    * A negated set ``[!...]`` additionally refuses to match a slash.
    * A ``[`` without a closing ``]`` is treated as a literal bracket.

    The returned expression ends with ``$``; no anchor is added at the start.
    """
    pieces = []
    pos, length = 0, len(pat)
    while pos < length:
        char = pat[pos]
        pos += 1

        if char == '*':
            if pat[pos:pos + 1] == '*':
                # '**' crosses directory boundaries
                pos += 1
                pieces.append('.*')
            else:
                # a lone '*' stays inside one path component
                pieces.append('[^/]*')
            continue

        if char == '?':
            # exactly one character, but never a slash
            pieces.append('[^/]')
            continue

        if char != '[':
            pieces.append(re.escape(char))
            continue

        # Bracket expression: find the closing ']'.  A leading '!' and a ']'
        # placed first in the set belong to the set, so step over them
        # before searching.
        scan = pos
        if pat[scan:scan + 1] == '!':
            scan += 1
        if pat[scan:scan + 1] == ']':
            scan += 1
        close = pat.find(']', scan)
        if close == -1:
            # unterminated set: the '[' is an ordinary character
            pieces.append('\\[')
            continue

        body = pat[pos:close].replace('\\', '\\\\')
        pos = close + 1
        if body[0] == '!':
            # negated set: also exclude the slash
            body = '^/' + body[1:]
        elif body[0] == '^':
            # a literal caret must not be read as regex negation
            body = '\\' + body
        pieces.append('[' + body + ']')

    return ''.join(pieces) + '$'


def compile_matchers(patterns: Iterable[str]) -> List[Callable[[str], Optional[Match[str]]]]:
    """Compile glob patterns into match callables.

    Each pattern is translated by ``_translate_pattern()`` and compiled; the
    compiled expression's bound ``match`` method is collected.  The result
    holds one callable per pattern, in the order the patterns were given.
    """
    matchers = []
    for glob in patterns:
        compiled = re.compile(_translate_pattern(glob))
        matchers.append(compiled.match)
    return matchers


class Matcher:
    """A pattern matcher for multiple shell-style glob patterns.

    Note: this modifies the patterns to work with copy_asset().
          For example, "**/index.rst" matches with "index.rst"
    """

    def __init__(self, exclude_patterns: Iterable[str]) -> None:
        """Compile *exclude_patterns* plus their ``**/``-stripped variants.

        Every pattern that starts with ``**/`` is also compiled a second time
        without that prefix, so that it matches entries at the top level too.
        """
        # Materialise once: the patterns are traversed twice below, and a
        # single-pass iterable (e.g. a generator) would be empty the second
        # time, silently dropping the original patterns.
        patterns = list(exclude_patterns)
        expanded = [pat[3:] for pat in patterns if pat.startswith('**/')]
        self.patterns = compile_matchers(patterns + expanded)

    def __call__(self, string: str) -> bool:
        """Shorthand for :meth:`match`."""
        return self.match(string)

    def match(self, string: str) -> bool:
        """Return True if *string* matches at least one compiled pattern.

        The string is passed through ``canon_path()`` before matching.
        """
        string = canon_path(string)
        return any(pat(string) for pat in self.patterns)


# Ready-made matcher built from the single pattern '**/.*'.  Because Matcher
# also compiles the pattern without its leading '**/', this matches paths
# whose final component starts with a dot, at any depth including the top
# level (matching is done on the result of canon_path()).
DOTFILES = Matcher(['**/.*'])


# Cache of compiled regular expressions, keyed by the glob pattern string.
# Filled lazily by patmatch() and patfilter(); nothing in the code shown here
# removes entries.
_pat_cache: Dict[str, Pattern] = {}


def patmatch(name: str, pat: str) -> Optional[Match[str]]:
    """Match *name* against the shell-style glob pattern *pat*.

    Returns the match object on success and ``None`` otherwise.  The compiled
    form of *pat* is kept in ``_pat_cache`` for later calls.

    Adapted from the fnmatch module.
    """
    compiled = _pat_cache.get(pat)
    if compiled is None:
        compiled = _pat_cache[pat] = re.compile(_translate_pattern(pat))
    return compiled.match(name)


def patfilter(names: Iterable[str], pat: str) -> List[str]:
    """Return the entries of *names* that match the glob pattern *pat*.

    The input order is preserved.  The compiled form of *pat* is kept in
    ``_pat_cache`` for later calls.

    Adapted from the fnmatch module.
    """
    try:
        compiled = _pat_cache[pat]
    except KeyError:
        compiled = _pat_cache[pat] = re.compile(_translate_pattern(pat))
    return [candidate for candidate in names if compiled.match(candidate)]


def get_matching_files(
    dirname: str,
    include_patterns: Iterable[str] = ("**",),
    exclude_patterns: Iterable[str] = (),
) -> Iterator[str]:
    """Recursively yield the file names found below *dirname*.

    The yielded names are relative to *dirname* and have been passed through
    ``path_stabilize()``.  A file is yielded when it matches at least one
    glob in *include_patterns* and no glob in *exclude_patterns*; exclusion
    always wins over inclusion.  By default every file is included ("**")
    and nothing is excluded (an empty tuple).

    Directories whose relative path matches an exclude pattern are not
    descended into.  Symbolic links to directories are followed.  Within
    each directory, files are processed in sorted order.
    """
    # Work from a normalised absolute path.
    base = os.path.normpath(os.path.abspath(dirname))

    is_excluded = compile_matchers(exclude_patterns)
    is_included = compile_matchers(include_patterns)

    for root, dirs, files in os.walk(base, followlinks=True):
        rel_root = os.path.relpath(root, base)
        if rel_root == ".":
            # entries directly inside the base directory get no prefix
            rel_root = ""

        # Select the files of this directory.
        selected = []
        for file_name in sorted(files):
            path = path_stabilize(os.path.join(rel_root, file_name))
            if not any(check(path) for check in is_included):
                continue
            if any(check(path) for check in is_excluded):
                continue
            selected.append(path)

        # Prune excluded sub-directories.  The list handed out by os.walk()
        # is edited in place so that the walk does not descend into them.
        surviving = []
        for sub_dir in sorted(dirs):
            path = path_stabilize(os.path.join(rel_root, sub_dir))
            if not any(check(path) for check in is_excluded):
                surviving.append(sub_dir)
        dirs[:] = surviving

        yield from selected
