import logging
import re


from documents.models import MatchingModel, Correspondent, DocumentType, Tag


logger = logging.getLogger("paperless.matching")


def log_reason(matching_model, document, reason):
    """
    Write a debug-level log entry explaining why a matching model matched.

    The entry names the concrete class of ``matching_model``, its ``name``
    attribute, the document and the human-readable ``reason``.
    """
    model_kind = type(matching_model).__name__
    message = (
        f"{model_kind} {matching_model.name} matched on document "
        f"{document} because {reason}"
    )
    logger.debug(message)


def match_correspondents(document, classifier):
    """
    Return the list of correspondents that apply to ``document``.

    A correspondent is selected when its own matching rule accepts the
    document (see ``matches``) or when its primary key equals the one
    predicted by ``classifier``. A falsy ``classifier`` disables the
    prediction part.
    """
    predicted_pk = (
        classifier.predict_correspondent(document.content)
        if classifier
        else None
    )

    return [
        correspondent
        for correspondent in Correspondent.objects.all()
        # Rule-based matching is evaluated first, prediction second.
        if matches(correspondent, document) or correspondent.pk == predicted_pk
    ]


def match_document_types(document, classifier):
    """
    Return the list of document types that apply to ``document``.

    A document type is selected when its own matching rule accepts the
    document (see ``matches``) or when its primary key equals the one
    predicted by ``classifier``. A falsy ``classifier`` disables the
    prediction part.
    """
    predicted_pk = (
        classifier.predict_document_type(document.content)
        if classifier
        else None
    )

    return [
        document_type
        for document_type in DocumentType.objects.all()
        # Rule-based matching is evaluated first, prediction second.
        if matches(document_type, document) or document_type.pk == predicted_pk
    ]


def match_tags(document, classifier):
    """
    Return the list of tags that apply to ``document``.

    A tag is selected when its own matching rule accepts the document
    (see ``matches``) or when its primary key is among the ids predicted by
    ``classifier``. A falsy ``classifier`` yields no predicted ids.
    """
    predicted_pks = (
        classifier.predict_tags(document.content) if classifier else []
    )

    return [
        tag
        for tag in Tag.objects.all()
        # Rule-based matching is evaluated first, prediction second.
        if matches(tag, document) or tag.pk in predicted_pks
    ]


def matches(matching_model, document):
    """
    Tell whether ``matching_model``'s rule accepts ``document``.

    The rule is described by three attributes of ``matching_model``:
    ``match`` (the pattern text), ``matching_algorithm`` (one of the
    ``MatchingModel.MATCH_*`` constants) and ``is_insensitive`` (whether
    case is ignored). The text that is searched is ``document.content``.

    Algorithms:
      * MATCH_ALL:     every keyword of ``match`` occurs as a whole word.
      * MATCH_ANY:     at least one keyword occurs as a whole word.
      * MATCH_LITERAL: ``match`` occurs verbatim between word boundaries.
      * MATCH_REGEX:   ``match`` is a regular expression found in the
                       content; an invalid expression is logged and treated
                       as "no match".
      * MATCH_FUZZY:   punctuation is stripped from both strings and the
                       fuzzywuzzy partial ratio must be at least 90.
      * MATCH_AUTO:    always False here; this is done elsewhere.

    An empty (or whitespace-only) ``match`` never matches. Every positive
    result is reported through ``log_reason``.

    Raises NotImplementedError for an unknown matching algorithm.
    """
    # Check that match is not empty
    if matching_model.match.strip() == "":
        return False

    # Search the content exactly as stored. Case-insensitivity is handled
    # by re.IGNORECASE below (and by explicit lowering in the fuzzy branch).
    # Lowering the content unconditionally would make case-sensitive
    # patterns with upper-case characters impossible to match and would let
    # lower-case patterns match text of any case.
    document_content = document.content

    search_kwargs = {}
    if matching_model.is_insensitive:
        search_kwargs = {"flags": re.IGNORECASE}

    if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
        for word in _split_match(matching_model):
            search_result = re.search(
                rf"\b{word}\b", document_content, **search_kwargs)
            if not search_result:
                return False
        log_reason(
            matching_model, document,
            f"it contains all of these words: {matching_model.match}"
        )
        return True

    elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
        for word in _split_match(matching_model):
            if re.search(rf"\b{word}\b", document_content, **search_kwargs):
                log_reason(
                    matching_model, document,
                    f"it contains this word: {word}"
                )
                return True
        return False

    elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
        # The pattern text is escaped so it is searched for verbatim.
        result = bool(re.search(
            rf"\b{re.escape(matching_model.match)}\b",
            document_content,
            **search_kwargs
        ))
        if result:
            log_reason(
                matching_model, document,
                f"it contains this string: \"{matching_model.match}\""
            )
        return result

    elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
        try:
            match = re.search(
                re.compile(matching_model.match, **search_kwargs),
                document_content
            )
        except re.error:
            # A malformed user-supplied expression must not abort matching.
            logger.error(
                f"Error while processing regular expression "
                f"{matching_model.match}"
            )
            return False
        if match:
            log_reason(
                matching_model, document,
                f"the string {match.group()} matches the regular expression "
                f"{matching_model.match}"
            )
        return bool(match)

    elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
        # Imported lazily so the dependency is only needed for this branch.
        from fuzzywuzzy import fuzz

        # Drop everything that is neither a word character nor whitespace.
        match = re.sub(r'[^\w\s]', '', matching_model.match)
        text = re.sub(r'[^\w\s]', '', document_content)
        if matching_model.is_insensitive:
            match = match.lower()
            text = text.lower()
        if fuzz.partial_ratio(match, text) >= 90:
            # TODO: make this better
            log_reason(
                matching_model, document,
                f"parts of the document content somehow match the string "
                f"{matching_model.match}"
            )
            return True
        else:
            return False

    elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
        # this is done elsewhere.
        return False

    else:
        raise NotImplementedError("Unsupported matching algorithm")


def _split_match(matching_model):
    """
    Break ``matching_model.match`` into a list of regex-ready keywords.

    Terms are separated by whitespace; text enclosed in double quotes is
    kept together as one term. Each term is stripped, its internal
    whitespace runs are collapsed, regex metacharacters are escaped, and
    every remaining space is turned into ``\\s+`` so that a quoted phrase
    matches across any amount of whitespace.

    Example:
      '  some random  words "with   quotes  " and   spaces'
        ==>
      ["some", "random", "words", "with\\s+quotes", "and", "spaces"]
    """
    keywords = []
    for quoted, bare in re.findall(r'"([^"]+)"|(\S+)', matching_model.match):
        # Exactly one of the two groups is non-empty for each hit.
        term = (quoted or bare).strip()
        collapsed = re.sub(r"\s+", " ", term)
        # re.escape turns each space into "\ ", which is then widened.
        keywords.append(re.escape(collapsed).replace(r"\ ", r"\s+"))
    return keywords