"""
Conflict / similarity detection engine.

Scores an incoming trademark application against an admin-managed watched mark
and returns a 0-100 similarity with the kind of match. Combines:

  exact      identical normalized string                       -> 100
  variant    the application's stored transliteration variants  -> 95
             overlap a watched-mark spelling
  fuzzy      Levenshtein ratio between normalized strings        -> ratio*100
  phonetic   same phonetic key (Soundex-like, tuned for the     -> 80
             romanizations this system produces)

Class overlap doesn't change the string score but is recorded and used to rank
(a near-identical mark in the SAME class is a far stronger conflict than one in
an unrelated class). Callers typically keep alerts at or above a threshold.
"""
import re
import unicodedata


# ---- normalization ---------------------------------------------------------
def norm(s):
    """Return the comparison form of a mark spelling.

    The text is NFKC-normalized and lowercased, then every character that is
    not an ASCII letter, an ASCII digit, or in the Arabic block
    (U+0600-U+06FF) is removed. Spaces and punctuation therefore vanish, and
    so do accented Latin letters (they are dropped, not folded to a base
    letter). Falsy input (None, "") yields "".
    """
    if s:
        folded = unicodedata.normalize("NFKC", s).lower()
        # drop spaces/punct, keep letters
        return re.sub(r"[^a-z0-9\u0600-\u06FF]+", "", folded)
    return ""


# ---- Levenshtein ratio (no external dependency) ----------------------------
def _levenshtein(a, b):
    if a == b:
        return 0
    if not a:
        return len(b)
    if not b:
        return len(a)
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = cur
    return prev[-1]


def fuzzy_ratio(a, b):
    """Return a 0.0-1.0 similarity between two spellings.

    Both inputs are normalized with norm() first. The result is
    1 - (edit distance / length of the longer normalized string), so 1.0
    means identical after normalization. If either side normalizes to the
    empty string the result is 0.0.
    """
    left = norm(a)
    right = norm(b)
    if not (left and right):
        return 0.0
    longest = max(len(left), len(right))
    return 1.0 - _levenshtein(left, right) / longest


# ---- phonetic key (tuned for Persian->Latin romanizations) -----------------
# Group letters that sound alike in the spellings this system generates, so
# "Ghasem"/"Qasem", "Simorgh"/"Seemorgh", "Ejaz"/"Ajaz" collide.
#
# Rules are applied in list order to the lowercased, normalized string.
# Digraphs appear before the single letters they contain ("gh" before "g",
# "ch" before "c", "ee" before "e", "h" last), and every replacement is an
# uppercase class symbol, so a later lowercase pattern can never re-match a
# sound that has already been classified.
_PHON = [
    (r"gh|q", "G"), (r"kh|x", "K"), (r"ch", "C"), (r"sh", "S"), (r"zh|j", "J"),
    (r"th|s|z|c", "S"), (r"ph|f|v|w", "F"), (r"k|g", "K"), (r"d|t", "T"),
    (r"b|p", "B"), (r"ee|i|y", "I"),
    # A lone "e" folds into the A group so a/e spelling variants such as
    # "Ejaz"/"Ajaz" share a key; "ee" was already consumed by the I rule.
    (r"aa|a|e", "A"), (r"oo|u|o", "O"),
    # NOTE: norm() already strips apostrophes, so the "'" rule never matches
    # on the path through phonetic_key(); it is kept for table completeness.
    (r"h", ""), (r"'", ""),
]


def phonetic_key(s):
    """Return the phonetic grouping key for a spelling.

    The input is normalized with norm(), each _PHON rule is applied in order,
    and runs of the same character are collapsed to one. Two spellings with
    the same non-empty key are treated as sounding alike. Returns "" when the
    input normalizes to the empty string.

    Characters no rule covers (e.g. "m", "r", digits, Arabic-block letters)
    pass through unchanged.
    """
    s = norm(s)
    if not s:
        return ""
    for pat, rep in _PHON:
        s = re.sub(pat, rep, s)
    s = re.sub(r"(.)\1+", r"\1", s)  # collapse repeats
    return s


# ---- scoring ----------------------------------------------------------------
def score_pair(watched_strings, app_strings):
    """
    Find the strongest match between two sets of spellings.

    watched_strings: list of spellings for the watched mark.
    app_strings:     list of spellings for the application (translit variants etc.).
    Returns (score 0..100, kind, matched_on) for the best match found.

    kind is one of:
      "exact"    a normalized watched spelling equals a normalized application
                 spelling; returned immediately with score 100.0
      "phonetic" a non-empty phonetic key is shared; score 80.0
      "fuzzy"    best Levenshtein ratio over all pairs, kept only when it is
                 strictly greater than the score found so far
      "none"     nothing matched; score 0.0 and matched_on ""
    This function never returns a "variant" kind.
    """
    # exact: index both sides by normalized form (falsy spellings skipped)
    watched_by_norm = {}
    for spelling in watched_strings:
        if spelling:
            watched_by_norm[norm(spelling)] = spelling
    app_by_norm = {}
    for spelling in app_strings:
        if spelling:
            app_by_norm[norm(spelling)] = spelling

    for key, w_orig in watched_by_norm.items():
        if key and key in app_by_norm:
            return (100.0, "exact", f"{w_orig} = {app_by_norm[key]}")

    top_score, top_kind, top_detail = 0.0, "none", ""

    # phonetic: index both sides by phonetic key
    watched_by_phon = {}
    for spelling in watched_strings:
        if spelling:
            watched_by_phon[phonetic_key(spelling)] = spelling
    app_by_phon = {}
    for spelling in app_strings:
        if spelling:
            app_by_phon[phonetic_key(spelling)] = spelling

    for key, w_orig in watched_by_phon.items():
        if key and key in app_by_phon:
            # Every phonetic hit scores the same, so the first one is kept.
            top_score = 80.0
            top_kind = "phonetic"
            top_detail = f"{w_orig} ~ {app_by_phon[key]}"
            break

    # fuzzy (best pair): compared unrounded, stored rounded to one decimal
    for w in watched_strings:
        for x in app_strings:
            pct = fuzzy_ratio(w, x) * 100
            if pct > top_score:
                top_score = round(pct, 1)
                top_kind = "fuzzy"
                top_detail = f"{w} ≈ {x}"

    return (top_score, top_kind, top_detail)


def application_strings(application):
    """Collect every spelling of an application that should be compared.

    Order: the .value of each entry in application.transliterations, then
    mark_translit, mark_translation and mark_text (each only if truthy).
    Entries whose normalized form is empty, or repeats an earlier entry's
    normalized form, are dropped; the first occurrence wins and the original
    (un-normalized) text is returned.
    """
    candidates = [t.value for t in application.transliterations]
    extras = (application.mark_translit, application.mark_translation,
              application.mark_text)
    candidates.extend(v for v in extras if v)

    # de-dup preserving order
    uniq = []
    seen_keys = set()
    for spelling in candidates:
        key = norm(spelling)
        if not key or key in seen_keys:
            continue
        seen_keys.add(key)
        uniq.append(spelling)
    return uniq


def watched_strings(watched):
    """Return the watched mark's spellings: its label, then its Farsi form.

    Falsy values (None, "") are left out, so the result has zero, one or two
    entries.
    """
    label = watched.label
    farsi = watched.farsi
    return [spelling for spelling in (label, farsi) if spelling]


def evaluate(watched, application, *, floor=60):
    """
    Score one watched mark vs one application.

    watched:     object accepted by watched_strings(); must also provide
                 class_set().
    application: object accepted by application_strings(); must also provide
                 class_set().
    floor:       minimum score (0..100) worth reporting; anything strictly
                 below it is treated as a weak coincidental match. Defaults
                 to 60, the value that was previously hard-coded.

    Returns dict {score, kind, matched_on, class_overlap} or None if below floor.

    class_overlap is True when watched.class_set() is None, or when both class
    sets are non-empty and share at least one member. It does not affect the
    score.
    """
    score, kind, matched = score_pair(watched_strings(watched),
                                      application_strings(application))
    if score < floor:   # floor: ignore weak coincidental matches
        return None

    wc = watched.class_set()
    ac = application.class_set()
    if wc is None:
        # NOTE(review): None presumably means "watched in every class" —
        # confirm against class_set(); an empty set is NOT treated this way.
        class_overlap = True
    else:
        class_overlap = bool(wc and ac and (wc & ac))

    return {"score": score, "kind": kind, "matched_on": matched,
            "class_overlap": class_overlap}