# -*- coding: utf-8 -*-
"""
PDF-парсер АБ «Укргазбанк» / форма «Обороти по рахунку»,  v2.5
Исправлены: ИНН, поступления, назначение одним полем.
"""

import logging, re
from datetime import datetime
from typing import List, Optional
import pdfplumber

from app.models.transaction import Transaction
from app.parsers.base_parser import BaseBankStatementParser

# Module logger: emits DEBUG and above to stderr with a timestamped format.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
sh = logging.StreamHandler()
sh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s: %(message)s"))
# Attach the handler only once: loggers are process-wide singletons, so
# re-executing this module (e.g. importlib.reload) would otherwise stack a
# second StreamHandler and print every message twice.
if not any(type(h) is logging.StreamHandler for h in logger.handlers):
    logger.addHandler(sh)

# Whole-cell date "DD.MM.YYYY", optionally followed by whitespace and a time "HH:MM:SS".
_RE_DATE  = re.compile(r"^\d{2}\.\d{2}\.\d{4}(?:\s+\d{2}:\d{2}:\d{2})?$")
# Whole-cell IBAN: "UA" followed by exactly 25 digits.
_RE_IBAN  = re.compile(r"^UA\d{25}$")
# Stand-alone run of 8-10 digits; used below to pick out the counterparty's INN / registration code.
_RE_INN   = re.compile(r"\b\d{8,10}\b")
# Amount with a decimal comma, two fraction digits and optional space-like digit separators.
_RE_NUM   = re.compile(r"\d[\d\u00A0\u202F ]*,\d{2}")        # e.g. 98 000,00
# Separator characters removed from numbers before conversion.
_SPACES   = re.compile(r"[ \u00A0\u202F]")                   # space, NBSP, narrow NBSP

def _clean(num: str) -> str:
    """Normalize a formatted number string so ``float()`` can read it.

    Every character matched by ``_SPACES`` is removed and each comma is
    replaced with a dot.
    """
    without_separators = _SPACES.sub("", num)
    return without_separators.replace(",", ".")

def _to_float(num: str) -> float:
    """Convert a formatted number string (e.g. ``"98 000,00"``) to ``float``.

    The string is first normalized with ``_clean``; ``float()`` raises
    ``ValueError`` if the result is still not a valid number.
    """
    normalized = _clean(num)
    return float(normalized)


class UkrgasbankPdfParser(BaseBankStatementParser):
    """Parser for Ukrgasbank "Обороти по рахунку" PDF statements.

    The statement table is read with pdfplumber, split into per-document
    blocks (a block starts at a row whose first cell is neither a date nor an
    IBAN) and each block is converted into a ``Transaction``.

    Column layout relied upon by the code: column 0 carries the document
    number / date / IBAN, column 1 of the amount row carries the counterparty
    name, column 5 carries the payment purpose.
    """

    # Lower-case substrings of the payment purpose that mark a debit operation.
    _DEBIT_WORDS = ("плата", "абон", "подат", "еквайринг", "ком.", "списано")

    # ------------------------------------------------------------------ public
    def parse(self, file_path: str) -> List[Transaction]:
        """Parse the statement at *file_path* into unique transactions.

        Duplicates are detected by ``(number, date, abs(amount))``; only the
        first occurrence is kept. Blocks that ``_parse_block`` rejects
        (returns a falsy value for) are skipped.
        """
        logger.debug("Старт парсинга %s", file_path)
        with pdfplumber.open(file_path) as pdf:
            # A document without pages has no header; use an empty one
            # instead of failing with IndexError.
            header_text = pdf.pages[0].extract_text() if pdf.pages else ""
            self._init_header(header_text or "")

        rows = self._normalize_rows(self._extract_rows(file_path))
        blocks = self._group_blocks(rows)
        logger.debug("Найдено блоков: %d", len(blocks))

        txs: List[Transaction] = []
        seen = set()
        for blk in blocks:
            t = self._parse_block(blk)
            if not t:                                   # skip unusable blocks
                continue
            key = (t.number, t.date, abs(t.amount))     # duplicate filter
            if key in seen:
                continue
            seen.add(key)
            txs.append(t)

        logger.debug("Уникальных транзакций: %d", len(txs))
        return txs

    # ---------------------------------------------------------------- helpers
    def _init_header(self, txt: str) -> None:
        """Extract our company's name, code and IBAN from the first-page text.

        Each attribute is set to ``""`` when its pattern is not found.
        """
        def first_group(pattern: str) -> str:
            found = re.search(pattern, txt)
            return found.group(1).strip() if found else ""

        self.our_company_name    = first_group(r"Назва\s+рахунку:\s*(.+)")
        self.our_company_inn     = first_group(r"Код\s+ЄДРПОУ:\s*(\d{8,10})")
        self.our_company_account = first_group(r"(UA\d{25})")
        logger.debug("Шапка: %s / %s / %s",
                     self.our_company_name,
                     self.our_company_inn,
                     self.our_company_account)

    # -------- pdfplumber -----------------------------------------------------
    def _extract_rows(self, path: str):
        """Return all table rows of the PDF as lists of stripped strings.

        For every page a ruling-line based table is tried first; if that
        yields nothing, a text-alignment based table is tried. Missing cells
        (``None``) become ``""``. Pages without any table are skipped.
        """
        out = []
        with pdfplumber.open(path) as pdf:
            for p, page in enumerate(pdf.pages, 1):
                tbl = page.extract_table({
                    "vertical_strategy":   "lines",
                    "horizontal_strategy": "lines",
                    "intersection_tolerance": 2,
                    "snap_tolerance": 3
                }) or page.extract_table({
                    "vertical_strategy":   "text",
                    "horizontal_strategy": "text",
                    "keep_blank_chars":    True
                })
                if not tbl:
                    continue
                out.extend([c.strip() if c else "" for c in row] for row in tbl)
                logger.debug("Страница %d: +%d строк", p, len(tbl))
        return out

    def _normalize_rows(self, raw):
        """Drop rows that carry no transaction data.

        Removed: fully empty rows, rows with a cell containing "Стор."
        (presumably page markers), rows with a cell starting with
        "Номер документ" (column headers) and rows whose first cell starts
        with "Підсумок" or "Вихідний" (presumably totals / closing balance).
        """
        kept = []
        for r in raw:
            if not any(r):
                continue
            if any("Стор." in c for c in r):
                continue
            if any(c.startswith("Номер документ") for c in r):
                continue
            if r[0].startswith(("Підсумок", "Вихідний")):
                continue
            kept.append(r)
        return kept

    # -------------- blocks ---------------------------------------------------
    def _group_blocks(self, rows):
        """Group consecutive rows into per-document blocks.

        A new block starts at a row whose first cell is non-empty and is
        neither a date nor an IBAN (i.e. a document number). All other rows
        are appended to the current block. Blocks without a date in the first
        column are discarded.
        """
        blocks, cur = [], []
        for r in rows:
            first = r[0]
            starts_block = bool(first) and not _RE_DATE.match(first) and not _RE_IBAN.match(first)
            if not starts_block:
                cur.append(r)
                continue
            if cur and self._has_date(cur):
                blocks.append(cur)
            cur = [r]
        if cur and self._has_date(cur):
            blocks.append(cur)
        return blocks

    @staticmethod
    def _has_date(block):
        """Return True if any row of *block* has a date in its first cell."""
        return any(_RE_DATE.match(r[0]) for r in block)

    # -------------- block parsing -------------------------------------------
    def _parse_block(self, blk) -> Optional[Transaction]:
        """Convert one block of rows into a ``Transaction``.

        Returns ``None`` when the block is empty, contains no date cell or no
        amount can be found. ``datetime.strptime`` may still raise
        ``ValueError`` for a cell that looks like a date but is not a valid
        calendar date.
        """
        if not blk or not blk[0]:
            return None
        num = blk[0][0]                                # document number

        # ---- date ----------------------------------------------------------
        date_cell = next((c for r in blk for c in r if _RE_DATE.match(c)), None)
        if date_cell is None:
            return None
        # _RE_DATE accepts any whitespace between date and time (a wrapped
        # cell yields "date\ntime"), so normalize it before picking a format.
        date_text = " ".join(date_cell.split())
        date_fmt = "%d.%m.%Y %H:%M:%S" if " " in date_text else "%d.%m.%Y"
        dt = datetime.strptime(date_text, date_fmt)

        # ---- amount + debit/credit ----------------------------------------
        amt, is_debit, amt_row = self._find_amount_in_table(blk)
        if amt is None:
            # Fallback: the amount shares its cell with other text.
            amt, amt_row = self._find_amount_in_text(blk)
            if amt is None:
                logger.warning("Нет суммы в блоке «%s»", num)
                return None
            is_debit = False

        # ---- counterparty, account, INN ------------------------------------
        payer_name = amt_row[1].strip() if len(amt_row) > 1 else ""
        # IBAN / INN may appear on a later row, so search the whole block.
        acc = next((c for r in blk for c in r if _RE_IBAN.fullmatch(c)), "")
        inn = next((c for r in blk for c in r if _RE_INN.fullmatch(_clean(c))), "")

        # ---- payment purpose ----------------------------------------------
        purpose = " ".join(r[5] for r in blk if len(r) > 5 and r[5])
        purpose = re.sub(r"\s+", " ", purpose).strip()

        # ---- refine operation type ----------------------------------------
        # An empty name is not evidence that the counterparty is ourselves
        # (both names are "" when the header or the name cell is missing).
        is_own_name = bool(payer_name) and payer_name == self.our_company_name
        purpose_lc = purpose.lower()
        if is_own_name or any(w in purpose_lc for w in self._DEBIT_WORDS):
            is_debit = True

        # ---- build Transaction --------------------------------------------
        ours = (self.our_company_name, self.our_company_inn, self.our_company_account)
        theirs = (payer_name, inn, acc)
        if is_debit:                                   # outgoing payment
            payer, recip = ours, theirs
            amt = -amt
            dout, din = dt, None
        else:                                          # incoming payment
            payer, recip = theirs, ours
            dout, din = None, dt

        logger.debug("%s %s %.2f → %s (INN %s)",
                     "DEBIT" if is_debit else "CREDIT",
                     num, amt, payer_name, inn)

        return Transaction(
            number=num,
            date=dt,
            amount=amt,
            payment_details=purpose,
            payer_name=payer[0],    payer_inn=payer[1],    payer_account=payer[2],
            recipient_name=recip[0],recipient_inn=recip[1],recipient_account=recip[2],
            date_outcome=dout,
            date_income=din,
        )

    # ---------- table -> amount ---------------------------------------------
    def _find_amount_in_table(self, blk):
        """Find the first cell that consists solely of an amount.

        Returns ``(amount, is_debit, row_with_amount)``. ``is_debit`` is
        always ``False`` here; the caller decides the direction later.
        Returns ``(None, None, None)`` when no such cell exists.
        """
        for row in blk:
            for cell in row:
                if _RE_NUM.fullmatch(cell):
                    # Usually a row holds a single number; take the first one.
                    return _to_float(cell), False, row
        return None, None, None

    @staticmethod
    def _find_amount_in_text(blk):
        """Find the first amount embedded anywhere inside a cell.

        Cells are searched one by one (not as joined text) so that digits of
        neighbouring cells are never glued into a single number. Returns
        ``(amount, row_with_amount)`` or ``(None, None)``.
        """
        for row in blk:
            for cell in row:
                found = _RE_NUM.search(cell)
                if found:
                    return _to_float(found.group()), row
        return None, None
