import pdfplumber
import re
import logging
from datetime import datetime
from typing import List, Optional, Tuple, Any

from app.parsers.base_parser import BaseBankStatementParser
from app.models.transaction import Transaction

# Configure the root logger when this module is imported: DEBUG verbosity and
# a "timestamp LEVEL: message" record layout.
logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s", level=logging.DEBUG)

class MTBBankPdfParser(BaseBankStatementParser):
    """
    Parser for MTB Bank PDF statements.

    The class-level patterns below are used to pull the date, document number,
    tax id and amount out of the statement text.
    """

    # "Дата" label followed by a dd.mm.yyyy date; group 1 is the date.
    RE_DATE_LINE   = re.compile(r'(?:Дата\s+)(\d{2}\.\d{2}\.\d{4})')
    # Any standalone dd.mm.yyyy date.
    RE_ANY_DATE    = re.compile(r'\b\d{2}\.\d{2}\.\d{4}\b')
    # "Док. №" label; group 1 is the run of non-whitespace characters after it.
    RE_DOC         = re.compile(r'(?:Док\.\s*№\s*)([\S]+)')
    # Tax-id label ("Ід.код", "ЄДРПОУ" or "ІПН"); group 1 is the digit run after it.
    RE_INN         = re.compile(r'(?:Ід\.код|ЄДРПОУ|ІПН)\s*(\d+)')
    # Standalone number of 8 to 10 digits.
    RE_DIGITS8_10  = re.compile(r'\b\d{8,10}\b')
    # "Сума" (case-insensitive), whitespace, then group 1: a non-empty run of
    # digits, whitespace, dots and commas.  The intent is that an amount must
    # follow the label; note that the character class also admits whitespace
    # and punctuation, so a match alone does not guarantee a digit in group 1.
    RE_SUM_INLINE  = re.compile(r'Сума\s+([\d\s.,]+)', re.IGNORECASE)  # <- changed

    def __init__(self):
        """Initialise the base parser and start with every header field unset.

        The fields stay ``None`` until ``_extract_header_data`` finds the
        corresponding value in the statement header.
        """
        super().__init__()

        # Servicing bank.
        self.our_bank_name: Optional[str] = None
        self.our_bank_code: Optional[str] = None

        # Statement account.
        self.our_account: Optional[str] = None

        # Account holder.
        self.our_company_name: Optional[str] = None
        self.our_company_inn: Optional[str] = None

    # ------------------------------------------------------------------ public

    def parse(self, file_path: str) -> List[Transaction]:
        """Parse a statement PDF and return its transactions without duplicates.

        Header fields are read from the first page.  The text of every page is
        then split into stripped, non-empty lines, divided into register
        sections, and every section is parsed block by block.

        Args:
            file_path: Path of the PDF, handed to ``pdfplumber.open``.

        Returns:
            Transactions in extraction order.  Of several transactions sharing
            the same ``(number, date, amount)`` only the first one is kept.  A
            PDF without pages yields an empty list.
        """
        with pdfplumber.open(file_path) as pdf:
            if not pdf.pages:
                return []

            self._extract_header_data(pdf.pages[0])

            all_lines: List[str] = []
            for page in pdf.pages:
                # extract_text() may give nothing for a page; treat that as empty text
                for raw_line in (page.extract_text() or "").splitlines():
                    cleaned = raw_line.strip()
                    if cleaned:
                        all_lines.append(cleaned)

        logging.debug(f"Всего строк после объединения: {len(all_lines)}")

        # NOTE(review): _extract_all_sections derives the section flag from the
        # "...за кредитом" header, while it is consumed here as `is_debit` —
        # confirm the intended polarity against real statements.
        #
        # Drop duplicates: the dict keeps insertion order and setdefault keeps
        # the first transaction seen for each key.
        first_seen = {}
        for sec_lines, is_debit in self._extract_all_sections(all_lines):
            for tx in self._parse_section_as_blocks(sec_lines, is_debit):
                first_seen.setdefault((tx.number, tx.date, tx.amount), tx)
        return list(first_seen.values())

    # ------------------------------------------------------------------ helpers

    def _extract_header_data(self, page: Any) -> None:
        text = page.extract_text() or ""

        if m := re.search(r'Банк\s+ПАТ\s+"?МТБ\s+БАНК"?\s+Код\s+банку\s+(\d+)', text):
            self.our_bank_name = 'ПАТ "МТБ БАНК"'
            self.our_bank_code = m.group(1)

        if m := re.search(r'№\s*рахунка\s*(UA\s*\d[\w\s]+)', text):
            self.our_account = m.group(1).replace(" ", "")

        if m := re.search(r'№\s*рахунка\s*UA[^\n]+\s+\[*\]*\s*(ФОП\s+[^\n]+)', text):
            self.our_company_name = m.group(1).strip()

        if m := self.RE_INN.search(text):
            self.our_company_inn = m.group(1)

    def _extract_all_sections(self, lines: List[str]) -> List[Tuple[List[str], bool]]:
        idxs = [
            (i, s.startswith("реєстр документів за кредитом"))
            for i, s in enumerate(map(str.lower, lines))
            if s.startswith("реєстр документів за дебетом")
               or s.startswith("реєстр документів за кредитом")
        ]

        if not idxs:
            return [(lines, False)]

        sections = []
        for j, (start, is_credit) in enumerate(idxs):
            end = idxs[j + 1][0] if j + 1 < len(idxs) else len(lines)
            sections.append((lines[start + 1:end], is_credit))
        return sections

    # ------------------------------------------------------------ main parsing

    def _parse_section_as_blocks(
        self, lines: List[str], is_debit: bool
    ) -> List[Transaction]:
        res, block = [], []
        for line in lines + ["Проведений …"]:      # sentinel для завершения последнего
            low = line.lower()

            if self._line_has_sum(line):
                if block:
                    if tx := self._finalize_block(block, is_debit):
                        res.append(tx)
                    block = []
                block.append(line)

            elif low.startswith("проведений"):
                block.append(line)
                if tx := self._finalize_block(block, is_debit):
                    res.append(tx)
                block = []

            elif block:                            # внутри текущего блока
                block.append(line)

        return res

    def _finalize_block(
        self, block_lines: List[str], is_debit: bool
    ) -> Optional[Transaction]:
        text = "\n".join(block_lines)
        logging.debug(f"\n--- Finalizing block ---\n{text}\n--- End block ---")

        # 1) сумма
        m_sum = self.RE_SUM_INLINE.search(text)
        if not m_sum:                              # нет «Сума <цифры>» → пропуск
            logging.debug("Не нашли цифры после «Сума» — блок пропускаем")
            return None

        raw_sum = m_sum.group(1).replace(" ", "").replace(",", ".")
        try:
            amount = float(raw_sum)
        except ValueError:
            logging.debug("Сумма не преобразуется в float — блок пропускаем")
            return None

        # 2) номер документа
        doc_number = (self.RE_DOC.search(text) or [""])[1]

        # 3) дата
        if m := self.RE_DATE_LINE.search(text):
            doc_date = self._parse_date(m.group(1))
        elif m := self.RE_ANY_DATE.search(text):
            doc_date = self._parse_date(m.group(0))
        else:
            doc_date = datetime.now()
        doc_date = doc_date or datetime.now()

        # 4) корреспондент / назначение
        corr_part, purpose_part, purpose = [], [], False
        for ln in block_lines:
            low = ln.lower()
            if "розрах за" in low or "з реєстром" in low:
                purpose = True
            if low.startswith("кореспондент"):
                corr_part.append(ln)
            elif low.startswith("призн. платежу"):
                purpose = True
                purpose_part.append(ln)
            else:
                (purpose_part if purpose else corr_part).append(ln)

        corr_str   = " ".join(corr_part)
        purpose_str= " ".join(purpose_part)
        contr_inn  = self._find_inn(f"{corr_str} {purpose_str}")

        # 5) знак суммы
        amount = -abs(amount) if is_debit else abs(amount)

        # 6) собираем Transaction
        tx = Transaction(
            number           = doc_number,
            date             = doc_date.date(),
            amount           = amount,
            payer_inn        = (self.our_company_inn or "") if is_debit else contr_inn,
            payer_name       = (self.our_company_name or "OUR_COMPANY") if is_debit else corr_str,
            payer_account    = (self.our_account or "") if is_debit else "",
            recipient_inn    = contr_inn if is_debit else (self.our_company_inn or ""),
            recipient_name   = corr_str if is_debit else (self.our_company_name or "OUR_COMPANY"),
            recipient_account= "" if is_debit else (self.our_account or ""),
            payment_details  = purpose_str,
            date_income      = None if is_debit else doc_date.date(),
            date_outcome     = doc_date.date() if is_debit else None,
        )
        logging.debug(f"Got transaction: {tx}")
        return tx

    # ----------------------------------------------------------------- misc

    @staticmethod
    def _parse_date(s: str) -> Optional[datetime]:
        for fmt in ("%d.%m.%Y", "%d.%m.%Y %H:%M", "%d.%m.%Y %H.%M.%S"):
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                pass
        return None

    def _find_inn(self, text: str) -> str:
        if m := self.RE_INN.search(text):
            return m.group(1)
        if m := self.RE_DIGITS8_10.search(text):
            return m.group(0)
        return ""

    def _line_has_sum(self, line: str) -> bool:
        """
        «Сума» + ОБЯЗАТЕЛЬНО хотя бы одна цифра.
        """
        m = self.RE_SUM_INLINE.search(line)
        return bool(m and m.group(1))
