import logging
import re
from datetime import datetime
from typing import List, Optional

import pdfplumber

from app.parsers.base_parser import BaseBankStatementParser
from app.models.transaction import Transaction


# ------------------------------------------------------------------------------
# Basic logging setup
# ------------------------------------------------------------------------------

# Configures the root logger when this module is imported: DEBUG level and a
# "timestamp level: message" line format.
# NOTE(review): this is a module-level side effect — importing the parser
# affects logging for the whole process (basicConfig does nothing if the root
# logger already has handlers). Confirm this is intended for library code.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s: %(message)s",
)


# ------------------------------------------------------------------------------
# Main parser class
# ------------------------------------------------------------------------------

class PrivatBankPdfParser(BaseBankStatementParser):
    """
    Parser for PrivatBank PDF account statements.

    Algorithm
    ---------
    1. Read every table from every page.
    2. Drop header, summary and balance rows.
    3. Merge "broken" records (a row whose first cell is empty is treated
       as a continuation of the previous record).
    4. Turn each row into a `Transaction`.
       If the details of an incoming payment (amount > 0) mention a bank
       commission, an extra negative transaction is created for each match.
    5. Remove duplicates (number + date + amount).
    """

    # Matches a bank-commission mention inside payment details,
    # case-insensitively: a keyword ("ком" + optional whitespace/dots + "бан"
    # with an optional "ку" ending, or the second alternative spelled out
    # below), then a separator (whitespace, a dot or a colon), then the amount
    # captured as group 1 (digits, dots and commas), followed by "грн".
    # NOTE(review): the "i" letters in the second alternative look like Latin
    # "i" rather than Cyrillic "і" — confirm which spelling real statements use.
    RE_COMMISSION = re.compile(
        r'(?i)(?:ком[\s\.]*бан(?:ку)?|комiсiя)'
        r'(?:\s+|\s*\.\s*|\s*\:\s*)([\d\.,]+)\s*грн'
    )

    # ------------------------------------------------------------------ init

    def __init__(self) -> None:
        self.our_company_name:   Optional[str] = None
        self.our_company_inn:    Optional[str] = None
        self.our_company_account:Optional[str] = None
        self.our_bank_name:      Optional[str] = None
        self.our_bank_edrpou:    Optional[str] = None
        self.our_bank_branch:    Optional[str] = None

    # ------------------------------------------------------------------ public
    def parse(self, file_path: str) -> List[Transaction]:
        """Parse a PrivatBank PDF statement into a list of transactions.

        Steps: read header data from the first page, collect all non-empty
        table rows from every page, drop header/summary/balance rows, merge
        continuation rows, convert each remaining row into a ``Transaction``
        (plus one negative commission transaction per commission found in the
        details of an incoming payment) and finally drop duplicates.

        Args:
            file_path: Path to the PDF statement, passed to ``pdfplumber.open``.

        Returns:
            Unique transactions in the order they were encountered; uniqueness
            is defined by ``(number, date, amount)``.
        """
        logging.debug(f"Начало парсинга файла: {file_path}")

        with pdfplumber.open(file_path) as pdf:
            if pdf.pages:
                self._extract_our_company_data(pdf.pages[0])

            # Keep every row that has at least one non-blank cell and
            # normalise None cells to "".
            all_rows = [
                [(c or "") for c in row]
                for page in pdf.pages
                for tbl in (page.extract_tables() or [])
                for row in tbl
                if any((c or "").strip() for c in row)
            ]

        # Everything below works on plain lists of strings, so the PDF can
        # already be closed at this point.
        rows = self._merge_continuation_rows(self._filter_rows(all_rows))

        collected: List[Transaction] = []
        for row in rows:
            # Pad so the fixed column indexes below are always valid.
            row = list(row) + [""] * (7 - len(row))

            doc_number = (row[0] or "").strip()
            date_str = (row[1] or "").strip()
            details = self._to_single_line(row[3] or "")

            amount = self._parse_amount(row[2] or "")
            # Skip service rows: unparseable amount, or a zero amount
            # without a document number.
            if amount is None or (amount == 0 and not doc_number):
                continue

            # Counterparty: everything from column 4 onwards, as one line.
            contr_full = " ".join(
                " ".join((cell or "").splitlines()) for cell in row[4:]
            )
            contr_full = re.sub(r"\s+", " ", contr_full).strip()

            found_inn = self._find_inn(contr_full)
            contr_inn = found_inn or self.our_bank_edrpou or "1"
            contr_acc = self._find_account(contr_full)
            # Only strip an INN that was actually found in the text: passing
            # the fallback value (the bank code or "1") would delete those
            # characters from the counterparty name.
            contr_name = self._clean_name(contr_full, found_inn, contr_acc)

            # Parse once so the main and commission transactions share the
            # exact same date, even when the fallback "now" is used.
            op_date = self._parse_date(date_str)

            collected.append(
                self._build_transaction(
                    number=doc_number,
                    op_date=op_date,
                    amount=amount,
                    payment_details=details,
                    contragent_name=contr_name,
                    contragent_inn=contr_inn,
                    contragent_account=contr_acc,
                )
            )

            # Commissions are only extracted for incoming payments.
            if amount > 0:
                for idx, fee in enumerate(self._extract_commissions(details), 1):
                    collected.append(
                        self._build_commission_transaction(
                            f"{doc_number}_COM{idx}",
                            op_date,
                            fee,
                            details,
                        )
                    )

        # Drop duplicates, keeping the first occurrence.
        transactions: List[Transaction] = []
        seen = set()
        for tx in collected:
            key = (tx.number, tx.date, tx.amount)
            if key not in seen:
                seen.add(key)
                transactions.append(tx)

        logging.debug(f"Парсинг завершён, уникальных транзакций: {len(transactions)}")
        return transactions

    @staticmethod
    def _parse_amount(raw: str) -> Optional[float]:
        """Convert an amount cell to ``float``; return ``None`` if it is not a number."""
        # Remove thousands separators (regular, non-breaking and narrow
        # non-breaking spaces) and accept a decimal comma.
        cleaned = re.sub(r"[ \u00a0\u202f]", "", raw).replace(",", ".")
        if not re.fullmatch(r"-?[\d\.]+", cleaned):
            return None
        try:
            return float(cleaned)
        except ValueError:
            # The pattern also admits strings such as "1.2.3" or ".".
            return None

    # ---------------------------------------------------------------- filters

    def _filter_rows(self, rows: List[List[str]]) -> List[List[str]]:
        filtered: List[List[str]] = []
        for row in rows:
            row = row + [""] * (7 - len(row))

            if self._is_summary_line(row)  \
               or self._looks_like_header(row) \
               or self._is_balance_line(row):
                continue

            joined = " ".join(c.strip().lower() for c in row if c)
            if ("найменування" in joined and "рахунок" in joined and "єдрпо" in joined):
                continue

            filtered.append(row)
        return filtered

    def _is_summary_line(self, row: List[str]) -> bool:
        return (row[0] or "").strip().startswith("За ")

    def _looks_like_header(self, row: List[str]) -> bool:
        """
        Определяем, является ли строка повторным заголовком таблицы.
        Учитываем как «большой» заголовок («Номер документа …»),
        так и подзаголовок блока «Реквізити контрагента»
        («Найменування / РНОКПП / Рахунок / Банк»).
        """
        txt = " ".join(c.strip().lower() for c in row if c)

        # основной заголовок
        if re.search(
                r"(номер\s*документ|дата\s*та\s*час|призначення\s*платежу|реквізити\s*контрагента)",
                txt,
        ):
            return True

        # подзаголовок контрагента
        return "найменування" in txt and "рнокпп" in txt and "рахунок" in txt

    def _is_balance_line(self, row: List[str]) -> bool:
        txt = " ".join(c.strip().lower() for c in row if c)
        return "вхідний залишок" in txt or "вихідний залишок" in txt

    # -------------------------------------------------------- merge continuation

    @staticmethod
    def _merge_continuation_rows(rows: List[List[str]]) -> List[List[str]]:
        merged, current = [], None
        for row in rows:
            row += [""] * (7 - len(row))
            if (row[0] or "").strip():
                if current:
                    merged.append(current)
                current = row
            else:
                # продолжаем предыдущую
                if not current:
                    current = row
                    continue
                current[3] = (current[3] or "") + "\n" + (row[3] or "")
                current[4] = (current[4] or "") + "\n" + (row[4] or "")
                current[5] = (current[5] or "") + "\n" + (row[5] or "")
                current[6] = (current[6] or "") + "\n" + (row[6] or "")
        if current:
            merged.append(current)
        return merged

    # ------------------------------------------------------------- commissions

    def _extract_commissions(self, details: str) -> List[float]:
        vals = []
        for m in self.RE_COMMISSION.finditer(details):
            try:
                vals.append(float(m.group(1).replace(",", ".")))
            except ValueError:
                pass
        return vals

    # --------------------------------------------------------------- header data

    def _extract_our_company_data(self, page) -> None:
        for line in (page.extract_text() or "").split("\n"):
            line = line.strip()

            if m := re.search(r'(АТ\s+КБ\s+"[^"]+"),?\s*ЄДРПОУ\s+(\d+)', line):
                self.our_bank_name, self.our_bank_edrpou = m.groups()

            if m := re.search(r"Клієнт\s+(.+?),\s+ЄДРПОУ\s+(\d+)", line):
                self.our_company_name, self.our_company_inn = m.groups()

            if m := re.search(r"Поточний рахунок\s+№(\w+)", line):
                self.our_company_account = m.group(1)

    # ------------------------------------------------------------- misc helpers

    @staticmethod
    def _to_single_line(text: str) -> str:
        return " ".join(text.split()) if text else ""

    @staticmethod
    def _parse_date(date_str: str) -> datetime:
        date_str = date_str.replace("\n", " ").strip()
        for fmt in ("%d.%m.%Y %H:%M", "%d.%m.%Y", "%d/%m/%Y %H:%M"):
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue
        logging.debug(f"Не удалось распарсить дату: {date_str}")
        return datetime.now()

    @staticmethod
    def _find_inn(text: str) -> str:
        if m := re.search(r"ЄДРПОУ:\s*(\d+)", text):
            return m.group(1)
        if m := re.search(r"\b\d{8,10}\b", text):
            return m.group(0)
        return ""

    @staticmethod
    def _find_account(text: str) -> str:
        if m := re.search(r"Рахунок:\s*(UA[\w\d]+)", text):
            return m.group(1).replace(" ", "")
        if m := re.search(r"\b(UA\d{2,})\b", text):
            return m.group(1)
        return ""

    @staticmethod
    def _clean_name(text: str, inn: str, acc: str) -> str:
        cleaned = re.sub(r"ЄДРПОУ:\s*\d+", "", text)
        if inn:
            cleaned = cleaned.replace(inn, "")
        cleaned = re.sub(r"Рахунок:\s*UA[\w\d]+", "", cleaned)
        if acc:
            cleaned = cleaned.replace(acc, "")
        return re.sub(r"\s+", " ", cleaned).strip()

    # ------------------------------------------------------- build transactions

    def _build_commission_transaction(
        self,
        doc_number: str,
        op_date: datetime,
        comm_value: float,
        original_details: str,
    ) -> Transaction:
        """Build the outgoing transaction for a bank commission.

        Our company is the payer and the bank is the recipient; header values
        that were not extracted fall back to the defaults used below. The
        amount is always stored as negative, and the operation date is used
        both as the transaction date and as the outcome date.

        Args:
            doc_number: Number to assign to the commission transaction.
            op_date: Date/time of the original operation (may be falsy).
            comm_value: Commission amount; its sign is ignored.
            original_details: Details of the operation the commission came from.
        """
        op_day = op_date.date() if op_date else None

        return Transaction(
            number=doc_number,
            date=op_day,
            amount=-abs(comm_value),
            payer_inn=self.our_company_inn or "",
            payer_name=self.our_company_name or "OUR_COMPANY",
            payer_account=self.our_company_account or "",
            recipient_inn=self.our_bank_edrpou or "14360570",
            recipient_name=self.our_bank_name or 'ПРИВАТБАНК',
            recipient_account="",
            payment_details=f"Комісія банку (з операції: {original_details})",
            date_income=None,
            date_outcome=op_day,
        )

    def _build_transaction(
        self,
        number: str,
        op_date: datetime,
        amount: float,
        payment_details: str,
        contragent_name: str,
        contragent_inn: str,
        contragent_account: str,
    ) -> Transaction:
        """Build a ``Transaction`` for one statement row.

        For a non-negative amount (income) the counterparty is the payer and
        our company the recipient; for a negative amount (expense) the roles
        are swapped. The operation date goes into ``date_income`` or
        ``date_outcome`` accordingly, and the other one is ``None``.

        A falsy ``op_date`` is handled uniformly: all date fields become
        ``None`` instead of failing on ``op_date.date()``.

        Args:
            number: Document number.
            op_date: Date/time of the operation (may be falsy).
            amount: Signed amount; the sign selects income vs. expense.
            payment_details: Payment details text (stripped before storing).
            contragent_name: Counterparty name.
            contragent_inn: Counterparty tax code.
            contragent_account: Counterparty account.
        """
        op_day = op_date.date() if op_date else None

        ours = (
            self.our_company_name or "OUR_COMPANY",
            self.our_company_inn or "",
            self.our_company_account or "",
        )
        theirs = (contragent_name, contragent_inn, contragent_account)

        if amount >= 0:  # income
            payer, recipient = theirs, ours
            date_inc, date_out = op_day, None
        else:            # expense
            payer, recipient = ours, theirs
            date_inc, date_out = None, op_day

        payer_name, payer_inn, payer_acc = payer
        recip_name, recip_inn, recip_acc = recipient

        return Transaction(
            number=number,
            date=op_day,
            amount=amount,
            payer_inn=payer_inn,
            payer_name=payer_name,
            payer_account=payer_acc,
            recipient_inn=recip_inn,
            recipient_name=recip_name,
            recipient_account=recip_acc,
            payment_details=payment_details.strip(),
            date_income=date_inc,
            date_outcome=date_out,
        )