# scripts_zubabot/agregar_planos.py
from __future__ import annotations
import os
import sys
import json
import re
import unicodedata
import datetime as dt
from decimal import Decimal
from urllib.request import Request, urlopen
# --------------------
# Boot libs (Rocketbot portable)
# --------------------
base_dir = os.path.dirname(sys.executable) # ...\Rocketbot
libs_dir = os.path.join(base_dir, "py_libs", "py310")
sys.path.insert(0, libs_dir)
# Rocketbot sometimes already has an old pyparsing loaded in memory
for k in list(sys.modules.keys()):
    if k == "pyparsing" or k.startswith("pyparsing."):
        del sys.modules[k]
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.oauth2 import service_account
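# These imports are expected to resolve from the portable py_libs/py310 folder
# added to sys.path above (the google-api-python-client and google-auth packages).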
# --------------------
# Rocketbot vars helpers
# --------------------
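# GetVar / SetVar are injected by the Rocketbot runtime when this script runs
# inside a bot: they read and write bot variables. The wrappers below tolerate
# missing variables and Rocketbot's "ERROR_NOT_VAR" sentinel value.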
def _missing(v) -> bool:
    if v is None:
        return True
    if isinstance(v, str):
        s = v.strip()
        return s == "" or s == "ERROR_NOT_VAR"
    return False
def _gv(name, default=None):
    try:
        v = GetVar(name)
    except Exception:
        return default
    return default if _missing(v) else v
def _gvs(name, default="") -> str:
    v = _gv(name, default)
    if v is None:
        return default
    s = str(v).strip()
    return default if (s == "" or s == "ERROR_NOT_VAR") else s
def _sv(name, value):
    try:
        SetVar(name, value)
    except Exception:
        pass
# --------------------
# Google Auth
# --------------------
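# {scopes_api_google} below is a Rocketbot placeholder: the runtime substitutes
# the value of the scopes_api_google bot variable (presumably a Python list of
# Google API scope URLs covering Docs and Drive) before executing the script.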
SCOPES = {scopes_api_google}
def _load_json(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def get_services(credentials_json_path, impersonated_user):
    info = _load_json(credentials_json_path)
    if not isinstance(info, dict) or info.get("type") != "service_account":
        raise RuntimeError("gdoc_sa_json must point to a service account JSON file.")
    impersonated_user = (impersonated_user or "").strip()
    if not impersonated_user:
        raise RuntimeError("The gdoc_impersonated_user variable is missing.")
    creds = service_account.Credentials.from_service_account_file(
        credentials_json_path,
        scopes=SCOPES,
        subject=impersonated_user,
    )
    docs = build("docs", "v1", credentials=creds, cache_discovery=False)
    drive = build("drive", "v3", credentials=creds, cache_discovery=False)
    return docs, drive, "service_account_impersonated"
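# Note: impersonating a user via subject= requires domain-wide delegation to be
# granted to this service account (for the scopes in SCOPES) in the Google
# Workspace admin console.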
# --------------------
# Drive helpers
# --------------------
def extract_doc_id_from_url(url):
    m = re.search(r"/document/d/([a-zA-Z0-9_-]+)", url or "")
    return m.group(1) if m else ""
def ensure_docs_api_compatible(drive_service, file_id: str):
    """
    If the ID is a shortcut or NOT a native Google Doc (e.g. a .docx),
    create a copy converted to application/vnd.google-apps.document and return the new ID.
    """
    meta = drive_service.files().get(
        fileId=file_id,
        fields="id,name,mimeType,shortcutDetails",
        supportsAllDrives=True
    ).execute()
    # Shortcut -> target
    if meta.get("mimeType") == "application/vnd.google-apps.shortcut":
        target = (meta.get("shortcutDetails") or {}).get("targetId", "")
        if not target:
            raise RuntimeError("The file is a shortcut but has no targetId.")
        file_id = target
        meta = drive_service.files().get(
            fileId=file_id,
            fields="id,name,mimeType",
            supportsAllDrives=True
        ).execute()
    # Convert if not a native Google Doc
    if meta.get("mimeType") != "application/vnd.google-apps.document":
        new_name = (meta.get("name") or "Documento") + " (Google Docs)"
        converted = drive_service.files().copy(
            fileId=file_id,
            body={"name": new_name, "mimeType": "application/vnd.google-apps.document"},
            fields="id,name,mimeType",
            supportsAllDrives=True
        ).execute()
        return converted["id"], converted, True
    return file_id, meta, False
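# Note: the conversion path above creates a new Drive file (a converted copy of
# the .docx or other non-native file); the original is left untouched and all
# later edits go to the copy's ID.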
# --------------------
# Docs GET (tabs)
# --------------------
def docs_get(docs_service, doc_id: str):
    # includeTabsContent=True for documents that use Tabs
    try:
        return docs_service.documents().get(documentId=doc_id, includeTabsContent=True).execute()
    except TypeError:
        return docs_service.documents().get(documentId=doc_id).execute()
def _walk_tabs(tabs):
    if not tabs:
        return
    for t in tabs:
        yield t
        for x in _walk_tabs(t.get("childTabs") or []):
            yield x
def _get_tab_body_content(doc, tab_id):
    tabs = doc.get("tabs")
    if not tabs:
        return (doc.get("body") or {}).get("content", [])
    for t in _walk_tabs(tabs):
        tid = ((t.get("tabProperties") or {}).get("tabId")) or ""
        if tid == (tab_id or ""):
            dtab = t.get("documentTab") or {}
            return (dtab.get("body") or {}).get("content", [])
    return []
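# docs_get requests includeTabsContent so documents that use the Tabs feature
# return the body of every tab; the TypeError fallback covers older
# google-api-python-client builds whose generated method does not accept that
# parameter yet.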
# --------------------
# Marker search (BODY only, robust)
# --------------------
def _is_alnum(ch: str) -> bool:
    return ch.isalnum()
def _norm_alnum(s: str) -> str:
    if s is None:
        return ""
    s = unicodedata.normalize("NFKD", str(s)).lower()
    out = []
    for ch in s:
        if unicodedata.category(ch) == "Mn":
            continue
        if ch.isalnum():
            out.append(ch)
    return "".join(out)
def _iter_text_chars_from_body(doc, tab_id):
    """
    Yields (docIndex:int, ch:str) in document order, BODY content only.
    """
    content = _get_tab_body_content(doc, tab_id)
    def walk(content_list):
        for el in content_list or []:
            if not isinstance(el, dict):
                continue
            p = el.get("paragraph")
            if p:
                for pe in p.get("elements", []):
                    tr = pe.get("textRun")
                    if not tr:
                        continue
                    txt = tr.get("content", "")
                    st = pe.get("startIndex")
                    if st is None or txt is None:
                        continue
                    for i, ch in enumerate(txt):
                        yield (st + i, ch)
                continue
            t = el.get("table")
            if t:
                for row in t.get("tableRows", []):
                    for cell in row.get("tableCells", []):
                        yield from walk(cell.get("content", []))
                continue
            toc = el.get("tableOfContents")
            if toc:
                yield from walk(toc.get("content", []))
                continue
    yield from walk(content)
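# Example: with the default marker "INSERTAR_PLANOS", the search below also
# matches body text like "insertar planos" or "Insertar-Planos", because both
# the marker and the body are reduced to lowercase alphanumeric characters
# before matching.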
def find_marker_in_body(doc, marker: str):
    """
    Searches for the marker in the BODY only (not headers/footers); supports tabs.
    Matching: normalized alphanumeric (fuzzy).
    Returns a dict: {tabId, start, end, mode}
    """
    target = _norm_alnum(marker)
    if not target:
        return None
    tabs = doc.get("tabs")
    tab_ids = [None]
    if tabs:
        tab_ids = []
        for t in _walk_tabs(tabs):
            tid = ((t.get("tabProperties") or {}).get("tabId")) or ""
            tab_ids.append(tid)
    for tab_id in tab_ids:
        norm_chars = []
        norm_to_docidx = []  # norm position -> real doc index of that char
        for doc_i, ch in _iter_text_chars_from_body(doc, tab_id):
            if _is_alnum(ch):
                norm_chars.append(ch.lower())
                norm_to_docidx.append(doc_i)
        norm_text = "".join(norm_chars)
        pos = norm_text.find(target)
        if pos != -1:
            start_doc = norm_to_docidx[pos]
            end_doc = norm_to_docidx[pos + len(target) - 1] + 1
            return {"tabId": tab_id, "start": int(start_doc), "end": int(end_doc), "mode": "fuzzy_alnum"}
    return None
# --------------------
# Docs batch helpers (tab-aware)
# --------------------
def _loc(index, tab_id):
    d = {"index": int(index)}
    if tab_id:
        d["tabId"] = tab_id
    return d
def _range(start, end, tab_id):
    d = {"startIndex": int(start), "endIndex": int(end)}
    if tab_id:
        d["tabId"] = tab_id
    return d
def _dim_pt(x: float):
    return {"magnitude": float(x), "unit": "PT"}
def _batch_update(docs_service, doc_id, reqs, chunk=900):
    for i in range(0, len(reqs), chunk):
        docs_service.documents().batchUpdate(
            documentId=doc_id,
            body={"requests": reqs[i:i + chunk]}
        ).execute()
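# _batch_update splits long request lists into chunks so a single batchUpdate
# call never carries an unbounded payload; 900 is an assumed-safe chunk size,
# not a documented API limit.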
# --------------------
# Page box (max size)
# --------------------
def _get_pt(dimension_obj, default_val):
    try:
        if not dimension_obj:
            return float(default_val)
        mag = dimension_obj.get("magnitude", default_val)
        return float(mag)
    except Exception:
        return float(default_val)
def get_page_limits_pt(doc):
    """
    Returns (max_w_pt, max_h_pt) based on pageSize minus margins.
    Falls back to Letter 8.5x11 with 1 in margins if missing.
    """
    ds = doc.get("documentStyle") or {}
    ps = ds.get("pageSize") or {}
    w_pt = _get_pt(ps.get("width"), 612.0)   # 8.5 in * 72
    h_pt = _get_pt(ps.get("height"), 792.0)  # 11 in * 72
    ml = _get_pt(ds.get("marginLeft"), 72.0)
    mr = _get_pt(ds.get("marginRight"), 72.0)
    mt = _get_pt(ds.get("marginTop"), 72.0)
    mb = _get_pt(ds.get("marginBottom"), 72.0)
    max_w = max(100.0, w_pt - ml - mr - 12.0)
    max_h = max(100.0, h_pt - mt - mb - 12.0)
    return max_w, max_h
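# Example: for the Letter fallback (612x792 pt) with 1 in (72 pt) margins this
# yields 612 - 72 - 72 - 12 = 456 pt of usable width and
# 792 - 72 - 72 - 12 = 636 pt of usable height for an inline image.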
# --------------------
# Image URL + dimensions
# --------------------
def extract_drive_file_id(url: str) -> str:
    if not url:
        return ""
    # /file/d/<id>/... or /d/<id>/...
    m = re.search(r"/d/([a-zA-Z0-9_-]{10,})", url)
    if m:
        return m.group(1)
    # ?id=<id>
    m = re.search(r"[?&]id=([a-zA-Z0-9_-]{10,})", url)
    if m:
        return m.group(1)
    return ""
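# Example: links such as https://drive.google.com/file/d/<id>/view and
# https://drive.google.com/open?id=<id> both resolve to the same file ID here.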
def sniff_png_wh(data: bytes):
    # PNG: width/height live in the IHDR chunk (bytes 16..24)
    if len(data) >= 24 and data[:8] == b"\x89PNG\r\n\x1a\n":
        w = int.from_bytes(data[16:20], "big")
        h = int.from_bytes(data[20:24], "big")
        return w, h
    return None
def sniff_gif_wh(data: bytes):
    if len(data) >= 10 and (data[:6] in (b"GIF87a", b"GIF89a")):
        w = int.from_bytes(data[6:8], "little")
        h = int.from_bytes(data[8:10], "little")
        return w, h
    return None
def sniff_jpeg_wh(data: bytes):
    # JPEG: scan for an SOFn marker and read the frame dimensions
    if len(data) < 4 or data[0:2] != b"\xFF\xD8":
        return None
    i = 2
    n = len(data)
    while i + 9 < n:
        if data[i] != 0xFF:
            i += 1
            continue
        # skip fill 0xFF bytes
        while i < n and data[i] == 0xFF:
            i += 1
        if i >= n:
            break
        marker = data[i]
        i += 1
        # markers without a length field
        if marker in (0xD8, 0xD9):
            continue
        if i + 1 >= n:
            break
        seglen = int.from_bytes(data[i:i+2], "big")
        if seglen < 2 or i + seglen > n:
            break
        # SOF0..SOF3, SOF5..SOF7, SOF9..SOF11, SOF13..SOF15
        if marker in (0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF):
            # segment layout: [len:2][precision:1][height:2][width:2]...
            if i + 7 < n:
                h = int.from_bytes(data[i+3:i+5], "big")
                w = int.from_bytes(data[i+5:i+7], "big")
                return w, h
        i += seglen
    return None
def sniff_image_wh_from_url(url: str, timeout=20):
    """
    Tries to read only the header (Range request) to detect image dimensions.
    """
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Range": "bytes=0-65535",
    }
    req = Request(url, headers=headers)
    with urlopen(req, timeout=timeout) as resp:
        data = resp.read(65536)
    for fn in (sniff_png_wh, sniff_gif_wh, sniff_jpeg_wh):
        wh = fn(data)
        if wh:
            return wh[0], wh[1]
    return None, None
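# The 64 KiB Range request above is normally enough to reach the PNG IHDR, the
# GIF header or a JPEG SOF segment; if the server ignores Range or the format
# is not recognized, (None, None) is returned and a fallback size is used later.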
def resolve_image(drive_service, url: str):
    """
    Returns: (uri_for_docs, w_px, h_px, source)
    If it is a Drive link and the file/metadata cannot be accessed, returns uri="" (not found).
    """
    if not url:
        return ("", None, None, "empty")
    fid = extract_drive_file_id(url)
    if fid:
        try:
            meta = drive_service.files().get(
                fileId=fid,
                fields="id,mimeType,imageMediaMetadata(width,height)",
                supportsAllDrives=True
            ).execute()
            im = meta.get("imageMediaMetadata") or {}
            w = im.get("width")
            h = im.get("height")
            uri = f"https://drive.google.com/uc?export=download&id={fid}"
            return (uri, w, h, "drive")
        except Exception as e:
            return ("", None, None, f"drive_error:{type(e).__name__}")
    # External URL
    w = h = None
    try:
        w, h = sniff_image_wh_from_url(url)
    except Exception:
        w, h = None, None
    return (url, w, h, "url")
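# Caveat: the Docs API fetches insertInlineImage URIs server-side, so the image
# has to be reachable without the end user's session (for Drive files, sharing
# settings that allow link access). Otherwise the insert request can fail even
# though the metadata lookup above succeeded.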
def fit_object_size_pt(w_px, h_px, max_w_pt, max_h_pt):
    """
    Converts px -> pt at 0.75 (96 dpi -> 72 pt) and scales so the image does not
    exceed max_w_pt / max_h_pt.
    """
    if not w_px or not h_px:
        # safe fallback when dimensions are unknown
        w_pt = float(max_w_pt)
        h_pt = float(min(max_h_pt, max_w_pt * 0.75))
        return w_pt, h_pt
    w_pt0 = float(w_px) * 0.75
    h_pt0 = float(h_px) * 0.75
    if w_pt0 <= 0 or h_pt0 <= 0:
        w_pt = float(max_w_pt)
        h_pt = float(min(max_h_pt, max_w_pt * 0.75))
        return w_pt, h_pt
    scale = min(max_w_pt / w_pt0, max_h_pt / h_pt0, 1.0)
    return w_pt0 * scale, h_pt0 * scale
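# Example (hypothetical numbers): a 1600x1200 px image becomes 1200x900 pt at
# the 0.75 px->pt factor; with ~456x636 pt of usable page it is scaled by
# min(456/1200, 636/900, 1.0) = 0.38, ending up at roughly 456x342 pt.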
# --------------------
# Insert planos at marker
# --------------------
def insert_planos_at_marker(docs_service, drive_service, doc_id: str, marker: str, url1: str, url2: str):
    # Rocketbot flags: always start out as False
    _sv("plano1_found", "False")
    _sv("plano2_found", "False")
    doc = docs_get(docs_service, doc_id)
    hit = find_marker_in_body(doc, marker)
    if not hit:
        _sv("gdoc_planos_marker_found", "0")
        _sv("gdoc_planos_marker", marker)
        raise RuntimeError("Marker not found in the document BODY: " + str(marker))
    tab_id = hit.get("tabId")
    start = int(hit["start"])
    end = int(hit["end"])
    _sv("gdoc_planos_marker_found", "1")
    _sv("gdoc_planos_marker_mode", hit.get("mode", ""))
    _sv("gdoc_planos_marker_tabId", tab_id or "")
    _sv("gdoc_planos_marker_start", str(start))
    _sv("gdoc_planos_marker_end", str(end))
    max_w_pt, max_h_pt = get_page_limits_pt(doc)
    _sv("gdoc_planos_max_w_pt", str(max_w_pt))
    _sv("gdoc_planos_max_h_pt", str(max_h_pt))
    # Resolve URLs + dimensions (and set the found flags)
    uri1, w1, h1, src1 = resolve_image(drive_service, url1)  # plano producto (product drawing)
    if uri1:
        _sv("plano1_found", "True")
    uri2, w2, h2, src2 = resolve_image(drive_service, url2)  # plano piso (floor plan)
    if uri2:
        _sv("plano2_found", "True")
    # Validation: if either is missing, stop (Rocketbot can already see which flags ended up True/False)
    if not uri1 or not uri2:
        if not uri1 and not uri2:
            raise RuntimeError("Neither plano was found (plano 1 and plano 2). Check url_plano_producto and url_plano_piso.")
        if not uri1:
            raise RuntimeError("Plano 1 was not found (url_plano_producto). Check the URL / Drive permissions.")
        raise RuntimeError("Plano 2 was not found (url_plano_piso). Check the URL / Drive permissions.")
    _sv("gdoc_planos_uri1", uri1)
    _sv("gdoc_planos_uri2", uri2)
    _sv("gdoc_planos_src1", src1)
    _sv("gdoc_planos_src2", src2)
    _sv("gdoc_planos_w1_px", "" if w1 is None else str(w1))
    _sv("gdoc_planos_h1_px", "" if h1 is None else str(h1))
    _sv("gdoc_planos_w2_px", "" if w2 is None else str(w2))
    _sv("gdoc_planos_h2_px", "" if h2 is None else str(h2))
    w1_pt, h1_pt = fit_object_size_pt(w1, h1, max_w_pt, max_h_pt)
    w2_pt, h2_pt = fit_object_size_pt(w2, h2, max_w_pt, max_h_pt)
    # Robust construction that does NOT depend on index+1 bookkeeping:
    # everything is inserted at the same start index, so later requests push the
    # earlier ones to the right and the final order is [img1][\n][img2][\n].
    reqs = [
        {"deleteContentRange": {"range": _range(start, end, tab_id)}},
        # 1) trailing newline
        {"insertText": {"location": _loc(start, tab_id), "text": "\n"}},
        # 2) img2 (ends up before the trailing newline)
        {"insertInlineImage": {
            "location": _loc(start, tab_id),
            "uri": uri2,
            "objectSize": {"width": _dim_pt(w2_pt), "height": _dim_pt(h2_pt)},
        }},
        # 3) newline between the images (ends up before img2)
        {"insertText": {"location": _loc(start, tab_id), "text": "\n"}},
        # 4) img1 (ends up before the newline between the images)
        {"insertInlineImage": {
            "location": _loc(start, tab_id),
            "uri": uri1,
            "objectSize": {"width": _dim_pt(w1_pt), "height": _dim_pt(h1_pt)},
        }},
    ]
    _batch_update(docs_service, doc_id, reqs)
# Rocketbot quirk FIX
globals().update(locals())
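# The globals().update(locals()) call above is the usual workaround for how
# Rocketbot exec()s scripts: names defined at module level can land in a locals
# dict that the functions above cannot see, so everything defined so far is
# copied into globals before the main block runs.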
# --------------------
# MAIN
# --------------------
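# Rocketbot variables consumed here (assumed to be set by earlier bot steps):
# gdoc_marker_planos, url_plano_producto, url_plano_piso, gdoc_sa_json,
# gdoc_impersonated_user, scopes_api_google and current_url. Diagnostic /
# status variables such as gdoc_planos_status, gdoc_planos_error, plano1_found
# and plano2_found are written back for the flow to branch on.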
try:
    # Global flags: always start out as False (in case we fail before reaching the function)
    _sv("plano1_found", "False")
    _sv("plano2_found", "False")
    marker = _gvs("gdoc_marker_planos", "INSERTAR_PLANOS")
    url1 = _gvs("url_plano_producto", "")
    url2 = _gvs("url_plano_piso", "")
    cred_path = _gvs("gdoc_sa_json", "")
    if cred_path == "":
        raise RuntimeError("gdoc_sa_json is missing (path to the service account JSON)")
    if not os.path.isabs(cred_path):
        cred_path = os.path.join(base_dir, cred_path)
    if not os.path.exists(cred_path):
        raise RuntimeError("Service account JSON does not exist: " + cred_path)
    impersonated_user = _gvs("gdoc_impersonated_user", "")
    _sv("gdoc_credentials_used", cred_path)
    _sv("gdoc_impersonated_user_used", impersonated_user)
    docs_service, drive_service, auth_mode = get_services(cred_path, impersonated_user)
    _sv("gdoc_auth_mode", auth_mode)
    # doc_id comes from current_url, which is set by duplicar_plantilla_contrato.py
    url_doc = _gvs("current_url", "")
    _sv("gdoc_url_used", url_doc)
    doc_id = extract_doc_id_from_url(url_doc)
    if doc_id == "":
        raise RuntimeError("Could not extract a documentId from the URL: " + url_doc)
    doc_id_final, meta_file, converted = ensure_docs_api_compatible(drive_service, doc_id)
    _sv("gdoc_original_id", doc_id)
    _sv("gdoc_id", doc_id_final)
    _sv("gdoc_file_name", meta_file.get("name", ""))
    _sv("gdoc_file_mimeType", meta_file.get("mimeType", ""))
    _sv("gdoc_converted", "1" if converted else "0")
    _sv("gdoc_url_final", f"https://docs.google.com/document/d/{doc_id_final}/edit")
    insert_planos_at_marker(docs_service, drive_service, doc_id_final, marker, url1, url2)
    _sv("gdoc_planos_status", "OK")
    _sv("gdoc_planos_error", "")
except HttpError as e:
    _sv("gdoc_planos_status", "ERROR")
    _sv("gdoc_planos_error", "HttpError: " + str(e))
    raise
except Exception as e:
    _sv("gdoc_planos_status", "ERROR")
    _sv("gdoc_planos_error", str(e))
    raise