import os
import re

from bs4 import BeautifulSoup

# Folder of the exported conversation: the input HTML files are read from
# here and the extracted text files are written back to the same place.
BASE_PATH = (
    r"C:\Users\Erick Rodrigues\Documents\PDF"
    r"\your_instagram_activity\messages\inbox"
    r"\isadoramaria_1654806459245582"
)

# Names of the HTML files to process: message_1.html .. message_4.html.
FILES = [f"message_{number}.html" for number in range(1, 5)]

# A time of day such as "11:10" or "9:05" (not part of a longer digit run).
_CLOCK_RE = re.compile(r"(?<!\d)\d{1,2}:\d{2}(?!\d)")
# A four-digit calendar year such as "2025".
_YEAR_RE = re.compile(r"(?<!\d)(?:19|20)\d{2}(?!\d)")
# Parent tags whose text is never rendered in the page body.
_HIDDEN_PARENTS = frozenset(
    {"style", "script", "head", "title", "meta", "[document]"}
)


def looks_like_timestamp(text):
    """Return True if *text* resembles a timestamp like "Jun 27, 2025 11:10 pm".

    A bare "contains ',' and ':'" check also accepts ordinary sentences and
    CSS rules, so a comma, a four-digit year and an H:MM time are all
    required.
    """
    return (
        "," in text
        and _YEAR_RE.search(text) is not None
        and _CLOCK_RE.search(text) is not None
    )


def parse_messages(texts):
    """Turn a flat list of text fragments into formatted message lines.

    The list is scanned with a three-item window (sender, text, timestamp).
    When the third item looks like a timestamp the triple is emitted as
    ``"[timestamp] sender: text"`` and the window jumps past it; otherwise
    the window slides forward by one item.

    Args:
        texts: Non-empty, already-stripped text fragments in document order.

    Returns:
        A list of formatted message strings (empty if nothing matched).
    """
    messages = []
    index = 0
    while index + 2 < len(texts):
        sender, body, stamp = texts[index:index + 3]
        # A timestamp in the sender slot means the window is misaligned
        # (e.g. the previous entry had no text), so do not emit it.
        if looks_like_timestamp(stamp) and not looks_like_timestamp(sender):
            messages.append(f"[{stamp}] {sender}: {body}")
            index += 3
        else:
            index += 1
    return messages


def read_visible_texts(path):
    """Return the stripped, non-empty rendered text fragments of an HTML file.

    Text that sits inside <style>, <script>, <title>, <head> or directly at
    document level (e.g. the doctype) is skipped because it is not part of
    the visible page.
    """
    with open(path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    texts = []
    for node in soup.find_all(string=True):
        if node.parent.name in _HIDDEN_PARENTS:
            continue
        stripped = node.strip()
        if stripped:
            texts.append(stripped)
    return texts


def main():
    """Extract the messages of every file in FILES into a sibling .txt file."""
    for file_name in FILES:
        file_path = os.path.join(BASE_PATH, file_name)
        output_path = os.path.join(BASE_PATH, f"mensagens_extraidas_{file_name.replace('.html','.txt')}")

        messages = parse_messages(read_visible_texts(file_path))

        with open(output_path, 'w', encoding='utf-8') as f:
            f.writelines(msg + '\n' for msg in messages)

        print(f"Arquivo {output_path} criado com {len(messages)} mensagens.")


if __name__ == "__main__":
    main()
