# ubuntu_python_www/myworld_project/myworld_app/tests.py

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape a single JTBC news article with headless Chrome and print its title
# and body HTML.
#
# Chrome is started headless. NOTE(review): --no-sandbox and
# --disable-dev-shm-usage are presumably set for a container/CI environment;
# confirm they are still required where this runs.
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

url = "https://news.jtbc.co.kr/article/NB12243324"

# Only the browser interaction lives inside the try block. The finally clause
# guarantees the browser is shut down even when the page load fails, and lets
# us release it as soon as the HTML has been captured.
try:
    driver.get(url)
    page_source = driver.page_source
finally:
    driver.quit()

soup = BeautifulSoup(page_source, "html.parser")

# Print the page title. soup.title is None when the document has no <title>
# element, so guard against it instead of failing with AttributeError.
title_tag = soup.title
title = title_tag.get_text(strip=True) if title_tag is not None else None
if title is not None:
    print("✅ 제목:", title)
else:
    print("❌ 제목을 찾을 수 없습니다.")

# Extract the article body, keeping its HTML markup.
content_div = soup.find("div", id="ijam_content")
if content_div is None:
    print("❌ 본문을 찾을 수 없습니다.")
else:
    # Drop the unwanted (advertisement) element embedded in the body.
    for unwanted in content_div.select("#reo_0GqQ"):
        unwanted.decompose()

    # Inner HTML of the body container, so <img> tags are preserved.
    content_html = content_div.decode_contents()

    print("\n✅ 본문(텍스트 + 이미지 태그 포함):\n")
    print(content_html)