"""Scrape word entries from aha-dic.com and store them in MySQL.

For every search term listed in a CSV file the script fetches the dictionary
page, extracts the word, its Korean phonetic rendering, the pronunciation MP3
file name, the meanings, an example sentence and the part of speech, downloads
the MP3 and inserts one row into the ``ew_word`` table.
"""

import os

import pandas as pd
import pymysql
import requests
from bs4 import BeautifulSoup

SITE_ROOT = 'http://aha-dic.com'
SEARCH_URL = f'{SITE_ROOT}/View.asp'
# Seconds to wait for the server; without a timeout ``requests`` can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10


def fetch_search_results(search_term):
    """Return the HTML of the dictionary page for *search_term*.

    The term is sent through ``params`` so that spaces, ``&`` and other
    reserved characters are percent-encoded instead of corrupting the URL.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
    """
    response = requests.get(
        SEARCH_URL,
        params={'word': search_term},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.text


def _extract_phonetic_kor(result_div):
    """Return the ``phoneticKor`` span as HTML, accent spans turned into <b>."""
    phonetic_kor_span = result_div.find('span', class_='phoneticKor')
    if phonetic_kor_span is None:
        return ''
    for accent_span in phonetic_kor_span.find_all('span', class_='accent'):
        accent_span.name = 'b'   # turn the accent <span> into a <b> tag
        accent_span.attrs = {}   # drop every attribute
    # NOTE(review): the original followed this with
    # ``.replace('', '').replace('', '')``, which is a no-op. The arguments
    # were presumably tag literals that got lost; if the outer wrapper is
    # meant to be stripped, confirm and switch to ``decode_contents()``.
    return str(phonetic_kor_span)


def _download_pronunciation(result_div, download_folder):
    """Download the entry's MP3 into *download_folder*; return its file name.

    Returns an empty string when the page has no ``playSound middle`` span or
    the span carries no ``mp3`` attribute.
    """
    play_sound = result_div.find('span', class_='playSound middle')
    # ``.get`` instead of indexing: a span without the attribute must not
    # abort the whole entry with a KeyError.
    mp3_url = play_sound.get('mp3', '') if play_sound is not None else ''
    if not mp3_url:
        return ''
    mp3_filename = os.path.basename(mp3_url)
    download_file(
        f"{SITE_ROOT}{mp3_url}",
        os.path.join(download_folder, mp3_filename),
    )
    return mp3_filename


def _extract_panels(result_div):
    """Return ``(example_sentence, part_of_speech)`` from the panel fieldsets."""
    example_sentence = ''
    part_of_speech = ''
    for panel in result_div.find_all('fieldset', class_='panel'):
        legend = panel.find('legend')
        span = panel.find('span')
        if legend is None or span is None:
            continue
        title = legend.get_text(strip=True)
        if '예문' in title:
            # NOTE(review): the search literal was garbled into raw line
            # breaks in the source; '<br/>' is the reconstruction (the form
            # BeautifulSoup serialises line-break tags to) - confirm.
            example_sentence = (
                span.decode_contents().replace('<br/>', '\n').strip()
            )
        elif '품사' in title:
            part_of_speech = span.get_text(strip=True)
    # Re-parse to strip any remaining markup from the example sentence.
    example_text = BeautifulSoup(
        example_sentence, 'html.parser'
    ).get_text(strip=True)
    return example_text, part_of_speech


def parse_results(html, download_folder):
    """Extract one dictionary entry from *html*.

    Side effect: the pronunciation MP3, if any, is downloaded into
    *download_folder*.

    Returns:
        A dict with the keys ``Word``, ``PhoneticKor``, ``MP3_File``,
        ``Meanings`` (list items joined with ``'; '``), ``ExampleSentence``
        and ``PartOfSpeech``; an empty dict when the page contains no
        ``div#container_result``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    result_div = soup.find('div', id='container_result')
    if result_div is None:
        return {}

    word_span = result_div.find('span', class_='word')
    meanings = [
        li.get_text(strip=True)
        for ul in result_div.find_all('ul')
        for li in ul.find_all('li')
    ]
    example_sentence, part_of_speech = _extract_panels(result_div)

    return {
        'Word': word_span.get_text(strip=True) if word_span is not None else '',
        'PhoneticKor': _extract_phonetic_kor(result_div),
        'MP3_File': _download_pronunciation(result_div, download_folder),
        'Meanings': '; '.join(meanings),
        'ExampleSentence': example_sentence,
        'PartOfSpeech': part_of_speech,
    }


def download_file(url, local_filename):
    """Stream *url* to *local_filename*.

    Best effort: network and file-system errors are reported on stdout and
    not re-raised, so a missing MP3 does not prevent the word being saved.
    """
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Downloaded: {local_filename}")
    except (requests.RequestException, OSError) as e:
        print(f"Failed to download {url}: {e}")


def save_to_database(data, connection, level='1'):
    """Insert one parsed entry into the ``ew_word`` table and commit.

    Args:
        data: dict as returned by ``parse_results``.
        connection: open pymysql connection.
        level: value for the ``level`` column (defaults to ``'1'``).

    Raises:
        KeyError: if *data* lacks one of the expected keys.
    """
    sql = """
        INSERT INTO ew_word
            (level, word, phonetic_kor, phonetic_symbol, mp3_file,
             meanings, examplesentence, partofspeech)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    values = (
        level,
        data['Word'],
        data['PhoneticKor'],
        '',  # phonetic_symbol is stored empty
        data['MP3_File'],
        data['Meanings'],
        data['ExampleSentence'],
        data['PartOfSpeech'],
    )
    with connection.cursor() as cursor:
        cursor.execute(sql, values)
    connection.commit()


def main():
    """Process every term in ``search_terms.csv`` and report the failures."""
    search_terms_file = 'search_terms.csv'
    download_folder = r'D:\_SUNGRO_DEV\python_project\english_project\mp3'
    os.makedirs(download_folder, exist_ok=True)

    # SECURITY: credentials should not live in source control. Environment
    # variables take precedence; the literals remain only as a fallback so
    # existing setups keep working. Rotate the password and remove it here.
    connection = pymysql.connect(
        host=os.environ.get('EW_DB_HOST', "syye.net"),
        user=os.environ.get('EW_DB_USER', "pythonUser"),
        password=os.environ.get('EW_DB_PASSWORD', "Tjekdfl1324%^"),
        db=os.environ.get('EW_DB_NAME', "English_words"),
        charset='utf8mb4',
        autocommit=True,
    )

    failed_words = []

    try:
        search_terms_df = pd.read_csv(search_terms_file)
        print("CSV 파일의 내용:")
        print(search_terms_df.head())

        if 'search_term' not in search_terms_df.columns:
            raise KeyError("CSV 파일에 'search_term' 열이 없습니다.")

        # Empty cells are read as NaN; drop them so they are not looked up
        # as the literal word "nan".
        search_terms = search_terms_df['search_term'].dropna().astype(str)
        total = len(search_terms)

        for i, search_term in enumerate(search_terms, start=1):
            print(f"Processing {i}/{total}: {search_term}")
            try:
                html = fetch_search_results(search_term)
                result = parse_results(html, download_folder)
                if not result:
                    # Report clearly instead of failing later with a bare
                    # KeyError('Word') inside save_to_database.
                    message = "no 'container_result' section in the response"
                    print(f"Error processing {search_term}: {message}")
                    failed_words.append(
                        {'word': search_term, 'error': message}
                    )
                    continue
                result['SearchTerm'] = search_term
                print(f"Finished processing {search_term}")

                save_to_database(result, connection)
                print(f"Saved {search_term} to database")
            except Exception as e:
                # Per-term boundary: one bad word must not stop the batch.
                print(f"Error processing {search_term}: {e}")
                failed_words.append({'word': search_term, 'error': str(e)})
    finally:
        connection.close()
        print("Database connection closed")

    if failed_words:
        print("\nFailed to save the following words:")
        for entry in failed_words:
            print(f"Word: {entry['word']}, Error: {entry['error']}")


if __name__ == '__main__':
    main()