#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Computer-English vocabulary conversion script.

Converts a plain-text vocabulary file into a JavaScript array module.
"""

import re
import json

# Entry layout: "<number>. <word> [<phonetic>] <part of speech> <meaning>".
# The phonetic group is deliberately permissive: it may swallow the closing
# "]" and, when the meaning itself contains part-of-speech markers, some of
# the text after it. _parse_entry() repairs such captures afterwards.
_ENTRY_RE = re.compile(
    r'^(\d+)\. ([a-zA-Z\-\']+) \[([^\]]*(?:\][^\]]*)*) ([a-zA-Z\.&]+) (.+)$'
)

# Section headings such as "51 - 100" that separate groups of entries.
# They could never match _ENTRY_RE anyway; skipping them early is only a
# shortcut that documents the intent.
_RANGE_HEADING_RE = re.compile(r'^\d+\s*-\s*\d+')

# Whitespace-delimited tokens, used to locate the part-of-speech marker.
_TOKEN_RE = re.compile(r'\S+')

# Part-of-speech markers recognised when re-splitting a bad phonetic capture.
_PARTS_OF_SPEECH = frozenset(
    ('n.', 'v.', 'vt.', 'vi.', 'a.', 'ad.', 'prep.', 'conj.', 'pron.')
)

_JS_HEADER = '''// 计算机英语词汇数据
const vocabularyData = [
'''

_JS_FOOTER = '''];

// 导出数据供其他文件使用
if (typeof module !== 'undefined' && module.exports) {
    module.exports = vocabularyData;
}
'''


def _parse_entry(line):
    """Parse one stripped line into a vocabulary dict, or return None."""
    match = _ENTRY_RE.match(line)
    if match is None:
        return None

    word_id, word, phonetic, word_type, meaning = match.groups()

    if not phonetic.endswith(']'):
        # The greedy capture ran past the phonetic (or the closing bracket
        # is missing). Re-split on the first part-of-speech token that
        # follows the opening bracket, using token positions so that an
        # identical substring earlier in the line cannot be picked up.
        phonetic_start = line.find('[') + 1
        tokens = list(_TOKEN_RE.finditer(line, phonetic_start))
        for index, token in enumerate(tokens):
            if token.group() in _PARTS_OF_SPEECH:
                word_type = token.group()
                meaning = ' '.join(t.group() for t in tokens[index + 1:])
                phonetic = line[phonetic_start:token.start()].strip()
                break

    # Both paths above leave the closing bracket attached to the phonetic;
    # drop it so the brackets added below are not doubled ("[fail]]").
    if phonetic.endswith(']'):
        phonetic = phonetic[:-1]

    return {
        'id': int(word_id),
        'word': word.strip(),
        'phonetic': f'[{phonetic}]',
        'type': word_type.strip(),
        'meaning': meaning.strip()
    }


def parse_vocabulary_file(file_path):
    """Parse the vocabulary text file.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A list of dicts with the keys ``id`` (int), ``word``, ``phonetic``
        (wrapped in one pair of square brackets), ``type`` and ``meaning``.
        Blank lines, range headings and lines that do not look like an
        entry are skipped.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    vocabulary_list = []

    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line or _RANGE_HEADING_RE.match(line):
                continue

            item = _parse_entry(line)
            if item is not None:
                vocabulary_list.append(item)

    return vocabulary_list


def _js_string(value):
    """Return ``value`` as a double-quoted, properly escaped string literal."""
    # JSON string syntax is accepted as a JavaScript string literal, and
    # ensure_ascii=False keeps Chinese text and IPA symbols readable.
    return json.dumps(value, ensure_ascii=False)


def generate_js_file(vocabulary_list, output_path):
    """Generate the JavaScript data file.

    Args:
        vocabulary_list: Entries as returned by parse_vocabulary_file().
        output_path: Path of the JavaScript file to write (UTF-8).
    """
    entries = [
        f'    {{ id: {item["id"]}, word: {_js_string(item["word"])}, '
        f'phonetic: {_js_string(item["phonetic"])}, '
        f'type: {_js_string(item["type"])}, '
        f'meaning: {_js_string(item["meaning"])} }}'
        for item in vocabulary_list
    ]
    # An empty list produces no entry lines at all, just "[" and "];".
    body = ',\n'.join(entries) + '\n' if entries else ''

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(_JS_HEADER + body + _JS_FOOTER)

    print(f'成功生成JavaScript文件: {output_path}')
    print(f'共转换 {len(vocabulary_list)} 个词汇')


def main():
    """Convert the bundled vocabulary file into ``vocabulary-data.js``."""
    input_file = '计算机英语词汇.txt'
    output_file = 'vocabulary-data.js'

    try:
        print('开始解析词汇文件...')
        vocabulary_list = parse_vocabulary_file(input_file)

        print('开始生成JavaScript文件...')
        generate_js_file(vocabulary_list, output_file)

        print('转换完成!')

    except FileNotFoundError:
        print(f'错误: 找不到文件 {input_file}')
    except Exception as e:
        # Top-level boundary of a command-line script: report and exit quietly.
        print(f'转换过程中发生错误: {e}')


if __name__ == '__main__':
    main()