diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/Wdic2Xml.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/Wdic2Xml.java index c81ae7d..e197c57 100644 --- a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/Wdic2Xml.java +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/Wdic2Xml.java @@ -1,45 +1,18 @@ package io.github.eb4j.xml2eb.converter.wdic; -import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; -import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Stack; -import java.util.TreeMap; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.w3c.dom.Text; - -import org.apache.commons.io.FilenameUtils; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.LineIterator; -import org.apache.commons.io.filefilter.FileFilterUtils; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.CharUtils; -import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import io.github.eb4j.xml2eb.CatalogInfo; -import io.github.eb4j.xml2eb.util.BmpUtil; -import io.github.eb4j.xml2eb.util.FontUtil; import io.github.eb4j.xml2eb.util.HexUtil; -import io.github.eb4j.xml2eb.util.WordUtil; import io.github.eb4j.xml2eb.util.XmlUtil; /** @@ -54,11 +27,6 @@ public class Wdic2Xml { */ private static final String PROGRAM = Wdic2Xml.class.getName(); - private static final String ENCODING = "UTF-8"; - private static final String WDIC_GLYPH_DIR = "glyph"; - private static final String WDIC_PLUGIN_DIR = "plugin"; - private static final String WDIC_GAIJI_DIR = "gaiji"; - private static final String WDIC_TABLE_DIR = "table"; private static final String BOOK_XML = "book.xml"; private static final String BOOK_TITLE = "通信用語の基礎知識"; @@ -74,42 +42,11 @@ public class Wdic2Xml { * ベースディレクトリ */ private File basedir = null; - /** - * WDICグループリスト - */ - private WdicGroupList groupList = null; - /** - * WDIC分類リスト - */ - private WdicDirList dirList = null; - /** - * WDICマニュアル - */ - private WdicMan manual = null; - /** - * プラグインマップ - */ - private Map> pluginMap = null; - /** - * 外字マップ - */ - private Map gaijiMap = null; - /** - * グリフリスト - */ - private List glyphList = null; - /** - * 表リスト - */ - private List tableList = null; - /** * メインメソッド。 * * @param args command line arguments. - * @throws IOException when fails to convert or write file. - * @throws ParserConfigurationException when fail to parse input file. */ public static void main(final String[] args) { if (args.length == 0) { @@ -117,9 +54,7 @@ public static void main(final String[] args) { } else { try { new Wdic2Xml(args[0]).convert(); - } catch (ParserConfigurationException e) { - System.exit(1); - } catch (IOException e) { + } catch (ParserConfigurationException | IOException e) { System.exit(1); } } @@ -138,15 +73,14 @@ public Wdic2Xml(final String path) { /** * コンストラクタ。 * - * @param dir ベースディレクトリ + * @param basedir ベースディレクトリ */ - public Wdic2Xml(final File dir) { + public Wdic2Xml(final File basedir) { super(); logger = LoggerFactory.getLogger(getClass()); - basedir = dir; + this.basedir = basedir; } - /** * 変換します。 * @@ -154,2082 +88,21 @@ public Wdic2Xml(final File dir) { * @throws IOException 入出力エラーが発生した場合 */ public void convert() throws ParserConfigurationException, IOException { - File file = new File(basedir, "FILE.GL"); - groupList = new WdicGroupList(file); - file = new File(basedir, "DIR.LST"); - dirList = new WdicDirList(file); - file = new File(basedir, "WDICALL.MAN"); - manual = new WdicMan(file); - pluginMap = groupList.getPluginMap(); - gaijiMap = new TreeMap<>(); - glyphList = new ArrayList<>(); - tableList = new ArrayList<>(); - + WdicNode wdicNode = new WdicNode(basedir); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.newDocument(); Element book = doc.createElement("book"); doc.appendChild(book); - Element subbook = _appendElement(book, "subbook"); + Element subbook = wdicNode.appendElement(book, "subbook"); subbook.setAttribute("title", BOOK_TITLE); subbook.setAttribute("dir", BOOK_DIR); subbook.setAttribute("type", BOOK_TYPE); + wdicNode.makeNodes(subbook); - logger.info("create content node..."); - _makeContentNode(subbook); - logger.info("create graphic node..."); - _makeGraphicNode(subbook); - logger.info("create sound node..."); - _makeSoundNode(subbook); - logger.info("create font node..."); - _makeFontNode(subbook); - - file = new File(basedir, BOOK_XML); + File file = new File(basedir, BOOK_XML); logger.info("write file: " + file.getPath()); XmlUtil.write(doc, file); } - - /** - * 画像データノードを作成します。 - * - * @param subbook subbookノード - */ - private void _makeGraphicNode(final Element subbook) { - Element graphic = _appendElement(subbook, "graphic"); - File plugin = new File(basedir, WDIC_PLUGIN_DIR); - for (String name : pluginMap.keySet()) { - if (name.endsWith(".jpg")) { - File jpg = new File(plugin, name); - if (!jpg.exists()) { - logger.error("file not found: " + jpg.getPath()); - } - String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, name); - _appendData(graphic, name, path, "jpg"); - } else if (name.endsWith(".png")) { - String bmpName = name + ".bmp"; - File bmp = new File(plugin, bmpName); - if (!bmp.exists()) { - logger.error("file not found: " + bmp.getPath()); - } - String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, bmpName); - _appendData(graphic, name, path, "bmp"); - } - } - - File glyph = new File(basedir, WDIC_GLYPH_DIR); - for (String name : glyphList) { - String bmpName = name + ".50px.png.bmp"; - File bmp = new File(glyph, bmpName); - if (!bmp.exists()) { - logger.error("file not found: " + bmp.getPath()); - } - String path = FilenameUtils.concat(WDIC_GLYPH_DIR, bmpName); - _appendData(graphic, "glyph-" + name, path, "bmp"); - } - - File table = new File(basedir, WDIC_TABLE_DIR); - for (String name : tableList) { - name += ".bmp"; - File bmp = new File(table, name); - if (!bmp.exists()) { - logger.error("file not found: " + bmp.getPath()); - } - String path = FilenameUtils.concat(WDIC_TABLE_DIR, name); - _appendData(graphic, name, path, "bmp"); - } - } - - /** - * 音声データノードを作成します。 - * - * @param subbook subbookノード - */ - private void _makeSoundNode(final Element subbook) { - Element sound = _appendElement(subbook, "sound"); - File plugin = new File(basedir, WDIC_PLUGIN_DIR); - for (String name : pluginMap.keySet()) { - if (name.endsWith(".mp3") || name.endsWith(".ogg")) { - String wavName = name + ".wav"; - File wav = new File(plugin, wavName); - if (!wav.exists()) { - logger.error("file not found: " + wav.getPath()); - } - String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, wavName); - _appendData(sound, name, path, "wav"); - } else if (name.endsWith(".mid")) { - File midi = new File(plugin, name); - if (!midi.exists()) { - logger.error("file not found: " + midi.getPath()); - } - String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, name); - _appendData(sound, name, path, "mid"); - } - } - } - - /** - * 外字データノードを作成します。 - * - * @param subbook subbookノード - */ - private void _makeFontNode(final Element subbook) { - Element font = _appendElement(subbook, "font"); - File gaiji = new File(basedir, WDIC_GAIJI_DIR); - for (Map.Entry entry : gaijiMap.entrySet()) { - String name = entry.getKey(); - String type = entry.getValue(); - File file = new File(gaiji, name + ".xbm"); - if (!file.exists()) { - logger.error("file not found: " + file.getPath()); - } - String path = FilenameUtils.concat(WDIC_GAIJI_DIR, file.getName()); - Element charElem = _appendElement(font, "char"); - charElem.setAttribute("name", name); - charElem.setAttribute("type", type); - Element dataElem = _appendElement(charElem, "data"); - dataElem.setAttribute("size", "16"); - dataElem.setAttribute("src", path); - } - } - - /** - * 辞書データノードを作成します。 - * - * @param subbook subbookノード - */ - private void _makeContentNode(final Element subbook) { - Element content = _appendElement(subbook, "content"); - - logger.info("create item node..."); - for (WdicGroup group : groupList.getGroups()) { - for (Wdic dic : group.getWdics()) { - for (WdicItem item : dic.getWdicItems()) { - _makeItemNode(content, item); - } - } - } - _makeItemNode(content); - - logger.info("create menu node..."); - _makeMenuNode(content); - - logger.info("create copyright node..."); - _makeCopyrightNode(content); - } - - /** - * 辞書項目ノードを作成します。 - * - * @param content コンテントノード - * @param item 辞書項目 - */ - private void _makeItemNode(final Element content, final WdicItem item) { - String grpId = item.getWdic().getGroupId(); - String grpName = item.getWdic().getGroupName(); - String partName = item.getWdic().getPartName(); - String partId = item.getWdic().getPartId(); - - String head = item.getHead(); - logger.debug(" [" + grpId + ":" + partId + "] " + head); - Element itemElem = _appendItem(content, "WDIC:" + grpId + ":" + head); - Element headElem = _appendElement(itemElem, "head"); - _appendRawText(headElem, head + " 【" + grpName + ":" + partName + "】"); - boolean wordAvail = false; - if (WordUtil.isValidWord(head)) { - Element wordElem = _appendElement(itemElem, "word"); - _appendRawText(wordElem, head); - wordAvail = true; - } - - // 読みを検索語として登録 - List yomiList = item.getYomi(); - int n = yomiList.size(); - for (int i = 0; i < n; i++) { - String yomi = yomiList.get(i); - String word = WdicUtil.unescape(yomi); - if (!"???".equals(yomi) && !head.equals(word) && WordUtil.isValidWord(word)) { - Element wordElem = _appendElement(itemElem, "word"); - _appendRawText(wordElem, word); - wordAvail = true; - } - } - // 英字表記を検索語として登録 - Map spellMap = item.getSpell(); - for (Map.Entry entry : spellMap.entrySet()) { - String str = WdicUtil.unescape(entry.getValue()); - if (StringUtils.isAsciiPrintable(str)) { - int idx = str.indexOf(": "); - if (idx > 0) { - // 略語 - String ss = str.substring(0, idx).trim(); - if (!head.equals(ss) && WordUtil.isValidWord(ss)) { - Element wordElem = _appendElement(itemElem, "word"); - _appendRawText(wordElem, ss); - wordAvail = true; - } - // 元の語形 - str = str.substring(idx + 2).trim(); - } - if (!head.equals(str) && WordUtil.isValidWord(str)) { - Element wordElem = _appendElement(itemElem, "word"); - _appendRawText(wordElem, str); - wordAvail = true; - } - } - } - - if (!wordAvail) { - logger.warn("word not defined: " + grpId + ":" + partId + ":" + head); - } - - // 本文の登録 - Element bodyElem = _appendElement(itemElem, "body"); - Element keyElem = _appendElement(bodyElem, "key"); - _appendRawText(keyElem, head); - _appendRawText(bodyElem, " 【"); - Element refElem = _appendIdReference(bodyElem, "MENU:group:" + grpId); - _appendRawText(refElem, grpName); - _appendRawText(bodyElem, ":"); - refElem = _appendIdReference(bodyElem, "MENU:group:" + grpId + ":" + partId); - _appendRawText(refElem, partName + "編"); - _appendRawText(bodyElem, "】"); - _appendNewLine(bodyElem); - - // 分類 - List dirs = item.getDir(); - n = dirs.size(); - for (int i = 0; i < n; i++) { - refElem = _appendIdReference(bodyElem, "DIR:/"); - _appendRawText(refElem, "分類"); - _appendRawText(bodyElem, ":"); - String str = dirs.get(i); - if (str.startsWith("/")) { - str = str.substring(1); - } - String[] ss = str.split("/"); - String key = ""; - int m = ss.length; - for (int j = 0; j < m; j++) { - if (j != 0) { - _appendRawText(bodyElem, " > "); - } - key += "/" + ss[j]; - refElem = _appendIdReference(bodyElem, "DIR:" + key); - _appendRawText(refElem, dirList.getName(key)); - } - _appendNewLine(bodyElem); - } - - // 読み - if (!yomiList.isEmpty()) { - n = yomiList.size(); - for (int i = 0; i < n; i++) { - String str = "読み:" + yomiList.get(i); - _appendText(item, bodyElem, str); - _appendNewLine(bodyElem); - } - } - - // 外語 - if (!spellMap.isEmpty()) { - Iterator> spellIt = spellMap.entrySet().iterator(); - while (spellIt.hasNext()) { - Map.Entry entry = spellIt.next(); - String str = "外語:[" + entry.getKey() + "] " + entry.getValue(); - _appendText(item, bodyElem, str); - _appendNewLine(bodyElem); - } - } - - // 発音 - Map pronMap = item.getPronounce(); - if (!pronMap.isEmpty()) { - Iterator> pronIt = pronMap.entrySet().iterator(); - while (pronIt.hasNext()) { - Map.Entry entry = pronIt.next(); - String str = "発音:[" + entry.getKey() + "] " + entry.getValue(); - _appendRawText(bodyElem, str); - _appendNewLine(bodyElem); - } - } - - // 品詞 - List speechList = item.getSpeech(); - if (!speechList.isEmpty()) { - StringBuilder buf = new StringBuilder(); - for (String s : speechList) { - if (buf.length() == 0) { - buf.append("品詞:"); - } else { - buf.append(","); - } - buf.append(s); - } - _appendRawText(bodyElem, buf.toString()); - _appendNewLine(bodyElem); - } - - // 内容 - _appendNewLine(bodyElem); - Stack indentStack = new Stack<>(); - int curIndent = 0; - int ignoreTabs = 0; - int section = 0; - int tableNum = 0; - Element indentElem = _appendElement(bodyElem, "indent"); - Map numMap = new HashMap<>(); - boolean linkBlock = false; - List bodyList = item.getBody(); - n = bodyList.size(); - for (int i = 0; i < n; i++) { - String body = bodyList.get(i); - String block = WdicUtil.deleteTab(body); - - if ("//LINK".equals(block)) { - while (!indentStack.isEmpty()) { - indentElem = indentStack.pop(); - } - _appendNewLine(indentElem); - linkBlock = true; - curIndent = 0; - // リンク部では常に1段インデントを無視する ("//LINK"部のタブ分) - ignoreTabs = 1; - continue; - } - - int indent = WdicUtil.getTabCount(body); - if (block.startsWith("= ")) { - // 無視するタブ数を変更 - section = indent; - ignoreTabs = indent + 1; - indent = 0; - curIndent = 0; - while (!indentStack.isEmpty()) { - indentElem = indentStack.pop(); - } - } else { - indent -= ignoreTabs; - indent = Math.max(indent, 0); - if (block.startsWith("+ ")) { - // キー"-1"にこのブロックのインデント数を設定する - numMap.put(-1, indent); - if (numMap.size() > 1) { - // 次段以降はインデントを下げない - indent = curIndent; - } - } else { - // 数字あり箇条書きでない場合はマップをクリアする - numMap.clear(); - } - } - if (curIndent < indent) { - while (curIndent != indent) { - indentStack.push(indentElem); - indentElem = _appendElement(indentElem, "indent"); - curIndent++; - } - } else if (curIndent > indent) { - while (curIndent != indent) { - indentElem = indentStack.pop(); - curIndent--; - } - } - - if (linkBlock) { - // リンク部 - _appendItemLinkBlock(item, indentElem, block); - _appendNewLine(indentElem); - } else { - // 本文部 - if (block.startsWith("))")) { - // 整形済み - _appendNewLine(indentElem); - for (; i < n; i++) { - body = bodyList.get(i); - block = WdicUtil.deleteTab(body); - if (!block.startsWith("))")) { - i--; - break; - } - if (block.startsWith(")) ")) { - Element nobr = _appendElement(indentElem, "nobr"); - _appendText(item, nobr, block.substring(3)); - } - _appendNewLine(indentElem); - } - _appendNewLine(indentElem); - } else if (block.startsWith(">>")) { - // 引用 - Element indentElem2 = _appendElement(indentElem, "indent"); - for (; i < n; i++) { - body = bodyList.get(i); - block = WdicUtil.deleteTab(body); - if (!block.startsWith(">>")) { - i--; - break; - } - if (block.startsWith(">> ")) { - _appendText(item, indentElem2, block.substring(3)); - } - _appendNewLine(indentElem2); - } - } else if (block.startsWith(":: ")) { - // 定義語 (簡易形式) - for (; i < n; i++) { - body = bodyList.get(i); - block = WdicUtil.deleteTab(body); - if (!block.startsWith(":: ")) { - i--; - break; - } - int idx = WdicUtil.indexOf(block, "|", 3); - if (idx >= 0) { // return minus when not found - String dt = block.substring(3, idx).trim(); - String dd = block.substring(idx + 1).trim(); - _appendText(item, indentElem, "\u30fb " + dt); - _appendNewLine(indentElem); - Element indentElem2 = _appendElement(indentElem, "indent"); - _appendText(item, indentElem2, dd); - _appendNewLine(indentElem2); - } - } - } else if (block.startsWith(": ")) { - // 定義語 (完全形式) - int tab = curIndent; - boolean term = false; - Element indentElem2 = null; - for (i = i + 1; i < n; i++) { - body = bodyList.get(i); - int t = WdicUtil.getTabCount(body) - ignoreTabs; - if (t <= tab) { - i--; - break; - } - block = WdicUtil.deleteTab(body); - if (block.startsWith("+ ")) { - // キー"-1"にこのブロックのインデント数を設定する - numMap.put(-1, t); - } else { - // 数字あり箇条書きでない場合はマップをクリアする - numMap.clear(); - } - if (block.startsWith(":>")) { - if (indentElem2 != null) { - indentElem2 = null; - } - term = true; - String dt = block.substring(2).trim(); - if (StringUtils.isNotBlank(dt)) { - _appendText(item, indentElem, "\u30fb " + dt); - _appendNewLine(indentElem); - } - } else if (block.startsWith(":<")) { - if (indentElem2 == null) { - indentElem2 = _appendElement(indentElem, "indent"); - } - term = false; - String dd = block.substring(2).trim(); - if (StringUtils.isNotBlank(dd)) { - _appendText(item, indentElem2, dd); - _appendNewLine(indentElem2); - } - } else { - if (term) { - _appendItemBodyBlock(item, indentElem, block, numMap, "\u30fb "); - _appendNewLine(indentElem); - } else { - if (indentElem2 != null) { - _appendItemBodyBlock(item, indentElem2, block, numMap, null); - _appendNewLine(indentElem2); - } - } - } - } - } else if (block.startsWith("| ")) { - // 表 (完全形式) - tableNum++; - WdicTable table = new WdicTable(item); - table.add(block); - int tab = curIndent; - for (i = i + 1; i < n; i++) { - body = bodyList.get(i); - int t = WdicUtil.getTabCount(body) - ignoreTabs; - if (t <= tab) { - i--; - break; - } - block = WdicUtil.deleteTab(body); - table.add(block); - } - File dir = new File(basedir, WDIC_TABLE_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - String name = grpId + "_" + partId + "_" + item.getIndex() + "-" + tableNum; - File file = new File(dir, name + ".bmp"); - if (!file.exists()) { - BufferedImage img = table.getImage(); - try { - BmpUtil.write(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - if (img != null) { - img.flush(); - } - } - } - if (!tableList.contains(name)) { - tableList.add(name); - } - Element elem = _appendDataReference(indentElem, file.getName(), "graphic"); - _appendRawText(elem, "[表]"); - _appendNewLine(indentElem); - } else if (block.startsWith("|| ") || block.startsWith("|= ")) { - // 表 (簡易形式) - tableNum++; - WdicTable table = new WdicTable(item); - table.add(block); - for (i = i + 1; i < n; i++) { - body = bodyList.get(i); - block = WdicUtil.deleteTab(body); - if (!block.startsWith("|| ") && !block.startsWith("|= ")) { - i--; - break; - } - table.add(block); - } - File dir = new File(basedir, WDIC_TABLE_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - String name = grpId + "_" + partId + "_" + item.getIndex() + "-" + tableNum; - File file = new File(dir, name + ".bmp"); - if (!file.exists()) { - BufferedImage img = table.getImage(); - if (img != null) { - try { - BmpUtil.write(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - img.flush(); - } - if (!tableList.contains(name)) { - tableList.add(name); - } - Element elem = _appendDataReference(indentElem, file.getName(), "graphic"); - _appendRawText(elem, "[表]"); - _appendNewLine(indentElem); - } - } - } else if (block.startsWith("= ")) { - // 章見出し - if (i > 0) { - String prev = WdicUtil.deleteTab(bodyList.get(i - 1)); - if (!prev.startsWith("= ")) { - _appendNewLine(indentElem); - } - } - // U+25A0: Black Square - // U+25A1: White Square - StringBuilder buf = new StringBuilder(); - int black = 5 - section; - int white = section; - for (int j = 0; j < white; j++) { - buf.append('\u25a1'); - } - for (int j = 0; j < black; j++) { - buf.append('\u25a0'); - } - buf.append(" "); - buf.append(block.substring(2)); - buf.append(" "); - for (int j = 0; j < black; j++) { - buf.append('\u25a0'); - } - for (int j = 0; j < white; j++) { - buf.append('\u25a1'); - } - _appendText(item, indentElem, buf.toString()); - _appendNewLine(indentElem); - } else { - // その他 - _appendItemBodyBlock(item, indentElem, block, numMap, null); - _appendNewLine(indentElem); - } - } - } - } - - /** - * 辞書項目内容を追加します。 - * - * @param item 辞書項目 - * @param elem 追加対象の要素 - * @param block 追加する内容 - * @param numMap インデント数と箇条書き数とのマップ - * @param prefix プレフィックス - */ - private void _appendItemBodyBlock(final WdicItem item, final Element elem, final String block, - final Map numMap, final String prefix) { - String target = block; - if (target.startsWith("* ")) { - // 文章 - // U+25c6: Black Diamond - target = "\u25c6 " + target.substring(2); - } else if (target.startsWith("- ")) { - // 数字なし箇条書き - // U+30FB: Katakana Middle Dot - target = "\u30fb " + target.substring(2); - } else if (target.startsWith("+ ")) { - // 数字あり箇条書き - // キー"-1"にこのブロックのインデント数が設定されている - int indent = numMap.get(-1); - // 現在の階層の数値 - int val = 1; - if (numMap.containsKey(indent)) { - val = numMap.get(indent) + 1; - } - String num = Integer.toString(val); - numMap.put(indent, val); - // 下位階層をクリア - int lower = indent + 1; - while (true) { - if (!numMap.containsKey(lower)) { - break; - } - numMap.remove(lower); - lower++; - } - // 上位階層の数値を追加 - int upper = indent - 1; - while (upper >= 0) { - if (!numMap.containsKey(upper)) { - break; - } - num = numMap.get(upper) + "." + num; - upper--; - } - target = num + ") " + target.substring(2); - } else if (target.startsWith("=> ")) { - // 参照 - // U+21D2: Rightwards Double Arrow - target = "\u21d2 " + target.substring(3); - } - if (prefix != null) { - target = prefix + target; - } - _appendText(item, elem, target); - } - - /** - * 辞書項目のリンク部を追加します。 - * - * @param item 辞書項目 - * @param elem 追加対象の要素 - * @param block 追加する内容 - */ - private void _appendItemLinkBlock(final WdicItem item, final Element elem, final String block) { - String target = block; - if (target.startsWith("= ")) { - // グループ見出し - _appendNewLine(elem); - // U+25BC: Black Down-Pointing Triangle - target = "\u25bc " + target.substring(2); - } else if (target.startsWith("- ")) { - // 関連語、外部リンク - // U+21D2: Rightwards Double Arrow - target = "\u21d2 " + target.substring(2); - } else if (target.startsWith("-! ")) { - // 反対語 - // U+21D4: Left Right Double Arrow - target = "\u21d4 " + target.substring(3); - } - _appendText(item, elem, target, true); - } - - /** - * 辞書項目ノードを作成します。 - * - * @param content コンテントノード - */ - private void _makeItemNode(final Element content) { - Iterator>> it; - Map.Entry> entry; - - // 画像グラグイン - logger.debug(" graphic plugin"); - String[] ext = {".jpg", ".png"}; - int len = ext.length; - for (int i = 0; i < len; i++) { - it = pluginMap.entrySet().iterator(); - while (it.hasNext()) { - entry = it.next(); - String name = entry.getKey(); - if (name.endsWith(ext[i])) { - Element itemElem = _appendItem(content, "PLUGIN:" + name); - Element headElem = _appendElement(itemElem, "head"); - _appendRawText(headElem, name + " 【プラグイン】"); - // ファイル名をキーワードとして登録 - Element keywordElem = _appendElement(itemElem, "keyword"); - _appendRawText(keywordElem, name); - - Element bodyElem = _appendElement(itemElem, "body"); - Element keyElem = _appendElement(bodyElem, "key"); - _appendRawText(keyElem, name); - _appendNewLine(bodyElem); - Element refElem = _appendDataReference(bodyElem, name, "graphic"); - _appendRawText(refElem, "[図版]"); - _appendNewLine(bodyElem); - - // プラグインを参照している項目を列挙 - for (WdicItem item : entry.getValue()) { - _appendRawText(bodyElem, "\u2192 "); - String head = item.getHead(); - String grpId = item.getWdic().getGroupId(); - String id = "WDIC:" + grpId + ":" + head; - refElem = _appendIdReference(bodyElem, id); - String gname = item.getWdic().getGroupName(); - String part = item.getWdic().getPartName(); - String title = head + " 《" + gname + ":" + part + "》"; - _appendRawText(refElem, title); - _appendNewLine(bodyElem); - } - } - } - } - - // 音声プラグイン - logger.debug(" sound plugin"); - ext = new String[]{".mp3", ".ogg", ".mid"}; - len = ext.length; - for (int i = 0; i < len; i++) { - it = pluginMap.entrySet().iterator(); - while (it.hasNext()) { - entry = it.next(); - String name = entry.getKey(); - if (name.endsWith(ext[i])) { - Element itemElem = _appendItem(content, "PLUGIN:" + name); - Element headElem = _appendElement(itemElem, "head"); - _appendRawText(headElem, name + " 【プラグイン】"); - // ファイル名をキーワードとして登録 - Element keywordElem = _appendElement(itemElem, "keyword"); - _appendRawText(keywordElem, name); - - Element bodyElem = _appendElement(itemElem, "body"); - Element keyElem = _appendElement(bodyElem, "key"); - _appendRawText(keyElem, name); - _appendNewLine(bodyElem); - Element refElem = _appendDataReference(bodyElem, name, "sound"); - _appendRawText(refElem, "[音声]"); - _appendNewLine(bodyElem); - - // プラグインを参照している項目を列挙 - for (WdicItem item : entry.getValue()) { - _appendRawText(bodyElem, "\u2192 "); - String head = item.getHead(); - String grpId = item.getWdic().getGroupId(); - String id = "WDIC:" + grpId + ":" + head; - refElem = _appendIdReference(bodyElem, id); - String gname = item.getWdic().getGroupName(); - String part = item.getWdic().getPartName(); - String title = head + " 《" + gname + ":" + part + "》"; - _appendRawText(refElem, title); - _appendNewLine(bodyElem); - } - } - } - } - - // その他のプラグイン - logger.debug(" document plugin"); - File plugin = new File(basedir, WDIC_PLUGIN_DIR); - ext = new String[]{".jpg", ".png", ".mp3", ".ogg", ".mid"}; - len = ext.length; - it = pluginMap.entrySet().iterator(); - while (it.hasNext()) { - entry = it.next(); - String name = entry.getKey(); - boolean add = true; - for (int i = 0; i < len; i++) { - if (name.endsWith(ext[i])) { - add = false; - break; - } - } - if (add) { - File file = new File(plugin, name); - if (!file.exists()) { - logger.error("file not found: " + file.getPath()); - continue; - } - Element itemElem = _appendItem(content, "PLUGIN:" + name); - Element headElem = _appendElement(itemElem, "head"); - _appendRawText(headElem, name + " 【プラグイン】"); - // ファイル名をキーワードとして登録 - Element keywordElem = _appendElement(itemElem, "keyword"); - _appendRawText(keywordElem, name); - - Element bodyElem = _appendElement(itemElem, "body"); - Element keyElem = _appendElement(bodyElem, "key"); - _appendRawText(keyElem, name); - _appendNewLine(bodyElem); - - // プラグインを参照している項目を列挙 - for (WdicItem item : entry.getValue()) { - _appendRawText(bodyElem, "\u2192 "); - String head = item.getHead(); - String grpId = item.getWdic().getGroupId(); - String id = "WDIC:" + grpId + ":" + head; - Element refElem = _appendIdReference(bodyElem, id); - String gname = item.getWdic().getGroupName(); - String part = item.getWdic().getPartName(); - String title = head + " 《" + gname + ":" + part + "》"; - _appendRawText(refElem, title); - _appendNewLine(bodyElem); - } - - // プラグインの内容 - Element indentElem = _appendElement(bodyElem, "indent"); - try { - LineIterator lineIt = FileUtils.lineIterator(file, ENCODING); - while (lineIt.hasNext()) { - String line = WdicUtil.sanitize(lineIt.nextLine()); - _appendRawText(indentElem, line); - _appendNewLine(indentElem); - } - } catch (IOException e) { - logger.error(e.getMessage(), e); - } - } - } - } - - /** - * 著作権ノードを作成します。 - * - * @param content コンテントノード - */ - private void _makeCopyrightNode(final Element content) { - Element copyright = _appendElement(content, "copyright"); - String[] line = manual.getCopyright(); - int len = line.length; - for (int i = 0; i < len; i++) { - _appendRawText(copyright, line[i]); - _appendNewLine(copyright); - } - } - - /** - * メニューノードを作成します。 - * - * @param content コンテントノード - */ - private void _makeMenuNode(final Element content) { - Element menu = _appendElement(content, "menu"); - Element layerElem = _appendLayer(menu, "MENU:top"); - - Element refElem = _appendIdReference(layerElem, "MENU:manual"); - String title = groupList.getName() + " " + groupList.getEdition(); - _appendRawText(refElem, title); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:bib"); - _appendRawText(refElem, "基礎文献"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "DIR:/"); - _appendRawText(refElem, "分類別収録語一覧"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:group"); - _appendRawText(refElem, "グループ別収録語一覧"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:plugin"); - _appendRawText(refElem, "グループ別プラグイン一覧"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:image"); - _appendRawText(refElem, "画像プラグイン一覧"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:sound"); - _appendRawText(refElem, "音声プラグイン一覧"); - _appendNewLine(layerElem); - - refElem = _appendIdReference(layerElem, "MENU:text"); - _appendRawText(refElem, "文書プラグイン一覧"); - _appendNewLine(layerElem); - - logger.debug(" manual"); - _createManualLayer(menu); - logger.debug(" bibliography"); - _createBibliographyLayer(menu); - logger.debug(" directory"); - _createDirectoryLayer(menu); - logger.debug(" group"); - _createGroupLayer(menu); - logger.debug(" plugin list"); - _createPluginLayer(menu); - logger.debug(" graphic list"); - _createImageLayer(menu); - logger.debug(" sound list"); - _createSoundLayer(menu); - logger.debug(" document list"); - _createTextLayer(menu); - } - - /** - * マニュアルメニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createManualLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "MENU:manual"); - String[] sec = manual.getSections(); - int len = sec.length; - for (int i = 0; i < len; i++) { - Element refElem = _appendIdReference(layerElem, "MENU:manual:" + sec[i]); - _appendRawText(refElem, sec[i]); - _appendNewLine(layerElem); - String prev = null; - String next = null; - if (i > 0) { - prev = sec[i - 1]; - } - if (i < (len - 1)) { - next = sec[i + 1]; - } - _createManualLayer(menu, sec[i], prev, next); - } - } - - /** - * マニュアルメニュー階層を作成します。 - * - * @param menu メニューノード - * @param sec セクション - * @para prev 前セクション - * @para next 次セクション - */ - private void _createManualLayer(final Element menu, final String sec, final String prev, - final String next) { - Element layerElem = _appendLayer(menu, "MENU:manual:" + sec); - Element keyElem = _appendElement(layerElem, "key"); - _appendRawText(keyElem, sec); - - Element indent1Elem = _appendElement(layerElem, "indent"); - Element indent2Elem = null; - Element indent3Elem = null; - Element indentElem = indent1Elem; - for (String str : manual.getContents(sec)) { - if (str.length() > 0) { - if (str.startsWith("\t")) { - if (indent2Elem == null) { - indent2Elem = _appendElement(indent1Elem, "indent"); - } - if (str.startsWith("\t\t")) { - if (indent3Elem == null) { - indent3Elem = _appendElement(indent2Elem, "indent"); - } - str = str.substring(2); - indentElem = indent3Elem; - } else { - str = str.substring(1); - indentElem = indent2Elem; - indent3Elem = null; - } - } else { - indentElem = indent1Elem; - indent2Elem = null; - indent3Elem = null; - } - _appendRawText(indentElem, str); - } - _appendNewLine(indentElem); - } - - _appendNewLine(layerElem); - - if (prev != null) { - _appendRawText(layerElem, "\u2190 "); - Element refElem = _appendIdReference(layerElem, "MENU:manual:" + prev); - _appendRawText(refElem, prev); - _appendRawText(layerElem, " | "); - } - if (next != null) { - _appendRawText(layerElem, "\u2192 "); - Element refElem = _appendIdReference(layerElem, "MENU:manual:" + next); - _appendRawText(refElem, next); - _appendRawText(layerElem, " | "); - } - _appendRawText(layerElem, "\u2191 "); - String title = groupList.getName() + " " + groupList.getEdition(); - Element refElem = _appendIdReference(layerElem, "MENU:manual"); - _appendRawText(refElem, title); - _appendNewLine(layerElem); - } - - /** - * 基礎文献メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createBibliographyLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "MENU:bib"); - Collection groups = groupList.getGroups(); - for (WdicGroup group : groups) { - Element refElem = _appendIdReference(layerElem, "MENU:bib:" + group.getGroupId()); - _appendRawText(refElem, group.getGroupName() + "用語の基礎知識"); - _appendNewLine(layerElem); - _createBibliographyLayer(menu, group); - } - } - - /** - * 基礎文献メニュー階層を作成します。 - * - * @param menu メニューノード - * @param group 辞書グループ - */ - private void _createBibliographyLayer(final Element menu, final WdicGroup group) { - Element layerElem = _appendLayer(menu, "MENU:bib:" + group.getGroupId()); - Element keyElem = _appendElement(layerElem, "key"); - _appendRawText(keyElem, group.getGroupName() + "用語の基礎知識"); - Element indentElem = _appendElement(layerElem, "indent"); - for (String str: group.getWdicBib().getBibliography()) { - if (str.length() > 0) { - _appendRawText(indentElem, str); - } - _appendNewLine(indentElem); - } - } - - /** - * 分類一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createDirectoryLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "DIR:/"); - for (String dir: dirList.getChildren("/")) { - _appendRawText(layerElem, "\u21d2 "); - Element refElem = _appendIdReference(layerElem, "DIR:" + dir); - _appendRawText(refElem, dirList.getName(dir)); - _appendNewLine(layerElem); - _createDirectoryLayer(menu, dir); - } - } - - /** - * 分類一覧メニュー階層を作成します。 - * - * @param menu メニューノード - * @param dir 分類 - */ - private void _createDirectoryLayer(final Element menu, final String dir) { - Element layerElem = _appendLayer(menu, "DIR:" + dir); - Element refElem = _appendIdReference(layerElem, "DIR:/"); - _appendRawText(refElem, "分類"); - String[] dirs; - if (dir.startsWith("/")) { - dirs = dir.substring(1).split("/"); - } else { - dirs = dir.split("/"); - } - String key = ""; - int len = dirs.length; - for (int i = 0; i < len - 1; i++) { - _appendRawText(layerElem, " > "); - key += "/" + dirs[i]; - refElem = _appendIdReference(layerElem, "DIR:" + key); - _appendRawText(refElem, dirList.getName(key)); - } - _appendRawText(layerElem, " > " + dirList.getName(dir)); - _appendNewLine(layerElem); - - List children = dirList.getChildren(dir); - len = children.size(); - int cnt = len; - for (int i = 0; i < len; i++) { - String child = children.get(i); - _appendRawText(layerElem, "\u21d2 "); - if (dirList.hasAlias(child)) { - String alias = dirList.getAlias(child); - refElem = _appendIdReference(layerElem, "DIR:" + alias); - _appendRawText(refElem, dirList.getName(child) + "@"); - _appendNewLine(layerElem); - } else { - refElem = _appendIdReference(layerElem, "DIR:" + child); - _appendRawText(refElem, dirList.getName(child)); - _appendNewLine(layerElem); - _createDirectoryLayer(menu, child); - } - } - - List items = groupList.getWdicItem(dir); - len = items.size(); - cnt += len; - for (int i = 0; i < len; i++) { - WdicItem item = items.get(i); - _appendRawText(layerElem, "\u2192 "); - String head = item.getHead(); - String grpId = item.getWdic().getGroupId(); - String id = "WDIC:" + grpId + ":" + head; - refElem = _appendIdReference(layerElem, id); - String gname = item.getWdic().getGroupName(); - String part = item.getWdic().getPartName(); - String title = head + " 《" + gname + ":" + part + "》"; - _appendRawText(refElem, title); - _appendNewLine(layerElem); - } - - if (cnt == 0) { - _appendRawText(layerElem, "(該当単語なし)"); - _appendNewLine(layerElem); - } - } - - /** - * グループ一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createGroupLayer(final Element menu) { - String id = "MENU:group"; - Element layerElem = _appendLayer(menu, id); - for (WdicGroup group : groupList.getGroups()) { - Element refElem = _appendIdReference(layerElem, id + ":" + group.getGroupId()); - _appendRawText(refElem, MessageFormat.format("{0}用語の基礎知識", group.getGroupName())); - _appendNewLine(layerElem); - _createGroupLayer(menu, group); - } - } - - /** - * グループ一覧メニュー階層を作成します。 - * - * @param menu メニューノード - * @param group 辞書グループ - */ - private void _createGroupLayer(final Element menu, final WdicGroup group) { - String id = "MENU:group:" + group.getGroupId(); - Element layerElem = _appendLayer(menu, id); - Element refElem = _appendIdReference(layerElem, "MENU:group"); - _appendRawText(refElem, "グループ"); - _appendRawText(layerElem, " > " + group.getGroupName()); - _appendNewLine(layerElem); - for (Wdic wdic : group.getWdics()) { - String name = wdic.getPartName() + "編"; - refElem = _appendIdReference(layerElem, id + ":" + wdic.getPartId()); - _appendRawText(refElem, name); - _appendNewLine(layerElem); - _createGroupLayer(menu, wdic); - } - } - - /** - * グループ一覧メニュー階層を作成します。 - * - * @param menu メニューノード - * @param wdic 辞書 - */ - private void _createGroupLayer(final Element menu, final Wdic wdic) { - String grpId = wdic.getGroupId(); - String partId = wdic.getPartId(); - String id = "MENU:group:" + grpId + ":" + partId; - Element layerElem = _appendLayer(menu, id); - Element refElem = _appendIdReference(layerElem, "MENU:group"); - _appendRawText(refElem, "グループ"); - _appendRawText(layerElem, " > "); - refElem = _appendIdReference(layerElem, "MENU:group:" + grpId); - _appendRawText(refElem, wdic.getGroupName()); - _appendRawText(layerElem, " > " + wdic.getPartName() + "編"); - _appendNewLine(layerElem); - for (WdicItem item : wdic.getWdicItems()) { - if (item.isAlias()) { - String name = item.getHead(); - _appendRawText(layerElem, name); - List yomiList = item.getYomi(); - if (yomiList.isEmpty()) { - logger.info("yomi not defined: " + grpId + ":" + partId + ":" + item.getHead()); - } else { - String yomi = yomiList.get(0); - _appendText(item, layerElem, " [" + yomi + "]"); - } - _appendRawText(layerElem, " \u21d2 "); - name = item.getRealName(); - String refid = WdicUtil.unescape(name); - id = "WDIC:" + grpId + ":" + refid; - refElem = _appendIdReference(layerElem, id); - _appendText(item, refElem, name); - _appendNewLine(layerElem); - } else { - String name = item.getHead(); - id = "WDIC:" + grpId + ":" + name; - refElem = _appendIdReference(layerElem, id); - _appendRawText(refElem, name); - List yomiList = item.getYomi(); - if (yomiList.isEmpty()) { - logger.info("yomi not defined: " + grpId + ":" + partId + ":" + item.getHead()); - } else { - String yomi = yomiList.get(0); - _appendText(item, layerElem, " [" + yomi + "]"); - } - _appendNewLine(layerElem); - } - } - } - - /** - * プラグイン一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createPluginLayer(final Element menu) { - String id = "MENU:plugin"; - Element layerElem = _appendLayer(menu, id); - for (WdicGroup group: groupList.getGroups()) { - Element refElem = _appendIdReference(layerElem, id + ":" + group.getGroupId()); - _appendRawText(refElem, group.getGroupName() + "用語の基礎知識"); - _appendNewLine(layerElem); - _createPluginLayer(menu, group); - } - } - - /** - * プラグイン一覧メニュー階層を作成します。 - * - * @param menu メニューノード - * @param group 辞書グループ - */ - private void _createPluginLayer(final Element menu, final WdicGroup group) { - String id = "MENU:plugin:" + group.getGroupId(); - Element layerElem = _appendLayer(menu, id); - Element refElem = _appendIdReference(layerElem, "MENU:plugin"); - _appendRawText(refElem, "グループ"); - _appendRawText(layerElem, " > " + group.getGroupName()); - _appendNewLine(layerElem); - - pluginMap.entrySet().stream() - .filter(entry -> entry.getValue().stream() - .anyMatch(v -> group.equals(v.getWdic().getGroup()))) - .forEach(entry -> { - String name = entry.getKey(); - _appendRawText(_appendIdReference(layerElem, "PLUGIN:" + name), name); - _appendNewLine(layerElem); - }); - } - - /** - * 画像一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createImageLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "MENU:image"); - List ext = Arrays.asList(".jpg", ".png"); - pluginMap.keySet().stream() - .filter(v -> ext.stream().anyMatch(s -> s.endsWith(v))) - .forEach(name -> { - Element refElem = _appendIdReference(layerElem, "PLUGIN:" + name); - _appendRawText(refElem, name); - _appendNewLine(layerElem); - }); - } - - /** - * 音声一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createSoundLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "MENU:sound"); - List ext = Arrays.asList(".mp3", ".ogg", ".mid"); - pluginMap.keySet().stream() - .filter(v -> ext.stream().anyMatch(s -> s.endsWith(v))) - .forEach(name -> { - Element refElem = _appendIdReference(layerElem, "PLUGIN:" + name); - _appendRawText(refElem, name); - _appendNewLine(layerElem); - }); - } - - /** - * 文書一覧メニュー階層を作成します。 - * - * @param menu メニューノード - */ - private void _createTextLayer(final Element menu) { - Element layerElem = _appendLayer(menu, "MENU:text"); - List ext = Arrays.asList(".jpg", ".png", ".mp3", ".ogg", ".mid"); - pluginMap.keySet().stream() - .filter(v -> ext.stream().noneMatch(s -> s.endsWith(v))) - .forEach(name -> { - Element refElem = _appendIdReference(layerElem, "PLUGIN:" + name); - _appendRawText(refElem, name); - _appendNewLine(layerElem); - }); - } - - /** - * テキストノードを追加します。 - * - * @param node テキストを追加するノード - * @param str 文字列 - */ - private void _appendRawText(final Node node, final String str) { - if (str != null && str.trim().length() > 0) { - String tmp = str.replace((char)0x3099, (char)0x309b) - .replace((char)0x309a, (char)0x309c); - if (node != null) { - Text text = node.getOwnerDocument().createTextNode(tmp); - node.appendChild(text); - _checkCharacter(text); - } - } - } - - /** - * 要素を追加します。 - * - * @param node 要素を追加するノード - * @param tag 要素のタグ名称 - * @return 追加された要素 - */ - private Element _appendElement(final Node node, final String tag) { - Element elem = node.getOwnerDocument().createElement(tag); - return (Element)node.appendChild(elem); - } - - /** - * 改行要素を追加します。 - * - * @param node 改行を追加するノード - * @return 追加された改行要素 - */ - private Element _appendNewLine(final Node node) { - return _appendElement(node, "br"); - } - - /** - * 項目要素を追加します。 - * - * @param node 項目要素を追加するノード - * @param id ID属性値 - * @return 追加された項目要素 - */ - private Element _appendItem(final Node node, final String id) { - Element elem = _appendElement(node, "item"); - elem.setAttribute("id", id); - return elem; - } - - /** - * 参照要素を追加します。 - * - * @param node 参照要素を追加するノード - * @param id ID属性値 - * @return 追加された参照要素 - */ - private Element _appendIdReference(final Node node, final String id) { - Element elem = _appendElement(node, "ref"); - elem.setAttribute("id", id); - return elem; - } - - /** - * 参照要素を追加します。 - * - * @param node 参照要素を追加するノード - * @param data data属性値 - * @param type type属性値 - * @return 追加された参照要素 - */ - private Element _appendDataReference(final Node node, final String data, final String type) { - Element elem = _appendElement(node, "ref"); - elem.setAttribute("data", data); - elem.setAttribute("type", type); - return elem; - } - - /** - * 外字参照要素を追加します。 - * - * @param node 外字参照要素を追加するノード - * @param name name属性値 - * @param type type属性値 - * @return 追加された参照要素 - */ - private Element _appendCharReference(final Node node, final String name, final String type) { - Element elem = _appendElement(node, "char"); - elem.setAttribute("name", name); - elem.setAttribute("type", type); - return elem; - } - - /** - * レイヤ要素を追加します。 - * - * @param node レイヤ要素を追加するノード - * @param id ID属性値 - * @return 追加されたレイヤ要素 - */ - private Element _appendLayer(final Node node, final String id) { - Element elem = _appendElement(node, "layer"); - elem.setAttribute("id", id); - return elem; - } - - /** - * データ要素を追加します。 - * - * @param node データ要素を追加するノード - * @param name name属性値 - * @param src src属性値 - * @param format format属性値 - * @return 追加されたデータ要素 - */ - private Element _appendData(final Node node, final String name, final String src, - final String format) { - Element elem = _appendElement(node, "data"); - elem.setAttribute("name", name); - elem.setAttribute("src", src); - elem.setAttribute("format", format); - return elem; - } - - /** - * テキストノードを追加します。 - * - * @param item 辞書項目 - * @param node テキストを追加するノード - * @param str 文字列 - */ - private void _appendText(final WdicItem item, final Node node, final String str) { - _appendText(item, node, str, false); - } - - /** - * テキストノードを追加します。 - * - * @param item 辞書項目 - * @param node テキストを追加するノード - * @param str 文字列 - * @param linkBlock リンク部の場合はtrue - */ - private void _appendText(final WdicItem item, final Node node, final String str, - final boolean linkBlock) { - String grpId = item.getWdic().getGroupId(); - String partId = item.getWdic().getPartId(); - String itemId = grpId + ":" + partId + ":" + item.getHead(); - StringBuilder buf = new StringBuilder(); - int len = str.length(); - for (int i = 0; i < len; i++) { - char ch = str.charAt(i); - if (Character.isHighSurrogate(ch) - || Character.isLowSurrogate(ch)) { - buf.append(ch); - continue; - } - - if (ch == '\'') { - StringBuilder bracket = new StringBuilder("'"); - int idx1 = i + 1; - for (; idx1 < len; idx1++) { - if (str.charAt(idx1) != '\'') { - break; - } - bracket.append("'"); - } - if (bracket.length() > 1) { - // 2個以上は強調表示 - int idx2 = WdicUtil.indexOf(str, bracket.toString(), idx1); - if (idx2 != -1) { - // 強調 - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - Element elem = _appendElement(node, "em"); - _appendText(item, elem, str.substring(idx1, idx2), linkBlock); - i = idx2 + bracket.length() - 1; - } else { - // 閉じられていないのでそのまま追加する - buf.append(bracket); - i = idx1 - 1; - } - continue; - } - } else if (ch == '[') { - if (i + 1 < len && str.charAt(i + 1) == '[') { - int idx1 = i + 1; - int idx2 = WdicUtil.indexOf(str, "]]", idx1 + 1); - if (idx2 != -1) { - // リンク - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - String ref = str.substring(idx1 + 1, idx2); - String name = null; - if (ref.startsWith("<")) { - // 表示内容 - int idx3 = WdicUtil.indexOf(ref, ">", 1); - if (idx3 != -1) { - name = ref.substring(1, idx3); - ref = ref.substring(idx3 + 1); - } - } - if (ref.startsWith("http:") - || ref.startsWith("https:") - || ref.startsWith("ftp:") - || ref.startsWith("news:") - || ref.startsWith("gopher:") - || ref.startsWith("mailto:") - || ref.startsWith("phone:") - || ref.startsWith("urn:") - || ref.startsWith("x-geo:")) { - // URI - if (StringUtils.isNotBlank(name)) { - if (linkBlock) { - ref = name + " <" + ref + ">"; - } else { - ref = name + "<" + ref + ">"; - } - } - _appendText(item, node, ref, linkBlock); - } else if (ref.startsWith("//")) { - // プラグイン - int idx3 = ref.indexOf("|"); - if (idx3 > 0) { - // delete option - ref = ref.substring(0, idx3); - } - String gid = null; - String file = null; - idx3 = ref.indexOf("/", 2); - if (idx3 != -1) { - gid = ref.substring(2, idx3); - file = ref.substring(idx3 + 1); - } else { - gid = grpId; - file = ref.substring(2); - } - Element refElem = null; - if (file.endsWith(".jpg") || file.endsWith(".png")) { - refElem = _appendDataReference(node, file, "graphic"); - } else if (file.endsWith(".mp3") || file.endsWith(".ogg") - || file.endsWith(".mid")) { - refElem = _appendDataReference(node, file, "sound"); - } else { - refElem = _appendIdReference(node, "PLUGIN:" + file); - } - if (StringUtils.isBlank(name)) { - name = file; - } - if (linkBlock) { - WdicGroup group = groupList.getGroup(gid); - if (group != null) { - String gname = group.getGroupName(); - name = name + " 《" + gname + "》"; - } - } - _appendRawText(refElem, name); - } else { - if (ref.startsWith("x-wdic:")) { - // x-wdic:/グループ名/単語 - ref = ref.substring("x-wdic:".length()); - } - String gid = null; - String head = null; - if (ref.startsWith("/")) { - // グループ名/単語 - int idx3 = WdicUtil.indexOf(ref, "/", 1); - if (idx3 != -1) { - gid = ref.substring(1, idx3); - head = ref.substring(idx3 + 1); - } else { - head = ref.substring(1); - } - } else { - // 単語 - head = ref; - } - String refid = WdicUtil.unescape(head); - if (StringUtils.isBlank(name)) { - name = head; - } - if (StringUtils.isBlank(gid)) { - // 同一グループ内 - gid = grpId; - } - WdicGroup group = groupList.getGroup(gid); - if (group != null) { - String gname = group.getGroupName(); - Wdic wdic = group.getWdic(refid); - if (wdic != null) { - String id = "WDIC:" + gid + ":" + refid; - Element refElem = _appendIdReference(node, id); - if (linkBlock) { - name = name + " 《" + gname + ":" + wdic.getPartName() + "》"; - } - _appendText(item, refElem, name, linkBlock); - } else { - logger.error("undefined word: " + gid + "/" + refid); - if (linkBlock) { - name = name + " 《" + gname + "》"; - } - _appendText(item, node, name, linkBlock); - } - } else { - logger.error("undefined group: " + gid); - _appendText(item, node, name, linkBlock); - } - } - i = idx2 + 1; - } else { - // 閉じられていないのでそのまま追加する - buf.append("[["); - i = idx1; - } - continue; - } - } - - if (ch != '\\') { - // バックスラッシュ以外はそのまま追加 - buf.append(ch); - continue; - } - if (i + 1 >= len) { - // バックスラッシュに続く文字がないのでそのまま追加 - buf.append(ch); - continue; - } - - char ch1 = str.charAt(i + 1); - if (ch1 >= 0x21 && ch1 <= 0x7e) { - if (!CharUtils.isAsciiAlphanumeric(ch1)) { - // 1文字エスケープ (英数字以外の記号) - i++; - buf.append(ch1); - continue; - } - } - - int idx = WdicUtil.indexOf(str, ";", i + 1); - if (idx < 0) { - logger.error("unexpected format: " + str); - buf.append(ch); - continue; - } - String ref = str.substring(i + 1, idx); - i = idx; - int sep1 = WdicUtil.indexOf(ref, "{", 0); - int sep2 = WdicUtil.indexOf(ref, ":", 0); - if (sep1 == -1 && sep2 == -1) { - // 実体参照 - buf.append(WdicUtil.getCharacter(ref)); - continue; - } - - // 特殊機能 - String name; - ArrayList param = new ArrayList<>(); - if (sep1 != -1 && sep2 != -1) { - if (sep2 < sep1) { - sep1 = -1; - } else { - sep2 = -1; - } - } - if (sep1 != -1) { - // 引数は{}で括られている - name = ref.substring(0, sep1); - int idx1 = sep1; - int idx2; - while (idx1 != -1) { - idx2 = ref.indexOf('}', idx1 + 1); - if (idx2 == -1) { - idx2 = ref.length(); - } - param.add(ref.substring(idx1 + 1, idx2)); - idx1 = ref.indexOf('{', idx2 + 1); - } - } else { - // 引数は:で区切られている - name = ref.substring(0, sep2); - Collections.addAll(param, ref.substring(sep2 + 1).split(":")); - } - - if ("x".equals(name)) { - String code = param.get(0); - try { - int codePoint = Integer.parseInt(code, 16); - buf.appendCodePoint(codePoint); - } catch (Exception e) { - logger.error("unknown character code: " + code); - } - } else if ("sup".equals(name) || "sub".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - Element elem = _appendElement(node, name); - _appendText(item, elem, param.get(0), linkBlock); - } else if ("ruby".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - _appendText(item, node, param.get(0), linkBlock); - if (param.size() > 1) { - Element elem = _appendElement(node, "sub"); - _appendText(item, elem, "(" + param.get(1) + ")", linkBlock); - } - } else if ("asin".equals(name)) { - String asin = param.get(0); - String url; - switch (asin.charAt(0)) { - case '4': - url = "http://www.amazon.co.jp/exec/obidos/ASIN/"; - break; - case '3': - url = "http://www.amazon.de/exec/obidos/ASIN/"; - break; - case '2': - url = "http://www.amazon.fr/exec/obidos/ASIN/"; - break; - case '1': - url = "http://www.amazon.co.uk/exec/obidos/ASIN/"; - break; - case '0': - default: - url = "http://www.amazon.com/exec/obidos/ASIN/"; - break; - } - buf.append(url + asin); - } else if ("flag".equals(name)) { - // ignore - } else if ("mex".equals(name)) { - buf.append("[" + param.get(0) + "]"); - } else if ("glyph".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - String glyph = param.get(0); - if (!glyphList.contains(glyph)) { - glyphList.add(glyph); - } - Element elem = _appendDataReference(node, "glyph-" + glyph, "inlineGraphic"); - _appendRawText(elem, "[グリフ:" + glyph + "]"); - } else if ("oline".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - String pstr = param.get(0); - int n = pstr.length(); - for (int j = 0; j < n; j++) { - int codePoint = pstr.codePointAt(j); - String hex = HexUtil.toHexString(codePoint, 6); - String fontname = "U" + hex + "-OL"; - File dir = new File(basedir, WDIC_GAIJI_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - File file = new File(dir, fontname + ".xbm"); - if (!file.exists()) { - BufferedImage img = WdicUtil.toOverLineImage(codePoint); - try { - FontUtil.writeXbm(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - if (img != null) { - img.flush(); - } - } - } - String type = gaijiMap.get(fontname); - if (type == null) { - type = FontUtil.getFontType(codePoint); - gaijiMap.put(fontname, type); - } - _appendCharReference(node, fontname, type); - } - } else if ("uline".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - String pstr = param.get(0); - int n = pstr.length(); - for (int j = 0; j < n; j++) { - int codePoint = pstr.codePointAt(j); - String hex = HexUtil.toHexString(codePoint, 6); - String fontname = "U" + hex + "-UL"; - File dir = new File(basedir, WDIC_GAIJI_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - File file = new File(dir, fontname + ".xbm"); - if (!file.exists()) { - BufferedImage img = WdicUtil.toUnderLineImage(codePoint); - try { - FontUtil.writeXbm(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - if (img != null) { - img.flush(); - } - } - } - String type = gaijiMap.get(fontname); - if (type == null) { - type = FontUtil.getFontType(codePoint); - gaijiMap.put(fontname, type); - } - _appendCharReference(node, fontname, type); - } - } else if ("sout".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - String pstr = param.get(0); - int n = pstr.length(); - for (int j = 0; j < n; j++) { - int codePoint = pstr.codePointAt(j); - String hex = HexUtil.toHexString(codePoint, 6); - String fontname = "U" + hex + "-LT"; - File dir = new File(basedir, WDIC_GAIJI_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - File file = new File(dir, fontname + ".xbm"); - if (!file.exists()) { - BufferedImage img = WdicUtil.toLineThroughImage(codePoint); - try { - FontUtil.writeXbm(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - if (img != null) { - img.flush(); - } - } - } - String type = gaijiMap.get(fontname); - if (type == null) { - type = FontUtil.getFontType(codePoint); - gaijiMap.put(fontname, type); - } - _appendCharReference(node, fontname, type); - } - } else if ("date".equals(name) || "dt".equals(name)) { - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - buf.append(param.get(0)); - // if (param.size() > 1) { - // String type = param.get(1); - // if ("JC".equals(type)) { - // buf.append("[ユリウス歴]"); - // } else if ("GC".equals(type)) { - // buf.append("[グレゴリオ歴]"); - // } else if ("LC".equals(type)) { - // buf.append("[太陰太陽歴]"); - // } else { - // logger.error("unknown function parameter: " + itemId + - // " [" + name + ":" + type + "]"); - // } - // } - _appendText(item, node, buf.toString(), linkBlock); - buf.delete(0, buf.length()); - } else { - if (!"unit".equals(name)) { - logger.error("unknown function name: " + itemId + " [" + name + "]"); - } - _appendRawText(node, buf.toString()); - buf.delete(0, buf.length()); - _appendText(item, node, param.get(0), linkBlock); - } - } - _appendRawText(node, buf.toString()); - } - - /** - * 使用されている文字が有効かどうかを確認します。 - * - * @param node ノード - */ - private void _checkCharacter(final Node node) { - if (node.getNodeType() == Node.TEXT_NODE) { - Text text = (Text)node; - String str = text.getNodeValue(); - int len = str.length(); - int idx = 0; - while (idx < len) { - int codePoint = str.codePointAt(idx); - int cnt = Character.charCount(codePoint); - if (WordUtil.isValidChar(codePoint)) { - idx += cnt; - continue; - } - int end = idx + cnt; - Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codePoint); - if (Character.UnicodeBlock.HEBREW.equals(unicodeBlock) - || Character.UnicodeBlock.ARABIC.equals(unicodeBlock) - || Character.UnicodeBlock.DEVANAGARI.equals(unicodeBlock)) { - while (end < len) { - int cp = str.codePointAt(end); - if (!unicodeBlock.equals(Character.UnicodeBlock.of(cp))) { - if (cp == ' ' && (end + 1) < len) { - cp = str.codePointAt(end + 1); - if (unicodeBlock.equals(Character.UnicodeBlock.of(cp))) { - end += 1 + Character.charCount(cp); - continue; - } - } - break; - } - end += Character.charCount(cp); - } - } - if (end > idx + cnt) { - String s = str.substring(idx, end); - Node parent = text.getParentNode(); - text = text.splitText(idx); - text.deleteData(0, end - idx); - Element nobr = text.getOwnerDocument().createElement("nobr"); - parent.insertBefore(nobr, text); - - int n = s.length(); - StringBuilder buf = new StringBuilder(); - for (int i = 0; i < n; i++) { - if (i > 0) { - buf.append("_"); - } - int cp = s.codePointAt(i); - buf.append("U").append(HexUtil.toHexString(cp, 6)); - i = i + Character.charCount(cp) - 1; - } - String name = buf.toString() + "-N"; - File dir = new File(basedir, WDIC_GAIJI_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - String[] files = - dir.list(FileFilterUtils.andFileFilter( - FileFilterUtils.prefixFileFilter(name), - FileFilterUtils.suffixFileFilter(".xbm"))); - if (ArrayUtils.isEmpty(files)) { - if (unicodeBlock == null) { - logger.info("unsupported characters:" - + " '" + s + "'" - + " UNKNOWN_UNICODE_BLOCK"); - } else { - logger.info("unsupported characters:" - + " '" + s + "'" - + " " + unicodeBlock.toString()); - } - BufferedImage img = WdicUtil.toImage(s); - int height = img.getHeight(); - int width = 8; - byte[][] b = FontUtil.split(img, width); - img.flush(); - n = b.length; - for (int i = 0; i < n; i++) { - File file = new File(dir, name + i + ".xbm"); - try { - FontUtil.writeXbm(b[i], width, height, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } - } - } else { - n = files.length; - } - for (int i = 0; i < n; i++) { - String name0 = name + i; - if (!gaijiMap.containsKey(name0)) { - gaijiMap.put(name0, "narrow"); - } - _appendCharReference(nobr, name0, "narrow"); - } - } else { - String hex = HexUtil.toHexString(codePoint, 6); - String name = "U" + hex; - File dir = new File(basedir, WDIC_GAIJI_DIR); - if (!dir.exists() && !dir.mkdirs()) { - logger.error("failed to create directories: " + dir.getPath()); - } - File file = new File(dir, name + ".xbm"); - if (!file.exists()) { - String s = String.valueOf(Character.toChars(codePoint)); - if (unicodeBlock == null) { - logger.info("unsupported characters:" - + " '" + s + "'" - + " UNKNOWN_UNICODE_BLOCK"); - } else { - logger.info("unsupported character:" - + " [U+" + hex + "]" - + " '" + s + "'" - + " " + unicodeBlock.toString()); - } - BufferedImage img = WdicUtil.toImage(codePoint); - try { - FontUtil.writeXbm(img, file); - } catch (IOException e) { - logger.error(e.getMessage(), e); - if (file.exists() && !file.delete()) { - logger.error("failed to delete file: " + file.getPath()); - } - } finally { - if (img != null) { - img.flush(); - } - } - } - String type = gaijiMap.get(name); - if (type == null) { - type = FontUtil.getFontType(codePoint); - gaijiMap.put(name, type); - } - Node parent = text.getParentNode(); - text = text.splitText(idx); - text.deleteData(0, cnt); - Element elem = text.getOwnerDocument().createElement("char"); - elem.setAttribute("name", name); - elem.setAttribute("type", type); - parent.insertBefore(elem, text); - } - str = text.getNodeValue(); - len = str.length(); - idx = 0; - } - } - if (node.hasChildNodes()) { - NodeList nlist = node.getChildNodes(); - int len = nlist.getLength(); - for (int i = 0; i < len; i++) { - Node child = nlist.item(i); - _checkCharacter(child); - int n = nlist.getLength(); - if (len < n) { - i += n - len; - len = n; - } - } - } - } } - // end of Wdic2Xml.java diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGaijiNode.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGaijiNode.java new file mode 100644 index 0000000..50e4160 --- /dev/null +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGaijiNode.java @@ -0,0 +1,323 @@ +package io.github.eb4j.xml2eb.converter.wdic; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.Map; + +import org.apache.commons.io.FilenameUtils; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; + +import org.apache.commons.io.filefilter.FileFilterUtils; +import org.apache.commons.lang.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.github.eb4j.xml2eb.util.FontUtil; +import io.github.eb4j.xml2eb.util.HexUtil; +import io.github.eb4j.xml2eb.util.WordUtil; + + +/** + * Gaiji handler class. + * Created by miurahr on 16/07/17. + */ +class WdicGaijiNode { + + private Logger logger = LoggerFactory.getLogger(getClass()); + private static final String WDIC_GAIJI_DIR = "gaiji"; + + private File gaijidir; + private Map gaijiMap; + + WdicGaijiNode(final File gaijidir, final Map gaijiMap) { + this.gaijidir = gaijidir; + this.gaijiMap = gaijiMap; + } + + /** + * 外字データノードを作成します。 + * + * @param subbook subbookノード + */ + void makeFontNode(final Element subbook) { + Element font = appendElement(subbook, "font"); + for (Map.Entry entry : gaijiMap.entrySet()) { + String name = entry.getKey(); + String type = entry.getValue(); + File file = new File(gaijidir, name + ".xbm"); + if (!file.exists()) { + logger.error("file not found: " + file.getPath()); + } + String path = FilenameUtils.concat(WDIC_GAIJI_DIR, file.getName()); + Element charElem = appendElement(font, "char"); + charElem.setAttribute("name", name); + charElem.setAttribute("type", type); + Element dataElem = appendElement(charElem, "data"); + dataElem.setAttribute("size", "16"); + dataElem.setAttribute("src", path); + } + } + + /** + * Indicate logic type for gaiji style. + */ + private enum GaijiStyle { underLine, overLine, lineThrough } + + void addOverLineGaijiFont(final Node node, final String pstr) { + addGaijiFont2(node, pstr, GaijiStyle.overLine); + } + + void addUnderLineGaijiFont(final Node node, final String pstr) { + addGaijiFont2(node, pstr, GaijiStyle.underLine); + } + + void addLineThroughGaijiFont(final Node node, final String pstr) { + addGaijiFont2(node, pstr, GaijiStyle.lineThrough); + } + + /** + * 使用されている文字が有効かどうかを確認します。 + * + * @param node ノード + */ + void checkCharacter(final Node node) { + if (node.getNodeType() == Node.TEXT_NODE) { + checkTextNodeGaiji(node); + } + if (node.hasChildNodes()) { + NodeList nlist = node.getChildNodes(); + int len = nlist.getLength(); + for (int i = 0; i < len; i++) { + Node child = nlist.item(i); + checkCharacter(child); + int n = nlist.getLength(); + if (len < n) { + i += n - len; + len = n; + } + } + } + } + + private void addGaijiFont2(final Node node, final String pstr, final GaijiStyle style) { + int n = pstr.length(); + for (int j = 0; j < n; j++) { + int codePoint = pstr.codePointAt(j); + String hex = HexUtil.toHexString(codePoint, 6); + String fontname = "U" + hex + "-LT"; + if (!gaijidir.exists() && !gaijidir.mkdirs()) { + logger.error("failed to create directories: " + gaijidir.getPath()); + } + File file = new File(gaijidir, fontname + ".xbm"); + if (!file.exists()) { + BufferedImage img; + switch (style) { + case overLine: + img = WdicUtil.toOverLineImage(codePoint); + break; + case underLine: + img = WdicUtil.toUnderLineImage(codePoint); + break; + case lineThrough: + img = WdicUtil.toLineThroughImage(codePoint); + break; + default: + // don't come here + // FIXME dummy + img = WdicUtil.toLineThroughImage(codePoint); + break; + } + try { + FontUtil.writeXbm(img, file); + } catch (IOException e) { + logger.error(e.getMessage(), e); + if (file.exists() && !file.delete()) { + logger.error("failed to delete file: " + file.getPath()); + } + } finally { + if (img != null) { + img.flush(); + } + } + } + String type = gaijiMap.get(fontname); + if (type == null) { + type = FontUtil.getFontType(codePoint); + gaijiMap.put(fontname, type); + } + _appendCharReference(node, fontname, type); + } + } + + private void checkTextNodeGaiji(final Node node) { + Text text = (Text)node; + String str = text.getNodeValue(); + int len = str.length(); + int idx = 0; + while (idx < len) { + int codePoint = str.codePointAt(idx); + int cnt = Character.charCount(codePoint); + if (WordUtil.isValidChar(codePoint)) { + idx += cnt; + continue; + } + int end = idx + cnt; + Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codePoint); + if (Character.UnicodeBlock.HEBREW.equals(unicodeBlock) + || Character.UnicodeBlock.ARABIC.equals(unicodeBlock) + || Character.UnicodeBlock.DEVANAGARI.equals(unicodeBlock)) { + while (end < len) { + int cp = str.codePointAt(end); + if (!unicodeBlock.equals(Character.UnicodeBlock.of(cp))) { + if (cp == ' ' && (end + 1) < len) { + cp = str.codePointAt(end + 1); + if (unicodeBlock.equals(Character.UnicodeBlock.of(cp))) { + end += 1 + Character.charCount(cp); + continue; + } + } + break; + } + end += Character.charCount(cp); + } + } + if (end > idx + cnt) { + String s = str.substring(idx, end); + Node parent = text.getParentNode(); + text = text.splitText(idx); + text.deleteData(0, end - idx); + Element nobr = text.getOwnerDocument().createElement("nobr"); + parent.insertBefore(nobr, text); + + int n = s.length(); + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < n; i++) { + if (i > 0) { + buf.append("_"); + } + int cp = s.codePointAt(i); + buf.append("U").append(HexUtil.toHexString(cp, 6)); + i = i + Character.charCount(cp) - 1; + } + String name = buf.toString() + "-N"; + String[] files = + gaijidir.list(FileFilterUtils.andFileFilter( + FileFilterUtils.prefixFileFilter(name), + FileFilterUtils.suffixFileFilter(".xbm"))); + if (ArrayUtils.isEmpty(files)) { + if (unicodeBlock == null) { + logger.info("unsupported characters:" + + " '" + s + "'" + + " UNKNOWN_UNICODE_BLOCK"); + } else { + logger.info("unsupported characters:" + + " '" + s + "'" + + " " + unicodeBlock.toString()); + } + BufferedImage img = WdicUtil.toImage(s); + int height = img.getHeight(); + int width = 8; + byte[][] b = FontUtil.split(img, width); + img.flush(); + n = b.length; + for (int i = 0; i < n; i++) { + File file = new File(gaijidir, name + i + ".xbm"); + try { + FontUtil.writeXbm(b[i], width, height, file); + } catch (IOException e) { + logger.error(e.getMessage(), e); + if (file.exists() && !file.delete()) { + logger.error("failed to delete file: " + file.getPath()); + } + } + } + } else { + n = files.length; + } + for (int i = 0; i < n; i++) { + String name0 = name + i; + if (!gaijiMap.containsKey(name0)) { + gaijiMap.put(name0, "narrow"); + } + _appendCharReference(nobr, name0, "narrow"); + } + } else { + String hex = HexUtil.toHexString(codePoint, 6); + String name = "U" + hex; + File file = new File(gaijidir, name + ".xbm"); + if (!file.exists()) { + String s = String.valueOf(Character.toChars(codePoint)); + if (unicodeBlock == null) { + logger.info("unsupported characters:" + + " '" + s + "'" + + " UNKNOWN_UNICODE_BLOCK"); + } else { + logger.info("unsupported character:" + + " [U+" + hex + "]" + + " '" + s + "'" + + " " + unicodeBlock.toString()); + } + BufferedImage img = WdicUtil.toImage(codePoint); + try { + FontUtil.writeXbm(img, file); + } catch (IOException e) { + logger.error(e.getMessage(), e); + if (file.exists() && !file.delete()) { + logger.error("failed to delete file: " + file.getPath()); + } + } finally { + if (img != null) { + img.flush(); + } + } + } + String type = gaijiMap.get(name); + if (type == null) { + type = FontUtil.getFontType(codePoint); + gaijiMap.put(name, type); + } + Node parent = text.getParentNode(); + text = text.splitText(idx); + text.deleteData(0, cnt); + Element elem = text.getOwnerDocument().createElement("char"); + elem.setAttribute("name", name); + elem.setAttribute("type", type); + parent.insertBefore(elem, text); + } + str = text.getNodeValue(); + len = str.length(); + idx = 0; + } + } + + /** + * 要素を追加します。 + * + * @param node 要素を追加するノード + * @param tag 要素のタグ名称 + * @return 追加された要素 + */ + private Element appendElement(final Node node, final String tag) { + Element elem = node.getOwnerDocument().createElement(tag); + return (Element) node.appendChild(elem); + } + + /** + * 外字参照要素を追加します。 + * + * @param node 外字参照要素を追加するノード + * @param name name属性値 + * @param type type属性値 + * @return 追加された参照要素 + */ + private Element _appendCharReference(final Node node, final String name, final String type) { + Element elem = appendElement(node, "char"); + elem.setAttribute("name", name); + elem.setAttribute("type", type); + return elem; + } +} diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGraphicNode.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGraphicNode.java new file mode 100644 index 0000000..e07b42f --- /dev/null +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicGraphicNode.java @@ -0,0 +1,110 @@ +package io.github.eb4j.xml2eb.converter.wdic; + + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.w3c.dom.Element; + +import org.apache.commons.io.FilenameUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Wdic graphic node generation class. + * Created by miurahr on 16/07/17. + */ +public class WdicGraphicNode { + + private Logger logger = LoggerFactory.getLogger(getClass()); + private static final String WDIC_GLYPH_DIR = "glyph"; + private static final String WDIC_PLUGIN_DIR = "plugin"; + private static final String WDIC_TABLE_DIR = "table"; + + private WdicNode wdicNode; + private File plugin; + private File glyph; + private File table; + + /** + * グリフリスト + */ + private List glyphList = new ArrayList<>(); + /** + * 表リスト + */ + private List tableList = new ArrayList<>(); + + + /** + * Wdic Graphic node generation class. + * @param wdicNode parent object. + * @param basedir project base directory. + */ + public WdicGraphicNode(final WdicNode wdicNode, final File basedir) { + this.wdicNode = wdicNode; + plugin = new File(basedir, WDIC_PLUGIN_DIR); + glyph = new File(basedir, WDIC_GLYPH_DIR); + table = new File(basedir, WDIC_TABLE_DIR); + } + + void addTableItem(final String item) { + if (!tableList.contains(item)) { + tableList.add(item); + } + } + void addGlyphItem(final String item) { + if (!glyphList.contains(item)) { + glyphList.add(item); + } + } + + /** + * 画像データノードを作成します。 + * + * @param subbook subbookノード + */ + void makeGraphicNode(final Element subbook) { + Element graphic = wdicNode.appendElement(subbook, "graphic"); + for (String name : wdicNode.getPluginMapKeySet()) { + if (name.endsWith(".jpg")) { + File jpg = new File(plugin, name); + if (!jpg.exists()) { + logger.error("file not found: " + jpg.getPath()); + } + String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, name); + wdicNode.appendData(graphic, name, path, "jpg"); + } else if (name.endsWith(".png")) { + String bmpName = name + ".bmp"; + File bmp = new File(plugin, bmpName); + if (!bmp.exists()) { + logger.error("file not found: " + bmp.getPath()); + } + String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, bmpName); + wdicNode.appendData(graphic, name, path, "bmp"); + } + } + + for (String name : glyphList) { + String bmpName = name + ".50px.png.bmp"; + File bmp = new File(glyph, bmpName); + if (!bmp.exists()) { + logger.error("file not found: " + bmp.getPath()); + } + String path = FilenameUtils.concat(WDIC_GLYPH_DIR, bmpName); + wdicNode.appendData(graphic, "glyph-" + name, path, "bmp"); + } + + for (String name : tableList) { + name += ".bmp"; + File bmp = new File(table, name); + if (!bmp.exists()) { + logger.error("file not found: " + bmp.getPath()); + } + String path = FilenameUtils.concat(WDIC_TABLE_DIR, name); + wdicNode.appendData(graphic, name, path, "bmp"); + } + } +} diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicMenuNode.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicMenuNode.java new file mode 100644 index 0000000..991c33e --- /dev/null +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicMenuNode.java @@ -0,0 +1,516 @@ +package io.github.eb4j.xml2eb.converter.wdic; + +import java.text.MessageFormat; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Menu Node creation class. + * + * Created by miurahr on 16/07/17. + */ +public class WdicMenuNode { + + private Logger logger = LoggerFactory.getLogger(getClass()); + private WdicNode wdicNode; + private Map> pluginMap; + private WdicMan manual; + private WdicGroupList groupList; + private WdicDirList dirList = null; + + /** + * Genrate menu node. + * @param node to add menu as sub node. + * @param pmap pluginMap. + * @param manual manual file. + * @param list group list. + * @param dir directory list. + */ + public WdicMenuNode(final WdicNode node, final Map> pmap, + final WdicMan manual, final WdicGroupList list, final WdicDirList dir) { + wdicNode = node; + this.pluginMap = pmap; + this.manual = manual; + this.groupList = list; + this.dirList = dir; + } + + /** + * メニューノードを作成します。 + * + * @param menu メニューノード + */ + public void makeMenuNode(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:top"); + + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:manual"); + String title = groupList.getName() + " " + groupList.getEdition(); + wdicNode.appendRawText(refElem, title); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:bib"); + wdicNode.appendRawText(refElem, "基礎文献"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "DIR:/"); + wdicNode.appendRawText(refElem, "分類別収録語一覧"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:group"); + wdicNode.appendRawText(refElem, "グループ別収録語一覧"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:plugin"); + wdicNode.appendRawText(refElem, "グループ別プラグイン一覧"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:image"); + wdicNode.appendRawText(refElem, "画像プラグイン一覧"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:sound"); + wdicNode.appendRawText(refElem, "音声プラグイン一覧"); + wdicNode.appendNewLine(layerElem); + + refElem = wdicNode.appendIdReference(layerElem, "MENU:text"); + wdicNode.appendRawText(refElem, "文書プラグイン一覧"); + wdicNode.appendNewLine(layerElem); + + logger.debug(" manual"); + _createManualLayer(menu); + logger.debug(" bibliography"); + _createBibliographyLayer(menu); + logger.debug(" directory"); + _createDirectoryLayer(menu); + logger.debug(" group"); + _createGroupLayer(menu); + logger.debug(" plugin list"); + _createPluginLayer(menu); + logger.debug(" graphic list"); + _createImageLayer(menu); + logger.debug(" sound list"); + _createSoundLayer(menu); + logger.debug(" document list"); + _createTextLayer(menu); + } + + /** + * マニュアルメニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createManualLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:manual"); + String[] sec = manual.getSections(); + int len = sec.length; + for (int i = 0; i < len; i++) { + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:manual:" + sec[i]); + wdicNode.appendRawText(refElem, sec[i]); + wdicNode.appendNewLine(layerElem); + String prev = null; + String next = null; + if (i > 0) { + prev = sec[i - 1]; + } + if (i < (len - 1)) { + next = sec[i + 1]; + } + _createManualLayer(menu, sec[i], prev, next); + } + } + + /** + * マニュアルメニュー階層を作成します。 + * + * @param menu メニューノード + * @param sec セクション + * @para prev 前セクション + * @para next 次セクション + */ + private void _createManualLayer(final Element menu, final String sec, final String prev, + final String next) { + Element layerElem = _appendLayer(menu, "MENU:manual:" + sec); + Element keyElem = wdicNode.appendElement(layerElem, "key"); + wdicNode.appendRawText(keyElem, sec); + + Element indent1Elem = wdicNode.appendElement(layerElem, "indent"); + Element indent2Elem = null; + Element indent3Elem = null; + Element indentElem = indent1Elem; + for (String str : manual.getContents(sec)) { + if (str.length() > 0) { + if (str.startsWith("\t")) { + if (indent2Elem == null) { + indent2Elem = wdicNode.appendElement(indent1Elem, "indent"); + } + if (str.startsWith("\t\t")) { + if (indent3Elem == null) { + indent3Elem = wdicNode.appendElement(indent2Elem, "indent"); + } + str = str.substring(2); + indentElem = indent3Elem; + } else { + str = str.substring(1); + indentElem = indent2Elem; + indent3Elem = null; + } + } else { + indentElem = indent1Elem; + indent2Elem = null; + indent3Elem = null; + } + wdicNode.appendRawText(indentElem, str); + } + wdicNode.appendNewLine(indentElem); + } + + wdicNode.appendNewLine(layerElem); + + if (prev != null) { + wdicNode.appendRawText(layerElem, "\u2190 "); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:manual:" + prev); + wdicNode.appendRawText(refElem, prev); + wdicNode.appendRawText(layerElem, " | "); + } + if (next != null) { + wdicNode.appendRawText(layerElem, "\u2192 "); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:manual:" + next); + wdicNode.appendRawText(refElem, next); + wdicNode.appendRawText(layerElem, " | "); + } + wdicNode.appendRawText(layerElem, "\u2191 "); + String title = groupList.getName() + " " + groupList.getEdition(); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:manual"); + wdicNode.appendRawText(refElem, title); + wdicNode.appendNewLine(layerElem); + } + + /** + * 基礎文献メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createBibliographyLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:bib"); + Collection groups = groupList.getGroups(); + for (WdicGroup group : groups) { + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:bib:" + + group.getGroupId()); + wdicNode.appendRawText(refElem, group.getGroupName() + "用語の基礎知識"); + wdicNode.appendNewLine(layerElem); + _createBibliographyLayer(menu, group); + } + } + + /** + * 基礎文献メニュー階層を作成します。 + * + * @param menu メニューノード + * @param group 辞書グループ + */ + private void _createBibliographyLayer(final Element menu, final WdicGroup group) { + Element layerElem = _appendLayer(menu, "MENU:bib:" + group.getGroupId()); + Element keyElem = wdicNode.appendElement(layerElem, "key"); + wdicNode.appendRawText(keyElem, group.getGroupName() + "用語の基礎知識"); + Element indentElem = wdicNode.appendElement(layerElem, "indent"); + for (String str: group.getWdicBib().getBibliography()) { + if (str.length() > 0) { + wdicNode.appendRawText(indentElem, str); + } + wdicNode.appendNewLine(indentElem); + } + } + + /** + * 分類一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createDirectoryLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "DIR:/"); + for (String dir: dirList.getChildren("/")) { + wdicNode.appendRawText(layerElem, "\u21d2 "); + Element refElem = wdicNode.appendIdReference(layerElem, "DIR:" + dir); + wdicNode.appendRawText(refElem, dirList.getName(dir)); + wdicNode.appendNewLine(layerElem); + _createDirectoryLayer(menu, dir); + } + } + + /** + * 分類一覧メニュー階層を作成します。 + * + * @param menu メニューノード + * @param dir 分類 + */ + private void _createDirectoryLayer(final Element menu, final String dir) { + Element layerElem = _appendLayer(menu, "DIR:" + dir); + Element refElem = wdicNode.appendIdReference(layerElem, "DIR:/"); + wdicNode.appendRawText(refElem, "分類"); + String[] dirs; + if (dir.startsWith("/")) { + dirs = dir.substring(1).split("/"); + } else { + dirs = dir.split("/"); + } + String key = ""; + int len = dirs.length; + for (int i = 0; i < len - 1; i++) { + wdicNode.appendRawText(layerElem, " > "); + key += "/" + dirs[i]; + refElem = wdicNode.appendIdReference(layerElem, "DIR:" + key); + wdicNode.appendRawText(refElem, dirList.getName(key)); + } + wdicNode.appendRawText(layerElem, " > " + dirList.getName(dir)); + wdicNode.appendNewLine(layerElem); + + List children = dirList.getChildren(dir); + len = children.size(); + int cnt = len; + for (int i = 0; i < len; i++) { + String child = children.get(i); + wdicNode.appendRawText(layerElem, "\u21d2 "); + if (dirList.hasAlias(child)) { + String alias = dirList.getAlias(child); + refElem = wdicNode.appendIdReference(layerElem, "DIR:" + alias); + wdicNode.appendRawText(refElem, dirList.getName(child) + "@"); + wdicNode.appendNewLine(layerElem); + } else { + refElem = wdicNode.appendIdReference(layerElem, "DIR:" + child); + wdicNode.appendRawText(refElem, dirList.getName(child)); + wdicNode.appendNewLine(layerElem); + _createDirectoryLayer(menu, child); + } + } + + List items = groupList.getWdicItem(dir); + len = items.size(); + cnt += len; + for (int i = 0; i < len; i++) { + WdicItem item = items.get(i); + wdicNode.appendRawText(layerElem, "\u2192 "); + String head = item.getHead(); + String grpId = item.getWdic().getGroupId(); + String id = "WDIC:" + grpId + ":" + head; + refElem = wdicNode.appendIdReference(layerElem, id); + String gname = item.getWdic().getGroupName(); + String part = item.getWdic().getPartName(); + String title = head + " 《" + gname + ":" + part + "》"; + wdicNode.appendRawText(refElem, title); + wdicNode.appendNewLine(layerElem); + } + + if (cnt == 0) { + wdicNode.appendRawText(layerElem, "(該当単語なし)"); + wdicNode.appendNewLine(layerElem); + } + } + + /** + * グループ一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createGroupLayer(final Element menu) { + String id = "MENU:group"; + Element layerElem = _appendLayer(menu, id); + for (WdicGroup group : groupList.getGroups()) { + Element refElem = wdicNode.appendIdReference(layerElem, id + ":" + group.getGroupId()); + wdicNode.appendRawText(refElem, MessageFormat.format("{0}用語の基礎知識", + group.getGroupName())); + wdicNode.appendNewLine(layerElem); + _createGroupLayer(menu, group); + } + } + + /** + * グループ一覧メニュー階層を作成します。 + * + * @param menu メニューノード + * @param group 辞書グループ + */ + private void _createGroupLayer(final Element menu, final WdicGroup group) { + String id = "MENU:group:" + group.getGroupId(); + Element layerElem = _appendLayer(menu, id); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:group"); + wdicNode.appendRawText(refElem, "グループ"); + wdicNode.appendRawText(layerElem, " > " + group.getGroupName()); + wdicNode.appendNewLine(layerElem); + for (Wdic wdic : group.getWdics()) { + String name = wdic.getPartName() + "編"; + refElem = wdicNode.appendIdReference(layerElem, id + ":" + wdic.getPartId()); + wdicNode.appendRawText(refElem, name); + wdicNode.appendNewLine(layerElem); + _createGroupLayer(menu, wdic); + } + } + + /** + * グループ一覧メニュー階層を作成します。 + * + * @param menu メニューノード + * @param wdic 辞書 + */ + private void _createGroupLayer(final Element menu, final Wdic wdic) { + String grpId = wdic.getGroupId(); + String partId = wdic.getPartId(); + String id = "MENU:group:" + grpId + ":" + partId; + Element layerElem = _appendLayer(menu, id); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:group"); + wdicNode.appendRawText(refElem, "グループ"); + wdicNode.appendRawText(layerElem, " > "); + refElem = wdicNode.appendIdReference(layerElem, "MENU:group:" + grpId); + wdicNode.appendRawText(refElem, wdic.getGroupName()); + wdicNode.appendRawText(layerElem, " > " + wdic.getPartName() + "編"); + wdicNode.appendNewLine(layerElem); + for (WdicItem item : wdic.getWdicItems()) { + if (item.isAlias()) { + String name = item.getHead(); + wdicNode.appendRawText(layerElem, name); + List yomiList = item.getYomi(); + if (yomiList.isEmpty()) { + logger.info("yomi not defined: " + grpId + ":" + partId + ":" + item.getHead()); + } else { + String yomi = yomiList.get(0); + wdicNode.appendText(item, layerElem, " [" + yomi + "]"); + } + wdicNode.appendRawText(layerElem, " \u21d2 "); + name = item.getRealName(); + String refid = WdicUtil.unescape(name); + id = "WDIC:" + grpId + ":" + refid; + refElem = wdicNode.appendIdReference(layerElem, id); + wdicNode.appendText(item, refElem, name); + wdicNode.appendNewLine(layerElem); + } else { + String name = item.getHead(); + id = "WDIC:" + grpId + ":" + name; + refElem = wdicNode.appendIdReference(layerElem, id); + wdicNode.appendRawText(refElem, name); + List yomiList = item.getYomi(); + if (yomiList.isEmpty()) { + logger.info("yomi not defined: " + grpId + ":" + partId + ":" + item.getHead()); + } else { + String yomi = yomiList.get(0); + wdicNode.appendText(item, layerElem, " [" + yomi + "]"); + } + wdicNode.appendNewLine(layerElem); + } + } + } + + /** + * プラグイン一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createPluginLayer(final Element menu) { + String id = "MENU:plugin"; + Element layerElem = _appendLayer(menu, id); + for (WdicGroup group: groupList.getGroups()) { + Element refElem = wdicNode.appendIdReference(layerElem, id + ":" + group.getGroupId()); + wdicNode.appendRawText(refElem, group.getGroupName() + "用語の基礎知識"); + wdicNode.appendNewLine(layerElem); + _createPluginLayer(menu, group); + } + } + + /** + * プラグイン一覧メニュー階層を作成します。 + * + * @param menu メニューノード + * @param group 辞書グループ + */ + private void _createPluginLayer(final Element menu, final WdicGroup group) { + String id = "MENU:plugin:" + group.getGroupId(); + Element layerElem = _appendLayer(menu, id); + Element refElem = wdicNode.appendIdReference(layerElem, "MENU:plugin"); + wdicNode.appendRawText(refElem, "グループ"); + wdicNode.appendRawText(layerElem, " > " + group.getGroupName()); + wdicNode.appendNewLine(layerElem); + + pluginMap.entrySet().stream() + .filter(entry -> entry.getValue().stream() + .anyMatch(v -> group.equals(v.getWdic().getGroup()))) + .forEach(entry -> { + String name = entry.getKey(); + wdicNode.appendRawText(wdicNode.appendIdReference(layerElem, "PLUGIN:" + name), + name); + wdicNode.appendNewLine(layerElem); + }); + } + + /** + * 画像一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createImageLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:image"); + List ext = Arrays.asList(".jpg", ".png"); + pluginMap.keySet().stream() + .filter(v -> ext.stream().anyMatch(s -> s.endsWith(v))) + .forEach(name -> { + Element refElem = wdicNode.appendIdReference(layerElem, "PLUGIN:" + name); + wdicNode.appendRawText(refElem, name); + wdicNode.appendNewLine(layerElem); + }); + } + + /** + * 音声一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createSoundLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:sound"); + List ext = Arrays.asList(".mp3", ".ogg", ".mid"); + pluginMap.keySet().stream() + .filter(v -> ext.stream().anyMatch(s -> s.endsWith(v))) + .forEach(name -> { + Element refElem = wdicNode.appendIdReference(layerElem, "PLUGIN:" + name); + wdicNode.appendRawText(refElem, name); + wdicNode.appendNewLine(layerElem); + }); + } + + /** + * 文書一覧メニュー階層を作成します。 + * + * @param menu メニューノード + */ + private void _createTextLayer(final Element menu) { + Element layerElem = _appendLayer(menu, "MENU:text"); + List ext = Arrays.asList(".jpg", ".png", ".mp3", ".ogg", ".mid"); + pluginMap.keySet().stream() + .filter(v -> ext.stream().noneMatch(s -> s.endsWith(v))) + .forEach(name -> { + Element refElem = wdicNode.appendIdReference(layerElem, "PLUGIN:" + name); + wdicNode.appendRawText(refElem, name); + wdicNode.appendNewLine(layerElem); + }); + } + + /** + * レイヤ要素を追加します。 + * + * @param node レイヤ要素を追加するノード + * @param id ID属性値 + * @return 追加されたレイヤ要素 + */ + private Element _appendLayer(final Node node, final String id) { + Element elem = wdicNode.appendElement(node, "layer"); + elem.setAttribute("id", id); + return elem; + } + +} diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicNode.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicNode.java new file mode 100644 index 0000000..889cc7b --- /dev/null +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicNode.java @@ -0,0 +1,1316 @@ +package io.github.eb4j.xml2eb.converter.wdic; + + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; +import java.util.TreeMap; + +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.Text; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; +import org.apache.commons.lang.CharUtils; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.github.eb4j.xml2eb.util.BmpUtil; +import io.github.eb4j.xml2eb.util.WordUtil; + + +/** + * Node generation class. + * Created by miurahr on 16/07/17. + */ +public class WdicNode { + + private Logger logger = LoggerFactory.getLogger(getClass()); + + private static final String ENCODING = "UTF-8"; + private static final String WDIC_PLUGIN_DIR = "plugin"; + private static final String WDIC_GAIJI_DIR = "gaiji"; + private static final String WDIC_TABLE_DIR = "table"; + + /** + * ベースディレクトリ + */ + private File basedir = null; + + /** + * WDICグループリスト + */ + private WdicGroupList groupList = null; + /** + * WDIC分類リスト + */ + private WdicDirList dirList = null; + /** + * WDICマニュアル + */ + private WdicMan manual = null; + /** + * プラグインマップ + */ + private Map> pluginMap = null; + /** + * 外字マップ + */ + private Map gaijiMap = null; + + private WdicMenuNode wdicMenuNode; + private WdicGraphicNode wdicGraphicNode; + private WdicSoundNode wdicSoundNode; + private WdicGaijiNode wdicGaijiNode; + + + /** + * Node handler main class. + * @param basedir project base directory. + */ + public WdicNode(final File basedir) { + this.basedir = basedir; + File file = new File(basedir, "FILE.GL"); + this.groupList = new WdicGroupList(file); + file = new File(basedir, "DIR.LST"); + this.dirList = new WdicDirList(file); + file = new File(basedir, "WDICALL.MAN"); + this.manual = new WdicMan(file); + this.pluginMap = groupList.getPluginMap(); + this.gaijiMap = new TreeMap<>(); + + wdicMenuNode = new WdicMenuNode(this, pluginMap, manual, groupList, dirList); + wdicGraphicNode = new WdicGraphicNode(this, basedir); + wdicSoundNode = new WdicSoundNode(this, basedir); + + File dir = new File(basedir, WDIC_GAIJI_DIR); + if (!dir.exists() && !dir.mkdirs()) { + logger.error("failed to create directories: " + dir.getPath()); + } + this.wdicGaijiNode = new WdicGaijiNode(dir, gaijiMap); + } + + Set getPluginMapKeySet() { + return pluginMap.keySet(); + } + + void makeNodes(final Element subbook) { + logger.info("create content node..."); + makeContentNode(subbook); + logger.info("create graphic node..."); + wdicGraphicNode.makeGraphicNode(subbook); + logger.info("create sound node..."); + wdicSoundNode.makeSoundNode(subbook); + logger.info("create font node..."); + wdicGaijiNode.makeFontNode(subbook); + } + + /** + * メニューノードを作成します。 + * + * @param content コンテントノード + */ + public void makeMenuNode(final Element content) { + Element menu = appendElement(content, "menu"); + wdicMenuNode.makeMenuNode(menu); + } + + /** + * 辞書データノードを作成します。 + * + * @param subbook subbookノード + */ + void makeContentNode(final Element subbook) { + Element content = appendElement(subbook, "content"); + + logger.info("create item node..."); + for (WdicGroup group : groupList.getGroups()) { + for (Wdic dic : group.getWdics()) { + for (WdicItem item : dic.getWdicItems()) { + makeItemNode(content, item); + } + } + } + makeItemNode(content); + + logger.info("create menu node..."); + makeMenuNode(content); + + logger.info("create copyright node..."); + _makeCopyrightNode(content); + } + + /** + * レイヤ要素を追加します。 + * + * @param node レイヤ要素を追加するノード + * @param id ID属性値 + * @return 追加されたレイヤ要素 + */ + Element appendLayer(final Node node, final String id) { + Element elem = appendElement(node, "layer"); + elem.setAttribute("id", id); + return elem; + } + + + /** + * 辞書項目ノードを作成します。 + * + * @param content コンテントノード + * @param item 辞書項目 + */ + void makeItemNode(final Element content, final WdicItem item) { + String grpId = item.getWdic().getGroupId(); + String grpName = item.getWdic().getGroupName(); + String partName = item.getWdic().getPartName(); + String partId = item.getWdic().getPartId(); + + String head = item.getHead(); + logger.debug(" [" + grpId + ":" + partId + "] " + head); + Element itemElem = _appendItem(content, "WDIC:" + grpId + ":" + head); + Element headElem = appendElement(itemElem, "head"); + appendRawText(headElem, head + " 【" + grpName + ":" + partName + "】"); + boolean wordAvail = false; + if (WordUtil.isValidWord(head)) { + Element wordElem = appendElement(itemElem, "word"); + appendRawText(wordElem, head); + wordAvail = true; + } + + // 読みを検索語として登録 + List yomiList = item.getYomi(); + int n = yomiList.size(); + for (int i = 0; i < n; i++) { + String yomi = yomiList.get(i); + String word = WdicUtil.unescape(yomi); + if (!"???".equals(yomi) && !head.equals(word) && WordUtil.isValidWord(word)) { + Element wordElem = appendElement(itemElem, "word"); + appendRawText(wordElem, word); + wordAvail = true; + } + } + // 英字表記を検索語として登録 + Map spellMap = item.getSpell(); + for (Map.Entry entry : spellMap.entrySet()) { + String str = WdicUtil.unescape(entry.getValue()); + if (StringUtils.isAsciiPrintable(str)) { + int idx = str.indexOf(": "); + if (idx > 0) { + // 略語 + String ss = str.substring(0, idx).trim(); + if (!head.equals(ss) && WordUtil.isValidWord(ss)) { + Element wordElem = appendElement(itemElem, "word"); + appendRawText(wordElem, ss); + wordAvail = true; + } + // 元の語形 + str = str.substring(idx + 2).trim(); + } + if (!head.equals(str) && WordUtil.isValidWord(str)) { + Element wordElem = appendElement(itemElem, "word"); + appendRawText(wordElem, str); + wordAvail = true; + } + } + } + + if (!wordAvail) { + logger.warn("word not defined: " + grpId + ":" + partId + ":" + head); + } + + // 本文の登録 + Element bodyElem = appendElement(itemElem, "body"); + Element keyElem = appendElement(bodyElem, "key"); + appendRawText(keyElem, head); + appendRawText(bodyElem, " 【"); + Element refElem = appendIdReference(bodyElem, "MENU:group:" + grpId); + appendRawText(refElem, grpName); + appendRawText(bodyElem, ":"); + refElem = appendIdReference(bodyElem, "MENU:group:" + grpId + ":" + partId); + appendRawText(refElem, partName + "編"); + appendRawText(bodyElem, "】"); + appendNewLine(bodyElem); + + // 分類 + List dirs = item.getDir(); + n = dirs.size(); + for (int i = 0; i < n; i++) { + refElem = appendIdReference(bodyElem, "DIR:/"); + appendRawText(refElem, "分類"); + appendRawText(bodyElem, ":"); + String str = dirs.get(i); + if (str.startsWith("/")) { + str = str.substring(1); + } + String[] ss = str.split("/"); + String key = ""; + int m = ss.length; + for (int j = 0; j < m; j++) { + if (j != 0) { + appendRawText(bodyElem, " > "); + } + key += "/" + ss[j]; + refElem = appendIdReference(bodyElem, "DIR:" + key); + appendRawText(refElem, dirList.getName(key)); + } + appendNewLine(bodyElem); + } + + // 読み + if (!yomiList.isEmpty()) { + n = yomiList.size(); + for (int i = 0; i < n; i++) { + String str = "読み:" + yomiList.get(i); + appendText(item, bodyElem, str); + appendNewLine(bodyElem); + } + } + + // 外語 + if (!spellMap.isEmpty()) { + Iterator> spellIt = spellMap.entrySet().iterator(); + while (spellIt.hasNext()) { + Map.Entry entry = spellIt.next(); + String str = "外語:[" + entry.getKey() + "] " + entry.getValue(); + appendText(item, bodyElem, str); + appendNewLine(bodyElem); + } + } + + // 発音 + Map pronMap = item.getPronounce(); + if (!pronMap.isEmpty()) { + Iterator> pronIt = pronMap.entrySet().iterator(); + while (pronIt.hasNext()) { + Map.Entry entry = pronIt.next(); + String str = "発音:[" + entry.getKey() + "] " + entry.getValue(); + appendRawText(bodyElem, str); + appendNewLine(bodyElem); + } + } + + // 品詞 + List speechList = item.getSpeech(); + if (!speechList.isEmpty()) { + StringBuilder buf = new StringBuilder(); + for (String s : speechList) { + if (buf.length() == 0) { + buf.append("品詞:"); + } else { + buf.append(","); + } + buf.append(s); + } + appendRawText(bodyElem, buf.toString()); + appendNewLine(bodyElem); + } + + // 内容 + appendNewLine(bodyElem); + Stack indentStack = new Stack<>(); + int curIndent = 0; + int ignoreTabs = 0; + int section = 0; + int tableNum = 0; + Element indentElem = appendElement(bodyElem, "indent"); + Map numMap = new HashMap<>(); + boolean linkBlock = false; + List bodyList = item.getBody(); + n = bodyList.size(); + for (int i = 0; i < n; i++) { + String body = bodyList.get(i); + String block = WdicUtil.deleteTab(body); + + if ("//LINK".equals(block)) { + while (!indentStack.isEmpty()) { + indentElem = indentStack.pop(); + } + appendNewLine(indentElem); + linkBlock = true; + curIndent = 0; + // リンク部では常に1段インデントを無視する ("//LINK"部のタブ分) + ignoreTabs = 1; + continue; + } + + int indent = WdicUtil.getTabCount(body); + if (block.startsWith("= ")) { + // 無視するタブ数を変更 + section = indent; + ignoreTabs = indent + 1; + indent = 0; + curIndent = 0; + while (!indentStack.isEmpty()) { + indentElem = indentStack.pop(); + } + } else { + indent -= ignoreTabs; + indent = Math.max(indent, 0); + if (block.startsWith("+ ")) { + // キー"-1"にこのブロックのインデント数を設定する + numMap.put(-1, indent); + if (numMap.size() > 1) { + // 次段以降はインデントを下げない + indent = curIndent; + } + } else { + // 数字あり箇条書きでない場合はマップをクリアする + numMap.clear(); + } + } + if (curIndent < indent) { + while (curIndent != indent) { + indentStack.push(indentElem); + indentElem = appendElement(indentElem, "indent"); + curIndent++; + } + } else if (curIndent > indent) { + while (curIndent != indent) { + indentElem = indentStack.pop(); + curIndent--; + } + } + + if (linkBlock) { + // リンク部 + appendItemLinkBlock(item, indentElem, block); + appendNewLine(indentElem); + } else { + // 本文部 + if (block.startsWith("))")) { + // 整形済み + appendNewLine(indentElem); + for (; i < n; i++) { + body = bodyList.get(i); + block = WdicUtil.deleteTab(body); + if (!block.startsWith("))")) { + i--; + break; + } + if (block.startsWith(")) ")) { + Element nobr = appendElement(indentElem, "nobr"); + appendText(item, nobr, block.substring(3)); + } + appendNewLine(indentElem); + } + appendNewLine(indentElem); + } else if (block.startsWith(">>")) { + // 引用 + Element indentElem2 = appendElement(indentElem, "indent"); + for (; i < n; i++) { + body = bodyList.get(i); + block = WdicUtil.deleteTab(body); + if (!block.startsWith(">>")) { + i--; + break; + } + if (block.startsWith(">> ")) { + appendText(item, indentElem2, block.substring(3)); + } + appendNewLine(indentElem2); + } + } else if (block.startsWith(":: ")) { + // 定義語 (簡易形式) + for (; i < n; i++) { + body = bodyList.get(i); + block = WdicUtil.deleteTab(body); + if (!block.startsWith(":: ")) { + i--; + break; + } + int idx = WdicUtil.indexOf(block, "|", 3); + if (idx >= 0) { // return minus when not found + String dt = block.substring(3, idx).trim(); + String dd = block.substring(idx + 1).trim(); + appendText(item, indentElem, "\u30fb " + dt); + appendNewLine(indentElem); + Element indentElem2 = appendElement(indentElem, "indent"); + appendText(item, indentElem2, dd); + appendNewLine(indentElem2); + } + } + } else if (block.startsWith(": ")) { + // 定義語 (完全形式) + int tab = curIndent; + boolean term = false; + Element indentElem2 = null; + for (i = i + 1; i < n; i++) { + body = bodyList.get(i); + int t = WdicUtil.getTabCount(body) - ignoreTabs; + if (t <= tab) { + i--; + break; + } + block = WdicUtil.deleteTab(body); + if (block.startsWith("+ ")) { + // キー"-1"にこのブロックのインデント数を設定する + numMap.put(-1, t); + } else { + // 数字あり箇条書きでない場合はマップをクリアする + numMap.clear(); + } + if (block.startsWith(":>")) { + if (indentElem2 != null) { + indentElem2 = null; + } + term = true; + String dt = block.substring(2).trim(); + if (StringUtils.isNotBlank(dt)) { + appendText(item, indentElem, "\u30fb " + dt); + appendNewLine(indentElem); + } + } else if (block.startsWith(":<")) { + if (indentElem2 == null) { + indentElem2 = appendElement(indentElem, "indent"); + } + term = false; + String dd = block.substring(2).trim(); + if (StringUtils.isNotBlank(dd)) { + appendText(item, indentElem2, dd); + appendNewLine(indentElem2); + } + } else { + if (term) { + appendItemBodyBlock(item, indentElem, block, numMap, "\u30fb "); + appendNewLine(indentElem); + } else { + if (indentElem2 != null) { + appendItemBodyBlock(item, indentElem2, block, numMap, null); + appendNewLine(indentElem2); + } + } + } + } + } else if (block.startsWith("| ")) { + // 表 (完全形式) + tableNum++; + WdicTable table = new WdicTable(item); + table.add(block); + int tab = curIndent; + for (i = i + 1; i < n; i++) { + body = bodyList.get(i); + int t = WdicUtil.getTabCount(body) - ignoreTabs; + if (t <= tab) { + i--; + break; + } + block = WdicUtil.deleteTab(body); + table.add(block); + } + File dir = new File(basedir, WDIC_TABLE_DIR); + if (!dir.exists() && !dir.mkdirs()) { + logger.error("failed to create directories: " + dir.getPath()); + } + String name = grpId + "_" + partId + "_" + item.getIndex() + "-" + tableNum; + File file = new File(dir, name + ".bmp"); + if (!file.exists()) { + BufferedImage img = table.getImage(); + try { + BmpUtil.write(img, file); + } catch (IOException e) { + logger.error(e.getMessage(), e); + if (file.exists() && !file.delete()) { + logger.error("failed to delete file: " + file.getPath()); + } + } finally { + if (img != null) { + img.flush(); + } + } + } + wdicGraphicNode.addTableItem(name); + Element elem = _appendDataReference(indentElem, file.getName(), "graphic"); + appendRawText(elem, "[表]"); + appendNewLine(indentElem); + } else if (block.startsWith("|| ") || block.startsWith("|= ")) { + // 表 (簡易形式) + tableNum++; + WdicTable table = new WdicTable(item); + table.add(block); + for (i = i + 1; i < n; i++) { + body = bodyList.get(i); + block = WdicUtil.deleteTab(body); + if (!block.startsWith("|| ") && !block.startsWith("|= ")) { + i--; + break; + } + table.add(block); + } + File dir = new File(basedir, WDIC_TABLE_DIR); + if (!dir.exists() && !dir.mkdirs()) { + logger.error("failed to create directories: " + dir.getPath()); + } + String name = grpId + "_" + partId + "_" + item.getIndex() + "-" + tableNum; + File file = new File(dir, name + ".bmp"); + if (!file.exists()) { + BufferedImage img = table.getImage(); + if (img != null) { + try { + BmpUtil.write(img, file); + } catch (IOException e) { + logger.error(e.getMessage(), e); + if (file.exists() && !file.delete()) { + logger.error("failed to delete file: " + file.getPath()); + } + } finally { + img.flush(); + } + wdicGraphicNode.addTableItem(name); + Element elem = _appendDataReference(indentElem, file.getName(), + "graphic"); + appendRawText(elem, "[表]"); + appendNewLine(indentElem); + } + } + } else if (block.startsWith("= ")) { + // 章見出し + if (i > 0) { + String prev = WdicUtil.deleteTab(bodyList.get(i - 1)); + if (!prev.startsWith("= ")) { + appendNewLine(indentElem); + } + } + // U+25A0: Black Square + // U+25A1: White Square + StringBuilder buf = new StringBuilder(); + int black = 5 - section; + int white = section; + for (int j = 0; j < white; j++) { + buf.append('\u25a1'); + } + for (int j = 0; j < black; j++) { + buf.append('\u25a0'); + } + buf.append(" "); + buf.append(block.substring(2)); + buf.append(" "); + for (int j = 0; j < black; j++) { + buf.append('\u25a0'); + } + for (int j = 0; j < white; j++) { + buf.append('\u25a1'); + } + appendText(item, indentElem, buf.toString()); + appendNewLine(indentElem); + } else { + // その他 + appendItemBodyBlock(item, indentElem, block, numMap, null); + appendNewLine(indentElem); + } + } + } + } + + /** + * 辞書項目内容を追加します。 + * + * @param item 辞書項目 + * @param elem 追加対象の要素 + * @param block 追加する内容 + * @param numMap インデント数と箇条書き数とのマップ + * @param prefix プレフィックス + */ + void appendItemBodyBlock(final WdicItem item, final Element elem, final String block, + final Map numMap, final String prefix) { + String target = block; + if (target.startsWith("* ")) { + // 文章 + // U+25c6: Black Diamond + target = "\u25c6 " + target.substring(2); + } else if (target.startsWith("- ")) { + // 数字なし箇条書き + // U+30FB: Katakana Middle Dot + target = "\u30fb " + target.substring(2); + } else if (target.startsWith("+ ")) { + // 数字あり箇条書き + // キー"-1"にこのブロックのインデント数が設定されている + int indent = numMap.get(-1); + // 現在の階層の数値 + int val = 1; + if (numMap.containsKey(indent)) { + val = numMap.get(indent) + 1; + } + String num = Integer.toString(val); + numMap.put(indent, val); + // 下位階層をクリア + int lower = indent + 1; + while (true) { + if (!numMap.containsKey(lower)) { + break; + } + numMap.remove(lower); + lower++; + } + // 上位階層の数値を追加 + int upper = indent - 1; + while (upper >= 0) { + if (!numMap.containsKey(upper)) { + break; + } + num = numMap.get(upper) + "." + num; + upper--; + } + target = num + ") " + target.substring(2); + } else if (target.startsWith("=> ")) { + // 参照 + // U+21D2: Rightwards Double Arrow + target = "\u21d2 " + target.substring(3); + } + if (prefix != null) { + target = prefix + target; + } + appendText(item, elem, target); + } + + /** + * 辞書項目のリンク部を追加します。 + * + * @param item 辞書項目 + * @param elem 追加対象の要素 + * @param block 追加する内容 + */ + void appendItemLinkBlock(final WdicItem item, final Element elem, final String block) { + String target = block; + if (target.startsWith("= ")) { + // グループ見出し + appendNewLine(elem); + // U+25BC: Black Down-Pointing Triangle + target = "\u25bc " + target.substring(2); + } else if (target.startsWith("- ")) { + // 関連語、外部リンク + // U+21D2: Rightwards Double Arrow + target = "\u21d2 " + target.substring(2); + } else if (target.startsWith("-! ")) { + // 反対語 + // U+21D4: Left Right Double Arrow + target = "\u21d4 " + target.substring(3); + } + appendText(item, elem, target, true); + } + + /** + * 辞書項目ノードを作成します。 + * + * @param content コンテントノード + */ + void makeItemNode(final Element content) { + Iterator>> it; + Map.Entry> entry; + // 画像グラグイン + logger.debug(" graphic plugin"); + String[] ext = {".jpg", ".png"}; + int len = ext.length; + for (int i = 0; i < len; i++) { + it = pluginMap.entrySet().iterator(); + while (it.hasNext()) { + entry = it.next(); + String name = entry.getKey(); + if (name.endsWith(ext[i])) { + Element itemElem = _appendItem(content, "PLUGIN:" + name); + Element headElem = appendElement(itemElem, "head"); + appendRawText(headElem, name + " 【プラグイン】"); + // ファイル名をキーワードとして登録 + Element keywordElem = appendElement(itemElem, "keyword"); + appendRawText(keywordElem, name); + Element bodyElem = appendElement(itemElem, "body"); + Element keyElem = appendElement(bodyElem, "key"); + appendRawText(keyElem, name); + appendNewLine(bodyElem); + Element refElem = _appendDataReference(bodyElem, name, "graphic"); + appendRawText(refElem, "[図版]"); + appendNewLine(bodyElem); + + // プラグインを参照している項目を列挙 + for (WdicItem item : entry.getValue()) { + appendRawText(bodyElem, "\u2192 "); + String head = item.getHead(); + String grpId = item.getWdic().getGroupId(); + String id = "WDIC:" + grpId + ":" + head; + refElem = appendIdReference(bodyElem, id); + String gname = item.getWdic().getGroupName(); + String part = item.getWdic().getPartName(); + String title = head + " 《" + gname + ":" + part + "》"; + appendRawText(refElem, title); + appendNewLine(bodyElem); + } + } + } + } + // 音声プラグイン + logger.debug(" sound plugin"); + ext = new String[]{".mp3", ".ogg", ".mid"}; + len = ext.length; + for (int i = 0; i < len; i++) { + it = pluginMap.entrySet().iterator(); + while (it.hasNext()) { + entry = it.next(); + String name = entry.getKey(); + if (name.endsWith(ext[i])) { + Element itemElem = _appendItem(content, "PLUGIN:" + name); + Element headElem = appendElement(itemElem, "head"); + appendRawText(headElem, name + " 【プラグイン】"); + // ファイル名をキーワードとして登録 + Element keywordElem = appendElement(itemElem, "keyword"); + appendRawText(keywordElem, name); + + Element bodyElem = appendElement(itemElem, "body"); + Element keyElem = appendElement(bodyElem, "key"); + appendRawText(keyElem, name); + appendNewLine(bodyElem); + Element refElem = _appendDataReference(bodyElem, name, "sound"); + appendRawText(refElem, "[音声]"); + appendNewLine(bodyElem); + // プラグインを参照している項目を列挙 + for (WdicItem item : entry.getValue()) { + appendRawText(bodyElem, "\u2192 "); + String head = item.getHead(); + String grpId = item.getWdic().getGroupId(); + String id = "WDIC:" + grpId + ":" + head; + refElem = appendIdReference(bodyElem, id); + String gname = item.getWdic().getGroupName(); + String part = item.getWdic().getPartName(); + String title = head + " 《" + gname + ":" + part + "》"; + appendRawText(refElem, title); + appendNewLine(bodyElem); + } + } + } + } + + // その他のプラグイン + logger.debug(" document plugin"); + File plugin = new File(basedir, WDIC_PLUGIN_DIR); + ext = new String[]{".jpg", ".png", ".mp3", ".ogg", ".mid"}; + len = ext.length; + it = pluginMap.entrySet().iterator(); + while (it.hasNext()) { + entry = it.next(); + String name = entry.getKey(); + boolean add = true; + for (int i = 0; i < len; i++) { + if (name.endsWith(ext[i])) { + add = false; + break; + } + } + if (add) { + File file = new File(plugin, name); + if (!file.exists()) { + logger.error("file not found: " + file.getPath()); + continue; + } + Element itemElem = _appendItem(content, "PLUGIN:" + name); + Element headElem = appendElement(itemElem, "head"); + appendRawText(headElem, name + " 【プラグイン】"); + // ファイル名をキーワードとして登録 + Element keywordElem = appendElement(itemElem, "keyword"); + appendRawText(keywordElem, name); + + Element bodyElem = appendElement(itemElem, "body"); + Element keyElem = appendElement(bodyElem, "key"); + appendRawText(keyElem, name); + appendNewLine(bodyElem); + + // プラグインを参照している項目を列挙 + for (WdicItem item : entry.getValue()) { + appendRawText(bodyElem, "\u2192 "); + String head = item.getHead(); + String grpId = item.getWdic().getGroupId(); + String id = "WDIC:" + grpId + ":" + head; + Element refElem = appendIdReference(bodyElem, id); + String gname = item.getWdic().getGroupName(); + String part = item.getWdic().getPartName(); + String title = head + " 《" + gname + ":" + part + "》"; + appendRawText(refElem, title); + appendNewLine(bodyElem); + } + + // プラグインの内容 + Element indentElem = appendElement(bodyElem, "indent"); + try { + LineIterator lineIt = FileUtils.lineIterator(file, ENCODING); + while (lineIt.hasNext()) { + String line = WdicUtil.sanitize(lineIt.nextLine()); + appendRawText(indentElem, line); + appendNewLine(indentElem); + } + } catch (IOException e) { + logger.error(e.getMessage(), e); + } + } + } + } + + /** + * 著作権ノードを作成します。 + * + * @param content コンテントノード + */ + private void _makeCopyrightNode(final Element content) { + Element copyright = appendElement(content, "copyright"); + String[] line = manual.getCopyright(); + int len = line.length; + for (int i = 0; i < len; i++) { + appendRawText(copyright, line[i]); + appendNewLine(copyright); + } + } + + /** + * テキストノードを追加します。 + * + * @param node テキストを追加するノード + * @param str 文字列 + */ + void appendRawText(final Node node, final String str) { + if (str != null && str.trim().length() > 0) { + String tmp = str.replace((char) 0x3099, (char) 0x309b) + .replace((char) 0x309a, (char) 0x309c); + if (node != null) { + Text text = node.getOwnerDocument().createTextNode(tmp); + node.appendChild(text); + wdicGaijiNode.checkCharacter(text); + } + } + } + + /** + * 要素を追加します。 + * + * @param node 要素を追加するノード + * @param tag 要素のタグ名称 + * @return 追加された要素 + */ + Element appendElement(final Node node, final String tag) { + Element elem = node.getOwnerDocument().createElement(tag); + return (Element) node.appendChild(elem); + } + + /** + * 改行要素を追加します。 + * + * @param node 改行を追加するノード + * @return 追加された改行要素 + */ + Element appendNewLine(final Node node) { + return appendElement(node, "br"); + } + + /** + * 項目要素を追加します。 + * + * @param node 項目要素を追加するノード + * @param id ID属性値 + * @return 追加された項目要素 + */ + private Element _appendItem(final Node node, final String id) { + Element elem = appendElement(node, "item"); + elem.setAttribute("id", id); + return elem; + } + + /** + * 参照要素を追加します。 + * + * @param node 参照要素を追加するノード + * @param id ID属性値 + * @return 追加された参照要素 + */ + Element appendIdReference(final Node node, final String id) { + Element elem = appendElement(node, "ref"); + elem.setAttribute("id", id); + return elem; + } + + /** + * 参照要素を追加します。 + * + * @param node 参照要素を追加するノード + * @param data data属性値 + * @param type type属性値 + * @return 追加された参照要素 + */ + private Element _appendDataReference(final Node node, final String data, final String type) { + Element elem = appendElement(node, "ref"); + elem.setAttribute("data", data); + elem.setAttribute("type", type); + return elem; + } + + /** + * データ要素を追加します。 + * + * @param node データ要素を追加するノード + * @param name name属性値 + * @param src src属性値 + * @param format format属性値 + * @return 追加されたデータ要素 + */ + Element appendData(final Node node, final String name, final String src, + final String format) { + Element elem = appendElement(node, "data"); + elem.setAttribute("name", name); + elem.setAttribute("src", src); + elem.setAttribute("format", format); + return elem; + } + + /** + * テキストノードを追加します。 + * + * @param item 辞書項目 + * @param node テキストを追加するノード + * @param str 文字列 + */ + void appendText(final WdicItem item, final Node node, final String str) { + appendText(item, node, str, false); + } + + /** + * テキストノードを追加します。 + * + * @param item 辞書項目 + * @param node テキストを追加するノード + * @param str 文字列 + * @param linkBlock リンク部の場合はtrue + */ + void appendText(final WdicItem item, final Node node, final String str, + final boolean linkBlock) { + String grpId = item.getWdic().getGroupId(); + String partId = item.getWdic().getPartId(); + String itemId = grpId + ":" + partId + ":" + item.getHead(); + StringBuilder buf = new StringBuilder(); + int len = str.length(); + for (int i = 0; i < len; i++) { + char ch = str.charAt(i); + if (Character.isHighSurrogate(ch) + || Character.isLowSurrogate(ch)) { + buf.append(ch); + continue; + } + + if (ch == '\'') { + StringBuilder bracket = new StringBuilder("'"); + int idx1 = i + 1; + for (; idx1 < len; idx1++) { + if (str.charAt(idx1) != '\'') { + break; + } + bracket.append("'"); + } + if (bracket.length() > 1) { + // 2個以上は強調表示 + int idx2 = WdicUtil.indexOf(str, bracket.toString(), idx1); + if (idx2 != -1) { + // 強調 + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + Element elem = appendElement(node, "em"); + appendText(item, elem, str.substring(idx1, idx2), linkBlock); + i = idx2 + bracket.length() - 1; + } else { + // 閉じられていないのでそのまま追加する + buf.append(bracket); + i = idx1 - 1; + } + continue; + } + } else if (ch == '[') { + if (i + 1 < len && str.charAt(i + 1) == '[') { + int idx1 = i + 1; + int idx2 = WdicUtil.indexOf(str, "]]", idx1 + 1); + if (idx2 != -1) { + // リンク + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + String ref = str.substring(idx1 + 1, idx2); + String name = null; + if (ref.startsWith("<")) { + // 表示内容 + int idx3 = WdicUtil.indexOf(ref, ">", 1); + if (idx3 != -1) { + name = ref.substring(1, idx3); + ref = ref.substring(idx3 + 1); + } + } + if (ref.startsWith("http:") + || ref.startsWith("https:") + || ref.startsWith("ftp:") + || ref.startsWith("news:") + || ref.startsWith("gopher:") + || ref.startsWith("mailto:") + || ref.startsWith("phone:") + || ref.startsWith("urn:") + || ref.startsWith("x-geo:")) { + // URI + if (StringUtils.isNotBlank(name)) { + if (linkBlock) { + ref = name + " <" + ref + ">"; + } else { + ref = name + "<" + ref + ">"; + } + } + appendText(item, node, ref, linkBlock); + } else if (ref.startsWith("//")) { + // プラグイン + int idx3 = ref.indexOf("|"); + if (idx3 > 0) { + // delete option + ref = ref.substring(0, idx3); + } + String gid = null; + String file = null; + idx3 = ref.indexOf("/", 2); + if (idx3 != -1) { + gid = ref.substring(2, idx3); + file = ref.substring(idx3 + 1); + } else { + gid = grpId; + file = ref.substring(2); + } + Element refElem = null; + if (file.endsWith(".jpg") || file.endsWith(".png")) { + refElem = _appendDataReference(node, file, "graphic"); + } else if (file.endsWith(".mp3") || file.endsWith(".ogg") + || file.endsWith(".mid")) { + refElem = _appendDataReference(node, file, "sound"); + } else { + refElem = appendIdReference(node, "PLUGIN:" + file); + } + if (StringUtils.isBlank(name)) { + name = file; + } + if (linkBlock) { + WdicGroup group = groupList.getGroup(gid); + if (group != null) { + String gname = group.getGroupName(); + name = name + " 《" + gname + "》"; + } + } + appendRawText(refElem, name); + } else { + if (ref.startsWith("x-wdic:")) { + // x-wdic:/グループ名/単語 + ref = ref.substring("x-wdic:".length()); + } + String gid = null; + String head = null; + if (ref.startsWith("/")) { + // グループ名/単語 + int idx3 = WdicUtil.indexOf(ref, "/", 1); + if (idx3 != -1) { + gid = ref.substring(1, idx3); + head = ref.substring(idx3 + 1); + } else { + head = ref.substring(1); + } + } else { + // 単語 + head = ref; + } + String refid = WdicUtil.unescape(head); + if (StringUtils.isBlank(name)) { + name = head; + } + if (StringUtils.isBlank(gid)) { + // 同一グループ内 + gid = grpId; + } + WdicGroup group = groupList.getGroup(gid); + if (group != null) { + String gname = group.getGroupName(); + Wdic wdic = group.getWdic(refid); + if (wdic != null) { + String id = "WDIC:" + gid + ":" + refid; + Element refElem = appendIdReference(node, id); + if (linkBlock) { + name = name + " 《" + gname + ":" + wdic.getPartName() + "》"; + } + appendText(item, refElem, name, linkBlock); + } else { + logger.error("undefined word: " + gid + "/" + refid); + if (linkBlock) { + name = name + " 《" + gname + "》"; + } + appendText(item, node, name, linkBlock); + } + } else { + logger.error("undefined group: " + gid); + appendText(item, node, name, linkBlock); + } + } + i = idx2 + 1; + } else { + // 閉じられていないのでそのまま追加する + buf.append("[["); + i = idx1; + } + continue; + } + } + + if (ch != '\\') { + // バックスラッシュ以外はそのまま追加 + buf.append(ch); + continue; + } + if (i + 1 >= len) { + // バックスラッシュに続く文字がないのでそのまま追加 + buf.append(ch); + continue; + } + + char ch1 = str.charAt(i + 1); + if (ch1 >= 0x21 && ch1 <= 0x7e) { + if (!CharUtils.isAsciiAlphanumeric(ch1)) { + // 1文字エスケープ (英数字以外の記号) + i++; + buf.append(ch1); + continue; + } + } + + int idx = WdicUtil.indexOf(str, ";", i + 1); + if (idx < 0) { + logger.error("unexpected format: " + str); + buf.append(ch); + continue; + } + String ref = str.substring(i + 1, idx); + i = idx; + int sep1 = WdicUtil.indexOf(ref, "{", 0); + int sep2 = WdicUtil.indexOf(ref, ":", 0); + if (sep1 == -1 && sep2 == -1) { + // 実体参照 + buf.append(WdicUtil.getCharacter(ref)); + continue; + } + + // 特殊機能 + String name; + ArrayList param = new ArrayList<>(); + if (sep1 != -1 && sep2 != -1) { + if (sep2 < sep1) { + sep1 = -1; + } else { + sep2 = -1; + } + } + if (sep1 != -1) { + // 引数は{}で括られている + name = ref.substring(0, sep1); + int idx1 = sep1; + int idx2; + while (idx1 != -1) { + idx2 = ref.indexOf('}', idx1 + 1); + if (idx2 == -1) { + idx2 = ref.length(); + } + param.add(ref.substring(idx1 + 1, idx2)); + idx1 = ref.indexOf('{', idx2 + 1); + } + } else { + // 引数は:で区切られている + name = ref.substring(0, sep2); + Collections.addAll(param, ref.substring(sep2 + 1).split(":")); + } + + if ("x".equals(name)) { + String code = param.get(0); + try { + int codePoint = Integer.parseInt(code, 16); + buf.appendCodePoint(codePoint); + } catch (Exception e) { + logger.error("unknown character code: " + code); + } + } else if ("sup".equals(name) || "sub".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + Element elem = appendElement(node, name); + appendText(item, elem, param.get(0), linkBlock); + } else if ("ruby".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + appendText(item, node, param.get(0), linkBlock); + if (param.size() > 1) { + Element elem = appendElement(node, "sub"); + appendText(item, elem, "(" + param.get(1) + ")", linkBlock); + } + } else if ("asin".equals(name)) { + String asin = param.get(0); + String url; + switch (asin.charAt(0)) { + case '4': + url = "http://www.amazon.co.jp/exec/obidos/ASIN/"; + break; + case '3': + url = "http://www.amazon.de/exec/obidos/ASIN/"; + break; + case '2': + url = "http://www.amazon.fr/exec/obidos/ASIN/"; + break; + case '1': + url = "http://www.amazon.co.uk/exec/obidos/ASIN/"; + break; + case '0': + default: + url = "http://www.amazon.com/exec/obidos/ASIN/"; + break; + } + buf.append(url + asin); + } else if ("flag".equals(name)) { + // ignore + } else if ("mex".equals(name)) { + buf.append("[" + param.get(0) + "]"); + } else if ("glyph".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + String glyph = param.get(0); + wdicGraphicNode.addGlyphItem(glyph); + Element elem = _appendDataReference(node, "glyph-" + glyph, "inlineGraphic"); + appendRawText(elem, "[グリフ:" + glyph + "]"); + } else if ("oline".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + String pstr = param.get(0); + wdicGaijiNode.addOverLineGaijiFont(node, pstr); + } else if ("uline".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + String pstr = param.get(0); + wdicGaijiNode.addUnderLineGaijiFont(node, pstr); + } else if ("sout".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + String pstr = param.get(0); + wdicGaijiNode.addLineThroughGaijiFont(node, pstr); + } else if ("date".equals(name) || "dt".equals(name)) { + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + buf.append(param.get(0)); + appendText(item, node, buf.toString(), linkBlock); + buf.delete(0, buf.length()); + } else { + if (!"unit".equals(name)) { + logger.error("unknown function name: " + itemId + " [" + name + "]"); + } + appendRawText(node, buf.toString()); + buf.delete(0, buf.length()); + appendText(item, node, param.get(0), linkBlock); + } + } + appendRawText(node, buf.toString()); + } + +} diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicSoundNode.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicSoundNode.java new file mode 100644 index 0000000..61c7f74 --- /dev/null +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicSoundNode.java @@ -0,0 +1,56 @@ +package io.github.eb4j.xml2eb.converter.wdic; + +import java.io.File; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Element; + +import org.apache.commons.io.FilenameUtils; + + +/** + * Wdic sound node class. + * + * Created by miurahr on 16/07/17. + */ +class WdicSoundNode { + + private Logger logger = LoggerFactory.getLogger(getClass()); + private static final String WDIC_PLUGIN_DIR = "plugin"; + + private WdicNode wdicNode; + private File plugin; + + WdicSoundNode(final WdicNode wdicNode, final File basedir) { + this.wdicNode = wdicNode; + plugin = new File(basedir, WDIC_PLUGIN_DIR); + } + + /** + * 音声データノードを作成します。 + * + * @param subbook subbookノード + */ + void makeSoundNode(final Element subbook) { + Element sound = wdicNode.appendElement(subbook, "sound"); + for (String name : wdicNode.getPluginMapKeySet()) { + if (name.endsWith(".mp3") || name.endsWith(".ogg")) { + String wavName = name + ".wav"; + File wav = new File(plugin, wavName); + if (!wav.exists()) { + logger.error("file not found: " + wav.getPath()); + } + String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, wavName); + wdicNode.appendData(sound, name, path, "wav"); + } else if (name.endsWith(".mid")) { + File midi = new File(plugin, name); + if (!midi.exists()) { + logger.error("file not found: " + midi.getPath()); + } + String path = FilenameUtils.concat(WDIC_PLUGIN_DIR, name); + wdicNode.appendData(sound, name, path, "mid"); + } + } + } +} diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicTable.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicTable.java index 4a0ff29..6cb9820 100644 --- a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicTable.java +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicTable.java @@ -288,7 +288,8 @@ private List _parse() { buf.append(tableData); buf.append(sep); } - _logger.warn("unexpected table format: " + _id + sep + buf.toString(), e.getMessage()); + _logger.warn("unexpected table format: " + _id + sep + buf.toString(), + e.getMessage()); } } if (rowList == null) { diff --git a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicUtil.java b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicUtil.java index 7ec2c57..a64065d 100644 --- a/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicUtil.java +++ b/conv-wdic/src/main/java/io/github/eb4j/xml2eb/converter/wdic/WdicUtil.java @@ -11,7 +11,14 @@ import java.io.IOException; import java.net.URL; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; import org.apache.commons.collections.ExtendedProperties; import org.apache.commons.io.IOUtils;