1 package org.gnomekr.potron.parser; 2 3 import java.io.File; 4 import java.io.FileInputStream; 5 import java.io.FileReader; 6 import java.io.IOException; 7 import java.io.InputStreamReader; 8 import java.io.Reader; 9 import java.io.UnsupportedEncodingException; 10 import java.util.ArrayList; 11 import java.util.List; 12 import java.util.StringTokenizer; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 16 import org.apache.commons.lang.NullArgumentException; 17 import org.apache.commons.lang.StringEscapeUtils; 18 import org.apache.commons.lang.StringUtils; 19 20 import antlr.ANTLRException; 21 import antlr.collections.AST; 22 23 /*** 24 * POParser.java 25 * @author Xavier Cho 26 * @version $Revision 1.1 $ $Date: 2005/08/28 11:47:14 $ 27 * TODO: Throw ParseException with sensible information when the input is invalid. 28 */ 29 public class POParser { 30 31 private static final String HEADER_PLURAL_FORM = "Plural-Forms"; 32 private static final Pattern REGEXP_PLURAL_FORM = Pattern 33 .compile("^//s*nplurals//s*=//s*([0-9]+)//s*;//s*([^;]+);$"); 34 35 private IPOParserCallback callback; 36 37 public POParser() { 38 } 39 40 /*** 41 * @param callback 42 */ 43 public POParser(IPOParserCallback callback) { 44 this.callback = callback; 45 } 46 47 /*** 48 * @return Returns the callback. 49 */ 50 public IPOParserCallback getCallback() { 51 return callback; 52 } 53 54 /*** 55 * @param callback The callback to set. 56 */ 57 public void setCallback(IPOParserCallback callback) { 58 this.callback = callback; 59 } 60 61 /*** 62 * @param file 63 * @param encoding 64 * @throws ParseException 65 * @throws UnsupportedEncodingException 66 * @throws IOException 67 */ 68 public synchronized void parse(File file, String encoding) 69 throws ParseException, UnsupportedEncodingException, IOException { 70 if (file == null) { 71 throw new NullArgumentException("file"); 72 } 73 74 if (encoding == null) { 75 parse(new FileReader(file)); 76 } else { 77 parse(new InputStreamReader(new FileInputStream(file), encoding)); 78 } 79 } 80 81 /*** 82 * @param reader 83 * @throws ParseException 84 * @throws IOException 85 */ 86 public synchronized void parse(Reader reader) throws ParseException, 87 IOException { 88 if (reader == null) { 89 throw new NullArgumentException("reader"); 90 } 91 92 if (callback == null) { 93 String msg = "No parser callback instance has been registered."; 94 throw new IllegalStateException(msg); 95 } 96 97 try { 98 callback.startDocument(); 99 100 POLexer lexer = new POLexer(reader); 101 PORecognizer parser = new PORecognizer(lexer); 102 103 parser.body(); 104 105 AST root = parser.getAST(); 106 AST header = root.getFirstChild(); 107 108 if (header == null) { 109 String msg = "Unable to find header section of the PO file."; 110 throw new ParseException(msg); 111 } 112 113 parseHeader(header); 114 115 AST entry = header; 116 while ((entry = entry.getNextSibling()) != null) { 117 parseEntry(entry); 118 } 119 120 callback.endDocument(); 121 } catch (ANTLRException e) { 122 throw new ParseException(e); 123 } finally { 124 reader.close(); 125 } 126 } 127 128 protected void parseHeader(AST ast) throws ParseException { 129 checkNodeType(ast, PORecognizerTokenTypes.ENTRY); 130 AST comments = ast.getFirstChild(); 131 132 if (comments == null) { 133 throw new ParseException("Header comment section is not found : " 134 + ast.toString()); 135 } 136 137 parseComment(comments); 138 139 AST msgid = comments.getNextSibling(); 140 141 if (msgid == null) { 142 throw new ParseException("Header msgid section is not found : " 143 + ast.toString()); 144 } 145 146 AST msgstr = msgid.getNextSibling(); 147 148 if (msgstr == null) { 149 throw new ParseException("Header msgstr section is not found : " 150 + ast.toString()); 151 } 152 153 checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr); 154 155 String content = getLiteralContent(msgstr); 156 157 StringTokenizer tokenizer = new StringTokenizer(content, System 158 .getProperty("line.separator")); 159 160 while (tokenizer.hasMoreTokens()) { 161 String line = tokenizer.nextToken(); 162 163 int index = line.indexOf(":"); 164 if (index != -1) { 165 String key = line.substring(0, index).trim(); 166 String value = line.substring(index + 1).trim(); 167 168 if (key.equalsIgnoreCase(HEADER_PLURAL_FORM)) { 169 parsePluralForm(value); 170 } else { 171 callback.onHeader(key, value); 172 } 173 } 174 } 175 } 176 177 protected void parseComment(AST ast) throws ParseException { 178 parseComment(ast, null); 179 } 180 181 protected void parseComment(AST ast, ParserEntry entry) 182 throws ParseException { 183 StringBuffer buffer = new StringBuffer(); 184 185 AST child = ast.getFirstChild(); 186 187 checkNodeType(ast, PORecognizerTokenTypes.COMMENT); 188 189 String linefeed = System.getProperty("line.separator"); 190 191 while (child != null) { 192 checkNodeType(child, PORecognizerTokenTypes.COMMENT); 193 194 String line = child.getText(); 195 196 if (entry == null) { 197 buffer.append(linefeed); 198 buffer.append(line); 199 } else { 200 if (line.startsWith("#:")) { 201 entry.setReferences(line.substring(2).trim()); 202 } else if (StringUtils.deleteWhitespace(line).equals("#,fuzzy")) { 203 entry.setFuzzy(true); 204 } else { 205 buffer.append(linefeed); 206 buffer.append(line); 207 } 208 } 209 210 child = child.getNextSibling(); 211 } 212 213 String comment = buffer.toString().trim(); 214 if (entry == null) { 215 callback.onComment(comment); 216 } else { 217 entry.setComment(comment); 218 } 219 } 220 221 protected void parseEntry(AST ast) throws ParseException { 222 ParserEntry entry = new ParserEntry(); 223 224 checkNodeType(ast, PORecognizerTokenTypes.ENTRY); 225 AST comments = ast.getFirstChild(); 226 227 if (comments == null) { 228 throw new ParseException("Entry comment section is not found : " 229 + ast.toString()); 230 } 231 232 parseComment(comments, entry); 233 234 AST msgid = comments.getNextSibling(); 235 236 if (msgid == null) { 237 throw new ParseException("Header msgid section is not found : " 238 + ast.toString()); 239 } 240 241 entry.setMsgId(getLiteralContent(msgid)); 242 243 AST msgstr = null; 244 AST child = msgid.getNextSibling(); 245 246 if (child == null) { 247 throw new ParseException("Header msgstr section is not found : " 248 + ast.toString()); 249 } 250 251 if (child.getType() == PORecognizerTokenTypes.LITERAL_msgid_plural) { 252 entry.setMsgIdPlural(getLiteralContent(child)); 253 msgstr = child.getNextSibling(); 254 } else { 255 msgstr = child; 256 } 257 258 if (msgstr == null) { 259 throw new ParseException("Header msgstr section is not found : " 260 + ast.toString()); 261 } 262 263 List<String> messages = new ArrayList<String>(); 264 265 while (msgstr != null) { 266 checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr); 267 String message = getLiteralContent(msgstr); 268 269 if (StringUtils.isNotBlank(message)) { 270 messages.add(message); 271 } 272 273 msgstr = msgstr.getNextSibling(); 274 } 275 276 entry.setMsgStr(messages); 277 278 callback.onEntry(entry); 279 } 280 281 protected String getLiteralContent(AST ast) throws ParseException { 282 StringBuffer buffer = new StringBuffer(); 283 284 AST child = ast.getFirstChild(); 285 286 while (child != null) { 287 buffer.append(child.getText()); 288 child = child.getNextSibling(); 289 } 290 291 return StringEscapeUtils.unescapeJava(buffer.toString()); 292 } 293 294 protected void parsePluralForm(String value) { 295 Matcher matcher = REGEXP_PLURAL_FORM.matcher(value); 296 if (matcher.matches() && matcher.groupCount() > 1) { 297 int nplural = Integer.parseInt(matcher.group(1)); 298 String expression = matcher.group(2).trim(); 299 300 callback.onHeaderPluralForm(nplural, expression); 301 } 302 } 303 304 private void checkNodeType(AST ast, int type) throws ParseException { 305 if (ast.getType() != type) { 306 StringBuffer buffer = new StringBuffer(); 307 308 buffer.append("Invalid AST node type was specified :\n"); 309 buffer.append(" - line : "); 310 buffer.append(ast.getLine()); 311 buffer.append(", column : "); 312 buffer.append(ast.getColumn()); 313 buffer.append("\n - expected type : "); 314 buffer.append(type); 315 buffer.append("\n - actual type : "); 316 buffer.append(ast.getType()); 317 318 throw new ParseException(buffer.toString()); 319 } 320 } 321 }