1 package org.gnomekr.potron.parser;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.io.InputStreamReader;
8 import java.io.Reader;
9 import java.io.UnsupportedEncodingException;
10 import java.util.ArrayList;
11 import java.util.List;
12 import java.util.StringTokenizer;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import org.apache.commons.lang.NullArgumentException;
17 import org.apache.commons.lang.StringEscapeUtils;
18 import org.apache.commons.lang.StringUtils;
19
20 import antlr.ANTLRException;
21 import antlr.collections.AST;
22
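/**
 * POParser.java
 * <p>
 * Parses PO content through the ANTLR-based POLexer / PORecognizer and reports
 * the header fields, comments and entries it finds to an IPOParserCallback.
 *
 * @author Xavier Cho
 * @version $Revision 1.1 $ $Date: 2005/08/28 11:47:14 $
 * TODO: Throw ParseException with sensible information when the input is invalid.
 */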
29 public class POParser {
30
31 private static final String HEADER_PLURAL_FORM = "Plural-Forms";
32 private static final Pattern REGEXP_PLURAL_FORM = Pattern
33 .compile("^//s*nplurals//s*=//s*([0-9]+)//s*;//s*([^;]+);$");
34
35 private IPOParserCallback callback;
36
37 public POParser() {
38 }
39
40 /***
41 * @param callback
42 */
43 public POParser(IPOParserCallback callback) {
44 this.callback = callback;
45 }
46
47 /***
48 * @return Returns the callback.
49 */
50 public IPOParserCallback getCallback() {
51 return callback;
52 }
53
54 /***
55 * @param callback The callback to set.
56 */
57 public void setCallback(IPOParserCallback callback) {
58 this.callback = callback;
59 }
60
61 /***
62 * @param file
63 * @param encoding
64 * @throws ParseException
65 * @throws UnsupportedEncodingException
66 * @throws IOException
67 */
68 public synchronized void parse(File file, String encoding)
69 throws ParseException, UnsupportedEncodingException, IOException {
70 if (file == null) {
71 throw new NullArgumentException("file");
72 }
73
74 if (encoding == null) {
75 parse(new FileReader(file));
76 } else {
77 parse(new InputStreamReader(new FileInputStream(file), encoding));
78 }
79 }
80
81 /***
82 * @param reader
83 * @throws ParseException
84 * @throws IOException
85 */
86 public synchronized void parse(Reader reader) throws ParseException,
87 IOException {
88 if (reader == null) {
89 throw new NullArgumentException("reader");
90 }
91
92 if (callback == null) {
93 String msg = "No parser callback instance has been registered.";
94 throw new IllegalStateException(msg);
95 }
96
97 try {
98 callback.startDocument();
99
100 POLexer lexer = new POLexer(reader);
101 PORecognizer parser = new PORecognizer(lexer);
102
103 parser.body();
104
105 AST root = parser.getAST();
106 AST header = root.getFirstChild();
107
108 if (header == null) {
109 String msg = "Unable to find header section of the PO file.";
110 throw new ParseException(msg);
111 }
112
113 parseHeader(header);
114
115 AST entry = header;
116 while ((entry = entry.getNextSibling()) != null) {
117 parseEntry(entry);
118 }
119
120 callback.endDocument();
121 } catch (ANTLRException e) {
122 throw new ParseException(e);
123 } finally {
124 reader.close();
125 }
126 }
127
128 protected void parseHeader(AST ast) throws ParseException {
129 checkNodeType(ast, PORecognizerTokenTypes.ENTRY);
130 AST comments = ast.getFirstChild();
131
132 if (comments == null) {
133 throw new ParseException("Header comment section is not found : "
134 + ast.toString());
135 }
136
137 parseComment(comments);
138
139 AST msgid = comments.getNextSibling();
140
141 if (msgid == null) {
142 throw new ParseException("Header msgid section is not found : "
143 + ast.toString());
144 }
145
146 AST msgstr = msgid.getNextSibling();
147
148 if (msgstr == null) {
149 throw new ParseException("Header msgstr section is not found : "
150 + ast.toString());
151 }
152
153 checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr);
154
155 String content = getLiteralContent(msgstr);
156
157 StringTokenizer tokenizer = new StringTokenizer(content, System
158 .getProperty("line.separator"));
159
160 while (tokenizer.hasMoreTokens()) {
161 String line = tokenizer.nextToken();
162
163 int index = line.indexOf(":");
164 if (index != -1) {
165 String key = line.substring(0, index).trim();
166 String value = line.substring(index + 1).trim();
167
168 if (key.equalsIgnoreCase(HEADER_PLURAL_FORM)) {
169 parsePluralForm(value);
170 } else {
171 callback.onHeader(key, value);
172 }
173 }
174 }
175 }
176
177 protected void parseComment(AST ast) throws ParseException {
178 parseComment(ast, null);
179 }
180
181 protected void parseComment(AST ast, ParserEntry entry)
182 throws ParseException {
183 StringBuffer buffer = new StringBuffer();
184
185 AST child = ast.getFirstChild();
186
187 checkNodeType(ast, PORecognizerTokenTypes.COMMENT);
188
189 String linefeed = System.getProperty("line.separator");
190
191 while (child != null) {
192 checkNodeType(child, PORecognizerTokenTypes.COMMENT);
193
194 String line = child.getText();
195
196 if (entry == null) {
197 buffer.append(linefeed);
198 buffer.append(line);
199 } else {
200 if (line.startsWith("#:")) {
201 entry.setReferences(line.substring(2).trim());
202 } else if (StringUtils.deleteWhitespace(line).equals("#,fuzzy")) {
203 entry.setFuzzy(true);
204 } else {
205 buffer.append(linefeed);
206 buffer.append(line);
207 }
208 }
209
210 child = child.getNextSibling();
211 }
212
213 String comment = buffer.toString().trim();
214 if (entry == null) {
215 callback.onComment(comment);
216 } else {
217 entry.setComment(comment);
218 }
219 }
220
221 protected void parseEntry(AST ast) throws ParseException {
222 ParserEntry entry = new ParserEntry();
223
224 checkNodeType(ast, PORecognizerTokenTypes.ENTRY);
225 AST comments = ast.getFirstChild();
226
227 if (comments == null) {
228 throw new ParseException("Entry comment section is not found : "
229 + ast.toString());
230 }
231
232 parseComment(comments, entry);
233
234 AST msgid = comments.getNextSibling();
235
236 if (msgid == null) {
237 throw new ParseException("Header msgid section is not found : "
238 + ast.toString());
239 }
240
241 entry.setMsgId(getLiteralContent(msgid));
242
243 AST msgstr = null;
244 AST child = msgid.getNextSibling();
245
246 if (child == null) {
247 throw new ParseException("Header msgstr section is not found : "
248 + ast.toString());
249 }
250
251 if (child.getType() == PORecognizerTokenTypes.LITERAL_msgid_plural) {
252 entry.setMsgIdPlural(getLiteralContent(child));
253 msgstr = child.getNextSibling();
254 } else {
255 msgstr = child;
256 }
257
258 if (msgstr == null) {
259 throw new ParseException("Header msgstr section is not found : "
260 + ast.toString());
261 }
262
263 List<String> messages = new ArrayList<String>();
264
265 while (msgstr != null) {
266 checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr);
267 String message = getLiteralContent(msgstr);
268
269 if (StringUtils.isNotBlank(message)) {
270 messages.add(message);
271 }
272
273 msgstr = msgstr.getNextSibling();
274 }
275
276 entry.setMsgStr(messages);
277
278 callback.onEntry(entry);
279 }
280
281 protected String getLiteralContent(AST ast) throws ParseException {
282 StringBuffer buffer = new StringBuffer();
283
284 AST child = ast.getFirstChild();
285
286 while (child != null) {
287 buffer.append(child.getText());
288 child = child.getNextSibling();
289 }
290
291 return StringEscapeUtils.unescapeJava(buffer.toString());
292 }
293
294 protected void parsePluralForm(String value) {
295 Matcher matcher = REGEXP_PLURAL_FORM.matcher(value);
296 if (matcher.matches() && matcher.groupCount() > 1) {
297 int nplural = Integer.parseInt(matcher.group(1));
298 String expression = matcher.group(2).trim();
299
300 callback.onHeaderPluralForm(nplural, expression);
301 }
302 }
303
304 private void checkNodeType(AST ast, int type) throws ParseException {
305 if (ast.getType() != type) {
306 StringBuffer buffer = new StringBuffer();
307
308 buffer.append("Invalid AST node type was specified :\n");
309 buffer.append(" - line : ");
310 buffer.append(ast.getLine());
311 buffer.append(", column : ");
312 buffer.append(ast.getColumn());
313 buffer.append("\n - expected type : ");
314 buffer.append(type);
315 buffer.append("\n - actual type : ");
316 buffer.append(ast.getType());
317
318 throw new ParseException(buffer.toString());
319 }
320 }
321 }