View Javadoc

1   package org.gnomekr.potron.parser;
2   
3   import java.io.File;
4   import java.io.FileInputStream;
5   import java.io.FileReader;
6   import java.io.IOException;
7   import java.io.InputStreamReader;
8   import java.io.Reader;
9   import java.io.UnsupportedEncodingException;
10  import java.util.ArrayList;
11  import java.util.List;
12  import java.util.StringTokenizer;
13  import java.util.regex.Matcher;
14  import java.util.regex.Pattern;
15  
16  import org.apache.commons.lang.NullArgumentException;
17  import org.apache.commons.lang.StringEscapeUtils;
18  import org.apache.commons.lang.StringUtils;
19  
20  import antlr.ANTLRException;
21  import antlr.collections.AST;
22  
23  /***
24   * POParser.java
25   * @author Xavier Cho
26   * @version $Revision 1.1 $ $Date: 2005/08/28 11:47:14 $
27   * TODO: Throw ParseException with sensible information when the input is invalid.
28   */
29  public class POParser {
30  
31      private static final String HEADER_PLURAL_FORM = "Plural-Forms";
32      private static final Pattern REGEXP_PLURAL_FORM = Pattern
33              .compile("^//s*nplurals//s*=//s*([0-9]+)//s*;//s*([^;]+);$");
34  
35      private IPOParserCallback callback;
36  
37      public POParser() {
38      }
39  
40      /***
41       * @param callback
42       */
43      public POParser(IPOParserCallback callback) {
44          this.callback = callback;
45      }
46  
47      /***
48       * @return Returns the callback.
49       */
50      public IPOParserCallback getCallback() {
51          return callback;
52      }
53  
54      /***
55       * @param callback The callback to set.
56       */
57      public void setCallback(IPOParserCallback callback) {
58          this.callback = callback;
59      }
60  
61      /***
62       * @param file
63       * @param encoding
64       * @throws ParseException
65       * @throws UnsupportedEncodingException
66       * @throws IOException
67       */
68      public synchronized void parse(File file, String encoding)
69              throws ParseException, UnsupportedEncodingException, IOException {
70          if (file == null) {
71              throw new NullArgumentException("file");
72          }
73  
74          if (encoding == null) {
75              parse(new FileReader(file));
76          } else {
77              parse(new InputStreamReader(new FileInputStream(file), encoding));
78          }
79      }
80  
81      /***
82       * @param reader
83       * @throws ParseException
84       * @throws IOException
85       */
86      public synchronized void parse(Reader reader) throws ParseException,
87              IOException {
88          if (reader == null) {
89              throw new NullArgumentException("reader");
90          }
91  
92          if (callback == null) {
93              String msg = "No parser callback instance has been registered.";
94              throw new IllegalStateException(msg);
95          }
96  
97          try {
98              callback.startDocument();
99  
100             POLexer lexer = new POLexer(reader);
101             PORecognizer parser = new PORecognizer(lexer);
102 
103             parser.body();
104 
105             AST root = parser.getAST();
106             AST header = root.getFirstChild();
107 
108             if (header == null) {
109                 String msg = "Unable to find header section of the PO file.";
110                 throw new ParseException(msg);
111             }
112 
113             parseHeader(header);
114 
115             AST entry = header;
116             while ((entry = entry.getNextSibling()) != null) {
117                 parseEntry(entry);
118             }
119 
120             callback.endDocument();
121         } catch (ANTLRException e) {
122             throw new ParseException(e);
123         } finally {
124             reader.close();
125         }
126     }
127 
128     protected void parseHeader(AST ast) throws ParseException {
129         checkNodeType(ast, PORecognizerTokenTypes.ENTRY);
130         AST comments = ast.getFirstChild();
131 
132         if (comments == null) {
133             throw new ParseException("Header comment section is not found : "
134                     + ast.toString());
135         }
136 
137         parseComment(comments);
138 
139         AST msgid = comments.getNextSibling();
140 
141         if (msgid == null) {
142             throw new ParseException("Header msgid section is not found : "
143                     + ast.toString());
144         }
145 
146         AST msgstr = msgid.getNextSibling();
147 
148         if (msgstr == null) {
149             throw new ParseException("Header msgstr section is not found : "
150                     + ast.toString());
151         }
152 
153         checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr);
154 
155         String content = getLiteralContent(msgstr);
156 
157         StringTokenizer tokenizer = new StringTokenizer(content, System
158                 .getProperty("line.separator"));
159 
160         while (tokenizer.hasMoreTokens()) {
161             String line = tokenizer.nextToken();
162 
163             int index = line.indexOf(":");
164             if (index != -1) {
165                 String key = line.substring(0, index).trim();
166                 String value = line.substring(index + 1).trim();
167 
168                 if (key.equalsIgnoreCase(HEADER_PLURAL_FORM)) {
169                     parsePluralForm(value);
170                 } else {
171                     callback.onHeader(key, value);
172                 }
173             }
174         }
175     }
176 
177     protected void parseComment(AST ast) throws ParseException {
178         parseComment(ast, null);
179     }
180 
181     protected void parseComment(AST ast, ParserEntry entry)
182             throws ParseException {
183         StringBuffer buffer = new StringBuffer();
184 
185         AST child = ast.getFirstChild();
186 
187         checkNodeType(ast, PORecognizerTokenTypes.COMMENT);
188 
189         String linefeed = System.getProperty("line.separator");
190 
191         while (child != null) {
192             checkNodeType(child, PORecognizerTokenTypes.COMMENT);
193 
194             String line = child.getText();
195 
196             if (entry == null) {
197                 buffer.append(linefeed);
198                 buffer.append(line);
199             } else {
200                 if (line.startsWith("#:")) {
201                     entry.setReferences(line.substring(2).trim());
202                 } else if (StringUtils.deleteWhitespace(line).equals("#,fuzzy")) {
203                     entry.setFuzzy(true);
204                 } else {
205                     buffer.append(linefeed);
206                     buffer.append(line);
207                 }
208             }
209 
210             child = child.getNextSibling();
211         }
212 
213         String comment = buffer.toString().trim();
214         if (entry == null) {
215             callback.onComment(comment);
216         } else {
217             entry.setComment(comment);
218         }
219     }
220 
221     protected void parseEntry(AST ast) throws ParseException {
222         ParserEntry entry = new ParserEntry();
223 
224         checkNodeType(ast, PORecognizerTokenTypes.ENTRY);
225         AST comments = ast.getFirstChild();
226 
227         if (comments == null) {
228             throw new ParseException("Entry comment section is not found : "
229                     + ast.toString());
230         }
231 
232         parseComment(comments, entry);
233 
234         AST msgid = comments.getNextSibling();
235 
236         if (msgid == null) {
237             throw new ParseException("Header msgid section is not found : "
238                     + ast.toString());
239         }
240 
241         entry.setMsgId(getLiteralContent(msgid));
242 
243         AST msgstr = null;
244         AST child = msgid.getNextSibling();
245 
246         if (child == null) {
247             throw new ParseException("Header msgstr section is not found : "
248                     + ast.toString());
249         }
250 
251         if (child.getType() == PORecognizerTokenTypes.LITERAL_msgid_plural) {
252             entry.setMsgIdPlural(getLiteralContent(child));
253             msgstr = child.getNextSibling();
254         } else {
255             msgstr = child;
256         }
257 
258         if (msgstr == null) {
259             throw new ParseException("Header msgstr section is not found : "
260                     + ast.toString());
261         }
262 
263         List<String> messages = new ArrayList<String>();
264 
265         while (msgstr != null) {
266             checkNodeType(msgstr, PORecognizerTokenTypes.LITERAL_msgstr);
267             String message = getLiteralContent(msgstr);
268 
269             if (StringUtils.isNotBlank(message)) {
270                 messages.add(message);
271             }
272 
273             msgstr = msgstr.getNextSibling();
274         }
275 
276         entry.setMsgStr(messages);
277 
278         callback.onEntry(entry);
279     }
280 
281     protected String getLiteralContent(AST ast) throws ParseException {
282         StringBuffer buffer = new StringBuffer();
283 
284         AST child = ast.getFirstChild();
285 
286         while (child != null) {
287             buffer.append(child.getText());
288             child = child.getNextSibling();
289         }
290 
291         return StringEscapeUtils.unescapeJava(buffer.toString());
292     }
293 
294     protected void parsePluralForm(String value) {
295         Matcher matcher = REGEXP_PLURAL_FORM.matcher(value);
296         if (matcher.matches() && matcher.groupCount() > 1) {
297             int nplural = Integer.parseInt(matcher.group(1));
298             String expression = matcher.group(2).trim();
299 
300             callback.onHeaderPluralForm(nplural, expression);
301         }
302     }
303 
304     private void checkNodeType(AST ast, int type) throws ParseException {
305         if (ast.getType() != type) {
306             StringBuffer buffer = new StringBuffer();
307 
308             buffer.append("Invalid AST node type was specified :\n");
309             buffer.append(" - line : ");
310             buffer.append(ast.getLine());
311             buffer.append(", column : ");
312             buffer.append(ast.getColumn());
313             buffer.append("\n - expected type : ");
314             buffer.append(type);
315             buffer.append("\n - actual type : ");
316             buffer.append(ast.getType());
317 
318             throw new ParseException(buffer.toString());
319         }
320     }
321 }