/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package examples.mail;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.net.PrintCommandListener;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
import org.apache.commons.net.imap.IMAP;
import org.apache.commons.net.imap.IMAPClient;
import org.apache.commons.net.imap.IMAPReply;

/**
 * This is an example program demonstrating how to use the IMAP[S]Client class.
 * This program connects to an IMAP[S] server and exports selected messages from a folder into an mbox file.
 * <p>
 * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path &lt;mboxfile&gt; [sequence-set] [item-names]
 * <p>
 * An example sequence-set might be:
 * <ul>
 * <li>11,2,3:10,20:*</li>
 * <li>1:* - this is the default</li>
 * </ul>
 * <p>
 * Some example item-names might be:
 * <ul>
 * <li>BODY.PEEK[HEADER]</li>
 * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
 * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
 * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
 * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
 * <li>ENVELOPE X-GM-LABELS</li>
 * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
 * </ul>
 * <p>
 * Macro names cannot be combined with anything else; they must be used alone.<br>
 * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
 * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
 * For example:<br>
 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
 * <p>
 * The sequence-set is passed unmodified to the FETCH command.<br>
 * The item names are wrapped in parentheses if more than one is provided.
 * Otherwise, the parameter is assumed to be wrapped if necessary.<br>
 * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
 * Also the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references).
 * It does not capture the output from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
 */
public final class IMAPExportMbox
{

    private static final String CRLF = "\r\n";
    private static final String LF = "\n";
    private static final String EOL_DEFAULT = System.getProperty("line.separator");

    private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
    // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000"  BODY[] {nn} ...)
    // RFC 3501 allows the day to be space-padded (" 7-Oct-2013"), so accept a leading space as well
    private static final Pattern PATID = // INTERNALDATE
            Pattern.compile(".*INTERNALDATE \"([ \\d]\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
    private static final int PATID_DATE_GROUP = 1;

    private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number
    private static final int PATSEQ_SEQUENCE_GROUP = 1;

    // e.g. * 382 EXISTS
    private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT

    // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
    private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");

    private static final int CONNECT_TIMEOUT = 10; // Seconds
    private static final int READ_TIMEOUT = 10;

    /**
     * Parses the command line, connects to the server, selects the folder and fetches the
     * requested messages, writing them to the mbox file (or echoing the raw protocol
     * exchange when the file name is "-").
     *
     * @param args options followed by the server URI, the mbox file name and the optional
     *             sequence-set and item names; see the usage text printed by {@link #usage()}
     * @throws IOException if the mbox file cannot be used, the folder cannot be selected or the FETCH fails
     * @throws URISyntaxException if the URI cannot be rebuilt from its parts after the initial parse failed
     */
    public static void main(String[] args) throws IOException, URISyntaxException
    {
        int connectTimeout = CONNECT_TIMEOUT;
        int readTimeout = READ_TIMEOUT;

        int argIdx;
        String eol = EOL_DEFAULT;
        boolean printHash = false;
        boolean printMarker = false;
        int retryWaitSecs = 0;

        // Leading options; stop at the first argument that is not a known option
        for (argIdx = 0; argIdx < args.length; argIdx++) {
            final String arg = args[argIdx];
            if (arg.equals("-c")) {
                connectTimeout = intOptionValue(args, ++argIdx);
            } else if (arg.equals("-r")) {
                readTimeout = intOptionValue(args, ++argIdx);
            } else if (arg.equals("-R")) {
                retryWaitSecs = intOptionValue(args, ++argIdx);
            } else if (arg.equals("-LF")) {
                eol = LF;
            } else if (arg.equals("-CRLF")) {
                eol = CRLF;
            } else if (arg.equals("-.")) {
                printHash = true;
            } else if (arg.equals("-X")) {
                printMarker = true;
            } else {
                break;
            }
        }

        final int argCount = args.length - argIdx;

        if (argCount < 2)
        {
            usage();
        }

        final String uriString = args[argIdx++];
        URI uri;
        try {
            uri = URI.create(uriString);
        } catch (IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
            Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
            if (m.matches()) {
                uri = URI.create(m.group(1)); // Just the scheme and auth parts
                // The multi-argument constructor quotes illegal characters in the path
                uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
            } else {
                throw e;
            }
        }
        final String file = args[argIdx++];
        String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
        final String itemNames;
        // Handle 0, 1 or multiple item names
        if (argCount > 4) {
            // Several names: join all remaining arguments and wrap them in parentheses
            final StringBuilder sb = new StringBuilder("(");
            String separator = "";
            while (argIdx < args.length) {
                sb.append(separator).append(args[argIdx++]);
                separator = " ";
            }
            itemNames = sb.append(")").toString();
        } else if (argCount > 3) {
            itemNames = args[argIdx++]; // single name; assumed to be wrapped already if necessary
        } else {
            itemNames = "(INTERNALDATE BODY.PEEK[])";
        }

        // Validate the folder before touching the output file, so that a bad URI
        // cannot create or truncate the mbox file.
        final String path = uri.getPath();
        if (path == null || path.length() < 1) {
            throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
        }
        final String folder = path.substring(1); // skip the leading /

        final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
        final MboxListener chunkListener;
        if (file.equals("-")) {
            chunkListener = null;
        } else {
            final File mbox;
            final boolean append;
            if (file.startsWith("+")) {
                mbox = new File(file.substring(1));
                append = true;
                System.out.println("Appending to file " + mbox);
            } else if (file.startsWith("-")) {
                mbox = new File(file.substring(1));
                append = false;
                System.out.println("Writing to file " + mbox);
            } else {
                mbox = new File(file);
                if (mbox.exists() && mbox.length() > 0) {
                    throw new IOException("mailbox file: " + mbox + " already exists and is non-empty!");
                }
                append = false;
                System.out.println("Creating file " + mbox);
            }
            chunkListener = new MboxListener(
                    new BufferedWriter(new FileWriter(mbox, append)), eol, printHash, printMarker, checkSequence);
        }

        // suppress login details
        final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
            @Override
            public void protocolReplyReceived(ProtocolCommandEvent event) {
                if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener
                    super.protocolReplyReceived(event);
                }
            }
        };

        // Connect and login
        final IMAPClient imap = IMAPUtils.imapLogin(uri, connectTimeout * 1000, listener);

        String maxIndexInFolder = null;

        try {

            imap.setSoTimeout(readTimeout * 1000);

            if (!imap.select(folder)) {
                throw new IOException("Could not select folder: " + folder);
            }

            // Pick the message count out of the SELECT response
            for (String line : imap.getReplyStrings()) {
                maxIndexInFolder = matches(line, PATEXISTS, 1);
                if (maxIndexInFolder != null) {
                    break;
                }
            }

            if (chunkListener != null) {
                imap.setChunkListener(chunkListener);
            } // else the command listener displays the full output without processing

            while (true) {
                final boolean ok = imap.fetch(sequenceSet, itemNames);
                // A retry is only attempted when it was requested, messages are being
                // captured and the sequence-set is a simple range that can be resumed.
                if (ok || retryWaitSecs <= 0 || chunkListener == null || !checkSequence) {
                    break;
                }
                final String replyString = imap.getReplyString(); // includes EOL
                if (!startsWith(replyString, PATTEMPFAIL)) {
                    throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
                }
                System.err.println("Temporary error detected, will retry in " + retryWaitSecs + " seconds");
                // Resume after the last message seen; if nothing has been received yet,
                // repeat the original request rather than asking for the invalid "0:*".
                // NOTE(review): the resumed range is open-ended, so an upper bound given in
                // the original sequence-set is not preserved on retry.
                if (chunkListener.lastSeq != -1) {
                    sequenceSet = (chunkListener.lastSeq + 1) + ":*";
                }
                try {
                    Thread.sleep(retryWaitSecs * 1000L); // long arithmetic avoids int overflow
                } catch (InterruptedException e) {
                    // Restore the flag and give up instead of spinning through further retries
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted while waiting to retry FETCH " + sequenceSet);
                }
            }

        } catch (IOException ioe) {
            String count = chunkListener == null ? "?" : Integer.toString(chunkListener.total);
            System.err.println(
                    "FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
            if (chunkListener != null) {
                System.err.println("Last complete response seen: " + chunkListener.lastFetched);
            }
            throw ioe;
        } finally {

            if (printHash) {
                System.err.println();
            }

            // Nested try/finally: a failure while closing the mbox file or logging out
            // must not prevent the connection from being closed.
            try {
                if (chunkListener != null) {
                    chunkListener.close();
                    final Iterator<String> missingIds = chunkListener.missingIds.iterator();
                    if (missingIds.hasNext()) {
                        StringBuilder sb = new StringBuilder();
                        for (;;) {
                            sb.append(missingIds.next());
                            if (!missingIds.hasNext()) {
                                break;
                            }
                            sb.append(",");
                        }
                        System.err.println("*** Missing ids: " + sb.toString());
                    }
                }
            } finally {
                try {
                    imap.logout();
                } finally {
                    imap.disconnect();
                }
            }
        }
        if (chunkListener != null) {
            System.out.println("Processed " + chunkListener.total + " messages.");
        }
        if (maxIndexInFolder != null) {
            System.out.println("Folder contained " + maxIndexInFolder + " messages.");
        }
    }

    /**
     * Prints the command-line usage to standard error and terminates the JVM with exit status 1.
     */
    private static void usage() {
        System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" +
                " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
        System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
        System.err.println("\t-c connect timeout in seconds (default 10)");
        System.err.println("\t-r read timeout in seconds (default 10)");
        System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
        System.err.println("\t-. print a . for each complete message received");
        System.err.println("\t-X print the X-IMAP line for each complete message received");
        System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
        System.err.println("\tPrefix filename with '+' to append to the file. Prefix with '-' to allow overwrite.");
        // IMAP ranges use ':' as the separator, not '-'
        System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3:10,20:* - default 1:*");
        System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" +
                " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
        System.exit(1);
    }

    /**
     * Reads the integer value of a command-line option.
     * If the value is missing, the usage text is printed and the JVM exits.
     *
     * @param args the full argument array
     * @param valueIdx index at which the option value is expected; the option itself is at {@code valueIdx - 1}
     * @return the parsed value
     * @throws NumberFormatException if the value is not a valid integer
     */
    private static int intOptionValue(String[] args, int valueIdx) {
        if (valueIdx >= args.length) {
            System.err.println("Missing value for option " + args[valueIdx - 1]);
            usage(); // does not return
        }
        return Integer.parseInt(args[valueIdx]);
    }

    /**
     * Tests whether the start of the input matches the pattern.
     *
     * @param input the text to examine
     * @param pat the pattern, anchored at the start of the input only
     * @return {@code true} if a prefix of the input matches
     */
    private static boolean startsWith(String input, Pattern pat) {
        Matcher m = pat.matcher(input);
        return m.lookingAt();
    }

    /**
     * Extracts a capture group when the start of the input matches the pattern.
     *
     * @param input the text to examine
     * @param pat the pattern, anchored at the start of the input only
     * @param index the capture group to return
     * @return the group content, or {@code null} if the input does not match
     */
    private static String matches(String input, Pattern pat, int index) {
        Matcher m = pat.matcher(input);
        if (m.lookingAt()) {
            return m.group(index);
        }
        return null;
    }

    /**
     * Chunk listener that appends each multi-line FETCH response to an mbox file
     * and keeps track of how many messages were written and which sequence numbers were skipped.
     */
    private static class MboxListener implements IMAPChunkListener {

        private static final String DEFAULT_SENDER = "MAILER-DAEMON";
        private static final String RETURN_PATH = "Return-Path: ";

        private final BufferedWriter bw;
        volatile int total = 0; // number of complete messages written
        volatile String lastFetched; // first line of the last response written
        final List<String> missingIds = new ArrayList<String>(); // gaps found in the sequence numbers
        volatile long lastSeq = -1; // sequence number of the last message written; -1 if none
        private final String eol;
        // 'yyyy' is the calendar year; 'YYYY' would be the week-year, which differs around New Year.
        // English names are required regardless of the JVM default locale.
        private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
            = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy", Locale.ENGLISH);

        // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
        // The month abbreviations are English, so do not depend on the default locale
        private final SimpleDateFormat IDPARSE // for parsing INTERNALDATE
            = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z", Locale.ENGLISH);
        private final boolean printHash;
        private final boolean printMarker;
        private final boolean checkSequence;

        /**
         * @param bw where the messages are written; closed by {@link #close()}
         * @param eol the line terminator to use in the mbox file
         * @param printHash whether to print a "." on standard error for each message
         * @param printMarker whether to print the first response line of each message on standard error
         * @param checkSequence whether to look for gaps in the message sequence numbers
         * @throws IOException declared for callers; not thrown by the current implementation
         */
        MboxListener(BufferedWriter bw, String eol, boolean printHash, boolean printMarker, boolean checkSequence)
                throws IOException {
            this.eol = eol;
            this.printHash = printHash;
            this.printMarker = printMarker;
            DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
            this.bw = bw;
            this.checkSequence = checkSequence;
        }

        /**
         * Writes one FETCH response to the mbox file as a single message.
         * The first reply line supplies the INTERNALDATE and the sequence number, the
         * following lines are the message itself, and the last line carries the closing ")".
         *
         * @param imap the client whose current reply strings make up the response
         * @return always {@code true}
         *         (NOTE(review): presumably this tells the client the reply buffer can be
         *         discarded - confirm against the IMAPChunkListener documentation)
         * @throws RuntimeException wrapping the IOException if the mbox file cannot be written
         */
        @Override
        public boolean chunkReceived(IMAP imap) {
            final String[] replyStrings = imap.getReplyStrings();
            Date received = new Date(); // fallback if INTERNALDATE is absent or unparseable
            final String firstLine = replyStrings[0];
            Matcher m = PATID.matcher(firstLine);
            if (m.lookingAt()) { // found a match
                // trim: the day may be space-padded
                String date = m.group(PATID_DATE_GROUP).trim();
                try {
                    received = IDPARSE.parse(date);
                } catch (ParseException e) {
                    System.err.println(e);
                }
            } else {
                System.err.println("No timestamp found in: " + firstLine + "  - using current time");
            }
            final String replyTo = findSender(replyStrings, firstLine);
            try {
                // Add initial mbox header line
                bw.append("From ");
                bw.append(replyTo);
                bw.append(' ');
                bw.append(DATE_FORMAT.format(received));
                bw.append(eol);
                // Debug
                bw.append("X-IMAP-Response: ").append(firstLine).append(eol);
                if (printMarker) {
                    System.err.println("[" + total + "] " + firstLine);
                }
                // Skip first and last lines
                for (int i = 1; i < replyStrings.length - 1; i++) {
                    writeBodyLine(replyStrings[i]);
                }
                // The last line ends with the trailing closing ")" which needs to be stripped
                String lastLine = replyStrings[replyStrings.length - 1];
                final int lastLength = lastLine.length();
                if (lastLength > 1) { // there's some content, we need to save it
                    writeBodyLine(lastLine.substring(0, lastLength - 1));
                }
                bw.append(eol); // blank line between entries
            } catch (IOException e) {
                throw new RuntimeException(e); // chunkReceived cannot throw a checked Exception
            }
            lastFetched = firstLine;
            total++;
            if (checkSequence) {
                m = PATSEQ.matcher(firstLine);
                if (m.lookingAt()) { // found a match
                    final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
                    if (lastSeq != -1) {
                        long missing = msgSeq - lastSeq - 1;
                        if (missing != 0) {
                            for (long j = lastSeq + 1; j < msgSeq; j++) {
                                missingIds.add(String.valueOf(j));
                            }
                            System.err.println(
                                "*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
                        }
                    }
                    lastSeq = msgSeq;
                }
            }
            if (printHash) {
                System.err.print(".");
            }
            return true;
        }

        /**
         * Determines the sender for the mbox From_ line from the first Return-Path header.
         *
         * @param replyStrings the response lines; the first and last are not part of the message
         * @param firstLine the first response line, used only in diagnostics
         * @return the address from the Return-Path header without its angle brackets, or
         *         "MAILER-DAEMON" if there is no such header or the address is empty
         */
        private String findSender(String[] replyStrings, String firstLine) {
            for (int i = 1; i < replyStrings.length - 1; i++) {
                final String line = replyStrings[i];
                if (!line.startsWith(RETURN_PATH)) {
                    continue;
                }
                final String value = line.substring(RETURN_PATH.length()).trim();
                final String address;
                if (value.startsWith("<") && value.endsWith(">")) {
                    address = value.substring(1, value.length() - 1); // drop <> wrapper
                } else {
                    System.err.println("Unexpected Return-path:" + line + " in " + firstLine);
                    address = value; // use the value as is
                }
                // An empty address (e.g. the null path "<>") would produce a malformed From_ line
                return address.length() > 0 ? address : DEFAULT_SENDER;
            }
            return DEFAULT_SENDER;
        }

        /**
         * Writes one line of message content, escaping it if it looks like an mbox From_ line.
         *
         * @param line the line to write, without line terminator
         * @throws IOException if the line cannot be written
         */
        private void writeBodyLine(String line) throws IOException {
            if (startsWith(line, PATFROM)) {
                bw.append('>'); // Escape a From_ line
            }
            bw.append(line);
            bw.append(eol);
        }

        /**
         * Closes the mbox writer, flushing any buffered content.
         *
         * @throws IOException if the writer cannot be closed
         */
        public void close() throws IOException {
            if (bw != null) {
                bw.close();
            }
        }
    }
}