001/* 002 * Copyright 2007-2018 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2008-2018 Ping Identity Corporation 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldif; 022 023 024 025import java.io.BufferedReader; 026import java.io.Closeable; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.InputStream; 030import java.io.InputStreamReader; 031import java.io.IOException; 032import java.text.ParseException; 033import java.util.ArrayList; 034import java.util.Collection; 035import java.util.Iterator; 036import java.util.HashSet; 037import java.util.LinkedHashMap; 038import java.util.List; 039import java.util.Set; 040import java.util.concurrent.BlockingQueue; 041import java.util.concurrent.ArrayBlockingQueue; 042import java.util.concurrent.TimeUnit; 043import java.util.concurrent.atomic.AtomicBoolean; 044import java.nio.charset.Charset; 045 046import com.unboundid.asn1.ASN1OctetString; 047import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 048import com.unboundid.ldap.matchingrules.MatchingRule; 049import com.unboundid.ldap.sdk.Attribute; 050import com.unboundid.ldap.sdk.Control; 051import com.unboundid.ldap.sdk.Entry; 052import com.unboundid.ldap.sdk.Modification; 053import com.unboundid.ldap.sdk.ModificationType; 
054import com.unboundid.ldap.sdk.LDAPException; 055import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 056import com.unboundid.ldap.sdk.schema.Schema; 057import com.unboundid.util.AggregateInputStream; 058import com.unboundid.util.Base64; 059import com.unboundid.util.LDAPSDKThreadFactory; 060import com.unboundid.util.ThreadSafety; 061import com.unboundid.util.ThreadSafetyLevel; 062import com.unboundid.util.parallel.AsynchronousParallelProcessor; 063import com.unboundid.util.parallel.Result; 064import com.unboundid.util.parallel.ParallelProcessor; 065import com.unboundid.util.parallel.Processor; 066 067import static com.unboundid.ldif.LDIFMessages.*; 068import static com.unboundid.util.Debug.*; 069import static com.unboundid.util.StaticUtils.*; 070import static com.unboundid.util.Validator.*; 071 072/** 073 * This class provides an LDIF reader, which can be used to read and decode 074 * entries and change records from a data source using the LDAP Data Interchange 075 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 076 * <BR> 077 * This class is not synchronized. If multiple threads read from the 078 * LDIFReader, they must be synchronized externally. 079 * <BR><BR> 080 * <H2>Example</H2> 081 * The following example iterates through all entries contained in an LDIF file 082 * and attempts to add them to a directory server: 083 * <PRE> 084 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 085 * 086 * int entriesRead = 0; 087 * int entriesAdded = 0; 088 * int errorsEncountered = 0; 089 * while (true) 090 * { 091 * Entry entry; 092 * try 093 * { 094 * entry = ldifReader.readEntry(); 095 * if (entry == null) 096 * { 097 * // All entries have been read. 
098 * break; 099 * } 100 * 101 * entriesRead++; 102 * } 103 * catch (LDIFException le) 104 * { 105 * errorsEncountered++; 106 * if (le.mayContinueReading()) 107 * { 108 * // A recoverable error occurred while attempting to read a change 109 * // record, at or near line number le.getLineNumber() 110 * // The entry will be skipped, but we'll try to keep reading from the 111 * // LDIF file. 112 * continue; 113 * } 114 * else 115 * { 116 * // An unrecoverable error occurred while attempting to read an entry 117 * // at or near line number le.getLineNumber() 118 * // No further LDIF processing will be performed. 119 * break; 120 * } 121 * } 122 * catch (IOException ioe) 123 * { 124 * // An I/O error occurred while attempting to read from the LDIF file. 125 * // No further LDIF processing will be performed. 126 * errorsEncountered++; 127 * break; 128 * } 129 * 130 * LDAPResult addResult; 131 * try 132 * { 133 * addResult = connection.add(entry); 134 * // If we got here, then the change should have been processed 135 * // successfully. 136 * entriesAdded++; 137 * } 138 * catch (LDAPException le) 139 * { 140 * // If we got here, then the change attempt failed. 141 * addResult = le.toLDAPResult(); 142 * errorsEncountered++; 143 * } 144 * } 145 * 146 * ldifReader.close(); 147 * </PRE> 148 */ 149@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE) 150public final class LDIFReader 151 implements Closeable 152{ 153 /** 154 * The default buffer size (128KB) that will be used when reading from the 155 * data source. 156 */ 157 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024; 158 159 160 161 /* 162 * When processing asynchronously, this determines how many of the allocated 163 * worker threads are used to parse each batch of read entries. 164 */ 165 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3; 166 167 168 169 /** 170 * When processing asynchronously, this specifies the size of the pending and 171 * completed queues. 
172 */ 173 private static final int ASYNC_QUEUE_SIZE = 500; 174 175 176 177 /** 178 * Special entry used internally to signal that the LDIFReaderEntryTranslator 179 * has signalled that a read Entry should be skipped by returning null, 180 * which normally implies EOF. 181 */ 182 private static final Entry SKIP_ENTRY = new Entry("cn=skipped"); 183 184 185 186 /** 187 * The default base path that will be prepended to relative paths. It will 188 * end with a trailing slash. 189 */ 190 private static final String DEFAULT_RELATIVE_BASE_PATH; 191 static 192 { 193 final File currentDir; 194 final String currentDirString = System.getProperty("user.dir"); 195 if (currentDirString == null) 196 { 197 currentDir = new File("."); 198 } 199 else 200 { 201 currentDir = new File(currentDirString); 202 } 203 204 final String currentDirAbsolutePath = currentDir.getAbsolutePath(); 205 if (currentDirAbsolutePath.endsWith(File.separator)) 206 { 207 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath; 208 } 209 else 210 { 211 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator; 212 } 213 } 214 215 216 217 // The buffered reader that will be used to read LDIF data. 218 private final BufferedReader reader; 219 220 // The behavior that should be exhibited when encountering duplicate attribute 221 // values. 222 private volatile DuplicateValueBehavior duplicateValueBehavior; 223 224 // A line number counter. 225 private long lineNumberCounter = 0; 226 227 // The change record translator to use, if any. 228 private final LDIFReaderChangeRecordTranslator changeRecordTranslator; 229 230 // The entry translator to use, if any. 231 private final LDIFReaderEntryTranslator entryTranslator; 232 233 // The schema that will be used when processing, if applicable. 234 private Schema schema; 235 236 // Specifies the base path that will be prepended to relative paths for file 237 // URLs. 
private volatile String relativeBasePath;

  // The behavior that should be exhibited with regard to illegal trailing
  // spaces in attribute values.
  private volatile TrailingSpaceBehavior trailingSpaceBehavior;

  // True iff we are processing asynchronously.
  private final boolean isAsync;

  //
  // The following only apply to asynchronous processing.  They are all null
  // when the reader is operating synchronously.
  //

  // Parses entries asynchronously.
  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
       asyncParser;

  // Set to true when the end of the input is reached.
  private final AtomicBoolean asyncParsingComplete;

  // The records that have been read and parsed.
  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
       asyncParsedRecords;



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * The data is decoded as UTF-8 and records are parsed synchronously when
   * one of the read methods is called.
   *
   * @param  path  The path to the file from which the data is to be read.  It
   *               must not be {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final String path)
         throws IOException
  {
    this(new FileInputStream(path));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and parses the LDIF records asynchronously using the specified number of
   * threads.
   *
   * @param  path             The path to the file from which the data is to be
   *                          read.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be parsed
   *                          synchronously.  It must not be negative.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
*/
  public LDIFReader(final String path, final int numParseThreads)
         throws IOException
  {
    this(new FileInputStream(path), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * The data is decoded as UTF-8 and records are parsed synchronously when
   * one of the read methods is called.
   *
   * @param  file  The file from which the data is to be read.  It must not be
   *               {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file)
         throws IOException
  {
    this(new FileInputStream(file));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param  file             The file from which the data is to be read.  It
   *                          must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.  A
   *                          value of zero causes records to be parsed
   *                          synchronously.  It must not be negative.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file, final int numParseThreads)
         throws IOException
  {
    this(new FileInputStream(file), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.  No change record
   * translator is used.
   *
   * @param  files            The files from which the data is to be read.  It
   *                          must not be {@code null} or empty.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to entries
   *                          before they are returned.  This is normally
   *                          {@code null}, which causes entries to be returned
   *                          unaltered.  This is particularly useful when
   *                          parsing the input file in parallel because the
   *                          entry translation is also done in parallel.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
         throws IOException
  {
    this(files, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.  The data is decoded
   * as UTF-8.
   *
   * @param  files                   The files from which the data is to be
   *                                 read.  It must not be {@code null} or
   *                                 empty.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
*/
  public LDIFReader(final File[] files, final int numParseThreads,
       final LDIFReaderEntryTranslator entryTranslator,
       final LDIFReaderChangeRecordTranslator changeRecordTranslator)
         throws IOException
  {
    // UTF-8 is the character set used when the caller does not specify one.
    this(files, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files                   The files from which the data is to be
   *                                 read.  It must not be {@code null} or
   *                                 empty.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   * @param  characterSet            The character set to use when reading from
   *                                 the input stream.  It must not be
   *                                 {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
449 */ 450 public LDIFReader(final File[] files, final int numParseThreads, 451 final LDIFReaderEntryTranslator entryTranslator, 452 final LDIFReaderChangeRecordTranslator changeRecordTranslator, 453 final String characterSet) 454 throws IOException 455 { 456 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 457 changeRecordTranslator, characterSet); 458 } 459 460 461 462 /** 463 * Creates a new aggregate input stream that will read data from the specified 464 * files. If there are multiple files, then a "padding" file will be inserted 465 * between them to ensure that there is at least one blank line between the 466 * end of one file and the beginning of another. 467 * 468 * @param files The files from which the data is to be read. It must not be 469 * {@code null} or empty. 470 * 471 * @return The input stream to use to read data from the provided files. 472 * 473 * @throws IOException If a problem is encountered while attempting to 474 * create the input stream. 475 */ 476 private static InputStream createAggregateInputStream(final File... files) 477 throws IOException 478 { 479 if (files.length == 0) 480 { 481 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 482 } 483 else 484 { 485 return new AggregateInputStream(true, files); 486 } 487 } 488 489 490 491 /** 492 * Creates a new LDIF reader that will read data from the provided input 493 * stream. 494 * 495 * @param inputStream The input stream from which the data is to be read. 496 * It must not be {@code null}. 497 */ 498 public LDIFReader(final InputStream inputStream) 499 { 500 this(inputStream, 0); 501 } 502 503 504 505 /** 506 * Creates a new LDIF reader that will read data from the specified stream 507 * and parses the LDIF records asynchronously using the specified number of 508 * threads. 509 * 510 * @param inputStream The input stream from which the data is to be read. 511 * It must not be {@code null}. 
512 * @param numParseThreads If this value is greater than zero, then the 513 * specified number of threads will be used to 514 * asynchronously read and parse the LDIF file. 515 * 516 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 517 * constructor for more details about asynchronous processing. 518 */ 519 public LDIFReader(final InputStream inputStream, final int numParseThreads) 520 { 521 // UTF-8 is required by RFC 2849. Java guarantees it's always available. 522 this(new BufferedReader(new InputStreamReader(inputStream, 523 Charset.forName("UTF-8")), 524 DEFAULT_BUFFER_SIZE), 525 numParseThreads); 526 } 527 528 529 530 /** 531 * Creates a new LDIF reader that will read data from the specified stream 532 * and parses the LDIF records asynchronously using the specified number of 533 * threads. 534 * 535 * @param inputStream The input stream from which the data is to be read. 536 * It must not be {@code null}. 537 * @param numParseThreads If this value is greater than zero, then the 538 * specified number of threads will be used to 539 * asynchronously read and parse the LDIF file. 540 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 541 * entries before they are returned. This is normally 542 * {@code null}, which causes entries to be returned 543 * unaltered. This is particularly useful when parsing 544 * the input file in parallel because the entry 545 * translation is also done in parallel. 546 * 547 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 548 * constructor for more details about asynchronous processing. 
*/
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    this(inputStream, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and parses the LDIF records asynchronously using the specified number of
   * threads.  The data is decoded as UTF-8.
   *
   * @param  inputStream             The input stream from which the data is to
   *                                 be read.  It must not be {@code null}.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
       final LDIFReaderEntryTranslator entryTranslator,
       final LDIFReaderChangeRecordTranslator changeRecordTranslator)
  {
    // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
    this(inputStream, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and parses the LDIF records asynchronously using the specified number of
   * threads.
   *
   * @param  inputStream             The input stream from which the data is to
   *                                 be read.  It must not be {@code null}.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned.  This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered.  This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned.  This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered.  This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   * @param  characterSet            The name of the character set to use when
   *                                 reading from the input stream.  It must not
   *                                 be {@code null}.  The name is resolved with
   *                                 {@code Charset.forName}, so an illegal or
   *                                 unsupported name results in an unchecked
   *                                 exception.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
*/
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
       final LDIFReaderEntryTranslator entryTranslator,
       final LDIFReaderChangeRecordTranslator changeRecordTranslator,
       final String characterSet)
  {
    // Wrap the stream in a reader that decodes with the requested character
    // set and buffers DEFAULT_BUFFER_SIZE characters at a time.
    this(new BufferedReader(
              new InputStreamReader(inputStream, Charset.forName(characterSet)),
              DEFAULT_BUFFER_SIZE),
         numParseThreads, entryTranslator, changeRecordTranslator);
  }



  /**
   * Creates a new LDIF reader that will use the provided buffered reader to
   * read the LDIF data.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.  Records are parsed synchronously.
   *
   * @param  reader  The buffered reader that will be used to read the LDIF
   *                 data.  It must not be {@code null}.
   */
  public LDIFReader(final BufferedReader reader)
  {
    this(reader, 0);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final BufferedReader reader, final int numParseThreads)
  {
    this(reader, numParseThreads, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data.  It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *                          This should only be set to greater than zero when
   *                          performance analysis has demonstrated that reading
   *                          and parsing the LDIF is a bottleneck.  The default
   *                          synchronous processing is normally fast enough.
   *                          There is little benefit in passing in a value
   *                          greater than four (unless there is an
   *                          LDIFReaderEntryTranslator that does time-consuming
   *                          processing).  A value of zero implies the
   *                          default behavior of reading and parsing LDIF
   *                          records synchronously when one of the read
   *                          methods is called.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to read
   *                          entries before they are returned.  This is
   *                          normally {@code null}, which causes entries to be
   *                          returned unaltered.  This is particularly useful
   *                          when parsing the input file in parallel because
   *                          the entry translation is also done in parallel.
   */
  public LDIFReader(final BufferedReader reader,
                    final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    this(reader, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.  The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader                  The buffered reader that will be used to
   *                                 read the LDIF data.  It must not be
   *                                 {@code null}.
729 * @param numParseThreads If this value is greater than zero, then 730 * the specified number of threads will be 731 * used to asynchronously read and parse the 732 * LDIF file. 733 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 734 * entries before they are returned. This is 735 * normally {@code null}, which causes entries 736 * to be returned unaltered. This is 737 * particularly useful when parsing the input 738 * file in parallel because the entry 739 * translation is also done in parallel. 740 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 741 * apply to change records before they are 742 * returned. This is normally {@code null}, 743 * which causes change records to be returned 744 * unaltered. This is particularly useful 745 * when parsing the input file in parallel 746 * because the change record translation is 747 * also done in parallel. 748 */ 749 public LDIFReader(final BufferedReader reader, final int numParseThreads, 750 final LDIFReaderEntryTranslator entryTranslator, 751 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 752 { 753 ensureNotNull(reader); 754 ensureTrue(numParseThreads >= 0, 755 "LDIFReader.numParseThreads must not be negative."); 756 757 this.reader = reader; 758 this.entryTranslator = entryTranslator; 759 this.changeRecordTranslator = changeRecordTranslator; 760 761 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 762 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 763 764 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 765 766 if (numParseThreads == 0) 767 { 768 isAsync = false; 769 asyncParser = null; 770 asyncParsingComplete = null; 771 asyncParsedRecords = null; 772 } 773 else 774 { 775 isAsync = true; 776 asyncParsingComplete = new AtomicBoolean(false); 777 778 // Decodes entries in parallel. 
779 final LDAPSDKThreadFactory threadFactory = 780 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 781 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 782 new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>( 783 new RecordParser(), threadFactory, numParseThreads, 784 ASYNC_MIN_PER_PARSING_THREAD); 785 786 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 787 ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE); 788 789 // The output queue must be a little more than twice as big as the input 790 // queue to more easily handle being shutdown in the middle of processing 791 // when the queues are full and threads are blocked. 792 asyncParsedRecords = new ArrayBlockingQueue 793 <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100); 794 795 asyncParser = new AsynchronousParallelProcessor 796 <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser, 797 asyncParsedRecords); 798 799 final LineReaderThread lineReaderThread = new LineReaderThread(); 800 lineReaderThread.start(); 801 } 802 } 803 804 805 806 /** 807 * Reads entries from the LDIF file with the specified path and returns them 808 * as a {@code List}. This is a convenience method that should only be used 809 * for data sets that are small enough so that running out of memory isn't a 810 * concern. 811 * 812 * @param path The path to the LDIF file containing the entries to be read. 813 * 814 * @return A list of the entries read from the given LDIF file. 815 * 816 * @throws IOException If a problem occurs while attempting to read data 817 * from the specified file. 818 * 819 * @throws LDIFException If a problem is encountered while attempting to 820 * decode data read as LDIF. 
821 */ 822 public static List<Entry> readEntries(final String path) 823 throws IOException, LDIFException 824 { 825 return readEntries(new LDIFReader(path)); 826 } 827 828 829 830 /** 831 * Reads entries from the specified LDIF file and returns them as a 832 * {@code List}. This is a convenience method that should only be used for 833 * data sets that are small enough so that running out of memory isn't a 834 * concern. 835 * 836 * @param file A reference to the LDIF file containing the entries to be 837 * read. 838 * 839 * @return A list of the entries read from the given LDIF file. 840 * 841 * @throws IOException If a problem occurs while attempting to read data 842 * from the specified file. 843 * 844 * @throws LDIFException If a problem is encountered while attempting to 845 * decode data read as LDIF. 846 */ 847 public static List<Entry> readEntries(final File file) 848 throws IOException, LDIFException 849 { 850 return readEntries(new LDIFReader(file)); 851 } 852 853 854 855 /** 856 * Reads and decodes LDIF entries from the provided input stream and 857 * returns them as a {@code List}. This is a convenience method that should 858 * only be used for data sets that are small enough so that running out of 859 * memory isn't a concern. 860 * 861 * @param inputStream The input stream from which the entries should be 862 * read. The input stream will be closed before 863 * returning. 864 * 865 * @return A list of the entries read from the given input stream. 866 * 867 * @throws IOException If a problem occurs while attempting to read data 868 * from the input stream. 869 * 870 * @throws LDIFException If a problem is encountered while attempting to 871 * decode data read as LDIF. 872 */ 873 public static List<Entry> readEntries(final InputStream inputStream) 874 throws IOException, LDIFException 875 { 876 return readEntries(new LDIFReader(inputStream)); 877 } 878 879 880 881 /** 882 * Reads entries from the provided LDIF reader and returns them as a list. 
883 * 884 * @param reader The reader from which the entries should be read. It will 885 * be closed before returning. 886 * 887 * @return A list of the entries read from the provided reader. 888 * 889 * @throws IOException If a problem was encountered while attempting to read 890 * data from the LDIF data source. 891 * 892 * @throws LDIFException If a problem is encountered while attempting to 893 * decode data read as LDIF. 894 */ 895 private static List<Entry> readEntries(final LDIFReader reader) 896 throws IOException, LDIFException 897 { 898 try 899 { 900 final ArrayList<Entry> entries = new ArrayList<Entry>(10); 901 while (true) 902 { 903 final Entry e = reader.readEntry(); 904 if (e == null) 905 { 906 break; 907 } 908 909 entries.add(e); 910 } 911 912 return entries; 913 } 914 finally 915 { 916 reader.close(); 917 } 918 } 919 920 921 922 /** 923 * Closes this LDIF reader and the underlying LDIF source. 924 * 925 * @throws IOException If a problem occurs while closing the underlying LDIF 926 * source. 927 */ 928 public void close() 929 throws IOException 930 { 931 reader.close(); 932 933 if (isAsync()) 934 { 935 // Closing the reader will trigger the LineReaderThread to complete, but 936 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 937 // this, we clear out the completed output queue, which is larger than 938 // the input queue, so the LineReaderThread will stop reading and 939 // shutdown the asyncParser. 940 asyncParsedRecords.clear(); 941 } 942 } 943 944 945 946 /** 947 * Indicates whether to ignore any duplicate values encountered while reading 948 * LDIF records. 949 * 950 * @return {@code true} if duplicate values should be ignored, or 951 * {@code false} if any LDIF records containing duplicate values 952 * should be rejected. 953 * 954 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead. 
955 */ 956 @Deprecated() 957 public boolean ignoreDuplicateValues() 958 { 959 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 960 } 961 962 963 964 /** 965 * Specifies whether to ignore any duplicate values encountered while reading 966 * LDIF records. 967 * 968 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 969 * attribute values encountered while reading 970 * LDIF records. 971 * 972 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead. 973 */ 974 @Deprecated() 975 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 976 { 977 if (ignoreDuplicateValues) 978 { 979 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 980 } 981 else 982 { 983 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 984 } 985 } 986 987 988 989 /** 990 * Retrieves the behavior that should be exhibited if the LDIF reader 991 * encounters an entry with duplicate values. 992 * 993 * @return The behavior that should be exhibited if the LDIF reader 994 * encounters an entry with duplicate values. 995 */ 996 public DuplicateValueBehavior getDuplicateValueBehavior() 997 { 998 return duplicateValueBehavior; 999 } 1000 1001 1002 1003 /** 1004 * Specifies the behavior that should be exhibited if the LDIF reader 1005 * encounters an entry with duplicate values. 1006 * 1007 * @param duplicateValueBehavior The behavior that should be exhibited if 1008 * the LDIF reader encounters an entry with 1009 * duplicate values. 1010 */ 1011 public void setDuplicateValueBehavior( 1012 final DuplicateValueBehavior duplicateValueBehavior) 1013 { 1014 this.duplicateValueBehavior = duplicateValueBehavior; 1015 } 1016 1017 1018 1019 /** 1020 * Indicates whether to strip off any illegal trailing spaces that may appear 1021 * in LDIF records (e.g., after an entry DN or attribute value). 
The LDIF 1022 * specification strongly recommends that any value which legitimately 1023 * contains trailing spaces be base64-encoded, and any spaces which appear 1024 * after the end of non-base64-encoded values may therefore be considered 1025 * invalid. If any such trailing spaces are encountered in an LDIF record and 1026 * they are not to be stripped, then an {@link LDIFException} will be thrown 1027 * for that record. 1028 * <BR><BR> 1029 * Note that this applies only to spaces after the end of a value, and not to 1030 * spaces which may appear at the end of a line for a value that is wrapped 1031 * and continued on the next line. 1032 * 1033 * @return {@code true} if illegal trailing spaces should be stripped off, or 1034 * {@code false} if LDIF records containing illegal trailing spaces 1035 * should be rejected. 1036 * 1037 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead. 1038 */ 1039 @Deprecated() 1040 public boolean stripTrailingSpaces() 1041 { 1042 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 1043 } 1044 1045 1046 1047 /** 1048 * Specifies whether to strip off any illegal trailing spaces that may appear 1049 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1050 * specification strongly recommends that any value which legitimately 1051 * contains trailing spaces be base64-encoded, and any spaces which appear 1052 * after the end of non-base64-encoded values may therefore be considered 1053 * invalid. If any such trailing spaces are encountered in an LDIF record and 1054 * they are not to be stripped, then an {@link LDIFException} will be thrown 1055 * for that record. 1056 * <BR><BR> 1057 * Note that this applies only to spaces after the end of a value, and not to 1058 * spaces which may appear at the end of a line for a value that is wrapped 1059 * and continued on the next line. 
1060 * 1061 * @param stripTrailingSpaces Indicates whether to strip off any illegal 1062 * trailing spaces, or {@code false} if LDIF 1063 * records containing them should be rejected. 1064 * 1065 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead. 1066 */ 1067 @Deprecated() 1068 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1069 { 1070 trailingSpaceBehavior = stripTrailingSpaces 1071 ? TrailingSpaceBehavior.STRIP 1072 : TrailingSpaceBehavior.REJECT; 1073 } 1074 1075 1076 1077 /** 1078 * Retrieves the behavior that should be exhibited when encountering attribute 1079 * values which are not base64-encoded but contain trailing spaces. The LDIF 1080 * specification strongly recommends that any value which legitimately 1081 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1082 * may be configured to automatically strip these spaces, to preserve them, or 1083 * to reject any entry or change record containing them. 1084 * 1085 * @return The behavior that should be exhibited when encountering attribute 1086 * values which are not base64-encoded but contain trailing spaces. 1087 */ 1088 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1089 { 1090 return trailingSpaceBehavior; 1091 } 1092 1093 1094 1095 /** 1096 * Specifies the behavior that should be exhibited when encountering attribute 1097 * values which are not base64-encoded but contain trailing spaces. The LDIF 1098 * specification strongly recommends that any value which legitimately 1099 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1100 * may be configured to automatically strip these spaces, to preserve them, or 1101 * to reject any entry or change record containing them. 1102 * 1103 * @param trailingSpaceBehavior The behavior that should be exhibited when 1104 * encountering attribute values which are not 1105 * base64-encoded but contain trailing spaces. 
1106 */ 1107 public void setTrailingSpaceBehavior( 1108 final TrailingSpaceBehavior trailingSpaceBehavior) 1109 { 1110 this.trailingSpaceBehavior = trailingSpaceBehavior; 1111 } 1112 1113 1114 1115 /** 1116 * Retrieves the base path that will be prepended to relative paths in order 1117 * to obtain an absolute path. This will only be used for "file:" URLs that 1118 * have paths which do not begin with a slash. 1119 * 1120 * @return The base path that will be prepended to relative paths in order to 1121 * obtain an absolute path. 1122 */ 1123 public String getRelativeBasePath() 1124 { 1125 return relativeBasePath; 1126 } 1127 1128 1129 1130 /** 1131 * Specifies the base path that will be prepended to relative paths in order 1132 * to obtain an absolute path. This will only be used for "file:" URLs that 1133 * have paths which do not begin with a space. 1134 * 1135 * @param relativeBasePath The base path that will be prepended to relative 1136 * paths in order to obtain an absolute path. 1137 */ 1138 public void setRelativeBasePath(final String relativeBasePath) 1139 { 1140 setRelativeBasePath(new File(relativeBasePath)); 1141 } 1142 1143 1144 1145 /** 1146 * Specifies the base path that will be prepended to relative paths in order 1147 * to obtain an absolute path. This will only be used for "file:" URLs that 1148 * have paths which do not begin with a space. 1149 * 1150 * @param relativeBasePath The base path that will be prepended to relative 1151 * paths in order to obtain an absolute path. 1152 */ 1153 public void setRelativeBasePath(final File relativeBasePath) 1154 { 1155 final String path = relativeBasePath.getAbsolutePath(); 1156 if (path.endsWith(File.separator)) 1157 { 1158 this.relativeBasePath = path; 1159 } 1160 else 1161 { 1162 this.relativeBasePath = path + File.separator; 1163 } 1164 } 1165 1166 1167 1168 /** 1169 * Retrieves the schema that will be used when reading LDIF records, if 1170 * defined. 
1171 * 1172 * @return The schema that will be used when reading LDIF records, or 1173 * {@code null} if no schema should be used and all attributes should 1174 * be treated as case-insensitive strings. 1175 */ 1176 public Schema getSchema() 1177 { 1178 return schema; 1179 } 1180 1181 1182 1183 /** 1184 * Specifies the schema that should be used when reading LDIF records. 1185 * 1186 * @param schema The schema that should be used when reading LDIF records, 1187 * or {@code null} if no schema should be used and all 1188 * attributes should be treated as case-insensitive strings. 1189 */ 1190 public void setSchema(final Schema schema) 1191 { 1192 this.schema = schema; 1193 } 1194 1195 1196 1197 /** 1198 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1199 * change record. 1200 * 1201 * @return The record read from the LDIF source, or {@code null} if there are 1202 * no more entries to be read. 1203 * 1204 * @throws IOException If a problem occurs while trying to read from the 1205 * LDIF source. 1206 * 1207 * @throws LDIFException If the data read could not be parsed as an entry or 1208 * an LDIF change record. 1209 */ 1210 public LDIFRecord readLDIFRecord() 1211 throws IOException, LDIFException 1212 { 1213 if (isAsync()) 1214 { 1215 return readLDIFRecordAsync(); 1216 } 1217 else 1218 { 1219 return readLDIFRecordInternal(); 1220 } 1221 } 1222 1223 1224 1225 /** 1226 * Reads an entry from the LDIF source. 1227 * 1228 * @return The entry read from the LDIF source, or {@code null} if there are 1229 * no more entries to be read. 1230 * 1231 * @throws IOException If a problem occurs while attempting to read from the 1232 * LDIF source. 1233 * 1234 * @throws LDIFException If the data read could not be parsed as an entry. 
1235 */ 1236 public Entry readEntry() 1237 throws IOException, LDIFException 1238 { 1239 if (isAsync()) 1240 { 1241 return readEntryAsync(); 1242 } 1243 else 1244 { 1245 return readEntryInternal(); 1246 } 1247 } 1248 1249 1250 1251 /** 1252 * Reads an LDIF change record from the LDIF source. The LDIF record must 1253 * have a changetype. 1254 * 1255 * @return The change record read from the LDIF source, or {@code null} if 1256 * there are no more records to be read. 1257 * 1258 * @throws IOException If a problem occurs while attempting to read from the 1259 * LDIF source. 1260 * 1261 * @throws LDIFException If the data read could not be parsed as an LDIF 1262 * change record. 1263 */ 1264 public LDIFChangeRecord readChangeRecord() 1265 throws IOException, LDIFException 1266 { 1267 return readChangeRecord(false); 1268 } 1269 1270 1271 1272 /** 1273 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1274 * record does not have a changetype, then it may be assumed to be an add 1275 * change record. 1276 * 1277 * @param defaultAdd Indicates whether an LDIF record not containing a 1278 * changetype should be retrieved as an add change record. 1279 * If this is {@code false} and the record read does not 1280 * include a changetype, then an {@link LDIFException} 1281 * will be thrown. 1282 * 1283 * @return The change record read from the LDIF source, or {@code null} if 1284 * there are no more records to be read. 1285 * 1286 * @throws IOException If a problem occurs while attempting to read from the 1287 * LDIF source. 1288 * 1289 * @throws LDIFException If the data read could not be parsed as an LDIF 1290 * change record. 
1291 */ 1292 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1293 throws IOException, LDIFException 1294 { 1295 if (isAsync()) 1296 { 1297 return readChangeRecordAsync(defaultAdd); 1298 } 1299 else 1300 { 1301 return readChangeRecordInternal(defaultAdd); 1302 } 1303 } 1304 1305 1306 1307 /** 1308 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1309 * thread. 1310 * 1311 * @return The next parsed record or {@code null} if there are no more 1312 * records to read. 1313 * 1314 * @throws IOException If IOException was thrown when reading or parsing 1315 * the record. 1316 * 1317 * @throws LDIFException If LDIFException was thrown parsing the record. 1318 */ 1319 private LDIFRecord readLDIFRecordAsync() 1320 throws IOException, LDIFException 1321 { 1322 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1323 LDIFRecord record = null; 1324 while (record == null) 1325 { 1326 result = readLDIFRecordResultAsync(); 1327 if (result == null) 1328 { 1329 return null; 1330 } 1331 1332 record = result.getOutput(); 1333 1334 // This is a special value that means we should skip this Entry. We have 1335 // to use something different than null because null means EOF. 1336 if (record == SKIP_ENTRY) 1337 { 1338 record = null; 1339 } 1340 } 1341 return record; 1342 } 1343 1344 1345 1346 /** 1347 * Reads an entry asynchronously from the LDIF source. 1348 * 1349 * @return The entry read from the LDIF source, or {@code null} if there are 1350 * no more entries to be read. 1351 * 1352 * @throws IOException If a problem occurs while attempting to read from the 1353 * LDIF source. 1354 * @throws LDIFException If the data read could not be parsed as an entry. 
1355 */ 1356 private Entry readEntryAsync() 1357 throws IOException, LDIFException 1358 { 1359 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1360 LDIFRecord record = null; 1361 while (record == null) 1362 { 1363 result = readLDIFRecordResultAsync(); 1364 if (result == null) 1365 { 1366 return null; 1367 } 1368 1369 record = result.getOutput(); 1370 1371 // This is a special value that means we should skip this Entry. We have 1372 // to use something different than null because null means EOF. 1373 if (record == SKIP_ENTRY) 1374 { 1375 record = null; 1376 } 1377 } 1378 1379 if (record instanceof Entry) 1380 { 1381 return (Entry) record; 1382 } 1383 else if (record instanceof LDIFChangeRecord) 1384 { 1385 try 1386 { 1387 // Some LDIFChangeRecord can be converted to an Entry. This is really 1388 // an edge case though. 1389 return ((LDIFChangeRecord)record).toEntry(); 1390 } 1391 catch (final LDIFException e) 1392 { 1393 debugException(e); 1394 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1395 throw new LDIFException(e.getExceptionMessage(), 1396 firstLineNumber, true, e); 1397 } 1398 } 1399 1400 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1401 "LDIFChangeRecord"); 1402 } 1403 1404 1405 1406 /** 1407 * Reads an LDIF change record from the LDIF source asynchronously. 1408 * Optionally, if the LDIF record does not have a changetype, then it may be 1409 * assumed to be an add change record. 1410 * 1411 * @param defaultAdd Indicates whether an LDIF record not containing a 1412 * changetype should be retrieved as an add change record. 1413 * If this is {@code false} and the record read does not 1414 * include a changetype, then an {@link LDIFException} will 1415 * be thrown. 1416 * 1417 * @return The change record read from the LDIF source, or {@code null} if 1418 * there are no more records to be read. 1419 * 1420 * @throws IOException If a problem occurs while attempting to read from the 1421 * LDIF source. 
1422 * @throws LDIFException If the data read could not be parsed as an LDIF 1423 * change record. 1424 */ 1425 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1426 throws IOException, LDIFException 1427 { 1428 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1429 LDIFRecord record = null; 1430 while (record == null) 1431 { 1432 result = readLDIFRecordResultAsync(); 1433 if (result == null) 1434 { 1435 return null; 1436 } 1437 1438 record = result.getOutput(); 1439 1440 // This is a special value that means we should skip this Entry. We have 1441 // to use something different than null because null means EOF. 1442 if (record == SKIP_ENTRY) 1443 { 1444 record = null; 1445 } 1446 } 1447 1448 if (record instanceof LDIFChangeRecord) 1449 { 1450 return (LDIFChangeRecord) record; 1451 } 1452 else if (record instanceof Entry) 1453 { 1454 if (defaultAdd) 1455 { 1456 return new LDIFAddChangeRecord((Entry) record); 1457 } 1458 else 1459 { 1460 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1461 throw new LDIFException( 1462 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1463 true); 1464 } 1465 } 1466 1467 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1468 "LDIFChangeRecord"); 1469 } 1470 1471 1472 1473 /** 1474 * Reads the next LDIF record, which was read and parsed asynchronously by 1475 * separate threads. 1476 * 1477 * @return The next LDIF record or {@code null} if there are no more records. 1478 * 1479 * @throws IOException If a problem occurs while attempting to read from the 1480 * LDIF source. 1481 * 1482 * @throws LDIFException If the data read could not be parsed as an entry. 
*/
  private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
          throws IOException, LDIFException
  {
    Result<UnparsedLDIFRecord, LDIFRecord> result = null;

    // If the asynchronous reading and parsing is complete, then we don't have
    // to block waiting for the next record to show up on the queue.  If there
    // isn't a record there, then return null (EOF) right away.
    if (asyncParsingComplete.get())
    {
      result = asyncParsedRecords.poll();
    }
    else
    {
      try
      {
        // We probably could just do a asyncParsedRecords.take() here, but
        // there are some edge case error scenarios where
        // asyncParsingComplete might be set without a special EOF sentinel
        // Result enqueued.  So to guard against this, we poll with a very
        // cautious timeout of 1 second and re-check the completion flag after
        // each timeout.  During normal processing, we should never have to
        // wait for the timeout to expire when there is something to do (like
        // shutdown).
        while ((result == null) && (!asyncParsingComplete.get()))
        {
          result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
        }

        // There's a very small chance that we missed the value (it may have
        // been enqueued between the timed poll and the completion check), so
        // double-check with a non-blocking poll.
        if (result == null)
        {
          result = asyncParsedRecords.poll();
        }
      }
      catch (final InterruptedException e)
      {
        // Restore the interrupt status for the caller and report the
        // interruption as an I/O failure.
        debugException(e);
        Thread.currentThread().interrupt();
        throw new IOException(e);
      }
    }
    if (result == null)
    {
      return null;
    }

    // If reading or parsing this record failed, then surface that failure to
    // the caller.  This does nothing if the failure cause is null.
    rethrow(result.getFailureCause());

    // Check if we reached the end of the input
    final UnparsedLDIFRecord unparsedRecord = result.getInput();
    if (unparsedRecord.isEOF())
    {
      // This might have been set already by the LineReaderThread, but
      // just in case it hasn't gotten to it yet, do so here.
      asyncParsingComplete.set(true);

      // Enqueue this EOF result again for any other thread that might be
      // blocked in asyncParsedRecords.take() even though having multiple
      // threads call this method concurrently breaks the contract of this
      // class.
      try
      {
        asyncParsedRecords.put(result);
      }
      catch (final InterruptedException e)
      {
        // We shouldn't ever get interrupted because the put won't ever block.
        // Once we are done reading, this is the only item left in the queue,
        // so we should always be able to re-enqueue it.
        debugException(e);
        Thread.currentThread().interrupt();
      }
      return null;
    }

    return result;
  }



  /**
   * Indicates whether this LDIF reader was constructed to perform asynchronous
   * processing.
   *
   * @return  {@code true} if this LDIFReader was constructed to perform
   *          asynchronous processing, or {@code false} if not.
   */
  private boolean isAsync()
  {
    return isAsync;
  }



  /**
   * If not {@code null}, rethrows the specified Throwable as either an
   * IOException or LDIFException.  Runtime exceptions and errors are rethrown
   * unchanged.
   *
   * @param t  The exception to rethrow.  If it's {@code null}, then nothing
   *           is thrown.
   *
   * @throws IOException   If t is an IOException or a checked Exception that
   *                       is not an LDIFException.
   * @throws LDIFException  If t is an LDIFException.
1588 */ 1589 static void rethrow(final Throwable t) 1590 throws IOException, LDIFException 1591 { 1592 if (t == null) 1593 { 1594 return; 1595 } 1596 1597 if (t instanceof IOException) 1598 { 1599 throw (IOException) t; 1600 } 1601 else if (t instanceof LDIFException) 1602 { 1603 throw (LDIFException) t; 1604 } 1605 else if (t instanceof RuntimeException) 1606 { 1607 throw (RuntimeException) t; 1608 } 1609 else if (t instanceof Error) 1610 { 1611 throw (Error) t; 1612 } 1613 else 1614 { 1615 throw new IOException(t); 1616 } 1617 } 1618 1619 1620 1621 /** 1622 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1623 * change record. 1624 * 1625 * @return The record read from the LDIF source, or {@code null} if there are 1626 * no more entries to be read. 1627 * 1628 * @throws IOException If a problem occurs while trying to read from the 1629 * LDIF source. 1630 * @throws LDIFException If the data read could not be parsed as an entry or 1631 * an LDIF change record. 1632 */ 1633 private LDIFRecord readLDIFRecordInternal() 1634 throws IOException, LDIFException 1635 { 1636 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1637 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1638 } 1639 1640 1641 1642 /** 1643 * Reads an entry from the LDIF source. 1644 * 1645 * @return The entry read from the LDIF source, or {@code null} if there are 1646 * no more entries to be read. 1647 * 1648 * @throws IOException If a problem occurs while attempting to read from the 1649 * LDIF source. 1650 * @throws LDIFException If the data read could not be parsed as an entry. 
1651 */ 1652 private Entry readEntryInternal() 1653 throws IOException, LDIFException 1654 { 1655 Entry e = null; 1656 while (e == null) 1657 { 1658 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1659 if (unparsedRecord.isEOF()) 1660 { 1661 return null; 1662 } 1663 1664 e = decodeEntry(unparsedRecord, relativeBasePath); 1665 debugLDIFRead(e); 1666 1667 if (entryTranslator != null) 1668 { 1669 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1670 } 1671 } 1672 return e; 1673 } 1674 1675 1676 1677 /** 1678 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1679 * record does not have a changetype, then it may be assumed to be an add 1680 * change record. 1681 * 1682 * @param defaultAdd Indicates whether an LDIF record not containing a 1683 * changetype should be retrieved as an add change record. 1684 * If this is {@code false} and the record read does not 1685 * include a changetype, then an {@link LDIFException} will 1686 * be thrown. 1687 * 1688 * @return The change record read from the LDIF source, or {@code null} if 1689 * there are no more records to be read. 1690 * 1691 * @throws IOException If a problem occurs while attempting to read from the 1692 * LDIF source. 1693 * @throws LDIFException If the data read could not be parsed as an LDIF 1694 * change record. 
*/
  private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
       throws IOException, LDIFException
  {
    LDIFChangeRecord r = null;
    // Keep reading until a change record is available.  A null record (for
    // example, from the change record translator) causes the next record to
    // be read instead.
    while (r == null)
    {
      final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
      if (unparsedRecord.isEOF())
      {
        return null;
      }

      r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd,
           schema);
      debugLDIFRead(r);

      if (changeRecordTranslator != null)
      {
        r = changeRecordTranslator.translate(r,
             unparsedRecord.getFirstLineNumber());
      }
    }
    return r;
  }



  /**
   * Reads the lines that make up the next record (either an entry or a change
   * record) from the LDIF source.  Comment lines and a leading "version:" line
   * are skipped, and wrapped lines are joined with the line they continue.
   *
   * @return  An unparsed record holding the lines that were read along with
   *          the line number of the first line of the record.  If the end of
   *          the LDIF source was reached without reading any record data, then
   *          the returned record has an empty line list and a first line
   *          number of -1.
   *
   * @throws  IOException  If a problem occurs while attempting to read from the
   *                       LDIF source.
   *
   * @throws  LDIFException  If the data read could not be parsed as a valid
   *                         LDIF record.
   */
  private UnparsedLDIFRecord readUnparsedRecord()
         throws IOException, LDIFException
  {
    final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
    boolean lastWasComment = false;
    long firstLineNumber = lineNumberCounter + 1;
    while (true)
    {
      final String line = reader.readLine();
      lineNumberCounter++;

      if (line == null)
      {
        // We've hit the end of the LDIF source.  If we haven't read any entry
        // data, then return an empty record with a first line number of -1
        // (presumably what isEOF() detects -- confirm in UnparsedLDIFRecord).
        // Otherwise, the last entry wasn't followed by a blank line, which is
        // OK, and we should decode that entry.
        if (lineList.isEmpty())
        {
          return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
               duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
        }
        else
        {
          break;
        }
      }

      if (line.length() == 0)
      {
        // It's a blank line.  If we have read entry data, then this signals the
        // end of the entry.  Otherwise, it's an extra space between entries,
        // which is OK.  In that case the record has not started yet, so the
        // first line number is moved past the blank line.
        lastWasComment = false;
        if (lineList.isEmpty())
        {
          firstLineNumber++;
          continue;
        }
        else
        {
          break;
        }
      }

      if (line.charAt(0) == ' ')
      {
        // The line starts with a space, which means that it must be a
        // continuation of the previous line.  This is true even if the last
        // line was a comment.
        if (lastWasComment)
        {
          // What we've read is part of a comment, so we don't care about its
          // content.
        }
        else if (lineList.isEmpty())
        {
          // There is no previous line for this line to continue.
          throw new LDIFException(
               ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
               lineNumberCounter, false);
        }
        else
        {
          // Unwrap the line by appending everything after the leading space to
          // the line that it continues.
          lineList.get(lineList.size() - 1).append(line.substring(1));
          lastWasComment = false;
        }
      }
      else if (line.charAt(0) == '#')
      {
        lastWasComment = true;
      }
      else
      {
        // We want to make sure that we skip over the "version:" line if it
        // exists, but that should only occur at the beginning of an entry where
        // it can't be confused with a possible "version" attribute.  It is
        // flagged like a comment so that any continuation lines are skipped
        // too.
        if (lineList.isEmpty() && line.startsWith("version:"))
        {
          lastWasComment = true;
        }
        else
        {
          lineList.add(new StringBuilder(line));
          lastWasComment = false;
        }
      }
    }

    return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
         trailingSpaceBehavior, schema, firstLineNumber);
  }



  /**
   * Decodes the provided set of LDIF lines as an entry.  The provided set of
   * lines must contain exactly one entry.  Long lines may be wrapped as per the
   * LDIF specification, and it is acceptable to have one or more blank lines
   * following the entry. A default trailing space behavior of
   * {@link TrailingSpaceBehavior#REJECT} will be used.
1834 * 1835 * @param ldifLines The set of lines that comprise the LDIF representation 1836 * of the entry. It must not be {@code null} or empty. 1837 * 1838 * @return The entry read from LDIF. 1839 * 1840 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1841 * entry. 1842 */ 1843 public static Entry decodeEntry(final String... ldifLines) 1844 throws LDIFException 1845 { 1846 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1847 TrailingSpaceBehavior.REJECT, null, ldifLines), 1848 DEFAULT_RELATIVE_BASE_PATH); 1849 debugLDIFRead(e); 1850 return e; 1851 } 1852 1853 1854 1855 /** 1856 * Decodes the provided set of LDIF lines as an entry. The provided set of 1857 * lines must contain exactly one entry. Long lines may be wrapped as per the 1858 * LDIF specification, and it is acceptable to have one or more blank lines 1859 * following the entry. A default trailing space behavior of 1860 * {@link TrailingSpaceBehavior#REJECT} will be used. 1861 * 1862 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1863 * attribute values encountered while parsing. 1864 * @param schema The schema to use when parsing the record, 1865 * if applicable. 1866 * @param ldifLines The set of lines that comprise the LDIF 1867 * representation of the entry. It must not be 1868 * {@code null} or empty. 1869 * 1870 * @return The entry read from LDIF. 1871 * 1872 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1873 * entry. 1874 */ 1875 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1876 final Schema schema, 1877 final String... ldifLines) 1878 throws LDIFException 1879 { 1880 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1881 schema, ldifLines); 1882 } 1883 1884 1885 1886 /** 1887 * Decodes the provided set of LDIF lines as an entry. The provided set of 1888 * lines must contain exactly one entry. 
Long lines may be wrapped as per the 1889 * LDIF specification, and it is acceptable to have one or more blank lines 1890 * following the entry. 1891 * 1892 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1893 * attribute values encountered while parsing. 1894 * @param trailingSpaceBehavior The behavior that should be exhibited when 1895 * encountering attribute values which are not 1896 * base64-encoded but contain trailing spaces. 1897 * It must not be {@code null}. 1898 * @param schema The schema to use when parsing the record, 1899 * if applicable. 1900 * @param ldifLines The set of lines that comprise the LDIF 1901 * representation of the entry. It must not be 1902 * {@code null} or empty. 1903 * 1904 * @return The entry read from LDIF. 1905 * 1906 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1907 * entry. 1908 */ 1909 public static Entry decodeEntry( 1910 final boolean ignoreDuplicateValues, 1911 final TrailingSpaceBehavior trailingSpaceBehavior, 1912 final Schema schema, 1913 final String... ldifLines) throws LDIFException 1914 { 1915 final Entry e = decodeEntry(prepareRecord( 1916 (ignoreDuplicateValues 1917 ? DuplicateValueBehavior.STRIP 1918 : DuplicateValueBehavior.REJECT), 1919 trailingSpaceBehavior, schema, ldifLines), 1920 DEFAULT_RELATIVE_BASE_PATH); 1921 debugLDIFRead(e); 1922 return e; 1923 } 1924 1925 1926 1927 /** 1928 * Decodes the provided set of LDIF lines as an LDIF change record. The 1929 * provided set of lines must contain exactly one change record and it must 1930 * include a changetype. Long lines may be wrapped as per the LDIF 1931 * specification, and it is acceptable to have one or more blank lines 1932 * following the entry. 1933 * 1934 * @param ldifLines The set of lines that comprise the LDIF representation 1935 * of the change record. It must not be {@code null} or 1936 * empty. 1937 * 1938 * @return The change record read from LDIF. 
1939 * 1940 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1941 * change record. 1942 */ 1943 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1944 throws LDIFException 1945 { 1946 return decodeChangeRecord(false, ldifLines); 1947 } 1948 1949 1950 1951 /** 1952 * Decodes the provided set of LDIF lines as an LDIF change record. The 1953 * provided set of lines must contain exactly one change record. Long lines 1954 * may be wrapped as per the LDIF specification, and it is acceptable to have 1955 * one or more blank lines following the entry. 1956 * 1957 * @param defaultAdd Indicates whether an LDIF record not containing a 1958 * changetype should be retrieved as an add change record. 1959 * If this is {@code false} and the record read does not 1960 * include a changetype, then an {@link LDIFException} 1961 * will be thrown. 1962 * @param ldifLines The set of lines that comprise the LDIF representation 1963 * of the change record. It must not be {@code null} or 1964 * empty. 1965 * 1966 * @return The change record read from LDIF. 1967 * 1968 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1969 * change record. 1970 */ 1971 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1972 final String... ldifLines) 1973 throws LDIFException 1974 { 1975 final LDIFChangeRecord r = 1976 decodeChangeRecord( 1977 prepareRecord(DuplicateValueBehavior.STRIP, 1978 TrailingSpaceBehavior.REJECT, null, ldifLines), 1979 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1980 debugLDIFRead(r); 1981 return r; 1982 } 1983 1984 1985 1986 /** 1987 * Decodes the provided set of LDIF lines as an LDIF change record. The 1988 * provided set of lines must contain exactly one change record. Long lines 1989 * may be wrapped as per the LDIF specification, and it is acceptable to have 1990 * one or more blank lines following the entry. 
1991 * 1992 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1993 * attribute values encountered while parsing. 1994 * @param schema The schema to use when processing the change 1995 * record, or {@code null} if no schema should 1996 * be used and all values should be treated as 1997 * case-insensitive strings. 1998 * @param defaultAdd Indicates whether an LDIF record not 1999 * containing a changetype should be retrieved 2000 * as an add change record. If this is 2001 * {@code false} and the record read does not 2002 * include a changetype, then an 2003 * {@link LDIFException} will be thrown. 2004 * @param ldifLines The set of lines that comprise the LDIF 2005 * representation of the change record. It 2006 * must not be {@code null} or empty. 2007 * 2008 * @return The change record read from LDIF. 2009 * 2010 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2011 * change record. 2012 */ 2013 public static LDIFChangeRecord decodeChangeRecord( 2014 final boolean ignoreDuplicateValues, 2015 final Schema schema, 2016 final boolean defaultAdd, 2017 final String... ldifLines) 2018 throws LDIFException 2019 { 2020 return decodeChangeRecord(ignoreDuplicateValues, 2021 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 2022 } 2023 2024 2025 2026 /** 2027 * Decodes the provided set of LDIF lines as an LDIF change record. The 2028 * provided set of lines must contain exactly one change record. Long lines 2029 * may be wrapped as per the LDIF specification, and it is acceptable to have 2030 * one or more blank lines following the entry. 2031 * 2032 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 2033 * attribute values encountered while parsing. 2034 * @param trailingSpaceBehavior The behavior that should be exhibited when 2035 * encountering attribute values which are not 2036 * base64-encoded but contain trailing spaces. 2037 * It must not be {@code null}. 
2038 * @param schema The schema to use when processing the change 2039 * record, or {@code null} if no schema should 2040 * be used and all values should be treated as 2041 * case-insensitive strings. 2042 * @param defaultAdd Indicates whether an LDIF record not 2043 * containing a changetype should be retrieved 2044 * as an add change record. If this is 2045 * {@code false} and the record read does not 2046 * include a changetype, then an 2047 * {@link LDIFException} will be thrown. 2048 * @param ldifLines The set of lines that comprise the LDIF 2049 * representation of the change record. It 2050 * must not be {@code null} or empty. 2051 * 2052 * @return The change record read from LDIF. 2053 * 2054 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2055 * change record. 2056 */ 2057 public static LDIFChangeRecord decodeChangeRecord( 2058 final boolean ignoreDuplicateValues, 2059 final TrailingSpaceBehavior trailingSpaceBehavior, 2060 final Schema schema, 2061 final boolean defaultAdd, 2062 final String... ldifLines) 2063 throws LDIFException 2064 { 2065 final LDIFChangeRecord r = decodeChangeRecord( 2066 prepareRecord( 2067 (ignoreDuplicateValues 2068 ? DuplicateValueBehavior.STRIP 2069 : DuplicateValueBehavior.REJECT), 2070 trailingSpaceBehavior, schema, ldifLines), 2071 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2072 debugLDIFRead(r); 2073 return r; 2074 } 2075 2076 2077 2078 /** 2079 * Parses the provided set of lines into a list of {@code StringBuilder} 2080 * objects suitable for decoding into an entry or LDIF change record. 2081 * Comments will be ignored and wrapped lines will be unwrapped. 2082 * 2083 * @param duplicateValueBehavior The behavior that should be exhibited if 2084 * the LDIF reader encounters an entry with 2085 * duplicate values. 2086 * @param trailingSpaceBehavior The behavior that should be exhibited when 2087 * encountering attribute values which are not 2088 * base64-encoded but contain trailing spaces. 
2089 * @param schema The schema to use when parsing the record, 2090 * if applicable. 2091 * @param ldifLines The set of lines that comprise the record 2092 * to decode. It must not be {@code null} or 2093 * empty. 2094 * 2095 * @return The prepared list of {@code StringBuilder} objects ready to be 2096 * decoded. 2097 * 2098 * @throws LDIFException If the provided lines do not contain valid LDIF 2099 * content. 2100 */ 2101 private static UnparsedLDIFRecord prepareRecord( 2102 final DuplicateValueBehavior duplicateValueBehavior, 2103 final TrailingSpaceBehavior trailingSpaceBehavior, 2104 final Schema schema, final String... ldifLines) 2105 throws LDIFException 2106 { 2107 ensureNotNull(ldifLines); 2108 ensureFalse(ldifLines.length == 0, 2109 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2110 2111 boolean lastWasComment = false; 2112 final ArrayList<StringBuilder> lineList = 2113 new ArrayList<StringBuilder>(ldifLines.length); 2114 for (int i=0; i < ldifLines.length; i++) 2115 { 2116 final String line = ldifLines[i]; 2117 if (line.length() == 0) 2118 { 2119 // This is only acceptable if there are no more non-empty lines in the 2120 // array. 2121 for (int j=i+1; j < ldifLines.length; j++) 2122 { 2123 if (ldifLines[j].length() > 0) 2124 { 2125 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2126 ldifLines, null); 2127 } 2128 2129 // If we've gotten here, then we know that we're at the end of the 2130 // entry. If we have read data, then we can decode it as an entry. 2131 // Otherwise, there was no real data in the provided LDIF lines. 2132 if (lineList.isEmpty()) 2133 { 2134 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2135 ldifLines, null); 2136 } 2137 else 2138 { 2139 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2140 trailingSpaceBehavior, schema, 0); 2141 } 2142 } 2143 } 2144 2145 if (line.charAt(0) == ' ') 2146 { 2147 if (i > 0) 2148 { 2149 if (! 
lastWasComment) 2150 { 2151 lineList.get(lineList.size() - 1).append(line.substring(1)); 2152 } 2153 } 2154 else 2155 { 2156 throw new LDIFException( 2157 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2158 true, ldifLines, null); 2159 } 2160 } 2161 else if (line.charAt(0) == '#') 2162 { 2163 lastWasComment = true; 2164 } 2165 else 2166 { 2167 lineList.add(new StringBuilder(line)); 2168 lastWasComment = false; 2169 } 2170 } 2171 2172 if (lineList.isEmpty()) 2173 { 2174 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2175 } 2176 else 2177 { 2178 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2179 trailingSpaceBehavior, schema, 0); 2180 } 2181 } 2182 2183 2184 2185 /** 2186 * Decodes the unparsed record that was read from the LDIF source. It may be 2187 * either an entry or an LDIF change record. 2188 * 2189 * @param unparsedRecord The unparsed LDIF record that was read from the 2190 * input. It must not be {@code null} or empty. 2191 * @param relativeBasePath The base path that will be prepended to relative 2192 * paths in order to obtain an absolute path. 2193 * @param schema The schema to use when parsing. 2194 * 2195 * @return The parsed record, or {@code null} if there are no more entries to 2196 * be read. 2197 * 2198 * @throws LDIFException If the data read could not be parsed as an entry or 2199 * an LDIF change record. 2200 */ 2201 private static LDIFRecord decodeRecord( 2202 final UnparsedLDIFRecord unparsedRecord, 2203 final String relativeBasePath, 2204 final Schema schema) 2205 throws LDIFException 2206 { 2207 // If there was an error reading from the input, then we rethrow it here. 2208 final Exception readError = unparsedRecord.getFailureCause(); 2209 if (readError != null) 2210 { 2211 if (readError instanceof LDIFException) 2212 { 2213 // If the error was an LDIFException, which will normally be the case, 2214 // then rethrow it with all of the same state. 
We could just 2215 // throw (LDIFException) readError; 2216 // but that's considered bad form. 2217 final LDIFException ldifEx = (LDIFException) readError; 2218 throw new LDIFException(ldifEx.getMessage(), 2219 ldifEx.getLineNumber(), 2220 ldifEx.mayContinueReading(), 2221 ldifEx.getDataLines(), 2222 ldifEx.getCause()); 2223 } 2224 else 2225 { 2226 throw new LDIFException(getExceptionMessage(readError), 2227 -1, true, readError); 2228 } 2229 } 2230 2231 if (unparsedRecord.isEOF()) 2232 { 2233 return null; 2234 } 2235 2236 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2237 if (unparsedRecord.getLineList() == null) 2238 { 2239 return null; // We can get here if there was an error reading the lines. 2240 } 2241 2242 final LDIFRecord r; 2243 if (lineList.size() == 1) 2244 { 2245 r = decodeEntry(unparsedRecord, relativeBasePath); 2246 } 2247 else 2248 { 2249 final String lowerSecondLine = toLowerCase(lineList.get(1).toString()); 2250 if (lowerSecondLine.startsWith("control:") || 2251 lowerSecondLine.startsWith("changetype:")) 2252 { 2253 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2254 } 2255 else 2256 { 2257 r = decodeEntry(unparsedRecord, relativeBasePath); 2258 } 2259 } 2260 2261 debugLDIFRead(r); 2262 return r; 2263 } 2264 2265 2266 2267 /** 2268 * Decodes the provided set of LDIF lines as an entry. The provided list must 2269 * not contain any blank lines or comments, and lines are not allowed to be 2270 * wrapped. 2271 * 2272 * @param unparsedRecord The unparsed LDIF record that was read from the 2273 * input. It must not be {@code null} or empty. 2274 * @param relativeBasePath The base path that will be prepended to relative 2275 * paths in order to obtain an absolute path. 2276 * 2277 * @return The entry read from LDIF. 2278 * 2279 * @throws LDIFException If the provided LDIF data cannot be read as an 2280 * entry. 
2281 */ 2282 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2283 final String relativeBasePath) 2284 throws LDIFException 2285 { 2286 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2287 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2288 2289 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2290 2291 // The first line must start with either "version:" or "dn:". If the first 2292 // line starts with "version:" then the second must start with "dn:". 2293 StringBuilder line = iterator.next(); 2294 handleTrailingSpaces(line, null, firstLineNumber, 2295 unparsedRecord.getTrailingSpaceBehavior()); 2296 int colonPos = line.indexOf(":"); 2297 if ((colonPos > 0) && 2298 line.substring(0, colonPos).equalsIgnoreCase("version")) 2299 { 2300 // The first line is "version:". Under most conditions, this will be 2301 // handled by the LDIF reader, but this can happen if you call 2302 // decodeEntry with a set of data that includes a version. At any rate, 2303 // read the next line, which must specify the DN. 2304 line = iterator.next(); 2305 handleTrailingSpaces(line, null, firstLineNumber, 2306 unparsedRecord.getTrailingSpaceBehavior()); 2307 } 2308 2309 colonPos = line.indexOf(":"); 2310 if ((colonPos < 0) || 2311 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2312 { 2313 throw new LDIFException( 2314 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2315 firstLineNumber, true, ldifLines, null); 2316 } 2317 2318 final String dn; 2319 final int length = line.length(); 2320 if (length == (colonPos+1)) 2321 { 2322 // The colon was the last character on the line. This is acceptable and 2323 // indicates that the entry has the null DN. 2324 dn = ""; 2325 } 2326 else if (line.charAt(colonPos+1) == ':') 2327 { 2328 // Skip over any spaces leading up to the value, and then the rest of the 2329 // string is the base64-encoded DN. 
2330 int pos = colonPos+2; 2331 while ((pos < length) && (line.charAt(pos) == ' ')) 2332 { 2333 pos++; 2334 } 2335 2336 try 2337 { 2338 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2339 dn = new String(dnBytes, "UTF-8"); 2340 } 2341 catch (final ParseException pe) 2342 { 2343 debugException(pe); 2344 throw new LDIFException( 2345 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2346 pe.getMessage()), 2347 firstLineNumber, true, ldifLines, pe); 2348 } 2349 catch (final Exception e) 2350 { 2351 debugException(e); 2352 throw new LDIFException( 2353 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2354 firstLineNumber, true, ldifLines, e); 2355 } 2356 } 2357 else 2358 { 2359 // Skip over any spaces leading up to the value, and then the rest of the 2360 // string is the DN. 2361 int pos = colonPos+1; 2362 while ((pos < length) && (line.charAt(pos) == ' ')) 2363 { 2364 pos++; 2365 } 2366 2367 dn = line.substring(pos); 2368 } 2369 2370 2371 // The remaining lines must be the attributes for the entry. However, we 2372 // will allow the case in which an entry does not have any attributes, to be 2373 // able to support reading search result entries in which no attributes were 2374 // returned. 2375 if (! iterator.hasNext()) 2376 { 2377 return new Entry(dn, unparsedRecord.getSchema()); 2378 } 2379 2380 return new Entry(dn, unparsedRecord.getSchema(), 2381 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2382 unparsedRecord.getTrailingSpaceBehavior(), 2383 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2384 firstLineNumber)); 2385 } 2386 2387 2388 2389 /** 2390 * Decodes the provided set of LDIF lines as a change record. The provided 2391 * list must not contain any blank lines or comments, and lines are not 2392 * allowed to be wrapped. 2393 * 2394 * @param unparsedRecord The unparsed LDIF record that was read from the 2395 * input. It must not be {@code null} or empty. 
2396 * @param relativeBasePath The base path that will be prepended to relative 2397 * paths in order to obtain an absolute path. 2398 * @param defaultAdd Indicates whether an LDIF record not containing a 2399 * changetype should be retrieved as an add change 2400 * record. If this is {@code false} and the record 2401 * read does not include a changetype, then an 2402 * {@link LDIFException} will be thrown. 2403 * @param schema The schema to use in parsing. 2404 * 2405 * @return The change record read from LDIF. 2406 * 2407 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2408 * change record. 2409 */ 2410 private static LDIFChangeRecord decodeChangeRecord( 2411 final UnparsedLDIFRecord unparsedRecord, 2412 final String relativeBasePath, 2413 final boolean defaultAdd, 2414 final Schema schema) 2415 throws LDIFException 2416 { 2417 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2418 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2419 2420 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2421 2422 // The first line must start with either "version:" or "dn:". If the first 2423 // line starts with "version:" then the second must start with "dn:". 2424 StringBuilder line = iterator.next(); 2425 handleTrailingSpaces(line, null, firstLineNumber, 2426 unparsedRecord.getTrailingSpaceBehavior()); 2427 int colonPos = line.indexOf(":"); 2428 int linesRead = 1; 2429 if ((colonPos > 0) && 2430 line.substring(0, colonPos).equalsIgnoreCase("version")) 2431 { 2432 // The first line is "version:". Under most conditions, this will be 2433 // handled by the LDIF reader, but this can happen if you call 2434 // decodeEntry with a set of data that includes a version. At any rate, 2435 // read the next line, which must specify the DN. 
2436 line = iterator.next(); 2437 linesRead++; 2438 handleTrailingSpaces(line, null, firstLineNumber, 2439 unparsedRecord.getTrailingSpaceBehavior()); 2440 } 2441 2442 colonPos = line.indexOf(":"); 2443 if ((colonPos < 0) || 2444 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2445 { 2446 throw new LDIFException( 2447 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2448 firstLineNumber, true, ldifLines, null); 2449 } 2450 2451 final String dn; 2452 final int length = line.length(); 2453 if (length == (colonPos+1)) 2454 { 2455 // The colon was the last character on the line. This is acceptable and 2456 // indicates that the entry has the null DN. 2457 dn = ""; 2458 } 2459 else if (line.charAt(colonPos+1) == ':') 2460 { 2461 // Skip over any spaces leading up to the value, and then the rest of the 2462 // string is the base64-encoded DN. 2463 int pos = colonPos+2; 2464 while ((pos < length) && (line.charAt(pos) == ' ')) 2465 { 2466 pos++; 2467 } 2468 2469 try 2470 { 2471 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2472 dn = new String(dnBytes, "UTF-8"); 2473 } 2474 catch (final ParseException pe) 2475 { 2476 debugException(pe); 2477 throw new LDIFException( 2478 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2479 pe.getMessage()), 2480 firstLineNumber, true, ldifLines, pe); 2481 } 2482 catch (final Exception e) 2483 { 2484 debugException(e); 2485 throw new LDIFException( 2486 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2487 e), 2488 firstLineNumber, true, ldifLines, e); 2489 } 2490 } 2491 else 2492 { 2493 // Skip over any spaces leading up to the value, and then the rest of the 2494 // string is the DN. 2495 int pos = colonPos+1; 2496 while ((pos < length) && (line.charAt(pos) == ' ')) 2497 { 2498 pos++; 2499 } 2500 2501 dn = line.substring(pos); 2502 } 2503 2504 2505 // An LDIF change record may contain zero or more controls, with the end of 2506 // the controls signified by the changetype. 
The changetype element must be 2507 // present, unless defaultAdd is true in which case the first thing that is 2508 // neither control or changetype will trigger the start of add attribute 2509 // parsing. 2510 if (! iterator.hasNext()) 2511 { 2512 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2513 firstLineNumber, true, ldifLines, null); 2514 } 2515 2516 String changeType = null; 2517 ArrayList<Control> controls = null; 2518 while (true) 2519 { 2520 line = iterator.next(); 2521 handleTrailingSpaces(line, dn, firstLineNumber, 2522 unparsedRecord.getTrailingSpaceBehavior()); 2523 colonPos = line.indexOf(":"); 2524 if (colonPos < 0) 2525 { 2526 throw new LDIFException( 2527 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2528 firstLineNumber, true, ldifLines, null); 2529 } 2530 2531 final String token = toLowerCase(line.substring(0, colonPos)); 2532 if (token.equals("control")) 2533 { 2534 if (controls == null) 2535 { 2536 controls = new ArrayList<Control>(5); 2537 } 2538 2539 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2540 relativeBasePath)); 2541 } 2542 else if (token.equals("changetype")) 2543 { 2544 changeType = 2545 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2546 break; 2547 } 2548 else if (defaultAdd) 2549 { 2550 // The line we read wasn't a control or changetype declaration, so we'll 2551 // assume it's an attribute in an add record. However, we're not ready 2552 // for that yet, and since we can't rewind an iterator we'll create a 2553 // new one that hasn't yet gotten to this line. 
2554 changeType = "add"; 2555 iterator = ldifLines.iterator(); 2556 for (int i=0; i < linesRead; i++) 2557 { 2558 iterator.next(); 2559 } 2560 break; 2561 } 2562 else 2563 { 2564 throw new LDIFException( 2565 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2566 firstLineNumber), 2567 firstLineNumber, true, ldifLines, null); 2568 } 2569 2570 linesRead++; 2571 } 2572 2573 2574 // Make sure that the change type is acceptable and then decode the rest of 2575 // the change record accordingly. 2576 final String lowerChangeType = toLowerCase(changeType); 2577 if (lowerChangeType.equals("add")) 2578 { 2579 // There must be at least one more line. If not, then that's an error. 2580 // Otherwise, parse the rest of the data as attribute-value pairs. 2581 if (iterator.hasNext()) 2582 { 2583 final Collection<Attribute> attrs = 2584 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2585 unparsedRecord.getTrailingSpaceBehavior(), 2586 unparsedRecord.getSchema(), ldifLines, iterator, 2587 relativeBasePath, firstLineNumber); 2588 final Attribute[] attributes = new Attribute[attrs.size()]; 2589 final Iterator<Attribute> attrIterator = attrs.iterator(); 2590 for (int i=0; i < attributes.length; i++) 2591 { 2592 attributes[i] = attrIterator.next(); 2593 } 2594 2595 return new LDIFAddChangeRecord(dn, attributes, controls); 2596 } 2597 else 2598 { 2599 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2600 firstLineNumber, true, ldifLines, null); 2601 } 2602 } 2603 else if (lowerChangeType.equals("delete")) 2604 { 2605 // There shouldn't be any more data. If there is, then that's an error. 2606 // Otherwise, we can just return the delete change record with what we 2607 // already know. 
2608 if (iterator.hasNext()) 2609 { 2610 throw new LDIFException( 2611 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2612 firstLineNumber, true, ldifLines, null); 2613 } 2614 else 2615 { 2616 return new LDIFDeleteChangeRecord(dn, controls); 2617 } 2618 } 2619 else if (lowerChangeType.equals("modify")) 2620 { 2621 // There must be at least one more line. If not, then that's an error. 2622 // Otherwise, parse the rest of the data as a set of modifications. 2623 if (iterator.hasNext()) 2624 { 2625 final Modification[] mods = parseModifications(dn, 2626 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2627 firstLineNumber, schema); 2628 return new LDIFModifyChangeRecord(dn, mods, controls); 2629 } 2630 else 2631 { 2632 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2633 firstLineNumber, true, ldifLines, null); 2634 } 2635 } 2636 else if (lowerChangeType.equals("moddn") || 2637 lowerChangeType.equals("modrdn")) 2638 { 2639 // There must be at least one more line. If not, then that's an error. 2640 // Otherwise, parse the rest of the data as a set of modifications. 2641 if (iterator.hasNext()) 2642 { 2643 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2644 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2645 } 2646 else 2647 { 2648 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2649 firstLineNumber, true, ldifLines, null); 2650 } 2651 } 2652 else 2653 { 2654 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2655 firstLineNumber), 2656 firstLineNumber, true, ldifLines, null); 2657 } 2658 } 2659 2660 2661 2662 /** 2663 * Decodes information about a control from the provided line. 2664 * 2665 * @param line The line to process. 2666 * @param colonPos The position of the colon that separates the 2667 * control token string from tbe encoded control. 2668 * @param firstLineNumber The line number for the start of the record. 
2669 * @param ldifLines The lines that comprise the LDIF representation 2670 * of the full record being parsed. 2671 * @param relativeBasePath The base path that will be prepended to relative 2672 * paths in order to obtain an absolute path. 2673 * 2674 * @return The decoded control. 2675 * 2676 * @throws LDIFException If a problem is encountered while trying to decode 2677 * the changetype. 2678 */ 2679 private static Control decodeControl(final StringBuilder line, 2680 final int colonPos, 2681 final long firstLineNumber, 2682 final ArrayList<StringBuilder> ldifLines, 2683 final String relativeBasePath) 2684 throws LDIFException 2685 { 2686 final String controlString; 2687 int length = line.length(); 2688 if (length == (colonPos+1)) 2689 { 2690 // The colon was the last character on the line. This is not 2691 // acceptable. 2692 throw new LDIFException( 2693 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2694 firstLineNumber, true, ldifLines, null); 2695 } 2696 else if (line.charAt(colonPos+1) == ':') 2697 { 2698 // Skip over any spaces leading up to the value, and then the rest of 2699 // the string is the base64-encoded control representation. This is 2700 // unusual and unnecessary, but is nevertheless acceptable. 
2701 int pos = colonPos+2; 2702 while ((pos < length) && (line.charAt(pos) == ' ')) 2703 { 2704 pos++; 2705 } 2706 2707 try 2708 { 2709 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2710 controlString = new String(controlBytes, "UTF-8"); 2711 } 2712 catch (final ParseException pe) 2713 { 2714 debugException(pe); 2715 throw new LDIFException( 2716 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2717 firstLineNumber, pe.getMessage()), 2718 firstLineNumber, true, ldifLines, pe); 2719 } 2720 catch (final Exception e) 2721 { 2722 debugException(e); 2723 throw new LDIFException( 2724 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2725 firstLineNumber, true, ldifLines, e); 2726 } 2727 } 2728 else 2729 { 2730 // Skip over any spaces leading up to the value, and then the rest of 2731 // the string is the encoded control. 2732 int pos = colonPos+1; 2733 while ((pos < length) && (line.charAt(pos) == ' ')) 2734 { 2735 pos++; 2736 } 2737 2738 controlString = line.substring(pos); 2739 } 2740 2741 // If the resulting control definition is empty, then that's invalid. 2742 if (controlString.length() == 0) 2743 { 2744 throw new LDIFException( 2745 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2746 firstLineNumber, true, ldifLines, null); 2747 } 2748 2749 2750 // The first element of the control must be the OID, and it must be followed 2751 // by a space (to separate it from the criticality), a colon (to separate it 2752 // from the value and indicate a default criticality of false), or the end 2753 // of the line (to indicate a default criticality of false and no value). 2754 String oid = null; 2755 boolean hasCriticality = false; 2756 boolean hasValue = false; 2757 int pos = 0; 2758 length = controlString.length(); 2759 while (pos < length) 2760 { 2761 final char c = controlString.charAt(pos); 2762 if (c == ':') 2763 { 2764 // This indicates that there is no criticality and that the value 2765 // immediately follows the OID. 
2766 oid = controlString.substring(0, pos++); 2767 hasValue = true; 2768 break; 2769 } 2770 else if (c == ' ') 2771 { 2772 // This indicates that there is a criticality. We don't know anything 2773 // about the presence of a value yet. 2774 oid = controlString.substring(0, pos++); 2775 hasCriticality = true; 2776 break; 2777 } 2778 else 2779 { 2780 pos++; 2781 } 2782 } 2783 2784 if (oid == null) 2785 { 2786 // This indicates that the string representation of the control is only 2787 // the OID. 2788 return new Control(controlString, false); 2789 } 2790 2791 2792 // See if we need to read the criticality. If so, then do so now. 2793 // Otherwise, assume a default criticality of false. 2794 final boolean isCritical; 2795 if (hasCriticality) 2796 { 2797 // Skip over any spaces before the criticality. 2798 while (controlString.charAt(pos) == ' ') 2799 { 2800 pos++; 2801 } 2802 2803 // Read until we find a colon or the end of the string. 2804 final int criticalityStartPos = pos; 2805 while (pos < length) 2806 { 2807 final char c = controlString.charAt(pos); 2808 if (c == ':') 2809 { 2810 hasValue = true; 2811 break; 2812 } 2813 else 2814 { 2815 pos++; 2816 } 2817 } 2818 2819 final String criticalityString = 2820 toLowerCase(controlString.substring(criticalityStartPos, pos)); 2821 if (criticalityString.equals("true")) 2822 { 2823 isCritical = true; 2824 } 2825 else if (criticalityString.equals("false")) 2826 { 2827 isCritical = false; 2828 } 2829 else 2830 { 2831 throw new LDIFException( 2832 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2833 firstLineNumber), 2834 firstLineNumber, true, ldifLines, null); 2835 } 2836 2837 if (hasValue) 2838 { 2839 pos++; 2840 } 2841 } 2842 else 2843 { 2844 isCritical = false; 2845 } 2846 2847 // See if we need to read the value. If so, then do so now. It may be 2848 // a string, or it may be base64-encoded. It could conceivably even be read 2849 // from a URL. 
2850 final ASN1OctetString value; 2851 if (hasValue) 2852 { 2853 // The character immediately after the colon that precedes the value may 2854 // be one of the following: 2855 // - A second colon (optionally followed by a single space) to indicate 2856 // that the value is base64-encoded. 2857 // - A less-than symbol to indicate that the value should be read from a 2858 // location specified by a URL. 2859 // - A single space that precedes the non-base64-encoded value. 2860 // - The first character of the non-base64-encoded value. 2861 switch (controlString.charAt(pos)) 2862 { 2863 case ':': 2864 try 2865 { 2866 if (controlString.length() == (pos+1)) 2867 { 2868 value = new ASN1OctetString(); 2869 } 2870 else if (controlString.charAt(pos+1) == ' ') 2871 { 2872 value = new ASN1OctetString( 2873 Base64.decode(controlString.substring(pos+2))); 2874 } 2875 else 2876 { 2877 value = new ASN1OctetString( 2878 Base64.decode(controlString.substring(pos+1))); 2879 } 2880 } 2881 catch (final Exception e) 2882 { 2883 debugException(e); 2884 throw new LDIFException( 2885 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2886 firstLineNumber, getExceptionMessage(e)), 2887 firstLineNumber, true, ldifLines, e); 2888 } 2889 break; 2890 case '<': 2891 try 2892 { 2893 final String urlString; 2894 if (controlString.charAt(pos+1) == ' ') 2895 { 2896 urlString = controlString.substring(pos+2); 2897 } 2898 else 2899 { 2900 urlString = controlString.substring(pos+1); 2901 } 2902 value = new ASN1OctetString(retrieveURLBytes(urlString, 2903 relativeBasePath, firstLineNumber)); 2904 } 2905 catch (final Exception e) 2906 { 2907 debugException(e); 2908 throw new LDIFException( 2909 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2910 firstLineNumber, getExceptionMessage(e)), 2911 firstLineNumber, true, ldifLines, e); 2912 } 2913 break; 2914 case ' ': 2915 value = new ASN1OctetString(controlString.substring(pos+1)); 2916 break; 2917 default: 2918 value = new 
ASN1OctetString(controlString.substring(pos)); 2919 break; 2920 } 2921 } 2922 else 2923 { 2924 value = null; 2925 } 2926 2927 return new Control(oid, isCritical, value); 2928 } 2929 2930 2931 2932 /** 2933 * Decodes the changetype element from the provided line. 2934 * 2935 * @param line The line to process. 2936 * @param colonPos The position of the colon that separates the 2937 * changetype string from its value. 2938 * @param firstLineNumber The line number for the start of the record. 2939 * @param ldifLines The lines that comprise the LDIF representation of 2940 * the full record being parsed. 2941 * 2942 * @return The decoded changetype string. 2943 * 2944 * @throws LDIFException If a problem is encountered while trying to decode 2945 * the changetype. 2946 */ 2947 private static String decodeChangeType(final StringBuilder line, 2948 final int colonPos, final long firstLineNumber, 2949 final ArrayList<StringBuilder> ldifLines) 2950 throws LDIFException 2951 { 2952 final int length = line.length(); 2953 if (length == (colonPos+1)) 2954 { 2955 // The colon was the last character on the line. This is not 2956 // acceptable. 2957 throw new LDIFException( 2958 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2959 true, ldifLines, null); 2960 } 2961 else if (line.charAt(colonPos+1) == ':') 2962 { 2963 // Skip over any spaces leading up to the value, and then the rest of 2964 // the string is the base64-encoded changetype. This is unusual and 2965 // unnecessary, but is nevertheless acceptable. 
2966 int pos = colonPos+2; 2967 while ((pos < length) && (line.charAt(pos) == ' ')) 2968 { 2969 pos++; 2970 } 2971 2972 try 2973 { 2974 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 2975 return new String(changeTypeBytes, "UTF-8"); 2976 } 2977 catch (final ParseException pe) 2978 { 2979 debugException(pe); 2980 throw new LDIFException( 2981 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 2982 pe.getMessage()), 2983 firstLineNumber, true, ldifLines, pe); 2984 } 2985 catch (final Exception e) 2986 { 2987 debugException(e); 2988 throw new LDIFException( 2989 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 2990 firstLineNumber, true, ldifLines, e); 2991 } 2992 } 2993 else 2994 { 2995 // Skip over any spaces leading up to the value, and then the rest of 2996 // the string is the changetype. 2997 int pos = colonPos+1; 2998 while ((pos < length) && (line.charAt(pos) == ' ')) 2999 { 3000 pos++; 3001 } 3002 3003 return line.substring(pos); 3004 } 3005 } 3006 3007 3008 3009 /** 3010 * Parses the data available through the provided iterator as a collection of 3011 * attributes suitable for use in an entry or an add change record. 3012 * 3013 * @param dn The DN of the record being read. 3014 * @param duplicateValueBehavior The behavior that should be exhibited if 3015 * the LDIF reader encounters an entry with 3016 * duplicate values. 3017 * @param trailingSpaceBehavior The behavior that should be exhibited when 3018 * encountering attribute values which are not 3019 * base64-encoded but contain trailing spaces. 3020 * @param schema The schema to use when parsing the 3021 * attributes, or {@code null} if none is 3022 * needed. 3023 * @param ldifLines The lines that comprise the LDIF 3024 * representation of the full record being 3025 * parsed. 3026 * @param iterator The iterator to use to access the attribute 3027 * lines. 
3028 * @param relativeBasePath The base path that will be prepended to 3029 * relative paths in order to obtain an 3030 * absolute path. 3031 * @param firstLineNumber The line number for the start of the 3032 * record. 3033 * 3034 * @return The collection of attributes that were read. 3035 * 3036 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3037 * set of attributes. 3038 */ 3039 private static ArrayList<Attribute> parseAttributes(final String dn, 3040 final DuplicateValueBehavior duplicateValueBehavior, 3041 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 3042 final ArrayList<StringBuilder> ldifLines, 3043 final Iterator<StringBuilder> iterator, final String relativeBasePath, 3044 final long firstLineNumber) 3045 throws LDIFException 3046 { 3047 final LinkedHashMap<String,Object> attributes = 3048 new LinkedHashMap<String,Object>(ldifLines.size()); 3049 while (iterator.hasNext()) 3050 { 3051 final StringBuilder line = iterator.next(); 3052 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3053 final int colonPos = line.indexOf(":"); 3054 if (colonPos <= 0) 3055 { 3056 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3057 firstLineNumber, true, ldifLines, null); 3058 } 3059 3060 final String attributeName = line.substring(0, colonPos); 3061 final String lowerName = toLowerCase(attributeName); 3062 3063 final MatchingRule matchingRule; 3064 if (schema == null) 3065 { 3066 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 3067 } 3068 else 3069 { 3070 matchingRule = 3071 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 3072 } 3073 3074 Attribute attr; 3075 final LDIFAttribute ldifAttr; 3076 final Object attrObject = attributes.get(lowerName); 3077 if (attrObject == null) 3078 { 3079 attr = null; 3080 ldifAttr = null; 3081 } 3082 else 3083 { 3084 if (attrObject instanceof Attribute) 3085 { 3086 attr = (Attribute) attrObject; 3087 ldifAttr = new 
LDIFAttribute(attr.getName(), matchingRule, 3088 attr.getRawValues()[0]); 3089 attributes.put(lowerName, ldifAttr); 3090 } 3091 else 3092 { 3093 attr = null; 3094 ldifAttr = (LDIFAttribute) attrObject; 3095 } 3096 } 3097 3098 final int length = line.length(); 3099 if (length == (colonPos+1)) 3100 { 3101 // This means that the attribute has a zero-length value, which is 3102 // acceptable. 3103 if (attrObject == null) 3104 { 3105 attr = new Attribute(attributeName, matchingRule, ""); 3106 attributes.put(lowerName, attr); 3107 } 3108 else 3109 { 3110 try 3111 { 3112 if (! ldifAttr.addValue(new ASN1OctetString(), 3113 duplicateValueBehavior)) 3114 { 3115 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3116 { 3117 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3118 firstLineNumber, attributeName), firstLineNumber, true, 3119 ldifLines, null); 3120 } 3121 } 3122 } 3123 catch (final LDAPException le) 3124 { 3125 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3126 firstLineNumber, attributeName, getExceptionMessage(le)), 3127 firstLineNumber, true, ldifLines, le); 3128 } 3129 } 3130 } 3131 else if (line.charAt(colonPos+1) == ':') 3132 { 3133 // Skip over any spaces leading up to the value, and then the rest of 3134 // the string is the base64-encoded attribute value. 3135 int pos = colonPos+2; 3136 while ((pos < length) && (line.charAt(pos) == ' ')) 3137 { 3138 pos++; 3139 } 3140 3141 try 3142 { 3143 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3144 if (attrObject == null) 3145 { 3146 attr = new Attribute(attributeName, matchingRule, valueBytes); 3147 attributes.put(lowerName, attr); 3148 } 3149 else 3150 { 3151 try 3152 { 3153 if (! 
ldifAttr.addValue(new ASN1OctetString(valueBytes), 3154 duplicateValueBehavior)) 3155 { 3156 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3157 { 3158 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3159 firstLineNumber, attributeName), firstLineNumber, true, 3160 ldifLines, null); 3161 } 3162 } 3163 } 3164 catch (final LDAPException le) 3165 { 3166 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3167 firstLineNumber, attributeName, getExceptionMessage(le)), 3168 firstLineNumber, true, ldifLines, le); 3169 } 3170 } 3171 } 3172 catch (final ParseException pe) 3173 { 3174 debugException(pe); 3175 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3176 attributeName, firstLineNumber, 3177 pe.getMessage()), 3178 firstLineNumber, true, ldifLines, pe); 3179 } 3180 } 3181 else if (line.charAt(colonPos+1) == '<') 3182 { 3183 // Skip over any spaces leading up to the value, and then the rest of 3184 // the string is a URL that indicates where to get the real content. 3185 // At the present time, we'll only support the file URLs. 3186 int pos = colonPos+2; 3187 while ((pos < length) && (line.charAt(pos) == ' ')) 3188 { 3189 pos++; 3190 } 3191 3192 final byte[] urlBytes; 3193 final String urlString = line.substring(pos); 3194 try 3195 { 3196 urlBytes = 3197 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3198 } 3199 catch (final Exception e) 3200 { 3201 debugException(e); 3202 throw new LDIFException( 3203 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3204 firstLineNumber, e), 3205 firstLineNumber, true, ldifLines, e); 3206 } 3207 3208 if (attrObject == null) 3209 { 3210 attr = new Attribute(attributeName, matchingRule, urlBytes); 3211 attributes.put(lowerName, attr); 3212 } 3213 else 3214 { 3215 try 3216 { 3217 if (! 
ldifAttr.addValue(new ASN1OctetString(urlBytes), 3218 duplicateValueBehavior)) 3219 { 3220 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3221 { 3222 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3223 firstLineNumber, attributeName), firstLineNumber, true, 3224 ldifLines, null); 3225 } 3226 } 3227 } 3228 catch (final LDIFException le) 3229 { 3230 debugException(le); 3231 throw le; 3232 } 3233 catch (final Exception e) 3234 { 3235 debugException(e); 3236 throw new LDIFException( 3237 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3238 firstLineNumber, e), 3239 firstLineNumber, true, ldifLines, e); 3240 } 3241 } 3242 } 3243 else 3244 { 3245 // Skip over any spaces leading up to the value, and then the rest of 3246 // the string is the value. 3247 int pos = colonPos+1; 3248 while ((pos < length) && (line.charAt(pos) == ' ')) 3249 { 3250 pos++; 3251 } 3252 3253 final String valueString = line.substring(pos); 3254 if (attrObject == null) 3255 { 3256 attr = new Attribute(attributeName, matchingRule, valueString); 3257 attributes.put(lowerName, attr); 3258 } 3259 else 3260 { 3261 try 3262 { 3263 if (! 
ldifAttr.addValue(new ASN1OctetString(valueString), 3264 duplicateValueBehavior)) 3265 { 3266 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3267 { 3268 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3269 firstLineNumber, attributeName), firstLineNumber, true, 3270 ldifLines, null); 3271 } 3272 } 3273 } 3274 catch (final LDAPException le) 3275 { 3276 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3277 firstLineNumber, attributeName, getExceptionMessage(le)), 3278 firstLineNumber, true, ldifLines, le); 3279 } 3280 } 3281 } 3282 } 3283 3284 final ArrayList<Attribute> attrList = 3285 new ArrayList<Attribute>(attributes.size()); 3286 for (final Object o : attributes.values()) 3287 { 3288 if (o instanceof Attribute) 3289 { 3290 attrList.add((Attribute) o); 3291 } 3292 else 3293 { 3294 attrList.add(((LDIFAttribute) o).toAttribute()); 3295 } 3296 } 3297 3298 return attrList; 3299 } 3300 3301 3302 3303 /** 3304 * Retrieves the bytes that make up the file referenced by the given URL. 3305 * 3306 * @param urlString The string representation of the URL to retrieve. 3307 * @param relativeBasePath The base path that will be prepended to relative 3308 * paths in order to obtain an absolute path. 3309 * @param firstLineNumber The line number for the start of the record. 3310 * 3311 * @return The bytes contained in the specified file, or an empty array if 3312 * the specified file is empty. 3313 * 3314 * @throws LDIFException If the provided URL is malformed or references a 3315 * nonexistent file. 3316 * 3317 * @throws IOException If a problem is encountered while attempting to read 3318 * from the target file. 
3319 */ 3320 private static byte[] retrieveURLBytes(final String urlString, 3321 final String relativeBasePath, 3322 final long firstLineNumber) 3323 throws LDIFException, IOException 3324 { 3325 int pos; 3326 final String path; 3327 final String lowerURLString = toLowerCase(urlString); 3328 if (lowerURLString.startsWith("file:/")) 3329 { 3330 pos = 6; 3331 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3332 { 3333 pos++; 3334 } 3335 3336 path = urlString.substring(pos-1); 3337 } 3338 else if (lowerURLString.startsWith("file:")) 3339 { 3340 // A file: URL that doesn't include a slash will be interpreted as a 3341 // relative path. 3342 path = relativeBasePath + urlString.substring(5); 3343 } 3344 else 3345 { 3346 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3347 firstLineNumber, true); 3348 } 3349 3350 final File f = new File(path); 3351 if (! f.exists()) 3352 { 3353 throw new LDIFException( 3354 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3355 firstLineNumber, true); 3356 } 3357 3358 // In order to conserve memory, we'll only allow values to be read from 3359 // files no larger than 10 megabytes. 3360 final long fileSize = f.length(); 3361 if (fileSize > (10 * 1024 * 1024)) 3362 { 3363 throw new LDIFException( 3364 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3365 (10*1024*1024)), 3366 firstLineNumber, true); 3367 } 3368 3369 int fileBytesRemaining = (int) fileSize; 3370 final byte[] fileData = new byte[(int) fileSize]; 3371 final FileInputStream fis = new FileInputStream(f); 3372 try 3373 { 3374 int fileBytesRead = 0; 3375 while (fileBytesRead < fileSize) 3376 { 3377 final int bytesRead = 3378 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3379 if (bytesRead < 0) 3380 { 3381 // We hit the end of the file before we expected to. This shouldn't 3382 // happen unless the file size changed since we first looked at it, 3383 // which we won't allow. 
3384 throw new LDIFException( 3385 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3386 f.getAbsolutePath()), 3387 firstLineNumber, true); 3388 } 3389 3390 fileBytesRead += bytesRead; 3391 fileBytesRemaining -= bytesRead; 3392 } 3393 3394 if (fis.read() != -1) 3395 { 3396 // There is still more data to read. This shouldn't happen unless the 3397 // file size changed since we first looked at it, which we won't allow. 3398 throw new LDIFException( 3399 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3400 firstLineNumber, true); 3401 } 3402 } 3403 finally 3404 { 3405 fis.close(); 3406 } 3407 3408 return fileData; 3409 } 3410 3411 3412 3413 /** 3414 * Parses the data available through the provided iterator into an array of 3415 * modifications suitable for use in a modify change record. 3416 * 3417 * @param dn The DN of the entry being parsed. 3418 * @param trailingSpaceBehavior The behavior that should be exhibited when 3419 * encountering attribute values which are not 3420 * base64-encoded but contain trailing spaces. 3421 * @param ldifLines The lines that comprise the LDIF 3422 * representation of the full record being 3423 * parsed. 3424 * @param iterator The iterator to use to access the 3425 * modification data. 3426 * @param firstLineNumber The line number for the start of the record. 3427 * @param schema The schema to use in processing. 3428 * 3429 * @return An array containing the modifications that were read. 3430 * 3431 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3432 * set of modifications. 
3433 */ 3434 private static Modification[] parseModifications(final String dn, 3435 final TrailingSpaceBehavior trailingSpaceBehavior, 3436 final ArrayList<StringBuilder> ldifLines, 3437 final Iterator<StringBuilder> iterator, 3438 final long firstLineNumber, final Schema schema) 3439 throws LDIFException 3440 { 3441 final ArrayList<Modification> modList = 3442 new ArrayList<Modification>(ldifLines.size()); 3443 3444 while (iterator.hasNext()) 3445 { 3446 // The first line must start with "add:", "delete:", "replace:", or 3447 // "increment:" followed by an attribute name. 3448 StringBuilder line = iterator.next(); 3449 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3450 int colonPos = line.indexOf(":"); 3451 if (colonPos < 0) 3452 { 3453 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3454 firstLineNumber, true, ldifLines, null); 3455 } 3456 3457 final ModificationType modType; 3458 final String modTypeStr = toLowerCase(line.substring(0, colonPos)); 3459 if (modTypeStr.equals("add")) 3460 { 3461 modType = ModificationType.ADD; 3462 } 3463 else if (modTypeStr.equals("delete")) 3464 { 3465 modType = ModificationType.DELETE; 3466 } 3467 else if (modTypeStr.equals("replace")) 3468 { 3469 modType = ModificationType.REPLACE; 3470 } 3471 else if (modTypeStr.equals("increment")) 3472 { 3473 modType = ModificationType.INCREMENT; 3474 } 3475 else 3476 { 3477 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3478 firstLineNumber), 3479 firstLineNumber, true, ldifLines, null); 3480 } 3481 3482 String attributeName; 3483 int length = line.length(); 3484 if (length == (colonPos+1)) 3485 { 3486 // The colon was the last character on the line. This is not 3487 // acceptable. 
3488 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3489 firstLineNumber), 3490 firstLineNumber, true, ldifLines, null); 3491 } 3492 else if (line.charAt(colonPos+1) == ':') 3493 { 3494 // Skip over any spaces leading up to the value, and then the rest of 3495 // the string is the base64-encoded attribute name. 3496 int pos = colonPos+2; 3497 while ((pos < length) && (line.charAt(pos) == ' ')) 3498 { 3499 pos++; 3500 } 3501 3502 try 3503 { 3504 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3505 attributeName = new String(dnBytes, "UTF-8"); 3506 } 3507 catch (final ParseException pe) 3508 { 3509 debugException(pe); 3510 throw new LDIFException( 3511 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3512 firstLineNumber, pe.getMessage()), 3513 firstLineNumber, true, ldifLines, pe); 3514 } 3515 catch (final Exception e) 3516 { 3517 debugException(e); 3518 throw new LDIFException( 3519 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3520 firstLineNumber, e), 3521 firstLineNumber, true, ldifLines, e); 3522 } 3523 } 3524 else 3525 { 3526 // Skip over any spaces leading up to the value, and then the rest of 3527 // the string is the attribute name. 3528 int pos = colonPos+1; 3529 while ((pos < length) && (line.charAt(pos) == ' ')) 3530 { 3531 pos++; 3532 } 3533 3534 attributeName = line.substring(pos); 3535 } 3536 3537 if (attributeName.length() == 0) 3538 { 3539 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3540 firstLineNumber), 3541 firstLineNumber, true, ldifLines, null); 3542 } 3543 3544 3545 // The next zero or more lines may be the set of attribute values. Keep 3546 // reading until we reach the end of the iterator or until we find a line 3547 // with just a "-". 
3548 final ArrayList<ASN1OctetString> valueList = 3549 new ArrayList<ASN1OctetString>(ldifLines.size()); 3550 while (iterator.hasNext()) 3551 { 3552 line = iterator.next(); 3553 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3554 if (line.toString().equals("-")) 3555 { 3556 break; 3557 } 3558 3559 colonPos = line.indexOf(":"); 3560 if (colonPos < 0) 3561 { 3562 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3563 firstLineNumber, true, ldifLines, null); 3564 } 3565 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3566 { 3567 // There are a couple of cases in which this might be acceptable: 3568 // - If the two names are logically equivalent, but have an alternate 3569 // name (or OID) for the target attribute type, or if there are 3570 // attribute options and the options are just in a different order. 3571 // - If this is the first value for the target attribute and the 3572 // alternate name includes a "binary" option that the original 3573 // attribute name did not have. In this case, all subsequent values 3574 // will also be required to have the binary option. 3575 final String alternateName = line.substring(0, colonPos); 3576 3577 3578 // Check to see if the base names are equivalent. 
3579 boolean baseNameEquivalent = false; 3580 final String expectedBaseName = Attribute.getBaseName(attributeName); 3581 final String alternateBaseName = Attribute.getBaseName(alternateName); 3582 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3583 { 3584 baseNameEquivalent = true; 3585 } 3586 else 3587 { 3588 if (schema != null) 3589 { 3590 final AttributeTypeDefinition expectedAT = 3591 schema.getAttributeType(expectedBaseName); 3592 final AttributeTypeDefinition alternateAT = 3593 schema.getAttributeType(alternateBaseName); 3594 if ((expectedAT != null) && (alternateAT != null) && 3595 expectedAT.equals(alternateAT)) 3596 { 3597 baseNameEquivalent = true; 3598 } 3599 } 3600 } 3601 3602 3603 // Check to see if the attribute options are equivalent. 3604 final Set<String> expectedOptions = 3605 Attribute.getOptions(attributeName); 3606 final Set<String> lowerExpectedOptions = 3607 new HashSet<String>(expectedOptions.size()); 3608 for (final String s : expectedOptions) 3609 { 3610 lowerExpectedOptions.add(toLowerCase(s)); 3611 } 3612 3613 final Set<String> alternateOptions = 3614 Attribute.getOptions(alternateName); 3615 final Set<String> lowerAlternateOptions = 3616 new HashSet<String>(alternateOptions.size()); 3617 for (final String s : alternateOptions) 3618 { 3619 lowerAlternateOptions.add(toLowerCase(s)); 3620 } 3621 3622 final boolean optionsEquivalent = 3623 lowerAlternateOptions.equals(lowerExpectedOptions); 3624 3625 3626 if (baseNameEquivalent && optionsEquivalent) 3627 { 3628 // This is fine. The two attribute descriptions are logically 3629 // equivalent. We'll continue using the attribute description that 3630 // was provided first. 
3631 } 3632 else if (valueList.isEmpty() && baseNameEquivalent && 3633 lowerAlternateOptions.remove("binary") && 3634 lowerAlternateOptions.equals(lowerExpectedOptions)) 3635 { 3636 // This means that the provided value is the first value for the 3637 // attribute, and that the only significant difference is that the 3638 // provided attribute description included an unexpected "binary" 3639 // option. We'll accept this, but will require any additional 3640 // values for this modification to also include the binary option, 3641 // and we'll use the binary option in the attribute that is 3642 // eventually created. 3643 attributeName = alternateName; 3644 } 3645 else 3646 { 3647 // This means that either the base names are different or the sets 3648 // of options are incompatible. This is not acceptable. 3649 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3650 firstLineNumber, 3651 line.substring(0, colonPos), 3652 attributeName), 3653 firstLineNumber, true, ldifLines, null); 3654 } 3655 } 3656 3657 length = line.length(); 3658 final ASN1OctetString value; 3659 if (length == (colonPos+1)) 3660 { 3661 // The colon was the last character on the line. This is fine. 3662 value = new ASN1OctetString(); 3663 } 3664 else if (line.charAt(colonPos+1) == ':') 3665 { 3666 // Skip over any spaces leading up to the value, and then the rest of 3667 // the string is the base64-encoded value. This is unusual and 3668 // unnecessary, but is nevertheless acceptable. 
3669 int pos = colonPos+2; 3670 while ((pos < length) && (line.charAt(pos) == ' ')) 3671 { 3672 pos++; 3673 } 3674 3675 try 3676 { 3677 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3678 } 3679 catch (final ParseException pe) 3680 { 3681 debugException(pe); 3682 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3683 attributeName, firstLineNumber, pe.getMessage()), 3684 firstLineNumber, true, ldifLines, pe); 3685 } 3686 catch (final Exception e) 3687 { 3688 debugException(e); 3689 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3690 firstLineNumber, e), 3691 firstLineNumber, true, ldifLines, e); 3692 } 3693 } 3694 else 3695 { 3696 // Skip over any spaces leading up to the value, and then the rest of 3697 // the string is the value. 3698 int pos = colonPos+1; 3699 while ((pos < length) && (line.charAt(pos) == ' ')) 3700 { 3701 pos++; 3702 } 3703 3704 value = new ASN1OctetString(line.substring(pos)); 3705 } 3706 3707 valueList.add(value); 3708 } 3709 3710 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3711 valueList.toArray(values); 3712 3713 // If it's an add modification type, then there must be at least one 3714 // value. 3715 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3716 (values.length == 0)) 3717 { 3718 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3719 firstLineNumber), 3720 firstLineNumber, true, ldifLines, null); 3721 } 3722 3723 // If it's an increment modification type, then there must be exactly one 3724 // value. 
if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
          (values.length != 1))
      {
        throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
                                     firstLineNumber, attributeName),
                                firstLineNumber, true, ldifLines, null);
      }

      modList.add(new Modification(modType, attributeName, values));
    }

    final Modification[] mods = new Modification[modList.size()];
    modList.toArray(mods);
    return mods;
  }



  /**
   * Parses the data available through the provided iterator as the body of a
   * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
   * newsuperior lines).  The first line is read without a preceding
   * {@code hasNext()} check, so the iterator is presumably expected to be
   * positioned on at least one remaining line.
   *
   * @param  ldifLines              The lines that comprise the LDIF
   *                                representation of the full record being
   *                                parsed.
   * @param  iterator               The iterator to use to access the modify DN
   *                                data.
   * @param  dn                     The current DN of the entry.
   * @param  controls               The set of controls to include in the change
   *                                record.
   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
   *                                encountering attribute values which are not
   *                                base64-encoded but contain trailing spaces.
   * @param  firstLineNumber        The line number for the start of the record.
   *
   * @return  The decoded modify DN change record.
   *
   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
   *                         modify DN change record.
   */
  private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
       final ArrayList<StringBuilder> ldifLines,
       final Iterator<StringBuilder> iterator, final String dn,
       final List<Control> controls,
       final TrailingSpaceBehavior trailingSpaceBehavior,
       final long firstLineNumber)
       throws LDIFException
  {
    // The next line must be the new RDN, and it must start with "newrdn:".
    StringBuilder line = iterator.next();
    handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
    int colonPos = line.indexOf(":");
    if ((colonPos < 0) ||
        (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
    {
      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }

    final String newRDN;
    int length = line.length();
    if (length == (colonPos+1))
    {
      // The colon was the last character on the line.  This is not acceptable.
      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }
    else if (line.charAt(colonPos+1) == ':')
    {
      // Skip over any spaces leading up to the value, and then the rest of the
      // string is the base64-encoded new RDN.
      int pos = colonPos+2;
      while ((pos < length) && (line.charAt(pos) == ' '))
      {
        pos++;
      }

      try
      {
        final byte[] rdnBytes = Base64.decode(line.substring(pos));
        newRDN = new String(rdnBytes, "UTF-8");
      }
      catch (final ParseException pe)
      {
        debugException(pe);
        throw new LDIFException(
             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
                                                               pe.getMessage()),
             firstLineNumber, true, ldifLines, pe);
      }
      catch (final Exception e)
      {
        debugException(e);
        throw new LDIFException(
             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
                                                               e),
             firstLineNumber, true, ldifLines, e);
      }
    }
    else
    {
      // Skip over any spaces leading up to the value, and then the rest of the
      // string is the new RDN.
      int pos = colonPos+1;
      while ((pos < length) && (line.charAt(pos) == ' '))
      {
        pos++;
      }

      newRDN = line.substring(pos);
    }

    if (newRDN.length() == 0)
    {
      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }


    // The next line must be the deleteOldRDN flag, and it must start with
    // 'deleteoldrdn:'.
    if (! iterator.hasNext())
    {
      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }

    line = iterator.next();
    handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
    colonPos = line.indexOf(":");
    if ((colonPos < 0) ||
        (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
    {
      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }

    final String deleteOldRDNStr;
    length = line.length();
    if (length == (colonPos+1))
    {
      // The colon was the last character on the line.  This is not acceptable.
      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
                                   firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }
    else if (line.charAt(colonPos+1) == ':')
    {
      // Skip over any spaces leading up to the value, and then the rest of the
      // string is the base64-encoded value.  This is unusual and
      // unnecessary, but is nevertheless acceptable.
      int pos = colonPos+2;
      while ((pos < length) && (line.charAt(pos) == ' '))
      {
        pos++;
      }

      try
      {
        final byte[] flagBytes = Base64.decode(line.substring(pos));
        deleteOldRDNStr = new String(flagBytes, "UTF-8");
      }
      catch (final ParseException pe)
      {
        debugException(pe);
        throw new LDIFException(
             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
                  firstLineNumber, pe.getMessage()),
             firstLineNumber, true, ldifLines, pe);
      }
      catch (final Exception e)
      {
        debugException(e);
        throw new LDIFException(
             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
                  firstLineNumber, e),
             firstLineNumber, true, ldifLines, e);
      }
    }
    else
    {
      // Skip over any spaces leading up to the value, and then the rest of the
      // string is the value.
      int pos = colonPos+1;
      while ((pos < length) && (line.charAt(pos) == ' '))
      {
        pos++;
      }

      deleteOldRDNStr = line.substring(pos);
    }

    final boolean deleteOldRDN;
    if (deleteOldRDNStr.equals("0"))
    {
      deleteOldRDN = false;
    }
    else if (deleteOldRDNStr.equals("1"))
    {
      deleteOldRDN = true;
    }
    else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
             deleteOldRDNStr.equalsIgnoreCase("no"))
    {
      // This is technically illegal, but we'll allow it.
      deleteOldRDN = false;
    }
    else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
             deleteOldRDNStr.equalsIgnoreCase("yes"))
    {
      // This is also technically illegal, but we'll allow it.  An affirmative
      // value must request that the old RDN values be removed, just like "1".
      deleteOldRDN = true;
    }
    else
    {
      throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
                                   deleteOldRDNStr, firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }


    // If there is another line, then it must be the new superior DN and it must
    // start with "newsuperior:".  If this is absent, then it's fine.
    final String newSuperiorDN;
    if (iterator.hasNext())
    {
      line = iterator.next();
      handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
      colonPos = line.indexOf(":");
      if ((colonPos < 0) ||
          (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
      {
        throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
                                     firstLineNumber),
                                firstLineNumber, true, ldifLines, null);
      }

      length = line.length();
      if (length == (colonPos+1))
      {
        // The colon was the last character on the line.  This is fine.
        newSuperiorDN = "";
      }
      else if (line.charAt(colonPos+1) == ':')
      {
        // Skip over any spaces leading up to the value, and then the rest of
        // the string is the base64-encoded new superior DN.
        int pos = colonPos+2;
        while ((pos < length) && (line.charAt(pos) == ' '))
        {
          pos++;
        }

        try
        {
          final byte[] dnBytes = Base64.decode(line.substring(pos));
          newSuperiorDN = new String(dnBytes, "UTF-8");
        }
        catch (final ParseException pe)
        {
          debugException(pe);
          throw new LDIFException(
               ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
                    firstLineNumber, pe.getMessage()),
               firstLineNumber, true, ldifLines, pe);
        }
        catch (final Exception e)
        {
          debugException(e);
          throw new LDIFException(
               ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
                    firstLineNumber, e),
               firstLineNumber, true, ldifLines, e);
        }
      }
      else
      {
        // Skip over any spaces leading up to the value, and then the rest of
        // the string is the new superior DN.
        int pos = colonPos+1;
        while ((pos < length) && (line.charAt(pos) == ' '))
        {
          pos++;
        }

        newSuperiorDN = line.substring(pos);
      }
    }
    else
    {
      newSuperiorDN = null;
    }


    // There must not be any more lines.
    if (iterator.hasNext())
    {
      throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
                              firstLineNumber, true, ldifLines, null);
    }

    return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
         newSuperiorDN, controls);
  }



  /**
   * Examines the line contained in the provided buffer to determine whether it
   * may contain one or more illegal trailing spaces.  If it does, then those
   * spaces will either be stripped out or an exception will be thrown to
   * indicate that they are illegal.
   *
   * @param  buffer                 The buffer to be examined.
   * @param  dn                     The DN of the LDIF record being parsed.  It
   *                                may be {@code null} if the DN is not yet
   *                                known (e.g., because the provided line is
   *                                expected to contain that DN).
   * @param  firstLineNumber        The approximate line number in the LDIF
   *                                source on which the LDIF record begins.
   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
   *                                encountering attribute values which are not
   *                                base64-encoded but contain trailing spaces.
   *
   * @throws  LDIFException  If the line contained in the provided buffer ends
   *                         with one or more illegal trailing spaces and
   *                         {@code trailingSpaceBehavior} is
   *                         {@code REJECT}.
4059 */ 4060 private static void handleTrailingSpaces(final StringBuilder buffer, 4061 final String dn, final long firstLineNumber, 4062 final TrailingSpaceBehavior trailingSpaceBehavior) 4063 throws LDIFException 4064 { 4065 int pos = buffer.length() - 1; 4066 boolean trailingFound = false; 4067 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 4068 { 4069 trailingFound = true; 4070 pos--; 4071 } 4072 4073 if (trailingFound && (buffer.charAt(pos) != ':')) 4074 { 4075 switch (trailingSpaceBehavior) 4076 { 4077 case STRIP: 4078 buffer.setLength(pos+1); 4079 break; 4080 4081 case REJECT: 4082 if (dn == null) 4083 { 4084 throw new LDIFException( 4085 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4086 buffer.toString()), 4087 firstLineNumber, true); 4088 } 4089 else 4090 { 4091 throw new LDIFException( 4092 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4093 firstLineNumber, buffer.toString()), 4094 firstLineNumber, true); 4095 } 4096 4097 case RETAIN: 4098 default: 4099 // No action will be taken. 4100 break; 4101 } 4102 } 4103 } 4104 4105 4106 4107 /** 4108 * This represents an unparsed LDIFRecord. It stores the line number of the 4109 * first line of the record and each line of the record. 4110 */ 4111 private static final class UnparsedLDIFRecord 4112 { 4113 private final ArrayList<StringBuilder> lineList; 4114 private final long firstLineNumber; 4115 private final Exception failureCause; 4116 private final boolean isEOF; 4117 private final DuplicateValueBehavior duplicateValueBehavior; 4118 private final Schema schema; 4119 private final TrailingSpaceBehavior trailingSpaceBehavior; 4120 4121 4122 4123 /** 4124 * Constructor. 4125 * 4126 * @param lineList The lines that comprise the LDIF record. 4127 * @param duplicateValueBehavior The behavior to exhibit if the entry 4128 * contains duplicate attribute values. 
4129 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4130 * encountering trailing spaces in 4131 * non-base64-encoded attribute values. 4132 * @param schema The schema to use when parsing, if 4133 * applicable. 4134 * @param firstLineNumber The first line number of the LDIF record. 4135 */ 4136 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4137 final DuplicateValueBehavior duplicateValueBehavior, 4138 final TrailingSpaceBehavior trailingSpaceBehavior, 4139 final Schema schema, final long firstLineNumber) 4140 { 4141 this.lineList = lineList; 4142 this.firstLineNumber = firstLineNumber; 4143 this.duplicateValueBehavior = duplicateValueBehavior; 4144 this.trailingSpaceBehavior = trailingSpaceBehavior; 4145 this.schema = schema; 4146 4147 failureCause = null; 4148 isEOF = 4149 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4150 } 4151 4152 4153 4154 /** 4155 * Constructor. 4156 * 4157 * @param failureCause The Exception thrown when reading from the input. 4158 */ 4159 private UnparsedLDIFRecord(final Exception failureCause) 4160 { 4161 this.failureCause = failureCause; 4162 4163 lineList = null; 4164 firstLineNumber = 0; 4165 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4166 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4167 schema = null; 4168 isEOF = false; 4169 } 4170 4171 4172 4173 /** 4174 * Return the lines that comprise the LDIF record. 4175 * 4176 * @return The lines that comprise the LDIF record. 4177 */ 4178 private ArrayList<StringBuilder> getLineList() 4179 { 4180 return lineList; 4181 } 4182 4183 4184 4185 /** 4186 * Retrieves the behavior to exhibit when encountering duplicate attribute 4187 * values. 4188 * 4189 * @return The behavior to exhibit when encountering duplicate attribute 4190 * values. 
4191 */ 4192 private DuplicateValueBehavior getDuplicateValueBehavior() 4193 { 4194 return duplicateValueBehavior; 4195 } 4196 4197 4198 4199 /** 4200 * Retrieves the behavior that should be exhibited when encountering 4201 * attribute values which are not base64-encoded but contain trailing 4202 * spaces. The LDIF specification strongly recommends that any value which 4203 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4204 * LDIF parser may be configured to automatically strip these spaces, to 4205 * preserve them, or to reject any entry or change record containing them. 4206 * 4207 * @return The behavior that should be exhibited when encountering 4208 * attribute values which are not base64-encoded but contain 4209 * trailing spaces. 4210 */ 4211 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4212 { 4213 return trailingSpaceBehavior; 4214 } 4215 4216 4217 4218 /** 4219 * Retrieves the schema that should be used when parsing the record, if 4220 * applicable. 4221 * 4222 * @return The schema that should be used when parsing the record, or 4223 * {@code null} if none should be used. 4224 */ 4225 private Schema getSchema() 4226 { 4227 return schema; 4228 } 4229 4230 4231 4232 /** 4233 * Return the first line number of the LDIF record. 4234 * 4235 * @return The first line number of the LDIF record. 4236 */ 4237 private long getFirstLineNumber() 4238 { 4239 return firstLineNumber; 4240 } 4241 4242 4243 4244 /** 4245 * Return {@code true} iff the end of the input was reached. 4246 * 4247 * @return {@code true} iff the end of the input was reached. 4248 */ 4249 private boolean isEOF() 4250 { 4251 return isEOF; 4252 } 4253 4254 4255 4256 /** 4257 * Returns the reason that reading the record lines failed. This normally 4258 * is only non-null if something bad happened to the input stream (like 4259 * a disk read error). 4260 * 4261 * @return The reason that reading the record lines failed. 
4262 */ 4263 private Exception getFailureCause() 4264 { 4265 return failureCause; 4266 } 4267 } 4268 4269 4270 /** 4271 * When processing in asynchronous mode, this thread is responsible for 4272 * reading the raw unparsed records from the input and submitting them for 4273 * processing. 4274 */ 4275 private final class LineReaderThread 4276 extends Thread 4277 { 4278 /** 4279 * Constructor. 4280 */ 4281 private LineReaderThread() 4282 { 4283 super("Asynchronous LDIF line reader"); 4284 setDaemon(true); 4285 } 4286 4287 4288 4289 /** 4290 * Reads raw, unparsed records from the input and submits them for 4291 * processing until the input is finished or closed. 4292 */ 4293 @Override() 4294 public void run() 4295 { 4296 try 4297 { 4298 boolean stopProcessing = false; 4299 while (!stopProcessing) 4300 { 4301 UnparsedLDIFRecord unparsedRecord = null; 4302 try 4303 { 4304 unparsedRecord = readUnparsedRecord(); 4305 } 4306 catch (final IOException e) 4307 { 4308 debugException(e); 4309 unparsedRecord = new UnparsedLDIFRecord(e); 4310 stopProcessing = true; 4311 } 4312 catch (final Exception e) 4313 { 4314 debugException(e); 4315 unparsedRecord = new UnparsedLDIFRecord(e); 4316 } 4317 4318 try 4319 { 4320 asyncParser.submit(unparsedRecord); 4321 } 4322 catch (final InterruptedException e) 4323 { 4324 debugException(e); 4325 // If this thread is interrupted, then someone wants us to stop 4326 // processing, so that's what we'll do. 4327 Thread.currentThread().interrupt(); 4328 stopProcessing = true; 4329 } 4330 4331 if ((unparsedRecord == null) || (unparsedRecord.isEOF())) 4332 { 4333 stopProcessing = true; 4334 } 4335 } 4336 } 4337 finally 4338 { 4339 try 4340 { 4341 asyncParser.shutdown(); 4342 } 4343 catch (final InterruptedException e) 4344 { 4345 debugException(e); 4346 Thread.currentThread().interrupt(); 4347 } 4348 finally 4349 { 4350 asyncParsingComplete.set(true); 4351 } 4352 } 4353 } 4354 } 4355 4356 4357 4358 /** 4359 * Used to parse Records asynchronously. 
*/
  private final class RecordParser
          implements Processor<UnparsedLDIFRecord, LDIFRecord>
  {
    /**
     * {@inheritDoc}
     * <BR><BR>
     * Decodes the provided unparsed record and then passes the result through
     * the entry translator or the change record translator, whichever is
     * configured and matches the type of the decoded record.  If a translator
     * returns {@code null}, then {@code SKIP_ENTRY} is returned in its place.
     */
    @Override()
    public LDIFRecord process(final UnparsedLDIFRecord input)
           throws LDIFException
    {
      LDIFRecord result = decodeRecord(input, relativeBasePath, schema);
      final long recordLineNumber = input.getFirstLineNumber();

      // Entries are handed to the entry translator, if there is one.
      if ((entryTranslator != null) && (result instanceof Entry))
      {
        final LDIFRecord translatedEntry =
             entryTranslator.translate((Entry) result, recordLineNumber);
        result = (translatedEntry == null) ? SKIP_ENTRY : translatedEntry;
      }

      // Change records are handed to the change record translator, if there
      // is one.  This check is deliberately independent of the one above.
      if ((changeRecordTranslator != null) &&
          (result instanceof LDIFChangeRecord))
      {
        final LDIFRecord translatedChange = changeRecordTranslator.translate(
             (LDIFChangeRecord) result, recordLineNumber);
        result = (translatedChange == null) ? SKIP_ENTRY : translatedChange;
      }

      return result;
    }
  }
}