// /////////////////////////////////////////////////////////////////////////////
// REFCODES.ORG
// =============================================================================
// This code is copyright (c) by Siegfried Steiner, Munich, Germany and licensed
// under the following (see "http://en.wikipedia.org/wiki/Multi-licensing")
// licenses:
// =============================================================================
// GNU General Public License, v3.0 ("http://www.gnu.org/licenses/gpl-3.0.html")
// together with the GPL linking exception applied; as being applied by the GNU
// Classpath ("http://www.gnu.org/software/classpath/license.html")
// =============================================================================
// Apache License, v2.0 ("http://www.apache.org/licenses/LICENSE-2.0")
// =============================================================================
// Please contact the copyright holding author(s) of the software artifacts in
// question for licensing issues not being covered by the above listed licenses,
// also regarding commercial licensing models or regarding the compatibility
// with other open source licenses.
// /////////////////////////////////////////////////////////////////////////////

package org.refcodes.tabular;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import org.refcodes.data.FilenameExtension;

/**
 * The {@link CsvFileRecordsImpl} is an implementation of the {@link Records}
 * interface and provides functionality to parse CSV files. It extends the more
 * generic {@link CsvInputStreamRecordsImpl} for parsing CSV input streams.
 * <p>
 * A {@link File} whose name ends with the ZIP filename extension is treated as
 * a ZIP archive which must contain exactly one entry being named like the
 * archive without the ZIP filename extension (see
 * {@link #toInputStream(File)}); the uncompressed content of that entry is
 * parsed.
 *
 * @param <T> The type managed by the {@link Records}.
 */
public class CsvFileRecordsImpl<T> extends CsvInputStreamRecordsImpl<T> {

	// /////////////////////////////////////////////////////////////////////////
	// CONSTRUCTORS:
	// /////////////////////////////////////////////////////////////////////////

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters.
	 * Internally {@link Column} instances are generated according to the keys
	 * found in the CSV top line. The {@link Column} instances are required to
	 * convert the CSV line values. If a {@link Header} is provided, then the
	 * {@link Header} is used for generating the {@link Column} instances
	 * instead of the top line of the CSV file.
	 *
	 * @param aHeader The {@link Header} to use when parsing the lines retrieved
	 *        from the {@link InputStream}.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( Header<T> aHeader, File aCsvFile ) throws FileNotFoundException, IOException, ZipException {
		super( aHeader, toInputStream( aCsvFile ) );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters.
	 * Internally {@link Column} instances are generated according to the keys
	 * found in the CSV top line. The {@link Column} instances are required to
	 * convert the CSV line values. If a {@link Header} is provided, then the
	 * {@link Header} is used for generating the {@link Column} instances
	 * instead of the top line of the CSV file.
	 *
	 * @param aHeader The {@link Header} to use when parsing the lines retrieved
	 *        from the {@link InputStream}.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param aCsvSeparator The delimiter being expected for the CSV input
	 *        stream.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( Header<T> aHeader, File aCsvFile, char aCsvSeparator ) throws FileNotFoundException, IOException, ZipException {
		super( aHeader, toInputStream( aCsvFile ), aCsvSeparator );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters.
	 * Internally {@link Column} instances are generated according to the keys
	 * found in the CSV top line. The {@link Column} instances are required to
	 * convert the CSV line values. If a {@link Header} is provided, then the
	 * {@link Header} is used for generating the {@link Column} instances
	 * instead of the top line of the CSV file.
	 *
	 * @param aHeader The {@link Header} to use when parsing the lines retrieved
	 *        from the {@link InputStream}.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param isStrict When true, then parsing will abort with an exception in
	 *        case of parsing problems, else parsing is gracefully continued and
	 *        erroneous records are skipped. The error count
	 *        {@link #getErroneousRecordCount()} is incremented by each
	 *        erroneous {@link Record}.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( Header<T> aHeader, File aCsvFile, boolean isStrict ) throws FileNotFoundException, IOException, ZipException {
		super( aHeader, toInputStream( aCsvFile ), isStrict );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters.
	 * Internally {@link Column} instances are generated according to the keys
	 * found in the CSV top line. The {@link Column} instances are required to
	 * convert the CSV line values. If a {@link Header} is provided, then the
	 * {@link Header} is used for generating the {@link Column} instances
	 * instead of the top line of the CSV file.
	 *
	 * @param aHeader The {@link Header} to use when parsing the lines retrieved
	 *        from the {@link InputStream}.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param aCsvSeparator The delimiter being expected for the CSV input
	 *        stream.
	 * @param isStrict When true, then parsing will abort with an exception in
	 *        case of parsing problems, else parsing is gracefully continued and
	 *        erroneous records are skipped. The error count
	 *        {@link #getErroneousRecordCount()} is incremented by each
	 *        erroneous {@link Record}.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( Header<T> aHeader, File aCsvFile, char aCsvSeparator, boolean isStrict ) throws FileNotFoundException, IOException, ZipException {
		super( aHeader, toInputStream( aCsvFile ), aCsvSeparator, isStrict );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters. This
	 * constructor supports a {@link ColumnFactory} for creating {@link Column}
	 * instance according to the keys found in the CSV top line. The
	 * {@link Column} instances are required to convert the CSV line values from
	 * the storage format to the actual required type.
	 *
	 * @param aColumnFactory A {@link ColumnFactory} to be used to generate
	 *        {@link Column} instances from the top line of the CSV file,
	 *        required for parsing the CSV lines and converting them to
	 *        {@link Record} instances.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( ColumnFactory<T> aColumnFactory, File aCsvFile ) throws FileNotFoundException, IOException, ZipException {
		super( aColumnFactory, toInputStream( aCsvFile ) );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters. This
	 * constructor supports a {@link ColumnFactory} for creating {@link Column}
	 * instance according to the keys found in the CSV top line. The
	 * {@link Column} instances are required to convert the CSV line values from
	 * the storage format to the actual required type.
	 *
	 * @param aColumnFactory A {@link ColumnFactory} to be used to generate
	 *        {@link Column} instances from the top line of the CSV file,
	 *        required for parsing the CSV lines and converting them to
	 *        {@link Record} instances.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param aCsvSeparator The delimiter being expected for the CSV input
	 *        stream.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( ColumnFactory<T> aColumnFactory, File aCsvFile, char aCsvSeparator ) throws FileNotFoundException, IOException, ZipException {
		super( aColumnFactory, toInputStream( aCsvFile ), aCsvSeparator );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters. This
	 * constructor supports a {@link ColumnFactory} for creating {@link Column}
	 * instance according to the keys found in the CSV top line. The
	 * {@link Column} instances are required to convert the CSV line values from
	 * the storage format to the actual required type.
	 *
	 * @param aColumnFactory A {@link ColumnFactory} to be used to generate
	 *        {@link Column} instances from the top line of the CSV file,
	 *        required for parsing the CSV lines and converting them to
	 *        {@link Record} instances.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param isStrict When true, then parsing will abort with an exception in
	 *        case of parsing problems, else parsing is gracefully continued and
	 *        erroneous records are skipped. The error count
	 *        {@link #getErroneousRecordCount()} is incremented by each
	 *        erroneous {@link Record}.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( ColumnFactory<T> aColumnFactory, File aCsvFile, boolean isStrict ) throws FileNotFoundException, IOException, ZipException {
		super( aColumnFactory, toInputStream( aCsvFile ), isStrict );
	}

	/**
	 * Constructs a {@link CsvFileRecordsImpl} with the given parameters. This
	 * constructor supports a {@link ColumnFactory} for creating {@link Column}
	 * instance according to the keys found in the CSV top line. The
	 * {@link Column} instances are required to convert the CSV line values from
	 * the storage format to the actual required type.
	 *
	 * @param aColumnFactory A {@link ColumnFactory} to be used to generate
	 *        {@link Column} instances from the top line of the CSV file,
	 *        required for parsing the CSV lines and converting them to
	 *        {@link Record} instances.
	 * @param aCsvFile The CSV {@link File} which to parse.
	 * @param aCsvSeparator The delimiter being expected for the CSV input
	 *        stream.
	 * @param isStrict When true, then parsing will abort with an exception in
	 *        case of parsing problems, else parsing is gracefully continued and
	 *        erroneous records are skipped. The error count
	 *        {@link #getErroneousRecordCount()} is incremented by each
	 *        erroneous {@link Record}.
	 * @throws FileNotFoundException in case the given {@link File} could not
	 *         be opened for reading.
	 * @throws IOException in case there were problems working with the given
	 *         {@link File}.
	 * @throws ZipException in case the given {@link File} has a ZIP filename
	 *         extension but is not a ZIP archive with the one expected entry.
	 */
	public CsvFileRecordsImpl( ColumnFactory<T> aColumnFactory, File aCsvFile, char aCsvSeparator, boolean isStrict ) throws FileNotFoundException, IOException, ZipException {
		super( aColumnFactory, toInputStream( aCsvFile ), aCsvSeparator, isStrict );
	}

	// /////////////////////////////////////////////////////////////////////////
	// HELPER:
	// /////////////////////////////////////////////////////////////////////////

	/**
	 * Returns an {@link InputStream} from the provided {@link File}. In case
	 * the file's name ends with the ZIP filename extension, the file is opened
	 * as ZIP archive which must contain exactly one entry, the entry's name
	 * being the file's name without the ZIP filename extension; the
	 * uncompressed data of that entry is provided by the {@link InputStream}.
	 * Closing the returned {@link InputStream} also closes the underlying ZIP
	 * archive. When this method fails with an exception, the ZIP archive is
	 * closed before the exception is propagated.
	 * 
	 * @param aFile The {@link File} for which to get the {@link InputStream}.
	 * 
	 * @return A buffered {@link InputStream}, in case of a ZIP compressed
	 *         {@link File}, an uncompressed {@link InputStream} is returned.
	 * 
	 * @throws ZipException in case there were problems when accessing the ZIP
	 *         compressed {@link File}, in case it does not contain exactly
	 *         one entry or in case the entry's name is not the expected one.
	 * 
	 * @throws IOException in case there were problems working with the
	 *         {@link File}.
	 * 
	 * @throws FileNotFoundException in case there was none such {@link File}
	 *         found.
	 */
	@SuppressWarnings("resource") // The returned stream takes over ownership of the ZIP file.
	protected static InputStream toInputStream( File aFile ) throws ZipException, IOException, FileNotFoundException {
		final String theUnZipFileName = toFileNameFromZip( aFile.getName() );
		if ( theUnZipFileName == null ) {
			return new BufferedInputStream( new FileInputStream( aFile ) );
		}
		final ZipFile theZipFile = new ZipFile( aFile );
		boolean isHandedOver = false;
		try {
			if ( theZipFile.size() != 1 ) {
				throw new ZipException( "The file \"" + aFile.getAbsolutePath() + "\" has <" + theZipFile.size() + "> entries, expecting exactly one entry with name \"" + theUnZipFileName + "\"!" );
			}
			final Enumeration<? extends ZipEntry> e = theZipFile.entries();
			final ZipEntry theEntry = e.nextElement();
			if ( !theUnZipFileName.equals( theEntry.getName() ) ) {
				throw new ZipException( "The file \"" + aFile.getAbsolutePath() + "\" contains an entry with name \"" + theEntry.getName() + "\", though expecting entry with name \"" + theUnZipFileName + "\"!" );
			}
			// Tie the ZIP file's life-cycle to the returned stream so that
			// closing the stream also releases the ZIP file's handle:
			final InputStream theInputStream = new BufferedInputStream( theZipFile.getInputStream( theEntry ) ) {
				@Override
				public void close() throws IOException {
					try {
						super.close();
					}
					finally {
						theZipFile.close();
					}
				}
			};
			isHandedOver = true;
			return theInputStream;
		}
		finally {
			// Only reached without hand-over when an exception is on its way,
			// do not leak the ZIP file's handle in that case:
			if ( !isHandedOver ) {
				try {
					theZipFile.close();
				}
				catch ( IOException ignored ) {
					// Do not mask the original exception being propagated.
				}
			}
		}
	}

	/**
	 * Truncates the ".zip" suffix from the filename and returns the result. For
	 * example a file with name "log-2023-07-12.txt.zip" results in
	 * "log-2023-07-12.txt". The suffix is matched ignoring the case and
	 * independently of the default locale.
	 * 
	 * @param aZipFileName The file name of the ZIP file for which to get the
	 *        "inner" file name.
	 * 
	 * @return The "inner" file name if the file suffix was ".zip", else null.
	 */
	protected static String toFileNameFromZip( String aZipFileName ) {
		final String theSuffix = FilenameExtension.ZIP.getFilenameExtension();
		final int theInnerLength = aZipFileName.length() - theSuffix.length();
		// "regionMatches" compares char by char ignoring the case without
		// consulting the default locale (unlike "toLowerCase()") and returns
		// false for a negative offset (name shorter than the suffix):
		if ( aZipFileName.regionMatches( true, theInnerLength, theSuffix, 0, theSuffix.length() ) ) {
			return aZipFileName.substring( 0, theInnerLength );
		}
		return null;
	}
}
