summaryrefslogtreecommitdiff
path: root/ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs
diff options
context:
space:
mode:
Diffstat (limited to 'ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs')
-rw-r--r--ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs1194
1 files changed, 1194 insertions, 0 deletions
diff --git a/ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs b/ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs
new file mode 100644
index 0000000..6d0db59
--- /dev/null
+++ b/ThirdParty/CsvHelper-master/src/CsvHelper/CsvParser.cs
@@ -0,0 +1,1194 @@
+// Copyright 2009-2022 Josh Close
+// This file is a part of CsvHelper and is dual licensed under MS-PL and Apache 2.0.
+// See LICENSE.txt for details or visit http://www.opensource.org/licenses/ms-pl.html for MS-PL and http://opensource.org/licenses/Apache-2.0 for Apache 2.0.
+// https://github.com/JoshClose/CsvHelper
+using CsvHelper.Configuration;
+using CsvHelper.Delegates;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+
+namespace CsvHelper
+{
+ /// <summary>
+ /// Parses a CSV file.
+ /// </summary>
+ public class CsvParser : IParser, IDisposable
+ {
+ private readonly IParserConfiguration configuration;
+ private readonly FieldCache fieldCache = new FieldCache();
+ private readonly TextReader reader;
+ private readonly char quote;
+ private readonly char escape;
+ private readonly bool countBytes;
+ private readonly Encoding encoding;
+ private readonly bool ignoreBlankLines;
+ private readonly char comment;
+ private readonly bool allowComments;
+ private readonly BadDataFound badDataFound;
+ private readonly bool lineBreakInQuotedFieldIsBadData;
+ private readonly TrimOptions trimOptions;
+ private readonly char[] whiteSpaceChars;
+ private readonly bool leaveOpen;
+ private readonly CsvMode mode;
+ private readonly string newLine;
+ private readonly char newLineFirstChar;
+ private readonly bool isNewLineSet;
+ private readonly bool cacheFields;
+ private readonly string[] delimiterValues;
+ private readonly bool detectDelimiter;
+ private readonly double maxFieldSize;
+
+ private string delimiter;
+ private char delimiterFirstChar;
+ private char[] buffer;
+ private int bufferSize;
+ private int charsRead;
+ private int bufferPosition;
+ private int rowStartPosition;
+ private int fieldStartPosition;
+ private int row;
+ private int rawRow;
+ private long charCount;
+ private long byteCount;
+ private bool inQuotes;
+ private bool inEscape;
+ private Field[] fields;
+ private string[] processedFields;
+ private int fieldsPosition;
+ private bool disposed;
+ private int quoteCount;
+ private char[] processFieldBuffer;
+ private int processFieldBufferSize;
+ private ParserState state;
+ private int delimiterPosition = 1;
+ private int newLinePosition = 1;
+ private bool fieldIsBadData;
+ private bool fieldIsQuoted;
+ private bool isProcessingField;
+ private bool isRecordProcessed;
+ private string[]? record;
+
+ /// <inheritdoc/>
+ public long CharCount => charCount;
+
+ /// <inheritdoc/>
+ public long ByteCount => byteCount;
+
+ /// <inheritdoc/>
+ public int Row => row;
+
+ /// <inheritdoc/>
+ public string[]? Record
+ {
+ get
+ {
+ if (isRecordProcessed == true)
+ {
+ return this.record;
+ }
+
+ if (fieldsPosition == 0)
+ {
+ return null;
+ }
+
+ var record = new string[fieldsPosition];
+
+ for (var i = 0; i < record.Length; i++)
+ {
+ record[i] = this[i];
+ }
+
+ this.record = record;
+ isRecordProcessed = true;
+
+ return this.record;
+ }
+ }
+
+ /// <inheritdoc/>
+ public string RawRecord => new string(buffer, rowStartPosition, bufferPosition - rowStartPosition);
+
+ /// <inheritdoc/>
+ public int Count => fieldsPosition;
+
+ /// <inheritdoc/>
+ public int RawRow => rawRow;
+
+ /// <inheritdoc/>
+ public string Delimiter => delimiter;
+
+ /// <inheritdoc/>
+ public CsvContext Context { get; private set; }
+
+ /// <inheritdoc/>
+ public IParserConfiguration Configuration => configuration;
+
+ /// <inheritdoc/>
+ public string this[int index]
+ {
+ get
+ {
+ if (isProcessingField)
+ {
+ var message =
+ $"You can't access {nameof(IParser)}[int] or {nameof(IParser)}.{nameof(IParser.Record)} inside of the {nameof(BadDataFound)} callback. " +
+ $"Use {nameof(BadDataFoundArgs)}.{nameof(BadDataFoundArgs.Field)} and {nameof(BadDataFoundArgs)}.{nameof(BadDataFoundArgs.RawRecord)} instead."
+ ;
+
+ throw new ParserException(Context, message);
+ }
+
+ isProcessingField = true;
+
+ var field = GetField(index);
+
+ isProcessingField = false;
+
+ return field;
+ }
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CsvParser"/> class.
+ /// </summary>
+ /// <param name="reader">The reader.</param>
+ /// <param name="culture">The culture.</param>
+ /// <param name="leaveOpen">if set to <c>true</c> [leave open].</param>
+ public CsvParser(TextReader reader, CultureInfo culture, bool leaveOpen = false) : this(reader, new CsvConfiguration(culture), leaveOpen) { }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CsvParser"/> class.
+ /// </summary>
+ /// <param name="reader">The reader.</param>
+ /// <param name="configuration">The configuration.</param>
+ /// <param name="leaveOpen">if set to <c>true</c> [leave open].</param>
+ public CsvParser(TextReader reader, IParserConfiguration configuration, bool leaveOpen = false)
+ {
+ this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
+ this.configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
+
+ configuration.Validate();
+
+ Context = new CsvContext(this);
+
+ allowComments = configuration.AllowComments;
+ badDataFound = configuration.BadDataFound;
+ bufferSize = configuration.BufferSize;
+ cacheFields = configuration.CacheFields;
+ comment = configuration.Comment;
+ countBytes = configuration.CountBytes;
+ delimiter = configuration.Delimiter;
+ delimiterFirstChar = configuration.Delimiter[0];
+ delimiterValues = configuration.DetectDelimiterValues;
+ detectDelimiter = configuration.DetectDelimiter;
+ encoding = configuration.Encoding;
+ escape = configuration.Escape;
+ ignoreBlankLines = configuration.IgnoreBlankLines;
+ isNewLineSet = configuration.IsNewLineSet;
+ this.leaveOpen = leaveOpen;
+ lineBreakInQuotedFieldIsBadData = configuration.LineBreakInQuotedFieldIsBadData;
+ maxFieldSize = configuration.MaxFieldSize;
+ newLine = configuration.NewLine;
+ newLineFirstChar = configuration.NewLine[0];
+ mode = configuration.Mode;
+ processFieldBufferSize = configuration.ProcessFieldBufferSize;
+ quote = configuration.Quote;
+ whiteSpaceChars = configuration.WhiteSpaceChars;
+ trimOptions = configuration.TrimOptions;
+
+ buffer = new char[bufferSize];
+ processFieldBuffer = new char[processFieldBufferSize];
+ fields = new Field[128];
+ processedFields = new string[128];
+ }
+
+ /// <inheritdoc/>
+ public bool Read()
+ {
+ isRecordProcessed = false;
+ rowStartPosition = bufferPosition;
+ fieldStartPosition = rowStartPosition;
+ fieldsPosition = 0;
+ quoteCount = 0;
+ row++;
+ rawRow++;
+ var c = '\0';
+ var cPrev = c;
+
+ while (true)
+ {
+ if (bufferPosition >= charsRead)
+ {
+ if (!FillBuffer())
+ {
+ return ReadEndOfFile();
+ }
+
+ if (row == 1 && detectDelimiter)
+ {
+ DetectDelimiter();
+ }
+ }
+
+ if (ReadLine(ref c, ref cPrev) == ReadLineResult.Complete)
+ {
+ return true;
+ }
+ }
+ }
+
+ /// <inheritdoc/>
+ public async Task<bool> ReadAsync()
+ {
+ isRecordProcessed = false;
+ rowStartPosition = bufferPosition;
+ fieldStartPosition = rowStartPosition;
+ fieldsPosition = 0;
+ quoteCount = 0;
+ row++;
+ rawRow++;
+ var c = '\0';
+ var cPrev = c;
+
+ while (true)
+ {
+ if (bufferPosition >= charsRead)
+ {
+ if (!await FillBufferAsync().ConfigureAwait(false))
+ {
+ return ReadEndOfFile();
+ }
+
+ if (row == 1 && detectDelimiter)
+ {
+ DetectDelimiter();
+ }
+ }
+
+ if (ReadLine(ref c, ref cPrev) == ReadLineResult.Complete)
+ {
+ return true;
+ }
+ }
+ }
+
+ private void DetectDelimiter()
+ {
+ var text = new string(buffer, 0, charsRead);
+ var newDelimiter = configuration.GetDelimiter(new GetDelimiterArgs(text, configuration));
+ if (newDelimiter != null)
+ {
+ delimiter = newDelimiter;
+ delimiterFirstChar = newDelimiter[0];
+ configuration.Validate();
+ }
+ }
+
+ private ReadLineResult ReadLine(ref char c, ref char cPrev)
+ {
+ while (bufferPosition < charsRead)
+ {
+ if (state != ParserState.None)
+ {
+ // Continue the state before doing anything else.
+ ReadLineResult result;
+ switch (state)
+ {
+ case ParserState.Spaces:
+ result = ReadSpaces(ref c);
+ break;
+ case ParserState.BlankLine:
+ result = ReadBlankLine(ref c);
+ break;
+ case ParserState.Delimiter:
+ result = ReadDelimiter(ref c);
+ break;
+ case ParserState.LineEnding:
+ result = ReadLineEnding(ref c);
+ break;
+ case ParserState.NewLine:
+ result = ReadNewLine(ref c);
+ break;
+ default:
+ throw new InvalidOperationException($"Parser state '{state}' is not valid.");
+ }
+
+ var shouldReturn =
+ // Buffer needs to be filled.
+ result == ReadLineResult.Incomplete ||
+ // Done reading row.
+ result == ReadLineResult.Complete && (state == ParserState.LineEnding || state == ParserState.NewLine)
+ ;
+
+ if (result == ReadLineResult.Complete)
+ {
+ state = ParserState.None;
+ }
+
+ if (shouldReturn)
+ {
+ return result;
+ }
+ }
+
+ cPrev = c;
+ c = buffer[bufferPosition];
+ bufferPosition++;
+ charCount++;
+
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new char[] { c });
+ }
+
+ if (maxFieldSize > 0 && bufferPosition - fieldStartPosition - 1 > maxFieldSize)
+ {
+ throw new MaxFieldSizeException(Context);
+ }
+
+ var isFirstCharOfRow = rowStartPosition == bufferPosition - 1;
+ if (isFirstCharOfRow && (allowComments && c == comment || ignoreBlankLines && ((c == '\r' || c == '\n') && !isNewLineSet || c == newLineFirstChar && isNewLineSet)))
+ {
+ state = ParserState.BlankLine;
+ var result = ReadBlankLine(ref c);
+ if (result == ReadLineResult.Complete)
+ {
+ state = ParserState.None;
+
+ continue;
+ }
+ else
+ {
+ return ReadLineResult.Incomplete;
+ }
+ }
+
+ if (mode == CsvMode.RFC4180)
+ {
+ var isFirstCharOfField = fieldStartPosition == bufferPosition - 1;
+ if (isFirstCharOfField)
+ {
+ if ((trimOptions & TrimOptions.Trim) == TrimOptions.Trim && ArrayHelper.Contains(whiteSpaceChars, c))
+ {
+ // Skip through whitespace. This is so we can process the field later.
+ var result = ReadSpaces(ref c);
+ if (result == ReadLineResult.Incomplete)
+ {
+ fieldStartPosition = bufferPosition;
+ return result;
+ }
+ }
+
+ // Fields are only quoted if the first character is a quote.
+ // If not, read until a delimiter or newline is found.
+ fieldIsQuoted = c == quote;
+ }
+
+ if (fieldIsQuoted)
+ {
+ if (c == quote || c == escape)
+ {
+ quoteCount++;
+
+ if (!inQuotes && !isFirstCharOfField && cPrev != escape)
+ {
+ fieldIsBadData = true;
+ }
+ else if (!fieldIsBadData)
+ {
+ // Don't process field quotes after bad data has been detected.
+ inQuotes = !inQuotes;
+ }
+ }
+
+ if (inQuotes)
+ {
+ if (c == '\r' || c == '\n' && cPrev != '\r')
+ {
+ rawRow++;
+ }
+
+ // We don't care about anything else if we're in quotes.
+ continue;
+ }
+ }
+ else
+ {
+ if (c == quote || c == escape)
+ {
+ // If the field isn't quoted but contains a
+ // quote or escape, it's has bad data.
+ fieldIsBadData = true;
+ }
+ }
+ }
+ else if (mode == CsvMode.Escape)
+ {
+ if (inEscape)
+ {
+ inEscape = false;
+
+ continue;
+ }
+
+ if (c == escape)
+ {
+ inEscape = true;
+
+ continue;
+ }
+ }
+
+ if (c == delimiterFirstChar)
+ {
+ state = ParserState.Delimiter;
+ var result = ReadDelimiter(ref c);
+ if (result == ReadLineResult.Incomplete)
+ {
+ return result;
+ }
+
+ state = ParserState.None;
+
+ continue;
+ }
+
+ if (!isNewLineSet && (c == '\r' || c == '\n'))
+ {
+ state = ParserState.LineEnding;
+ var result = ReadLineEnding(ref c);
+ if (result == ReadLineResult.Complete)
+ {
+ state = ParserState.None;
+ }
+
+ return result;
+ }
+
+ if (isNewLineSet && c == newLineFirstChar)
+ {
+ state = ParserState.NewLine;
+ var result = ReadNewLine(ref c);
+ if (result == ReadLineResult.Complete)
+ {
+ state = ParserState.None;
+ }
+
+ return result;
+ }
+ }
+
+ return ReadLineResult.Incomplete;
+ }
+
+ private ReadLineResult ReadSpaces(ref char c)
+ {
+ while (ArrayHelper.Contains(whiteSpaceChars, c))
+ {
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+
+ c = buffer[bufferPosition];
+ bufferPosition++;
+ charCount++;
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new char[] { c });
+ }
+ }
+
+ return ReadLineResult.Complete;
+ }
+
+ private ReadLineResult ReadBlankLine(ref char c)
+ {
+ while (bufferPosition < charsRead)
+ {
+ if (c == '\r' || c == '\n')
+ {
+ var result = ReadLineEnding(ref c);
+ if (result == ReadLineResult.Complete)
+ {
+ rowStartPosition = bufferPosition;
+ fieldStartPosition = rowStartPosition;
+ row++;
+ rawRow++;
+ }
+
+ return result;
+ }
+
+ c = buffer[bufferPosition];
+ bufferPosition++;
+ charCount++;
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new char[] { c });
+ }
+ }
+
+ return ReadLineResult.Incomplete;
+ }
+
+ private ReadLineResult ReadDelimiter(ref char c)
+ {
+ for (var i = delimiterPosition; i < delimiter.Length; i++)
+ {
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+
+ delimiterPosition++;
+
+ c = buffer[bufferPosition];
+ if (c != delimiter[i])
+ {
+ c = buffer[bufferPosition - 1];
+ delimiterPosition = 1;
+
+ return ReadLineResult.Complete;
+ }
+
+ bufferPosition++;
+ charCount++;
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new[] { c });
+ }
+
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+ }
+
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - delimiter.Length);
+
+ fieldStartPosition = bufferPosition;
+ delimiterPosition = 1;
+ fieldIsBadData = false;
+
+ return ReadLineResult.Complete;
+ }
+
+ private ReadLineResult ReadLineEnding(ref char c)
+ {
+ var lessChars = 1;
+
+ if (c == '\r')
+ {
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+
+ c = buffer[bufferPosition];
+
+ if (c == '\n')
+ {
+ lessChars++;
+ bufferPosition++;
+ charCount++;
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new char[] { c });
+ }
+ }
+ }
+
+ if (state == ParserState.LineEnding)
+ {
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - lessChars);
+ }
+
+ fieldIsBadData = false;
+
+ return ReadLineResult.Complete;
+ }
+
+ private ReadLineResult ReadNewLine(ref char c)
+ {
+ for (var i = newLinePosition; i < newLine.Length; i++)
+ {
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+
+ newLinePosition++;
+
+ c = buffer[bufferPosition];
+ if (c != newLine[i])
+ {
+ c = buffer[bufferPosition - 1];
+ newLinePosition = 1;
+
+ return ReadLineResult.Complete;
+ }
+
+ bufferPosition++;
+ charCount++;
+ if (countBytes)
+ {
+ byteCount += encoding.GetByteCount(new[] { c });
+ }
+
+ if (bufferPosition >= charsRead)
+ {
+ return ReadLineResult.Incomplete;
+ }
+ }
+
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - newLine.Length);
+
+ fieldStartPosition = bufferPosition;
+ newLinePosition = 1;
+ fieldIsBadData = false;
+
+ return ReadLineResult.Complete;
+ }
+
+ private bool ReadEndOfFile()
+ {
+ var state = this.state;
+ this.state = ParserState.None;
+
+ if (state == ParserState.BlankLine)
+ {
+ return false;
+ }
+
+ if (state == ParserState.Delimiter)
+ {
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - delimiter.Length);
+
+ fieldStartPosition = bufferPosition;
+
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition);
+
+ return true;
+ }
+
+ if (state == ParserState.LineEnding)
+ {
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - 1);
+
+ return true;
+ }
+
+ if (state == ParserState.NewLine)
+ {
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition - newLine.Length);
+
+ return true;
+ }
+
+ if (rowStartPosition < bufferPosition)
+ {
+ AddField(fieldStartPosition, bufferPosition - fieldStartPosition);
+ }
+
+ return fieldsPosition > 0;
+ }
+
+ private void AddField(int start, int length)
+ {
+ if (fieldsPosition >= fields.Length)
+ {
+ var newSize = fields.Length * 2;
+ Array.Resize(ref fields, newSize);
+ Array.Resize(ref processedFields, newSize);
+ }
+
+ ref var field = ref fields[fieldsPosition];
+ field.Start = start - rowStartPosition;
+ field.Length = length;
+ field.QuoteCount = quoteCount;
+ field.IsBad = fieldIsBadData;
+ field.IsProcessed = false;
+
+ fieldsPosition++;
+ quoteCount = 0;
+ }
+
+ private bool FillBuffer()
+ {
+ // Don't forget the async method below.
+
+ if (rowStartPosition == 0 && charCount > 0 && charsRead == bufferSize)
+ {
+ // The record is longer than the memory buffer. Increase the buffer.
+ bufferSize *= 2;
+ var tempBuffer = new char[bufferSize];
+ buffer.CopyTo(tempBuffer, 0);
+ buffer = tempBuffer;
+ }
+
+ var charsLeft = Math.Max(charsRead - rowStartPosition, 0);
+
+ Array.Copy(buffer, rowStartPosition, buffer, 0, charsLeft);
+
+ fieldStartPosition -= rowStartPosition;
+ rowStartPosition = 0;
+ bufferPosition = charsLeft;
+
+ charsRead = reader.Read(buffer, charsLeft, buffer.Length - charsLeft);
+ if (charsRead == 0)
+ {
+ return false;
+ }
+
+ charsRead += charsLeft;
+
+ return true;
+ }
+
+ private async Task<bool> FillBufferAsync()
+ {
+ if (rowStartPosition == 0 && charCount > 0 && charsRead == bufferSize)
+ {
+ // The record is longer than the memory buffer. Increase the buffer.
+ bufferSize *= 2;
+ var tempBuffer = new char[bufferSize];
+ buffer.CopyTo(tempBuffer, 0);
+ buffer = tempBuffer;
+ }
+
+ var charsLeft = Math.Max(charsRead - rowStartPosition, 0);
+
+ Array.Copy(buffer, rowStartPosition, buffer, 0, charsLeft);
+
+ fieldStartPosition -= rowStartPosition;
+ rowStartPosition = 0;
+ bufferPosition = charsLeft;
+
+ charsRead = await reader.ReadAsync(buffer, charsLeft, buffer.Length - charsLeft).ConfigureAwait(false);
+ if (charsRead == 0)
+ {
+ return false;
+ }
+
+ charsRead += charsLeft;
+
+ return true;
+ }
+
+ private string GetField(int index)
+ {
+ if (index > fieldsPosition)
+ {
+ throw new IndexOutOfRangeException();
+ }
+
+ ref var field = ref fields[index];
+
+ if (field.Length == 0)
+ {
+ return string.Empty;
+ }
+
+ if (field.IsProcessed)
+ {
+ return processedFields[index];
+ }
+
+ var start = field.Start + rowStartPosition;
+ var length = field.Length;
+ var quoteCount = field.QuoteCount;
+
+ ProcessedField processedField;
+ switch (mode)
+ {
+ case CsvMode.RFC4180:
+ processedField = field.IsBad
+ ? ProcessRFC4180BadField(start, length)
+ : ProcessRFC4180Field(start, length, quoteCount);
+ break;
+ case CsvMode.Escape:
+ processedField = ProcessEscapeField(start, length);
+ break;
+ case CsvMode.NoEscape:
+ processedField = ProcessNoEscapeField(start, length);
+ break;
+ default:
+ throw new InvalidOperationException($"ParseMode '{mode}' is not handled.");
+ }
+
+ var value = cacheFields
+ ? fieldCache.GetField(processedField.Buffer, processedField.Start, processedField.Length)
+ : new string(processedField.Buffer, processedField.Start, processedField.Length);
+
+ processedFields[index] = value;
+ field.IsProcessed = true;
+
+ return value;
+ }
+
+ /// <summary>
+ /// Processes a field that complies with RFC4180.
+ /// </summary>
+ /// <param name="start">The start index of the field.</param>
+ /// <param name="length">The length of the field.</param>
+ /// <param name="quoteCount">The number of counted quotes.</param>
+ /// <returns>The processed field.</returns>
+ protected ProcessedField ProcessRFC4180Field(int start, int length, int quoteCount)
+ {
+ var newStart = start;
+ var newLength = length;
+
+ if ((trimOptions & TrimOptions.Trim) == TrimOptions.Trim)
+ {
+ ArrayHelper.Trim(buffer, ref newStart, ref newLength, whiteSpaceChars);
+ }
+
+ if (quoteCount == 0)
+ {
+ // Not quoted.
+ // No processing needed.
+
+ return new ProcessedField(newStart, newLength, buffer);
+ }
+
+ if (buffer[newStart] != quote || buffer[newStart + newLength - 1] != quote || newLength == 1 && buffer[newStart] == quote)
+ {
+ // If the field doesn't have quotes on the ends, or the field is a single quote char, it's bad data.
+ return ProcessRFC4180BadField(start, length);
+ }
+
+ if (lineBreakInQuotedFieldIsBadData)
+ {
+ for (var i = newStart; i < newStart + newLength; i++)
+ {
+ if (buffer[i] == '\r' || buffer[i] == '\n')
+ {
+ return ProcessRFC4180BadField(start, length);
+ }
+ }
+ }
+
+ // Remove the quotes from the ends.
+ newStart += 1;
+ newLength -= 2;
+
+ if ((trimOptions & TrimOptions.InsideQuotes) == TrimOptions.InsideQuotes)
+ {
+ ArrayHelper.Trim(buffer, ref newStart, ref newLength, whiteSpaceChars);
+ }
+
+ if (quoteCount == 2)
+ {
+ // The only quotes are the ends of the field.
+ // No more processing is needed.
+ return new ProcessedField(newStart, newLength, buffer);
+ }
+
+ if (newLength > processFieldBuffer.Length)
+ {
+ // Make sure the field processing buffer is large engough.
+ while (newLength > processFieldBufferSize)
+ {
+ processFieldBufferSize *= 2;
+ }
+
+ processFieldBuffer = new char[processFieldBufferSize];
+ }
+
+ // Remove escapes.
+ var inEscape = false;
+ var position = 0;
+ for (var i = newStart; i < newStart + newLength; i++)
+ {
+ var c = buffer[i];
+
+ if (inEscape)
+ {
+ inEscape = false;
+ }
+ else if (c == escape)
+ {
+ inEscape = true;
+
+ continue;
+ }
+
+ processFieldBuffer[position] = c;
+ position++;
+ }
+
+ return new ProcessedField(0, position, processFieldBuffer);
+ }
+
+ /// <summary>
+ /// Processes a field that does not comply with RFC4180.
+ /// </summary>
+ /// <param name="start">The start index of the field.</param>
+ /// <param name="length">The length of the field.</param>
+ /// <returns>The processed field.</returns>
+ protected ProcessedField ProcessRFC4180BadField(int start, int length)
+ {
+ // If field is already known to be bad, different rules can be applied.
+
+ var args = new BadDataFoundArgs(new string(buffer, start, length), RawRecord, Context);
+ badDataFound?.Invoke(args);
+
+ var newStart = start;
+ var newLength = length;
+
+ if ((trimOptions & TrimOptions.Trim) == TrimOptions.Trim)
+ {
+ ArrayHelper.Trim(buffer, ref newStart, ref newLength, whiteSpaceChars);
+ }
+
+ if (buffer[newStart] != quote)
+ {
+ // If the field doesn't start with a quote, don't process it.
+ return new ProcessedField(newStart, newLength, buffer);
+ }
+
+ if (newLength > processFieldBuffer.Length)
+ {
+ // Make sure the field processing buffer is large engough.
+ while (newLength > processFieldBufferSize)
+ {
+ processFieldBufferSize *= 2;
+ }
+
+ processFieldBuffer = new char[processFieldBufferSize];
+ }
+
+ // Remove escapes until the last quote is found.
+ var inEscape = false;
+ var position = 0;
+ var c = '\0';
+ var doneProcessing = false;
+ for (var i = newStart + 1; i < newStart + newLength; i++)
+ {
+ var cPrev = c;
+ c = buffer[i];
+
+ // a,"b",c
+ // a,"b "" c",d
+ // a,"b "c d",e
+
+ if (inEscape)
+ {
+ inEscape = false;
+
+ if (c == quote)
+ {
+ // Ignore the quote after an escape.
+ continue;
+ }
+ else if (cPrev == quote)
+ {
+ // The escape and quote are the same character.
+ // This is the end of the field.
+ // Don't process escapes for the rest of the field.
+ doneProcessing = true;
+ }
+ }
+
+ if (c == escape && !doneProcessing)
+ {
+ inEscape = true;
+
+ continue;
+ }
+
+ processFieldBuffer[position] = c;
+ position++;
+ }
+
+ return new ProcessedField(0, position, processFieldBuffer);
+ }
+
+ /// <summary>
+ /// Processes an escaped field.
+ /// </summary>
+ /// <param name="start">The start index of the field.</param>
+ /// <param name="length">The length of the field.</param>
+ /// <returns>The processed field.</returns>
+ protected ProcessedField ProcessEscapeField(int start, int length)
+ {
+ var newStart = start;
+ var newLength = length;
+
+ if ((trimOptions & TrimOptions.Trim) == TrimOptions.Trim)
+ {
+ ArrayHelper.Trim(buffer, ref newStart, ref newLength, whiteSpaceChars);
+ }
+
+ if (newLength > processFieldBuffer.Length)
+ {
+ // Make sure the field processing buffer is large engough.
+ while (newLength > processFieldBufferSize)
+ {
+ processFieldBufferSize *= 2;
+ }
+
+ processFieldBuffer = new char[processFieldBufferSize];
+ }
+
+ // Remove escapes.
+ var inEscape = false;
+ var position = 0;
+ for (var i = newStart; i < newStart + newLength; i++)
+ {
+ var c = buffer[i];
+
+ if (inEscape)
+ {
+ inEscape = false;
+ }
+ else if (c == escape)
+ {
+ inEscape = true;
+ continue;
+ }
+
+ processFieldBuffer[position] = c;
+ position++;
+ }
+
+ return new ProcessedField(0, position, processFieldBuffer);
+ }
+
+ /// <inheritdoc/>
+ /// <summary>
+ /// Processes an non-escaped field.
+ /// </summary>
+ /// <param name="start">The start index of the field.</param>
+ /// <param name="length">The length of the field.</param>
+ /// <returns>The processed field.</returns>
+ protected ProcessedField ProcessNoEscapeField(int start, int length)
+ {
+ var newStart = start;
+ var newLength = length;
+
+ if ((trimOptions & TrimOptions.Trim) == TrimOptions.Trim)
+ {
+ ArrayHelper.Trim(buffer, ref newStart, ref newLength, whiteSpaceChars);
+ }
+
+ return new ProcessedField(newStart, newLength, buffer);
+ }
+
+ /// <inheritdoc/>
+ public void Dispose()
+ {
+ // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+ Dispose(disposing: true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Disposes the object.
+ /// </summary>
+ /// <param name="disposing">Indicates if the object is being disposed.</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposed)
+ {
+ return;
+ }
+
+ if (disposing)
+ {
+ // Dispose managed state (managed objects)
+
+ if (!leaveOpen)
+ {
+ reader?.Dispose();
+ }
+ }
+
+ // Free unmanaged resources (unmanaged objects) and override finalizer
+ // Set large fields to null
+
+ disposed = true;
+ }
+
+ /// <summary>
+ /// Processes a raw field based on configuration.
+ /// This will remove quotes, remove escapes, and trim if configured to.
+ /// </summary>
+ [DebuggerDisplay("Start = {Start}, Length = {Length}, Buffer.Length = {Buffer.Length}")]
+ protected readonly struct ProcessedField
+ {
+ /// <summary>
+ /// The start of the field in the buffer.
+ /// </summary>
+ public readonly int Start;
+
+ /// <summary>
+ /// The length of the field in the buffer.
+ /// </summary>
+ public readonly int Length;
+
+ /// <summary>
+ /// The buffer that contains the field.
+ /// </summary>
+ public readonly char[] Buffer;
+
+ /// <summary>
+ /// Creates a new instance of ProcessedField.
+ /// </summary>
+ /// <param name="start">The start of the field in the buffer.</param>
+ /// <param name="length">The length of the field in the buffer.</param>
+ /// <param name="buffer">The buffer that contains the field.</param>
+ public ProcessedField(int start, int length, char[] buffer)
+ {
+ Start = start;
+ Length = length;
+ Buffer = buffer;
+ }
+ }
+
+ private enum ReadLineResult
+ {
+ None = 0,
+ Complete,
+ Incomplete,
+ }
+
+ private enum ParserState
+ {
+ None = 0,
+ Spaces,
+ BlankLine,
+ Delimiter,
+ LineEnding,
+ NewLine,
+ }
+
+ [DebuggerDisplay("Start = {Start}, Length = {Length}, QuoteCount = {QuoteCount}, IsBad = {IsBad}")]
+ private struct Field
+ {
+ /// <summary>
+ /// Starting position of the field.
+ /// This is an offset from <see cref="rowStartPosition"/>.
+ /// </summary>
+ public int Start;
+
+ public int Length;
+
+ public int QuoteCount;
+
+ public bool IsBad;
+
+ public bool IsProcessed;
+ }
+ }
+}