/*
* CSV Parser for C#.
*
* These codes are licensed under CC0.
* https://github.com/yutokun/CSV-Parser
*/
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace yutokun
{
public static class CSVParser
{
    /// <summary>
    /// Load CSV data from specified path.
    /// </summary>
    /// <param name="path">CSV file path.</param>
    /// <param name="delimiter">
    /// Delimiter. With <see cref="Delimiter.Auto"/> the delimiter is chosen from the file
    /// extension (".csv" gives comma, ".tsv" gives tab, case-insensitive).
    /// </param>
    /// <param name="encoding">Type of text encoding. (default UTF-8)</param>
    /// <returns>Parsed rows; each row is a list of cell strings.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    /// <exception cref="Exception">
    /// <paramref name="delimiter"/> is <see cref="Delimiter.Auto"/> and the extension is neither ".csv" nor ".tsv".
    /// </exception>
    public static List<List<string>> LoadFromPath(string path, Delimiter delimiter = Delimiter.Auto, Encoding encoding = null)
    {
        if (path == null)
        {
            throw new ArgumentNullException(nameof(path));
        }

        encoding = encoding ?? Encoding.UTF8;

        if (delimiter == Delimiter.Auto)
        {
            delimiter = EstimateDelimiter(path);
        }

        var data = File.ReadAllText(path, encoding);
        return Parse(data, delimiter);
    }

    /// <summary>
    /// Load CSV data asynchronously from specified path.
    /// </summary>
    /// <param name="path">CSV file path.</param>
    /// <param name="delimiter">
    /// Delimiter. With <see cref="Delimiter.Auto"/> the delimiter is chosen from the file
    /// extension (".csv" gives comma, ".tsv" gives tab, case-insensitive).
    /// </param>
    /// <param name="encoding">Type of text encoding. (default UTF-8)</param>
    /// <returns>A task producing the parsed rows; each row is a list of cell strings.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    /// <exception cref="Exception">
    /// <paramref name="delimiter"/> is <see cref="Delimiter.Auto"/> and the extension is neither ".csv" nor ".tsv".
    /// </exception>
    public static async Task<List<List<string>>> LoadFromPathAsync(string path, Delimiter delimiter = Delimiter.Auto, Encoding encoding = null)
    {
        if (path == null)
        {
            throw new ArgumentNullException(nameof(path));
        }

        encoding = encoding ?? Encoding.UTF8;

        if (delimiter == Delimiter.Auto)
        {
            delimiter = EstimateDelimiter(path);
        }

        using (var reader = new StreamReader(path, encoding))
        {
            // Parsing is pure CPU work on a local string, so there is no need to
            // resume on the caller's synchronization context.
            var data = await reader.ReadToEndAsync().ConfigureAwait(false);
            return Parse(data, delimiter);
        }
    }

    /// <summary>
    /// Chooses a delimiter from the extension of <paramref name="path"/>.
    /// </summary>
    /// <param name="path">File path whose extension is inspected.</param>
    /// <returns>Comma for ".csv", tab for ".tsv" (case-insensitive).</returns>
    /// <exception cref="Exception">The extension is anything else.</exception>
    static Delimiter EstimateDelimiter(string path)
    {
        var extension = Path.GetExtension(path);

        if (extension.Equals(".csv", StringComparison.OrdinalIgnoreCase))
        {
            return Delimiter.Comma;
        }

        if (extension.Equals(".tsv", StringComparison.OrdinalIgnoreCase))
        {
            return Delimiter.Tab;
        }

        throw new Exception($"Delimiter estimation failed. Unknown Extension: {extension}");
    }

    /// <summary>
    /// Load CSV data from string.
    /// </summary>
    /// <param name="data">CSV string</param>
    /// <param name="delimiter">Delimiter. <see cref="Delimiter.Auto"/> is not accepted here.</param>
    /// <returns>Parsed rows; each row is a list of cell strings.</returns>
    /// <exception cref="InvalidEnumArgumentException"><paramref name="delimiter"/> is <see cref="Delimiter.Auto"/>.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public static List<List<string>> LoadFromString(string data, Delimiter delimiter = Delimiter.Comma)
    {
        if (delimiter == Delimiter.Auto)
        {
            throw new InvalidEnumArgumentException("Delimiter estimation from string is not supported.");
        }

        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        return Parse(data, delimiter);
    }

    /// <summary>
    /// Splits <paramref name="data"/> into rows and cells.
    /// </summary>
    /// <remarks>
    /// All line breaks are normalized to CRLF before scanning, so a line break inside a
    /// quoted cell is also returned as CRLF. Inside a quoted cell, a doubled double quote
    /// stands for one literal double quote.
    /// </remarks>
    /// <param name="data">Raw text to parse.</param>
    /// <param name="delimiter">Cell separator; must not be <see cref="Delimiter.Auto"/>.</param>
    /// <returns>Parsed rows; each row is a list of cell strings.</returns>
    static List<List<string>> Parse(string data, Delimiter delimiter)
    {
        ConvertToCrlf(ref data);

        var sheet = new List<List<string>>();
        var row = new List<string>();
        var cell = new StringBuilder();
        var insideQuoteCell = false;
        // True once the current cell has seen a quote character, so that an empty
        // quoted cell ("") on the last line still counts as a cell.
        var cellHasQuote = false;
        var start = 0;

        // ToChar() is declared outside this file; presumably it maps the enum value to
        // its separator character. Evaluated once instead of on every iteration.
        var delimiterChar = delimiter.ToChar();
        var delimiterSpan = delimiterChar.ToString().AsSpan();
        var crlfSpan = "\r\n".AsSpan();
        var oneDoubleQuotSpan = "\"".AsSpan();
        var twoDoubleQuotSpan = "\"\"".AsSpan();

        while (start < data.Length)
        {
            // Look at two characters when available so CRLF and "" can be recognized.
            var length = start <= data.Length - 2 ? 2 : 1;
            var span = data.AsSpan(start, length);

            if (span.StartsWith(delimiterSpan))
            {
                if (insideQuoteCell)
                {
                    cell.Append(delimiterChar);
                }
                else
                {
                    AddCell(row, cell);
                    cellHasQuote = false;
                }

                start += 1;
            }
            else if (span.StartsWith(crlfSpan))
            {
                if (insideQuoteCell)
                {
                    cell.Append("\r\n");
                }
                else
                {
                    AddCell(row, cell);
                    cellHasQuote = false;
                    AddRow(sheet, ref row);
                }

                start += 2;
            }
            else if (span.StartsWith(twoDoubleQuotSpan) && (insideQuoteCell || cell.Length > 0))
            {
                // Escaped quote. At the very start of an unquoted cell the first quote is
                // instead an opening quote (handled below), which is what makes an empty
                // quoted cell ("") parse as empty rather than as a literal quote.
                cell.Append('"');
                start += 2;
            }
            else if (span.StartsWith(oneDoubleQuotSpan))
            {
                insideQuoteCell = !insideQuoteCell;
                cellHasQuote = true;
                start += 1;
            }
            else
            {
                cell.Append(span[0]);
                start += 1;
            }
        }

        // Flush the last row when the input does not end with a line break.
        if (row.Count > 0 || cell.Length > 0 || cellHasQuote)
        {
            AddCell(row, cell);
            AddRow(sheet, ref row);
        }

        return sheet;
    }

    /// <summary>
    /// Appends the text accumulated in <paramref name="cell"/> to <paramref name="row"/> and clears the builder.
    /// </summary>
    static void AddCell(List<string> row, StringBuilder cell)
    {
        row.Add(cell.ToString());
        cell.Length = 0; // Old C#.
    }

    /// <summary>
    /// Appends <paramref name="row"/> to <paramref name="sheet"/> and replaces it with a fresh empty row.
    /// </summary>
    static void AddRow(List<List<string>> sheet, ref List<string> row)
    {
        sheet.Add(row);
        row = new List<string>();
    }

    /// <summary>
    /// Rewrites every CRLF, lone CR and lone LF in <paramref name="data"/> as CRLF.
    /// </summary>
    static void ConvertToCrlf(ref string data)
    {
        data = Regex.Replace(data, @"\r\n|\r|\n", "\r\n");
    }
}
}