/* * CSV Parser for C#. * * These codes are licensed under CC0. * https://github.com/yutokun/CSV-Parser */ using System; using System.Collections.Generic; using System.ComponentModel; using System.IO; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; namespace yutokun { public static class CSVParser { /// /// Load CSV data from specified path. /// /// CSV file path. /// Delimiter. /// Type of text encoding. (default UTF-8) /// Nested list that CSV parsed. public static List> LoadFromPath(string path, Delimiter delimiter = Delimiter.Auto, Encoding encoding = null) { encoding = encoding ?? Encoding.UTF8; if (delimiter == Delimiter.Auto) { delimiter = EstimateDelimiter(path); } var data = File.ReadAllText(path, encoding); return Parse(data, delimiter); } /// /// Load CSV data asynchronously from specified path. /// /// CSV file path. /// Delimiter. /// Type of text encoding. (default UTF-8) /// Nested list that CSV parsed. public static async Task>> LoadFromPathAsync(string path, Delimiter delimiter = Delimiter.Auto, Encoding encoding = null) { encoding = encoding ?? Encoding.UTF8; if (delimiter == Delimiter.Auto) { delimiter = EstimateDelimiter(path); } using (var reader = new StreamReader(path, encoding)) { var data = await reader.ReadToEndAsync(); return Parse(data, delimiter); } } static Delimiter EstimateDelimiter(string path) { var extension = Path.GetExtension(path); if (extension.Equals(".csv", StringComparison.OrdinalIgnoreCase)) { return Delimiter.Comma; } if (extension.Equals(".tsv", StringComparison.OrdinalIgnoreCase)) { return Delimiter.Tab; } throw new Exception($"Delimiter estimation failed. Unknown Extension: {extension}"); } /// /// Load CSV data from string. /// /// CSV string /// Delimiter. /// Nested list that CSV parsed. public static List> LoadFromString(string data, Delimiter delimiter = Delimiter.Comma) { if (delimiter == Delimiter.Auto) throw new InvalidEnumArgumentException("Delimiter estimation from string is not supported."); return Parse(data, delimiter); } static List> Parse(string data, Delimiter delimiter) { ConvertToCrlf(ref data); var sheet = new List>(); var row = new List(); var cell = new StringBuilder(); var insideQuoteCell = false; var start = 0; var delimiterSpan = delimiter.ToChar().ToString().AsSpan(); var crlfSpan = "\r\n".AsSpan(); var oneDoubleQuotSpan = "\"".AsSpan(); var twoDoubleQuotSpan = "\"\"".AsSpan(); while (start < data.Length) { var length = start <= data.Length - 2 ? 2 : 1; var span = data.AsSpan(start, length); if (span.StartsWith(delimiterSpan)) { if (insideQuoteCell) { cell.Append(delimiter.ToChar()); } else { AddCell(row, cell); } start += 1; } else if (span.StartsWith(crlfSpan)) { if (insideQuoteCell) { cell.Append("\r\n"); } else { AddCell(row, cell); AddRow(sheet, ref row); } start += 2; } else if (span.StartsWith(twoDoubleQuotSpan)) { cell.Append("\""); start += 2; } else if (span.StartsWith(oneDoubleQuotSpan)) { insideQuoteCell = !insideQuoteCell; start += 1; } else { cell.Append(span[0]); start += 1; } } if (row.Count > 0 || cell.Length > 0) { AddCell(row, cell); AddRow(sheet, ref row); } return sheet; } static void AddCell(List row, StringBuilder cell) { row.Add(cell.ToString()); cell.Length = 0; // Old C#. } static void AddRow(List> sheet, ref List row) { sheet.Add(row); row = new List(); } static void ConvertToCrlf(ref string data) { data = Regex.Replace(data, @"\r\n|\r|\n", "\r\n"); } } }