// Copyright 2009-2022 Josh Close
// This file is a part of CsvHelper and is dual licensed under MS-PL and Apache 2.0.
// See LICENSE.txt for details or visit http://www.opensource.org/licenses/ms-pl.html for MS-PL and http://opensource.org/licenses/Apache-2.0 for Apache 2.0.
// https://github.com/JoshClose/CsvHelper
using CsvHelper.Delegates;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
namespace CsvHelper.Configuration
{
/// <summary>
/// Holds the default callback methods for delegate members of <c>CsvHelper.Configuration.Configuration</c>.
/// </summary>
public static class ConfigurationFunctions
{
// Carriage return and line feed; ShouldQuote searches a field for either of these
// when the configuration reports that no explicit newline has been set.
private static readonly char[] lineEndingChars = new char[] { '\r', '\n' };
/// <summary>
/// Throws a <see cref="HeaderValidationException"/> if <c>args.InvalidHeaders</c> is not empty.
/// </summary>
/// <param name="args">The args. Supplies the invalid headers and the reading context.</param>
/// <exception cref="HeaderValidationException">
/// Thrown when <c>args.InvalidHeaders</c> contains at least one entry. The message lists each
/// missing header, the headers that were actually read (when available), and how to disable the check.
/// </exception>
public static void HeaderValidated(HeaderValidatedArgs args)
{
    if (!args.InvalidHeaders.Any())
    {
        return;
    }

    var errorMessage = new StringBuilder();
    foreach (var invalidHeader in args.InvalidHeaders)
    {
        errorMessage.AppendLine($"Header with name '{string.Join("' or '", invalidHeader.Names)}'[{invalidHeader.Index}] was not found.");
    }

    var headerRecord = args.Context.Reader.HeaderRecord;
    if (headerRecord != null)
    {
        // The joined list already contains every header, so it is written exactly once.
        // (Appending it inside a loop over the headers repeated the same line once per header.)
        errorMessage.AppendLine($"Headers: '{string.Join("', '", headerRecord)}'");
    }

    var messagePostfix =
        $"If you are expecting some headers to be missing and want to ignore this validation, " +
        $"set the configuration {nameof(HeaderValidated)} to null. You can also change the " +
        $"functionality to do something else, like logging the issue.";
    errorMessage.AppendLine(messagePostfix);

    throw new HeaderValidationException(args.Context, args.InvalidHeaders, errorMessage.ToString());
}
/// <summary>
/// Throws a <see cref="MissingFieldException"/> describing the field that could not be found.
/// </summary>
/// <param name="args">The args. Supplies the header names (if any), the index, and the context.</param>
/// <exception cref="MissingFieldException">Always thrown.</exception>
public static void MissingFieldFound(MissingFieldFoundArgs args)
{
    var hint = $"You can ignore missing fields by setting {nameof(MissingFieldFound)} to null.";
    var names = args.HeaderNames;

    string message;
    if (names == null || names.Length == 0)
    {
        // No names were supplied, so the field was requested by index.
        message = $"Field at index '{args.Index}' does not exist. {hint}";
    }
    else
    {
        // The field was requested by name; the index is only mentioned when it is greater than zero.
        var position = args.Index > 0 ? $" at field index '{args.Index}'" : string.Empty;
        if (names.Length == 1)
        {
            message = $"Field with name '{names[0]}'{position} does not exist. {hint}";
        }
        else
        {
            message = $"Field containing names '{string.Join("' or '", names)}'{position} does not exist. {hint}";
        }
    }

    throw new MissingFieldException(args.Context, message);
}
/// <summary>
/// Throws a <see cref="BadDataException"/> built from the field, raw record, and context in the args.
/// </summary>
/// <param name="args">The args.</param>
/// <exception cref="BadDataException">Always thrown.</exception>
public static void BadDataFound(BadDataFoundArgs args)
{
    var hint = $"You can ignore bad data by setting {nameof(BadDataFound)} to null.";

    throw new BadDataException(args.Field, args.RawRecord, args.Context, hint);
}
/// <summary>
/// Default handler for exceptions that occur while reading. Always returns <c>true</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably a <c>true</c> result tells the caller to throw the exception
/// carried by <paramref name="args"/>; confirm against the calling code.
/// </remarks>
/// <param name="args">The args. Not inspected by this implementation.</param>
/// <returns>Always <c>true</c>.</returns>
public static bool ReadingExceptionOccurred(ReadingExceptionOccurredArgs args) => true;
/// <summary>
/// Returns true if the field contains the configured quote, starts with a space,
/// ends with a space, contains the configured delimiter, or contains a line ending:
/// \r or \n when no newline has been explicitly set, otherwise the configured newline.
/// A null or empty field is never quoted.
/// </summary>
/// <param name="args">The args.</param>
/// <returns><c>true</c> if the field should be quoted, otherwise <c>false</c>.</returns>
public static bool ShouldQuote(ShouldQuoteArgs args)
{
    var config = args.Row.Configuration;
    var field = args.Field;

    if (string.IsNullOrEmpty(field))
    {
        return false;
    }

    // Contains quote.
    if (field.Contains(config.Quote))
    {
        return true;
    }

    // Starts or ends with a space.
    if (field[0] == ' ' || field[field.Length - 1] == ' ')
    {
        return true;
    }

    // Contains delimiter.
    if (config.Delimiter.Length > 0 && field.Contains(config.Delimiter))
    {
        return true;
    }

    // Contains the configured newline when one is set, otherwise any line ending character.
    return config.IsNewLineSet
        ? field.Contains(config.NewLine)
        : field.IndexOfAny(lineEndingChars) > -1;
}
/// <summary>
/// Returns the header from the args unchanged.
/// </summary>
/// <param name="args">The args.</param>
/// <returns>The value of <c>args.Header</c>, as given.</returns>
public static string PrepareHeaderForMatch(PrepareHeaderForMatchArgs args) => args.Header;
/// <summary>
/// Returns true if the parameter type in the args:
/// 1. does not have a parameterless constructor
/// 2. has a constructor
/// 3. is not a value type
/// 4. is not a primitive
/// 5. is not an enum
/// 6. is not an interface
/// 7. has a TypeCode of Object.
/// </summary>
/// <param name="args">The args.</param>
/// <returns><c>true</c> when every condition above holds, otherwise <c>false</c>.</returns>
public static bool ShouldUseConstructorParameters(ShouldUseConstructorParametersArgs args)
{
    var type = args.ParameterType;

    // Constructor requirements: no parameterless constructor, but at least one constructor.
    if (type.HasParameterlessConstructor() || !type.HasConstructor())
    {
        return false;
    }

    // Kinds of types that are never built through constructor parameters.
    if (type.IsValueType || type.IsPrimitive || type.IsEnum || type.IsInterface)
    {
        return false;
    }

    return Type.GetTypeCode(type) == TypeCode.Object;
}
/// <summary>
/// Returns the type's constructor with the most parameters.
/// If two constructors have the same number of parameters, then
/// there is no guarantee which one will be returned. If you have
/// that situation, you should probably implement this function yourself.
/// </summary>
/// <param name="args">The args.</param>
/// <returns>The result of <c>GetConstructorWithMostParameters</c> for <c>args.ClassType</c>.</returns>
public static ConstructorInfo GetConstructor(GetConstructorArgs args) =>
    args.ClassType.GetConstructorWithMostParameters();
/// <summary>
/// Returns the header name ran through the configuration's <c>PrepareHeaderForMatch</c>.
/// If no header exists, property names will be Field1, Field2, Field3, etc.
/// </summary>
/// <param name="args">The args.</param>
/// <returns>The dynamic property name for the field at <c>args.FieldIndex</c>.</returns>
public static string GetDynamicPropertyName(GetDynamicPropertyNameArgs args)
{
    var reader = args.Context.Reader;
    var headerRecord = reader.HeaderRecord;

    if (headerRecord == null)
    {
        // No header record: generate a one-based positional name.
        return $"Field{args.FieldIndex + 1}";
    }

    var rawName = headerRecord[args.FieldIndex];
    var matchArgs = new PrepareHeaderForMatchArgs(rawName, args.FieldIndex);

    return reader.Configuration.PrepareHeaderForMatch(matchArgs);
}
/// <summary>
/// Detects the delimiter based on the given text.
/// Returns the detected delimiter, or the configured delimiter if one wasn't found.
/// </summary>
/// <remarks>
/// Quoted text (RFC4180 mode) or escaped characters (Escape mode) are removed first, then the
/// occurrences of each candidate in <c>DetectDelimiterValues</c> are counted per line. Only
/// candidates that appear on every counted line are ranked. When more than one line was read,
/// the last line is treated as incomplete and ignored.
/// </remarks>
/// <param name="args">The args. Supplies the text to inspect and the configuration.</param>
/// <returns>
/// The culture's list separator if it appears on every line and more than one line was counted;
/// otherwise the candidate with the highest total count; otherwise <c>config.Delimiter</c>.
/// </returns>
public static string? GetDelimiter(GetDelimiterArgs args)
{
    var text = args.Text;
    var config = args.Configuration;

    if (config.Mode == CsvMode.RFC4180)
    {
        // Remove text in between pairs of quotes.
        // The quote is regex-escaped so it is always matched literally.
        var quote = Regex.Escape(config.Quote.ToString());
        text = Regex.Replace(text, $"{quote}.*?{quote}", string.Empty, RegexOptions.Singleline);
    }
    else if (config.Mode == CsvMode.Escape)
    {
        // Remove escaped characters.
        // The escape is regex-escaped: an unescaped backslash would turn the pattern
        // into "\." and strip literal dots instead of escaped characters.
        var escape = Regex.Escape(config.Escape.ToString());
        text = Regex.Replace(text, $"({escape}.)", string.Empty, RegexOptions.Singleline);
    }

    // The standard line endings are treated as interchangeable. Any other newline is
    // matched literally, so it must be escaped before being used as a pattern.
    var newLinePattern = (new[] { "\r\n", "\r", "\n" }).Contains(config.NewLine)
        ? "\r\n|\r|\n"
        : Regex.Escape(config.NewLine);

    var lineDelimiterCounts = new List<Dictionary<string, int>>();
    while (text.Length > 0)
    {
        // Since all escaped text has been removed, we can reliably read line by line.
        var match = Regex.Match(text, newLinePattern);

        // A zero-length match (empty newline pattern) would never consume any text and
        // loop forever, so it is treated the same as "no newline found".
        var lineLength = match.Success && match.Length > 0
            ? match.Index + match.Length
            : text.Length;
        var line = text.Substring(0, lineLength);

        var delimiterCounts = new Dictionary<string, int>();
        foreach (var delimiter in config.DetectDelimiterValues)
        {
            // Escape regex special chars to use as regex pattern.
            delimiterCounts[delimiter] = Regex.Matches(line, Regex.Escape(delimiter)).Count;
        }

        lineDelimiterCounts.Add(delimiterCounts);
        text = text.Substring(lineLength);
    }

    if (lineDelimiterCounts.Count > 1)
    {
        // The last line isn't complete and can't be used to reliably detect a delimiter.
        lineDelimiterCounts.RemoveAt(lineDelimiterCounts.Count - 1);
    }

    // Rank only the delimiters that appear on every line.
    var delimiters =
    (
        from counts in lineDelimiterCounts
        from count in counts
        group count by count.Key into g
        where g.All(x => x.Value > 0)
        let sum = g.Sum(x => x.Value)
        orderby sum descending
        select new
        {
            Delimiter = g.Key,
            Count = sum
        }
    ).ToList();

    string? newDelimiter = null;
    if (delimiters.Any(x => x.Delimiter == config.CultureInfo.TextInfo.ListSeparator) && lineDelimiterCounts.Count > 1)
    {
        // The culture's separator is on every line. Assume this is the delimiter.
        newDelimiter = config.CultureInfo.TextInfo.ListSeparator;
    }
    else
    {
        // Choose the highest ranked delimiter.
        newDelimiter = delimiters.Select(x => x.Delimiter).FirstOrDefault();
    }

    if (newDelimiter != null)
    {
        // NOTE(review): the detected delimiter is not assigned to the configuration here,
        // so this validates the configuration as it currently stands; confirm intent.
        config.Validate();
    }

    // Fall back to the configured delimiter when nothing was detected.
    return newDelimiter ?? config.Delimiter;
}
}
}