diff options
author | chai <chaifix@163.com> | 2021-10-30 11:32:16 +0800 |
---|---|---|
committer | chai <chaifix@163.com> | 2021-10-30 11:32:16 +0800 |
commit | 42ec7286b2d36a9ba22925f816a17cb1cc2aa5ce (patch) | |
tree | 24bc7009457a8d7500f264e89946dc20d069294f /Data/Libraries/Penlight/lua/pl/input.lua | |
parent | 164885fd98d48703bd771f802d79557b7db97431 (diff) |
+ Penlight
Diffstat (limited to 'Data/Libraries/Penlight/lua/pl/input.lua')
-rw-r--r-- | Data/Libraries/Penlight/lua/pl/input.lua | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/Data/Libraries/Penlight/lua/pl/input.lua b/Data/Libraries/Penlight/lua/pl/input.lua new file mode 100644 index 0000000..f81de87 --- /dev/null +++ b/Data/Libraries/Penlight/lua/pl/input.lua @@ -0,0 +1,171 @@ +--- Iterators for extracting words or numbers from an input source. +-- +-- require 'pl' +-- local total,n = seq.sum(input.numbers()) +-- print('average',total/n) +-- +-- _source_ is defined as a string or a file-like object (i.e. has a read() method which returns the next line) +-- +-- See @{06-data.md.Reading_Unstructured_Text_Data|here} +-- +-- Dependencies: `pl.utils` +-- @module pl.input +local strfind = string.find +local strsub = string.sub +local strmatch = string.match +local utils = require 'pl.utils' +local unpack = utils.unpack +local pairs,type,tonumber = pairs,type,tonumber +local patterns = utils.patterns +local io = io + +local input = {} + +--- create an iterator over all tokens. +-- based on allwords from PiL, 7.1 +-- @func getter any function that returns a line of text +-- @string pattern +-- @string[opt] fn Optionally can pass a function to process each token as it's found. +-- @return an iterator +function input.alltokens (getter,pattern,fn) + local line = getter() -- current line + local pos = 1 -- current position in the line + return function () -- iterator function + while line do -- repeat while there are lines + local s, e = strfind(line, pattern, pos) + if s then -- found a word? + pos = e + 1 -- next position is after this token + local res = strsub(line, s, e) -- return the token + if fn then res = fn(res) end + return res + else + line = getter() -- token not found; try next line + pos = 1 -- restart from first position + end + end + return nil -- no more lines: end of traversal + end +end +local alltokens = input.alltokens + +-- question: shd this _split_ a string containing line feeds? + +--- create a function which grabs the next value from a source. If the source is a string, then the getter +-- will return the string and thereafter return nil. If not specified then the source is assumed to be stdin. +-- @param f a string or a file-like object (i.e. has a read() method which returns the next line) +-- @return a getter function +function input.create_getter(f) + if f then + if type(f) == 'string' then + local ls = utils.split(f,'\n') + local i,n = 0,#ls + return function() + i = i + 1 + if i > n then return nil end + return ls[i] + end + else + -- anything that supports the read() method! + if not f.read then error('not a file-like object') end + return function() return f:read() end + end + else + return io.read -- i.e. just read from stdin + end +end + +--- generate a sequence of numbers from a source. +-- @param f A source +-- @return An iterator +function input.numbers(f) + return alltokens(input.create_getter(f), + '('..patterns.FLOAT..')',tonumber) +end + +--- generate a sequence of words from a source. +-- @param f A source +-- @return An iterator +function input.words(f) + return alltokens(input.create_getter(f),"%w+") +end + +local function apply_tonumber (no_fail,...) + local args = {...} + for i = 1,#args do + local n = tonumber(args[i]) + if n == nil then + if not no_fail then return nil,args[i] end + else + args[i] = n + end + end + return args +end + +--- parse an input source into fields. +-- By default, will fail if it cannot convert a field to a number. +-- @param ids a list of field indices, or a maximum field index +-- @string delim delimiter to parse fields (default space) +-- @param f a source @see create_getter +-- @tab opts option table, `{no_fail=true}` +-- @return an iterator with the field values +-- @usage for x,y in fields {2,3} do print(x,y) end -- 2nd and 3rd fields from stdin +function input.fields (ids,delim,f,opts) + local sep + local s + local getter = input.create_getter(f) + local no_fail = opts and opts.no_fail + local no_convert = opts and opts.no_convert + if not delim or delim == ' ' then + delim = '%s' + sep = '%s+' + s = '%s*' + else + sep = delim + s = '' + end + local max_id = 0 + if type(ids) == 'table' then + for i,id in pairs(ids) do + if id > max_id then max_id = id end + end + else + max_id = ids + ids = {} + for i = 1,max_id do ids[#ids+1] = i end + end + local pat = '[^'..delim..']*' + local k = 1 + for i = 1,max_id do + if ids[k] == i then + k = k + 1 + s = s..'('..pat..')' + else + s = s..pat + end + if i < max_id then + s = s..sep + end + end + local linecount = 1 + return function() + local line,results,err + repeat + line = getter() + linecount = linecount + 1 + if not line then return nil end + if no_convert then + results = {strmatch(line,s)} + else + results,err = apply_tonumber(no_fail,strmatch(line,s)) + if not results then + utils.quit("line "..(linecount-1)..": cannot convert '"..err.."' to number") + end + end + until #results > 0 + return unpack(results) + end +end + +return input + |