1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
|
-- This library was written to help find the ID of a known
-- action message corresponding to an entry in the dialog tables.
-- While the IDs can be collected in-game, they occasionally
-- change and would otherwise need to be manually updated.
-- It can also be used to find and decode an entry given the ID.
-- Common parameters:
--
-- dat: Either the entire content of the zone dialog DAT file (as a
-- string) or an open file handle for that file.
-- e.g. either local dat = io.open('path/to/dialog/DAT', 'rb')
-- or dat = dat:read('*a')
-- The functions are expected to be faster when passed a string,
-- but will use less memory when receiving a file handle.
--
-- entry: The string you are looking for. Whether or not the string
-- is expected to be encoded should be indicated in the parameter's
-- name. If you do not know the entire string, use
-- dialog.dev.find_substring to find the entry's ID, fetch the entry
-- with dialog.get_entry, and serialize the result with dialog.serialize.
local xor = require('bit').bxor
local floor = require('math').floor
local string = require('string')
local find = string.find
local sub = string.sub
local gsub = string.gsub
local format = string.format
local char = string.char
local byte = string.byte
require('pack')
local unpack = string.unpack
local pack = string.pack
-- Mask with the high bit of each of the four bytes set.
local BYTE_HIGH_BITS = 0x80808080

-- Flips the high bit of every byte of a 32-bit integer by XORing it
-- with 0x80808080. Applying the function twice gives back the input,
-- which is why the same function is used for both directions.
local function decode(int)
    return xor(int, BYTE_HIGH_BITS)
end

-- Encoding is the same operation as decoding.
local encode = decode
-- Binary searches the offset table at the start of the DAT.
--
-- pos: the value to look up, compared against the decoded table offsets.
-- dat: the DAT content as a string (file handles are not handled here).
-- n:   the number of entries in the offset table.
--
-- Returns the 0-based ID of the last entry whose decoded offset is
-- strictly less than pos, or -1 when no entry has an offset below pos.
local function binary_search(pos, dat, n)
    -- Search the half-open range [1, n + 1). The upper bound has to sit
    -- one past the final slot: with an upper bound of n the last slot's
    -- offset is never examined and ID n - 1 could never be returned.
    local l, r = 1, n + 1
    while l < r do
        local m = floor((l + r) / 2)
        -- The offset of the mth entry (1-based) is stored at byte 1 + 4 * m.
        if decode(unpack('<I', dat, 1 + 4 * m)) < pos then
            -- offset given by mth ID < offset to string
            l = m + 1
        else
            r = m
        end
    end
    -- l is the 1-based index of the first entry whose offset is >= pos.
    -- The entry to its left has 1-based index l - 1, i.e. 0-based ID l - 2.
    return l - 2
end
-- Builds an iterator over every occurrence of substring in text using a
-- plain (non-pattern) search.
--
-- text:      the string to search.
-- substring: the literal text to look for.
-- n:         optional 1-based position to start from (defaults to 1).
--
-- Each call of the returned function yields the start and end position
-- of the next occurrence, or nil once there are no more. The search
-- resumes one byte after the previous start, so overlapping occurrences
-- are reported as well.
local function plain_text_gmatch(text, substring, n)
    local cursor = n or 1
    local function next_match()
        local first, last = find(text, substring, cursor, true)
        if first ~= nil then
            cursor = first + 1
        end
        return first, last
    end
    return next_match
end
local dialog = {}

-- Returns the number of entries in the given dialog DAT file.
--
-- dat: the DAT content as a string, or an open file handle.
--
-- The count is the decoded 32-bit value stored at byte offset 4 of the
-- file (the first slot of the offset table) divided by 4.
function dialog.entry_count(dat)
    local first_offset
    if type(dat) ~= 'userdata' then
        -- Byte offset 4 of the file is position 5 of the string.
        first_offset = decode(unpack('<I', dat, 5))
    else
        dat:seek('set', 4)
        first_offset = decode(unpack('<I', dat:read(4)))
    end
    return first_offset / 4
end
-- Returns an array-like table containing every ID whose entry is
-- exactly the given encoded entry. Note that the tables contain an
-- enormous number of duplicate entries, so several IDs may come back.
--
-- dat:           the DAT content as a string, or an open file handle.
-- encoded_entry: the complete entry, already encoded.
--
-- Returns an empty table when nothing matches (or when dat is neither
-- a string nor a file handle).
function dialog.get_ids_matching_entry(dat, encoded_entry)
    local res = {}
    local n = 0
    local entry_length = #encoded_entry
    if type(dat) == 'string' then
        -- The first slot holds the offset of the first entry. Offsets are
        -- relative to byte 4 of the file and the table starts there, so
        -- this value is also the 0-based file position of the last slot.
        local last_offset = decode(unpack('<I', dat, 5))
        -- Header plus offset table only. Looking offsets up in this slice
        -- keeps a lookup from scanning, or accidentally matching, entry
        -- text that happens to contain the same four bytes.
        local offset_table = sub(dat, 1, last_offset + 4)
        local start = 5
        -- Entry text begins right after the table, at position last_offset + 5.
        for head in plain_text_gmatch(dat, encoded_entry, last_offset + 5) do
            -- If an entry starts at head, the table lists it as this value.
            local encoded_pos = pack('<I', encode(head - 5))
            local slot = find(offset_table, encoded_pos, start, true)
            -- Slots sit at positions 5, 9, 13, ...; skip hits that straddle
            -- two neighbouring slots.
            while slot and (slot - 1) % 4 ~= 0 do
                slot = find(offset_table, encoded_pos, slot + 1, true)
            end
            if slot then
                local offset = slot - 1 -- 0-based file position of the slot
                local next_pos
                if offset == last_offset then
                    -- Last entry: it runs to the end of the file.
                    next_pos = #dat + 1
                else
                    next_pos = decode(unpack('<I', dat, offset + 5)) + 5
                end
                -- Only count the match when it covers the whole entry.
                if next_pos - head == entry_length then
                    n = n + 1
                    res[n] = (offset - 4) / 4
                end
                -- Later matches have larger offsets, so their slots cannot
                -- come before this one.
                start = slot
            end
        end
    elseif type(dat) == 'userdata' then
        dat:seek('set', 4)
        local offset = decode(unpack('<I', dat:read(4)))
        local entry_count = offset / 4
        -- Walk the table: entry i - 1 spans from its own offset up to the
        -- offset stored for entry i.
        for i = 1, entry_count - 1 do
            dat:seek('set', 4 * i + 4)
            local next_offset = decode(unpack('<I', dat:read(4)))
            -- Only read the entry's bytes when the length already matches.
            if next_offset - offset == entry_length then
                dat:seek('set', offset + 4)
                if dat:read(entry_length) == encoded_entry then
                    n = n + 1
                    res[n] = i - 1
                end
            end
            offset = next_offset
        end
        -- The last entry has no following slot; it ends at the end of the file.
        local m = dat:seek('end')
        if m - offset - 4 == entry_length then
            dat:seek('set', offset + 4)
            if dat:read(entry_length) == encoded_entry then
                n = n + 1
                res[n] = entry_count - 1
            end
        end
    end
    return res
end
-- Returns the encoded entry from a given dialog table. If you
-- want to decode the entry, use dialog.decode_string.
--
-- dat: the DAT content as a string, or an open file handle.
-- id:  the 0-based ID of the entry.
--
-- Returns nil when id is not a non-negative whole number below the
-- entry count, or when dat is neither a string nor a file handle.
function dialog.get_entry(dat, id)
    -- Normalise once so that a numeric string behaves like the number it
    -- represents in the comparisons below.
    local index = tonumber(id)
    if not index or index < 0 or index % 1 ~= 0 then
        return nil
    end
    if type(dat) == 'string' then
        local entry_count = decode(unpack('<I', dat, 5)) / 4
        if index >= entry_count then
            return nil
        end
        local offset, next_offset
        if index == entry_count - 1 then
            -- The last entry has no following slot; it runs to the end.
            offset = decode(unpack('<I', dat, 4 * index + 5)) + 5
            next_offset = #dat + 1
        else
            -- Read this entry's offset and the next one's in a single call.
            offset, next_offset = unpack('<II', dat, 4 * index + 5)
            offset, next_offset = decode(offset) + 5, decode(next_offset) + 5
        end
        return sub(dat, offset, next_offset - 1)
    elseif type(dat) == 'userdata' then
        dat:seek('set', 4)
        local entry_count = decode(unpack('<I', dat:read(4))) / 4
        if index >= entry_count then
            return nil
        end
        dat:seek('set', 4 * index + 4)
        local offset, next_offset
        if index == entry_count - 1 then
            offset = decode(unpack('<I', dat:read(4)))
            -- Offsets are relative to byte 4 of the file, so the end of the
            -- file corresponds to the offset (file size - 4).
            next_offset = dat:seek('end') - 4
        else
            offset, next_offset = unpack('<II', dat:read(8))
            offset, next_offset = decode(offset), decode(next_offset)
        end
        dat:seek('set', offset + 4)
        return dat:read(next_offset - offset)
    end
    return nil
end
-- Produces Lua source text of the form string.char(b1,b2,...) listing
-- the decimal value of every byte of the entry, so the result can be
-- copied and pasted into the contents of an addon.
function dialog.serialize(entry)
    -- Turns one character into its byte value followed by a comma.
    local function to_code(c)
        return tostring(string.byte(c)) .. ','
    end
    -- Assigning to a single local keeps only gsub's first result.
    local listed = gsub(entry, '.', to_code)
    -- Drop the comma left behind by the final byte.
    local arguments = sub(listed, 1, -2)
    return 'string.char(' .. arguments .. ')'
end
-- Encodes a string by flipping the high bit of every byte (XOR with
-- 0x80). The transformation is its own inverse, so the same function is
-- exported as dialog.decode_string.
--
-- s: the string to convert.
--
-- Returns the converted string, and nothing else.
function dialog.encode_string(s)
    -- gsub returns the number of substitutions as a second value; the
    -- outer parentheses drop it so callers that forward the result (for
    -- example into table.insert or print) receive exactly one value.
    return (gsub(s, '.', function(c)
        return char(xor(byte(c), 0x80))
    end))
end
dialog.decode_string = dialog.encode_string
dialog.dev = {}

-- Returns the decoded offset-table value for the entry with the given
-- ID, formatted as an eight digit hex string such as 0x0000ABCD.
-- May be useful if you are viewing the file in a hex editor.
-- NOTE(review): the other functions in this file seek to this value
-- plus 4 to reach the entry, so the absolute file position is presumably
-- 4 bytes further on -- confirm before relying on it.
--
-- dat: the DAT content as a string, or an open file handle.
-- id:  the 0-based ID of the entry.
function dialog.dev.get_offset(dat, id)
    local kind = type(dat)
    local raw
    if kind == 'userdata' then
        dat:seek('set', 4 * id + 4)
        raw = unpack('<I', dat:read(4))
    elseif kind == 'string' then
        raw = unpack('<I', dat, 5 + 4 * id)
    end
    return format('0x%08X', decode(raw))
end
-- This function is intended to be used only during development
-- to find the ID of a dialog entry given a substring.
-- This is necessary because SE uses certain bytes to indicate
-- things like placeholders or pauses and it is unlikely you
-- will know the entire content of the entry you're looking for
-- from the get-go.
--
-- dat:              the DAT content as a string, or an open file handle
--                   (a handle is read into memory in full).
-- unencoded_string: the plain text to look for.
--
-- Returns an array-like table with one ID per occurrence of the
-- substring, in file order; an entry containing the substring more
-- than once is listed once per occurrence. Prints a message and
-- returns nothing when there is no occurrence at all.
function dialog.dev.find_substring(dat, unencoded_string)
    if type(dat) == 'userdata' then
        -- The search below works on a string, so load the whole file.
        dat:seek('set', 0)
        dat = dat:read('*a')
    end
    -- Position of the first byte after the offset table, where the entry
    -- text begins.
    local text_start = decode(unpack('<I', dat, 5)) + 5
    local needle = dialog.encode_string(unencoded_string)
    local res = {}
    local n = 0
    for head in plain_text_gmatch(dat, needle, text_start) do
        n = n + 1
        res[n] = head
    end
    if n == 0 then print('No results for ', unencoded_string) return end
    local entry_count = (text_start - 5) / 4
    for i = 1, n do
        -- Table offsets are relative to byte 4 of the file, so a match at
        -- string position head has the offset head - 5. binary_search
        -- returns the last entry whose offset is strictly below its
        -- argument, hence the + 1: the entry holding the match is the last
        -- one whose offset is <= head - 5.
        res[i] = binary_search(res[i] - 4, dat, entry_count)
    end
    return res
end
return dialog
|