Skip to content

Commit 7073b13

Browse files
committed
Add FaceDatDecoder for EIF Face.dat parsing
1 parent 7de71df commit 7073b13

File tree

1 file changed

+239
-0
lines changed

1 file changed

+239
-0
lines changed
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
// Copyright © 2017-2025 QL-Win Contributors
2+
//
3+
// This file is part of QuickLook program.
4+
//
5+
// This program is free software: you can redistribute it and/or modify
6+
// it under the terms of the GNU General Public License as published by
7+
// the Free Software Foundation, either version 3 of the License, or
8+
// (at your option) any later version.
9+
//
10+
// This program is distributed in the hope that it will be useful,
11+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
// GNU General Public License for more details.
14+
//
15+
// You should have received a copy of the GNU General Public License
16+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
18+
using System;
19+
using System.Collections.Generic;
20+
using System.Text;
21+
22+
namespace QuickLook.Plugin.ArchiveViewer.CompoundFileBinary;
23+
24+
/// <summary>
/// Decoder for the <c>Face.dat</c> index used by QQ EIF packages.
/// Re-implements the behavior of the reference Python scripts. For every LF-separated line it:
/// - finds the marker sequence <c>e_str_file_org</c>,
/// - skips the 4 bytes that follow the marker (same as the Python implementation),
/// - locates a repeating key byte and extracts the XOR-encrypted block around it,
/// - XOR-decodes the block and parses the resulting <c>group\filename</c> entry.
/// The result is the same group -> (filename -> index) mapping that the Python tool builds.
/// </summary>
public static class FaceDatDecoder
{
    /// <summary>
    /// Marker sequence used in the Python script (<c>e_str_file_org</c>).
    /// XOR-ing every byte with 0xEB yields the UTF-16LE text "strFileOrg".
    /// </summary>
    private static readonly byte[] EStrFileOrg = [0x98, 0xEB, 0x9F, 0xEB, 0x99, 0xEB, 0xAD, 0xEB, 0x82, 0xEB, 0x87, 0xEB, 0x8E, 0xEB, 0x84, 0xEB, 0x99, 0xEB, 0x8C, 0xEB];

    /// <summary>
    /// Number of bytes skipped right after the marker before the key search starts.
    /// </summary>
    private const int BytesSkippedAfterMarker = 4;

    /// <summary>
    /// Prefix that a decoded entry must start with whenever the entry contains a colon.
    /// </summary>
    private const string CustomFacePrefix = "UserDataCustomFace";

    /// <summary>
    /// Decodes Face.dat into a mapping of group name to a dictionary mapping file name to its
    /// index within the group. This matches the Python script's <c>group_dict</c> structure.
    /// </summary>
    /// <param name="fileBytes">The raw bytes of Face.dat. May be <c>null</c> or empty.</param>
    /// <returns>Nested dictionary: group -> (filename -> index). Never <c>null</c>.</returns>
    public static Dictionary<string, Dictionary<string, int>> Decode(byte[] fileBytes)
    {
        return BuildGroupIndex(fileBytes);
    }

    /// <summary>
    /// Builds the group index mapping from Face.dat bytes like the Python script does.
    /// Group names and file names are compared with <see cref="StringComparer.OrdinalIgnoreCase"/>.
    /// A file's index is the number of distinct files already registered in its group when the
    /// file is first seen, so indices are zero-based and follow order of first appearance.
    /// </summary>
    /// <param name="fileBytes">The raw bytes of Face.dat. May be <c>null</c> or empty.</param>
    /// <returns>Dictionary where key is group name and value maps filename to index.</returns>
    public static Dictionary<string, Dictionary<string, int>> BuildGroupIndex(byte[] fileBytes)
    {
        var result = new Dictionary<string, Dictionary<string, int>>(StringComparer.OrdinalIgnoreCase);
        if (fileBytes == null || fileBytes.Length == 0)
            return result;

        // Split into lines by LF. The loop runs one step past the end so that the final
        // line is processed even when the file does not end with a line feed.
        int lineStart = 0;
        for (int i = 0; i <= fileBytes.Length; i++)
        {
            bool atLineEnd = i == fileBytes.Length || fileBytes[i] == (byte)'\n';
            if (!atLineEnd)
                continue;

            int len = i - lineStart;
            if (len > 0)
            {
                // Trim a single trailing CR so CRLF files behave like LF files.
                if (fileBytes[lineStart + len - 1] == (byte)'\r')
                    len--;

                var line = new byte[len];
                Buffer.BlockCopy(fileBytes, lineStart, line, 0, len);

                ProcessLineForIndex(line, result);
            }

            lineStart = i + 1;
        }

        return result;
    }

    /// <summary>
    /// Processes a single raw line and registers its <c>group\filename</c> entry in
    /// <paramref name="groupDict"/> when the line contains a decodable entry.
    /// Lines without the marker, without a key, or with an unexpected layout are ignored.
    /// </summary>
    /// <param name="line">Raw bytes of one line, without the line terminator.</param>
    /// <param name="groupDict">Mapping that receives the decoded entry.</param>
    private static void ProcessLineForIndex(byte[] line, Dictionary<string, Dictionary<string, int>> groupDict)
    {
        int markerIndex = IndexOfSequence(line, EStrFileOrg, 0);
        if (markerIndex == -1)
            return;

        // The payload starts after the marker plus 4 more bytes (matches Python behavior).
        int partStart = markerIndex + EStrFileOrg.Length + BytesSkippedAfterMarker;
        if (partStart >= line.Length)
            return;

        int partLen = line.Length - partStart;
        var part = new byte[partLen];
        Buffer.BlockCopy(line, partStart, part, 0, partLen);

        var (key, keyIndex) = FindKey(part, 0);
        if (key == null)
            return;

        var (encrypted, _) = GetPart(part, key.Value, keyIndex);

        string decoded = XorDecodeToString(encrypted, key.Value);
        if (string.IsNullOrEmpty(decoded))
            return;

        // If the decoded text contains a colon, it is expected to start with the prefix.
        string remainder;
        var colonParts = decoded.Split([':'], 2);
        if (colonParts.Length > 1)
        {
            if (!decoded.StartsWith(CustomFacePrefix, StringComparison.Ordinal))
                return; // same as Python: skip if prefix missing

            // Strip the prefix and the single character that follows it.
            int removeLen = CustomFacePrefix.Length + 1;
            if (decoded.Length <= removeLen)
                return;
            remainder = decoded.Substring(removeLen);
        }
        else
        {
            remainder = decoded;
        }

        // The first two backslash-separated segments are the group and the filename.
        var segments = remainder.Split(['\\'], StringSplitOptions.None);
        if (segments.Length < 2)
            return;

        string group = segments[0];
        string filename = segments[1];

        if (!groupDict.TryGetValue(group, out var files))
        {
            files = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
            groupDict[group] = files;
        }

        // Only the first occurrence of a filename gets an index; repeats keep the original one.
        if (!files.ContainsKey(filename))
        {
            files[filename] = files.Count;
        }
    }

    /// <summary>
    /// Locates the first occurrence of <paramref name="seq"/> in <paramref name="data"/> at or
    /// after <paramref name="fromIndex"/>.
    /// </summary>
    /// <param name="data">Bytes to search.</param>
    /// <param name="seq">Byte sequence to look for.</param>
    /// <param name="fromIndex">Index at which the search starts.</param>
    /// <returns>Index of the first match, or -1 if there is none.</returns>
    private static int IndexOfSequence(byte[] data, byte[] seq, int fromIndex)
    {
        if (seq.Length == 0)
            return fromIndex <= data.Length ? fromIndex : -1;

        int lastCandidate = data.Length - seq.Length;
        for (int i = fromIndex; i <= lastCandidate; i++)
        {
            int matched = 0;
            while (matched < seq.Length && data[i + matched] == seq[matched])
                matched++;

            if (matched == seq.Length)
                return i;
        }
        return -1;
    }

    /// <summary>
    /// Equivalent to Python find_key: finds the first byte that repeats at offsets +2 and +4.
    /// </summary>
    /// <param name="data">Bytes to scan.</param>
    /// <param name="startIdx">Index at which the scan starts.</param>
    /// <returns>The key byte and its index, or <c>(null, 0)</c> when no such byte exists.</returns>
    private static (byte? key, int seek) FindKey(byte[] data, int startIdx)
    {
        for (int i = startIdx; i + 4 < data.Length; i++)
        {
            byte candidate = data[i];
            if (candidate == data[i + 2] && candidate == data[i + 4])
                return (candidate, i);
        }
        return (null, 0);
    }

    /// <summary>
    /// Equivalent to Python get_part: extracts the encrypted bytes from <c>startIdx - 1</c> up to
    /// (but not including) <c>end</c>, where <c>end</c> is one less than the first index in the
    /// sequence startIdx, startIdx + 2, ... whose byte differs from <paramref name="key"/>,
    /// or the last index of <paramref name="data"/> when every such byte equals the key.
    /// </summary>
    /// <param name="data">Bytes that follow the marker.</param>
    /// <param name="key">Key byte returned by <see cref="FindKey"/>.</param>
    /// <param name="startIdx">Index of the first key byte.</param>
    /// <returns>The extracted bytes (possibly empty) and the computed end index.</returns>
    private static (byte[] part, int end) GetPart(byte[] data, byte key, int startIdx)
    {
        int end = 0;
        for (int i = startIdx; i < data.Length; i += 2)
        {
            if (data[i] != key)
            {
                end = i - 1;
                break;
            }
        }
        if (end == 0)
            end = data.Length - 1;

        int start = startIdx - 1;

        // When the key sits at index 0 there is no byte in front of it. The Python slice
        // data[-1:end] is empty in that case, whereas Buffer.BlockCopy would throw on the
        // negative offset, so report "nothing to decode" instead of crashing on such a line.
        if (start < 0)
            return (Array.Empty<byte>(), end);

        int length = end - start; // Python slice end is exclusive => length = end - (startIdx-1)
        if (length <= 0)
            return (Array.Empty<byte>(), end);

        var part = new byte[length];
        Buffer.BlockCopy(data, start, part, 0, length);
        return (part, end);
    }

    /// <summary>
    /// XOR-decodes every byte with <paramref name="key"/> and appends each non-zero result to
    /// the output as a single character (zero bytes are ignored, matching Python behavior).
    /// </summary>
    /// <param name="data">Encrypted bytes; <c>null</c> or empty yields an empty string.</param>
    /// <param name="key">XOR key byte.</param>
    /// <returns>The decoded text.</returns>
    private static string XorDecodeToString(byte[] data, byte key)
    {
        if (data == null || data.Length == 0)
            return string.Empty;

        var sb = new StringBuilder(data.Length);
        foreach (var b in data)
        {
            byte v = (byte)(b ^ key);
            if (v != 0)
                sb.Append((char)v);
        }
        return sb.ToString();
    }
}

0 commit comments

Comments
 (0)