Skip to content
Merged
4 changes: 2 additions & 2 deletions objdiff-cli/src/cmd/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ fn run_oneshot(
.transpose()?;
let result =
diff::diff_objs(target.as_ref(), base.as_ref(), None, &diff_config, &mapping_config)?;
let left = target.as_ref().and_then(|o| result.left.as_ref().map(|d| (o, d)));
let right = base.as_ref().and_then(|o| result.right.as_ref().map(|d| (o, d)));
let left = target.as_ref().zip(result.left.as_ref());
let right = base.as_ref().zip(result.right.as_ref());
let diff_result = DiffResult::new(left, right, &diff_config)?;
write_output(&diff_result, Some(output), output_format)?;
Ok(())
Expand Down
46 changes: 46 additions & 0 deletions objdiff-core/config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,51 @@
"name": "Combine text sections",
"description": "Combines all text sections into one."
},
{
"id": "preferredStringEncoding",
"type": "choice",
"default": "auto",
"name": "Preferred string encoding",
"description": "Which encoding to use when diffing string literals used in functions.",
"items": [
{
"value": "auto",
"name": "Auto"
},
{
"value": "ascii",
"name": "ASCII"
},
{
"value": "utf_8",
"name": "UTF-8"
},
{
"value": "shift_jis",
"name": "Shift JIS"
},
{
"value": "windows_1252",
"name": "Windows-1252"
},
{
"value": "euc_jp",
"name": "EUC-JP"
},
{
"value": "big5",
"name": "Big5"
},
{
"value": "utf_16be",
"name": "UTF-16BE"
},
{
"value": "utf_16le",
"name": "UTF-16LE"
}
]
},
{
"id": "arm.archVersion",
"type": "choice",
Expand Down Expand Up @@ -338,6 +383,7 @@
"id": "general",
"name": "General",
"properties": [
"preferredStringEncoding",
"functionRelocDiffs",
"demangler",
"showSymbolSizes",
Expand Down
122 changes: 85 additions & 37 deletions objdiff-core/src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use object::Endian as _;

use crate::{
diff::{
DiffObjConfig, DiffSide,
ConfigEnum, DiffObjConfig, DiffSide, PreferredStringEncoding,
display::{ContextItem, HoverItem, InstructionPart},
},
obj::{
Expand Down Expand Up @@ -52,6 +52,33 @@ const SUPPORTED_ENCODINGS_WITH_NULL_TERM: [(&encoding_rs::Encoding, &str); 5] =
const SUPPORTED_ENCODINGS_NO_NULL_TERM: [(&encoding_rs::Encoding, &str); 2] =
[(encoding_rs::UTF_16BE, "UTF-16BE"), (encoding_rs::UTF_16LE, "UTF-16LE")];

/// One rendering of the data referenced by an instruction, together with the
/// metadata needed to label it and to decide whether it should be shown.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct LiteralInfo {
    /// The literal's display text.
    pub literal: String,
    /// Label to show in place of the data type's own name, when set. Decoded
    /// strings store the name of the encoding they were decoded with here.
    pub label_override: Option<String>,
    /// Escaped variant of `literal`, when one was produced.
    pub copy_string: Option<String>,
    /// Default visibility flag. Only used when the user hasn't set a preferred
    /// string encoding.
    pub hidden: bool,
    /// Whether this literal is a decoded string.
    pub is_string: bool,
}

impl LiteralInfo {
    /// Returns whether this literal should be hidden under `diff_config`.
    ///
    /// The stored `hidden` flag is returned unchanged unless all of the
    /// following hold: a config is supplied, the literal is a string, the
    /// preferred string encoding is not `Auto`, and a label override is
    /// present. In that case the literal is hidden exactly when its label
    /// differs from the preferred encoding's name.
    pub fn hidden(&self, diff_config: Option<&DiffObjConfig>) -> bool {
        match (diff_config, &self.label_override) {
            (Some(config), Some(label))
                if self.is_string
                    && config.preferred_string_encoding != PreferredStringEncoding::Auto =>
            {
                *label != config.preferred_string_encoding.name()
            }
            // No config, not a string, automatic encoding, or no label:
            // fall back to the stored flag.
            _ => self.hidden,
        }
    }
}

/// Represents the type of data associated with an instruction
#[derive(PartialEq)]
pub enum DataType {
Expand Down Expand Up @@ -83,18 +110,14 @@ impl fmt::Display for DataType {
impl DataType {
pub fn display_labels(&self, endian: object::Endianness, bytes: &[u8]) -> Vec<String> {
let mut strs = Vec::new();
for (literal, label_override, _) in self.display_literals(endian, bytes) {
let label = label_override.unwrap_or_else(|| self.to_string());
strs.push(format!("{label}: {literal:?}"))
for lit_info in self.display_literals(endian, bytes) {
let label = lit_info.label_override.unwrap_or_else(|| self.to_string());
strs.push(format!("{}: {:?}", label, lit_info.literal))
}
strs
}

pub fn display_literals(
&self,
endian: object::Endianness,
bytes: &[u8],
) -> Vec<(String, Option<String>, Option<String>)> {
pub fn display_literals(&self, endian: object::Endianness, bytes: &[u8]) -> Vec<LiteralInfo> {
let mut strs = Vec::new();
if self.required_len().is_some_and(|l| bytes.len() < l) {
log::warn!(
Expand All @@ -118,60 +141,70 @@ impl DataType {
match self {
DataType::Int8 => {
let i = i8::from_ne_bytes(bytes.try_into().unwrap());
strs.push((format!("{i:#x}"), None, None));
strs.push(LiteralInfo { literal: format!("{i:#x}"), ..Default::default() });

if i < 0 {
strs.push((format!("{:#x}", ReallySigned(i)), None, None));
strs.push(LiteralInfo {
literal: format!("{:#x}", ReallySigned(i)),
..Default::default()
});
}
}
DataType::Int16 => {
let i = endian.read_i16_bytes(bytes.try_into().unwrap());
strs.push((format!("{i:#x}"), None, None));
strs.push(LiteralInfo { literal: format!("{i:#x}"), ..Default::default() });

if i < 0 {
strs.push((format!("{:#x}", ReallySigned(i)), None, None));
strs.push(LiteralInfo {
literal: format!("{:#x}", ReallySigned(i)),
..Default::default()
});
}
}
DataType::Int32 => {
let i = endian.read_i32_bytes(bytes.try_into().unwrap());
strs.push((format!("{i:#x}"), None, None));
strs.push(LiteralInfo { literal: format!("{i:#x}"), ..Default::default() });

if i < 0 {
strs.push((format!("{:#x}", ReallySigned(i)), None, None));
strs.push(LiteralInfo {
literal: format!("{:#x}", ReallySigned(i)),
..Default::default()
});
}
}
DataType::Int64 => {
let i = endian.read_i64_bytes(bytes.try_into().unwrap());
strs.push((format!("{i:#x}"), None, None));
strs.push(LiteralInfo { literal: format!("{i:#x}"), ..Default::default() });

if i < 0 {
strs.push((format!("{:#x}", ReallySigned(i)), None, None));
strs.push(LiteralInfo {
literal: format!("{:#x}", ReallySigned(i)),
..Default::default()
});
}
}
DataType::Float => {
let bytes: [u8; 4] = bytes.try_into().unwrap();
strs.push((
format!("{:?}f", match endian {
strs.push(LiteralInfo {
literal: format!("{:?}f", match endian {
object::Endianness::Little => f32::from_le_bytes(bytes),
object::Endianness::Big => f32::from_be_bytes(bytes),
}),
None,
None,
));
..Default::default()
});
}
DataType::Double => {
let bytes: [u8; 8] = bytes.try_into().unwrap();
strs.push((
format!("{:?}", match endian {
strs.push(LiteralInfo {
literal: format!("{:?}", match endian {
object::Endianness::Little => f64::from_le_bytes(bytes),
object::Endianness::Big => f64::from_be_bytes(bytes),
}),
None,
None,
));
..Default::default()
});
}
DataType::Bytes => {
strs.push((format!("{bytes:#?}"), None, None));
strs.push(LiteralInfo { literal: format!("{bytes:#?}"), ..Default::default() });
}
DataType::String => {
if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') {
Expand All @@ -181,15 +214,28 @@ impl DataType {
if !had_errors && cow.is_ascii() {
let string = format!("{cow}");
let copy_string = escape_special_ascii_characters(&string);
strs.push((string, Some("ASCII".into()), Some(copy_string)));
strs.push(LiteralInfo {
literal: string,
label_override: Some("ASCII".into()),
copy_string: Some(copy_string),
hidden: false,
is_string: true,
});
}
for (encoding, encoding_name) in SUPPORTED_ENCODINGS_WITH_NULL_TERM {
let (cow, _, had_errors) = encoding.decode(str_bytes);
// Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible.
if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) {
if !had_errors {
let string = format!("{cow}");
let copy_string = escape_special_ascii_characters(&string);
strs.push((string, Some(encoding_name.into()), Some(copy_string)));
// Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible.
let hidden = encoding.is_ascii_compatible() && cow.is_ascii();
strs.push(LiteralInfo {
literal: string,
label_override: Some(encoding_name.into()),
copy_string: Some(copy_string),
hidden,
is_string: true,
});
}
}
}
Expand All @@ -202,11 +248,13 @@ impl DataType {
let trimmed = cow.trim_end_matches('\0');
if !trimmed.is_empty() {
let copy_string = escape_special_ascii_characters(trimmed);
strs.push((
trimmed.to_string(),
Some(encoding_name.into()),
Some(copy_string),
));
strs.push(LiteralInfo {
literal: trimmed.to_string(),
label_override: Some(encoding_name.into()),
copy_string: Some(copy_string),
hidden: false,
is_string: true,
});
}
}
}
Expand Down
27 changes: 23 additions & 4 deletions objdiff-core/src/diff/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use anyhow::{Context, Result, anyhow, ensure};

use super::{
DiffObjConfig, FunctionRelocDiffs, InstructionArgDiffIndex, InstructionBranchFrom,
InstructionBranchTo, InstructionDiffKind, InstructionDiffRow, SymbolDiff,
display::display_ins_data_literals,
InstructionBranchTo, InstructionDiffKind, InstructionDiffRow, PreferredStringEncoding,
SymbolDiff, display::display_ins_data_literals,
};
use crate::obj::{
InstructionArg, InstructionArgValue, InstructionRef, Object, ResolvedInstructionRef,
Expand Down Expand Up @@ -296,6 +296,26 @@ pub(crate) fn section_name_eq(
})
}

/// Returns whether the data literals of two instructions should be treated as
/// equal under `diff_config`.
///
/// The complete literal lists are compared first. If they differ and a
/// preferred string encoding other than `Auto` is configured, the comparison
/// is repeated over only the literals that are not hidden under that
/// configuration. With `Auto`, differing lists are simply unequal.
fn ins_data_literals_eq(
    left_obj: &Object,
    right_obj: &Object,
    left_ins: ResolvedInstructionRef,
    right_ins: ResolvedInstructionRef,
    diff_config: &DiffObjConfig,
) -> bool {
    let left_literals = display_ins_data_literals(left_obj, left_ins);
    let right_literals = display_ins_data_literals(right_obj, right_ins);
    if left_literals == right_literals {
        return true;
    }
    if diff_config.preferred_string_encoding == PreferredStringEncoding::Auto {
        // The lists are already known to differ; no filtering applies here.
        return false;
    }
    // Compare only the literals that stay visible for the preferred encoding.
    // `Iterator::eq` checks both element equality and length, matching a
    // comparison of the filtered vectors without mutating either list.
    left_literals
        .iter()
        .filter(|lit_info| !lit_info.hidden(Some(diff_config)))
        .eq(right_literals.iter().filter(|lit_info| !lit_info.hidden(Some(diff_config))))
}

fn reloc_eq(
left_obj: &Object,
right_obj: &Object,
Expand Down Expand Up @@ -330,8 +350,7 @@ fn reloc_eq(
&& (diff_config.function_reloc_diffs == FunctionRelocDiffs::NameAddress
|| left_reloc.symbol.kind != SymbolKind::Object
|| right_reloc.symbol.size == 0 // Likely a pool symbol like ...data, don't treat this as a diff
|| display_ins_data_literals(left_obj, left_ins)
== display_ins_data_literals(right_obj, right_ins))
|| ins_data_literals_eq(left_obj, right_obj, left_ins, right_ins, diff_config))
}
(Some(_), None) | (None, Some(_)) | (None, None) => symbol_name_addend_matches,
}
Expand Down
Loading
Loading